datachain 0.8.2__tar.gz → 0.8.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (305) hide show
  1. {datachain-0.8.2 → datachain-0.8.4}/.github/workflows/tests-studio.yml +1 -1
  2. {datachain-0.8.2 → datachain-0.8.4}/.github/workflows/tests.yml +1 -1
  3. {datachain-0.8.2 → datachain-0.8.4}/.pre-commit-config.yaml +1 -1
  4. {datachain-0.8.2 → datachain-0.8.4}/PKG-INFO +6 -6
  5. {datachain-0.8.2 → datachain-0.8.4}/mkdocs.yml +1 -0
  6. {datachain-0.8.2 → datachain-0.8.4}/pyproject.toml +5 -5
  7. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/cache.py +4 -2
  8. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/catalog/catalog.py +100 -54
  9. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/catalog/datasource.py +4 -6
  10. datachain-0.8.4/src/datachain/cli/__init__.py +311 -0
  11. datachain-0.8.4/src/datachain/cli/commands/__init__.py +29 -0
  12. datachain-0.8.4/src/datachain/cli/commands/datasets.py +129 -0
  13. datachain-0.8.4/src/datachain/cli/commands/du.py +14 -0
  14. datachain-0.8.4/src/datachain/cli/commands/index.py +12 -0
  15. datachain-0.8.4/src/datachain/cli/commands/ls.py +169 -0
  16. datachain-0.8.4/src/datachain/cli/commands/misc.py +28 -0
  17. datachain-0.8.4/src/datachain/cli/commands/query.py +53 -0
  18. datachain-0.8.4/src/datachain/cli/commands/show.py +38 -0
  19. datachain-0.8.4/src/datachain/cli/parser/__init__.py +547 -0
  20. datachain-0.8.4/src/datachain/cli/parser/job.py +120 -0
  21. datachain-0.8.4/src/datachain/cli/parser/studio.py +126 -0
  22. datachain-0.8.4/src/datachain/cli/parser/utils.py +63 -0
  23. datachain-0.8.2/src/datachain/cli_utils.py → datachain-0.8.4/src/datachain/cli/utils.py +27 -1
  24. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/azure.py +21 -1
  25. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/fsspec.py +45 -13
  26. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/gcs.py +10 -2
  27. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/local.py +4 -4
  28. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/s3.py +10 -0
  29. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/dataset.py +1 -0
  30. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/__init__.py +2 -2
  31. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/conditional.py +52 -0
  32. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/func.py +5 -1
  33. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/arrow.py +4 -0
  34. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/dc.py +18 -3
  35. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/file.py +1 -1
  36. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/listing.py +36 -3
  37. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/signal_schema.py +89 -27
  38. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/listing.py +1 -5
  39. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/node.py +27 -1
  40. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/progress.py +2 -2
  41. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/session.py +1 -1
  42. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/studio.py +58 -38
  43. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/utils.py +1 -1
  44. {datachain-0.8.2 → datachain-0.8.4}/src/datachain.egg-info/PKG-INFO +6 -6
  45. {datachain-0.8.2 → datachain-0.8.4}/src/datachain.egg-info/SOURCES.txt +15 -2
  46. {datachain-0.8.2 → datachain-0.8.4}/src/datachain.egg-info/requires.txt +5 -5
  47. {datachain-0.8.2 → datachain-0.8.4}/tests/conftest.py +1 -1
  48. datachain-0.8.4/tests/func/fake-service-account-credentials.json +9 -0
  49. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_catalog.py +150 -12
  50. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_datachain.py +6 -2
  51. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_pull.py +1 -0
  52. {datachain-0.8.2 → datachain-0.8.4}/tests/test_cli_e2e.py +6 -6
  53. {datachain-0.8.2 → datachain-0.8.4}/tests/test_cli_studio.py +18 -15
  54. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_arrow.py +9 -0
  55. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_datachain.py +13 -5
  56. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_signal_schema.py +280 -32
  57. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/test_conditional.py +43 -0
  58. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_cli_parsing.py +2 -17
  59. datachain-0.8.4/tests/unit/test_client_gcs.py +14 -0
  60. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_client_s3.py +6 -0
  61. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_config.py +9 -9
  62. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_func.py +19 -1
  63. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_listing.py +1 -1
  64. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_session.py +5 -0
  65. datachain-0.8.2/src/datachain/cli.py +0 -1475
  66. datachain-0.8.2/tests/unit/test_client_gcs.py +0 -6
  67. {datachain-0.8.2 → datachain-0.8.4}/.cruft.json +0 -0
  68. {datachain-0.8.2 → datachain-0.8.4}/.gitattributes +0 -0
  69. {datachain-0.8.2 → datachain-0.8.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  70. {datachain-0.8.2 → datachain-0.8.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  71. {datachain-0.8.2 → datachain-0.8.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  72. {datachain-0.8.2 → datachain-0.8.4}/.github/codecov.yaml +0 -0
  73. {datachain-0.8.2 → datachain-0.8.4}/.github/dependabot.yml +0 -0
  74. {datachain-0.8.2 → datachain-0.8.4}/.github/workflows/benchmarks.yml +0 -0
  75. {datachain-0.8.2 → datachain-0.8.4}/.github/workflows/release.yml +0 -0
  76. {datachain-0.8.2 → datachain-0.8.4}/.github/workflows/update-template.yaml +0 -0
  77. {datachain-0.8.2 → datachain-0.8.4}/.gitignore +0 -0
  78. {datachain-0.8.2 → datachain-0.8.4}/CODE_OF_CONDUCT.rst +0 -0
  79. {datachain-0.8.2 → datachain-0.8.4}/LICENSE +0 -0
  80. {datachain-0.8.2 → datachain-0.8.4}/README.rst +0 -0
  81. {datachain-0.8.2 → datachain-0.8.4}/docs/assets/captioned_cartoons.png +0 -0
  82. {datachain-0.8.2 → datachain-0.8.4}/docs/assets/datachain-white.svg +0 -0
  83. {datachain-0.8.2 → datachain-0.8.4}/docs/assets/datachain.svg +0 -0
  84. {datachain-0.8.2 → datachain-0.8.4}/docs/contributing.md +0 -0
  85. {datachain-0.8.2 → datachain-0.8.4}/docs/css/github-permalink-style.css +0 -0
  86. {datachain-0.8.2 → datachain-0.8.4}/docs/examples.md +0 -0
  87. {datachain-0.8.2 → datachain-0.8.4}/docs/index.md +0 -0
  88. {datachain-0.8.2 → datachain-0.8.4}/docs/overrides/main.html +0 -0
  89. {datachain-0.8.2 → datachain-0.8.4}/docs/quick-start.md +0 -0
  90. {datachain-0.8.2 → datachain-0.8.4}/docs/references/datachain.md +0 -0
  91. {datachain-0.8.2 → datachain-0.8.4}/docs/references/datatype.md +0 -0
  92. {datachain-0.8.2 → datachain-0.8.4}/docs/references/file.md +0 -0
  93. {datachain-0.8.2 → datachain-0.8.4}/docs/references/index.md +0 -0
  94. {datachain-0.8.2 → datachain-0.8.4}/docs/references/sql.md +0 -0
  95. {datachain-0.8.2 → datachain-0.8.4}/docs/references/torch.md +0 -0
  96. {datachain-0.8.2 → datachain-0.8.4}/docs/references/udf.md +0 -0
  97. {datachain-0.8.2 → datachain-0.8.4}/docs/tutorials.md +0 -0
  98. {datachain-0.8.2 → datachain-0.8.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  99. {datachain-0.8.2 → datachain-0.8.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  100. {datachain-0.8.2 → datachain-0.8.4}/examples/computer_vision/openimage-detect.py +0 -0
  101. {datachain-0.8.2 → datachain-0.8.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
  102. {datachain-0.8.2 → datachain-0.8.4}/examples/computer_vision/ultralytics-pose.py +0 -0
  103. {datachain-0.8.2 → datachain-0.8.4}/examples/computer_vision/ultralytics-segment.py +0 -0
  104. {datachain-0.8.2 → datachain-0.8.4}/examples/get_started/common_sql_functions.py +0 -0
  105. {datachain-0.8.2 → datachain-0.8.4}/examples/get_started/json-csv-reader.py +0 -0
  106. {datachain-0.8.2 → datachain-0.8.4}/examples/get_started/torch-loader.py +0 -0
  107. {datachain-0.8.2 → datachain-0.8.4}/examples/get_started/udfs/parallel.py +0 -0
  108. {datachain-0.8.2 → datachain-0.8.4}/examples/get_started/udfs/simple.py +0 -0
  109. {datachain-0.8.2 → datachain-0.8.4}/examples/get_started/udfs/stateful.py +0 -0
  110. {datachain-0.8.2 → datachain-0.8.4}/examples/llm_and_nlp/claude-query.py +0 -0
  111. {datachain-0.8.2 → datachain-0.8.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  112. {datachain-0.8.2 → datachain-0.8.4}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  113. {datachain-0.8.2 → datachain-0.8.4}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  114. {datachain-0.8.2 → datachain-0.8.4}/examples/multimodal/clip_inference.py +0 -0
  115. {datachain-0.8.2 → datachain-0.8.4}/examples/multimodal/hf_pipeline.py +0 -0
  116. {datachain-0.8.2 → datachain-0.8.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
  117. {datachain-0.8.2 → datachain-0.8.4}/examples/multimodal/wds.py +0 -0
  118. {datachain-0.8.2 → datachain-0.8.4}/examples/multimodal/wds_filtered.py +0 -0
  119. {datachain-0.8.2 → datachain-0.8.4}/noxfile.py +0 -0
  120. {datachain-0.8.2 → datachain-0.8.4}/setup.cfg +0 -0
  121. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/__init__.py +0 -0
  122. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/__main__.py +0 -0
  123. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/asyn.py +0 -0
  124. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/catalog/__init__.py +0 -0
  125. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/catalog/loader.py +0 -0
  126. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/__init__.py +0 -0
  127. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/fileslice.py +0 -0
  128. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/client/hf.py +0 -0
  129. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/config.py +0 -0
  130. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/__init__.py +0 -0
  131. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/db_engine.py +0 -0
  132. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/job.py +0 -0
  133. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/metastore.py +0 -0
  134. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/schema.py +0 -0
  135. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/serializer.py +0 -0
  136. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/sqlite.py +0 -0
  137. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/data_storage/warehouse.py +0 -0
  138. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/error.py +0 -0
  139. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/aggregate.py +0 -0
  140. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/array.py +0 -0
  141. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/base.py +0 -0
  142. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/numeric.py +0 -0
  143. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/path.py +0 -0
  144. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/random.py +0 -0
  145. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/string.py +0 -0
  146. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/func/window.py +0 -0
  147. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/job.py +0 -0
  148. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/__init__.py +0 -0
  149. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/clip.py +0 -0
  150. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/convert/__init__.py +0 -0
  151. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/convert/flatten.py +0 -0
  152. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
  153. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
  154. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/convert/unflatten.py +0 -0
  155. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  156. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/data_model.py +0 -0
  157. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/dataset_info.py +0 -0
  158. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/diff.py +0 -0
  159. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/hf.py +0 -0
  160. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/image.py +0 -0
  161. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/listing_info.py +0 -0
  162. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/meta_formats.py +0 -0
  163. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/model_store.py +0 -0
  164. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/pytorch.py +0 -0
  165. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/settings.py +0 -0
  166. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/tar.py +0 -0
  167. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/text.py +0 -0
  168. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/udf.py +0 -0
  169. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/udf_signature.py +0 -0
  170. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/utils.py +0 -0
  171. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/vfile.py +0 -0
  172. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/webdataset.py +0 -0
  173. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/lib/webdataset_laion.py +0 -0
  174. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/__init__.py +0 -0
  175. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/bbox.py +0 -0
  176. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/pose.py +0 -0
  177. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/segment.py +0 -0
  178. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/ultralytics/__init__.py +0 -0
  179. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/ultralytics/bbox.py +0 -0
  180. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/ultralytics/pose.py +0 -0
  181. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/model/ultralytics/segment.py +0 -0
  182. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/nodes_fetcher.py +0 -0
  183. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/nodes_thread_pool.py +0 -0
  184. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/py.typed +0 -0
  185. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/__init__.py +0 -0
  186. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/batch.py +0 -0
  187. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/dataset.py +0 -0
  188. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/dispatch.py +0 -0
  189. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/metrics.py +0 -0
  190. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/params.py +0 -0
  191. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/queue.py +0 -0
  192. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/schema.py +0 -0
  193. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/udf.py +0 -0
  194. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/query/utils.py +0 -0
  195. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/remote/__init__.py +0 -0
  196. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/remote/studio.py +0 -0
  197. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/__init__.py +0 -0
  198. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/default/__init__.py +0 -0
  199. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/default/base.py +0 -0
  200. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/__init__.py +0 -0
  201. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/aggregate.py +0 -0
  202. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/array.py +0 -0
  203. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/conditional.py +0 -0
  204. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/numeric.py +0 -0
  205. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/path.py +0 -0
  206. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/random.py +0 -0
  207. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/functions/string.py +0 -0
  208. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/selectable.py +0 -0
  209. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/sqlite/__init__.py +0 -0
  210. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/sqlite/base.py +0 -0
  211. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/sqlite/types.py +0 -0
  212. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/sqlite/vector.py +0 -0
  213. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/types.py +0 -0
  214. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/sql/utils.py +0 -0
  215. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/telemetry.py +0 -0
  216. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/toolkit/__init__.py +0 -0
  217. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/toolkit/split.py +0 -0
  218. {datachain-0.8.2 → datachain-0.8.4}/src/datachain/torch/__init__.py +0 -0
  219. {datachain-0.8.2 → datachain-0.8.4}/src/datachain.egg-info/dependency_links.txt +0 -0
  220. {datachain-0.8.2 → datachain-0.8.4}/src/datachain.egg-info/entry_points.txt +0 -0
  221. {datachain-0.8.2 → datachain-0.8.4}/src/datachain.egg-info/top_level.txt +0 -0
  222. {datachain-0.8.2 → datachain-0.8.4}/tests/__init__.py +0 -0
  223. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/__init__.py +0 -0
  224. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/conftest.py +0 -0
  225. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  226. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/datasets/.dvc/config +0 -0
  227. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/datasets/.gitignore +0 -0
  228. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  229. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/test_datachain.py +0 -0
  230. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/test_ls.py +0 -0
  231. {datachain-0.8.2 → datachain-0.8.4}/tests/benchmarks/test_version.py +0 -0
  232. {datachain-0.8.2 → datachain-0.8.4}/tests/data.py +0 -0
  233. {datachain-0.8.2 → datachain-0.8.4}/tests/examples/__init__.py +0 -0
  234. {datachain-0.8.2 → datachain-0.8.4}/tests/examples/test_examples.py +0 -0
  235. {datachain-0.8.2 → datachain-0.8.4}/tests/examples/test_wds_e2e.py +0 -0
  236. {datachain-0.8.2 → datachain-0.8.4}/tests/examples/wds_data.py +0 -0
  237. {datachain-0.8.2 → datachain-0.8.4}/tests/func/__init__.py +0 -0
  238. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_client.py +0 -0
  239. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_dataset_query.py +0 -0
  240. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_datasets.py +0 -0
  241. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_feature_pickling.py +0 -0
  242. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_listing.py +0 -0
  243. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_ls.py +0 -0
  244. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_meta_formats.py +0 -0
  245. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_metrics.py +0 -0
  246. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_pytorch.py +0 -0
  247. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_query.py +0 -0
  248. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_session.py +0 -0
  249. {datachain-0.8.2 → datachain-0.8.4}/tests/func/test_toolkit.py +0 -0
  250. {datachain-0.8.2 → datachain-0.8.4}/tests/scripts/feature_class.py +0 -0
  251. {datachain-0.8.2 → datachain-0.8.4}/tests/scripts/feature_class_exception.py +0 -0
  252. {datachain-0.8.2 → datachain-0.8.4}/tests/scripts/feature_class_parallel.py +0 -0
  253. {datachain-0.8.2 → datachain-0.8.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  254. {datachain-0.8.2 → datachain-0.8.4}/tests/scripts/name_len_slow.py +0 -0
  255. {datachain-0.8.2 → datachain-0.8.4}/tests/test_atomicity.py +0 -0
  256. {datachain-0.8.2 → datachain-0.8.4}/tests/test_query_e2e.py +0 -0
  257. {datachain-0.8.2 → datachain-0.8.4}/tests/test_telemetry.py +0 -0
  258. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/__init__.py +0 -0
  259. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/__init__.py +0 -0
  260. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/conftest.py +0 -0
  261. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_clip.py +0 -0
  262. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  263. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_datachain_merge.py +0 -0
  264. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_diff.py +0 -0
  265. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_feature.py +0 -0
  266. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_feature_utils.py +0 -0
  267. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_file.py +0 -0
  268. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_hf.py +0 -0
  269. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_image.py +0 -0
  270. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_listing_info.py +0 -0
  271. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_models.py +0 -0
  272. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_schema.py +0 -0
  273. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_sql_to_python.py +0 -0
  274. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_text.py +0 -0
  275. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_udf_signature.py +0 -0
  276. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_utils.py +0 -0
  277. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/lib/test_webdataset.py +0 -0
  278. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/__init__.py +0 -0
  279. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/sqlite/__init__.py +0 -0
  280. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/sqlite/test_types.py +0 -0
  281. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
  282. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/test_array.py +0 -0
  283. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/test_path.py +0 -0
  284. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/test_random.py +0 -0
  285. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/test_selectable.py +0 -0
  286. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/sql/test_string.py +0 -0
  287. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_asyn.py +0 -0
  288. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_cache.py +0 -0
  289. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_catalog.py +0 -0
  290. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_catalog_loader.py +0 -0
  291. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_client.py +0 -0
  292. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_data_storage.py +0 -0
  293. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_database_engine.py +0 -0
  294. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_dataset.py +0 -0
  295. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_dispatch.py +0 -0
  296. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_fileslice.py +0 -0
  297. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_metastore.py +0 -0
  298. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_module_exports.py +0 -0
  299. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_query.py +0 -0
  300. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_query_metrics.py +0 -0
  301. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_query_params.py +0 -0
  302. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_serializer.py +0 -0
  303. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_utils.py +0 -0
  304. {datachain-0.8.2 → datachain-0.8.4}/tests/unit/test_warehouse.py +0 -0
  305. {datachain-0.8.2 → datachain-0.8.4}/tests/utils.py +0 -0
@@ -32,7 +32,7 @@ jobs:
32
32
  POSTGRES_DB: database
33
33
  POSTGRES_HOST_AUTH_METHOD: trust
34
34
  clickhouse:
35
- image: clickhouse/clickhouse-server:24.6
35
+ image: clickhouse/clickhouse-server:24.8
36
36
  ports:
37
37
  - 8123:8123
38
38
  - 9010:9000
@@ -138,7 +138,7 @@ jobs:
138
138
  matrix:
139
139
  os: [ubuntu-latest, windows-latest]
140
140
  pyv: ['3.9', '3.12']
141
- group: ['get_started', 'llm_and_nlp or computer_vision', 'multimodal']
141
+ group: ['get_started', 'computer_vision', 'llm_and_nlp', 'multimodal']
142
142
  exclude:
143
143
  - {os: ubuntu-latest, pyv: '3.9', group: 'multimodal'}
144
144
  - {os: ubuntu-latest, pyv: '3.12', group: 'multimodal'}
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.8.4'
27
+ rev: 'v0.8.6'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.8.2
3
+ Version: 0.8.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -50,7 +50,7 @@ Requires-Dist: websockets
50
50
  Provides-Extra: docs
51
51
  Requires-Dist: mkdocs>=1.5.2; extra == "docs"
52
52
  Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
53
- Requires-Dist: mkdocs-material>=9.3.1; extra == "docs"
53
+ Requires-Dist: mkdocs-material==9.5.22; extra == "docs"
54
54
  Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
55
55
  Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
56
56
  Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
@@ -72,7 +72,7 @@ Requires-Dist: pytest<9,>=8; extra == "tests"
72
72
  Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
73
73
  Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
74
74
  Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
75
- Requires-Dist: pytest-servers[all]>=0.5.8; extra == "tests"
75
+ Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
76
76
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
77
77
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
78
78
  Requires-Dist: virtualenv; extra == "tests"
@@ -84,7 +84,7 @@ Requires-Dist: requests-mock; extra == "tests"
84
84
  Requires-Dist: scipy; extra == "tests"
85
85
  Provides-Extra: dev
86
86
  Requires-Dist: datachain[docs,tests]; extra == "dev"
87
- Requires-Dist: mypy==1.14.0; extra == "dev"
87
+ Requires-Dist: mypy==1.14.1; extra == "dev"
88
88
  Requires-Dist: types-python-dateutil; extra == "dev"
89
89
  Requires-Dist: types-pytz; extra == "dev"
90
90
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -95,11 +95,11 @@ Requires-Dist: datachain[tests]; extra == "examples"
95
95
  Requires-Dist: defusedxml; extra == "examples"
96
96
  Requires-Dist: accelerate; extra == "examples"
97
97
  Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
98
- Requires-Dist: unstructured[pdf]; extra == "examples"
98
+ Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
99
99
  Requires-Dist: pdfplumber==0.11.4; extra == "examples"
100
100
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
101
101
  Requires-Dist: onnx==1.16.1; extra == "examples"
102
- Requires-Dist: ultralytics==8.3.53; extra == "examples"
102
+ Requires-Dist: ultralytics==8.3.55; extra == "examples"
103
103
 
104
104
  ================
105
105
  |logo| DataChain
@@ -136,6 +136,7 @@ plugins:
136
136
  show_root_heading: true
137
137
  show_signature_annotations: true
138
138
  show_symbol_type_heading: true
139
+ show_symbol_type_toc: true
139
140
  signature_crossrefs: true
140
141
  import:
141
142
  - https://docs.python.org/3/objects.inv
@@ -56,7 +56,7 @@ dependencies = [
56
56
  docs = [
57
57
  "mkdocs>=1.5.2",
58
58
  "mkdocs-gen-files>=0.5.0",
59
- "mkdocs-material>=9.3.1",
59
+ "mkdocs-material==9.5.22",
60
60
  "mkdocs-section-index>=0.3.6",
61
61
  "mkdocstrings-python>=1.6.3",
62
62
  "mkdocs-literate-nav>=0.6.1"
@@ -83,7 +83,7 @@ tests = [
83
83
  "pytest-sugar>=0.9.6",
84
84
  "pytest-cov>=4.1.0",
85
85
  "pytest-mock>=3.12.0",
86
- "pytest-servers[all]>=0.5.8",
86
+ "pytest-servers[all]>=0.5.9",
87
87
  "pytest-benchmark[histogram]",
88
88
  "pytest-xdist>=3.3.1",
89
89
  "virtualenv",
@@ -96,7 +96,7 @@ tests = [
96
96
  ]
97
97
  dev = [
98
98
  "datachain[docs,tests]",
99
- "mypy==1.14.0",
99
+ "mypy==1.14.1",
100
100
  "types-python-dateutil",
101
101
  "types-pytz",
102
102
  "types-PyYAML",
@@ -108,11 +108,11 @@ examples = [
108
108
  "defusedxml",
109
109
  "accelerate",
110
110
  "unstructured_ingest[embed-huggingface]",
111
- "unstructured[pdf]",
111
+ "unstructured[pdf]<0.16.12",
112
112
  "pdfplumber==0.11.4",
113
113
  "huggingface_hub[hf_transfer]",
114
114
  "onnx==1.16.1",
115
- "ultralytics==8.3.53"
115
+ "ultralytics==8.3.55"
116
116
  ]
117
117
 
118
118
  [project.urls]
@@ -61,14 +61,16 @@ class DataChainCache:
61
61
  tmp_info = odb_fs.join(self.odb.tmp_dir, tmp_fname()) # type: ignore[arg-type]
62
62
  size = file.size
63
63
  if size < 0:
64
- size = await client.get_size(from_path)
64
+ size = await client.get_size(from_path, version_id=file.version)
65
65
  cb = callback or TqdmCallback(
66
66
  tqdm_kwargs={"desc": odb_fs.name(from_path), "bytes": True},
67
67
  tqdm_cls=Tqdm,
68
68
  size=size,
69
69
  )
70
70
  try:
71
- await client.get_file(from_path, tmp_info, callback=cb)
71
+ await client.get_file(
72
+ from_path, tmp_info, callback=cb, version_id=file.version
73
+ )
72
74
  finally:
73
75
  if not callback:
74
76
  cb.close()
@@ -240,7 +240,8 @@ class DatasetRowsFetcher(NodesThreadPool):
240
240
  class NodeGroup:
241
241
  """Class for a group of nodes from the same source"""
242
242
 
243
- listing: "Listing"
243
+ listing: Optional["Listing"]
244
+ client: "Client"
244
245
  sources: list[DataSource]
245
246
 
246
247
  # The source path within the bucket
@@ -268,9 +269,7 @@ class NodeGroup:
268
269
  Download this node group to cache.
269
270
  """
270
271
  if self.sources:
271
- self.listing.client.fetch_nodes(
272
- self.iternodes(recursive), shared_progress_bar=pbar
273
- )
272
+ self.client.fetch_nodes(self.iternodes(recursive), shared_progress_bar=pbar)
274
273
 
275
274
 
276
275
  def check_output_dataset_file(
@@ -375,7 +374,7 @@ def collect_nodes_for_cp(
375
374
 
376
375
  # Collect all sources to process
377
376
  for node_group in node_groups:
378
- listing: Listing = node_group.listing
377
+ listing: Optional[Listing] = node_group.listing
379
378
  valid_sources: list[DataSource] = []
380
379
  for dsrc in node_group.sources:
381
380
  if dsrc.is_single_object():
@@ -383,6 +382,7 @@ def collect_nodes_for_cp(
383
382
  total_files += 1
384
383
  valid_sources.append(dsrc)
385
384
  else:
385
+ assert listing
386
386
  node = dsrc.node
387
387
  if not recursive:
388
388
  print(f"{node.full_path} is a directory (not copied).")
@@ -433,37 +433,51 @@ def instantiate_node_groups(
433
433
  )
434
434
 
435
435
  output_dir = output
436
+ output_file = None
436
437
  if copy_to_filename:
437
438
  output_dir = os.path.dirname(output)
438
439
  if not output_dir:
439
440
  output_dir = "."
441
+ output_file = os.path.basename(output)
440
442
 
441
443
  # Instantiate these nodes
442
444
  for node_group in node_groups:
443
445
  if not node_group.sources:
444
446
  continue
445
- listing: Listing = node_group.listing
447
+ listing: Optional[Listing] = node_group.listing
446
448
  source_path: str = node_group.source_path
447
449
 
448
450
  copy_dir_contents = always_copy_dir_contents or source_path.endswith("/")
449
- instantiated_nodes = listing.collect_nodes_to_instantiate(
450
- node_group.sources,
451
- copy_to_filename,
452
- recursive,
453
- copy_dir_contents,
454
- source_path,
455
- node_group.is_edatachain,
456
- node_group.is_dataset,
457
- )
458
- if not virtual_only:
459
- listing.instantiate_nodes(
460
- instantiated_nodes,
461
- output_dir,
462
- total_files,
463
- force=force,
464
- shared_progress_bar=instantiate_progress_bar,
451
+ if not listing:
452
+ source = node_group.sources[0]
453
+ client = source.client
454
+ node = NodeWithPath(source.node, [output_file or source.node.path])
455
+ instantiated_nodes = [node]
456
+ if not virtual_only:
457
+ node.instantiate(
458
+ client, output_dir, instantiate_progress_bar, force=force
459
+ )
460
+ else:
461
+ instantiated_nodes = listing.collect_nodes_to_instantiate(
462
+ node_group.sources,
463
+ copy_to_filename,
464
+ recursive,
465
+ copy_dir_contents,
466
+ source_path,
467
+ node_group.is_edatachain,
468
+ node_group.is_dataset,
465
469
  )
470
+ if not virtual_only:
471
+ listing.instantiate_nodes(
472
+ instantiated_nodes,
473
+ output_dir,
474
+ total_files,
475
+ force=force,
476
+ shared_progress_bar=instantiate_progress_bar,
477
+ )
478
+
466
479
  node_group.instantiated_nodes = instantiated_nodes
480
+
467
481
  if instantiate_progress_bar:
468
482
  instantiate_progress_bar.close()
469
483
 
@@ -592,7 +606,7 @@ class Catalog:
592
606
  client_config=None,
593
607
  object_name="file",
594
608
  skip_indexing=False,
595
- ) -> tuple["Listing", str]:
609
+ ) -> tuple[Optional["Listing"], "Client", str]:
596
610
  from datachain.lib.dc import DataChain
597
611
  from datachain.listing import Listing
598
612
 
@@ -603,16 +617,19 @@ class Catalog:
603
617
  list_ds_name, list_uri, list_path, _ = get_listing(
604
618
  source, self.session, update=update
605
619
  )
620
+ lst = None
621
+ client = Client.get_client(list_uri, self.cache, **self.client_config)
622
+
623
+ if list_ds_name:
624
+ lst = Listing(
625
+ self.metastore.clone(),
626
+ self.warehouse.clone(),
627
+ client,
628
+ dataset_name=list_ds_name,
629
+ object_name=object_name,
630
+ )
606
631
 
607
- lst = Listing(
608
- self.metastore.clone(),
609
- self.warehouse.clone(),
610
- Client.get_client(list_uri, self.cache, **self.client_config),
611
- dataset_name=list_ds_name,
612
- object_name=object_name,
613
- )
614
-
615
- return lst, list_path
632
+ return lst, client, list_path
616
633
 
617
634
  def _remove_dataset_rows_and_warehouse_info(
618
635
  self, dataset: DatasetRecord, version: int, **kwargs
@@ -635,13 +652,13 @@ class Catalog:
635
652
  ) -> Optional[list["DataSource"]]:
636
653
  enlisted_sources = []
637
654
  for src in sources: # Opt: parallel
638
- listing, file_path = self.enlist_source(
655
+ listing, client, file_path = self.enlist_source(
639
656
  src,
640
657
  update,
641
658
  client_config=client_config or self.client_config,
642
659
  skip_indexing=skip_indexing,
643
660
  )
644
- enlisted_sources.append((listing, file_path))
661
+ enlisted_sources.append((listing, client, file_path))
645
662
 
646
663
  if only_index:
647
664
  # sometimes we don't really need listing result (e.g on indexing process)
@@ -649,10 +666,16 @@ class Catalog:
649
666
  return None
650
667
 
651
668
  dsrc_all: list[DataSource] = []
652
- for listing, file_path in enlisted_sources:
653
- nodes = listing.expand_path(file_path)
654
- dir_only = file_path.endswith("/")
655
- dsrc_all.extend(DataSource(listing, node, dir_only) for node in nodes)
669
+ for listing, client, file_path in enlisted_sources:
670
+ if not listing:
671
+ nodes = [Node.from_file(client.get_file_info(file_path))]
672
+ dir_only = False
673
+ else:
674
+ nodes = listing.expand_path(file_path)
675
+ dir_only = file_path.endswith("/")
676
+ dsrc_all.extend(
677
+ DataSource(listing, client, node, dir_only) for node in nodes
678
+ )
656
679
  return dsrc_all
657
680
 
658
681
  def enlist_sources_grouped(
@@ -667,7 +690,7 @@ class Catalog:
667
690
 
668
691
  def _row_to_node(d: dict[str, Any]) -> Node:
669
692
  del d["file__source"]
670
- return Node.from_dict(d)
693
+ return Node.from_row(d)
671
694
 
672
695
  enlisted_sources: list[tuple[bool, bool, Any]] = []
673
696
  client_config = client_config or self.client_config
@@ -677,7 +700,7 @@ class Catalog:
677
700
  edatachain_data = parse_edatachain_file(src)
678
701
  indexed_sources = []
679
702
  for ds in edatachain_data:
680
- listing, source_path = self.enlist_source(
703
+ listing, _, source_path = self.enlist_source(
681
704
  ds["data-source"]["uri"],
682
705
  update,
683
706
  client_config=client_config,
@@ -701,6 +724,7 @@ class Catalog:
701
724
  client = self.get_client(source, **client_config)
702
725
  uri = client.uri
703
726
  dataset_name, _, _, _ = get_listing(uri, self.session)
727
+ assert dataset_name
704
728
  listing = Listing(
705
729
  self.metastore.clone(),
706
730
  self.warehouse.clone(),
@@ -713,6 +737,7 @@ class Catalog:
713
737
  indexed_sources.append(
714
738
  (
715
739
  listing,
740
+ client,
716
741
  source,
717
742
  [_row_to_node(r) for r in rows],
718
743
  ds_name,
@@ -722,25 +747,28 @@ class Catalog:
722
747
 
723
748
  enlisted_sources.append((False, True, indexed_sources))
724
749
  else:
725
- listing, source_path = self.enlist_source(
750
+ listing, client, source_path = self.enlist_source(
726
751
  src, update, client_config=client_config
727
752
  )
728
- enlisted_sources.append((False, False, (listing, source_path)))
753
+ enlisted_sources.append((False, False, (listing, client, source_path)))
729
754
 
730
755
  node_groups = []
731
756
  for is_datachain, is_dataset, payload in enlisted_sources: # Opt: parallel
732
757
  if is_dataset:
733
758
  for (
734
759
  listing,
760
+ client,
735
761
  source_path,
736
762
  nodes,
737
763
  dataset_name,
738
764
  dataset_version,
739
765
  ) in payload:
740
- dsrc = [DataSource(listing, node) for node in nodes]
766
+ assert listing
767
+ dsrc = [DataSource(listing, client, node) for node in nodes]
741
768
  node_groups.append(
742
769
  NodeGroup(
743
770
  listing,
771
+ client,
744
772
  dsrc,
745
773
  source_path,
746
774
  dataset_name=dataset_name,
@@ -749,18 +777,30 @@ class Catalog:
749
777
  )
750
778
  elif is_datachain:
751
779
  for listing, source_path, paths in payload:
752
- dsrc = [DataSource(listing, listing.resolve_path(p)) for p in paths]
780
+ assert listing
781
+ dsrc = [
782
+ DataSource(listing, listing.client, listing.resolve_path(p))
783
+ for p in paths
784
+ ]
753
785
  node_groups.append(
754
- NodeGroup(listing, dsrc, source_path, is_edatachain=True)
786
+ NodeGroup(
787
+ listing,
788
+ listing.client,
789
+ dsrc,
790
+ source_path,
791
+ is_edatachain=True,
792
+ )
755
793
  )
756
794
  else:
757
- listing, source_path = payload
758
- as_container = source_path.endswith("/")
759
- dsrc = [
760
- DataSource(listing, n, as_container)
761
- for n in listing.expand_path(source_path, use_glob=not no_glob)
762
- ]
763
- node_groups.append(NodeGroup(listing, dsrc, source_path))
795
+ listing, client, source_path = payload
796
+ if not listing:
797
+ nodes = [Node.from_file(client.get_file_info(source_path))]
798
+ as_container = False
799
+ else:
800
+ as_container = source_path.endswith("/")
801
+ nodes = listing.expand_path(source_path, use_glob=not no_glob)
802
+ dsrc = [DataSource(listing, client, n, as_container) for n in nodes]
803
+ node_groups.append(NodeGroup(listing, client, dsrc, source_path))
764
804
 
765
805
  return node_groups
766
806
 
@@ -1196,10 +1236,16 @@ class Catalog:
1196
1236
 
1197
1237
  return q.to_db_records()
1198
1238
 
1199
- def signed_url(self, source: str, path: str, client_config=None) -> str:
1239
+ def signed_url(
1240
+ self,
1241
+ source: str,
1242
+ path: str,
1243
+ version_id: Optional[str] = None,
1244
+ client_config=None,
1245
+ ) -> str:
1200
1246
  client_config = client_config or self.client_config
1201
1247
  client = Client.get_client(source, self.cache, **client_config)
1202
- return client.url(path)
1248
+ return client.url(path, version_id=version_id)
1203
1249
 
1204
1250
  def export_dataset_table(
1205
1251
  self,
@@ -4,21 +4,19 @@ from datachain.node import DirType, NodeWithPath
4
4
 
5
5
 
6
6
  class DataSource:
7
- def __init__(self, listing, node, as_container=False):
7
+ def __init__(self, listing, client, node, as_container=False):
8
8
  self.listing = listing
9
+ self.client = client
9
10
  self.node = node
10
11
  self.as_container = (
11
12
  as_container # Indicates whether a .tar file is handled as a container
12
13
  )
13
14
 
14
- def get_full_path(self):
15
- return self.get_node_full_path(self.node)
16
-
17
15
  def get_node_full_path(self, node):
18
- return self.listing.client.get_full_path(node.full_path)
16
+ return self.client.get_full_path(node.full_path)
19
17
 
20
18
  def get_node_full_path_from_path(self, full_path):
21
- return self.listing.client.get_full_path(full_path)
19
+ return self.client.get_full_path(full_path)
22
20
 
23
21
  def is_single_object(self):
24
22
  return self.node.dir_type == DirType.FILE or (