datachain 0.7.1__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (285)
  1. {datachain-0.7.1 → datachain-0.7.3}/.github/workflows/benchmarks.yml +1 -1
  2. {datachain-0.7.1 → datachain-0.7.3}/.github/workflows/release.yml +1 -1
  3. {datachain-0.7.1 → datachain-0.7.3}/.github/workflows/tests-studio.yml +1 -1
  4. {datachain-0.7.1 → datachain-0.7.3}/.github/workflows/tests.yml +3 -3
  5. {datachain-0.7.1 → datachain-0.7.3}/.pre-commit-config.yaml +1 -1
  6. {datachain-0.7.1/src/datachain.egg-info → datachain-0.7.3}/PKG-INFO +2 -2
  7. {datachain-0.7.1 → datachain-0.7.3}/README.rst +1 -1
  8. datachain-0.7.3/docs/references/sql.md +18 -0
  9. {datachain-0.7.1 → datachain-0.7.3}/examples/computer_vision/openimage-detect.py +2 -2
  10. {datachain-0.7.1 → datachain-0.7.3}/examples/get_started/common_sql_functions.py +4 -5
  11. {datachain-0.7.1 → datachain-0.7.3}/examples/multimodal/clip_inference.py +3 -4
  12. {datachain-0.7.1 → datachain-0.7.3}/examples/multimodal/wds.py +1 -1
  13. {datachain-0.7.1 → datachain-0.7.3}/examples/multimodal/wds_filtered.py +6 -10
  14. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/__init__.py +0 -2
  15. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/catalog/catalog.py +12 -9
  16. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/cli.py +109 -9
  17. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/fsspec.py +9 -9
  18. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/metastore.py +63 -11
  19. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/schema.py +2 -2
  20. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/sqlite.py +5 -4
  21. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/warehouse.py +18 -18
  22. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/dataset.py +142 -14
  23. datachain-0.7.3/src/datachain/func/__init__.py +49 -0
  24. {datachain-0.7.1/src/datachain/lib → datachain-0.7.3/src/datachain}/func/aggregate.py +13 -11
  25. datachain-0.7.3/src/datachain/func/array.py +176 -0
  26. datachain-0.7.3/src/datachain/func/base.py +23 -0
  27. datachain-0.7.3/src/datachain/func/conditional.py +81 -0
  28. datachain-0.7.3/src/datachain/func/func.py +384 -0
  29. datachain-0.7.3/src/datachain/func/path.py +110 -0
  30. datachain-0.7.3/src/datachain/func/random.py +23 -0
  31. datachain-0.7.3/src/datachain/func/string.py +154 -0
  32. datachain-0.7.3/src/datachain/func/window.py +49 -0
  33. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/arrow.py +24 -12
  34. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/data_model.py +25 -9
  35. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/dataset_info.py +9 -5
  36. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/dc.py +94 -56
  37. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/hf.py +1 -1
  38. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/signal_schema.py +1 -1
  39. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/utils.py +1 -0
  40. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/webdataset_laion.py +5 -5
  41. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/bbox.py +2 -2
  42. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/pose.py +5 -5
  43. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/segment.py +2 -2
  44. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/nodes_fetcher.py +2 -2
  45. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/dataset.py +57 -34
  46. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/remote/studio.py +40 -8
  47. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/__init__.py +0 -2
  48. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/selectable.py +11 -5
  49. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/sqlite/base.py +11 -2
  50. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/studio.py +29 -0
  51. {datachain-0.7.1 → datachain-0.7.3/src/datachain.egg-info}/PKG-INFO +2 -2
  52. {datachain-0.7.1 → datachain-0.7.3}/src/datachain.egg-info/SOURCES.txt +11 -3
  53. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_catalog.py +21 -0
  54. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_datachain.py +37 -6
  55. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_datasets.py +1 -1
  56. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_pull.py +1 -1
  57. {datachain-0.7.1 → datachain-0.7.3}/tests/test_cli_studio.py +119 -0
  58. {datachain-0.7.1 → datachain-0.7.3}/tests/test_query_e2e.py +30 -40
  59. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_arrow.py +34 -6
  60. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_datachain.py +37 -22
  61. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_hf.py +2 -2
  62. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_sql_to_python.py +0 -3
  63. datachain-0.7.3/tests/unit/sql/sqlite/__init__.py +0 -0
  64. datachain-0.7.3/tests/unit/sql/test_array.py +73 -0
  65. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/sql/test_conditional.py +25 -10
  66. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/sql/test_path.py +10 -9
  67. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/sql/test_random.py +2 -2
  68. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/sql/test_string.py +2 -2
  69. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_database_engine.py +15 -4
  70. datachain-0.7.3/tests/unit/test_func.py +256 -0
  71. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_session.py +2 -1
  72. datachain-0.7.1/docs/references/sql.md +0 -18
  73. datachain-0.7.1/src/datachain/lib/func/__init__.py +0 -32
  74. datachain-0.7.1/src/datachain/lib/func/func.py +0 -152
  75. datachain-0.7.1/src/datachain/sql/functions/__init__.py +0 -26
  76. datachain-0.7.1/tests/unit/sql/test_array.py +0 -20
  77. {datachain-0.7.1 → datachain-0.7.3}/.cruft.json +0 -0
  78. {datachain-0.7.1 → datachain-0.7.3}/.gitattributes +0 -0
  79. {datachain-0.7.1 → datachain-0.7.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  80. {datachain-0.7.1 → datachain-0.7.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  81. {datachain-0.7.1 → datachain-0.7.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  82. {datachain-0.7.1 → datachain-0.7.3}/.github/codecov.yaml +0 -0
  83. {datachain-0.7.1 → datachain-0.7.3}/.github/dependabot.yml +0 -0
  84. {datachain-0.7.1 → datachain-0.7.3}/.github/workflows/update-template.yaml +0 -0
  85. {datachain-0.7.1 → datachain-0.7.3}/.gitignore +0 -0
  86. {datachain-0.7.1 → datachain-0.7.3}/CODE_OF_CONDUCT.rst +0 -0
  87. {datachain-0.7.1 → datachain-0.7.3}/CONTRIBUTING.rst +0 -0
  88. {datachain-0.7.1 → datachain-0.7.3}/LICENSE +0 -0
  89. {datachain-0.7.1 → datachain-0.7.3}/docs/assets/captioned_cartoons.png +0 -0
  90. {datachain-0.7.1 → datachain-0.7.3}/docs/assets/datachain-white.svg +0 -0
  91. {datachain-0.7.1 → datachain-0.7.3}/docs/assets/datachain.svg +0 -0
  92. {datachain-0.7.1 → datachain-0.7.3}/docs/index.md +0 -0
  93. {datachain-0.7.1 → datachain-0.7.3}/docs/overrides/main.html +0 -0
  94. {datachain-0.7.1 → datachain-0.7.3}/docs/references/datachain.md +0 -0
  95. {datachain-0.7.1 → datachain-0.7.3}/docs/references/datatype.md +0 -0
  96. {datachain-0.7.1 → datachain-0.7.3}/docs/references/file.md +0 -0
  97. {datachain-0.7.1 → datachain-0.7.3}/docs/references/index.md +0 -0
  98. {datachain-0.7.1 → datachain-0.7.3}/docs/references/torch.md +0 -0
  99. {datachain-0.7.1 → datachain-0.7.3}/docs/references/udf.md +0 -0
  100. {datachain-0.7.1 → datachain-0.7.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  101. {datachain-0.7.1 → datachain-0.7.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  102. {datachain-0.7.1 → datachain-0.7.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
  103. {datachain-0.7.1 → datachain-0.7.3}/examples/computer_vision/ultralytics-pose.py +0 -0
  104. {datachain-0.7.1 → datachain-0.7.3}/examples/computer_vision/ultralytics-segment.py +0 -0
  105. {datachain-0.7.1 → datachain-0.7.3}/examples/get_started/json-csv-reader.py +0 -0
  106. {datachain-0.7.1 → datachain-0.7.3}/examples/get_started/torch-loader.py +0 -0
  107. {datachain-0.7.1 → datachain-0.7.3}/examples/get_started/udfs/parallel.py +0 -0
  108. {datachain-0.7.1 → datachain-0.7.3}/examples/get_started/udfs/simple.py +0 -0
  109. {datachain-0.7.1 → datachain-0.7.3}/examples/get_started/udfs/stateful.py +0 -0
  110. {datachain-0.7.1 → datachain-0.7.3}/examples/llm_and_nlp/claude-query.py +0 -0
  111. {datachain-0.7.1 → datachain-0.7.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  112. {datachain-0.7.1 → datachain-0.7.3}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  113. {datachain-0.7.1 → datachain-0.7.3}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  114. {datachain-0.7.1 → datachain-0.7.3}/examples/multimodal/hf_pipeline.py +0 -0
  115. {datachain-0.7.1 → datachain-0.7.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
  116. {datachain-0.7.1 → datachain-0.7.3}/mkdocs.yml +0 -0
  117. {datachain-0.7.1 → datachain-0.7.3}/noxfile.py +0 -0
  118. {datachain-0.7.1 → datachain-0.7.3}/pyproject.toml +0 -0
  119. {datachain-0.7.1 → datachain-0.7.3}/setup.cfg +0 -0
  120. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/__main__.py +0 -0
  121. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/asyn.py +0 -0
  122. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/cache.py +0 -0
  123. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/catalog/__init__.py +0 -0
  124. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/catalog/datasource.py +0 -0
  125. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/catalog/loader.py +0 -0
  126. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/cli_utils.py +0 -0
  127. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/__init__.py +0 -0
  128. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/azure.py +0 -0
  129. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/fileslice.py +0 -0
  130. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/gcs.py +0 -0
  131. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/hf.py +0 -0
  132. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/local.py +0 -0
  133. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/client/s3.py +0 -0
  134. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/config.py +0 -0
  135. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/__init__.py +0 -0
  136. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/db_engine.py +0 -0
  137. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/id_generator.py +0 -0
  138. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/job.py +0 -0
  139. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/data_storage/serializer.py +0 -0
  140. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/error.py +0 -0
  141. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/job.py +0 -0
  142. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/__init__.py +0 -0
  143. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/clip.py +0 -0
  144. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/convert/__init__.py +0 -0
  145. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/convert/flatten.py +0 -0
  146. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
  147. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
  148. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/convert/unflatten.py +0 -0
  149. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  150. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/file.py +0 -0
  151. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/image.py +0 -0
  152. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/listing.py +0 -0
  153. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/listing_info.py +0 -0
  154. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/meta_formats.py +0 -0
  155. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/model_store.py +0 -0
  156. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/pytorch.py +0 -0
  157. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/settings.py +0 -0
  158. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/tar.py +0 -0
  159. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/text.py +0 -0
  160. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/udf.py +0 -0
  161. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/udf_signature.py +0 -0
  162. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/vfile.py +0 -0
  163. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/lib/webdataset.py +0 -0
  164. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/listing.py +0 -0
  165. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/__init__.py +0 -0
  166. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/ultralytics/__init__.py +0 -0
  167. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/ultralytics/bbox.py +0 -0
  168. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/ultralytics/pose.py +0 -0
  169. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/model/ultralytics/segment.py +0 -0
  170. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/node.py +0 -0
  171. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/nodes_thread_pool.py +0 -0
  172. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/progress.py +0 -0
  173. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/py.typed +0 -0
  174. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/__init__.py +0 -0
  175. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/batch.py +0 -0
  176. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/dispatch.py +0 -0
  177. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/metrics.py +0 -0
  178. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/params.py +0 -0
  179. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/queue.py +0 -0
  180. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/schema.py +0 -0
  181. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/query/session.py +0 -0
  182. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/remote/__init__.py +0 -0
  183. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/default/__init__.py +0 -0
  184. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/default/base.py +0 -0
  185. {datachain-0.7.1/tests/benchmarks → datachain-0.7.3/src/datachain/sql/functions}/__init__.py +0 -0
  186. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/functions/aggregate.py +0 -0
  187. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/functions/array.py +0 -0
  188. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/functions/conditional.py +0 -0
  189. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/functions/path.py +0 -0
  190. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/functions/random.py +0 -0
  191. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/functions/string.py +0 -0
  192. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  193. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/sqlite/types.py +0 -0
  194. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/sqlite/vector.py +0 -0
  195. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/types.py +0 -0
  196. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/sql/utils.py +0 -0
  197. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/telemetry.py +0 -0
  198. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/toolkit/__init__.py +0 -0
  199. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/toolkit/split.py +0 -0
  200. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/torch/__init__.py +0 -0
  201. {datachain-0.7.1 → datachain-0.7.3}/src/datachain/utils.py +0 -0
  202. {datachain-0.7.1 → datachain-0.7.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  203. {datachain-0.7.1 → datachain-0.7.3}/src/datachain.egg-info/entry_points.txt +0 -0
  204. {datachain-0.7.1 → datachain-0.7.3}/src/datachain.egg-info/requires.txt +0 -0
  205. {datachain-0.7.1 → datachain-0.7.3}/src/datachain.egg-info/top_level.txt +0 -0
  206. {datachain-0.7.1 → datachain-0.7.3}/tests/__init__.py +0 -0
  207. {datachain-0.7.1/tests/examples → datachain-0.7.3/tests/benchmarks}/__init__.py +0 -0
  208. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/conftest.py +0 -0
  209. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  210. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/datasets/.dvc/config +0 -0
  211. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/datasets/.gitignore +0 -0
  212. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  213. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/test_datachain.py +0 -0
  214. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/test_ls.py +0 -0
  215. {datachain-0.7.1 → datachain-0.7.3}/tests/benchmarks/test_version.py +0 -0
  216. {datachain-0.7.1 → datachain-0.7.3}/tests/conftest.py +0 -0
  217. {datachain-0.7.1 → datachain-0.7.3}/tests/data.py +0 -0
  218. {datachain-0.7.1/tests/func → datachain-0.7.3/tests/examples}/__init__.py +0 -0
  219. {datachain-0.7.1 → datachain-0.7.3}/tests/examples/test_examples.py +0 -0
  220. {datachain-0.7.1 → datachain-0.7.3}/tests/examples/test_wds_e2e.py +0 -0
  221. {datachain-0.7.1 → datachain-0.7.3}/tests/examples/wds_data.py +0 -0
  222. {datachain-0.7.1/tests/unit → datachain-0.7.3/tests/func}/__init__.py +0 -0
  223. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_client.py +0 -0
  224. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_dataset_query.py +0 -0
  225. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_feature_pickling.py +0 -0
  226. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_listing.py +0 -0
  227. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_ls.py +0 -0
  228. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_meta_formats.py +0 -0
  229. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_metrics.py +0 -0
  230. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_pytorch.py +0 -0
  231. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_query.py +0 -0
  232. {datachain-0.7.1 → datachain-0.7.3}/tests/func/test_toolkit.py +0 -0
  233. {datachain-0.7.1 → datachain-0.7.3}/tests/scripts/feature_class.py +0 -0
  234. {datachain-0.7.1 → datachain-0.7.3}/tests/scripts/feature_class_exception.py +0 -0
  235. {datachain-0.7.1 → datachain-0.7.3}/tests/scripts/feature_class_parallel.py +0 -0
  236. {datachain-0.7.1 → datachain-0.7.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  237. {datachain-0.7.1 → datachain-0.7.3}/tests/scripts/name_len_slow.py +0 -0
  238. {datachain-0.7.1 → datachain-0.7.3}/tests/test_atomicity.py +0 -0
  239. {datachain-0.7.1 → datachain-0.7.3}/tests/test_cli_e2e.py +0 -0
  240. {datachain-0.7.1 → datachain-0.7.3}/tests/test_telemetry.py +0 -0
  241. {datachain-0.7.1/tests/unit/lib → datachain-0.7.3/tests/unit}/__init__.py +0 -0
  242. {datachain-0.7.1/tests/unit/sql → datachain-0.7.3/tests/unit/lib}/__init__.py +0 -0
  243. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/conftest.py +0 -0
  244. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_clip.py +0 -0
  245. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  246. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_datachain_merge.py +0 -0
  247. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_feature.py +0 -0
  248. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_feature_utils.py +0 -0
  249. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_file.py +0 -0
  250. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_image.py +0 -0
  251. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_listing_info.py +0 -0
  252. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_models.py +0 -0
  253. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_schema.py +0 -0
  254. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_signal_schema.py +0 -0
  255. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_text.py +0 -0
  256. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_udf_signature.py +0 -0
  257. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_utils.py +0 -0
  258. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/lib/test_webdataset.py +0 -0
  259. {datachain-0.7.1/tests/unit/sql/sqlite → datachain-0.7.3/tests/unit/sql}/__init__.py +0 -0
  260. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/sql/sqlite/test_types.py +0 -0
  261. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  262. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/sql/test_selectable.py +0 -0
  263. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_asyn.py +0 -0
  264. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_cache.py +0 -0
  265. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_catalog.py +0 -0
  266. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_catalog_loader.py +0 -0
  267. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_cli_parsing.py +0 -0
  268. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_client.py +0 -0
  269. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_client_s3.py +0 -0
  270. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_config.py +0 -0
  271. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_data_storage.py +0 -0
  272. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_dataset.py +0 -0
  273. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_dispatch.py +0 -0
  274. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_fileslice.py +0 -0
  275. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_id_generator.py +0 -0
  276. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_listing.py +0 -0
  277. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_metastore.py +0 -0
  278. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_module_exports.py +0 -0
  279. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_query.py +0 -0
  280. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_query_metrics.py +0 -0
  281. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_query_params.py +0 -0
  282. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_serializer.py +0 -0
  283. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_utils.py +0 -0
  284. {datachain-0.7.1 → datachain-0.7.3}/tests/unit/test_warehouse.py +0 -0
  285. {datachain-0.7.1 → datachain-0.7.3}/tests/utils.py +0 -0
@@ -25,7 +25,7 @@ jobs:
25
25
  python-version: '3.12'
26
26
 
27
27
  - name: Setup uv
28
- uses: astral-sh/setup-uv@v3
28
+ uses: astral-sh/setup-uv@v4
29
29
  with:
30
30
  enable-cache: true
31
31
  cache-suffix: benchmarks
@@ -27,7 +27,7 @@ jobs:
27
27
  python-version: '3.12'
28
28
 
29
29
  - name: Setup uv
30
- uses: astral-sh/setup-uv@v3
30
+ uses: astral-sh/setup-uv@v4
31
31
 
32
32
  - name: Install nox
33
33
  run: uv pip install nox --system
@@ -81,7 +81,7 @@ jobs:
81
81
  python-version: ${{ matrix.pyv }}
82
82
 
83
83
  - name: Setup uv
84
- uses: astral-sh/setup-uv@v3
84
+ uses: astral-sh/setup-uv@v4
85
85
  with:
86
86
  enable-cache: true
87
87
  cache-suffix: studio
@@ -28,7 +28,7 @@ jobs:
28
28
  python-version: '3.9'
29
29
 
30
30
  - name: Setup uv
31
- uses: astral-sh/setup-uv@v3
31
+ uses: astral-sh/setup-uv@v4
32
32
  with:
33
33
  enable-cache: true
34
34
  cache-suffix: lint
@@ -82,7 +82,7 @@ jobs:
82
82
  python-version: ${{ matrix.pyv }}
83
83
 
84
84
  - name: Setup uv
85
- uses: astral-sh/setup-uv@v3
85
+ uses: astral-sh/setup-uv@v4
86
86
  with:
87
87
  enable-cache: true
88
88
  cache-suffix: tests-${{ matrix.pyv }}
@@ -142,7 +142,7 @@ jobs:
142
142
  python-version: ${{ matrix.pyv }}
143
143
 
144
144
  - name: Setup uv
145
- uses: astral-sh/setup-uv@v3
145
+ uses: astral-sh/setup-uv@v4
146
146
  with:
147
147
  enable-cache: true
148
148
  cache-suffix: examples-${{ matrix.pyv }}
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.7.4'
27
+ rev: 'v0.8.0'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -139,7 +139,7 @@ Key Features
139
139
  ============
140
140
 
141
141
  📂 **Multimodal Dataset Versioning.**
142
- - Version unstructured data without redundant data copies, by supporitng
142
+ - Version unstructured data without redundant data copies, by supporting
143
143
  references to S3, GCP, Azure, and local file systems.
144
144
  - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
145
145
  - Unite files and metadata together into persistent, versioned, columnar datasets.
@@ -37,7 +37,7 @@ Key Features
37
37
  ============
38
38
 
39
39
  📂 **Multimodal Dataset Versioning.**
40
- - Version unstructured data without redundant data copies, by supporitng
40
+ - Version unstructured data without redundant data copies, by supporting
41
41
  references to S3, GCP, Azure, and local file systems.
42
42
  - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
43
43
  - Unite files and metadata together into persistent, versioned, columnar datasets.
@@ -0,0 +1,18 @@
1
+ # SQL
2
+
3
+ Use SQL functions to operate on the underlying database storing the chain data. Useful
4
+ for operations like [`DataChain.filter`](datachain.md#datachain.lib.dc.DataChain.filter)
5
+ and [`DataChain.mutate`](datachain.md#datachain.lib.dc.DataChain.mutate). Import
6
+ these functions from `datachain.func`.
7
+
8
+ ::: datachain.func.avg
9
+ ::: datachain.func.count
10
+ ::: datachain.func.greatest
11
+ ::: datachain.func.least
12
+ ::: datachain.func.max
13
+ ::: datachain.func.min
14
+ ::: datachain.func.rand
15
+ ::: datachain.func.sum
16
+ ::: datachain.func.array
17
+ ::: datachain.func.path
18
+ ::: datachain.func.string
@@ -3,7 +3,7 @@ import json
3
3
  from PIL import Image
4
4
 
5
5
  from datachain import C, DataChain, File, model
6
- from datachain.sql.functions import path
6
+ from datachain.func import path
7
7
 
8
8
 
9
9
  def openimage_detect(args):
@@ -48,7 +48,7 @@ source = "gs://datachain-demo/openimages-v6-test-jsonpairs/"
48
48
  .filter(C("file.path").glob("*.jpg") | C("file.path").glob("*.json"))
49
49
  .agg(
50
50
  openimage_detect,
51
- partition_by=path.file_stem(C("file.path")),
51
+ partition_by=path.file_stem("file.path"),
52
52
  params=["file"],
53
53
  output={"file": File, "bbox": model.BBox},
54
54
  )
@@ -1,6 +1,5 @@
1
1
  from datachain import C, DataChain
2
- from datachain.sql import literal
3
- from datachain.sql.functions import array, greatest, least, path, string
2
+ from datachain.func import array, greatest, least, path, string
4
3
 
5
4
 
6
5
  def num_chars_udf(file):
@@ -18,7 +17,7 @@ dc.map(num_chars_udf, params=["file"], output={"num_chars": list[str]}).select(
18
17
  (
19
18
  dc.mutate(
20
19
  length=string.length(path.name(C("file.path"))),
21
- parts=string.split(path.name(C("file.path")), literal(".")),
20
+ parts=string.split(path.name(C("file.path")), "."),
22
21
  )
23
22
  .select("file.path", "length", "parts")
24
23
  .show(5)
@@ -35,8 +34,8 @@ dc.map(num_chars_udf, params=["file"], output={"num_chars": list[str]}).select(
35
34
 
36
35
 
37
36
  chain = dc.mutate(
38
- a=array.length(string.split(C("file.path"), literal("/"))),
39
- b=array.length(string.split(path.name(C("file.path")), literal("0"))),
37
+ a=array.length(string.split("file.path", "/")),
38
+ b=array.length(string.split(path.name("file.path"), "0")),
40
39
  )
41
40
 
42
41
  (
@@ -3,8 +3,7 @@ import torch
3
3
  from torch.nn.functional import cosine_similarity
4
4
  from torch.utils.data import DataLoader
5
5
 
6
- from datachain import C, DataChain
7
- from datachain.sql.functions import path
6
+ from datachain import C, DataChain, func
8
7
 
9
8
  source = "gs://datachain-demo/50k-laion-files/000000/00000000*"
10
9
 
@@ -18,8 +17,8 @@ def create_dataset():
18
17
  )
19
18
  return imgs.merge(
20
19
  captions,
21
- on=path.file_stem(imgs.c("file.path")),
22
- right_on=path.file_stem(captions.c("file.path")),
20
+ on=func.path.file_stem(imgs.c("file.path")),
21
+ right_on=func.path.file_stem(captions.c("file.path")),
23
22
  )
24
23
 
25
24
 
@@ -1,9 +1,9 @@
1
1
  import os
2
2
 
3
3
  from datachain import DataChain
4
+ from datachain.func import path
4
5
  from datachain.lib.webdataset import process_webdataset
5
6
  from datachain.lib.webdataset_laion import WDSLaion, process_laion_meta
6
- from datachain.sql.functions import path
7
7
 
8
8
  IMAGE_TARS = os.getenv(
9
9
  "IMAGE_TARS", "gs://datachain-demo/datacomp-small/shards/000000[0-5]*.tar"
@@ -1,9 +1,7 @@
1
1
  import datachain.error
2
- from datachain import C, DataChain
2
+ from datachain import C, DataChain, func
3
3
  from datachain.lib.webdataset import process_webdataset
4
4
  from datachain.lib.webdataset_laion import WDSLaion
5
- from datachain.sql import literal
6
- from datachain.sql.functions import array, greatest, least, string
7
5
 
8
6
  name = "wds"
9
7
  try:
@@ -20,14 +18,12 @@ except datachain.error.DatasetNotFoundError:
20
18
  wds.print_schema()
21
19
 
22
20
  filtered = (
23
- wds.filter(string.length(C("laion.txt")) > 5)
24
- .filter(array.length(string.split(C("laion.txt"), literal(" "))) > 2)
21
+ wds.filter(func.string.length("laion.txt") > 5)
22
+ .filter(func.array.length(func.string.split("laion.txt", " ")) > 2)
23
+ .filter(func.least("laion.json.original_width", "laion.json.original_height") > 200)
25
24
  .filter(
26
- least(C("laion.json.original_width"), C("laion.json.original_height")) > 200
27
- )
28
- .filter(
29
- greatest(C("laion.json.original_width"), C("laion.json.original_height"))
30
- / least(C("laion.json.original_width"), C("laion.json.original_height"))
25
+ func.greatest("laion.json.original_width", "laion.json.original_height")
26
+ / func.least("laion.json.original_width", "laion.json.original_height")
31
27
  < 3.0
32
28
  )
33
29
  .save()
@@ -1,4 +1,3 @@
1
- from datachain.lib import func
2
1
  from datachain.lib.data_model import DataModel, DataType, is_chain_type
3
2
  from datachain.lib.dc import C, Column, DataChain, Sys
4
3
  from datachain.lib.file import (
@@ -35,7 +34,6 @@ __all__ = [
35
34
  "Sys",
36
35
  "TarVFile",
37
36
  "TextFile",
38
- "func",
39
37
  "is_chain_type",
40
38
  "metrics",
41
39
  "param",
@@ -38,6 +38,7 @@ from datachain.dataset import (
38
38
  DATASET_PREFIX,
39
39
  QUERY_DATASET_PREFIX,
40
40
  DatasetDependency,
41
+ DatasetListRecord,
41
42
  DatasetRecord,
42
43
  DatasetStats,
43
44
  DatasetStatus,
@@ -54,7 +55,6 @@ from datachain.error import (
54
55
  QueryScriptCancelError,
55
56
  QueryScriptRunError,
56
57
  )
57
- from datachain.listing import Listing
58
58
  from datachain.node import DirType, Node, NodeWithPath
59
59
  from datachain.nodes_thread_pool import NodesThreadPool
60
60
  from datachain.remote.studio import StudioClient
@@ -73,9 +73,10 @@ if TYPE_CHECKING:
73
73
  AbstractMetastore,
74
74
  AbstractWarehouse,
75
75
  )
76
- from datachain.dataset import DatasetVersion
76
+ from datachain.dataset import DatasetListVersion
77
77
  from datachain.job import Job
78
78
  from datachain.lib.file import File
79
+ from datachain.listing import Listing
79
80
 
80
81
  logger = logging.getLogger("datachain")
81
82
 
@@ -236,7 +237,7 @@ class DatasetRowsFetcher(NodesThreadPool):
236
237
  class NodeGroup:
237
238
  """Class for a group of nodes from the same source"""
238
239
 
239
- listing: Listing
240
+ listing: "Listing"
240
241
  sources: list[DataSource]
241
242
 
242
243
  # The source path within the bucket
@@ -591,8 +592,9 @@ class Catalog:
591
592
  client_config=None,
592
593
  object_name="file",
593
594
  skip_indexing=False,
594
- ) -> tuple[Listing, str]:
595
+ ) -> tuple["Listing", str]:
595
596
  from datachain.lib.dc import DataChain
597
+ from datachain.listing import Listing
596
598
 
597
599
  DataChain.from_storage(
598
600
  source, session=self.session, update=update, object_name=object_name
@@ -660,7 +662,8 @@ class Catalog:
660
662
  no_glob: bool = False,
661
663
  client_config=None,
662
664
  ) -> list[NodeGroup]:
663
- from datachain.query import DatasetQuery
665
+ from datachain.listing import Listing
666
+ from datachain.query.dataset import DatasetQuery
664
667
 
665
668
  def _row_to_node(d: dict[str, Any]) -> Node:
666
669
  del d["file__source"]
@@ -876,7 +879,7 @@ class Catalog:
876
879
  def update_dataset_version_with_warehouse_info(
877
880
  self, dataset: DatasetRecord, version: int, rows_dropped=False, **kwargs
878
881
  ) -> None:
879
- from datachain.query import DatasetQuery
882
+ from datachain.query.dataset import DatasetQuery
880
883
 
881
884
  dataset_version = dataset.get_version(version)
882
885
 
@@ -1133,7 +1136,7 @@ class Catalog:
1133
1136
 
1134
1137
  return direct_dependencies
1135
1138
 
1136
- def ls_datasets(self, include_listing: bool = False) -> Iterator[DatasetRecord]:
1139
+ def ls_datasets(self, include_listing: bool = False) -> Iterator[DatasetListRecord]:
1137
1140
  datasets = self.metastore.list_datasets()
1138
1141
  for d in datasets:
1139
1142
  if not d.is_bucket_listing or include_listing:
@@ -1142,7 +1145,7 @@ class Catalog:
1142
1145
  def list_datasets_versions(
1143
1146
  self,
1144
1147
  include_listing: bool = False,
1145
- ) -> Iterator[tuple[DatasetRecord, "DatasetVersion", Optional["Job"]]]:
1148
+ ) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
1146
1149
  """Iterate over all dataset versions with related jobs."""
1147
1150
  datasets = list(self.ls_datasets(include_listing=include_listing))
1148
1151
 
@@ -1177,7 +1180,7 @@ class Catalog:
1177
1180
  def ls_dataset_rows(
1178
1181
  self, name: str, version: int, offset=None, limit=None
1179
1182
  ) -> list[dict]:
1180
- from datachain.query import DatasetQuery
1183
+ from datachain.query.dataset import DatasetQuery
1181
1184
 
1182
1185
  dataset = self.get_dataset(name)
1183
1186
 
@@ -18,7 +18,12 @@ from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyVa
18
18
  from datachain.config import Config
19
19
  from datachain.error import DataChainError
20
20
  from datachain.lib.dc import DataChain
21
- from datachain.studio import list_datasets, process_studio_cli_args
21
+ from datachain.studio import (
22
+ edit_studio_dataset,
23
+ list_datasets,
24
+ process_studio_cli_args,
25
+ remove_studio_dataset,
26
+ )
22
27
  from datachain.telemetry import telemetry
23
28
 
24
29
  if TYPE_CHECKING:
@@ -403,21 +408,44 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
403
408
  parse_edit_dataset.add_argument(
404
409
  "--new-name",
405
410
  action="store",
406
- default="",
407
411
  help="Dataset new name",
408
412
  )
409
413
  parse_edit_dataset.add_argument(
410
414
  "--description",
411
415
  action="store",
412
- default="",
413
416
  help="Dataset description",
414
417
  )
415
418
  parse_edit_dataset.add_argument(
416
419
  "--labels",
417
- default=[],
418
420
  nargs="+",
419
421
  help="Dataset labels",
420
422
  )
423
+ parse_edit_dataset.add_argument(
424
+ "--studio",
425
+ action="store_true",
426
+ default=False,
427
+ help="Edit dataset from Studio",
428
+ )
429
+ parse_edit_dataset.add_argument(
430
+ "-L",
431
+ "--local",
432
+ action="store_true",
433
+ default=False,
434
+ help="Edit local dataset only",
435
+ )
436
+ parse_edit_dataset.add_argument(
437
+ "-a",
438
+ "--all",
439
+ action="store_true",
440
+ default=True,
441
+ help="Edit both datasets from studio and local",
442
+ )
443
+ parse_edit_dataset.add_argument(
444
+ "--team",
445
+ action="store",
446
+ default=None,
447
+ help="The team to edit a dataset. By default, it will use team from config.",
448
+ )
421
449
 
422
450
  datasets_parser = subp.add_parser(
423
451
  "datasets", parents=[parent_parser], description="List datasets"
@@ -466,6 +494,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
466
494
  action=BooleanOptionalAction,
467
495
  help="Force delete registered dataset with all of it's versions",
468
496
  )
497
+ rm_dataset_parser.add_argument(
498
+ "--studio",
499
+ action="store_true",
500
+ default=False,
501
+ help="Remove dataset from Studio",
502
+ )
503
+ rm_dataset_parser.add_argument(
504
+ "-L",
505
+ "--local",
506
+ action="store_true",
507
+ default=False,
508
+ help="Remove local datasets only",
509
+ )
510
+ rm_dataset_parser.add_argument(
511
+ "-a",
512
+ "--all",
513
+ action="store_true",
514
+ default=True,
515
+ help="Remove both local and studio",
516
+ )
517
+ rm_dataset_parser.add_argument(
518
+ "--team",
519
+ action="store",
520
+ default=None,
521
+ help="The team to delete a dataset. By default, it will use team from config.",
522
+ )
469
523
 
470
524
  dataset_stats_parser = subp.add_parser(
471
525
  "dataset-stats",
@@ -909,8 +963,40 @@ def rm_dataset(
909
963
  name: str,
910
964
  version: Optional[int] = None,
911
965
  force: Optional[bool] = False,
966
+ studio: bool = False,
967
+ local: bool = False,
968
+ all: bool = True,
969
+ team: Optional[str] = None,
970
+ ):
971
+ token = Config().read().get("studio", {}).get("token")
972
+ all, local, studio = _determine_flavors(studio, local, all, token)
973
+
974
+ if all or local:
975
+ catalog.remove_dataset(name, version=version, force=force)
976
+
977
+ if (all or studio) and token:
978
+ remove_studio_dataset(team, name, version, force)
979
+
980
+
981
+ def edit_dataset(
982
+ catalog: "Catalog",
983
+ name: str,
984
+ new_name: Optional[str] = None,
985
+ description: Optional[str] = None,
986
+ labels: Optional[list[str]] = None,
987
+ studio: bool = False,
988
+ local: bool = False,
989
+ all: bool = True,
990
+ team: Optional[str] = None,
912
991
  ):
913
- catalog.remove_dataset(name, version=version, force=force)
992
+ token = Config().read().get("studio", {}).get("token")
993
+ all, local, studio = _determine_flavors(studio, local, all, token)
994
+
995
+ if all or local:
996
+ catalog.edit_dataset(name, new_name, description, labels)
997
+
998
+ if (all or studio) and token:
999
+ edit_studio_dataset(team, name, new_name, description, labels)
914
1000
 
915
1001
 
916
1002
  def dataset_stats(
@@ -957,7 +1043,7 @@ def show(
957
1043
  schema: bool = False,
958
1044
  ) -> None:
959
1045
  from datachain.lib.dc import DataChain
960
- from datachain.query import DatasetQuery
1046
+ from datachain.query.dataset import DatasetQuery
961
1047
  from datachain.utils import show_records
962
1048
 
963
1049
  dataset = catalog.get_dataset(name)
@@ -1127,11 +1213,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1127
1213
  edatachain_file=args.edatachain_file,
1128
1214
  )
1129
1215
  elif args.command == "edit-dataset":
1130
- catalog.edit_dataset(
1216
+ edit_dataset(
1217
+ catalog,
1131
1218
  args.name,
1132
- description=args.description,
1133
1219
  new_name=args.new_name,
1220
+ description=args.description,
1134
1221
  labels=args.labels,
1222
+ studio=args.studio,
1223
+ local=args.local,
1224
+ all=args.all,
1225
+ team=args.team,
1135
1226
  )
1136
1227
  elif args.command == "ls":
1137
1228
  ls(
@@ -1164,7 +1255,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1164
1255
  schema=args.schema,
1165
1256
  )
1166
1257
  elif args.command == "rm-dataset":
1167
- rm_dataset(catalog, args.name, version=args.version, force=args.force)
1258
+ rm_dataset(
1259
+ catalog,
1260
+ args.name,
1261
+ version=args.version,
1262
+ force=args.force,
1263
+ studio=args.studio,
1264
+ local=args.local,
1265
+ all=args.all,
1266
+ team=args.team,
1267
+ )
1168
1268
  elif args.command == "dataset-stats":
1169
1269
  dataset_stats(
1170
1270
  catalog,
@@ -28,7 +28,6 @@ from tqdm import tqdm
28
28
  from datachain.cache import DataChainCache
29
29
  from datachain.client.fileslice import FileWrapper
30
30
  from datachain.error import ClientError as DataChainClientError
31
- from datachain.lib.file import File
32
31
  from datachain.nodes_fetcher import NodesFetcher
33
32
  from datachain.nodes_thread_pool import NodeChunk
34
33
 
@@ -36,6 +35,7 @@ if TYPE_CHECKING:
36
35
  from fsspec.spec import AbstractFileSystem
37
36
 
38
37
  from datachain.dataset import StorageURI
38
+ from datachain.lib.file import File
39
39
 
40
40
 
41
41
  logger = logging.getLogger("datachain")
@@ -45,7 +45,7 @@ DELIMITER = "/" # Path delimiter.
45
45
 
46
46
  DATA_SOURCE_URI_PATTERN = re.compile(r"^[\w]+:\/\/.*$")
47
47
 
48
- ResultQueue = asyncio.Queue[Optional[Sequence[File]]]
48
+ ResultQueue = asyncio.Queue[Optional[Sequence["File"]]]
49
49
 
50
50
 
51
51
  def _is_win_local_path(uri: str) -> bool:
@@ -212,7 +212,7 @@ class Client(ABC):
212
212
 
213
213
  async def scandir(
214
214
  self, start_prefix: str, method: str = "default"
215
- ) -> AsyncIterator[Sequence[File]]:
215
+ ) -> AsyncIterator[Sequence["File"]]:
216
216
  try:
217
217
  impl = getattr(self, f"_fetch_{method}")
218
218
  except AttributeError:
@@ -317,7 +317,7 @@ class Client(ABC):
317
317
  return f"{self.PREFIX}{self.name}/{rel_path}"
318
318
 
319
319
  @abstractmethod
320
- def info_to_file(self, v: dict[str, Any], parent: str) -> File: ...
320
+ def info_to_file(self, v: dict[str, Any], parent: str) -> "File": ...
321
321
 
322
322
  def fetch_nodes(
323
323
  self,
@@ -354,7 +354,7 @@ class Client(ABC):
354
354
  copy2(src, dst)
355
355
 
356
356
  def open_object(
357
- self, file: File, use_cache: bool = True, cb: Callback = DEFAULT_CALLBACK
357
+ self, file: "File", use_cache: bool = True, cb: Callback = DEFAULT_CALLBACK
358
358
  ) -> BinaryIO:
359
359
  """Open a file, including files in tar archives."""
360
360
  if use_cache and (cache_path := self.cache.get_path(file)):
@@ -362,19 +362,19 @@ class Client(ABC):
362
362
  assert not file.location
363
363
  return FileWrapper(self.fs.open(self.get_full_path(file.path)), cb) # type: ignore[return-value]
364
364
 
365
- def download(self, file: File, *, callback: Callback = DEFAULT_CALLBACK) -> None:
365
+ def download(self, file: "File", *, callback: Callback = DEFAULT_CALLBACK) -> None:
366
366
  sync(get_loop(), functools.partial(self._download, file, callback=callback))
367
367
 
368
- async def _download(self, file: File, *, callback: "Callback" = None) -> None:
368
+ async def _download(self, file: "File", *, callback: "Callback" = None) -> None:
369
369
  if self.cache.contains(file):
370
370
  # Already in cache, so there's nothing to do.
371
371
  return
372
372
  await self._put_in_cache(file, callback=callback)
373
373
 
374
- def put_in_cache(self, file: File, *, callback: "Callback" = None) -> None:
374
+ def put_in_cache(self, file: "File", *, callback: "Callback" = None) -> None:
375
375
  sync(get_loop(), functools.partial(self._put_in_cache, file, callback=callback))
376
376
 
377
- async def _put_in_cache(self, file: File, *, callback: "Callback" = None) -> None:
377
+ async def _put_in_cache(self, file: "File", *, callback: "Callback" = None) -> None:
378
378
  assert not file.location
379
379
  if file.etag:
380
380
  etag = await self.get_current_etag(file)