datachain 0.7.2__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (280):
  1. {datachain-0.7.2/src/datachain.egg-info → datachain-0.7.3}/PKG-INFO +2 -2
  2. {datachain-0.7.2 → datachain-0.7.3}/README.rst +1 -1
  3. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/catalog/catalog.py +4 -3
  4. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/cli.py +108 -8
  5. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/metastore.py +63 -11
  6. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/dataset.py +142 -14
  7. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/dataset_info.py +7 -3
  8. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/remote/studio.py +40 -8
  9. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/studio.py +29 -0
  10. {datachain-0.7.2 → datachain-0.7.3/src/datachain.egg-info}/PKG-INFO +2 -2
  11. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_catalog.py +21 -0
  12. {datachain-0.7.2 → datachain-0.7.3}/tests/test_cli_studio.py +119 -0
  13. {datachain-0.7.2 → datachain-0.7.3}/.cruft.json +0 -0
  14. {datachain-0.7.2 → datachain-0.7.3}/.gitattributes +0 -0
  15. {datachain-0.7.2 → datachain-0.7.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  16. {datachain-0.7.2 → datachain-0.7.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  17. {datachain-0.7.2 → datachain-0.7.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  18. {datachain-0.7.2 → datachain-0.7.3}/.github/codecov.yaml +0 -0
  19. {datachain-0.7.2 → datachain-0.7.3}/.github/dependabot.yml +0 -0
  20. {datachain-0.7.2 → datachain-0.7.3}/.github/workflows/benchmarks.yml +0 -0
  21. {datachain-0.7.2 → datachain-0.7.3}/.github/workflows/release.yml +0 -0
  22. {datachain-0.7.2 → datachain-0.7.3}/.github/workflows/tests-studio.yml +0 -0
  23. {datachain-0.7.2 → datachain-0.7.3}/.github/workflows/tests.yml +0 -0
  24. {datachain-0.7.2 → datachain-0.7.3}/.github/workflows/update-template.yaml +0 -0
  25. {datachain-0.7.2 → datachain-0.7.3}/.gitignore +0 -0
  26. {datachain-0.7.2 → datachain-0.7.3}/.pre-commit-config.yaml +0 -0
  27. {datachain-0.7.2 → datachain-0.7.3}/CODE_OF_CONDUCT.rst +0 -0
  28. {datachain-0.7.2 → datachain-0.7.3}/CONTRIBUTING.rst +0 -0
  29. {datachain-0.7.2 → datachain-0.7.3}/LICENSE +0 -0
  30. {datachain-0.7.2 → datachain-0.7.3}/docs/assets/captioned_cartoons.png +0 -0
  31. {datachain-0.7.2 → datachain-0.7.3}/docs/assets/datachain-white.svg +0 -0
  32. {datachain-0.7.2 → datachain-0.7.3}/docs/assets/datachain.svg +0 -0
  33. {datachain-0.7.2 → datachain-0.7.3}/docs/index.md +0 -0
  34. {datachain-0.7.2 → datachain-0.7.3}/docs/overrides/main.html +0 -0
  35. {datachain-0.7.2 → datachain-0.7.3}/docs/references/datachain.md +0 -0
  36. {datachain-0.7.2 → datachain-0.7.3}/docs/references/datatype.md +0 -0
  37. {datachain-0.7.2 → datachain-0.7.3}/docs/references/file.md +0 -0
  38. {datachain-0.7.2 → datachain-0.7.3}/docs/references/index.md +0 -0
  39. {datachain-0.7.2 → datachain-0.7.3}/docs/references/sql.md +0 -0
  40. {datachain-0.7.2 → datachain-0.7.3}/docs/references/torch.md +0 -0
  41. {datachain-0.7.2 → datachain-0.7.3}/docs/references/udf.md +0 -0
  42. {datachain-0.7.2 → datachain-0.7.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  43. {datachain-0.7.2 → datachain-0.7.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  44. {datachain-0.7.2 → datachain-0.7.3}/examples/computer_vision/openimage-detect.py +0 -0
  45. {datachain-0.7.2 → datachain-0.7.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
  46. {datachain-0.7.2 → datachain-0.7.3}/examples/computer_vision/ultralytics-pose.py +0 -0
  47. {datachain-0.7.2 → datachain-0.7.3}/examples/computer_vision/ultralytics-segment.py +0 -0
  48. {datachain-0.7.2 → datachain-0.7.3}/examples/get_started/common_sql_functions.py +0 -0
  49. {datachain-0.7.2 → datachain-0.7.3}/examples/get_started/json-csv-reader.py +0 -0
  50. {datachain-0.7.2 → datachain-0.7.3}/examples/get_started/torch-loader.py +0 -0
  51. {datachain-0.7.2 → datachain-0.7.3}/examples/get_started/udfs/parallel.py +0 -0
  52. {datachain-0.7.2 → datachain-0.7.3}/examples/get_started/udfs/simple.py +0 -0
  53. {datachain-0.7.2 → datachain-0.7.3}/examples/get_started/udfs/stateful.py +0 -0
  54. {datachain-0.7.2 → datachain-0.7.3}/examples/llm_and_nlp/claude-query.py +0 -0
  55. {datachain-0.7.2 → datachain-0.7.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  56. {datachain-0.7.2 → datachain-0.7.3}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  57. {datachain-0.7.2 → datachain-0.7.3}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  58. {datachain-0.7.2 → datachain-0.7.3}/examples/multimodal/clip_inference.py +0 -0
  59. {datachain-0.7.2 → datachain-0.7.3}/examples/multimodal/hf_pipeline.py +0 -0
  60. {datachain-0.7.2 → datachain-0.7.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
  61. {datachain-0.7.2 → datachain-0.7.3}/examples/multimodal/wds.py +0 -0
  62. {datachain-0.7.2 → datachain-0.7.3}/examples/multimodal/wds_filtered.py +0 -0
  63. {datachain-0.7.2 → datachain-0.7.3}/mkdocs.yml +0 -0
  64. {datachain-0.7.2 → datachain-0.7.3}/noxfile.py +0 -0
  65. {datachain-0.7.2 → datachain-0.7.3}/pyproject.toml +0 -0
  66. {datachain-0.7.2 → datachain-0.7.3}/setup.cfg +0 -0
  67. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/__init__.py +0 -0
  68. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/__main__.py +0 -0
  69. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/asyn.py +0 -0
  70. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/cache.py +0 -0
  71. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/catalog/__init__.py +0 -0
  72. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/catalog/datasource.py +0 -0
  73. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/catalog/loader.py +0 -0
  74. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/cli_utils.py +0 -0
  75. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/__init__.py +0 -0
  76. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/azure.py +0 -0
  77. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/fileslice.py +0 -0
  78. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/fsspec.py +0 -0
  79. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/gcs.py +0 -0
  80. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/hf.py +0 -0
  81. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/local.py +0 -0
  82. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/client/s3.py +0 -0
  83. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/config.py +0 -0
  84. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/__init__.py +0 -0
  85. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/db_engine.py +0 -0
  86. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/id_generator.py +0 -0
  87. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/job.py +0 -0
  88. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/schema.py +0 -0
  89. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/serializer.py +0 -0
  90. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/sqlite.py +0 -0
  91. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/data_storage/warehouse.py +0 -0
  92. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/error.py +0 -0
  93. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/__init__.py +0 -0
  94. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/aggregate.py +0 -0
  95. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/array.py +0 -0
  96. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/base.py +0 -0
  97. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/conditional.py +0 -0
  98. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/func.py +0 -0
  99. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/path.py +0 -0
  100. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/random.py +0 -0
  101. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/string.py +0 -0
  102. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/func/window.py +0 -0
  103. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/job.py +0 -0
  104. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/__init__.py +0 -0
  105. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/arrow.py +0 -0
  106. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/clip.py +0 -0
  107. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/convert/__init__.py +0 -0
  108. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/convert/flatten.py +0 -0
  109. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
  110. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
  111. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/convert/unflatten.py +0 -0
  112. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  113. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/data_model.py +0 -0
  114. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/dc.py +0 -0
  115. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/file.py +0 -0
  116. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/hf.py +0 -0
  117. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/image.py +0 -0
  118. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/listing.py +0 -0
  119. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/listing_info.py +0 -0
  120. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/meta_formats.py +0 -0
  121. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/model_store.py +0 -0
  122. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/pytorch.py +0 -0
  123. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/settings.py +0 -0
  124. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/signal_schema.py +0 -0
  125. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/tar.py +0 -0
  126. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/text.py +0 -0
  127. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/udf.py +0 -0
  128. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/udf_signature.py +0 -0
  129. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/utils.py +0 -0
  130. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/vfile.py +0 -0
  131. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/webdataset.py +0 -0
  132. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/lib/webdataset_laion.py +0 -0
  133. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/listing.py +0 -0
  134. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/__init__.py +0 -0
  135. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/bbox.py +0 -0
  136. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/pose.py +0 -0
  137. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/segment.py +0 -0
  138. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/ultralytics/__init__.py +0 -0
  139. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/ultralytics/bbox.py +0 -0
  140. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/ultralytics/pose.py +0 -0
  141. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/model/ultralytics/segment.py +0 -0
  142. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/node.py +0 -0
  143. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/nodes_fetcher.py +0 -0
  144. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/nodes_thread_pool.py +0 -0
  145. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/progress.py +0 -0
  146. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/py.typed +0 -0
  147. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/__init__.py +0 -0
  148. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/batch.py +0 -0
  149. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/dataset.py +0 -0
  150. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/dispatch.py +0 -0
  151. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/metrics.py +0 -0
  152. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/params.py +0 -0
  153. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/queue.py +0 -0
  154. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/schema.py +0 -0
  155. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/query/session.py +0 -0
  156. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/remote/__init__.py +0 -0
  157. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/__init__.py +0 -0
  158. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/default/__init__.py +0 -0
  159. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/default/base.py +0 -0
  160. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/functions/__init__.py +0 -0
  161. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/functions/aggregate.py +0 -0
  162. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/functions/array.py +0 -0
  163. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/functions/conditional.py +0 -0
  164. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/functions/path.py +0 -0
  165. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/functions/random.py +0 -0
  166. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/functions/string.py +0 -0
  167. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/selectable.py +0 -0
  168. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  169. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/sqlite/base.py +0 -0
  170. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/sqlite/types.py +0 -0
  171. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/sqlite/vector.py +0 -0
  172. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/types.py +0 -0
  173. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/sql/utils.py +0 -0
  174. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/telemetry.py +0 -0
  175. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/toolkit/__init__.py +0 -0
  176. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/toolkit/split.py +0 -0
  177. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/torch/__init__.py +0 -0
  178. {datachain-0.7.2 → datachain-0.7.3}/src/datachain/utils.py +0 -0
  179. {datachain-0.7.2 → datachain-0.7.3}/src/datachain.egg-info/SOURCES.txt +0 -0
  180. {datachain-0.7.2 → datachain-0.7.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  181. {datachain-0.7.2 → datachain-0.7.3}/src/datachain.egg-info/entry_points.txt +0 -0
  182. {datachain-0.7.2 → datachain-0.7.3}/src/datachain.egg-info/requires.txt +0 -0
  183. {datachain-0.7.2 → datachain-0.7.3}/src/datachain.egg-info/top_level.txt +0 -0
  184. {datachain-0.7.2 → datachain-0.7.3}/tests/__init__.py +0 -0
  185. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/__init__.py +0 -0
  186. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/conftest.py +0 -0
  187. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  188. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/datasets/.dvc/config +0 -0
  189. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/datasets/.gitignore +0 -0
  190. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  191. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/test_datachain.py +0 -0
  192. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/test_ls.py +0 -0
  193. {datachain-0.7.2 → datachain-0.7.3}/tests/benchmarks/test_version.py +0 -0
  194. {datachain-0.7.2 → datachain-0.7.3}/tests/conftest.py +0 -0
  195. {datachain-0.7.2 → datachain-0.7.3}/tests/data.py +0 -0
  196. {datachain-0.7.2 → datachain-0.7.3}/tests/examples/__init__.py +0 -0
  197. {datachain-0.7.2 → datachain-0.7.3}/tests/examples/test_examples.py +0 -0
  198. {datachain-0.7.2 → datachain-0.7.3}/tests/examples/test_wds_e2e.py +0 -0
  199. {datachain-0.7.2 → datachain-0.7.3}/tests/examples/wds_data.py +0 -0
  200. {datachain-0.7.2 → datachain-0.7.3}/tests/func/__init__.py +0 -0
  201. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_client.py +0 -0
  202. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_datachain.py +0 -0
  203. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_dataset_query.py +0 -0
  204. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_datasets.py +0 -0
  205. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_feature_pickling.py +0 -0
  206. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_listing.py +0 -0
  207. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_ls.py +0 -0
  208. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_meta_formats.py +0 -0
  209. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_metrics.py +0 -0
  210. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_pull.py +0 -0
  211. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_pytorch.py +0 -0
  212. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_query.py +0 -0
  213. {datachain-0.7.2 → datachain-0.7.3}/tests/func/test_toolkit.py +0 -0
  214. {datachain-0.7.2 → datachain-0.7.3}/tests/scripts/feature_class.py +0 -0
  215. {datachain-0.7.2 → datachain-0.7.3}/tests/scripts/feature_class_exception.py +0 -0
  216. {datachain-0.7.2 → datachain-0.7.3}/tests/scripts/feature_class_parallel.py +0 -0
  217. {datachain-0.7.2 → datachain-0.7.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  218. {datachain-0.7.2 → datachain-0.7.3}/tests/scripts/name_len_slow.py +0 -0
  219. {datachain-0.7.2 → datachain-0.7.3}/tests/test_atomicity.py +0 -0
  220. {datachain-0.7.2 → datachain-0.7.3}/tests/test_cli_e2e.py +0 -0
  221. {datachain-0.7.2 → datachain-0.7.3}/tests/test_query_e2e.py +0 -0
  222. {datachain-0.7.2 → datachain-0.7.3}/tests/test_telemetry.py +0 -0
  223. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/__init__.py +0 -0
  224. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/__init__.py +0 -0
  225. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/conftest.py +0 -0
  226. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_arrow.py +0 -0
  227. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_clip.py +0 -0
  228. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_datachain.py +0 -0
  229. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  230. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_datachain_merge.py +0 -0
  231. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_feature.py +0 -0
  232. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_feature_utils.py +0 -0
  233. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_file.py +0 -0
  234. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_hf.py +0 -0
  235. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_image.py +0 -0
  236. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_listing_info.py +0 -0
  237. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_models.py +0 -0
  238. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_schema.py +0 -0
  239. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_signal_schema.py +0 -0
  240. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_sql_to_python.py +0 -0
  241. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_text.py +0 -0
  242. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_udf_signature.py +0 -0
  243. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_utils.py +0 -0
  244. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/lib/test_webdataset.py +0 -0
  245. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/__init__.py +0 -0
  246. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/sqlite/__init__.py +0 -0
  247. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/sqlite/test_types.py +0 -0
  248. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  249. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/test_array.py +0 -0
  250. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/test_conditional.py +0 -0
  251. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/test_path.py +0 -0
  252. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/test_random.py +0 -0
  253. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/test_selectable.py +0 -0
  254. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/sql/test_string.py +0 -0
  255. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_asyn.py +0 -0
  256. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_cache.py +0 -0
  257. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_catalog.py +0 -0
  258. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_catalog_loader.py +0 -0
  259. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_cli_parsing.py +0 -0
  260. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_client.py +0 -0
  261. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_client_s3.py +0 -0
  262. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_config.py +0 -0
  263. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_data_storage.py +0 -0
  264. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_database_engine.py +0 -0
  265. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_dataset.py +0 -0
  266. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_dispatch.py +0 -0
  267. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_fileslice.py +0 -0
  268. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_func.py +0 -0
  269. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_id_generator.py +0 -0
  270. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_listing.py +0 -0
  271. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_metastore.py +0 -0
  272. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_module_exports.py +0 -0
  273. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_query.py +0 -0
  274. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_query_metrics.py +0 -0
  275. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_query_params.py +0 -0
  276. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_serializer.py +0 -0
  277. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_session.py +0 -0
  278. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_utils.py +0 -0
  279. {datachain-0.7.2 → datachain-0.7.3}/tests/unit/test_warehouse.py +0 -0
  280. {datachain-0.7.2 → datachain-0.7.3}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -139,7 +139,7 @@ Key Features
139
139
  ============
140
140
 
141
141
  📂 **Multimodal Dataset Versioning.**
142
- - Version unstructured data without redundant data copies, by supporitng
142
+ - Version unstructured data without redundant data copies, by supporting
143
143
  references to S3, GCP, Azure, and local file systems.
144
144
  - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
145
145
  - Unite files and metadata together into persistent, versioned, columnar datasets.
@@ -37,7 +37,7 @@ Key Features
37
37
  ============
38
38
 
39
39
  📂 **Multimodal Dataset Versioning.**
40
- - Version unstructured data without redundant data copies, by supporitng
40
+ - Version unstructured data without redundant data copies, by supporting
41
41
  references to S3, GCP, Azure, and local file systems.
42
42
  - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
43
43
  - Unite files and metadata together into persistent, versioned, columnar datasets.
@@ -38,6 +38,7 @@ from datachain.dataset import (
38
38
  DATASET_PREFIX,
39
39
  QUERY_DATASET_PREFIX,
40
40
  DatasetDependency,
41
+ DatasetListRecord,
41
42
  DatasetRecord,
42
43
  DatasetStats,
43
44
  DatasetStatus,
@@ -72,7 +73,7 @@ if TYPE_CHECKING:
72
73
  AbstractMetastore,
73
74
  AbstractWarehouse,
74
75
  )
75
- from datachain.dataset import DatasetVersion
76
+ from datachain.dataset import DatasetListVersion
76
77
  from datachain.job import Job
77
78
  from datachain.lib.file import File
78
79
  from datachain.listing import Listing
@@ -1135,7 +1136,7 @@ class Catalog:
1135
1136
 
1136
1137
  return direct_dependencies
1137
1138
 
1138
- def ls_datasets(self, include_listing: bool = False) -> Iterator[DatasetRecord]:
1139
+ def ls_datasets(self, include_listing: bool = False) -> Iterator[DatasetListRecord]:
1139
1140
  datasets = self.metastore.list_datasets()
1140
1141
  for d in datasets:
1141
1142
  if not d.is_bucket_listing or include_listing:
@@ -1144,7 +1145,7 @@ class Catalog:
1144
1145
  def list_datasets_versions(
1145
1146
  self,
1146
1147
  include_listing: bool = False,
1147
- ) -> Iterator[tuple[DatasetRecord, "DatasetVersion", Optional["Job"]]]:
1148
+ ) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
1148
1149
  """Iterate over all dataset versions with related jobs."""
1149
1150
  datasets = list(self.ls_datasets(include_listing=include_listing))
1150
1151
 
@@ -18,7 +18,12 @@ from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyVa
18
18
  from datachain.config import Config
19
19
  from datachain.error import DataChainError
20
20
  from datachain.lib.dc import DataChain
21
- from datachain.studio import list_datasets, process_studio_cli_args
21
+ from datachain.studio import (
22
+ edit_studio_dataset,
23
+ list_datasets,
24
+ process_studio_cli_args,
25
+ remove_studio_dataset,
26
+ )
22
27
  from datachain.telemetry import telemetry
23
28
 
24
29
  if TYPE_CHECKING:
@@ -403,21 +408,44 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
403
408
  parse_edit_dataset.add_argument(
404
409
  "--new-name",
405
410
  action="store",
406
- default="",
407
411
  help="Dataset new name",
408
412
  )
409
413
  parse_edit_dataset.add_argument(
410
414
  "--description",
411
415
  action="store",
412
- default="",
413
416
  help="Dataset description",
414
417
  )
415
418
  parse_edit_dataset.add_argument(
416
419
  "--labels",
417
- default=[],
418
420
  nargs="+",
419
421
  help="Dataset labels",
420
422
  )
423
+ parse_edit_dataset.add_argument(
424
+ "--studio",
425
+ action="store_true",
426
+ default=False,
427
+ help="Edit dataset from Studio",
428
+ )
429
+ parse_edit_dataset.add_argument(
430
+ "-L",
431
+ "--local",
432
+ action="store_true",
433
+ default=False,
434
+ help="Edit local dataset only",
435
+ )
436
+ parse_edit_dataset.add_argument(
437
+ "-a",
438
+ "--all",
439
+ action="store_true",
440
+ default=True,
441
+ help="Edit both datasets from studio and local",
442
+ )
443
+ parse_edit_dataset.add_argument(
444
+ "--team",
445
+ action="store",
446
+ default=None,
447
+ help="The team to edit a dataset. By default, it will use team from config.",
448
+ )
421
449
 
422
450
  datasets_parser = subp.add_parser(
423
451
  "datasets", parents=[parent_parser], description="List datasets"
@@ -466,6 +494,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
466
494
  action=BooleanOptionalAction,
467
495
  help="Force delete registered dataset with all of it's versions",
468
496
  )
497
+ rm_dataset_parser.add_argument(
498
+ "--studio",
499
+ action="store_true",
500
+ default=False,
501
+ help="Remove dataset from Studio",
502
+ )
503
+ rm_dataset_parser.add_argument(
504
+ "-L",
505
+ "--local",
506
+ action="store_true",
507
+ default=False,
508
+ help="Remove local datasets only",
509
+ )
510
+ rm_dataset_parser.add_argument(
511
+ "-a",
512
+ "--all",
513
+ action="store_true",
514
+ default=True,
515
+ help="Remove both local and studio",
516
+ )
517
+ rm_dataset_parser.add_argument(
518
+ "--team",
519
+ action="store",
520
+ default=None,
521
+ help="The team to delete a dataset. By default, it will use team from config.",
522
+ )
469
523
 
470
524
  dataset_stats_parser = subp.add_parser(
471
525
  "dataset-stats",
@@ -909,8 +963,40 @@ def rm_dataset(
909
963
  name: str,
910
964
  version: Optional[int] = None,
911
965
  force: Optional[bool] = False,
966
+ studio: bool = False,
967
+ local: bool = False,
968
+ all: bool = True,
969
+ team: Optional[str] = None,
970
+ ):
971
+ token = Config().read().get("studio", {}).get("token")
972
+ all, local, studio = _determine_flavors(studio, local, all, token)
973
+
974
+ if all or local:
975
+ catalog.remove_dataset(name, version=version, force=force)
976
+
977
+ if (all or studio) and token:
978
+ remove_studio_dataset(team, name, version, force)
979
+
980
+
981
+ def edit_dataset(
982
+ catalog: "Catalog",
983
+ name: str,
984
+ new_name: Optional[str] = None,
985
+ description: Optional[str] = None,
986
+ labels: Optional[list[str]] = None,
987
+ studio: bool = False,
988
+ local: bool = False,
989
+ all: bool = True,
990
+ team: Optional[str] = None,
912
991
  ):
913
- catalog.remove_dataset(name, version=version, force=force)
992
+ token = Config().read().get("studio", {}).get("token")
993
+ all, local, studio = _determine_flavors(studio, local, all, token)
994
+
995
+ if all or local:
996
+ catalog.edit_dataset(name, new_name, description, labels)
997
+
998
+ if (all or studio) and token:
999
+ edit_studio_dataset(team, name, new_name, description, labels)
914
1000
 
915
1001
 
916
1002
  def dataset_stats(
@@ -1127,11 +1213,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1127
1213
  edatachain_file=args.edatachain_file,
1128
1214
  )
1129
1215
  elif args.command == "edit-dataset":
1130
- catalog.edit_dataset(
1216
+ edit_dataset(
1217
+ catalog,
1131
1218
  args.name,
1132
- description=args.description,
1133
1219
  new_name=args.new_name,
1220
+ description=args.description,
1134
1221
  labels=args.labels,
1222
+ studio=args.studio,
1223
+ local=args.local,
1224
+ all=args.all,
1225
+ team=args.team,
1135
1226
  )
1136
1227
  elif args.command == "ls":
1137
1228
  ls(
@@ -1164,7 +1255,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1164
1255
  schema=args.schema,
1165
1256
  )
1166
1257
  elif args.command == "rm-dataset":
1167
- rm_dataset(catalog, args.name, version=args.version, force=args.force)
1258
+ rm_dataset(
1259
+ catalog,
1260
+ args.name,
1261
+ version=args.version,
1262
+ force=args.force,
1263
+ studio=args.studio,
1264
+ local=args.local,
1265
+ all=args.all,
1266
+ team=args.team,
1267
+ )
1168
1268
  elif args.command == "dataset-stats":
1169
1269
  dataset_stats(
1170
1270
  catalog,
@@ -27,6 +27,8 @@ from datachain.data_storage import JobQueryType, JobStatus
27
27
  from datachain.data_storage.serializer import Serializable
28
28
  from datachain.dataset import (
29
29
  DatasetDependency,
30
+ DatasetListRecord,
31
+ DatasetListVersion,
30
32
  DatasetRecord,
31
33
  DatasetStatus,
32
34
  DatasetVersion,
@@ -59,6 +61,8 @@ class AbstractMetastore(ABC, Serializable):
59
61
 
60
62
  schema: "schema.Schema"
61
63
  dataset_class: type[DatasetRecord] = DatasetRecord
64
+ dataset_list_class: type[DatasetListRecord] = DatasetListRecord
65
+ dataset_list_version_class: type[DatasetListVersion] = DatasetListVersion
62
66
  dependency_class: type[DatasetDependency] = DatasetDependency
63
67
  job_class: type[Job] = Job
64
68
 
@@ -166,11 +170,11 @@ class AbstractMetastore(ABC, Serializable):
166
170
  """
167
171
 
168
172
  @abstractmethod
169
- def list_datasets(self) -> Iterator[DatasetRecord]:
173
+ def list_datasets(self) -> Iterator[DatasetListRecord]:
170
174
  """Lists all datasets."""
171
175
 
172
176
  @abstractmethod
173
- def list_datasets_by_prefix(self, prefix: str) -> Iterator["DatasetRecord"]:
177
+ def list_datasets_by_prefix(self, prefix: str) -> Iterator["DatasetListRecord"]:
174
178
  """Lists all datasets which names start with prefix."""
175
179
 
176
180
  @abstractmethod
@@ -348,6 +352,14 @@ class AbstractDBMetastore(AbstractMetastore):
348
352
  if c.name # type: ignore [attr-defined]
349
353
  ]
350
354
 
355
+ @cached_property
356
+ def _dataset_list_fields(self) -> list[str]:
357
+ return [
358
+ c.name # type: ignore [attr-defined]
359
+ for c in self._datasets_columns()
360
+ if c.name in self.dataset_list_class.__dataclass_fields__ # type: ignore [attr-defined]
361
+ ]
362
+
351
363
  @classmethod
352
364
  def _datasets_versions_columns(cls) -> list["SchemaItem"]:
353
365
  """Datasets versions table columns."""
@@ -390,6 +402,15 @@ class AbstractDBMetastore(AbstractMetastore):
390
402
  if c.name # type: ignore [attr-defined]
391
403
  ]
392
404
 
405
+ @cached_property
406
+ def _dataset_list_version_fields(self) -> list[str]:
407
+ return [
408
+ c.name # type: ignore [attr-defined]
409
+ for c in self._datasets_versions_columns()
410
+ if c.name # type: ignore [attr-defined]
411
+ in self.dataset_list_version_class.__dataclass_fields__
412
+ ]
413
+
393
414
  @classmethod
394
415
  def _datasets_dependencies_columns(cls) -> list["SchemaItem"]:
395
416
  """Datasets dependencies table columns."""
@@ -671,7 +692,25 @@ class AbstractDBMetastore(AbstractMetastore):
671
692
  if dataset:
672
693
  yield dataset
673
694
 
674
- def _base_dataset_query(self):
695
+ def _parse_list_dataset(self, rows) -> Optional[DatasetListRecord]:
696
+ versions = [self.dataset_list_class.parse(*r) for r in rows]
697
+ if not versions:
698
+ return None
699
+ return reduce(lambda ds, version: ds.merge_versions(version), versions)
700
+
701
+ def _parse_dataset_list(self, rows) -> Iterator["DatasetListRecord"]:
702
+ # grouping rows by dataset id
703
+ for _, g in groupby(rows, lambda r: r[0]):
704
+ dataset = self._parse_list_dataset(list(g))
705
+ if dataset:
706
+ yield dataset
707
+
708
+ def _get_dataset_query(
709
+ self,
710
+ dataset_fields: list[str],
711
+ dataset_version_fields: list[str],
712
+ isouter: bool = True,
713
+ ):
675
714
  if not (
676
715
  self.db.has_table(self._datasets.name)
677
716
  and self.db.has_table(self._datasets_versions.name)
@@ -680,23 +719,36 @@ class AbstractDBMetastore(AbstractMetastore):
680
719
 
681
720
  d = self._datasets
682
721
  dv = self._datasets_versions
722
+
683
723
  query = self._datasets_select(
684
- *(getattr(d.c, f) for f in self._dataset_fields),
685
- *(getattr(dv.c, f) for f in self._dataset_version_fields),
724
+ *(getattr(d.c, f) for f in dataset_fields),
725
+ *(getattr(dv.c, f) for f in dataset_version_fields),
686
726
  )
687
- j = d.join(dv, d.c.id == dv.c.dataset_id, isouter=True)
727
+ j = d.join(dv, d.c.id == dv.c.dataset_id, isouter=isouter)
688
728
  return query.select_from(j)
689
729
 
690
- def list_datasets(self) -> Iterator["DatasetRecord"]:
730
+ def _base_dataset_query(self):
731
+ return self._get_dataset_query(
732
+ self._dataset_fields, self._dataset_version_fields
733
+ )
734
+
735
+ def _base_list_datasets_query(self):
736
+ return self._get_dataset_query(
737
+ self._dataset_list_fields, self._dataset_list_version_fields, isouter=False
738
+ )
739
+
740
+ def list_datasets(self) -> Iterator["DatasetListRecord"]:
691
741
  """Lists all datasets."""
692
- yield from self._parse_datasets(self.db.execute(self._base_dataset_query()))
742
+ yield from self._parse_dataset_list(
743
+ self.db.execute(self._base_list_datasets_query())
744
+ )
693
745
 
694
746
  def list_datasets_by_prefix(
695
747
  self, prefix: str, conn=None
696
- ) -> Iterator["DatasetRecord"]:
697
- query = self._base_dataset_query()
748
+ ) -> Iterator["DatasetListRecord"]:
749
+ query = self._base_list_datasets_query()
698
750
  query = query.where(self._datasets.c.name.startswith(prefix))
699
- yield from self._parse_datasets(self.db.execute(query))
751
+ yield from self._parse_dataset_list(self.db.execute(query))
700
752
 
701
753
  def get_dataset(self, name: str, conn=None) -> DatasetRecord:
702
754
  """Gets a single dataset by name"""
@@ -15,7 +15,9 @@ from datachain.error import DatasetVersionNotFoundError
15
15
  from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
16
16
 
17
17
  T = TypeVar("T", bound="DatasetRecord")
18
+ LT = TypeVar("LT", bound="DatasetListRecord")
18
19
  V = TypeVar("V", bound="DatasetVersion")
20
+ LV = TypeVar("LV", bound="DatasetListVersion")
19
21
  DD = TypeVar("DD", bound="DatasetDependency")
20
22
 
21
23
  DATASET_PREFIX = "ds://"
@@ -264,6 +266,59 @@ class DatasetVersion:
264
266
  return cls(**kwargs)
265
267
 
266
268
 
269
+ @dataclass
270
+ class DatasetListVersion:
271
+ id: int
272
+ uuid: str
273
+ dataset_id: int
274
+ version: int
275
+ status: int
276
+ created_at: datetime
277
+ finished_at: Optional[datetime]
278
+ error_message: str
279
+ error_stack: str
280
+ num_objects: Optional[int]
281
+ size: Optional[int]
282
+ query_script: str = ""
283
+ job_id: Optional[str] = None
284
+
285
+ @classmethod
286
+ def parse(
287
+ cls: type[LV],
288
+ id: int,
289
+ uuid: str,
290
+ dataset_id: int,
291
+ version: int,
292
+ status: int,
293
+ created_at: datetime,
294
+ finished_at: Optional[datetime],
295
+ error_message: str,
296
+ error_stack: str,
297
+ num_objects: Optional[int],
298
+ size: Optional[int],
299
+ query_script: str = "",
300
+ job_id: Optional[str] = None,
301
+ ):
302
+ return cls(
303
+ id,
304
+ uuid,
305
+ dataset_id,
306
+ version,
307
+ status,
308
+ created_at,
309
+ finished_at,
310
+ error_message,
311
+ error_stack,
312
+ num_objects,
313
+ size,
314
+ query_script,
315
+ job_id,
316
+ )
317
+
318
+ def __hash__(self):
319
+ return hash(f"{self.dataset_id}_{self.version}")
320
+
321
+
267
322
  @dataclass
268
323
  class DatasetRecord:
269
324
  id: int
@@ -447,20 +502,6 @@ class DatasetRecord:
447
502
  identifier = self.identifier(version)
448
503
  return f"{DATASET_PREFIX}{identifier}"
449
504
 
450
- @property
451
- def is_bucket_listing(self) -> bool:
452
- """
453
- For bucket listing we implicitly create underlying dataset to hold data. This
454
- method is checking if this is one of those datasets.
455
- """
456
- from datachain.client import Client
457
-
458
- # TODO refactor and maybe remove method in
459
- # https://github.com/iterative/datachain/issues/318
460
- return Client.is_data_source_uri(self.name) or self.name.startswith(
461
- LISTING_PREFIX
462
- )
463
-
464
505
  @property
465
506
  def versions_values(self) -> list[int]:
466
507
  """
@@ -499,5 +540,92 @@ class DatasetRecord:
499
540
  return cls(**kwargs, versions=versions)
500
541
 
501
542
 
543
+ @dataclass
544
+ class DatasetListRecord:
545
+ id: int
546
+ name: str
547
+ description: Optional[str]
548
+ labels: list[str]
549
+ versions: list[DatasetListVersion]
550
+ created_at: Optional[datetime] = None
551
+
552
+ @classmethod
553
+ def parse( # noqa: PLR0913
554
+ cls: type[LT],
555
+ id: int,
556
+ name: str,
557
+ description: Optional[str],
558
+ labels: str,
559
+ created_at: datetime,
560
+ version_id: int,
561
+ version_uuid: str,
562
+ version_dataset_id: int,
563
+ version: int,
564
+ version_status: int,
565
+ version_created_at: datetime,
566
+ version_finished_at: Optional[datetime],
567
+ version_error_message: str,
568
+ version_error_stack: str,
569
+ version_num_objects: Optional[int],
570
+ version_size: Optional[int],
571
+ version_query_script: Optional[str],
572
+ version_job_id: Optional[str] = None,
573
+ ) -> "DatasetListRecord":
574
+ labels_lst: list[str] = json.loads(labels) if labels else []
575
+
576
+ dataset_version = DatasetListVersion.parse(
577
+ version_id,
578
+ version_uuid,
579
+ version_dataset_id,
580
+ version,
581
+ version_status,
582
+ version_created_at,
583
+ version_finished_at,
584
+ version_error_message,
585
+ version_error_stack,
586
+ version_num_objects,
587
+ version_size,
588
+ version_query_script, # type: ignore[arg-type]
589
+ version_job_id,
590
+ )
591
+
592
+ return cls(
593
+ id,
594
+ name,
595
+ description,
596
+ labels_lst,
597
+ [dataset_version],
598
+ created_at,
599
+ )
600
+
601
+ def merge_versions(self, other: "DatasetListRecord") -> "DatasetListRecord":
602
+ """Merge versions from another dataset"""
603
+ if other.id != self.id:
604
+ raise RuntimeError("Cannot merge versions of datasets with different ids")
605
+ if not other.versions:
606
+ # nothing to merge
607
+ return self
608
+ if not self.versions:
609
+ self.versions = []
610
+
611
+ self.versions = list(set(self.versions + other.versions))
612
+ self.versions.sort(key=lambda v: v.version)
613
+ return self
614
+
615
+ @property
616
+ def is_bucket_listing(self) -> bool:
617
+ """
618
+ For bucket listing we implicitly create underlying dataset to hold data. This
619
+ method is checking if this is one of those datasets.
620
+ """
621
+ from datachain.client import Client
622
+
623
+ # TODO refactor and maybe remove method in
624
+ # https://github.com/iterative/datachain/issues/318
625
+ return Client.is_data_source_uri(self.name) or self.name.startswith(
626
+ LISTING_PREFIX
627
+ )
628
+
629
+
502
630
  class RowDict(dict):
503
631
  pass
@@ -5,7 +5,11 @@ from uuid import uuid4
5
5
 
6
6
  from pydantic import Field, field_validator
7
7
 
8
- from datachain.dataset import DatasetRecord, DatasetStatus, DatasetVersion
8
+ from datachain.dataset import (
9
+ DatasetListRecord,
10
+ DatasetListVersion,
11
+ DatasetStatus,
12
+ )
9
13
  from datachain.job import Job
10
14
  from datachain.lib.data_model import DataModel
11
15
  from datachain.utils import TIME_ZERO
@@ -57,8 +61,8 @@ class DatasetInfo(DataModel):
57
61
  @classmethod
58
62
  def from_models(
59
63
  cls,
60
- dataset: DatasetRecord,
61
- version: DatasetVersion,
64
+ dataset: DatasetListRecord,
65
+ version: DatasetListVersion,
62
66
  job: Optional[Job],
63
67
  ) -> "Self":
64
68
  return cls(
@@ -178,17 +178,9 @@ class StudioClient:
178
178
  data = {}
179
179
 
180
180
  if not ok:
181
- logger.error(
182
- "Got bad response from Studio, content is %s",
183
- response.content.decode("utf-8"),
184
- )
185
181
  if response.status_code == 403:
186
182
  message = f"Not authorized for the team {self.team}"
187
183
  else:
188
- logger.error(
189
- "Got bad response from Studio, content is %s",
190
- response.content.decode("utf-8"),
191
- )
192
184
  message = data.get("message", "")
193
185
  else:
194
186
  message = ""
@@ -230,6 +222,46 @@ class StudioClient:
230
222
  def ls_datasets(self) -> Response[LsData]:
231
223
  return self._send_request("datachain/ls-datasets", {})
232
224
 
225
+ def edit_dataset(
226
+ self,
227
+ name: str,
228
+ new_name: Optional[str] = None,
229
+ description: Optional[str] = None,
230
+ labels: Optional[list[str]] = None,
231
+ ) -> Response[DatasetInfoData]:
232
+ body = {
233
+ "dataset_name": name,
234
+ }
235
+
236
+ if new_name is not None:
237
+ body["new_name"] = new_name
238
+
239
+ if description is not None:
240
+ body["description"] = description
241
+
242
+ if labels is not None:
243
+ body["labels"] = labels # type: ignore[assignment]
244
+
245
+ return self._send_request(
246
+ "datachain/edit-dataset",
247
+ body,
248
+ )
249
+
250
+ def rm_dataset(
251
+ self,
252
+ name: str,
253
+ version: Optional[int] = None,
254
+ force: Optional[bool] = False,
255
+ ) -> Response[DatasetInfoData]:
256
+ return self._send_request(
257
+ "datachain/rm-dataset",
258
+ {
259
+ "dataset_name": name,
260
+ "version": version,
261
+ "force": force,
262
+ },
263
+ )
264
+
233
265
  def dataset_info(self, name: str) -> Response[DatasetInfoData]:
234
266
  def _parse_dataset_info(dataset_info):
235
267
  _parse_dates(dataset_info, ["created_at", "finished_at"])
@@ -130,6 +130,35 @@ def list_datasets(team: Optional[str] = None):
130
130
  yield (name, version)
131
131
 
132
132
 
133
+ def edit_studio_dataset(
134
+ team_name: Optional[str],
135
+ name: str,
136
+ new_name: Optional[str] = None,
137
+ description: Optional[str] = None,
138
+ labels: Optional[list[str]] = None,
139
+ ):
140
+ client = StudioClient(team=team_name)
141
+ response = client.edit_dataset(name, new_name, description, labels)
142
+ if not response.ok:
143
+ raise_remote_error(response.message)
144
+
145
+ print(f"Dataset {name} updated")
146
+
147
+
148
+ def remove_studio_dataset(
149
+ team_name: Optional[str],
150
+ name: str,
151
+ version: Optional[int] = None,
152
+ force: Optional[bool] = False,
153
+ ):
154
+ client = StudioClient(team=team_name)
155
+ response = client.rm_dataset(name, version, force)
156
+ if not response.ok:
157
+ raise_remote_error(response.message)
158
+
159
+ print(f"Dataset {name} removed")
160
+
161
+
133
162
  def save_config(hostname, token):
134
163
  config = Config(ConfigLevel.GLOBAL)
135
164
  with config.edit() as conf:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -139,7 +139,7 @@ Key Features
139
139
  ============
140
140
 
141
141
  📂 **Multimodal Dataset Versioning.**
142
- - Version unstructured data without redundant data copies, by supporitng
142
+ - Version unstructured data without redundant data copies, by supporting
143
143
  references to S3, GCP, Azure, and local file systems.
144
144
  - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
145
145
  - Unite files and metadata together into persistent, versioned, columnar datasets.