datachain 0.15.0__tar.gz → 0.16.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (355)
  1. {datachain-0.15.0/src/datachain.egg-info → datachain-0.16.0}/PKG-INFO +1 -1
  2. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/catalog.py +9 -9
  3. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/__init__.py +1 -1
  4. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/datasets.py +3 -3
  5. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/show.py +2 -2
  6. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/__init__.py +2 -2
  7. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/metastore.py +5 -5
  8. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/dataset.py +8 -8
  9. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dataset_info.py +18 -0
  10. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/datachain.py +4 -3
  11. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/datasets.py +9 -0
  12. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/udf.py +2 -1
  13. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/dataset.py +2 -2
  14. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/remote/studio.py +2 -2
  15. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/studio.py +2 -2
  16. {datachain-0.15.0 → datachain-0.16.0/src/datachain.egg-info}/PKG-INFO +1 -1
  17. {datachain-0.15.0 → datachain-0.16.0}/tests/conftest.py +7 -7
  18. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_datachain.py +4 -4
  19. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_datasets.py +7 -7
  20. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_pull.py +1 -1
  21. {datachain-0.15.0 → datachain-0.16.0}/tests/test_cli_studio.py +4 -4
  22. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_datachain.py +35 -0
  23. {datachain-0.15.0 → datachain-0.16.0}/.cruft.json +0 -0
  24. {datachain-0.15.0 → datachain-0.16.0}/.gitattributes +0 -0
  25. {datachain-0.15.0 → datachain-0.16.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  26. {datachain-0.15.0 → datachain-0.16.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  27. {datachain-0.15.0 → datachain-0.16.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  28. {datachain-0.15.0 → datachain-0.16.0}/.github/codecov.yaml +0 -0
  29. {datachain-0.15.0 → datachain-0.16.0}/.github/dependabot.yml +0 -0
  30. {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/benchmarks.yml +0 -0
  31. {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/release.yml +0 -0
  32. {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/tests-studio.yml +0 -0
  33. {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/tests.yml +0 -0
  34. {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/update-template.yaml +0 -0
  35. {datachain-0.15.0 → datachain-0.16.0}/.gitignore +0 -0
  36. {datachain-0.15.0 → datachain-0.16.0}/.pre-commit-config.yaml +0 -0
  37. {datachain-0.15.0 → datachain-0.16.0}/CODE_OF_CONDUCT.rst +0 -0
  38. {datachain-0.15.0 → datachain-0.16.0}/LICENSE +0 -0
  39. {datachain-0.15.0 → datachain-0.16.0}/README.rst +0 -0
  40. {datachain-0.15.0 → datachain-0.16.0}/docs/assets/captioned_cartoons.png +0 -0
  41. {datachain-0.15.0 → datachain-0.16.0}/docs/assets/datachain-white.svg +0 -0
  42. {datachain-0.15.0 → datachain-0.16.0}/docs/assets/datachain.svg +0 -0
  43. {datachain-0.15.0 → datachain-0.16.0}/docs/contributing.md +0 -0
  44. {datachain-0.15.0 → datachain-0.16.0}/docs/css/github-permalink-style.css +0 -0
  45. {datachain-0.15.0 → datachain-0.16.0}/docs/examples.md +0 -0
  46. {datachain-0.15.0 → datachain-0.16.0}/docs/index.md +0 -0
  47. {datachain-0.15.0 → datachain-0.16.0}/docs/overrides/main.html +0 -0
  48. {datachain-0.15.0 → datachain-0.16.0}/docs/quick-start.md +0 -0
  49. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/arrowrow.md +0 -0
  50. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/bbox.md +0 -0
  51. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/file.md +0 -0
  52. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/imagefile.md +0 -0
  53. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/index.md +0 -0
  54. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/pose.md +0 -0
  55. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/segment.md +0 -0
  56. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/tarvfile.md +0 -0
  57. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/textfile.md +0 -0
  58. {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/videofile.md +0 -0
  59. {datachain-0.15.0 → datachain-0.16.0}/docs/references/datachain.md +0 -0
  60. {datachain-0.15.0 → datachain-0.16.0}/docs/references/func.md +0 -0
  61. {datachain-0.15.0 → datachain-0.16.0}/docs/references/index.md +0 -0
  62. {datachain-0.15.0 → datachain-0.16.0}/docs/references/remotes.md +0 -0
  63. {datachain-0.15.0 → datachain-0.16.0}/docs/references/toolkit.md +0 -0
  64. {datachain-0.15.0 → datachain-0.16.0}/docs/references/torch.md +0 -0
  65. {datachain-0.15.0 → datachain-0.16.0}/docs/references/udf.md +0 -0
  66. {datachain-0.15.0 → datachain-0.16.0}/docs/tutorials.md +0 -0
  67. {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  68. {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  69. {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/openimage-detect.py +0 -0
  70. {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  71. {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  72. {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  73. {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/common_sql_functions.py +0 -0
  74. {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/json-csv-reader.py +0 -0
  75. {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/torch-loader.py +0 -0
  76. {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/udfs/parallel.py +0 -0
  77. {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/udfs/simple.py +0 -0
  78. {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/udfs/stateful.py +0 -0
  79. {datachain-0.15.0 → datachain-0.16.0}/examples/llm_and_nlp/claude-query.py +0 -0
  80. {datachain-0.15.0 → datachain-0.16.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  81. {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/clip_inference.py +0 -0
  82. {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/hf_pipeline.py +0 -0
  83. {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  84. {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/wds.py +0 -0
  85. {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/wds_filtered.py +0 -0
  86. {datachain-0.15.0 → datachain-0.16.0}/mkdocs.yml +0 -0
  87. {datachain-0.15.0 → datachain-0.16.0}/noxfile.py +0 -0
  88. {datachain-0.15.0 → datachain-0.16.0}/pyproject.toml +0 -0
  89. {datachain-0.15.0 → datachain-0.16.0}/setup.cfg +0 -0
  90. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/__init__.py +0 -0
  91. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/__main__.py +0 -0
  92. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/asyn.py +0 -0
  93. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cache.py +0 -0
  94. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/__init__.py +0 -0
  95. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/datasource.py +0 -0
  96. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/loader.py +0 -0
  97. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/__init__.py +0 -0
  98. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/du.py +0 -0
  99. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/index.py +0 -0
  100. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/ls.py +0 -0
  101. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/misc.py +0 -0
  102. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/query.py +0 -0
  103. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/job.py +0 -0
  104. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/studio.py +0 -0
  105. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/utils.py +0 -0
  106. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/utils.py +0 -0
  107. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/__init__.py +0 -0
  108. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/azure.py +0 -0
  109. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/fileslice.py +0 -0
  110. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/fsspec.py +0 -0
  111. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/gcs.py +0 -0
  112. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/hf.py +0 -0
  113. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/local.py +0 -0
  114. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/s3.py +0 -0
  115. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/config.py +0 -0
  116. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/__init__.py +0 -0
  117. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/db_engine.py +0 -0
  118. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/job.py +0 -0
  119. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/schema.py +0 -0
  120. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/serializer.py +0 -0
  121. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/sqlite.py +0 -0
  122. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/warehouse.py +0 -0
  123. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/diff/__init__.py +0 -0
  124. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/error.py +0 -0
  125. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/fs/__init__.py +0 -0
  126. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/fs/reference.py +0 -0
  127. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/fs/utils.py +0 -0
  128. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/__init__.py +0 -0
  129. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/aggregate.py +0 -0
  130. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/array.py +0 -0
  131. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/base.py +0 -0
  132. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/conditional.py +0 -0
  133. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/func.py +0 -0
  134. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/numeric.py +0 -0
  135. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/path.py +0 -0
  136. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/random.py +0 -0
  137. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/string.py +0 -0
  138. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/window.py +0 -0
  139. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/job.py +0 -0
  140. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/__init__.py +0 -0
  141. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/arrow.py +0 -0
  142. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/clip.py +0 -0
  143. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/__init__.py +0 -0
  144. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/flatten.py +0 -0
  145. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  146. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  147. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/unflatten.py +0 -0
  148. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  149. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/data_model.py +0 -0
  150. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/__init__.py +0 -0
  151. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/csv.py +0 -0
  152. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/database.py +0 -0
  153. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/hf.py +0 -0
  154. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/json.py +0 -0
  155. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/listings.py +0 -0
  156. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/pandas.py +0 -0
  157. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/parquet.py +0 -0
  158. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/records.py +0 -0
  159. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/storage.py +0 -0
  160. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/utils.py +0 -0
  161. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/values.py +0 -0
  162. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/file.py +0 -0
  163. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/hf.py +0 -0
  164. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/image.py +0 -0
  165. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/listing.py +0 -0
  166. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/listing_info.py +0 -0
  167. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/meta_formats.py +0 -0
  168. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/model_store.py +0 -0
  169. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/pytorch.py +0 -0
  170. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/settings.py +0 -0
  171. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/signal_schema.py +0 -0
  172. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/tar.py +0 -0
  173. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/text.py +0 -0
  174. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/udf_signature.py +0 -0
  175. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/utils.py +0 -0
  176. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/video.py +0 -0
  177. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/webdataset.py +0 -0
  178. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/webdataset_laion.py +0 -0
  179. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/listing.py +0 -0
  180. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/__init__.py +0 -0
  181. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/bbox.py +0 -0
  182. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/pose.py +0 -0
  183. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/segment.py +0 -0
  184. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  185. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  186. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/pose.py +0 -0
  187. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/segment.py +0 -0
  188. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/utils.py +0 -0
  189. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/node.py +0 -0
  190. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/nodes_fetcher.py +0 -0
  191. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/nodes_thread_pool.py +0 -0
  192. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/progress.py +0 -0
  193. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/py.typed +0 -0
  194. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/__init__.py +0 -0
  195. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/batch.py +0 -0
  196. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/dispatch.py +0 -0
  197. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/metrics.py +0 -0
  198. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/params.py +0 -0
  199. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/queue.py +0 -0
  200. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/schema.py +0 -0
  201. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/session.py +0 -0
  202. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/udf.py +0 -0
  203. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/utils.py +0 -0
  204. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/remote/__init__.py +0 -0
  205. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/script_meta.py +0 -0
  206. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/__init__.py +0 -0
  207. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/default/__init__.py +0 -0
  208. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/default/base.py +0 -0
  209. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/__init__.py +0 -0
  210. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/aggregate.py +0 -0
  211. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/array.py +0 -0
  212. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/conditional.py +0 -0
  213. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/numeric.py +0 -0
  214. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/path.py +0 -0
  215. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/random.py +0 -0
  216. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/string.py +0 -0
  217. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/selectable.py +0 -0
  218. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  219. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/base.py +0 -0
  220. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/types.py +0 -0
  221. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/vector.py +0 -0
  222. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/types.py +0 -0
  223. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/utils.py +0 -0
  224. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/telemetry.py +0 -0
  225. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/toolkit/__init__.py +0 -0
  226. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/toolkit/split.py +0 -0
  227. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/torch/__init__.py +0 -0
  228. {datachain-0.15.0 → datachain-0.16.0}/src/datachain/utils.py +0 -0
  229. {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/SOURCES.txt +0 -0
  230. {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  231. {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/entry_points.txt +0 -0
  232. {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/requires.txt +0 -0
  233. {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/top_level.txt +0 -0
  234. {datachain-0.15.0 → datachain-0.16.0}/tests/__init__.py +0 -0
  235. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/__init__.py +0 -0
  236. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/conftest.py +0 -0
  237. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  238. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  239. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/.gitignore +0 -0
  240. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  241. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/test_datachain.py +0 -0
  242. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/test_ls.py +0 -0
  243. {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/test_version.py +0 -0
  244. {datachain-0.15.0 → datachain-0.16.0}/tests/data.py +0 -0
  245. {datachain-0.15.0 → datachain-0.16.0}/tests/examples/__init__.py +0 -0
  246. {datachain-0.15.0 → datachain-0.16.0}/tests/examples/test_examples.py +0 -0
  247. {datachain-0.15.0 → datachain-0.16.0}/tests/examples/test_wds_e2e.py +0 -0
  248. {datachain-0.15.0 → datachain-0.16.0}/tests/examples/wds_data.py +0 -0
  249. {datachain-0.15.0 → datachain-0.16.0}/tests/func/__init__.py +0 -0
  250. {datachain-0.15.0 → datachain-0.16.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  251. {datachain-0.15.0 → datachain-0.16.0}/tests/func/data/lena.jpg +0 -0
  252. {datachain-0.15.0 → datachain-0.16.0}/tests/func/fake-service-account-credentials.json +0 -0
  253. {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/__init__.py +0 -0
  254. {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/running-mask0.png +0 -0
  255. {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/running-mask1.png +0 -0
  256. {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/running.jpg +0 -0
  257. {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/ships.jpg +0 -0
  258. {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/test_yolo.py +0 -0
  259. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_catalog.py +0 -0
  260. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_client.py +0 -0
  261. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_cloud_transfer.py +0 -0
  262. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_data_storage.py +0 -0
  263. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_datachain_merge.py +0 -0
  264. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_dataset_query.py +0 -0
  265. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_feature_pickling.py +0 -0
  266. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_file.py +0 -0
  267. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_hf.py +0 -0
  268. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_hidden_field.py +0 -0
  269. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_image.py +0 -0
  270. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_listing.py +0 -0
  271. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_ls.py +0 -0
  272. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_meta_formats.py +0 -0
  273. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_metrics.py +0 -0
  274. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_pytorch.py +0 -0
  275. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_query.py +0 -0
  276. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_read_database.py +0 -0
  277. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_session.py +0 -0
  278. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_toolkit.py +0 -0
  279. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_video.py +0 -0
  280. {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_warehouse.py +0 -0
  281. {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class.py +0 -0
  282. {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class_exception.py +0 -0
  283. {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class_parallel.py +0 -0
  284. {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  285. {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/name_len_slow.py +0 -0
  286. {datachain-0.15.0 → datachain-0.16.0}/tests/test_atomicity.py +0 -0
  287. {datachain-0.15.0 → datachain-0.16.0}/tests/test_cli_e2e.py +0 -0
  288. {datachain-0.15.0 → datachain-0.16.0}/tests/test_import_time.py +0 -0
  289. {datachain-0.15.0 → datachain-0.16.0}/tests/test_query_e2e.py +0 -0
  290. {datachain-0.15.0 → datachain-0.16.0}/tests/test_telemetry.py +0 -0
  291. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/__init__.py +0 -0
  292. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/__init__.py +0 -0
  293. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/conftest.py +0 -0
  294. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_arrow.py +0 -0
  295. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_clip.py +0 -0
  296. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  297. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  298. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_diff.py +0 -0
  299. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_feature.py +0 -0
  300. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_feature_utils.py +0 -0
  301. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_file.py +0 -0
  302. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_hf.py +0 -0
  303. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_image.py +0 -0
  304. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_listing_info.py +0 -0
  305. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  306. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_schema.py +0 -0
  307. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_signal_schema.py +0 -0
  308. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  309. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_text.py +0 -0
  310. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_udf_signature.py +0 -0
  311. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_utils.py +0 -0
  312. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_webdataset.py +0 -0
  313. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/__init__.py +0 -0
  314. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_bbox.py +0 -0
  315. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_pose.py +0 -0
  316. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_segment.py +0 -0
  317. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_utils.py +0 -0
  318. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/__init__.py +0 -0
  319. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  320. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  321. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  322. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_array.py +0 -0
  323. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_conditional.py +0 -0
  324. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_path.py +0 -0
  325. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_random.py +0 -0
  326. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_selectable.py +0 -0
  327. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_string.py +0 -0
  328. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_asyn.py +0 -0
  329. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_cache.py +0 -0
  330. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_catalog.py +0 -0
  331. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_catalog_loader.py +0 -0
  332. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_cli_parsing.py +0 -0
  333. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_client.py +0 -0
  334. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_client_gcs.py +0 -0
  335. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_client_s3.py +0 -0
  336. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_config.py +0 -0
  337. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_data_storage.py +0 -0
  338. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_database_engine.py +0 -0
  339. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_dataset.py +0 -0
  340. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_dispatch.py +0 -0
  341. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_fileslice.py +0 -0
  342. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_func.py +0 -0
  343. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_listing.py +0 -0
  344. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_metastore.py +0 -0
  345. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_module_exports.py +0 -0
  346. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_pytorch.py +0 -0
  347. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_query.py +0 -0
  348. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_query_metrics.py +0 -0
  349. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_query_params.py +0 -0
  350. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_script_meta.py +0 -0
  351. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_serializer.py +0 -0
  352. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_session.py +0 -0
  353. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_utils.py +0 -0
  354. {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_warehouse.py +0 -0
  355. {datachain-0.15.0 → datachain-0.16.0}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.15.0
3
+ Version: 0.16.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -776,7 +776,7 @@ class Catalog:
776
776
  listing: Optional[bool] = False,
777
777
  uuid: Optional[str] = None,
778
778
  description: Optional[str] = None,
779
- labels: Optional[list[str]] = None,
779
+ attrs: Optional[list[str]] = None,
780
780
  ) -> "DatasetRecord":
781
781
  """
782
782
  Creates new dataset of a specific version.
@@ -794,16 +794,16 @@ class Catalog:
794
794
  dataset = self.get_dataset(name)
795
795
  default_version = dataset.next_version
796
796
 
797
- if (description or labels) and (
798
- dataset.description != description or dataset.labels != labels
797
+ if (description or attrs) and (
798
+ dataset.description != description or dataset.attrs != attrs
799
799
  ):
800
800
  description = description or dataset.description
801
- labels = labels or dataset.labels
801
+ attrs = attrs or dataset.attrs
802
802
 
803
803
  self.update_dataset(
804
804
  dataset,
805
805
  description=description,
806
- labels=labels,
806
+ attrs=attrs,
807
807
  )
808
808
 
809
809
  except DatasetNotFoundError:
@@ -817,7 +817,7 @@ class Catalog:
817
817
  schema=schema,
818
818
  ignore_if_exists=True,
819
819
  description=description,
820
- labels=labels,
820
+ attrs=attrs,
821
821
  )
822
822
 
823
823
  version = version or default_version
@@ -1334,15 +1334,15 @@ class Catalog:
1334
1334
  name: str,
1335
1335
  new_name: Optional[str] = None,
1336
1336
  description: Optional[str] = None,
1337
- labels: Optional[list[str]] = None,
1337
+ attrs: Optional[list[str]] = None,
1338
1338
  ) -> DatasetRecord:
1339
1339
  update_data = {}
1340
1340
  if new_name:
1341
1341
  update_data["name"] = new_name
1342
1342
  if description is not None:
1343
1343
  update_data["description"] = description
1344
- if labels is not None:
1345
- update_data["labels"] = labels # type: ignore[assignment]
1344
+ if attrs is not None:
1345
+ update_data["attrs"] = attrs # type: ignore[assignment]
1346
1346
 
1347
1347
  dataset = self.get_dataset(name)
1348
1348
  return self.update_dataset(dataset, **update_data)
@@ -149,7 +149,7 @@ def handle_dataset_command(args, catalog):
149
149
  args.name,
150
150
  new_name=args.new_name,
151
151
  description=args.description,
152
- labels=args.labels,
152
+ attrs=args.attrs,
153
153
  studio=args.studio,
154
154
  local=args.local,
155
155
  all=args.all,
@@ -154,7 +154,7 @@ def edit_dataset(
154
154
  name: str,
155
155
  new_name: Optional[str] = None,
156
156
  description: Optional[str] = None,
157
- labels: Optional[list[str]] = None,
157
+ attrs: Optional[list[str]] = None,
158
158
  studio: bool = False,
159
159
  local: bool = False,
160
160
  all: bool = True,
@@ -167,9 +167,9 @@ def edit_dataset(
167
167
 
168
168
  if all or local:
169
169
  try:
170
- catalog.edit_dataset(name, new_name, description, labels)
170
+ catalog.edit_dataset(name, new_name, description, attrs)
171
171
  except DatasetNotFoundError:
172
172
  print("Dataset not found in local", file=sys.stderr)
173
173
 
174
174
  if (all or studio) and token:
175
- edit_studio_dataset(team, name, new_name, description, labels)
175
+ edit_studio_dataset(team, name, new_name, description, attrs)
@@ -42,8 +42,8 @@ def show(
42
42
  print("Name: ", name)
43
43
  if dataset.description:
44
44
  print("Description: ", dataset.description)
45
- if dataset.labels:
46
- print("Labels: ", ",".join(dataset.labels))
45
+ if dataset.attrs:
46
+ print("Attributes: ", ",".join(dataset.attrs))
47
47
  print("\n")
48
48
 
49
49
  show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
@@ -217,9 +217,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
217
217
  help="Dataset description",
218
218
  )
219
219
  parse_edit_dataset.add_argument(
220
- "--labels",
220
+ "--attrs",
221
221
  nargs="+",
222
- help="Dataset labels",
222
+ help="Dataset attributes",
223
223
  )
224
224
  parse_edit_dataset.add_argument(
225
225
  "--studio",
@@ -120,7 +120,7 @@ class AbstractMetastore(ABC, Serializable):
120
120
  schema: Optional[dict[str, Any]] = None,
121
121
  ignore_if_exists: bool = False,
122
122
  description: Optional[str] = None,
123
- labels: Optional[list[str]] = None,
123
+ attrs: Optional[list[str]] = None,
124
124
  ) -> DatasetRecord:
125
125
  """Creates new dataset."""
126
126
 
@@ -326,7 +326,7 @@ class AbstractDBMetastore(AbstractMetastore):
326
326
  Column("id", Integer, primary_key=True),
327
327
  Column("name", Text, nullable=False),
328
328
  Column("description", Text),
329
- Column("labels", JSON, nullable=True),
329
+ Column("attrs", JSON, nullable=True),
330
330
  Column("status", Integer, nullable=False),
331
331
  Column("feature_schema", JSON, nullable=True),
332
332
  Column("created_at", DateTime(timezone=True)),
@@ -521,7 +521,7 @@ class AbstractDBMetastore(AbstractMetastore):
521
521
  schema: Optional[dict[str, Any]] = None,
522
522
  ignore_if_exists: bool = False,
523
523
  description: Optional[str] = None,
524
- labels: Optional[list[str]] = None,
524
+ attrs: Optional[list[str]] = None,
525
525
  **kwargs, # TODO registered = True / False
526
526
  ) -> DatasetRecord:
527
527
  """Creates new dataset."""
@@ -538,7 +538,7 @@ class AbstractDBMetastore(AbstractMetastore):
538
538
  query_script=query_script,
539
539
  schema=json.dumps(schema or {}),
540
540
  description=description,
541
- labels=json.dumps(labels or []),
541
+ attrs=json.dumps(attrs or []),
542
542
  )
543
543
  if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
544
544
  # SQLite and PostgreSQL both support 'on_conflict_do_nothing',
@@ -621,7 +621,7 @@ class AbstractDBMetastore(AbstractMetastore):
621
621
  dataset_values = {}
622
622
  for field, value in kwargs.items():
623
623
  if field in self._dataset_fields[1:]:
624
- if field in ["labels", "schema"]:
624
+ if field in ["attrs", "schema"]:
625
625
  values[field] = json.dumps(value) if value else None
626
626
  else:
627
627
  values[field] = value
@@ -329,7 +329,7 @@ class DatasetRecord:
329
329
  id: int
330
330
  name: str
331
331
  description: Optional[str]
332
- labels: list[str]
332
+ attrs: list[str]
333
333
  schema: dict[str, Union[SQLType, type[SQLType]]]
334
334
  feature_schema: dict
335
335
  versions: list[DatasetVersion]
@@ -357,7 +357,7 @@ class DatasetRecord:
357
357
  id: int,
358
358
  name: str,
359
359
  description: Optional[str],
360
- labels: str,
360
+ attrs: str,
361
361
  status: int,
362
362
  feature_schema: Optional[str],
363
363
  created_at: datetime,
@@ -387,7 +387,7 @@ class DatasetRecord:
387
387
  version_schema: str,
388
388
  version_job_id: Optional[str] = None,
389
389
  ) -> "DatasetRecord":
390
- labels_lst: list[str] = json.loads(labels) if labels else []
390
+ attrs_lst: list[str] = json.loads(attrs) if attrs else []
391
391
  schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
392
392
  version_schema_dct: dict[str, str] = (
393
393
  json.loads(version_schema) if version_schema else {}
@@ -418,7 +418,7 @@ class DatasetRecord:
418
418
  id,
419
419
  name,
420
420
  description,
421
- labels_lst,
421
+ attrs_lst,
422
422
  cls.parse_schema(schema_dct), # type: ignore[arg-type]
423
423
  json.loads(feature_schema) if feature_schema else {},
424
424
  [dataset_version],
@@ -562,7 +562,7 @@ class DatasetListRecord:
562
562
  id: int
563
563
  name: str
564
564
  description: Optional[str]
565
- labels: list[str]
565
+ attrs: list[str]
566
566
  versions: list[DatasetListVersion]
567
567
  created_at: Optional[datetime] = None
568
568
 
@@ -572,7 +572,7 @@ class DatasetListRecord:
572
572
  id: int,
573
573
  name: str,
574
574
  description: Optional[str],
575
- labels: str,
575
+ attrs: str,
576
576
  created_at: datetime,
577
577
  version_id: int,
578
578
  version_uuid: str,
@@ -588,7 +588,7 @@ class DatasetListRecord:
588
588
  version_query_script: Optional[str],
589
589
  version_job_id: Optional[str] = None,
590
590
  ) -> "DatasetListRecord":
591
- labels_lst: list[str] = json.loads(labels) if labels else []
591
+ attrs_lst: list[str] = json.loads(attrs) if attrs else []
592
592
 
593
593
  dataset_version = DatasetListVersion.parse(
594
594
  version_id,
@@ -610,7 +610,7 @@ class DatasetListRecord:
610
610
  id,
611
611
  name,
612
612
  description,
613
- labels_lst,
613
+ attrs_lst,
614
614
  [dataset_version],
615
615
  created_at,
616
616
  )
@@ -32,11 +32,28 @@ class DatasetInfo(DataModel):
32
32
  metrics: dict[str, Any] = Field(default={})
33
33
  error_message: str = Field(default="")
34
34
  error_stack: str = Field(default="")
35
+ attrs: list[str] = Field(default=[])
35
36
 
36
37
  @property
37
38
  def is_temp(self) -> bool:
38
39
  return Session.is_temp_dataset(self.name)
39
40
 
41
+ def has_attr(self, attr: str) -> bool:
42
+ s = attr.split("=")
43
+ if len(s) == 1:
44
+ return attr in self.attrs
45
+
46
+ name = s[0]
47
+ value = s[1]
48
+ for a in self.attrs:
49
+ s = a.split("=")
50
+ if value == "*" and s[0] == name:
51
+ return True
52
+ if len(s) == 2 and s[0] == name and s[1] == value:
53
+ return True
54
+
55
+ return False
56
+
40
57
  @staticmethod
41
58
  def _validate_dict(
42
59
  v: Optional[Union[str, dict]],
@@ -83,4 +100,5 @@ class DatasetInfo(DataModel):
83
100
  metrics=job.metrics if job else {},
84
101
  error_message=version.error_message,
85
102
  error_stack=version.error_stack,
103
+ attrs=dataset.attrs,
86
104
  )
@@ -459,7 +459,7 @@ class DataChain:
459
459
  name: str,
460
460
  version: Optional[int] = None,
461
461
  description: Optional[str] = None,
462
- labels: Optional[list[str]] = None,
462
+ attrs: Optional[list[str]] = None,
463
463
  **kwargs,
464
464
  ) -> "Self":
465
465
  """Save to a Dataset. It returns the chain itself.
@@ -468,7 +468,8 @@ class DataChain:
468
468
  name : dataset name.
469
469
  version : version of a dataset. Default - the last version that exist.
470
470
  description : description of a dataset.
471
- labels : labels of a dataset.
471
+ attrs : attributes of a dataset. They can be without value, e.g "NLP",
472
+ or with a value, e.g "location=US".
472
473
  """
473
474
  schema = self.signals_schema.clone_without_sys_signals().serialize()
474
475
  return self._evolve(
@@ -476,7 +477,7 @@ class DataChain:
476
477
  name=name,
477
478
  version=version,
478
479
  description=description,
479
- labels=labels,
480
+ attrs=attrs,
480
481
  feature_schema=schema,
481
482
  **kwargs,
482
483
  )
@@ -102,6 +102,7 @@ def datasets(
102
102
  column: Optional[str] = None,
103
103
  include_listing: bool = False,
104
104
  studio: bool = False,
105
+ attrs: Optional[list[str]] = None,
105
106
  ) -> "DataChain":
106
107
  """Generate chain with list of registered datasets.
107
108
 
@@ -114,6 +115,10 @@ def datasets(
114
115
  include_listing: If True, includes listing datasets. Defaults to False.
115
116
  studio: If True, returns datasets from Studio only,
116
117
  otherwise returns all local datasets. Defaults to False.
118
+ attrs: Optional list of attributes to filter datasets on. It can be just
119
+ attribute without value e.g "NLP", or attribute with value
120
+ e.g "location=US". Attribute with value can also accept "*" to target
121
+ all that have specific name e.g "location=*"
117
122
 
118
123
  Returns:
119
124
  DataChain: A new DataChain instance containing dataset information.
@@ -139,6 +144,10 @@ def datasets(
139
144
  ]
140
145
  datasets_values = [d for d in datasets_values if not d.is_temp]
141
146
 
147
+ if attrs:
148
+ for attr in attrs:
149
+ datasets_values = [d for d in datasets_values if d.has_attr(attr)]
150
+
142
151
  if not column:
143
152
  # flattening dataset fields
144
153
  schema = {
@@ -474,8 +474,9 @@ class Generator(UDFBase):
474
474
  remove_prefetched=bool(self.prefetch) and not cache,
475
475
  )
476
476
  with closing(prepared_inputs):
477
- for row in processed_cb.wrap(prepared_inputs):
477
+ for row in prepared_inputs:
478
478
  yield _process_row(row)
479
+ processed_cb.relative_update(1)
479
480
 
480
481
  self.teardown()
481
482
 
@@ -1680,7 +1680,7 @@ class DatasetQuery:
1680
1680
  version: Optional[int] = None,
1681
1681
  feature_schema: Optional[dict] = None,
1682
1682
  description: Optional[str] = None,
1683
- labels: Optional[list[str]] = None,
1683
+ attrs: Optional[list[str]] = None,
1684
1684
  **kwargs,
1685
1685
  ) -> "Self":
1686
1686
  """Save the query as a dataset."""
@@ -1714,7 +1714,7 @@ class DatasetQuery:
1714
1714
  feature_schema=feature_schema,
1715
1715
  columns=columns,
1716
1716
  description=description,
1717
- labels=labels,
1717
+ attrs=attrs,
1718
1718
  **kwargs,
1719
1719
  )
1720
1720
  version = version or dataset.latest_version
@@ -290,13 +290,13 @@ class StudioClient:
290
290
  name: str,
291
291
  new_name: Optional[str] = None,
292
292
  description: Optional[str] = None,
293
- labels: Optional[list[str]] = None,
293
+ attrs: Optional[list[str]] = None,
294
294
  ) -> Response[DatasetInfoData]:
295
295
  body = {
296
296
  "new_name": new_name,
297
297
  "dataset_name": name,
298
298
  "description": description,
299
- "labels": labels,
299
+ "attrs": attrs,
300
300
  }
301
301
 
302
302
  return self._send_request(
@@ -187,10 +187,10 @@ def edit_studio_dataset(
187
187
  name: str,
188
188
  new_name: Optional[str] = None,
189
189
  description: Optional[str] = None,
190
- labels: Optional[list[str]] = None,
190
+ attrs: Optional[list[str]] = None,
191
191
  ):
192
192
  client = StudioClient(team=team_name)
193
- response = client.edit_dataset(name, new_name, description, labels)
193
+ response = client.edit_dataset(name, new_name, description, attrs)
194
194
  if not response.ok:
195
195
  raise DataChainError(response.message)
196
196
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.15.0
3
+ Version: 0.16.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -550,7 +550,7 @@ def animal_dataset(listed_bucket, cloud_test_catalog):
550
550
  src_uri = cloud_test_catalog.src_uri
551
551
  dataset = catalog.create_dataset_from_sources(name, [src_uri], recursive=True)
552
552
  return catalog.update_dataset(
553
- dataset, {"description": "animal dataset", "labels": ["cats", "dogs"]}
553
+ dataset, {"description": "animal dataset", "attrs": ["cats", "dogs"]}
554
554
  )
555
555
 
556
556
 
@@ -563,7 +563,7 @@ def dogs_dataset(listed_bucket, cloud_test_catalog):
563
563
  name, [f"{src_uri}/dogs/*"], recursive=True
564
564
  )
565
565
  return catalog.update_dataset(
566
- dataset, {"description": "dogs dataset", "labels": ["dogs", "dataset"]}
566
+ dataset, {"description": "dogs dataset", "attrs": ["dogs", "dataset"]}
567
567
  )
568
568
 
569
569
 
@@ -576,7 +576,7 @@ def cats_dataset(listed_bucket, cloud_test_catalog):
576
576
  name, [f"{src_uri}/cats/*"], recursive=True
577
577
  )
578
578
  return catalog.update_dataset(
579
- dataset, {"description": "cats dataset", "labels": ["cats", "dataset"]}
579
+ dataset, {"description": "cats dataset", "attrs": ["cats", "dataset"]}
580
580
  )
581
581
 
582
582
 
@@ -586,7 +586,7 @@ def dataset_record():
586
586
  id=1,
587
587
  name=f"ds_{uuid.uuid4().hex}",
588
588
  description="",
589
- labels=[],
589
+ attrs=[],
590
590
  versions=[],
591
591
  status=1,
592
592
  schema={},
@@ -651,7 +651,7 @@ def studio_datasets(requests_mock, studio_token):
651
651
  "id": 1,
652
652
  "name": "dogs",
653
653
  "description": "dogs dataset",
654
- "labels": ["dogs", "dataset"],
654
+ "attrs": ["dogs", "dataset"],
655
655
  "versions": [
656
656
  {
657
657
  "version": 1,
@@ -676,7 +676,7 @@ def studio_datasets(requests_mock, studio_token):
676
676
  "id": 2,
677
677
  "name": "cats",
678
678
  "description": "cats dataset",
679
- "labels": ["cats", "dataset"],
679
+ "attrs": ["cats", "dataset"],
680
680
  "versions": [
681
681
  {
682
682
  "version": 1,
@@ -691,7 +691,7 @@ def studio_datasets(requests_mock, studio_token):
691
691
  "id": 3,
692
692
  "name": "both",
693
693
  "description": "both dataset",
694
- "labels": ["both", "dataset"],
694
+ "attrs": ["both", "dataset"],
695
695
  "versions": [
696
696
  {
697
697
  "version": 1,
@@ -560,23 +560,23 @@ def test_save(test_session):
560
560
  name="new_name",
561
561
  version=1,
562
562
  description="new description",
563
- labels=["new_label", "old_label"],
563
+ attrs=["new_label", "old_label"],
564
564
  )
565
565
 
566
566
  ds = test_session.catalog.get_dataset("new_name")
567
567
  assert ds.name == "new_name"
568
568
  assert ds.description == "new description"
569
- assert ds.labels == ["new_label", "old_label"]
569
+ assert ds.attrs == ["new_label", "old_label"]
570
570
 
571
571
  chain.save(
572
572
  name="new_name",
573
573
  description="updated description",
574
- labels=["new_label", "old_label", "new_label2"],
574
+ attrs=["new_label", "old_label", "new_label2"],
575
575
  )
576
576
  ds = test_session.catalog.get_dataset("new_name")
577
577
  assert ds.name == "new_name"
578
578
  assert ds.description == "updated description"
579
- assert ds.labels == ["new_label", "old_label", "new_label2"]
579
+ assert ds.attrs == ["new_label", "old_label", "new_label2"]
580
580
 
581
581
 
582
582
  def test_show_nested_empty(capsys, test_session):
@@ -170,7 +170,7 @@ def test_create_dataset_from_sources(listed_bucket, cloud_test_catalog):
170
170
  assert dataset.name == dataset_name
171
171
  assert dataset.description is None
172
172
  assert dataset.versions_values == [1]
173
- assert dataset.labels == []
173
+ assert dataset.attrs == []
174
174
  assert dataset.status == DatasetStatus.COMPLETE
175
175
 
176
176
  assert dataset_version.status == DatasetStatus.COMPLETE
@@ -207,7 +207,7 @@ def test_create_dataset_from_sources_dataset(cloud_test_catalog, dogs_dataset):
207
207
  assert dataset.name == dataset_name
208
208
  assert dataset.description is None
209
209
  assert dataset.versions_values == [1]
210
- assert dataset.labels == []
210
+ assert dataset.attrs == []
211
211
  assert dataset.status == DatasetStatus.COMPLETE
212
212
 
213
213
  assert dataset_version.status == DatasetStatus.COMPLETE
@@ -546,14 +546,14 @@ def test_edit_dataset(cloud_test_catalog, dogs_dataset):
546
546
  dogs_dataset.name,
547
547
  new_name=dataset_new_name,
548
548
  description="new description",
549
- labels=["cats", "birds"],
549
+ attrs=["cats", "birds"],
550
550
  )
551
551
 
552
552
  dataset = catalog.get_dataset(dataset_new_name)
553
553
  assert dataset.versions_values == [1]
554
554
  assert dataset.name == dataset_new_name
555
555
  assert dataset.description == "new description"
556
- assert dataset.labels == ["cats", "birds"]
556
+ assert dataset.attrs == ["cats", "birds"]
557
557
 
558
558
  # check if dataset tables are renamed correctly
559
559
  old_dataset_table_name = catalog.warehouse.dataset_table_name(dataset_old_name, 1)
@@ -589,7 +589,7 @@ def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
589
589
  )
590
590
 
591
591
 
592
- def test_edit_dataset_remove_labels_and_description(cloud_test_catalog, dogs_dataset):
592
+ def test_edit_dataset_remove_attrs_and_description(cloud_test_catalog, dogs_dataset):
593
593
  dataset_new_name = uuid.uuid4().hex
594
594
  catalog = cloud_test_catalog.catalog
595
595
 
@@ -597,14 +597,14 @@ def test_edit_dataset_remove_labels_and_description(cloud_test_catalog, dogs_dat
597
597
  dogs_dataset.name,
598
598
  new_name=dataset_new_name,
599
599
  description="",
600
- labels=[],
600
+ attrs=[],
601
601
  )
602
602
 
603
603
  dataset = catalog.get_dataset(dataset_new_name)
604
604
  assert dataset.versions_values == [1]
605
605
  assert dataset.name == dataset_new_name
606
606
  assert dataset.description == ""
607
- assert dataset.labels == []
607
+ assert dataset.attrs == []
608
608
 
609
609
 
610
610
  def test_ls_dataset_rows(cloud_test_catalog, dogs_dataset):
@@ -118,7 +118,7 @@ def remote_dataset(remote_dataset_version, schema):
118
118
  "id": 1,
119
119
  "name": "dogs",
120
120
  "description": "",
121
- "labels": [],
121
+ "attrs": [],
122
122
  "schema": schema,
123
123
  "status": 4,
124
124
  "feature_schema": {},
@@ -247,7 +247,7 @@ def test_studio_edit_dataset(capsys, mocker):
247
247
  "new_name": "new-name",
248
248
  "team_name": "team_name",
249
249
  "description": None,
250
- "labels": None,
250
+ "attrs": None,
251
251
  }
252
252
 
253
253
  # With all arguments
@@ -261,8 +261,8 @@ def test_studio_edit_dataset(capsys, mocker):
261
261
  "new-name",
262
262
  "--description",
263
263
  "description",
264
- "--labels",
265
- "label1",
264
+ "--attrs",
265
+ "attr1",
266
266
  "--team",
267
267
  "team_name",
268
268
  "--studio",
@@ -275,7 +275,7 @@ def test_studio_edit_dataset(capsys, mocker):
275
275
  "dataset_name": "name",
276
276
  "new_name": "new-name",
277
277
  "description": "description",
278
- "labels": ["label1"],
278
+ "attrs": ["attr1"],
279
279
  "team_name": "team_name",
280
280
  }
281
281
 
@@ -356,6 +356,41 @@ def test_datasets_in_memory():
356
356
  assert datasets[0].num_objects == 6
357
357
 
358
358
 
359
+ @pytest.mark.parametrize(
360
+ "attrs,result",
361
+ [
362
+ (["number"], ["evens", "primes"]),
363
+ (["num=prime"], ["primes"]),
364
+ (["num=even"], ["evens"]),
365
+ (["num=*"], ["evens", "primes"]),
366
+ (["num=*", "small"], ["primes"]),
367
+ (["letter"], ["letters"]),
368
+ (["missing"], []),
369
+ (["num=*", "missing"], []),
370
+ (None, ["evens", "letters", "primes"]),
371
+ ([], ["evens", "letters", "primes"]),
372
+ ],
373
+ )
374
+ def test_datasets_filtering(test_session, attrs, result):
375
+ ds = dc.datasets(column="dataset", session=test_session)
376
+ datasets = [d for d in ds.collect("dataset") if d.name == "fibonacci"]
377
+ assert len(datasets) == 0
378
+
379
+ dc.read_values(num=[1, 2, 3], session=test_session).save(
380
+ "primes", attrs=["number", "num=prime", "small"]
381
+ )
382
+
383
+ dc.read_values(num=[2, 4, 6], session=test_session).save(
384
+ "evens", attrs=["number", "num=even"]
385
+ )
386
+
387
+ dc.read_values(letter=["a", "b", "c"], session=test_session).save(
388
+ "letters", attrs=["letter"]
389
+ )
390
+
391
+ assert sorted(dc.datasets(attrs=attrs).collect("name")) == sorted(result)
392
+
393
+
359
394
  def test_listings(test_session, tmp_dir):
360
395
  df = pd.DataFrame(DF_DATA)
361
396
  df.to_parquet(tmp_dir / "df.parquet")
File without changes
File without changes