datachain 0.16.5__tar.gz → 0.17.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369)
  1. {datachain-0.16.5/src/datachain.egg-info → datachain-0.17.1}/PKG-INFO +2 -2
  2. datachain-0.17.1/docs/commands/job/ls.md +84 -0
  3. {datachain-0.16.5 → datachain-0.17.1}/docs/examples.md +1 -1
  4. {datachain-0.16.5 → datachain-0.17.1}/mkdocs.yml +1 -0
  5. {datachain-0.16.5 → datachain-0.17.1}/noxfile.py +1 -0
  6. {datachain-0.16.5 → datachain-0.17.1}/pyproject.toml +1 -1
  7. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/catalog/catalog.py +20 -91
  8. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/datasets.py +1 -1
  9. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/show.py +1 -1
  10. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/parser/__init__.py +2 -2
  11. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/parser/job.py +30 -0
  12. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/metastore.py +23 -23
  13. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/sqlite.py +8 -7
  14. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/warehouse.py +12 -12
  15. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/dataset.py +88 -45
  16. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dataset_info.py +2 -1
  17. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/datachain.py +8 -3
  18. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/datasets.py +28 -7
  19. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/storage.py +10 -2
  20. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/values.py +2 -0
  21. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/pytorch.py +2 -2
  22. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/listing.py +1 -1
  23. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/dataset.py +9 -9
  24. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/dispatch.py +8 -6
  25. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/session.py +2 -2
  26. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/remote/studio.py +16 -5
  27. datachain-0.17.1/src/datachain/semver.py +58 -0
  28. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/studio.py +34 -3
  29. {datachain-0.16.5 → datachain-0.17.1/src/datachain.egg-info}/PKG-INFO +2 -2
  30. {datachain-0.16.5 → datachain-0.17.1}/src/datachain.egg-info/SOURCES.txt +3 -0
  31. {datachain-0.16.5 → datachain-0.17.1}/src/datachain.egg-info/requires.txt +1 -1
  32. {datachain-0.16.5 → datachain-0.17.1}/tests/conftest.py +4 -4
  33. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_catalog.py +8 -8
  34. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_datachain.py +13 -13
  35. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_dataset_query.py +82 -74
  36. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_datasets.py +59 -247
  37. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_hidden_field.py +1 -1
  38. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_ls.py +1 -1
  39. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_pull.py +18 -18
  40. {datachain-0.16.5 → datachain-0.17.1}/tests/test_cli_e2e.py +4 -4
  41. {datachain-0.16.5 → datachain-0.17.1}/tests/test_cli_studio.py +26 -22
  42. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_datachain.py +82 -19
  43. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_dataset.py +2 -2
  44. datachain-0.17.1/tests/unit/test_semver.py +71 -0
  45. {datachain-0.16.5 → datachain-0.17.1}/tests/utils.py +1 -1
  46. {datachain-0.16.5 → datachain-0.17.1}/.cruft.json +0 -0
  47. {datachain-0.16.5 → datachain-0.17.1}/.gitattributes +0 -0
  48. {datachain-0.16.5 → datachain-0.17.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  49. {datachain-0.16.5 → datachain-0.17.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  50. {datachain-0.16.5 → datachain-0.17.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  51. {datachain-0.16.5 → datachain-0.17.1}/.github/codecov.yaml +0 -0
  52. {datachain-0.16.5 → datachain-0.17.1}/.github/dependabot.yml +0 -0
  53. {datachain-0.16.5 → datachain-0.17.1}/.github/workflows/benchmarks.yml +0 -0
  54. {datachain-0.16.5 → datachain-0.17.1}/.github/workflows/release.yml +0 -0
  55. {datachain-0.16.5 → datachain-0.17.1}/.github/workflows/tests-studio.yml +0 -0
  56. {datachain-0.16.5 → datachain-0.17.1}/.github/workflows/tests.yml +0 -0
  57. {datachain-0.16.5 → datachain-0.17.1}/.github/workflows/update-template.yaml +0 -0
  58. {datachain-0.16.5 → datachain-0.17.1}/.gitignore +0 -0
  59. {datachain-0.16.5 → datachain-0.17.1}/.pre-commit-config.yaml +0 -0
  60. {datachain-0.16.5 → datachain-0.17.1}/CODE_OF_CONDUCT.rst +0 -0
  61. {datachain-0.16.5 → datachain-0.17.1}/LICENSE +0 -0
  62. {datachain-0.16.5 → datachain-0.17.1}/README.rst +0 -0
  63. {datachain-0.16.5 → datachain-0.17.1}/docs/assets/captioned_cartoons.png +0 -0
  64. {datachain-0.16.5 → datachain-0.17.1}/docs/assets/datachain-white.svg +0 -0
  65. {datachain-0.16.5 → datachain-0.17.1}/docs/assets/datachain.svg +0 -0
  66. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/auth/login.md +0 -0
  67. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/auth/logout.md +0 -0
  68. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/auth/team.md +0 -0
  69. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/auth/token.md +0 -0
  70. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/index.md +0 -0
  71. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/job/cancel.md +0 -0
  72. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/job/logs.md +0 -0
  73. {datachain-0.16.5 → datachain-0.17.1}/docs/commands/job/run.md +0 -0
  74. {datachain-0.16.5 → datachain-0.17.1}/docs/contributing.md +0 -0
  75. {datachain-0.16.5 → datachain-0.17.1}/docs/css/github-permalink-style.css +0 -0
  76. {datachain-0.16.5 → datachain-0.17.1}/docs/index.md +0 -0
  77. {datachain-0.16.5 → datachain-0.17.1}/docs/overrides/main.html +0 -0
  78. {datachain-0.16.5 → datachain-0.17.1}/docs/quick-start.md +0 -0
  79. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/arrowrow.md +0 -0
  80. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/bbox.md +0 -0
  81. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/file.md +0 -0
  82. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/imagefile.md +0 -0
  83. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/index.md +0 -0
  84. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/pose.md +0 -0
  85. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/segment.md +0 -0
  86. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/tarvfile.md +0 -0
  87. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/textfile.md +0 -0
  88. {datachain-0.16.5 → datachain-0.17.1}/docs/references/data-types/videofile.md +0 -0
  89. {datachain-0.16.5 → datachain-0.17.1}/docs/references/datachain.md +0 -0
  90. {datachain-0.16.5 → datachain-0.17.1}/docs/references/func.md +0 -0
  91. {datachain-0.16.5 → datachain-0.17.1}/docs/references/index.md +0 -0
  92. {datachain-0.16.5 → datachain-0.17.1}/docs/references/remotes.md +0 -0
  93. {datachain-0.16.5 → datachain-0.17.1}/docs/references/toolkit.md +0 -0
  94. {datachain-0.16.5 → datachain-0.17.1}/docs/references/torch.md +0 -0
  95. {datachain-0.16.5 → datachain-0.17.1}/docs/references/udf.md +0 -0
  96. {datachain-0.16.5 → datachain-0.17.1}/docs/tutorials.md +0 -0
  97. {datachain-0.16.5 → datachain-0.17.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  98. {datachain-0.16.5 → datachain-0.17.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  99. {datachain-0.16.5 → datachain-0.17.1}/examples/computer_vision/openimage-detect.py +0 -0
  100. {datachain-0.16.5 → datachain-0.17.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
  101. {datachain-0.16.5 → datachain-0.17.1}/examples/computer_vision/ultralytics-pose.py +0 -0
  102. {datachain-0.16.5 → datachain-0.17.1}/examples/computer_vision/ultralytics-segment.py +0 -0
  103. {datachain-0.16.5 → datachain-0.17.1}/examples/get_started/common_sql_functions.py +0 -0
  104. {datachain-0.16.5 → datachain-0.17.1}/examples/get_started/json-csv-reader.py +0 -0
  105. {datachain-0.16.5 → datachain-0.17.1}/examples/get_started/torch-loader.py +0 -0
  106. {datachain-0.16.5 → datachain-0.17.1}/examples/get_started/udfs/parallel.py +0 -0
  107. {datachain-0.16.5 → datachain-0.17.1}/examples/get_started/udfs/simple.py +0 -0
  108. {datachain-0.16.5 → datachain-0.17.1}/examples/get_started/udfs/stateful.py +0 -0
  109. {datachain-0.16.5 → datachain-0.17.1}/examples/llm_and_nlp/claude-query.py +0 -0
  110. {datachain-0.16.5 → datachain-0.17.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  111. {datachain-0.16.5 → datachain-0.17.1}/examples/multimodal/clip_inference.py +0 -0
  112. {datachain-0.16.5 → datachain-0.17.1}/examples/multimodal/hf_pipeline.py +0 -0
  113. {datachain-0.16.5 → datachain-0.17.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  114. {datachain-0.16.5 → datachain-0.17.1}/examples/multimodal/wds.py +0 -0
  115. {datachain-0.16.5 → datachain-0.17.1}/examples/multimodal/wds_filtered.py +0 -0
  116. {datachain-0.16.5 → datachain-0.17.1}/setup.cfg +0 -0
  117. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/__init__.py +0 -0
  118. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/__main__.py +0 -0
  119. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/asyn.py +0 -0
  120. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cache.py +0 -0
  121. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/catalog/__init__.py +0 -0
  122. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/catalog/datasource.py +0 -0
  123. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/catalog/loader.py +0 -0
  124. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/__init__.py +0 -0
  125. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/__init__.py +0 -0
  126. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/du.py +0 -0
  127. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/index.py +0 -0
  128. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/ls.py +0 -0
  129. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/misc.py +0 -0
  130. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/commands/query.py +0 -0
  131. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/parser/studio.py +0 -0
  132. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/parser/utils.py +0 -0
  133. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/cli/utils.py +0 -0
  134. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/__init__.py +0 -0
  135. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/azure.py +0 -0
  136. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/fileslice.py +0 -0
  137. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/fsspec.py +0 -0
  138. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/gcs.py +0 -0
  139. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/hf.py +0 -0
  140. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/local.py +0 -0
  141. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/client/s3.py +0 -0
  142. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/config.py +0 -0
  143. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/__init__.py +0 -0
  144. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/db_engine.py +0 -0
  145. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/job.py +0 -0
  146. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/schema.py +0 -0
  147. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/data_storage/serializer.py +0 -0
  148. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/diff/__init__.py +0 -0
  149. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/error.py +0 -0
  150. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/fs/__init__.py +0 -0
  151. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/fs/reference.py +0 -0
  152. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/fs/utils.py +0 -0
  153. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/__init__.py +0 -0
  154. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/aggregate.py +0 -0
  155. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/array.py +0 -0
  156. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/base.py +0 -0
  157. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/conditional.py +0 -0
  158. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/func.py +0 -0
  159. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/numeric.py +0 -0
  160. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/path.py +0 -0
  161. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/random.py +0 -0
  162. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/string.py +0 -0
  163. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/func/window.py +0 -0
  164. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/job.py +0 -0
  165. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/__init__.py +0 -0
  166. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/arrow.py +0 -0
  167. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/clip.py +0 -0
  168. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/convert/__init__.py +0 -0
  169. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/convert/flatten.py +0 -0
  170. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
  171. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
  172. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/convert/unflatten.py +0 -0
  173. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  174. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/data_model.py +0 -0
  175. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/__init__.py +0 -0
  176. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/csv.py +0 -0
  177. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/database.py +0 -0
  178. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/hf.py +0 -0
  179. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/json.py +0 -0
  180. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/listings.py +0 -0
  181. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/pandas.py +0 -0
  182. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/parquet.py +0 -0
  183. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/records.py +0 -0
  184. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/dc/utils.py +0 -0
  185. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/file.py +0 -0
  186. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/hf.py +0 -0
  187. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/image.py +0 -0
  188. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/listing.py +0 -0
  189. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/listing_info.py +0 -0
  190. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/meta_formats.py +0 -0
  191. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/model_store.py +0 -0
  192. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/settings.py +0 -0
  193. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/signal_schema.py +0 -0
  194. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/tar.py +0 -0
  195. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/text.py +0 -0
  196. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/udf.py +0 -0
  197. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/udf_signature.py +0 -0
  198. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/utils.py +0 -0
  199. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/video.py +0 -0
  200. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/webdataset.py +0 -0
  201. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/lib/webdataset_laion.py +0 -0
  202. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/__init__.py +0 -0
  203. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/bbox.py +0 -0
  204. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/pose.py +0 -0
  205. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/segment.py +0 -0
  206. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/ultralytics/__init__.py +0 -0
  207. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/ultralytics/bbox.py +0 -0
  208. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/ultralytics/pose.py +0 -0
  209. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/ultralytics/segment.py +0 -0
  210. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/model/utils.py +0 -0
  211. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/node.py +0 -0
  212. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/nodes_fetcher.py +0 -0
  213. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/nodes_thread_pool.py +0 -0
  214. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/progress.py +0 -0
  215. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/py.typed +0 -0
  216. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/__init__.py +0 -0
  217. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/batch.py +0 -0
  218. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/metrics.py +0 -0
  219. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/params.py +0 -0
  220. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/queue.py +0 -0
  221. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/schema.py +0 -0
  222. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/udf.py +0 -0
  223. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/query/utils.py +0 -0
  224. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/remote/__init__.py +0 -0
  225. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/script_meta.py +0 -0
  226. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/__init__.py +0 -0
  227. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/default/__init__.py +0 -0
  228. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/default/base.py +0 -0
  229. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/__init__.py +0 -0
  230. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/aggregate.py +0 -0
  231. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/array.py +0 -0
  232. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/conditional.py +0 -0
  233. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/numeric.py +0 -0
  234. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/path.py +0 -0
  235. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/random.py +0 -0
  236. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/functions/string.py +0 -0
  237. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/selectable.py +0 -0
  238. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  239. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/sqlite/base.py +0 -0
  240. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/sqlite/types.py +0 -0
  241. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/sqlite/vector.py +0 -0
  242. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/types.py +0 -0
  243. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/sql/utils.py +0 -0
  244. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/telemetry.py +0 -0
  245. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/toolkit/__init__.py +0 -0
  246. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/toolkit/split.py +0 -0
  247. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/torch/__init__.py +0 -0
  248. {datachain-0.16.5 → datachain-0.17.1}/src/datachain/utils.py +0 -0
  249. {datachain-0.16.5 → datachain-0.17.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  250. {datachain-0.16.5 → datachain-0.17.1}/src/datachain.egg-info/entry_points.txt +0 -0
  251. {datachain-0.16.5 → datachain-0.17.1}/src/datachain.egg-info/top_level.txt +0 -0
  252. {datachain-0.16.5 → datachain-0.17.1}/tests/__init__.py +0 -0
  253. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/__init__.py +0 -0
  254. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/conftest.py +0 -0
  255. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  256. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  257. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/datasets/.gitignore +0 -0
  258. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  259. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/test_datachain.py +0 -0
  260. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/test_ls.py +0 -0
  261. {datachain-0.16.5 → datachain-0.17.1}/tests/benchmarks/test_version.py +0 -0
  262. {datachain-0.16.5 → datachain-0.17.1}/tests/data.py +0 -0
  263. {datachain-0.16.5 → datachain-0.17.1}/tests/examples/__init__.py +0 -0
  264. {datachain-0.16.5 → datachain-0.17.1}/tests/examples/test_examples.py +0 -0
  265. {datachain-0.16.5 → datachain-0.17.1}/tests/examples/test_wds_e2e.py +0 -0
  266. {datachain-0.16.5 → datachain-0.17.1}/tests/examples/wds_data.py +0 -0
  267. {datachain-0.16.5 → datachain-0.17.1}/tests/func/__init__.py +0 -0
  268. {datachain-0.16.5 → datachain-0.17.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  269. {datachain-0.16.5 → datachain-0.17.1}/tests/func/data/lena.jpg +0 -0
  270. {datachain-0.16.5 → datachain-0.17.1}/tests/func/fake-service-account-credentials.json +0 -0
  271. {datachain-0.16.5 → datachain-0.17.1}/tests/func/model/__init__.py +0 -0
  272. {datachain-0.16.5 → datachain-0.17.1}/tests/func/model/data/running-mask0.png +0 -0
  273. {datachain-0.16.5 → datachain-0.17.1}/tests/func/model/data/running-mask1.png +0 -0
  274. {datachain-0.16.5 → datachain-0.17.1}/tests/func/model/data/running.jpg +0 -0
  275. {datachain-0.16.5 → datachain-0.17.1}/tests/func/model/data/ships.jpg +0 -0
  276. {datachain-0.16.5 → datachain-0.17.1}/tests/func/model/test_yolo.py +0 -0
  277. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_batching.py +0 -0
  278. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_client.py +0 -0
  279. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_cloud_transfer.py +0 -0
  280. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_data_storage.py +0 -0
  281. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_datachain_merge.py +0 -0
  282. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_feature_pickling.py +0 -0
  283. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_file.py +0 -0
  284. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_func.py +0 -0
  285. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_hf.py +0 -0
  286. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_image.py +0 -0
  287. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_listing.py +0 -0
  288. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_meta_formats.py +0 -0
  289. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_metrics.py +0 -0
  290. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_pytorch.py +0 -0
  291. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_query.py +0 -0
  292. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_read_database.py +0 -0
  293. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_session.py +0 -0
  294. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_toolkit.py +0 -0
  295. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_video.py +0 -0
  296. {datachain-0.16.5 → datachain-0.17.1}/tests/func/test_warehouse.py +0 -0
  297. {datachain-0.16.5 → datachain-0.17.1}/tests/scripts/feature_class.py +0 -0
  298. {datachain-0.16.5 → datachain-0.17.1}/tests/scripts/feature_class_exception.py +0 -0
  299. {datachain-0.16.5 → datachain-0.17.1}/tests/scripts/feature_class_parallel.py +0 -0
  300. {datachain-0.16.5 → datachain-0.17.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  301. {datachain-0.16.5 → datachain-0.17.1}/tests/scripts/name_len_slow.py +0 -0
  302. {datachain-0.16.5 → datachain-0.17.1}/tests/test_atomicity.py +0 -0
  303. {datachain-0.16.5 → datachain-0.17.1}/tests/test_import_time.py +0 -0
  304. {datachain-0.16.5 → datachain-0.17.1}/tests/test_query_e2e.py +0 -0
  305. {datachain-0.16.5 → datachain-0.17.1}/tests/test_telemetry.py +0 -0
  306. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/__init__.py +0 -0
  307. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/__init__.py +0 -0
  308. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/conftest.py +0 -0
  309. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_arrow.py +0 -0
  310. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_clip.py +0 -0
  311. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  312. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  313. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_diff.py +0 -0
  314. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_feature.py +0 -0
  315. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_feature_utils.py +0 -0
  316. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_file.py +0 -0
  317. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_hf.py +0 -0
  318. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_image.py +0 -0
  319. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_listing_info.py +0 -0
  320. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_python_to_sql.py +0 -0
  321. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_schema.py +0 -0
  322. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_signal_schema.py +0 -0
  323. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_sql_to_python.py +0 -0
  324. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_text.py +0 -0
  325. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_udf.py +0 -0
  326. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_udf_signature.py +0 -0
  327. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_utils.py +0 -0
  328. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/lib/test_webdataset.py +0 -0
  329. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/model/__init__.py +0 -0
  330. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/model/test_bbox.py +0 -0
  331. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/model/test_pose.py +0 -0
  332. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/model/test_segment.py +0 -0
  333. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/model/test_utils.py +0 -0
  334. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/__init__.py +0 -0
  335. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  336. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/sqlite/test_types.py +0 -0
  337. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  338. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/test_array.py +0 -0
  339. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/test_conditional.py +0 -0
  340. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/test_path.py +0 -0
  341. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/test_random.py +0 -0
  342. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/test_selectable.py +0 -0
  343. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/sql/test_string.py +0 -0
  344. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_asyn.py +0 -0
  345. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_cache.py +0 -0
  346. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_catalog.py +0 -0
  347. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_catalog_loader.py +0 -0
  348. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_cli_parsing.py +0 -0
  349. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_client.py +0 -0
  350. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_client_gcs.py +0 -0
  351. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_client_s3.py +0 -0
  352. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_config.py +0 -0
  353. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_data_storage.py +0 -0
  354. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_database_engine.py +0 -0
  355. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_dispatch.py +0 -0
  356. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_fileslice.py +0 -0
  357. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_func.py +0 -0
  358. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_listing.py +0 -0
  359. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_metastore.py +0 -0
  360. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_module_exports.py +0 -0
  361. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_pytorch.py +0 -0
  362. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_query.py +0 -0
  363. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_query_metrics.py +0 -0
  364. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_query_params.py +0 -0
  365. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_script_meta.py +0 -0
  366. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_serializer.py +0 -0
  367. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_session.py +0 -0
  368. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_utils.py +0 -0
  369. {datachain-0.16.5 → datachain-0.17.1}/tests/unit/test_warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.16.5
3
+ Version: 0.17.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -44,7 +44,7 @@ Requires-Dist: datamodel-code-generator>=0.25
44
44
  Requires-Dist: Pillow<12,>=10.0.0
45
45
  Requires-Dist: msgpack<2,>=1.0.4
46
46
  Requires-Dist: psutil
47
- Requires-Dist: huggingface_hub
47
+ Requires-Dist: huggingface_hub<0.31
48
48
  Requires-Dist: iterative-telemetry>=0.0.10
49
49
  Requires-Dist: platformdirs
50
50
  Requires-Dist: dvc-studio-client<1,>=0.21
@@ -0,0 +1,84 @@
1
+ # job ls
2
+
3
+ List jobs in Studio.
4
+
5
+ ## Synopsis
6
+
7
+ ```usage
8
+ usage: datachain job ls [-h] [-v] [-q] [--status STATUS] [--team TEAM] [--limit LIMIT]
9
+ ```
10
+
11
+ ## Description
12
+
13
+ This command lists jobs in Studio. You can filter jobs by their status, specify a team, and limit the number of jobs returned. By default, it shows the 20 most recent jobs.
14
+
15
+
16
+ ## Options
17
+
18
+ * `--status STATUS` - Status to filter jobs by
19
+ * `--team TEAM` - Team to list jobs for (default: from config)
20
+ * `--limit LIMIT` - Limit the number of jobs returned (default: 20)
21
+ * `-h`, `--help` - Show the help message and exit
22
+ * `-v`, `--verbose` - Be verbose
23
+ * `-q`, `--quiet` - Be quiet
24
+
25
+ ## Status options
26
+
27
+ You can filter jobs by any of the following statuses:
28
+
29
+ * `CREATED` - Job has been created but not yet scheduled
30
+ * `SCHEDULED` - Job is scheduled to run at a future time
31
+ * `QUEUED` - Job is in the queue waiting to be executed
32
+ * `INIT` - Job is initializing and preparing to run
33
+ * `RUNNING` - Job is currently executing
34
+ * `COMPLETE` - Job has finished successfully
35
+ * `FAILED` - Job has failed during execution
36
+ * `CANCELING_SCHEDULED` - A scheduled job is being canceled
37
+ * `CANCELING` - A running job is being canceled
38
+ * `CANCELED` - Job has been canceled
39
+ * `ACTIVE` - Job is in an active state
40
+ * `INACTIVE` - Job is in an inactive state
41
+
42
+ Note: Jobs with any of the following statuses are considered active:
43
+
44
+ * `CREATED`
45
+ * `SCHEDULED`
46
+ * `QUEUED`
47
+ * `INIT`
48
+ * `RUNNING`
49
+ * `CANCELING_SCHEDULED`
50
+ * `CANCELING`
51
+
52
+
53
+ ## Examples
54
+
55
+ 1. List all jobs (default limit of 20):
56
+ ```bash
57
+ datachain job ls
58
+ ```
59
+
60
+ 2. List jobs for a specific team:
61
+ ```bash
62
+ datachain job ls --team my-team
63
+ ```
64
+
65
+ 3. List jobs with a specific status:
66
+ ```bash
67
+ datachain job ls --status complete
68
+ ```
69
+
70
+ 4. List more jobs by increasing the limit:
71
+ ```bash
72
+ datachain job ls --limit 50
73
+ ```
74
+
75
+ 5. List jobs with verbose output:
76
+ ```bash
77
+ datachain job ls -v
78
+ ```
79
+
80
+ ## Notes
81
+
82
+ * The default limit of 20 jobs helps manage the output size and performance
83
+ * Jobs are typically listed in reverse chronological order (newest first)
84
+ * Use the `--status` filter to find jobs in specific states (e.g., `RUNNING`, `COMPLETE`, `FAILED`)
@@ -167,7 +167,7 @@ dialog-rating@v2
167
167
  By default, when a saved dataset is loaded, the latest version is fetched but another version can be requested:
168
168
 
169
169
  ```python
170
- ds = dc.read_dataset("dialog-rating", version=1)
170
+ ds = dc.read_dataset("dialog-rating", version="1.0.0")
171
171
  ```
172
172
 
173
173
  ### Chain execution, optimization and parallelism
@@ -96,6 +96,7 @@ nav:
96
96
  - run: commands/job/run.md
97
97
  - logs: commands/job/logs.md
98
98
  - cancel: commands/job/cancel.md
99
+ - ls: commands/job/ls.md
99
100
  - 📡 Interacting with remote storage: references/remotes.md
100
101
  - 🤝 Contributing: contributing.md
101
102
 
@@ -90,6 +90,7 @@ def build(session: nox.Session) -> None:
90
90
  @nox.session(python=python_versions)
91
91
  def examples(session: nox.Session) -> None:
92
92
  session.install(".[examples]")
93
+ session.run("uv", "pip", "list")
93
94
  session.run(
94
95
  "pytest",
95
96
  "--durations=0",
@@ -48,7 +48,7 @@ dependencies = [
48
48
  "Pillow>=10.0.0,<12",
49
49
  "msgpack>=1.0.4,<2",
50
50
  "psutil",
51
- "huggingface_hub",
51
+ "huggingface_hub<0.31", # fix for "Provider 'featherless-ai' not supported" error
52
52
  "iterative-telemetry>=0.0.10",
53
53
  "platformdirs",
54
54
  "dvc-studio-client>=0.21,<1",
@@ -33,6 +33,7 @@ from datachain.cache import Cache
33
33
  from datachain.client import Client
34
34
  from datachain.dataset import (
35
35
  DATASET_PREFIX,
36
+ DEFAULT_DATASET_VERSION,
36
37
  QUERY_DATASET_PREFIX,
37
38
  DatasetDependency,
38
39
  DatasetListRecord,
@@ -154,9 +155,9 @@ class DatasetRowsFetcher(NodesThreadPool):
154
155
  metastore: "AbstractMetastore",
155
156
  warehouse: "AbstractWarehouse",
156
157
  remote_ds_name: str,
157
- remote_ds_version: int,
158
+ remote_ds_version: str,
158
159
  local_ds_name: str,
159
- local_ds_version: int,
160
+ local_ds_version: str,
160
161
  schema: dict[str, Union[SQLType, type[SQLType]]],
161
162
  max_threads: int = PULL_DATASET_MAX_THREADS,
162
163
  progress_bar=None,
@@ -286,7 +287,7 @@ class NodeGroup:
286
287
  # (not including the bucket name or s3:// prefix)
287
288
  source_path: str = ""
288
289
  dataset_name: Optional[str] = None
289
- dataset_version: Optional[int] = None
290
+ dataset_version: Optional[str] = None
290
291
  instantiated_nodes: Optional[list[NodeWithPath]] = None
291
292
 
292
293
  @property
@@ -607,7 +608,7 @@ class Catalog:
607
608
  return lst, client, list_path
608
609
 
609
610
  def _remove_dataset_rows_and_warehouse_info(
610
- self, dataset: DatasetRecord, version: int, **kwargs
611
+ self, dataset: DatasetRecord, version: str, **kwargs
611
612
  ):
612
613
  self.warehouse.drop_dataset_rows_table(dataset, version)
613
614
  self.update_dataset_version_with_warehouse_info(
@@ -767,7 +768,7 @@ class Catalog:
767
768
  def create_dataset(
768
769
  self,
769
770
  name: str,
770
- version: Optional[int] = None,
771
+ version: Optional[str] = None,
771
772
  *,
772
773
  columns: Sequence[Column],
773
774
  feature_schema: Optional[dict] = None,
@@ -783,18 +784,17 @@ class Catalog:
783
784
  Creates new dataset of a specific version.
784
785
  If dataset is not yet created, it will create it with version 1
785
786
  If version is None, then next unused version is created.
786
- If version is given, then it must be an unused version number.
787
+ If version is given, then it must be an unused version.
787
788
  """
788
789
  assert [c.name for c in columns if c.name != "sys__id"], f"got {columns=}"
789
790
  if not listing and Client.is_data_source_uri(name):
790
791
  raise RuntimeError(
791
792
  "Cannot create dataset that starts with source prefix, e.g s3://"
792
793
  )
793
- default_version = 1
794
+ default_version = DEFAULT_DATASET_VERSION
794
795
  try:
795
796
  dataset = self.get_dataset(name)
796
- default_version = dataset.next_version
797
-
797
+ default_version = dataset.next_version_patch
798
798
  if (description or attrs) and (
799
799
  dataset.description != description or dataset.attrs != attrs
800
800
  ):
@@ -846,7 +846,7 @@ class Catalog:
846
846
  def create_new_dataset_version(
847
847
  self,
848
848
  dataset: DatasetRecord,
849
- version: int,
849
+ version: str,
850
850
  *,
851
851
  columns: Sequence[Column],
852
852
  sources="",
@@ -892,7 +892,7 @@ class Catalog:
892
892
  return dataset
893
893
 
894
894
  def update_dataset_version_with_warehouse_info(
895
- self, dataset: DatasetRecord, version: int, rows_dropped=False, **kwargs
895
+ self, dataset: DatasetRecord, version: str, rows_dropped=False, **kwargs
896
896
  ) -> None:
897
897
  from datachain.query.dataset import DatasetQuery
898
898
 
@@ -959,7 +959,7 @@ class Catalog:
959
959
  return dataset
960
960
 
961
961
  def remove_dataset_version(
962
- self, dataset: DatasetRecord, version: int, drop_rows: Optional[bool] = True
962
+ self, dataset: DatasetRecord, version: str, drop_rows: Optional[bool] = True
963
963
  ) -> None:
964
964
  """
965
965
  Deletes one single dataset version.
@@ -1037,82 +1037,11 @@ class Catalog:
1037
1037
 
1038
1038
  return self.get_dataset(name)
1039
1039
 
1040
- def register_dataset(
1041
- self,
1042
- dataset: DatasetRecord,
1043
- version: int,
1044
- target_dataset: DatasetRecord,
1045
- target_version: Optional[int] = None,
1046
- ) -> DatasetRecord:
1047
- """
1048
- Registers dataset version of one dataset as dataset version of another
1049
- one (it can be new version of existing one).
1050
- It also removes original dataset version
1051
- """
1052
- target_version = target_version or target_dataset.next_version
1053
-
1054
- if not target_dataset.is_valid_next_version(target_version):
1055
- raise DatasetInvalidVersionError(
1056
- f"Version {target_version} must be higher than the current latest one"
1057
- )
1058
-
1059
- dataset_version = dataset.get_version(version)
1060
- if not dataset_version:
1061
- raise DatasetVersionNotFoundError(
1062
- f"Dataset {dataset.name} does not have version {version}"
1063
- )
1064
-
1065
- if not dataset_version.is_final_status():
1066
- raise ValueError("Cannot register dataset version in non final status")
1067
-
1068
- # copy dataset version
1069
- target_dataset = self.metastore.create_dataset_version(
1070
- target_dataset,
1071
- target_version,
1072
- sources=dataset_version.sources,
1073
- status=dataset_version.status,
1074
- query_script=dataset_version.query_script,
1075
- error_message=dataset_version.error_message,
1076
- error_stack=dataset_version.error_stack,
1077
- script_output=dataset_version.script_output,
1078
- created_at=dataset_version.created_at,
1079
- finished_at=dataset_version.finished_at,
1080
- schema=dataset_version.serialized_schema,
1081
- num_objects=dataset_version.num_objects,
1082
- size=dataset_version.size,
1083
- preview=dataset_version.preview,
1084
- job_id=dataset_version.job_id,
1085
- )
1086
-
1087
- # to avoid re-creating rows table, we are just renaming it for a new version
1088
- # of target dataset
1089
- self.warehouse.rename_dataset_table(
1090
- dataset.name,
1091
- target_dataset.name,
1092
- old_version=version,
1093
- new_version=target_version,
1094
- )
1095
- self.metastore.update_dataset_dependency_source(
1096
- dataset,
1097
- version,
1098
- new_source_dataset=target_dataset,
1099
- new_source_dataset_version=target_version,
1100
- )
1101
-
1102
- if dataset.id == target_dataset.id:
1103
- # we are updating the same dataset so we need to refresh it to have newly
1104
- # added version in step before
1105
- dataset = self.get_dataset(dataset.name)
1106
-
1107
- self.remove_dataset_version(dataset, version, drop_rows=False)
1108
-
1109
- return self.get_dataset(target_dataset.name)
1110
-
1111
1040
  def get_dataset(self, name: str) -> DatasetRecord:
1112
1041
  return self.metastore.get_dataset(name)
1113
1042
 
1114
1043
  def get_dataset_with_remote_fallback(
1115
- self, name: str, version: Optional[int] = None
1044
+ self, name: str, version: Optional[str] = None
1116
1045
  ) -> DatasetRecord:
1117
1046
  try:
1118
1047
  ds = self.get_dataset(name)
@@ -1157,7 +1086,7 @@ class Catalog:
1157
1086
  return DatasetRecord.from_dict(dataset_info)
1158
1087
 
1159
1088
  def get_dataset_dependencies(
1160
- self, name: str, version: int, indirect=False
1089
+ self, name: str, version: str, indirect=False
1161
1090
  ) -> list[Optional[DatasetDependency]]:
1162
1091
  dataset = self.get_dataset(name)
1163
1092
 
@@ -1175,7 +1104,7 @@ class Catalog:
1175
1104
  if d.is_dataset:
1176
1105
  # only datasets can have dependencies
1177
1106
  d.dependencies = self.get_dataset_dependencies(
1178
- d.name, int(d.version), indirect=indirect
1107
+ d.name, d.version, indirect=indirect
1179
1108
  )
1180
1109
 
1181
1110
  return direct_dependencies
@@ -1244,7 +1173,7 @@ class Catalog:
1244
1173
  ]
1245
1174
 
1246
1175
  def ls_dataset_rows(
1247
- self, name: str, version: int, offset=None, limit=None
1176
+ self, name: str, version: str, offset=None, limit=None
1248
1177
  ) -> list[dict]:
1249
1178
  from datachain.query.dataset import DatasetQuery
1250
1179
 
@@ -1282,7 +1211,7 @@ class Catalog:
1282
1211
  self,
1283
1212
  bucket_uri: str,
1284
1213
  name: str,
1285
- version: int,
1214
+ version: str,
1286
1215
  client_config=None,
1287
1216
  ) -> list[str]:
1288
1217
  dataset = self.get_dataset(name)
@@ -1291,14 +1220,14 @@ class Catalog:
1291
1220
  bucket_uri, dataset, version, client_config
1292
1221
  )
1293
1222
 
1294
- def dataset_table_export_file_names(self, name: str, version: int) -> list[str]:
1223
+ def dataset_table_export_file_names(self, name: str, version: str) -> list[str]:
1295
1224
  dataset = self.get_dataset(name)
1296
1225
  return self.warehouse.dataset_table_export_file_names(dataset, version)
1297
1226
 
1298
1227
  def remove_dataset(
1299
1228
  self,
1300
1229
  name: str,
1301
- version: Optional[int] = None,
1230
+ version: Optional[str] = None,
1302
1231
  force: Optional[bool] = False,
1303
1232
  studio: Optional[bool] = False,
1304
1233
  ):
@@ -1372,7 +1301,7 @@ class Catalog:
1372
1301
  remote_ds_uri: str,
1373
1302
  output: Optional[str] = None,
1374
1303
  local_ds_name: Optional[str] = None,
1375
- local_ds_version: Optional[int] = None,
1304
+ local_ds_version: Optional[str] = None,
1376
1305
  cp: bool = False,
1377
1306
  force: bool = False,
1378
1307
  *,
@@ -127,7 +127,7 @@ def _datasets_tabulate_row(name, both, local_version, studio_version):
127
127
  def rm_dataset(
128
128
  catalog: "Catalog",
129
129
  name: str,
130
- version: Optional[int] = None,
130
+ version: Optional[str] = None,
131
131
  force: Optional[bool] = False,
132
132
  studio: bool = False,
133
133
  local: bool = False,
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
10
10
  def show(
11
11
  catalog: "Catalog",
12
12
  name: str,
13
- version: Optional[int] = None,
13
+ version: Optional[str] = None,
14
14
  limit: int = 10,
15
15
  offset: int = 0,
16
16
  columns: Sequence[str] = (),
@@ -302,7 +302,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
302
302
  "--version",
303
303
  action="store",
304
304
  default=None,
305
- type=int,
305
+ type=str,
306
306
  help="Dataset version",
307
307
  )
308
308
  rm_dataset_parser.add_argument(
@@ -495,7 +495,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
495
495
  "--version",
496
496
  action="store",
497
497
  default=None,
498
- type=int,
498
+ type=str,
499
499
  help="Dataset version",
500
500
  )
501
501
  show_parser.add_argument("--schema", action="store_true", help="Show schema")
@@ -83,6 +83,36 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
83
83
  help="Python package requirements",
84
84
  )
85
85
 
86
+ studio_ls_help = "List jobs in Studio"
87
+ studio_ls_description = "List jobs in Studio."
88
+
89
+ studio_ls_parser = jobs_subparser.add_parser(
90
+ "ls",
91
+ parents=[parent_parser],
92
+ description=studio_ls_description,
93
+ help=studio_ls_help,
94
+ formatter_class=CustomHelpFormatter,
95
+ )
96
+
97
+ studio_ls_parser.add_argument(
98
+ "--status",
99
+ action="store",
100
+ help="Status to filter jobs by",
101
+ )
102
+
103
+ studio_ls_parser.add_argument(
104
+ "--team",
105
+ action="store",
106
+ default=None,
107
+ help="Team to list jobs for (default: from config)",
108
+ )
109
+ studio_ls_parser.add_argument(
110
+ "--limit",
111
+ type=int,
112
+ default=20,
113
+ help="Limit the number of jobs returned (default: 20)",
114
+ )
115
+
86
116
  studio_cancel_help = "Cancel a job in Studio"
87
117
  studio_cancel_description = "Cancel a running job in Studio."
88
118
 
@@ -128,7 +128,7 @@ class AbstractMetastore(ABC, Serializable):
128
128
  def create_dataset_version( # noqa: PLR0913
129
129
  self,
130
130
  dataset: DatasetRecord,
131
- version: int,
131
+ version: str,
132
132
  status: int,
133
133
  sources: str = "",
134
134
  feature_schema: Optional[dict] = None,
@@ -158,13 +158,13 @@ class AbstractMetastore(ABC, Serializable):
158
158
 
159
159
  @abstractmethod
160
160
  def update_dataset_version(
161
- self, dataset: DatasetRecord, version: int, **kwargs
161
+ self, dataset: DatasetRecord, version: str, **kwargs
162
162
  ) -> DatasetVersion:
163
163
  """Updates dataset version fields."""
164
164
 
165
165
  @abstractmethod
166
166
  def remove_dataset_version(
167
- self, dataset: DatasetRecord, version: int
167
+ self, dataset: DatasetRecord, version: str
168
168
  ) -> DatasetRecord:
169
169
  """
170
170
  Deletes one single dataset version.
@@ -188,7 +188,7 @@ class AbstractMetastore(ABC, Serializable):
188
188
  self,
189
189
  dataset: DatasetRecord,
190
190
  status: int,
191
- version: Optional[int] = None,
191
+ version: Optional[str] = None,
192
192
  error_message="",
193
193
  error_stack="",
194
194
  script_output="",
@@ -202,9 +202,9 @@ class AbstractMetastore(ABC, Serializable):
202
202
  def add_dataset_dependency(
203
203
  self,
204
204
  source_dataset_name: str,
205
- source_dataset_version: int,
205
+ source_dataset_version: str,
206
206
  dataset_name: str,
207
- dataset_version: int,
207
+ dataset_version: str,
208
208
  ) -> None:
209
209
  """Adds dataset dependency to dataset."""
210
210
 
@@ -212,21 +212,21 @@ class AbstractMetastore(ABC, Serializable):
212
212
  def update_dataset_dependency_source(
213
213
  self,
214
214
  source_dataset: DatasetRecord,
215
- source_dataset_version: int,
215
+ source_dataset_version: str,
216
216
  new_source_dataset: Optional[DatasetRecord] = None,
217
- new_source_dataset_version: Optional[int] = None,
217
+ new_source_dataset_version: Optional[str] = None,
218
218
  ) -> None:
219
219
  """Updates dataset dependency source."""
220
220
 
221
221
  @abstractmethod
222
222
  def get_direct_dataset_dependencies(
223
- self, dataset: DatasetRecord, version: int
223
+ self, dataset: DatasetRecord, version: str
224
224
  ) -> list[Optional[DatasetDependency]]:
225
225
  """Gets direct dataset dependencies."""
226
226
 
227
227
  @abstractmethod
228
228
  def remove_dataset_dependencies(
229
- self, dataset: DatasetRecord, version: Optional[int] = None
229
+ self, dataset: DatasetRecord, version: Optional[str] = None
230
230
  ) -> None:
231
231
  """
232
232
  When we remove dataset, we need to clean up it's dependencies as well.
@@ -234,7 +234,7 @@ class AbstractMetastore(ABC, Serializable):
234
234
 
235
235
  @abstractmethod
236
236
  def remove_dataset_dependants(
237
- self, dataset: DatasetRecord, version: Optional[int] = None
237
+ self, dataset: DatasetRecord, version: Optional[str] = None
238
238
  ) -> None:
239
239
  """
240
240
  When we remove dataset, we need to clear its references in other dataset
@@ -370,7 +370,7 @@ class AbstractDBMetastore(AbstractMetastore):
370
370
  ForeignKey(f"{cls.DATASET_TABLE}.id", ondelete="CASCADE"),
371
371
  nullable=False,
372
372
  ),
373
- Column("version", Integer, nullable=False),
373
+ Column("version", Text, nullable=False, default="1.0.0"),
374
374
  Column(
375
375
  "status",
376
376
  Integer,
@@ -554,7 +554,7 @@ class AbstractDBMetastore(AbstractMetastore):
554
554
  def create_dataset_version( # noqa: PLR0913
555
555
  self,
556
556
  dataset: DatasetRecord,
557
- version: int,
557
+ version: str,
558
558
  status: int,
559
559
  sources: str = "",
560
560
  feature_schema: Optional[dict] = None,
@@ -648,7 +648,7 @@ class AbstractDBMetastore(AbstractMetastore):
648
648
  return result_ds
649
649
 
650
650
  def update_dataset_version(
651
- self, dataset: DatasetRecord, version: int, conn=None, **kwargs
651
+ self, dataset: DatasetRecord, version: str, conn=None, **kwargs
652
652
  ) -> DatasetVersion:
653
653
  """Updates dataset fields."""
654
654
  dataset_version = dataset.get_version(version)
@@ -758,7 +758,7 @@ class AbstractDBMetastore(AbstractMetastore):
758
758
  return ds
759
759
 
760
760
  def remove_dataset_version(
761
- self, dataset: DatasetRecord, version: int
761
+ self, dataset: DatasetRecord, version: str
762
762
  ) -> DatasetRecord:
763
763
  """
764
764
  Deletes one single dataset version.
@@ -791,7 +791,7 @@ class AbstractDBMetastore(AbstractMetastore):
791
791
  self,
792
792
  dataset: DatasetRecord,
793
793
  status: int,
794
- version: Optional[int] = None,
794
+ version: Optional[str] = None,
795
795
  error_message="",
796
796
  error_stack="",
797
797
  script_output="",
@@ -825,9 +825,9 @@ class AbstractDBMetastore(AbstractMetastore):
825
825
  def add_dataset_dependency(
826
826
  self,
827
827
  source_dataset_name: str,
828
- source_dataset_version: int,
828
+ source_dataset_version: str,
829
829
  dataset_name: str,
830
- dataset_version: int,
830
+ dataset_version: str,
831
831
  ) -> None:
832
832
  """Adds dataset dependency to dataset."""
833
833
  source_dataset = self.get_dataset(source_dataset_name)
@@ -847,9 +847,9 @@ class AbstractDBMetastore(AbstractMetastore):
847
847
  def update_dataset_dependency_source(
848
848
  self,
849
849
  source_dataset: DatasetRecord,
850
- source_dataset_version: int,
850
+ source_dataset_version: str,
851
851
  new_source_dataset: Optional[DatasetRecord] = None,
852
- new_source_dataset_version: Optional[int] = None,
852
+ new_source_dataset_version: Optional[str] = None,
853
853
  ) -> None:
854
854
  dd = self._datasets_dependencies
855
855
 
@@ -880,7 +880,7 @@ class AbstractDBMetastore(AbstractMetastore):
880
880
  """
881
881
 
882
882
  def get_direct_dataset_dependencies(
883
- self, dataset: DatasetRecord, version: int
883
+ self, dataset: DatasetRecord, version: str
884
884
  ) -> list[Optional[DatasetDependency]]:
885
885
  d = self._datasets
886
886
  dd = self._datasets_dependencies
@@ -909,7 +909,7 @@ class AbstractDBMetastore(AbstractMetastore):
909
909
  return [self.dependency_class.parse(*r) for r in self.db.execute(query)]
910
910
 
911
911
  def remove_dataset_dependencies(
912
- self, dataset: DatasetRecord, version: Optional[int] = None
912
+ self, dataset: DatasetRecord, version: Optional[str] = None
913
913
  ) -> None:
914
914
  """
915
915
  When we remove dataset, we need to clean up it's dependencies as well
@@ -928,7 +928,7 @@ class AbstractDBMetastore(AbstractMetastore):
928
928
  self.db.execute(q)
929
929
 
930
930
  def remove_dataset_dependants(
931
- self, dataset: DatasetRecord, version: Optional[int] = None
931
+ self, dataset: DatasetRecord, version: Optional[str] = None
932
932
  ) -> None:
933
933
  """
934
934
  When we remove dataset, we need to clear its references in other dataset