datachain 0.19.1__tar.gz → 0.19.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (390) hide show
  1. {datachain-0.19.1 → datachain-0.19.2}/PKG-INFO +1 -1
  2. datachain-0.19.2/docs/commands/auth/team.md +42 -0
  3. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/index.md +2 -0
  4. datachain-0.19.2/docs/commands/job/clusters.md +39 -0
  5. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/job/run.md +10 -0
  6. {datachain-0.19.1 → datachain-0.19.2}/mkdocs.yml +1 -0
  7. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/parser/job.py +25 -0
  8. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/parser/studio.py +11 -4
  9. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/schema.py +23 -1
  10. datachain-0.19.2/src/datachain/lib/dc/listings.py +154 -0
  11. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/remote/studio.py +6 -0
  12. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/studio.py +45 -2
  13. {datachain-0.19.1 → datachain-0.19.2}/src/datachain.egg-info/PKG-INFO +1 -1
  14. {datachain-0.19.1 → datachain-0.19.2}/src/datachain.egg-info/SOURCES.txt +1 -0
  15. {datachain-0.19.1 → datachain-0.19.2}/tests/test_cli_studio.py +6 -3
  16. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_datachain.py +52 -1
  17. datachain-0.19.1/docs/commands/auth/team.md +0 -36
  18. datachain-0.19.1/src/datachain/lib/dc/listings.py +0 -43
  19. {datachain-0.19.1 → datachain-0.19.2}/.cruft.json +0 -0
  20. {datachain-0.19.1 → datachain-0.19.2}/.gitattributes +0 -0
  21. {datachain-0.19.1 → datachain-0.19.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  22. {datachain-0.19.1 → datachain-0.19.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  23. {datachain-0.19.1 → datachain-0.19.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  24. {datachain-0.19.1 → datachain-0.19.2}/.github/codecov.yaml +0 -0
  25. {datachain-0.19.1 → datachain-0.19.2}/.github/dependabot.yml +0 -0
  26. {datachain-0.19.1 → datachain-0.19.2}/.github/workflows/benchmarks.yml +0 -0
  27. {datachain-0.19.1 → datachain-0.19.2}/.github/workflows/release.yml +0 -0
  28. {datachain-0.19.1 → datachain-0.19.2}/.github/workflows/tests-studio.yml +0 -0
  29. {datachain-0.19.1 → datachain-0.19.2}/.github/workflows/tests.yml +0 -0
  30. {datachain-0.19.1 → datachain-0.19.2}/.github/workflows/update-template.yaml +0 -0
  31. {datachain-0.19.1 → datachain-0.19.2}/.gitignore +0 -0
  32. {datachain-0.19.1 → datachain-0.19.2}/.pre-commit-config.yaml +0 -0
  33. {datachain-0.19.1 → datachain-0.19.2}/CODE_OF_CONDUCT.rst +0 -0
  34. {datachain-0.19.1 → datachain-0.19.2}/LICENSE +0 -0
  35. {datachain-0.19.1 → datachain-0.19.2}/README.rst +0 -0
  36. {datachain-0.19.1 → datachain-0.19.2}/docs/assets/captioned_cartoons.png +0 -0
  37. {datachain-0.19.1 → datachain-0.19.2}/docs/assets/datachain-white.svg +0 -0
  38. {datachain-0.19.1 → datachain-0.19.2}/docs/assets/datachain.svg +0 -0
  39. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/auth/login.md +0 -0
  40. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/auth/logout.md +0 -0
  41. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/auth/token.md +0 -0
  42. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/job/cancel.md +0 -0
  43. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/job/logs.md +0 -0
  44. {datachain-0.19.1 → datachain-0.19.2}/docs/commands/job/ls.md +0 -0
  45. {datachain-0.19.1 → datachain-0.19.2}/docs/contributing.md +0 -0
  46. {datachain-0.19.1 → datachain-0.19.2}/docs/css/github-permalink-style.css +0 -0
  47. {datachain-0.19.1 → datachain-0.19.2}/docs/examples.md +0 -0
  48. {datachain-0.19.1 → datachain-0.19.2}/docs/guide/delta.md +0 -0
  49. {datachain-0.19.1 → datachain-0.19.2}/docs/guide/index.md +0 -0
  50. {datachain-0.19.1 → datachain-0.19.2}/docs/guide/processing.md +0 -0
  51. {datachain-0.19.1 → datachain-0.19.2}/docs/guide/remotes.md +0 -0
  52. {datachain-0.19.1 → datachain-0.19.2}/docs/guide/retry.md +0 -0
  53. {datachain-0.19.1 → datachain-0.19.2}/docs/index.md +0 -0
  54. {datachain-0.19.1 → datachain-0.19.2}/docs/overrides/main.html +0 -0
  55. {datachain-0.19.1 → datachain-0.19.2}/docs/quick-start.md +0 -0
  56. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/arrowrow.md +0 -0
  57. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/bbox.md +0 -0
  58. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/file.md +0 -0
  59. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/imagefile.md +0 -0
  60. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/index.md +0 -0
  61. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/pose.md +0 -0
  62. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/segment.md +0 -0
  63. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/tarvfile.md +0 -0
  64. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/textfile.md +0 -0
  65. {datachain-0.19.1 → datachain-0.19.2}/docs/references/data-types/videofile.md +0 -0
  66. {datachain-0.19.1 → datachain-0.19.2}/docs/references/datachain.md +0 -0
  67. {datachain-0.19.1 → datachain-0.19.2}/docs/references/func.md +0 -0
  68. {datachain-0.19.1 → datachain-0.19.2}/docs/references/index.md +0 -0
  69. {datachain-0.19.1 → datachain-0.19.2}/docs/references/toolkit.md +0 -0
  70. {datachain-0.19.1 → datachain-0.19.2}/docs/references/torch.md +0 -0
  71. {datachain-0.19.1 → datachain-0.19.2}/docs/references/udf.md +0 -0
  72. {datachain-0.19.1 → datachain-0.19.2}/docs/tutorials.md +0 -0
  73. {datachain-0.19.1 → datachain-0.19.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  74. {datachain-0.19.1 → datachain-0.19.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  75. {datachain-0.19.1 → datachain-0.19.2}/examples/computer_vision/openimage-detect.py +0 -0
  76. {datachain-0.19.1 → datachain-0.19.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  77. {datachain-0.19.1 → datachain-0.19.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  78. {datachain-0.19.1 → datachain-0.19.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  79. {datachain-0.19.1 → datachain-0.19.2}/examples/get_started/common_sql_functions.py +0 -0
  80. {datachain-0.19.1 → datachain-0.19.2}/examples/get_started/json-csv-reader.py +0 -0
  81. {datachain-0.19.1 → datachain-0.19.2}/examples/get_started/torch-loader.py +0 -0
  82. {datachain-0.19.1 → datachain-0.19.2}/examples/get_started/udfs/parallel.py +0 -0
  83. {datachain-0.19.1 → datachain-0.19.2}/examples/get_started/udfs/simple.py +0 -0
  84. {datachain-0.19.1 → datachain-0.19.2}/examples/get_started/udfs/stateful.py +0 -0
  85. {datachain-0.19.1 → datachain-0.19.2}/examples/incremental_processing/delta.py +0 -0
  86. {datachain-0.19.1 → datachain-0.19.2}/examples/incremental_processing/retry.py +0 -0
  87. {datachain-0.19.1 → datachain-0.19.2}/examples/incremental_processing/utils.py +0 -0
  88. {datachain-0.19.1 → datachain-0.19.2}/examples/llm_and_nlp/claude-query.py +0 -0
  89. {datachain-0.19.1 → datachain-0.19.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  90. {datachain-0.19.1 → datachain-0.19.2}/examples/multimodal/clip_inference.py +0 -0
  91. {datachain-0.19.1 → datachain-0.19.2}/examples/multimodal/hf_pipeline.py +0 -0
  92. {datachain-0.19.1 → datachain-0.19.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  93. {datachain-0.19.1 → datachain-0.19.2}/examples/multimodal/wds.py +0 -0
  94. {datachain-0.19.1 → datachain-0.19.2}/examples/multimodal/wds_filtered.py +0 -0
  95. {datachain-0.19.1 → datachain-0.19.2}/noxfile.py +0 -0
  96. {datachain-0.19.1 → datachain-0.19.2}/pyproject.toml +0 -0
  97. {datachain-0.19.1 → datachain-0.19.2}/setup.cfg +0 -0
  98. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/__init__.py +0 -0
  99. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/__main__.py +0 -0
  100. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/asyn.py +0 -0
  101. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cache.py +0 -0
  102. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/catalog/__init__.py +0 -0
  103. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/catalog/catalog.py +0 -0
  104. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/catalog/datasource.py +0 -0
  105. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/catalog/loader.py +0 -0
  106. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/__init__.py +0 -0
  107. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/__init__.py +0 -0
  108. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/datasets.py +0 -0
  109. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/du.py +0 -0
  110. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/index.py +0 -0
  111. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/ls.py +0 -0
  112. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/misc.py +0 -0
  113. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/query.py +0 -0
  114. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/commands/show.py +0 -0
  115. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/parser/__init__.py +0 -0
  116. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/parser/utils.py +0 -0
  117. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/cli/utils.py +0 -0
  118. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/__init__.py +0 -0
  119. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/azure.py +0 -0
  120. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/fileslice.py +0 -0
  121. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/fsspec.py +0 -0
  122. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/gcs.py +0 -0
  123. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/hf.py +0 -0
  124. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/local.py +0 -0
  125. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/client/s3.py +0 -0
  126. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/config.py +0 -0
  127. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/__init__.py +0 -0
  128. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/db_engine.py +0 -0
  129. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/job.py +0 -0
  130. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/metastore.py +0 -0
  131. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/serializer.py +0 -0
  132. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/sqlite.py +0 -0
  133. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/data_storage/warehouse.py +0 -0
  134. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/dataset.py +0 -0
  135. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/delta.py +0 -0
  136. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/diff/__init__.py +0 -0
  137. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/error.py +0 -0
  138. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/fs/__init__.py +0 -0
  139. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/fs/reference.py +0 -0
  140. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/fs/utils.py +0 -0
  141. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/__init__.py +0 -0
  142. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/aggregate.py +0 -0
  143. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/array.py +0 -0
  144. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/base.py +0 -0
  145. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/conditional.py +0 -0
  146. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/func.py +0 -0
  147. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/numeric.py +0 -0
  148. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/path.py +0 -0
  149. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/random.py +0 -0
  150. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/string.py +0 -0
  151. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/func/window.py +0 -0
  152. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/job.py +0 -0
  153. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/__init__.py +0 -0
  154. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/arrow.py +0 -0
  155. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/clip.py +0 -0
  156. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/convert/__init__.py +0 -0
  157. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/convert/flatten.py +0 -0
  158. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  159. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  160. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/convert/unflatten.py +0 -0
  161. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  162. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/data_model.py +0 -0
  163. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dataset_info.py +0 -0
  164. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/__init__.py +0 -0
  165. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/csv.py +0 -0
  166. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/database.py +0 -0
  167. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/datachain.py +0 -0
  168. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/datasets.py +0 -0
  169. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/hf.py +0 -0
  170. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/json.py +0 -0
  171. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/pandas.py +0 -0
  172. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/parquet.py +0 -0
  173. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/records.py +0 -0
  174. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/storage.py +0 -0
  175. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/utils.py +0 -0
  176. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/dc/values.py +0 -0
  177. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/file.py +0 -0
  178. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/hf.py +0 -0
  179. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/image.py +0 -0
  180. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/listing.py +0 -0
  181. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/listing_info.py +0 -0
  182. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/meta_formats.py +0 -0
  183. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/model_store.py +0 -0
  184. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/pytorch.py +0 -0
  185. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/settings.py +0 -0
  186. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/signal_schema.py +0 -0
  187. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/tar.py +0 -0
  188. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/text.py +0 -0
  189. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/udf.py +0 -0
  190. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/udf_signature.py +0 -0
  191. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/utils.py +0 -0
  192. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/video.py +0 -0
  193. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/webdataset.py +0 -0
  194. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/lib/webdataset_laion.py +0 -0
  195. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/listing.py +0 -0
  196. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/__init__.py +0 -0
  197. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/bbox.py +0 -0
  198. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/pose.py +0 -0
  199. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/segment.py +0 -0
  200. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  201. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  202. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/ultralytics/pose.py +0 -0
  203. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/ultralytics/segment.py +0 -0
  204. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/model/utils.py +0 -0
  205. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/node.py +0 -0
  206. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/nodes_fetcher.py +0 -0
  207. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/nodes_thread_pool.py +0 -0
  208. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/progress.py +0 -0
  209. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/py.typed +0 -0
  210. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/__init__.py +0 -0
  211. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/batch.py +0 -0
  212. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/dataset.py +0 -0
  213. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/dispatch.py +0 -0
  214. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/metrics.py +0 -0
  215. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/params.py +0 -0
  216. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/queue.py +0 -0
  217. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/schema.py +0 -0
  218. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/session.py +0 -0
  219. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/udf.py +0 -0
  220. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/query/utils.py +0 -0
  221. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/remote/__init__.py +0 -0
  222. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/script_meta.py +0 -0
  223. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/semver.py +0 -0
  224. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/__init__.py +0 -0
  225. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/default/__init__.py +0 -0
  226. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/default/base.py +0 -0
  227. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/__init__.py +0 -0
  228. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/aggregate.py +0 -0
  229. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/array.py +0 -0
  230. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/conditional.py +0 -0
  231. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/numeric.py +0 -0
  232. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/path.py +0 -0
  233. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/random.py +0 -0
  234. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/functions/string.py +0 -0
  235. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/selectable.py +0 -0
  236. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  237. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/sqlite/base.py +0 -0
  238. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/sqlite/types.py +0 -0
  239. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/sqlite/vector.py +0 -0
  240. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/types.py +0 -0
  241. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/sql/utils.py +0 -0
  242. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/telemetry.py +0 -0
  243. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/toolkit/__init__.py +0 -0
  244. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/toolkit/split.py +0 -0
  245. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/torch/__init__.py +0 -0
  246. {datachain-0.19.1 → datachain-0.19.2}/src/datachain/utils.py +0 -0
  247. {datachain-0.19.1 → datachain-0.19.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  248. {datachain-0.19.1 → datachain-0.19.2}/src/datachain.egg-info/entry_points.txt +0 -0
  249. {datachain-0.19.1 → datachain-0.19.2}/src/datachain.egg-info/requires.txt +0 -0
  250. {datachain-0.19.1 → datachain-0.19.2}/src/datachain.egg-info/top_level.txt +0 -0
  251. {datachain-0.19.1 → datachain-0.19.2}/tests/__init__.py +0 -0
  252. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/__init__.py +0 -0
  253. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/conftest.py +0 -0
  254. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  255. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  256. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/datasets/.gitignore +0 -0
  257. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  258. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/test_datachain.py +0 -0
  259. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/test_ls.py +0 -0
  260. {datachain-0.19.1 → datachain-0.19.2}/tests/benchmarks/test_version.py +0 -0
  261. {datachain-0.19.1 → datachain-0.19.2}/tests/conftest.py +0 -0
  262. {datachain-0.19.1 → datachain-0.19.2}/tests/data.py +0 -0
  263. {datachain-0.19.1 → datachain-0.19.2}/tests/examples/__init__.py +0 -0
  264. {datachain-0.19.1 → datachain-0.19.2}/tests/examples/test_examples.py +0 -0
  265. {datachain-0.19.1 → datachain-0.19.2}/tests/examples/test_wds_e2e.py +0 -0
  266. {datachain-0.19.1 → datachain-0.19.2}/tests/examples/wds_data.py +0 -0
  267. {datachain-0.19.1 → datachain-0.19.2}/tests/func/__init__.py +0 -0
  268. {datachain-0.19.1 → datachain-0.19.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  269. {datachain-0.19.1 → datachain-0.19.2}/tests/func/data/lena.jpg +0 -0
  270. {datachain-0.19.1 → datachain-0.19.2}/tests/func/fake-service-account-credentials.json +0 -0
  271. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/__init__.py +0 -0
  272. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/test_aggregate.py +0 -0
  273. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/test_array.py +0 -0
  274. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/test_conditional.py +0 -0
  275. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/test_numeric.py +0 -0
  276. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/test_path.py +0 -0
  277. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/test_random.py +0 -0
  278. {datachain-0.19.1 → datachain-0.19.2}/tests/func/functions/test_string.py +0 -0
  279. {datachain-0.19.1 → datachain-0.19.2}/tests/func/model/__init__.py +0 -0
  280. {datachain-0.19.1 → datachain-0.19.2}/tests/func/model/data/running-mask0.png +0 -0
  281. {datachain-0.19.1 → datachain-0.19.2}/tests/func/model/data/running-mask1.png +0 -0
  282. {datachain-0.19.1 → datachain-0.19.2}/tests/func/model/data/running.jpg +0 -0
  283. {datachain-0.19.1 → datachain-0.19.2}/tests/func/model/data/ships.jpg +0 -0
  284. {datachain-0.19.1 → datachain-0.19.2}/tests/func/model/test_yolo.py +0 -0
  285. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_batching.py +0 -0
  286. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_catalog.py +0 -0
  287. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_client.py +0 -0
  288. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_cloud_transfer.py +0 -0
  289. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_data_storage.py +0 -0
  290. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_datachain.py +0 -0
  291. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_datachain_merge.py +0 -0
  292. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_dataset_query.py +0 -0
  293. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_datasets.py +0 -0
  294. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_delta.py +0 -0
  295. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_feature_pickling.py +0 -0
  296. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_file.py +0 -0
  297. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_hf.py +0 -0
  298. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_hidden_field.py +0 -0
  299. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_image.py +0 -0
  300. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_listing.py +0 -0
  301. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_ls.py +0 -0
  302. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_meta_formats.py +0 -0
  303. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_metastore.py +0 -0
  304. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_metrics.py +0 -0
  305. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_pull.py +0 -0
  306. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_pytorch.py +0 -0
  307. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_query.py +0 -0
  308. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_read_database.py +0 -0
  309. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_retry.py +0 -0
  310. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_session.py +0 -0
  311. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_toolkit.py +0 -0
  312. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_video.py +0 -0
  313. {datachain-0.19.1 → datachain-0.19.2}/tests/func/test_warehouse.py +0 -0
  314. {datachain-0.19.1 → datachain-0.19.2}/tests/scripts/feature_class.py +0 -0
  315. {datachain-0.19.1 → datachain-0.19.2}/tests/scripts/feature_class_exception.py +0 -0
  316. {datachain-0.19.1 → datachain-0.19.2}/tests/scripts/feature_class_parallel.py +0 -0
  317. {datachain-0.19.1 → datachain-0.19.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  318. {datachain-0.19.1 → datachain-0.19.2}/tests/scripts/name_len_slow.py +0 -0
  319. {datachain-0.19.1 → datachain-0.19.2}/tests/test_atomicity.py +0 -0
  320. {datachain-0.19.1 → datachain-0.19.2}/tests/test_cli_e2e.py +0 -0
  321. {datachain-0.19.1 → datachain-0.19.2}/tests/test_import_time.py +0 -0
  322. {datachain-0.19.1 → datachain-0.19.2}/tests/test_query_e2e.py +0 -0
  323. {datachain-0.19.1 → datachain-0.19.2}/tests/test_telemetry.py +0 -0
  324. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/__init__.py +0 -0
  325. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/__init__.py +0 -0
  326. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/conftest.py +0 -0
  327. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_arrow.py +0 -0
  328. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_clip.py +0 -0
  329. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  330. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  331. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_diff.py +0 -0
  332. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_feature.py +0 -0
  333. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_feature_utils.py +0 -0
  334. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_file.py +0 -0
  335. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_hf.py +0 -0
  336. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_image.py +0 -0
  337. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_listing_info.py +0 -0
  338. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  339. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_schema.py +0 -0
  340. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_signal_schema.py +0 -0
  341. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  342. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_text.py +0 -0
  343. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_udf.py +0 -0
  344. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_udf_signature.py +0 -0
  345. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_utils.py +0 -0
  346. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/lib/test_webdataset.py +0 -0
  347. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/model/__init__.py +0 -0
  348. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/model/test_bbox.py +0 -0
  349. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/model/test_pose.py +0 -0
  350. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/model/test_segment.py +0 -0
  351. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/model/test_utils.py +0 -0
  352. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/__init__.py +0 -0
  353. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  354. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  355. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  356. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/test_array.py +0 -0
  357. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/test_conditional.py +0 -0
  358. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/test_path.py +0 -0
  359. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/test_random.py +0 -0
  360. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/test_selectable.py +0 -0
  361. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/sql/test_string.py +0 -0
  362. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_asyn.py +0 -0
  363. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_cache.py +0 -0
  364. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_catalog.py +0 -0
  365. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_catalog_loader.py +0 -0
  366. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_cli_parsing.py +0 -0
  367. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_client.py +0 -0
  368. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_client_gcs.py +0 -0
  369. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_client_s3.py +0 -0
  370. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_config.py +0 -0
  371. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_data_storage.py +0 -0
  372. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_database_engine.py +0 -0
  373. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_dataset.py +0 -0
  374. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_dispatch.py +0 -0
  375. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_fileslice.py +0 -0
  376. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_func.py +0 -0
  377. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_listing.py +0 -0
  378. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_metastore.py +0 -0
  379. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_module_exports.py +0 -0
  380. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_pytorch.py +0 -0
  381. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_query.py +0 -0
  382. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_query_metrics.py +0 -0
  383. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_query_params.py +0 -0
  384. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_script_meta.py +0 -0
  385. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_semver.py +0 -0
  386. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_serializer.py +0 -0
  387. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_session.py +0 -0
  388. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_utils.py +0 -0
  389. {datachain-0.19.1 → datachain-0.19.2}/tests/unit/test_warehouse.py +0 -0
  390. {datachain-0.19.1 → datachain-0.19.2}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.19.1
3
+ Version: 0.19.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -0,0 +1,42 @@
1
+ # auth team
2
+
3
+ Set the default team for Studio operations.
4
+
5
+ ## Synopsis
6
+
7
+ ```usage
8
+ usage: datachain auth team [-h] [-v] [-q] [--local] [team_name]
9
+ ```
10
+
11
+ ## Description
12
+
13
+ This command sets or shows the default team for Studio operations. By default, the team setting is saved globally, but you can use the `--local` option to set it for the current project.
14
+ If `team_name` is not passed, the default team currently in use is shown.
15
+
16
+ ## Arguments
17
+
18
+ * `team_name` - Optional. Name of the team to set as default
19
+
20
+ ## Options
21
+
22
+ * `--local` - Set team locally for the current project
23
+ * `-h`, `--help` - Show the help message and exit.
24
+ * `-v`, `--verbose` - Be verbose.
25
+ * `-q`, `--quiet` - Be quiet.
26
+
27
+ ## Examples
28
+
29
+ 1. Set default team for all projects:
30
+ ```bash
31
+ datachain auth team my-team
32
+ ```
33
+
34
+ 2. Set default team locally for the current project:
35
+ ```bash
36
+ datachain auth team --local my-team
37
+ ```
38
+
39
+ 3. Print the default team currently in use:
40
+ ```bash
41
+ datachain auth team
42
+ ```
@@ -31,3 +31,5 @@ DataChain is a command-line tool for wrangling unstructured AI data at scale. Us
31
31
  - Monitor job logs with [`datachain job logs`](job/logs.md)
32
32
 
33
33
  - Cancel running jobs with [`datachain job cancel`](job/cancel.md)
34
+
35
+ - Check the clusters available for jobs with [`datachain job clusters`](job/clusters.md)
@@ -0,0 +1,39 @@
1
+ # job clusters
2
+
3
+ List compute clusters in Studio.
4
+
5
+ ## Synopsis
6
+
7
+ ```usage
8
+ usage: datachain job clusters [-h] [-v] [-q] [--team TEAM]
9
+ ```
10
+
11
+ ## Description
12
+
13
+ This command lists compute clusters available in Studio. You can specify a team to list clusters for. The command provides information about the compute resources available for running jobs.
14
+
15
+ ## Options
16
+
17
+ * `--team TEAM` - Team to list clusters for (default: from config)
18
+ * `-h`, `--help` - Show the help message and exit
19
+ * `-v`, `--verbose` - Be verbose
20
+ * `-q`, `--quiet` - Be quiet
21
+
22
+ ## Examples
23
+
24
+ 1. List all clusters for the default team:
25
+ ```bash
26
+ datachain job clusters
27
+ ```
28
+
29
+ 2. List clusters for a specific team:
30
+ ```bash
31
+ datachain job clusters --team my-team
32
+ ```
33
+
34
+
35
+ ## Notes
36
+
37
+ * The command shows all compute clusters available to your team
38
+ * Clusters represent the compute resources where your jobs can run
39
+ * Use the `--team` option to view clusters for a different team
@@ -24,6 +24,7 @@ This command runs a job in Studio using the specified query file. You can config
24
24
  * `--team TEAM` - Team to run job for (default: from config)
25
25
  * `--env-file ENV_FILE` - File with environment variables for the job
26
26
  * `--env ENV` - Environment variables in KEY=VALUE format
27
+ * `--cluster-id CLUSTER_ID` - Compute cluster ID to run the job on
27
28
  * `--workers WORKERS` - Number of workers for the job
28
29
  * `--files FILES` - Additional files to include in the job
29
30
  * `--python-version PYTHON_VERSION` - Python version for the job (e.g., 3.9, 3.10, 3.11)
@@ -71,8 +72,17 @@ datachain job run --repository https://github.com/iterative/datachain query.py
71
72
  datachain job run --priority 2 query.py
72
73
  ```
73
74
 
75
+ 8. Run a job in a specific cluster
76
+ ```bash
77
+ # Get the cluster ID using the following command
78
+ datachain job clusters
79
+ # Use the ID of an active cluster from above
80
+ datachain job run --cluster-id 1 query.py
81
+ ```
82
+
74
83
  ## Notes
75
84
 
76
85
  * Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
77
86
  * To cancel a running job, use the `datachain job cancel` command
78
87
  * The job will continue running in Studio even after you stop viewing the logs
88
+ * You can get the list of compute clusters using the `datachain job clusters` command
@@ -97,6 +97,7 @@ nav:
97
97
  - logs: commands/job/logs.md
98
98
  - cancel: commands/job/cancel.md
99
99
  - ls: commands/job/ls.md
100
+ - clusters: commands/job/clusters.md
100
101
  - 📚 User Guide:
101
102
  - Overview: guide/index.md
102
103
  - 📡 Interacting with remote storage: guide/remotes.md
@@ -51,6 +51,13 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
51
51
  help="Environment variables in KEY=VALUE format",
52
52
  )
53
53
 
54
+ studio_run_parser.add_argument(
55
+ "--cluster-id",
56
+ type=int,
57
+ action="store",
58
+ help="Compute cluster ID to run the job on",
59
+ )
60
+
54
61
  studio_run_parser.add_argument(
55
62
  "--workers",
56
63
  type=int,
@@ -165,3 +172,21 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
165
172
  default=None,
166
173
  help="Team to check logs for (default: from config)",
167
174
  )
175
+
176
+ studio_clusters_help = "List compute clusters in Studio"
177
+ studio_clusters_description = "List compute clusters in Studio."
178
+
179
+ studio_clusters_parser = jobs_subparser.add_parser(
180
+ "clusters",
181
+ parents=[parent_parser],
182
+ description=studio_clusters_description,
183
+ help=studio_clusters_help,
184
+ formatter_class=CustomHelpFormatter,
185
+ )
186
+
187
+ studio_clusters_parser.add_argument(
188
+ "--team",
189
+ action="store",
190
+ default=None,
191
+ help="Team to list clusters for (default: from config)",
192
+ )
@@ -89,8 +89,13 @@ def add_auth_parser(subparsers, parent_parser) -> None:
89
89
  help="Remove the token from the local project config",
90
90
  )
91
91
 
92
- auth_team_help = "Set default team for Studio operations"
93
- auth_team_description = "Set the default team for Studio operations."
92
+ auth_team_help = "Set or show default team for Studio operations"
93
+ auth_team_description = (
94
+ "Set or show the default team for Studio operations. "
95
+ "This will be used globally by default. "
96
+ "Use --local to set the team locally for the current project. "
97
+ "If no team name is provided, the default team will be shown."
98
+ )
94
99
 
95
100
  team_parser = auth_subparser.add_parser(
96
101
  "team",
@@ -102,13 +107,15 @@ def add_auth_parser(subparsers, parent_parser) -> None:
102
107
  team_parser.add_argument(
103
108
  "team_name",
104
109
  action="store",
110
+ default=None,
111
+ nargs="?",
105
112
  help="Name of the team to set as default",
106
113
  )
107
114
  team_parser.add_argument(
108
- "--global",
115
+ "--local",
109
116
  action="store_true",
110
117
  default=False,
111
- help="Set team globally for all projects",
118
+ help="Set team locally for the current project",
112
119
  )
113
120
 
114
121
  auth_token_help = "View Studio authentication token" # noqa: S105
@@ -13,7 +13,16 @@ from sqlalchemy.sql import func as f
13
13
  from sqlalchemy.sql.expression import false, null, true
14
14
 
15
15
  from datachain.sql.functions import path as pathfunc
16
- from datachain.sql.types import Int, SQLType, UInt64
16
+ from datachain.sql.types import (
17
+ JSON,
18
+ Boolean,
19
+ DateTime,
20
+ Int,
21
+ Int64,
22
+ SQLType,
23
+ String,
24
+ UInt64,
25
+ )
17
26
 
18
27
  if TYPE_CHECKING:
19
28
  from sqlalchemy.engine.interfaces import Dialect
@@ -272,6 +281,19 @@ class DataTable:
272
281
  ),
273
282
  ]
274
283
 
284
+ @classmethod
285
+ def listing_columns(cls):
286
+ return [
287
+ sa.Column("file__source", String()),
288
+ sa.Column("file__path", String()),
289
+ sa.Column("file__size", Int64()),
290
+ sa.Column("file__version", String()),
291
+ sa.Column("file__etag", String()),
292
+ sa.Column("file__is_latest", Boolean()),
293
+ sa.Column("file__last_modified", DateTime()),
294
+ sa.Column("file__location", JSON()),
295
+ ]
296
+
275
297
  def dir_expansion(self):
276
298
  return DirExpansion(self.column)
277
299
 
@@ -0,0 +1,154 @@
1
+ from typing import (
2
+ TYPE_CHECKING,
3
+ Optional,
4
+ )
5
+
6
+ from datachain.lib.listing import LISTING_PREFIX, ls
7
+ from datachain.lib.listing_info import ListingInfo
8
+ from datachain.lib.settings import Settings
9
+ from datachain.lib.signal_schema import SignalSchema
10
+ from datachain.query import Session
11
+ from datachain.query.dataset import DatasetQuery, QueryStep, step_result
12
+
13
+ from .values import read_values
14
+
15
+ if TYPE_CHECKING:
16
+ from typing_extensions import ParamSpec
17
+
18
+ from datachain.dataset import DatasetVersion
19
+ from datachain.query.dataset import StepResult
20
+
21
+ from .datachain import DataChain
22
+
23
+ P = ParamSpec("P")
24
+
25
+
26
+ class ReadOnlyQueryStep(QueryStep):
27
+ """
28
+ This step is used to read the dataset in read-only mode.
29
+ It is used to avoid the need to read the table metadata from the warehouse.
30
+ This is useful when we want to list the files in the dataset.
31
+ """
32
+
33
+ def apply(self) -> "StepResult":
34
+ import sqlalchemy as sa
35
+
36
+ def q(*columns):
37
+ return sa.select(*columns)
38
+
39
+ table_name = self.catalog.warehouse.dataset_table_name(
40
+ self.dataset_name, self.dataset_version
41
+ )
42
+ dataset_row_cls = self.catalog.warehouse.schema.dataset_row_cls
43
+ table = dataset_row_cls.new_table(
44
+ table_name,
45
+ columns=(
46
+ [
47
+ *dataset_row_cls.sys_columns(),
48
+ *dataset_row_cls.listing_columns(),
49
+ ]
50
+ ),
51
+ )
52
+
53
+ return step_result(
54
+ q, table.columns, dependencies=[(self.dataset_name, self.dataset_version)]
55
+ )
56
+
57
+
58
+ def listings(
59
+ session: Optional[Session] = None,
60
+ in_memory: bool = False,
61
+ column: str = "listing",
62
+ **kwargs,
63
+ ) -> "DataChain":
64
+ """Generate chain with list of cached listings.
65
+ Listing is a special kind of dataset which has directory listing data of
66
+ some underlying storage (e.g. an S3 bucket).
67
+
68
+ Example:
69
+ ```py
70
+ import datachain as dc
71
+ dc.listings().show()
72
+ ```
73
+ """
74
+ session = Session.get(session, in_memory=in_memory)
75
+ catalog = kwargs.get("catalog") or session.catalog
76
+
77
+ return read_values(
78
+ session=session,
79
+ in_memory=in_memory,
80
+ output={column: ListingInfo},
81
+ **{column: catalog.listings()}, # type: ignore[arg-type]
82
+ )
83
+
84
+
85
+ def read_listing_dataset(
86
+ name: str,
87
+ version: Optional[str] = None,
88
+ path: str = "",
89
+ session: Optional["Session"] = None,
90
+ settings: Optional[dict] = None,
91
+ ) -> tuple["DataChain", "DatasetVersion"]:
92
+ """Read a listing dataset and return a DataChain and listing version.
93
+
94
+ Args:
95
+ name: Name of the dataset
96
+ version: Version of the dataset
97
+ path: Path within the listing to read. Path can have globs.
98
+ session: Optional Session object to use for reading
99
+ settings: Optional settings dictionary to use for reading
100
+
101
+ Returns:
102
+ tuple[DataChain, DatasetVersion]: A tuple containing:
103
+ - DataChain configured for listing files
104
+ - DatasetVersion object for the specified listing version
105
+
106
+ Example:
107
+ ```py
108
+ import datachain as dc
109
+ chain, listing_version = dc.read_listing_dataset(
110
+ "lst__s3://my-bucket/my-path", version="1.0.0", path="my-path"
111
+ )
112
+ chain.show()
113
+ ```
114
+ """
115
+ # Configure and return a DataChain for reading listing dataset files
116
+ # Uses ReadOnlyQueryStep to avoid warehouse metadata lookups
117
+ from datachain.lib.dc import Sys
118
+ from datachain.lib.file import File
119
+
120
+ from .datachain import DataChain
121
+
122
+ if not name.startswith(LISTING_PREFIX):
123
+ name = LISTING_PREFIX + name
124
+
125
+ session = Session.get(session)
126
+ dataset = session.catalog.get_dataset(name)
127
+ if version is None:
128
+ version = dataset.latest_version
129
+
130
+ query = DatasetQuery(
131
+ name=name,
132
+ session=session,
133
+ indexing_column_types=File._datachain_column_types,
134
+ fallback_to_studio=False,
135
+ )
136
+ if settings:
137
+ cfg = {**settings}
138
+ if "prefetch" not in cfg:
139
+ cfg["prefetch"] = 0
140
+ _settings = Settings(**cfg)
141
+ else:
142
+ _settings = Settings(prefetch=0)
143
+ signal_schema = SignalSchema({"sys": Sys, "file": File})
144
+
145
+ query.starting_step = ReadOnlyQueryStep(query.catalog, name, version)
146
+ query.version = version
147
+ # We already know that this is a listing dataset,
148
+ # so we can set the listing function to True
149
+ query.set_listing_fn(lambda: True)
150
+
151
+ chain = DataChain(query, _settings, signal_schema)
152
+ chain = ls(chain, path, recursive=True, column="file")
153
+
154
+ return chain, dataset.get_version(version)
@@ -30,6 +30,7 @@ DatasetExportSignedUrls = Optional[list[str]]
30
30
  FileUploadData = Optional[dict[str, Any]]
31
31
  JobData = Optional[dict[str, Any]]
32
32
  JobListData = dict[str, Any]
33
+ ClusterListData = dict[str, Any]
33
34
  logger = logging.getLogger("datachain")
34
35
 
35
36
  DATASET_ROWS_CHUNK_SIZE = 8192
@@ -391,6 +392,7 @@ class StudioClient:
391
392
  requirements: Optional[str] = None,
392
393
  repository: Optional[str] = None,
393
394
  priority: Optional[int] = None,
395
+ cluster_id: Optional[int] = None,
394
396
  ) -> Response[JobData]:
395
397
  data = {
396
398
  "query": query,
@@ -403,6 +405,7 @@ class StudioClient:
403
405
  "requirements": requirements,
404
406
  "repository": repository,
405
407
  "priority": priority,
408
+ "compute_cluster_id": cluster_id,
406
409
  }
407
410
  return self._send_request("datachain/job", data)
408
411
 
@@ -423,3 +426,6 @@ class StudioClient:
423
426
  ) -> Response[JobData]:
424
427
  url = f"datachain/job/{job_id}/cancel"
425
428
  return self._send_request(url, data={}, method="POST")
429
+
430
+ def get_clusters(self) -> Response[ClusterListData]:
431
+ return self._send_request("datachain/clusters", {}, method="GET")
@@ -41,6 +41,7 @@ def process_jobs_args(args: "Namespace"):
41
41
  args.req,
42
42
  args.req_file,
43
43
  args.priority,
44
+ args.cluster_id,
44
45
  )
45
46
 
46
47
  if args.cmd == "cancel":
@@ -51,6 +52,9 @@ def process_jobs_args(args: "Namespace"):
51
52
  if args.cmd == "ls":
52
53
  return list_jobs(args.status, args.team, args.limit)
53
54
 
55
+ if args.cmd == "clusters":
56
+ return list_clusters(args.team)
57
+
54
58
  raise DataChainError(f"Unknown command '{args.cmd}'.")
55
59
 
56
60
 
@@ -68,14 +72,24 @@ def process_auth_cli_args(args: "Namespace"):
68
72
  return logout(args.local)
69
73
  if args.cmd == "token":
70
74
  return token()
71
-
72
75
  if args.cmd == "team":
73
76
  return set_team(args)
74
77
  raise DataChainError(f"Unknown command '{args.cmd}'.")
75
78
 
76
79
 
77
80
  def set_team(args: "Namespace"):
78
- level = ConfigLevel.GLOBAL if args.__dict__.get("global") else ConfigLevel.LOCAL
81
+ if args.team_name is None:
82
+ config = Config().read().get("studio", {})
83
+ team = config.get("team")
84
+ if team:
85
+ print(f"Default team is '{team}'")
86
+ return 0
87
+
88
+ raise DataChainError(
89
+ "No default team set. Use `datachain auth team <team_name>` to set one."
90
+ )
91
+
92
+ level = ConfigLevel.LOCAL if args.local else ConfigLevel.GLOBAL
79
93
  config = Config(level)
80
94
  with config.edit() as conf:
81
95
  studio_conf = conf.get("studio", {})
@@ -121,6 +135,7 @@ def login(args: "Namespace"):
121
135
  level = ConfigLevel.LOCAL if args.local else ConfigLevel.GLOBAL
122
136
  config_path = save_config(hostname, access_token, level=level)
123
137
  print(f"Authentication complete. Saved token to {config_path}.")
138
+ print("You can now use 'datachain auth team' to set the default team.")
124
139
  return 0
125
140
 
126
141
 
@@ -268,6 +283,7 @@ def create_job(
268
283
  req: Optional[list[str]] = None,
269
284
  req_file: Optional[str] = None,
270
285
  priority: Optional[int] = None,
286
+ cluster_id: Optional[int] = None,
271
287
  ):
272
288
  query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
273
289
  with open(query_file) as f:
@@ -297,6 +313,7 @@ def create_job(
297
313
  repository=repository,
298
314
  requirements=requirements,
299
315
  priority=priority,
316
+ cluster_id=cluster_id,
300
317
  )
301
318
  if not response.ok:
302
319
  raise DataChainError(response.message)
@@ -380,3 +397,29 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
380
397
 
381
398
  client = StudioClient(team=team_name)
382
399
  show_logs_from_client(client, job_id)
400
+
401
+
402
+ def list_clusters(team_name: Optional[str]):
403
+ client = StudioClient(team=team_name)
404
+ response = client.get_clusters()
405
+ if not response.ok:
406
+ raise DataChainError(response.message)
407
+
408
+ clusters = response.data.get("clusters", [])
409
+ if not clusters:
410
+ print("No clusters found")
411
+ return
412
+
413
+ rows = [
414
+ {
415
+ "ID": cluster.get("id"),
416
+ "Status": cluster.get("status"),
417
+ "Cloud Provider": cluster.get("cloud_provider"),
418
+ "Cloud Credentials": cluster.get("cloud_credentials"),
419
+ "Is Active": cluster.get("is_active"),
420
+ "Max Workers": cluster.get("max_workers"),
421
+ }
422
+ for cluster in clusters
423
+ ]
424
+
425
+ print(tabulate.tabulate(rows, headers="keys", tablefmt="grid"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.19.1
3
+ Version: 0.19.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -32,6 +32,7 @@ docs/commands/auth/logout.md
32
32
  docs/commands/auth/team.md
33
33
  docs/commands/auth/token.md
34
34
  docs/commands/job/cancel.md
35
+ docs/commands/job/clusters.md
35
36
  docs/commands/job/logs.md
36
37
  docs/commands/job/ls.md
37
38
  docs/commands/job/run.md
@@ -109,13 +109,13 @@ def test_studio_token(capsys):
109
109
 
110
110
  def test_studio_team_local():
111
111
  assert main(["auth", "team", "team_name"]) == 0
112
- config = Config(ConfigLevel.LOCAL).read()
112
+ config = Config(ConfigLevel.GLOBAL).read()
113
113
  assert config["studio"]["team"] == "team_name"
114
114
 
115
115
 
116
116
  def test_studio_team_global():
117
- assert main(["auth", "team", "team_name", "--global"]) == 0
118
- config = Config(ConfigLevel.GLOBAL).read()
117
+ assert main(["auth", "team", "team_name", "--local"]) == 0
118
+ config = Config(ConfigLevel.LOCAL).read()
119
119
  assert config["studio"]["team"] == "team_name"
120
120
 
121
121
 
@@ -387,6 +387,8 @@ def test_studio_run(capsys, mocker, tmp_dir):
387
387
  "stupidity",
388
388
  "--repository",
389
389
  "https://github.com/iterative/datachain",
390
+ "--cluster-id",
391
+ "1",
390
392
  ]
391
393
  )
392
394
  == 0
@@ -425,4 +427,5 @@ def test_studio_run(capsys, mocker, tmp_dir):
425
427
  "team_name": "team_name",
426
428
  "repository": "https://github.com/iterative/datachain",
427
429
  "priority": 5,
430
+ "compute_cluster_id": 1,
428
431
  }
@@ -19,8 +19,9 @@ from datachain import Column
19
19
  from datachain.error import DatasetInvalidVersionError, DatasetVersionNotFoundError
20
20
  from datachain.lib.data_model import DataModel
21
21
  from datachain.lib.dc import C, DatasetPrepareError, Sys
22
+ from datachain.lib.dc.listings import read_listing_dataset
22
23
  from datachain.lib.file import File
23
- from datachain.lib.listing import LISTING_PREFIX
24
+ from datachain.lib.listing import LISTING_PREFIX, parse_listing_uri
24
25
  from datachain.lib.listing_info import ListingInfo
25
26
  from datachain.lib.signal_schema import (
26
27
  SignalRemoveError,
@@ -454,6 +455,56 @@ def test_listings_reindex_subpath_local_file_system(test_session, tmp_dir):
454
455
  assert dc.read_storage(subdir.as_uri(), session=test_session).count() == 1
455
456
 
456
457
 
458
+ @pytest.mark.parametrize("version", [None, "1.0.0"])
459
+ def test_listings_read_listing_dataset(test_session, tmp_dir, version):
460
+ df = pd.DataFrame(DF_DATA)
461
+ df.to_parquet(tmp_dir / "df.parquet")
462
+ uri = tmp_dir.as_uri()
463
+
464
+ ds_name, _, _ = parse_listing_uri(uri)
465
+ dc.read_storage(uri, session=test_session).exec()
466
+
467
+ chain, listing_version = read_listing_dataset(
468
+ ds_name, version=version, session=test_session
469
+ )
470
+ assert listing_version.num_objects == 1
471
+ assert listing_version.size > 1000
472
+ assert listing_version.size < 5000
473
+ assert listing_version.status == 4
474
+
475
+ assert chain.count() == 1
476
+ files = list(chain.collect("file"))
477
+ assert len(files) == 1
478
+ assert files[0].path == "df.parquet"
479
+ assert files[0].source == uri
480
+
481
+
482
+ def test_listings_read_listing_dataset_with_subpath(test_session, tmp_dir):
483
+ subdir = tmp_dir / "subdir"
484
+ os.mkdir(subdir)
485
+
486
+ df = pd.DataFrame(DF_DATA)
487
+ df.to_parquet(tmp_dir / "df.parquet")
488
+ df.to_parquet(tmp_dir / "df2.parquet")
489
+ df.to_parquet(subdir / "df3.parquet")
490
+
491
+ ds_name, _, _ = parse_listing_uri(tmp_dir.as_uri())
492
+ ds_name = ds_name.removeprefix(LISTING_PREFIX)
493
+ dc.read_storage(tmp_dir.as_uri(), session=test_session).exec()
494
+
495
+ chain, listing_version = read_listing_dataset(
496
+ ds_name, path="subdir", session=test_session
497
+ )
498
+ assert listing_version.num_objects == 3
499
+
500
+ # Chain is filtered for subdir
501
+ assert chain.count() == 1
502
+ files = list(chain.collect("file"))
503
+ assert len(files) == 1
504
+ assert files[0].path == "subdir/df3.parquet"
505
+ assert files[0].source == tmp_dir.as_uri()
506
+
507
+
457
508
  def test_preserve_feature_schema(test_session):
458
509
  ds = dc.read_records(dc.DataChain.DEFAULT_FILE_RECORD, session=test_session)
459
510
  ds = ds.gen(
@@ -1,36 +0,0 @@
1
- # auth team
2
-
3
- Set the default team for Studio operations.
4
-
5
- ## Synopsis
6
-
7
- ```usage
8
- usage: datachain auth team [-h] [-v] [-q] [--global] team_name
9
- ```
10
-
11
- ## Description
12
-
13
- This command sets the default team for Studio operations. By default, the team setting is project-specific, but you can use the `--global` option to set it for all projects.
14
-
15
- ## Arguments
16
-
17
- * `team_name` - Name of the team to set as default
18
-
19
- ## Options
20
-
21
- * `--global` - Set team globally for all projects
22
- * `-h`, `--help` - Show the help message and exit.
23
- * `-v`, `--verbose` - Be verbose.
24
- * `-q`, `--quiet` - Be quiet.
25
-
26
- ## Examples
27
-
28
- 1. Set default team for current project:
29
- ```bash
30
- datachain auth team my-team
31
- ```
32
-
33
- 2. Set default team globally for all projects:
34
- ```bash
35
- datachain auth team --global my-team
36
- ```