datachain 0.8.8__tar.gz → 0.8.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (306):
  1. {datachain-0.8.8 → datachain-0.8.9}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.8.8 → datachain-0.8.9}/PKG-INFO +4 -3
  3. {datachain-0.8.8 → datachain-0.8.9}/README.rst +2 -1
  4. {datachain-0.8.8 → datachain-0.8.9}/pyproject.toml +1 -1
  5. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/__init__.py +12 -4
  6. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/datasets.py +2 -3
  7. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/parser/__init__.py +51 -69
  8. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/parser/job.py +20 -25
  9. datachain-0.8.9/src/datachain/cli/parser/studio.py +102 -0
  10. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/parser/utils.py +1 -1
  11. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/local.py +1 -1
  12. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/arrow.py +1 -1
  13. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/convert/unflatten.py +1 -2
  14. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/dc.py +23 -4
  15. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/file.py +27 -4
  16. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/listing.py +4 -4
  17. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/pytorch.py +3 -1
  18. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/udf.py +56 -20
  19. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/bbox.py +9 -9
  20. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/pose.py +9 -9
  21. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/segment.py +6 -6
  22. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/progress.py +0 -13
  23. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/dataset.py +12 -10
  24. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/studio.py +15 -9
  25. {datachain-0.8.8 → datachain-0.8.9}/src/datachain.egg-info/PKG-INFO +4 -3
  26. {datachain-0.8.8 → datachain-0.8.9}/src/datachain.egg-info/requires.txt +1 -1
  27. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/test_datachain.py +5 -0
  28. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_datachain.py +31 -8
  29. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_pull.py +2 -2
  30. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_pytorch.py +15 -2
  31. {datachain-0.8.8 → datachain-0.8.9}/tests/test_cli_studio.py +0 -16
  32. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_file.py +16 -0
  33. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_pytorch.py +30 -0
  34. {datachain-0.8.8 → datachain-0.8.9}/tests/utils.py +1 -1
  35. datachain-0.8.8/src/datachain/cli/parser/studio.py +0 -126
  36. {datachain-0.8.8 → datachain-0.8.9}/.cruft.json +0 -0
  37. {datachain-0.8.8 → datachain-0.8.9}/.gitattributes +0 -0
  38. {datachain-0.8.8 → datachain-0.8.9}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  39. {datachain-0.8.8 → datachain-0.8.9}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  40. {datachain-0.8.8 → datachain-0.8.9}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  41. {datachain-0.8.8 → datachain-0.8.9}/.github/codecov.yaml +0 -0
  42. {datachain-0.8.8 → datachain-0.8.9}/.github/dependabot.yml +0 -0
  43. {datachain-0.8.8 → datachain-0.8.9}/.github/workflows/benchmarks.yml +0 -0
  44. {datachain-0.8.8 → datachain-0.8.9}/.github/workflows/release.yml +0 -0
  45. {datachain-0.8.8 → datachain-0.8.9}/.github/workflows/tests-studio.yml +0 -0
  46. {datachain-0.8.8 → datachain-0.8.9}/.github/workflows/tests.yml +0 -0
  47. {datachain-0.8.8 → datachain-0.8.9}/.github/workflows/update-template.yaml +0 -0
  48. {datachain-0.8.8 → datachain-0.8.9}/.gitignore +0 -0
  49. {datachain-0.8.8 → datachain-0.8.9}/CODE_OF_CONDUCT.rst +0 -0
  50. {datachain-0.8.8 → datachain-0.8.9}/LICENSE +0 -0
  51. {datachain-0.8.8 → datachain-0.8.9}/docs/assets/captioned_cartoons.png +0 -0
  52. {datachain-0.8.8 → datachain-0.8.9}/docs/assets/datachain-white.svg +0 -0
  53. {datachain-0.8.8 → datachain-0.8.9}/docs/assets/datachain.svg +0 -0
  54. {datachain-0.8.8 → datachain-0.8.9}/docs/contributing.md +0 -0
  55. {datachain-0.8.8 → datachain-0.8.9}/docs/css/github-permalink-style.css +0 -0
  56. {datachain-0.8.8 → datachain-0.8.9}/docs/examples.md +0 -0
  57. {datachain-0.8.8 → datachain-0.8.9}/docs/index.md +0 -0
  58. {datachain-0.8.8 → datachain-0.8.9}/docs/overrides/main.html +0 -0
  59. {datachain-0.8.8 → datachain-0.8.9}/docs/quick-start.md +0 -0
  60. {datachain-0.8.8 → datachain-0.8.9}/docs/references/datachain.md +0 -0
  61. {datachain-0.8.8 → datachain-0.8.9}/docs/references/datatype.md +0 -0
  62. {datachain-0.8.8 → datachain-0.8.9}/docs/references/file.md +0 -0
  63. {datachain-0.8.8 → datachain-0.8.9}/docs/references/index.md +0 -0
  64. {datachain-0.8.8 → datachain-0.8.9}/docs/references/sql.md +0 -0
  65. {datachain-0.8.8 → datachain-0.8.9}/docs/references/torch.md +0 -0
  66. {datachain-0.8.8 → datachain-0.8.9}/docs/references/udf.md +0 -0
  67. {datachain-0.8.8 → datachain-0.8.9}/docs/tutorials.md +0 -0
  68. {datachain-0.8.8 → datachain-0.8.9}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  69. {datachain-0.8.8 → datachain-0.8.9}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  70. {datachain-0.8.8 → datachain-0.8.9}/examples/computer_vision/openimage-detect.py +0 -0
  71. {datachain-0.8.8 → datachain-0.8.9}/examples/computer_vision/ultralytics-bbox.py +0 -0
  72. {datachain-0.8.8 → datachain-0.8.9}/examples/computer_vision/ultralytics-pose.py +0 -0
  73. {datachain-0.8.8 → datachain-0.8.9}/examples/computer_vision/ultralytics-segment.py +0 -0
  74. {datachain-0.8.8 → datachain-0.8.9}/examples/get_started/common_sql_functions.py +0 -0
  75. {datachain-0.8.8 → datachain-0.8.9}/examples/get_started/json-csv-reader.py +0 -0
  76. {datachain-0.8.8 → datachain-0.8.9}/examples/get_started/torch-loader.py +0 -0
  77. {datachain-0.8.8 → datachain-0.8.9}/examples/get_started/udfs/parallel.py +0 -0
  78. {datachain-0.8.8 → datachain-0.8.9}/examples/get_started/udfs/simple.py +0 -0
  79. {datachain-0.8.8 → datachain-0.8.9}/examples/get_started/udfs/stateful.py +0 -0
  80. {datachain-0.8.8 → datachain-0.8.9}/examples/llm_and_nlp/claude-query.py +0 -0
  81. {datachain-0.8.8 → datachain-0.8.9}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  82. {datachain-0.8.8 → datachain-0.8.9}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  83. {datachain-0.8.8 → datachain-0.8.9}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  84. {datachain-0.8.8 → datachain-0.8.9}/examples/multimodal/clip_inference.py +0 -0
  85. {datachain-0.8.8 → datachain-0.8.9}/examples/multimodal/hf_pipeline.py +0 -0
  86. {datachain-0.8.8 → datachain-0.8.9}/examples/multimodal/openai_image_desc_lib.py +0 -0
  87. {datachain-0.8.8 → datachain-0.8.9}/examples/multimodal/wds.py +0 -0
  88. {datachain-0.8.8 → datachain-0.8.9}/examples/multimodal/wds_filtered.py +0 -0
  89. {datachain-0.8.8 → datachain-0.8.9}/mkdocs.yml +0 -0
  90. {datachain-0.8.8 → datachain-0.8.9}/noxfile.py +0 -0
  91. {datachain-0.8.8 → datachain-0.8.9}/setup.cfg +0 -0
  92. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/__init__.py +0 -0
  93. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/__main__.py +0 -0
  94. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/asyn.py +0 -0
  95. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cache.py +0 -0
  96. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/catalog/__init__.py +0 -0
  97. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/catalog/catalog.py +0 -0
  98. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/catalog/datasource.py +0 -0
  99. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/catalog/loader.py +0 -0
  100. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/__init__.py +0 -0
  101. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/du.py +0 -0
  102. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/index.py +0 -0
  103. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/ls.py +0 -0
  104. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/misc.py +0 -0
  105. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/query.py +0 -0
  106. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/commands/show.py +0 -0
  107. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/cli/utils.py +0 -0
  108. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/__init__.py +0 -0
  109. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/azure.py +0 -0
  110. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/fileslice.py +0 -0
  111. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/fsspec.py +0 -0
  112. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/gcs.py +0 -0
  113. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/hf.py +0 -0
  114. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/client/s3.py +0 -0
  115. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/config.py +0 -0
  116. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/__init__.py +0 -0
  117. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/db_engine.py +0 -0
  118. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/job.py +0 -0
  119. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/metastore.py +0 -0
  120. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/schema.py +0 -0
  121. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/serializer.py +0 -0
  122. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/sqlite.py +0 -0
  123. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/data_storage/warehouse.py +0 -0
  124. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/dataset.py +0 -0
  125. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/diff/__init__.py +0 -0
  126. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/error.py +0 -0
  127. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/__init__.py +0 -0
  128. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/aggregate.py +0 -0
  129. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/array.py +0 -0
  130. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/base.py +0 -0
  131. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/conditional.py +0 -0
  132. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/func.py +0 -0
  133. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/numeric.py +0 -0
  134. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/path.py +0 -0
  135. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/random.py +0 -0
  136. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/string.py +0 -0
  137. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/func/window.py +0 -0
  138. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/job.py +0 -0
  139. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/__init__.py +0 -0
  140. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/clip.py +0 -0
  141. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/convert/__init__.py +0 -0
  142. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/convert/flatten.py +0 -0
  143. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/convert/python_to_sql.py +0 -0
  144. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/convert/sql_to_python.py +0 -0
  145. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  146. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/data_model.py +0 -0
  147. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/dataset_info.py +0 -0
  148. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/hf.py +0 -0
  149. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/image.py +0 -0
  150. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/listing_info.py +0 -0
  151. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/meta_formats.py +0 -0
  152. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/model_store.py +0 -0
  153. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/settings.py +0 -0
  154. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/signal_schema.py +0 -0
  155. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/tar.py +0 -0
  156. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/text.py +0 -0
  157. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/udf_signature.py +0 -0
  158. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/utils.py +0 -0
  159. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/vfile.py +0 -0
  160. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/webdataset.py +0 -0
  161. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/lib/webdataset_laion.py +0 -0
  162. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/listing.py +0 -0
  163. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/__init__.py +0 -0
  164. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/ultralytics/__init__.py +0 -0
  165. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/ultralytics/bbox.py +0 -0
  166. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/ultralytics/pose.py +0 -0
  167. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/model/ultralytics/segment.py +0 -0
  168. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/node.py +0 -0
  169. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/nodes_fetcher.py +0 -0
  170. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/nodes_thread_pool.py +0 -0
  171. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/py.typed +0 -0
  172. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/__init__.py +0 -0
  173. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/batch.py +0 -0
  174. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/dispatch.py +0 -0
  175. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/metrics.py +0 -0
  176. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/params.py +0 -0
  177. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/queue.py +0 -0
  178. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/schema.py +0 -0
  179. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/session.py +0 -0
  180. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/udf.py +0 -0
  181. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/query/utils.py +0 -0
  182. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/remote/__init__.py +0 -0
  183. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/remote/studio.py +0 -0
  184. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/__init__.py +0 -0
  185. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/default/__init__.py +0 -0
  186. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/default/base.py +0 -0
  187. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/__init__.py +0 -0
  188. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/aggregate.py +0 -0
  189. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/array.py +0 -0
  190. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/conditional.py +0 -0
  191. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/numeric.py +0 -0
  192. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/path.py +0 -0
  193. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/random.py +0 -0
  194. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/functions/string.py +0 -0
  195. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/selectable.py +0 -0
  196. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/sqlite/__init__.py +0 -0
  197. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/sqlite/base.py +0 -0
  198. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/sqlite/types.py +0 -0
  199. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/sqlite/vector.py +0 -0
  200. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/types.py +0 -0
  201. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/sql/utils.py +0 -0
  202. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/telemetry.py +0 -0
  203. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/toolkit/__init__.py +0 -0
  204. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/toolkit/split.py +0 -0
  205. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/torch/__init__.py +0 -0
  206. {datachain-0.8.8 → datachain-0.8.9}/src/datachain/utils.py +0 -0
  207. {datachain-0.8.8 → datachain-0.8.9}/src/datachain.egg-info/SOURCES.txt +0 -0
  208. {datachain-0.8.8 → datachain-0.8.9}/src/datachain.egg-info/dependency_links.txt +0 -0
  209. {datachain-0.8.8 → datachain-0.8.9}/src/datachain.egg-info/entry_points.txt +0 -0
  210. {datachain-0.8.8 → datachain-0.8.9}/src/datachain.egg-info/top_level.txt +0 -0
  211. {datachain-0.8.8 → datachain-0.8.9}/tests/__init__.py +0 -0
  212. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/__init__.py +0 -0
  213. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/conftest.py +0 -0
  214. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  215. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/datasets/.dvc/config +0 -0
  216. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/datasets/.gitignore +0 -0
  217. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  218. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/test_ls.py +0 -0
  219. {datachain-0.8.8 → datachain-0.8.9}/tests/benchmarks/test_version.py +0 -0
  220. {datachain-0.8.8 → datachain-0.8.9}/tests/conftest.py +0 -0
  221. {datachain-0.8.8 → datachain-0.8.9}/tests/data.py +0 -0
  222. {datachain-0.8.8 → datachain-0.8.9}/tests/examples/__init__.py +0 -0
  223. {datachain-0.8.8 → datachain-0.8.9}/tests/examples/test_examples.py +0 -0
  224. {datachain-0.8.8 → datachain-0.8.9}/tests/examples/test_wds_e2e.py +0 -0
  225. {datachain-0.8.8 → datachain-0.8.9}/tests/examples/wds_data.py +0 -0
  226. {datachain-0.8.8 → datachain-0.8.9}/tests/func/__init__.py +0 -0
  227. {datachain-0.8.8 → datachain-0.8.9}/tests/func/fake-service-account-credentials.json +0 -0
  228. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_catalog.py +0 -0
  229. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_client.py +0 -0
  230. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_dataset_query.py +0 -0
  231. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_datasets.py +0 -0
  232. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_feature_pickling.py +0 -0
  233. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_listing.py +0 -0
  234. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_ls.py +0 -0
  235. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_meta_formats.py +0 -0
  236. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_metrics.py +0 -0
  237. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_query.py +0 -0
  238. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_session.py +0 -0
  239. {datachain-0.8.8 → datachain-0.8.9}/tests/func/test_toolkit.py +0 -0
  240. {datachain-0.8.8 → datachain-0.8.9}/tests/scripts/feature_class.py +0 -0
  241. {datachain-0.8.8 → datachain-0.8.9}/tests/scripts/feature_class_exception.py +0 -0
  242. {datachain-0.8.8 → datachain-0.8.9}/tests/scripts/feature_class_parallel.py +0 -0
  243. {datachain-0.8.8 → datachain-0.8.9}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  244. {datachain-0.8.8 → datachain-0.8.9}/tests/scripts/name_len_slow.py +0 -0
  245. {datachain-0.8.8 → datachain-0.8.9}/tests/test_atomicity.py +0 -0
  246. {datachain-0.8.8 → datachain-0.8.9}/tests/test_cli_e2e.py +0 -0
  247. {datachain-0.8.8 → datachain-0.8.9}/tests/test_query_e2e.py +0 -0
  248. {datachain-0.8.8 → datachain-0.8.9}/tests/test_telemetry.py +0 -0
  249. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/__init__.py +0 -0
  250. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/__init__.py +0 -0
  251. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/conftest.py +0 -0
  252. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_arrow.py +0 -0
  253. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_clip.py +0 -0
  254. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_datachain.py +0 -0
  255. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  256. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_datachain_merge.py +0 -0
  257. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_diff.py +0 -0
  258. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_feature.py +0 -0
  259. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_feature_utils.py +0 -0
  260. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_hf.py +0 -0
  261. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_image.py +0 -0
  262. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_listing_info.py +0 -0
  263. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_models.py +0 -0
  264. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_schema.py +0 -0
  265. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_signal_schema.py +0 -0
  266. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_sql_to_python.py +0 -0
  267. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_text.py +0 -0
  268. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_udf_signature.py +0 -0
  269. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_utils.py +0 -0
  270. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/lib/test_webdataset.py +0 -0
  271. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/__init__.py +0 -0
  272. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/sqlite/__init__.py +0 -0
  273. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/sqlite/test_types.py +0 -0
  274. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/sqlite/test_utils.py +0 -0
  275. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/test_array.py +0 -0
  276. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/test_conditional.py +0 -0
  277. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/test_path.py +0 -0
  278. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/test_random.py +0 -0
  279. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/test_selectable.py +0 -0
  280. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/sql/test_string.py +0 -0
  281. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_asyn.py +0 -0
  282. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_cache.py +0 -0
  283. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_catalog.py +0 -0
  284. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_catalog_loader.py +0 -0
  285. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_cli_parsing.py +0 -0
  286. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_client.py +0 -0
  287. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_client_gcs.py +0 -0
  288. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_client_s3.py +0 -0
  289. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_config.py +0 -0
  290. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_data_storage.py +0 -0
  291. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_database_engine.py +0 -0
  292. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_dataset.py +0 -0
  293. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_diff.py +0 -0
  294. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_dispatch.py +0 -0
  295. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_fileslice.py +0 -0
  296. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_func.py +0 -0
  297. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_listing.py +0 -0
  298. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_metastore.py +0 -0
  299. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_module_exports.py +0 -0
  300. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_query.py +0 -0
  301. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_query_metrics.py +0 -0
  302. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_query_params.py +0 -0
  303. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_serializer.py +0 -0
  304. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_session.py +0 -0
  305. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_utils.py +0 -0
  306. {datachain-0.8.8 → datachain-0.8.9}/tests/unit/test_warehouse.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.8.6'
27
+ rev: 'v0.9.1'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.8
3
+ Version: 0.8.9
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -99,7 +99,7 @@ Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
99
99
  Requires-Dist: pdfplumber==0.11.5; extra == "examples"
100
100
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
101
101
  Requires-Dist: onnx==1.16.1; extra == "examples"
102
- Requires-Dist: ultralytics==8.3.58; extra == "examples"
102
+ Requires-Dist: ultralytics==8.3.61; extra == "examples"
103
103
 
104
104
  ================
105
105
  |logo| DataChain
@@ -189,13 +189,14 @@ Python code:
189
189
 
190
190
  .. code:: py
191
191
 
192
+ import os
192
193
  from mistralai import Mistral
193
194
  from datachain import File, DataChain, Column
194
195
 
195
196
  PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
196
197
 
197
198
  def eval_dialogue(file: File) -> bool:
198
- client = Mistral()
199
+ client = Mistral(api_key = os.environ["MISTRAL_API_KEY"])
199
200
  response = client.chat.complete(
200
201
  model="open-mixtral-8x22b",
201
202
  messages=[{"role": "system", "content": PROMPT},
@@ -86,13 +86,14 @@ Python code:
86
86
 
87
87
  .. code:: py
88
88
 
89
+ import os
89
90
  from mistralai import Mistral
90
91
  from datachain import File, DataChain, Column
91
92
 
92
93
  PROMPT = "Was this dialog successful? Answer in a single word: Success or Failure."
93
94
 
94
95
  def eval_dialogue(file: File) -> bool:
95
- client = Mistral()
96
+ client = Mistral(api_key = os.environ["MISTRAL_API_KEY"])
96
97
  response = client.chat.complete(
97
98
  model="open-mixtral-8x22b",
98
99
  messages=[{"role": "system", "content": PROMPT},
@@ -112,7 +112,7 @@ examples = [
112
112
  "pdfplumber==0.11.5",
113
113
  "huggingface_hub[hf_transfer]",
114
114
  "onnx==1.16.1",
115
- "ultralytics==8.3.58"
115
+ "ultralytics==8.3.61"
116
116
  ]
117
117
 
118
118
  [project.urls]
@@ -39,6 +39,10 @@ def main(argv: Optional[list[str]] = None) -> int:
39
39
  if args.command in ("internal-run-udf", "internal-run-udf-worker"):
40
40
  return handle_udf(args.command)
41
41
 
42
+ if args.command is None:
43
+ datachain_parser.print_help(sys.stderr)
44
+ return 1
45
+
42
46
  logger.addHandler(logging.StreamHandler())
43
47
  logging_level = get_logging_level(args)
44
48
  logger.setLevel(logging_level)
@@ -120,12 +124,17 @@ def handle_clone_command(args, catalog):
120
124
  recursive=bool(args.recursive),
121
125
  no_glob=args.no_glob,
122
126
  no_cp=args.no_cp,
123
- edatachain=args.edatachain,
124
- edatachain_file=args.edatachain_file,
125
127
  )
126
128
 
127
129
 
128
130
  def handle_dataset_command(args, catalog):
131
+ if args.datasets_cmd is None:
132
+ print(
133
+ f"Use 'datachain {args.command} --help' to see available options",
134
+ file=sys.stderr,
135
+ )
136
+ return 1
137
+
129
138
  dataset_commands = {
130
139
  "pull": lambda: catalog.pull_dataset(
131
140
  args.dataset,
@@ -134,8 +143,6 @@ def handle_dataset_command(args, catalog):
134
143
  local_ds_version=args.local_version,
135
144
  cp=args.cp,
136
145
  force=bool(args.force),
137
- edatachain=args.edatachain,
138
- edatachain_file=args.edatachain_file,
139
146
  ),
140
147
  "edit": lambda: edit_dataset(
141
148
  catalog,
@@ -187,6 +194,7 @@ def handle_dataset_command(args, catalog):
187
194
  handler = dataset_commands.get(args.datasets_cmd)
188
195
  if handler:
189
196
  return handler()
197
+
190
198
  raise Exception(f"Unexpected command {args.datasets_cmd}")
191
199
 
192
200
 
@@ -11,6 +11,7 @@ if TYPE_CHECKING:
11
11
  from datachain.cli.utils import determine_flavors
12
12
  from datachain.config import Config
13
13
  from datachain.error import DatasetNotFoundError
14
+ from datachain.studio import list_datasets as list_datasets_studio
14
15
 
15
16
 
16
17
  def list_datasets(
@@ -20,14 +21,12 @@ def list_datasets(
20
21
  all: bool = True,
21
22
  team: Optional[str] = None,
22
23
  ):
23
- from datachain.studio import list_datasets
24
-
25
24
  token = Config().read().get("studio", {}).get("token")
26
25
  all, local, studio = determine_flavors(studio, local, all, token)
27
26
 
28
27
  local_datasets = set(list_datasets_local(catalog)) if all or local else set()
29
28
  studio_datasets = (
30
- set(list_datasets(team=team)) if (all or studio) and token else set()
29
+ set(list_datasets_studio(team=team)) if (all or studio) and token else set()
31
30
  )
32
31
 
33
32
  rows = [
@@ -1,3 +1,4 @@
1
+ import argparse
1
2
  from argparse import ArgumentParser
2
3
  from importlib.metadata import PackageNotFoundError, version
3
4
 
@@ -18,7 +19,8 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
18
19
  __version__ = "unknown"
19
20
 
20
21
  parser = ArgumentParser(
21
- description="DataChain: Wrangle unstructured AI data at scale", prog="datachain"
22
+ description="DataChain: Wrangle unstructured AI data at scale.",
23
+ prog="datachain",
22
24
  )
23
25
  parser.add_argument("-V", "--version", action="version", version=__version__)
24
26
 
@@ -31,13 +33,13 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
31
33
  parent_parser.add_argument(
32
34
  "--anon",
33
35
  action="store_true",
34
- help="AWS anon (aka awscli's --no-sign-request)",
36
+ help="anon flag for remote storage (like awscli's --no-sign-request)",
35
37
  )
36
38
  parent_parser.add_argument(
37
39
  "-u", "--update", action="count", default=0, help="Update cache"
38
40
  )
39
41
  parent_parser.add_argument(
40
- "-v", "--verbose", action="count", default=0, help="Verbose"
42
+ "-v", "--verbose", action="count", default=0, help="Be verbose"
41
43
  )
42
44
  parent_parser.add_argument(
43
45
  "-q", "--quiet", action="count", default=0, help="Be quiet"
@@ -46,24 +48,23 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
46
48
  "--debug-sql",
47
49
  action="store_true",
48
50
  default=False,
49
- help="Show All SQL Queries (very verbose output, for debugging only)",
51
+ help=argparse.SUPPRESS,
50
52
  )
51
53
  parent_parser.add_argument(
52
54
  "--pdb",
53
55
  action="store_true",
54
56
  default=False,
55
- help="Drop into the pdb debugger on fatal exception",
57
+ help=argparse.SUPPRESS,
56
58
  )
57
59
 
58
60
  subp = parser.add_subparsers(
59
61
  title="Available Commands",
60
62
  metavar="command",
61
63
  dest="command",
62
- help=f"Use `{parser.prog} command --help` for command-specific help.",
63
- required=True,
64
+ help=f"Use `{parser.prog} command --help` for command-specific help",
64
65
  )
65
66
  parse_cp = subp.add_parser(
66
- "cp", parents=[parent_parser], description="Copy data files from the cloud"
67
+ "cp", parents=[parent_parser], description="Copy data files from the cloud."
67
68
  )
68
69
  add_sources_arg(parse_cp).complete = shtab.DIR # type: ignore[attr-defined]
69
70
  parse_cp.add_argument("output", type=str, help="Output")
@@ -72,7 +73,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
72
73
  "--force",
73
74
  default=False,
74
75
  action="store_true",
75
- help="Force creating outputs",
76
+ help="Force creating files even if they already exist",
76
77
  )
77
78
  parse_cp.add_argument(
78
79
  "-r",
@@ -90,7 +91,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
90
91
  )
91
92
 
92
93
  parse_clone = subp.add_parser(
93
- "clone", parents=[parent_parser], description="Copy data files from the cloud"
94
+ "clone", parents=[parent_parser], description="Copy data files from the cloud."
94
95
  )
95
96
  add_sources_arg(parse_clone).complete = shtab.DIR # type: ignore[attr-defined]
96
97
  parse_clone.add_argument("output", type=str, help="Output")
@@ -121,16 +122,6 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
121
122
  action="store_true",
122
123
  help="Do not copy files, just create a dataset",
123
124
  )
124
- parse_clone.add_argument(
125
- "--edatachain",
126
- default=False,
127
- action="store_true",
128
- help="Create a .edatachain file",
129
- )
130
- parse_clone.add_argument(
131
- "--edatachain-file",
132
- help="Use a different filename for the resulting .edatachain file",
133
- )
134
125
 
135
126
  add_studio_parser(subp, parent_parser)
136
127
  add_jobs_parser(subp, parent_parser)
@@ -139,22 +130,22 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
139
130
  "dataset",
140
131
  aliases=["ds"],
141
132
  parents=[parent_parser],
142
- description="Commands for managing datasers",
133
+ description="Commands for managing datasets.",
143
134
  )
144
135
  datasets_subparser = datasets_parser.add_subparsers(
145
136
  dest="datasets_cmd",
146
- help="Use `datachain datasets CMD --help` to display command specific help",
137
+ help="Use `datachain dataset CMD --help` to display command-specific help",
147
138
  )
148
139
 
149
140
  parse_pull = datasets_subparser.add_parser(
150
141
  "pull",
151
142
  parents=[parent_parser],
152
- description="Pull specific dataset version from SaaS",
143
+ description="Pull specific dataset version from Studio.",
153
144
  )
154
145
  parse_pull.add_argument(
155
146
  "dataset",
156
147
  type=str,
157
- help="Name and version of remote dataset created in SaaS",
148
+ help="Name and version of remote dataset created in Studio",
158
149
  )
159
150
  parse_pull.add_argument("-o", "--output", type=str, help="Output")
160
151
  parse_pull.add_argument(
@@ -178,16 +169,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
178
169
  action="store_true",
179
170
  help="Copy actual files after pulling remote dataset into local DB",
180
171
  )
181
- parse_pull.add_argument(
182
- "--edatachain",
183
- default=False,
184
- action="store_true",
185
- help="Create .edatachain file",
186
- )
187
- parse_pull.add_argument(
188
- "--edatachain-file",
189
- help="Use a different filename for the resulting .edatachain file",
190
- )
172
+
191
173
  parse_pull.add_argument(
192
174
  "--local-name",
193
175
  action="store",
@@ -202,7 +184,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
202
184
  )
203
185
 
204
186
  parse_edit_dataset = datasets_subparser.add_parser(
205
- "edit", parents=[parent_parser], description="Edit dataset metadata"
187
+ "edit", parents=[parent_parser], description="Edit dataset metadata."
206
188
  )
207
189
  parse_edit_dataset.add_argument("name", type=str, help="Dataset name")
208
190
  parse_edit_dataset.add_argument(
@@ -244,41 +226,41 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
244
226
  "--team",
245
227
  action="store",
246
228
  default=None,
247
- help="The team to edit a dataset. By default, it will use team from config.",
229
+ help="The team to edit a dataset. By default, it will use team from config",
248
230
  )
249
231
 
250
- datasets_parser = datasets_subparser.add_parser(
251
- "ls", parents=[parent_parser], description="List datasets"
232
+ datasets_ls_parser = datasets_subparser.add_parser(
233
+ "ls", parents=[parent_parser], description="List datasets."
252
234
  )
253
- datasets_parser.add_argument(
235
+ datasets_ls_parser.add_argument(
254
236
  "--studio",
255
237
  action="store_true",
256
238
  default=False,
257
239
  help="List the files in the Studio",
258
240
  )
259
- datasets_parser.add_argument(
241
+ datasets_ls_parser.add_argument(
260
242
  "-L",
261
243
  "--local",
262
244
  action="store_true",
263
245
  default=False,
264
246
  help="List local files only",
265
247
  )
266
- datasets_parser.add_argument(
248
+ datasets_ls_parser.add_argument(
267
249
  "-a",
268
250
  "--all",
269
251
  action="store_true",
270
252
  default=True,
271
253
  help="List all files including hidden files",
272
254
  )
273
- datasets_parser.add_argument(
255
+ datasets_ls_parser.add_argument(
274
256
  "--team",
275
257
  action="store",
276
258
  default=None,
277
- help="The team to list datasets for. By default, it will use team from config.",
259
+ help="The team to list datasets for. By default, it will use team from config",
278
260
  )
279
261
 
280
262
  rm_dataset_parser = datasets_subparser.add_parser(
281
- "rm", parents=[parent_parser], description="Removes dataset", aliases=["remove"]
263
+ "rm", parents=[parent_parser], description="Remove dataset.", aliases=["remove"]
282
264
  )
283
265
  rm_dataset_parser.add_argument("name", type=str, help="Dataset name")
284
266
  rm_dataset_parser.add_argument(
@@ -292,7 +274,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
292
274
  "--force",
293
275
  default=False,
294
276
  action=BooleanOptionalAction,
295
- help="Force delete registered dataset with all of it's versions",
277
+ help="Force delete registered dataset with all of its versions",
296
278
  )
297
279
  rm_dataset_parser.add_argument(
298
280
  "--studio",
@@ -318,13 +300,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
318
300
  "--team",
319
301
  action="store",
320
302
  default=None,
321
- help="The team to delete a dataset. By default, it will use team from config.",
303
+ help="The team to delete a dataset. By default, it will use team from config",
322
304
  )
323
305
 
324
306
  dataset_stats_parser = datasets_subparser.add_parser(
325
- "stats",
326
- parents=[parent_parser],
327
- description="Shows basic dataset stats",
307
+ "stats", parents=[parent_parser], description="Show basic dataset statistics."
328
308
  )
329
309
  dataset_stats_parser.add_argument("name", type=str, help="Dataset name")
330
310
  dataset_stats_parser.add_argument(
@@ -349,7 +329,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
349
329
  )
350
330
 
351
331
  parse_ls = subp.add_parser(
352
- "ls", parents=[parent_parser], description="List storage contents"
332
+ "ls", parents=[parent_parser], description="List storage contents."
353
333
  )
354
334
  add_sources_arg(parse_ls, nargs="*")
355
335
  parse_ls.add_argument(
@@ -357,7 +337,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
357
337
  "--long",
358
338
  action="count",
359
339
  default=0,
360
- help="List files in the long format",
340
+ help="List files in long format",
361
341
  )
362
342
  parse_ls.add_argument(
363
343
  "--studio",
@@ -383,11 +363,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
383
363
  "--team",
384
364
  action="store",
385
365
  default=None,
386
- help="The team to list datasets for. By default, it will use team from config.",
366
+ help="The team to list datasets for. By default, it will use team from config",
387
367
  )
388
368
 
389
369
  parse_du = subp.add_parser(
390
- "du", parents=[parent_parser], description="Display space usage"
370
+ "du", parents=[parent_parser], description="Display space usage."
391
371
  )
392
372
  add_sources_arg(parse_du)
393
373
  parse_du.add_argument(
@@ -405,8 +385,8 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
405
385
  type=int,
406
386
  metavar="N",
407
387
  help=(
408
- "Display sizes for N directory depths below the given directory, "
409
- "the default is 0 (summarize provided directory only)."
388
+ "Display sizes up to N directory levels deep "
389
+ "(default: 0, summarize provided directory only)"
410
390
  ),
411
391
  )
412
392
  parse_du.add_argument(
@@ -417,32 +397,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
417
397
  )
418
398
 
419
399
  parse_find = subp.add_parser(
420
- "find", parents=[parent_parser], description="Search in a directory hierarchy"
400
+ "find", parents=[parent_parser], description="Search in a directory hierarchy."
421
401
  )
422
402
  add_sources_arg(parse_find)
423
403
  parse_find.add_argument(
424
404
  "--name",
425
405
  type=str,
426
406
  action="append",
427
- help="Filename to match pattern.",
407
+ help="Match filename pattern",
428
408
  )
429
409
  parse_find.add_argument(
430
410
  "--iname",
431
411
  type=str,
432
412
  action="append",
433
- help="Like -name but case insensitive.",
413
+ help="Match filename pattern (case insensitive)",
434
414
  )
435
415
  parse_find.add_argument(
436
416
  "--path",
437
417
  type=str,
438
418
  action="append",
439
- help="Path to match pattern.",
419
+ help="Path to match pattern",
440
420
  )
441
421
  parse_find.add_argument(
442
422
  "--ipath",
443
423
  type=str,
444
424
  action="append",
445
- help="Like -path but case insensitive.",
425
+ help="Like -path but case insensitive",
446
426
  )
447
427
  parse_find.add_argument(
448
428
  "--size",
@@ -450,7 +430,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
450
430
  help=(
451
431
  "Filter by size (+ is greater or equal, - is less or equal). "
452
432
  "Specified size is in bytes, or use a suffix like K, M, G for "
453
- "kilobytes, megabytes, gigabytes, etc."
433
+ "kilobytes, megabytes, gigabytes, etc"
454
434
  ),
455
435
  )
456
436
  parse_find.add_argument(
@@ -470,14 +450,14 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
470
450
  )
471
451
 
472
452
  parse_index = subp.add_parser(
473
- "index", parents=[parent_parser], description="Index storage location"
453
+ "index", parents=[parent_parser], description="Index storage location."
474
454
  )
475
455
  add_sources_arg(parse_index)
476
456
 
477
457
  show_parser = subp.add_parser(
478
458
  "show",
479
459
  parents=[parent_parser],
480
- description="Create a new dataset with a query script",
460
+ description="Create a new dataset with a query script.",
481
461
  )
482
462
  show_parser.add_argument("name", type=str, help="Dataset name")
483
463
  show_parser.add_argument(
@@ -493,7 +473,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
493
473
  query_parser = subp.add_parser(
494
474
  "query",
495
475
  parents=[parent_parser],
496
- description="Create a new dataset with a query script",
476
+ description="Create a new dataset with a query script.",
497
477
  )
498
478
  query_parser.add_argument(
499
479
  "script", metavar="<script.py>", type=str, help="Filepath for script"
@@ -507,7 +487,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
507
487
  metavar="N",
508
488
  help=(
509
489
  "Use multiprocessing to run any query script UDFs with N worker processes. "
510
- "N defaults to the CPU count."
490
+ "N defaults to the CPU count"
511
491
  ),
512
492
  )
513
493
  query_parser.add_argument(
@@ -520,10 +500,12 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
520
500
  )
521
501
 
522
502
  subp.add_parser(
523
- "clear-cache", parents=[parent_parser], description="Clear the local file cache"
503
+ "clear-cache",
504
+ parents=[parent_parser],
505
+ description="Clear the local file cache.",
524
506
  )
525
507
  subp.add_parser(
526
- "gc", parents=[parent_parser], description="Garbage collect temporary tables"
508
+ "gc", parents=[parent_parser], description="Garbage collect temporary tables."
527
509
  )
528
510
 
529
511
  subp.add_parser("internal-run-udf", parents=[parent_parser])
@@ -536,12 +518,12 @@ def add_completion_parser(subparsers, parents):
536
518
  parser = subparsers.add_parser(
537
519
  "completion",
538
520
  parents=parents,
539
- description="Output shell completion script",
521
+ description="Output shell completion script.",
540
522
  )
541
523
  parser.add_argument(
542
524
  "-s",
543
525
  "--shell",
544
- help="Shell syntax for completions.",
526
+ help="Shell syntax for completions",
545
527
  default="bash",
546
528
  choices=shtab.SUPPORTED_SHELLS,
547
529
  )
@@ -1,19 +1,16 @@
1
1
  def add_jobs_parser(subparsers, parent_parser) -> None:
2
- jobs_help = "Commands to handle the Job running with Iterative Studio"
3
- jobs_description = (
4
- "This will help us to run, cancel and view the status of the job in Studio. "
5
- )
2
+ jobs_help = "Manage jobs in Studio"
3
+ jobs_description = "Commands to manage job execution in Studio."
6
4
  jobs_parser = subparsers.add_parser(
7
5
  "job", parents=[parent_parser], description=jobs_description, help=jobs_help
8
6
  )
9
7
  jobs_subparser = jobs_parser.add_subparsers(
10
8
  dest="cmd",
11
- help="Use `DataChain studio CMD --help` to display command-specific help.",
12
- required=True,
9
+ help="Use `datachain studio CMD --help` to display command-specific help",
13
10
  )
14
11
 
15
12
  studio_run_help = "Run a job in Studio"
16
- studio_run_description = "This command runs a job in Studio."
13
+ studio_run_description = "Run a job in Studio."
17
14
 
18
15
  studio_run_parser = jobs_subparser.add_parser(
19
16
  "run",
@@ -25,56 +22,56 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
25
22
  studio_run_parser.add_argument(
26
23
  "query_file",
27
24
  action="store",
28
- help="The query file to run.",
25
+ help="Query file to run",
29
26
  )
30
27
 
31
28
  studio_run_parser.add_argument(
32
29
  "--team",
33
30
  action="store",
34
31
  default=None,
35
- help="The team to run a job for. By default, it will use team from config.",
32
+ help="Team to run job for (default: from config)",
36
33
  )
37
34
  studio_run_parser.add_argument(
38
35
  "--env-file",
39
36
  action="store",
40
- help="File containing environment variables to set for the job.",
37
+ help="File with environment variables for the job",
41
38
  )
42
39
 
43
40
  studio_run_parser.add_argument(
44
41
  "--env",
45
42
  nargs="+",
46
- help="Environment variable. Can be specified multiple times. Format: KEY=VALUE",
43
+ help="Environment variables in KEY=VALUE format",
47
44
  )
48
45
 
49
46
  studio_run_parser.add_argument(
50
47
  "--workers",
51
48
  type=int,
52
- help="Number of workers to use for the job.",
49
+ help="Number of workers for the job",
53
50
  )
54
51
  studio_run_parser.add_argument(
55
52
  "--files",
56
53
  nargs="+",
57
- help="Files to include in the job.",
54
+ help="Additional files to include in the job",
58
55
  )
59
56
  studio_run_parser.add_argument(
60
57
  "--python-version",
61
58
  action="store",
62
- help="Python version to use for the job (e.g. '3.9', '3.10', '3.11').",
59
+ help="Python version for the job (e.g., 3.9, 3.10, 3.11)",
63
60
  )
64
61
  studio_run_parser.add_argument(
65
62
  "--req-file",
66
63
  action="store",
67
- help="File containing Python package requirements.",
64
+ help="Python requirements file",
68
65
  )
69
66
 
70
67
  studio_run_parser.add_argument(
71
68
  "--req",
72
69
  nargs="+",
73
- help="Python package requirement. Can be specified multiple times.",
70
+ help="Python package requirements",
74
71
  )
75
72
 
76
73
  studio_cancel_help = "Cancel a job in Studio"
77
- studio_cancel_description = "This command cancels a job in Studio."
74
+ studio_cancel_description = "Cancel a running job in Studio."
78
75
 
79
76
  studio_cancel_parser = jobs_subparser.add_parser(
80
77
  "cancel",
@@ -86,19 +83,17 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
86
83
  studio_cancel_parser.add_argument(
87
84
  "job_id",
88
85
  action="store",
89
- help="The job ID to cancel.",
86
+ help="Job ID to cancel",
90
87
  )
91
88
  studio_cancel_parser.add_argument(
92
89
  "--team",
93
90
  action="store",
94
91
  default=None,
95
- help="The team to cancel a job for. By default, it will use team from config.",
92
+ help="Team to cancel job for (default: from config)",
96
93
  )
97
94
 
98
- studio_log_help = "Show the logs and latest status of Jobs in Studio"
99
- studio_log_description = (
100
- "This will display the logs and latest status of jobs in Studio"
101
- )
95
+ studio_log_help = "Show job logs and status in Studio"
96
+ studio_log_description = "Display logs and current status of jobs in Studio."
102
97
 
103
98
  studio_log_parser = jobs_subparser.add_parser(
104
99
  "logs",
@@ -110,11 +105,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
110
105
  studio_log_parser.add_argument(
111
106
  "job_id",
112
107
  action="store",
113
- help="The job ID to show the logs.",
108
+ help="Job ID to show logs for",
114
109
  )
115
110
  studio_log_parser.add_argument(
116
111
  "--team",
117
112
  action="store",
118
113
  default=None,
119
- help="The team to check the logs. By default, it will use team from config.",
114
+ help="Team to check logs for (default: from config)",
120
115
  )