datachain 0.27.0__tar.gz → 0.28.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (406)
  1. {datachain-0.27.0 → datachain-0.28.1}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.27.0 → datachain-0.28.1}/PKG-INFO +2 -2
  3. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/job/run.md +8 -0
  4. {datachain-0.27.0 → datachain-0.28.1}/mkdocs.yml +1 -1
  5. {datachain-0.27.0 → datachain-0.28.1}/pyproject.toml +1 -1
  6. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/parser/job.py +5 -0
  7. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/job.py +2 -1
  8. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/datachain.py +9 -4
  9. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/file.py +53 -1
  10. datachain-0.28.1/src/datachain/lib/utils.py +155 -0
  11. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/studio.py +33 -11
  12. {datachain-0.27.0 → datachain-0.28.1}/src/datachain.egg-info/PKG-INFO +2 -2
  13. {datachain-0.27.0 → datachain-0.28.1}/src/datachain.egg-info/requires.txt +1 -1
  14. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_datachain.py +17 -6
  15. {datachain-0.27.0 → datachain-0.28.1}/tests/test_cli_studio.py +1 -1
  16. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_file.py +47 -1
  17. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_utils.py +70 -1
  18. datachain-0.27.0/src/datachain/lib/utils.py +0 -59
  19. {datachain-0.27.0 → datachain-0.28.1}/.cruft.json +0 -0
  20. {datachain-0.27.0 → datachain-0.28.1}/.gitattributes +0 -0
  21. {datachain-0.27.0 → datachain-0.28.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  22. {datachain-0.27.0 → datachain-0.28.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  23. {datachain-0.27.0 → datachain-0.28.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  24. {datachain-0.27.0 → datachain-0.28.1}/.github/codecov.yaml +0 -0
  25. {datachain-0.27.0 → datachain-0.28.1}/.github/dependabot.yml +0 -0
  26. {datachain-0.27.0 → datachain-0.28.1}/.github/workflows/benchmarks.yml +0 -0
  27. {datachain-0.27.0 → datachain-0.28.1}/.github/workflows/release.yml +0 -0
  28. {datachain-0.27.0 → datachain-0.28.1}/.github/workflows/tests-studio.yml +0 -0
  29. {datachain-0.27.0 → datachain-0.28.1}/.github/workflows/tests.yml +0 -0
  30. {datachain-0.27.0 → datachain-0.28.1}/.github/workflows/update-template.yaml +0 -0
  31. {datachain-0.27.0 → datachain-0.28.1}/.gitignore +0 -0
  32. {datachain-0.27.0 → datachain-0.28.1}/CODE_OF_CONDUCT.rst +0 -0
  33. {datachain-0.27.0 → datachain-0.28.1}/LICENSE +0 -0
  34. {datachain-0.27.0 → datachain-0.28.1}/README.rst +0 -0
  35. {datachain-0.27.0 → datachain-0.28.1}/docs/assets/captioned_cartoons.png +0 -0
  36. {datachain-0.27.0 → datachain-0.28.1}/docs/assets/datachain-white.svg +0 -0
  37. {datachain-0.27.0 → datachain-0.28.1}/docs/assets/datachain.svg +0 -0
  38. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/auth/login.md +0 -0
  39. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/auth/logout.md +0 -0
  40. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/auth/team.md +0 -0
  41. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/auth/token.md +0 -0
  42. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/index.md +0 -0
  43. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/job/cancel.md +0 -0
  44. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/job/clusters.md +0 -0
  45. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/job/logs.md +0 -0
  46. {datachain-0.27.0 → datachain-0.28.1}/docs/commands/job/ls.md +0 -0
  47. {datachain-0.27.0 → datachain-0.28.1}/docs/contributing.md +0 -0
  48. {datachain-0.27.0 → datachain-0.28.1}/docs/css/github-permalink-style.css +0 -0
  49. {datachain-0.27.0 → datachain-0.28.1}/docs/examples.md +0 -0
  50. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/db_migrations.md +0 -0
  51. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/delta.md +0 -0
  52. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/env.md +0 -0
  53. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/index.md +0 -0
  54. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/namespaces.md +0 -0
  55. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/processing.md +0 -0
  56. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/remotes.md +0 -0
  57. {datachain-0.27.0 → datachain-0.28.1}/docs/guide/retry.md +0 -0
  58. {datachain-0.27.0 → datachain-0.28.1}/docs/index.md +0 -0
  59. {datachain-0.27.0 → datachain-0.28.1}/docs/overrides/main.html +0 -0
  60. {datachain-0.27.0 → datachain-0.28.1}/docs/quick-start.md +0 -0
  61. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/arrowrow.md +0 -0
  62. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/bbox.md +0 -0
  63. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/file.md +0 -0
  64. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/imagefile.md +0 -0
  65. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/index.md +0 -0
  66. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/pose.md +0 -0
  67. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/segment.md +0 -0
  68. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/tarvfile.md +0 -0
  69. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/textfile.md +0 -0
  70. {datachain-0.27.0 → datachain-0.28.1}/docs/references/data-types/videofile.md +0 -0
  71. {datachain-0.27.0 → datachain-0.28.1}/docs/references/datachain.md +0 -0
  72. {datachain-0.27.0 → datachain-0.28.1}/docs/references/func.md +0 -0
  73. {datachain-0.27.0 → datachain-0.28.1}/docs/references/index.md +0 -0
  74. {datachain-0.27.0 → datachain-0.28.1}/docs/references/toolkit.md +0 -0
  75. {datachain-0.27.0 → datachain-0.28.1}/docs/references/torch.md +0 -0
  76. {datachain-0.27.0 → datachain-0.28.1}/docs/references/udf.md +0 -0
  77. {datachain-0.27.0 → datachain-0.28.1}/docs/tutorials.md +0 -0
  78. {datachain-0.27.0 → datachain-0.28.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  79. {datachain-0.27.0 → datachain-0.28.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  80. {datachain-0.27.0 → datachain-0.28.1}/examples/computer_vision/openimage-detect.py +0 -0
  81. {datachain-0.27.0 → datachain-0.28.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
  82. {datachain-0.27.0 → datachain-0.28.1}/examples/computer_vision/ultralytics-pose.py +0 -0
  83. {datachain-0.27.0 → datachain-0.28.1}/examples/computer_vision/ultralytics-segment.py +0 -0
  84. {datachain-0.27.0 → datachain-0.28.1}/examples/get_started/common_sql_functions.py +0 -0
  85. {datachain-0.27.0 → datachain-0.28.1}/examples/get_started/json-csv-reader.py +0 -0
  86. {datachain-0.27.0 → datachain-0.28.1}/examples/get_started/torch-loader.py +0 -0
  87. {datachain-0.27.0 → datachain-0.28.1}/examples/get_started/udfs/parallel.py +0 -0
  88. {datachain-0.27.0 → datachain-0.28.1}/examples/get_started/udfs/simple.py +0 -0
  89. {datachain-0.27.0 → datachain-0.28.1}/examples/get_started/udfs/stateful.py +0 -0
  90. {datachain-0.27.0 → datachain-0.28.1}/examples/incremental_processing/delta.py +0 -0
  91. {datachain-0.27.0 → datachain-0.28.1}/examples/incremental_processing/retry.py +0 -0
  92. {datachain-0.27.0 → datachain-0.28.1}/examples/incremental_processing/utils.py +0 -0
  93. {datachain-0.27.0 → datachain-0.28.1}/examples/llm_and_nlp/claude-query.py +0 -0
  94. {datachain-0.27.0 → datachain-0.28.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  95. {datachain-0.27.0 → datachain-0.28.1}/examples/multimodal/audio-to-text.py +0 -0
  96. {datachain-0.27.0 → datachain-0.28.1}/examples/multimodal/clip_inference.py +0 -0
  97. {datachain-0.27.0 → datachain-0.28.1}/examples/multimodal/hf_pipeline.py +0 -0
  98. {datachain-0.27.0 → datachain-0.28.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  99. {datachain-0.27.0 → datachain-0.28.1}/examples/multimodal/wds.py +0 -0
  100. {datachain-0.27.0 → datachain-0.28.1}/examples/multimodal/wds_filtered.py +0 -0
  101. {datachain-0.27.0 → datachain-0.28.1}/noxfile.py +0 -0
  102. {datachain-0.27.0 → datachain-0.28.1}/setup.cfg +0 -0
  103. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/__init__.py +0 -0
  104. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/__main__.py +0 -0
  105. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/asyn.py +0 -0
  106. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cache.py +0 -0
  107. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/catalog/__init__.py +0 -0
  108. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/catalog/catalog.py +0 -0
  109. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/catalog/datasource.py +0 -0
  110. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/catalog/loader.py +0 -0
  111. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/__init__.py +0 -0
  112. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/__init__.py +0 -0
  113. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/datasets.py +0 -0
  114. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/du.py +0 -0
  115. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/index.py +0 -0
  116. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/ls.py +0 -0
  117. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/misc.py +0 -0
  118. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/query.py +0 -0
  119. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/commands/show.py +0 -0
  120. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/parser/__init__.py +0 -0
  121. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/parser/studio.py +0 -0
  122. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/parser/utils.py +0 -0
  123. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/cli/utils.py +0 -0
  124. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/__init__.py +0 -0
  125. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/azure.py +0 -0
  126. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/fileslice.py +0 -0
  127. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/fsspec.py +0 -0
  128. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/gcs.py +0 -0
  129. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/hf.py +0 -0
  130. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/local.py +0 -0
  131. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/client/s3.py +0 -0
  132. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/config.py +0 -0
  133. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/__init__.py +0 -0
  134. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/db_engine.py +0 -0
  135. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/metastore.py +0 -0
  136. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/schema.py +0 -0
  137. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/serializer.py +0 -0
  138. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/sqlite.py +0 -0
  139. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/data_storage/warehouse.py +0 -0
  140. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/dataset.py +0 -0
  141. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/delta.py +0 -0
  142. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/diff/__init__.py +0 -0
  143. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/error.py +0 -0
  144. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/fs/__init__.py +0 -0
  145. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/fs/reference.py +0 -0
  146. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/fs/utils.py +0 -0
  147. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/__init__.py +0 -0
  148. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/aggregate.py +0 -0
  149. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/array.py +0 -0
  150. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/base.py +0 -0
  151. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/conditional.py +0 -0
  152. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/func.py +0 -0
  153. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/numeric.py +0 -0
  154. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/path.py +0 -0
  155. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/random.py +0 -0
  156. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/string.py +0 -0
  157. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/func/window.py +0 -0
  158. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/job.py +0 -0
  159. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/__init__.py +0 -0
  160. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/arrow.py +0 -0
  161. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/audio.py +0 -0
  162. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/clip.py +0 -0
  163. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/convert/__init__.py +0 -0
  164. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/convert/flatten.py +0 -0
  165. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
  166. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
  167. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/convert/unflatten.py +0 -0
  168. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  169. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/data_model.py +0 -0
  170. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dataset_info.py +0 -0
  171. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/__init__.py +0 -0
  172. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/csv.py +0 -0
  173. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/database.py +0 -0
  174. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/datasets.py +0 -0
  175. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/hf.py +0 -0
  176. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/json.py +0 -0
  177. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/listings.py +0 -0
  178. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/pandas.py +0 -0
  179. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/parquet.py +0 -0
  180. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/records.py +0 -0
  181. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/storage.py +0 -0
  182. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/utils.py +0 -0
  183. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/dc/values.py +0 -0
  184. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/hf.py +0 -0
  185. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/image.py +0 -0
  186. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/listing.py +0 -0
  187. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/listing_info.py +0 -0
  188. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/meta_formats.py +0 -0
  189. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/model_store.py +0 -0
  190. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/namespaces.py +0 -0
  191. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/projects.py +0 -0
  192. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/pytorch.py +0 -0
  193. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/settings.py +0 -0
  194. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/signal_schema.py +0 -0
  195. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/tar.py +0 -0
  196. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/text.py +0 -0
  197. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/udf.py +0 -0
  198. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/udf_signature.py +0 -0
  199. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/video.py +0 -0
  200. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/webdataset.py +0 -0
  201. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/lib/webdataset_laion.py +0 -0
  202. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/listing.py +0 -0
  203. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/__init__.py +0 -0
  204. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/bbox.py +0 -0
  205. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/pose.py +0 -0
  206. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/segment.py +0 -0
  207. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/ultralytics/__init__.py +0 -0
  208. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/ultralytics/bbox.py +0 -0
  209. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/ultralytics/pose.py +0 -0
  210. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/ultralytics/segment.py +0 -0
  211. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/model/utils.py +0 -0
  212. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/namespace.py +0 -0
  213. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/node.py +0 -0
  214. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/nodes_fetcher.py +0 -0
  215. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/nodes_thread_pool.py +0 -0
  216. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/progress.py +0 -0
  217. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/project.py +0 -0
  218. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/py.typed +0 -0
  219. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/__init__.py +0 -0
  220. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/batch.py +0 -0
  221. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/dataset.py +0 -0
  222. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/dispatch.py +0 -0
  223. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/metrics.py +0 -0
  224. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/params.py +0 -0
  225. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/queue.py +0 -0
  226. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/schema.py +0 -0
  227. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/session.py +0 -0
  228. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/udf.py +0 -0
  229. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/query/utils.py +0 -0
  230. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/remote/__init__.py +0 -0
  231. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/remote/studio.py +0 -0
  232. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/script_meta.py +0 -0
  233. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/semver.py +0 -0
  234. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/__init__.py +0 -0
  235. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/default/__init__.py +0 -0
  236. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/default/base.py +0 -0
  237. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/__init__.py +0 -0
  238. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/aggregate.py +0 -0
  239. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/array.py +0 -0
  240. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/conditional.py +0 -0
  241. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/numeric.py +0 -0
  242. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/path.py +0 -0
  243. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/random.py +0 -0
  244. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/functions/string.py +0 -0
  245. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/selectable.py +0 -0
  246. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  247. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/sqlite/base.py +0 -0
  248. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/sqlite/types.py +0 -0
  249. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/sqlite/vector.py +0 -0
  250. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/types.py +0 -0
  251. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/sql/utils.py +0 -0
  252. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/telemetry.py +0 -0
  253. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/toolkit/__init__.py +0 -0
  254. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/toolkit/split.py +0 -0
  255. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/torch/__init__.py +0 -0
  256. {datachain-0.27.0 → datachain-0.28.1}/src/datachain/utils.py +0 -0
  257. {datachain-0.27.0 → datachain-0.28.1}/src/datachain.egg-info/SOURCES.txt +0 -0
  258. {datachain-0.27.0 → datachain-0.28.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  259. {datachain-0.27.0 → datachain-0.28.1}/src/datachain.egg-info/entry_points.txt +0 -0
  260. {datachain-0.27.0 → datachain-0.28.1}/src/datachain.egg-info/top_level.txt +0 -0
  261. {datachain-0.27.0 → datachain-0.28.1}/tests/__init__.py +0 -0
  262. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/__init__.py +0 -0
  263. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/conftest.py +0 -0
  264. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  265. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  266. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/datasets/.gitignore +0 -0
  267. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  268. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/test_datachain.py +0 -0
  269. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/test_ls.py +0 -0
  270. {datachain-0.27.0 → datachain-0.28.1}/tests/benchmarks/test_version.py +0 -0
  271. {datachain-0.27.0 → datachain-0.28.1}/tests/conftest.py +0 -0
  272. {datachain-0.27.0 → datachain-0.28.1}/tests/data.py +0 -0
  273. {datachain-0.27.0 → datachain-0.28.1}/tests/examples/__init__.py +0 -0
  274. {datachain-0.27.0 → datachain-0.28.1}/tests/examples/test_examples.py +0 -0
  275. {datachain-0.27.0 → datachain-0.28.1}/tests/examples/test_wds_e2e.py +0 -0
  276. {datachain-0.27.0 → datachain-0.28.1}/tests/examples/wds_data.py +0 -0
  277. {datachain-0.27.0 → datachain-0.28.1}/tests/func/__init__.py +0 -0
  278. {datachain-0.27.0 → datachain-0.28.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  279. {datachain-0.27.0 → datachain-0.28.1}/tests/func/data/lena.jpg +0 -0
  280. {datachain-0.27.0 → datachain-0.28.1}/tests/func/fake-service-account-credentials.json +0 -0
  281. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/__init__.py +0 -0
  282. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/test_aggregate.py +0 -0
  283. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/test_array.py +0 -0
  284. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/test_conditional.py +0 -0
  285. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/test_numeric.py +0 -0
  286. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/test_path.py +0 -0
  287. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/test_random.py +0 -0
  288. {datachain-0.27.0 → datachain-0.28.1}/tests/func/functions/test_string.py +0 -0
  289. {datachain-0.27.0 → datachain-0.28.1}/tests/func/model/__init__.py +0 -0
  290. {datachain-0.27.0 → datachain-0.28.1}/tests/func/model/data/running-mask0.png +0 -0
  291. {datachain-0.27.0 → datachain-0.28.1}/tests/func/model/data/running-mask1.png +0 -0
  292. {datachain-0.27.0 → datachain-0.28.1}/tests/func/model/data/running.jpg +0 -0
  293. {datachain-0.27.0 → datachain-0.28.1}/tests/func/model/data/ships.jpg +0 -0
  294. {datachain-0.27.0 → datachain-0.28.1}/tests/func/model/test_yolo.py +0 -0
  295. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_audio.py +0 -0
  296. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_batching.py +0 -0
  297. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_catalog.py +0 -0
  298. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_client.py +0 -0
  299. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_cloud_transfer.py +0 -0
  300. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_data_storage.py +0 -0
  301. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_datachain_merge.py +0 -0
  302. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_dataset_query.py +0 -0
  303. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_datasets.py +0 -0
  304. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_delta.py +0 -0
  305. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_feature_pickling.py +0 -0
  306. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_file.py +0 -0
  307. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_hf.py +0 -0
  308. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_hidden_field.py +0 -0
  309. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_image.py +0 -0
  310. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_listing.py +0 -0
  311. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_ls.py +0 -0
  312. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_meta_formats.py +0 -0
  313. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_metastore.py +0 -0
  314. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_metrics.py +0 -0
  315. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_pull.py +0 -0
  316. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_pytorch.py +0 -0
  317. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_query.py +0 -0
  318. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_read_database.py +0 -0
  319. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_read_dataset_remote.py +0 -0
  320. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  321. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_retry.py +0 -0
  322. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_session.py +0 -0
  323. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_studio_datetime_parsing.py +0 -0
  324. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_toolkit.py +0 -0
  325. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_video.py +0 -0
  326. {datachain-0.27.0 → datachain-0.28.1}/tests/func/test_warehouse.py +0 -0
  327. {datachain-0.27.0 → datachain-0.28.1}/tests/scripts/feature_class.py +0 -0
  328. {datachain-0.27.0 → datachain-0.28.1}/tests/scripts/feature_class_exception.py +0 -0
  329. {datachain-0.27.0 → datachain-0.28.1}/tests/scripts/feature_class_parallel.py +0 -0
  330. {datachain-0.27.0 → datachain-0.28.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  331. {datachain-0.27.0 → datachain-0.28.1}/tests/scripts/name_len_slow.py +0 -0
  332. {datachain-0.27.0 → datachain-0.28.1}/tests/test_atomicity.py +0 -0
  333. {datachain-0.27.0 → datachain-0.28.1}/tests/test_cli_e2e.py +0 -0
  334. {datachain-0.27.0 → datachain-0.28.1}/tests/test_import_time.py +0 -0
  335. {datachain-0.27.0 → datachain-0.28.1}/tests/test_query_e2e.py +0 -0
  336. {datachain-0.27.0 → datachain-0.28.1}/tests/test_telemetry.py +0 -0
  337. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/__init__.py +0 -0
  338. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/__init__.py +0 -0
  339. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/conftest.py +0 -0
  340. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_arrow.py +0 -0
  341. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_audio.py +0 -0
  342. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_clip.py +0 -0
  343. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_datachain.py +0 -0
  344. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  345. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  346. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_diff.py +0 -0
  347. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_feature.py +0 -0
  348. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_feature_utils.py +0 -0
  349. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_hf.py +0 -0
  350. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_image.py +0 -0
  351. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_listing_info.py +0 -0
  352. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_namespace.py +0 -0
  353. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_partition_by.py +0 -0
  354. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_project.py +0 -0
  355. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_python_to_sql.py +0 -0
  356. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_schema.py +0 -0
  357. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_signal_schema.py +0 -0
  358. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_sql_to_python.py +0 -0
  359. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_text.py +0 -0
  360. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_udf.py +0 -0
  361. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_udf_signature.py +0 -0
  362. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/lib/test_webdataset.py +0 -0
  363. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/model/__init__.py +0 -0
  364. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/model/test_bbox.py +0 -0
  365. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/model/test_pose.py +0 -0
  366. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/model/test_segment.py +0 -0
  367. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/model/test_utils.py +0 -0
  368. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/__init__.py +0 -0
  369. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  370. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/sqlite/test_types.py +0 -0
  371. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  372. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/test_array.py +0 -0
  373. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/test_conditional.py +0 -0
  374. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/test_path.py +0 -0
  375. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/test_random.py +0 -0
  376. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/test_selectable.py +0 -0
  377. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/sql/test_string.py +0 -0
  378. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_asyn.py +0 -0
  379. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_cache.py +0 -0
  380. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_catalog.py +0 -0
  381. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_catalog_loader.py +0 -0
  382. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_cli_parsing.py +0 -0
  383. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_client.py +0 -0
  384. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_client_gcs.py +0 -0
  385. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_client_s3.py +0 -0
  386. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_config.py +0 -0
  387. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_data_storage.py +0 -0
  388. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_database_engine.py +0 -0
  389. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_dataset.py +0 -0
  390. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_dispatch.py +0 -0
  391. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_fileslice.py +0 -0
  392. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_func.py +0 -0
  393. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_listing.py +0 -0
  394. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_metastore.py +0 -0
  395. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_module_exports.py +0 -0
  396. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_pytorch.py +0 -0
  397. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_query.py +0 -0
  398. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_query_metrics.py +0 -0
  399. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_query_params.py +0 -0
  400. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_script_meta.py +0 -0
  401. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_semver.py +0 -0
  402. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_serializer.py +0 -0
  403. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_session.py +0 -0
  404. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_utils.py +0 -0
  405. {datachain-0.27.0 → datachain-0.28.1}/tests/unit/test_warehouse.py +0 -0
  406. {datachain-0.27.0 → datachain-0.28.1}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.4'
27
+ rev: 'v0.12.5'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.27.0
3
+ Version: 0.28.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
45
45
  Requires-Dist: Pillow<12,>=10.0.0
46
46
  Requires-Dist: msgpack<2,>=1.0.4
47
47
  Requires-Dist: psutil
48
- Requires-Dist: huggingface_hub
48
+ Requires-Dist: huggingface_hub<0.34.0
49
49
  Requires-Dist: iterative-telemetry>=0.0.10
50
50
  Requires-Dist: platformdirs
51
51
  Requires-Dist: dvc-studio-client<1,>=0.21
@@ -14,6 +14,7 @@ usage: datachain job run [-h] [-v] [-q] [--team TEAM] [--env-file ENV_FILE]
14
14
  [--req-file REQ_FILE] [--req REQ [REQ ...]]
15
15
  [--priority PRIORITY]
16
16
  [--start-time START_TIME] [--cron CRON]
17
+ [--no-wait]
17
18
  file
18
19
  ```
19
20
 
@@ -40,6 +41,7 @@ This command runs a job in Studio using the specified query file. You can config
40
41
  * `--priority PRIORITY` - Priority for the job in range 0-5. Lower value is higher priority (default: 5)
41
42
  * `--start-time START_TIME` - Time to schedule the task in YYYY-MM-DDTHH:mm format or natural language.
42
43
  * `--cron CRON` - Cron expression for the cron task.
44
+ * `--no-wait` - Do not wait for the job to finish.
43
45
  * `-h`, `--help` - Show the help message and exit.
44
46
  * `-v`, `--verbose` - Be verbose.
45
47
  * `-q`, `--quiet` - Be quiet.
@@ -131,6 +133,12 @@ datachain job run --cron "@monthly" query.py
131
133
  datachain job run --start-time "tomorrow 3pm" --cron "0 0 * * *" query.py
132
134
  ```
133
135
 
136
+ 12. Start the job and do not wait for the job to complete
137
+ ```bash
138
+ # Do not follow or tail the logs from Studio.
139
+ datachain job run query.py --no-wait
140
+ ```
141
+
134
142
  ## Notes
135
143
 
136
144
  * Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
@@ -177,7 +177,7 @@ plugins:
177
177
  - https://numpy.org/doc/stable/objects.inv
178
178
  - https://pandas.pydata.org/docs/objects.inv
179
179
  - https://arrow.apache.org/docs/objects.inv
180
- - https://docs.sqlalchemy.org/objects.inv
180
+ # - https://docs.sqlalchemy.org/objects.inv # SSL certificate issue
181
181
  - https://docs.pydantic.dev/latest/objects.inv
182
182
 
183
183
  watch:
@@ -49,7 +49,7 @@ dependencies = [
49
49
  "Pillow>=10.0.0,<12",
50
50
  "msgpack>=1.0.4,<2",
51
51
  "psutil",
52
- "huggingface_hub",
52
+ "huggingface_hub<0.34.0",
53
53
  "iterative-telemetry>=0.0.10",
54
54
  "platformdirs",
55
55
  "dvc-studio-client>=0.21,<1",
@@ -109,6 +109,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
109
109
  studio_run_parser.add_argument(
110
110
  "--cron", action="store", help="Cron expression for the cron task."
111
111
  )
112
+ studio_run_parser.add_argument(
113
+ "--no-wait",
114
+ action="store_true",
115
+ help="Do not wait for the job to finish",
116
+ )
112
117
 
113
118
  studio_ls_help = "List jobs in Studio"
114
119
  studio_ls_description = "List jobs in Studio."
@@ -12,10 +12,11 @@ class JobStatus(int, Enum):
12
12
  CANCELING = 7
13
13
  CANCELED = 8
14
14
  CANCELING_SCHEDULED = 9
15
+ TASK = 11
15
16
 
16
17
  @classmethod
17
18
  def finished(cls) -> tuple[int, ...]:
18
- return cls.COMPLETE, cls.FAILED, cls.CANCELED
19
+ return cls.COMPLETE, cls.FAILED, cls.CANCELED, cls.TASK
19
20
 
20
21
 
21
22
  class JobQueryType(int, Enum):
@@ -2419,9 +2419,11 @@ class DataChain:
2419
2419
  ds.to_storage("gs://mybucket", placement="filename")
2420
2420
  ```
2421
2421
  """
2422
+ chain = self.persist()
2423
+ count = chain.count()
2424
+
2422
2425
  if placement == "filename" and (
2423
- self._query.distinct(pathfunc.name(C(f"{signal}__path"))).count()
2424
- != self._query.count()
2426
+ chain._query.distinct(pathfunc.name(C(f"{signal}__path"))).count() != count
2425
2427
  ):
2426
2428
  raise ValueError("Files with the same name found")
2427
2429
 
@@ -2433,7 +2435,7 @@ class DataChain:
2433
2435
  unit=" files",
2434
2436
  unit_scale=True,
2435
2437
  unit_divisor=10,
2436
- total=self.count(),
2438
+ total=count,
2437
2439
  leave=False,
2438
2440
  )
2439
2441
  file_exporter = FileExporter(
@@ -2444,7 +2446,10 @@ class DataChain:
2444
2446
  max_threads=num_threads or 1,
2445
2447
  client_config=client_config,
2446
2448
  )
2447
- file_exporter.run(self.to_values(signal), progress_bar)
2449
+ file_exporter.run(
2450
+ (rows[0] for rows in chain.to_iter(signal)),
2451
+ progress_bar,
2452
+ )
2448
2453
 
2449
2454
  def shuffle(self) -> "Self":
2450
2455
  """Shuffle the rows of the chain deterministically."""
@@ -23,7 +23,7 @@ from pydantic import Field, field_validator
23
23
 
24
24
  from datachain.client.fileslice import FileSlice
25
25
  from datachain.lib.data_model import DataModel
26
- from datachain.lib.utils import DataChainError
26
+ from datachain.lib.utils import DataChainError, rebase_path
27
27
  from datachain.nodes_thread_pool import NodesThreadPool
28
28
  from datachain.sql.types import JSON, Boolean, DateTime, Int, String
29
29
  from datachain.utils import TIME_ZERO
@@ -634,6 +634,40 @@ class File(DataModel):
634
634
  location=self.location,
635
635
  )
636
636
 
637
+ def rebase(
638
+ self,
639
+ old_base: str,
640
+ new_base: str,
641
+ suffix: str = "",
642
+ extension: str = "",
643
+ ) -> str:
644
+ """
645
+ Rebase the file's URI from one base directory to another.
646
+
647
+ Args:
648
+ old_base: Base directory to remove from the file's URI
649
+ new_base: New base directory to prepend
650
+ suffix: Optional suffix to add before file extension
651
+ extension: Optional new file extension (without dot)
652
+
653
+ Returns:
654
+ str: Rebased URI with new base directory
655
+
656
+ Raises:
657
+ ValueError: If old_base is not found in the file's URI
658
+
659
+ Examples:
660
+ >>> file = File(source="s3://bucket", path="data/2025-05-27/file.wav")
661
+ >>> file.rebase("s3://bucket/data", "s3://output-bucket/processed", \
662
+ extension="mp3")
663
+ 's3://output-bucket/processed/2025-05-27/file.mp3'
664
+
665
+ >>> file.rebase("data/audio", "/local/output", suffix="_ch1",
666
+ extension="npy")
667
+ '/local/output/file_ch1.npy'
668
+ """
669
+ return rebase_path(self.get_uri(), old_base, new_base, suffix, extension)
670
+
637
671
 
638
672
  def resolve(file: File) -> File:
639
673
  """
@@ -1219,6 +1253,24 @@ class Audio(DataModel):
1219
1253
  codec: str = Field(default="")
1220
1254
  bit_rate: int = Field(default=-1)
1221
1255
 
1256
+ @staticmethod
1257
+ def get_channel_name(num_channels: int, channel_idx: int) -> str:
1258
+ """Map channel index to meaningful name based on common audio formats"""
1259
+ channel_mappings = {
1260
+ 1: ["Mono"],
1261
+ 2: ["Left", "Right"],
1262
+ 4: ["W", "X", "Y", "Z"], # First-order Ambisonics
1263
+ 6: ["FL", "FR", "FC", "LFE", "BL", "BR"], # 5.1 surround
1264
+ 8: ["FL", "FR", "FC", "LFE", "BL", "BR", "SL", "SR"], # 7.1 surround
1265
+ }
1266
+
1267
+ if num_channels in channel_mappings:
1268
+ channels = channel_mappings[num_channels]
1269
+ if 0 <= channel_idx < len(channels):
1270
+ return channels[channel_idx]
1271
+
1272
+ return f"Ch{channel_idx + 1}"
1273
+
1222
1274
 
1223
1275
  class ArrowRow(DataModel):
1224
1276
  """`DataModel` for reading row from Arrow-supported file."""
@@ -0,0 +1,155 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+ from collections.abc import Sequence
4
+ from pathlib import PurePosixPath
5
+ from urllib.parse import urlparse
6
+
7
+
8
+ class AbstractUDF(ABC):
9
+ @abstractmethod
10
+ def process(self, *args, **kwargs):
11
+ pass
12
+
13
+ @abstractmethod
14
+ def setup(self):
15
+ pass
16
+
17
+ @abstractmethod
18
+ def teardown(self):
19
+ pass
20
+
21
+
22
+ class DataChainError(Exception):
23
+ pass
24
+
25
+
26
+ class DataChainParamsError(DataChainError):
27
+ pass
28
+
29
+
30
+ class DataChainColumnError(DataChainParamsError):
31
+ def __init__(self, col_name: str, msg: str):
32
+ super().__init__(f"Error for column {col_name}: {msg}")
33
+
34
+
35
+ def normalize_col_names(col_names: Sequence[str]) -> dict[str, str]:
36
+ """Returns normalized_name -> original_name dict."""
37
+ gen_col_counter = 0
38
+ new_col_names = {}
39
+ org_col_names = set(col_names)
40
+
41
+ for org_column in col_names:
42
+ new_column = org_column.lower()
43
+ new_column = re.sub("[^0-9a-z]+", "_", new_column)
44
+ new_column = new_column.strip("_")
45
+
46
+ generated_column = new_column
47
+
48
+ while (
49
+ not generated_column.isidentifier()
50
+ or generated_column in new_col_names
51
+ or (generated_column != org_column and generated_column in org_col_names)
52
+ ):
53
+ if new_column:
54
+ generated_column = f"c{gen_col_counter}_{new_column}"
55
+ else:
56
+ generated_column = f"c{gen_col_counter}"
57
+ gen_col_counter += 1
58
+
59
+ new_col_names[generated_column] = org_column
60
+
61
+ return new_col_names
62
+
63
+
64
+ def rebase_path(
65
+ src_path: str,
66
+ old_base: str,
67
+ new_base: str,
68
+ suffix: str = "",
69
+ extension: str = "",
70
+ ) -> str:
71
+ """
72
+ Rebase a file path from one base directory to another.
73
+
74
+ Args:
75
+ src_path: Source file path (can include URI scheme like s3://)
76
+ old_base: Base directory to remove from src_path
77
+ new_base: New base directory to prepend
78
+ suffix: Optional suffix to add before file extension
79
+ extension: Optional new file extension (without dot)
80
+
81
+ Returns:
82
+ str: Rebased path with new base directory
83
+
84
+ Raises:
85
+ ValueError: If old_base is not found in src_path
86
+ """
87
+ # Parse URIs to handle schemes properly
88
+ src_parsed = urlparse(src_path)
89
+ old_base_parsed = urlparse(old_base)
90
+ new_base_parsed = urlparse(new_base)
91
+
92
+ # Get the path component (without scheme)
93
+ if src_parsed.scheme:
94
+ src_path_only = src_parsed.netloc + src_parsed.path
95
+ else:
96
+ src_path_only = src_path
97
+
98
+ if old_base_parsed.scheme:
99
+ old_base_only = old_base_parsed.netloc + old_base_parsed.path
100
+ else:
101
+ old_base_only = old_base
102
+
103
+ # Normalize paths
104
+ src_path_norm = PurePosixPath(src_path_only).as_posix()
105
+ old_base_norm = PurePosixPath(old_base_only).as_posix()
106
+
107
+ # Find where old_base appears in src_path
108
+ if old_base_norm in src_path_norm:
109
+ # Find the index where old_base appears
110
+ idx = src_path_norm.find(old_base_norm)
111
+ if idx == -1:
112
+ raise ValueError(f"old_base '{old_base}' not found in src_path")
113
+
114
+ # Extract the relative path after old_base
115
+ relative_start = idx + len(old_base_norm)
116
+ # Skip leading slash if present
117
+ if relative_start < len(src_path_norm) and src_path_norm[relative_start] == "/":
118
+ relative_start += 1
119
+ relative_path = src_path_norm[relative_start:]
120
+ else:
121
+ raise ValueError(f"old_base '{old_base}' not found in src_path")
122
+
123
+ # Parse the filename
124
+ path_obj = PurePosixPath(relative_path)
125
+ stem = path_obj.stem
126
+ current_ext = path_obj.suffix
127
+
128
+ # Apply suffix and extension changes
129
+ new_stem = stem + suffix if suffix else stem
130
+ if extension:
131
+ new_ext = f".{extension}"
132
+ elif current_ext:
133
+ new_ext = current_ext
134
+ else:
135
+ new_ext = ""
136
+
137
+ # Build new filename
138
+ new_name = new_stem + new_ext
139
+
140
+ # Reconstruct path with new base
141
+ parent = str(path_obj.parent)
142
+ if parent == ".":
143
+ new_relative_path = new_name
144
+ else:
145
+ new_relative_path = str(PurePosixPath(parent) / new_name)
146
+
147
+ # Handle new_base URI scheme
148
+ if new_base_parsed.scheme:
149
+ # Has schema like s3://
150
+ base_path = new_base_parsed.netloc + new_base_parsed.path
151
+ base_path = PurePosixPath(base_path).as_posix()
152
+ full_path = str(PurePosixPath(base_path) / new_relative_path)
153
+ return f"{new_base_parsed.scheme}://{full_path}"
154
+ # Regular path
155
+ return str(PurePosixPath(new_base) / new_relative_path)
@@ -8,6 +8,7 @@ import dateparser
8
8
  import tabulate
9
9
 
10
10
  from datachain.config import Config, ConfigLevel
11
+ from datachain.data_storage.job import JobStatus
11
12
  from datachain.dataset import QUERY_DATASET_PREFIX, parse_dataset_name
12
13
  from datachain.error import DataChainError
13
14
  from datachain.remote.studio import StudioClient
@@ -20,6 +21,8 @@ POST_LOGIN_MESSAGE = (
20
21
  "Once you've logged in, return here "
21
22
  "and you'll be ready to start using DataChain with Studio."
22
23
  )
24
+ RETRY_MAX_TIMES = 10
25
+ RETRY_SLEEP_SEC = 1
23
26
 
24
27
 
25
28
  def process_jobs_args(args: "Namespace"):
@@ -46,6 +49,7 @@ def process_jobs_args(args: "Namespace"):
46
49
  args.cluster,
47
50
  args.start_time,
48
51
  args.cron,
52
+ args.no_wait,
49
53
  )
50
54
 
51
55
  if args.cmd == "cancel":
@@ -287,17 +291,34 @@ def parse_start_time(start_time_str: Optional[str]) -> Optional[str]:
287
291
  def show_logs_from_client(client, job_id):
288
292
  # Sync usage
289
293
  async def _run():
294
+ retry_count = 0
290
295
  latest_status = None
291
- async for message in client.tail_job_logs(job_id):
292
- if "logs" in message:
293
- for log in message["logs"]:
294
- print(log["message"], end="")
295
- elif "job" in message:
296
- latest_status = message["job"]["status"]
297
- print(f"\n>>>> Job is now in {latest_status} status.")
296
+ processed_statuses = set()
297
+ while True:
298
+ async for message in client.tail_job_logs(job_id):
299
+ if "logs" in message:
300
+ for log in message["logs"]:
301
+ print(log["message"], end="")
302
+ elif "job" in message:
303
+ latest_status = message["job"]["status"]
304
+ if latest_status in processed_statuses:
305
+ continue
306
+ processed_statuses.add(latest_status)
307
+ print(f"\n>>>> Job is now in {latest_status} status.")
308
+
309
+ try:
310
+ if retry_count > RETRY_MAX_TIMES or (
311
+ latest_status and JobStatus[latest_status].finished()
312
+ ):
313
+ break
314
+ await asyncio.sleep(RETRY_SLEEP_SEC)
315
+ retry_count += 1
316
+ except KeyError:
317
+ pass
318
+
298
319
  return latest_status
299
320
 
300
- latest_status = asyncio.run(_run())
321
+ final_status = asyncio.run(_run())
301
322
 
302
323
  response = client.dataset_job_versions(job_id)
303
324
  if not response.ok:
@@ -314,9 +335,9 @@ def show_logs_from_client(client, job_id):
314
335
 
315
336
  exit_code_by_status = {
316
337
  "FAILED": 1,
317
- "CANCELLED": 2,
338
+ "CANCELED": 2,
318
339
  }
319
- return exit_code_by_status.get(latest_status.upper(), 0) if latest_status else 0
340
+ return exit_code_by_status.get(final_status.upper(), 0) if final_status else 0
320
341
 
321
342
 
322
343
  def create_job(
@@ -334,6 +355,7 @@ def create_job(
334
355
  cluster: Optional[str] = None,
335
356
  start_time: Optional[str] = None,
336
357
  cron: Optional[str] = None,
358
+ no_wait: Optional[bool] = False,
337
359
  ):
338
360
  query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
339
361
  with open(query_file) as f:
@@ -388,7 +410,7 @@ def create_job(
388
410
  print("Open the job in Studio at", response.data.get("job", {}).get("url"))
389
411
  print("=" * 40)
390
412
 
391
- return show_logs_from_client(client, job_id)
413
+ return 0 if no_wait else show_logs_from_client(client, job_id)
392
414
 
393
415
 
394
416
  def upload_files(client: StudioClient, files: list[str]) -> list[str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.27.0
3
+ Version: 0.28.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
45
45
  Requires-Dist: Pillow<12,>=10.0.0
46
46
  Requires-Dist: msgpack<2,>=1.0.4
47
47
  Requires-Dist: psutil
48
- Requires-Dist: huggingface_hub
48
+ Requires-Dist: huggingface_hub<0.34.0
49
49
  Requires-Dist: iterative-telemetry>=0.0.10
50
50
  Requires-Dist: platformdirs
51
51
  Requires-Dist: dvc-studio-client<1,>=0.21
@@ -26,7 +26,7 @@ datamodel-code-generator>=0.25
26
26
  Pillow<12,>=10.0.0
27
27
  msgpack<2,>=1.0.4
28
28
  psutil
29
- huggingface_hub
29
+ huggingface_hub<0.34.0
30
30
  iterative-telemetry>=0.0.10
31
31
  platformdirs
32
32
  dvc-studio-client<1,>=0.21
@@ -9,7 +9,7 @@ import uuid
9
9
  from collections.abc import Iterator
10
10
  from datetime import datetime, timedelta, timezone
11
11
  from pathlib import Path, PurePosixPath
12
- from unittest.mock import patch
12
+ from unittest.mock import Mock, patch
13
13
 
14
14
  import numpy as np
15
15
  import pandas as pd
@@ -358,15 +358,24 @@ def test_to_storage(
358
358
  file_type,
359
359
  num_threads,
360
360
  ):
361
+ mapper = Mock(side_effect=lambda file_path: len(file_path))
362
+
361
363
  ctc = cloud_test_catalog
362
364
  df = dc.read_storage(ctc.src_uri, type=file_type, session=test_session)
363
365
  if use_map:
364
- df.settings(cache=use_cache).map(
365
- res=lambda file: file.export(tmp_dir / "output", placement=placement)
366
- ).exec()
366
+ (
367
+ df.settings(cache=use_cache)
368
+ .map(mapper, params=["file.path"], output={"path_len": int})
369
+ .map(res=lambda file: file.export(tmp_dir / "output", placement=placement))
370
+ .exec()
371
+ )
367
372
  else:
368
- df.settings(cache=use_cache).to_storage(
369
- tmp_dir / "output", placement=placement, num_threads=num_threads
373
+ (
374
+ df.settings(cache=use_cache)
375
+ .map(mapper, params=["file.path"], output={"path_len": int})
376
+ .to_storage(
377
+ tmp_dir / "output", placement=placement, num_threads=num_threads
378
+ )
370
379
  )
371
380
 
372
381
  expected = {
@@ -387,6 +396,8 @@ def test_to_storage(
387
396
  with open(tmp_dir / "output" / file_path) as f:
388
397
  assert f.read() == expected[file.name]
389
398
 
399
+ assert mapper.call_count == len(expected)
400
+
390
401
 
391
402
  @pytest.mark.parametrize("use_cache", [True, False])
392
403
  def test_export_images_files(test_session, tmp_dir, tmp_path, use_cache):
@@ -491,7 +491,7 @@ def test_studio_run_task(capsys, mocker, tmp_dir, studio_token):
491
491
 
492
492
 
493
493
  @pytest.mark.parametrize(
494
- "status,expected_exit_code", [("FAILED", 1), ("CANCELLED", 2), ("COMPLETED", 0)]
494
+ "status,expected_exit_code", [("FAILED", 1), ("CANCELED", 2), ("COMPLETE", 0)]
495
495
  )
496
496
  def test_studio_run_non_zero_exit_code(
497
497
  capsys, mocker, tmp_dir, status, expected_exit_code
@@ -7,7 +7,7 @@ from fsspec.implementations.local import LocalFileSystem
7
7
  from PIL import Image
8
8
 
9
9
  from datachain.catalog import Catalog
10
- from datachain.lib.file import File, FileError, ImageFile, TextFile, resolve
10
+ from datachain.lib.file import Audio, File, FileError, ImageFile, TextFile, resolve
11
11
 
12
12
 
13
13
  def create_file(source: str):
@@ -409,3 +409,49 @@ def test_path_normalized(path, expected, raises):
409
409
  file.get_path_normalized()
410
410
  else:
411
411
  assert file.get_path_normalized() == expected
412
+
413
+
414
+ def test_file_rebase_method():
415
+ """Test File.rebase() method"""
416
+ file = File(source="s3://bucket", path="data/audio/file.wav")
417
+
418
+ # Basic rebase
419
+ result = file.rebase("s3://bucket/data/audio", "s3://output-bucket/waveforms")
420
+ assert result == "s3://output-bucket/waveforms/file.wav"
421
+
422
+ # With suffix and extension
423
+ result = file.rebase(
424
+ "s3://bucket/data/audio",
425
+ "s3://output-bucket/processed",
426
+ suffix="_ch1",
427
+ extension="npy",
428
+ )
429
+ assert result == "s3://output-bucket/processed/file_ch1.npy"
430
+
431
+
432
+ def test_file_rebase_local_path():
433
+ """Test File.rebase() with local file paths"""
434
+ file = File(source="file://", path="/data/audio/folder/file.mp3")
435
+
436
+ result = file.rebase("file:///data/audio", "/output/processed")
437
+ assert result == "/output/processed/folder/file.mp3"
438
+
439
+
440
+ def test_audio_get_channel_name():
441
+ # Test known channel configurations
442
+ assert Audio.get_channel_name(1, 0) == "Mono"
443
+ assert Audio.get_channel_name(2, 0) == "Left"
444
+ assert Audio.get_channel_name(2, 1) == "Right"
445
+ assert Audio.get_channel_name(4, 2) == "Y" # Ambisonics
446
+ assert Audio.get_channel_name(6, 3) == "LFE" # 5.1 surround
447
+ assert Audio.get_channel_name(8, 7) == "SR" # 7.1 surround
448
+
449
+ # Test fallback for unknown configurations
450
+ assert Audio.get_channel_name(-1, 0) == "Ch1"
451
+ assert Audio.get_channel_name(3, 0) == "Ch1"
452
+ assert Audio.get_channel_name(5, 4) == "Ch5"
453
+ assert Audio.get_channel_name(10, 9) == "Ch10"
454
+
455
+ # Test out of range indices
456
+ assert Audio.get_channel_name(2, 5) == "Ch6"
457
+ assert Audio.get_channel_name(1, 1) == "Ch2"