datachain 0.16.3__tar.gz → 0.16.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic.

Files changed (366)
  1. {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/benchmarks.yml +4 -1
  2. {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/release.yml +1 -1
  3. {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/tests-studio.yml +2 -1
  4. {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/tests.yml +12 -3
  5. {datachain-0.16.3 → datachain-0.16.5}/.pre-commit-config.yaml +1 -1
  6. {datachain-0.16.3/src/datachain.egg-info → datachain-0.16.5}/PKG-INFO +1 -1
  7. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/job/run.md +5 -0
  8. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/catalog.py +5 -1
  9. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/__init__.py +11 -9
  10. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/query.py +1 -0
  11. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/__init__.py +9 -1
  12. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/job.py +6 -1
  13. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/job.py +1 -0
  14. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/metastore.py +82 -71
  15. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/warehouse.py +46 -34
  16. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/arrow.py +23 -1
  17. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/csv.py +1 -0
  18. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/datachain.py +30 -13
  19. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/listing.py +2 -0
  20. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/udf.py +17 -5
  21. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/batch.py +40 -39
  22. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/dataset.py +33 -32
  23. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/dispatch.py +137 -75
  24. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/metrics.py +1 -2
  25. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/queue.py +1 -11
  26. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/udf.py +1 -1
  27. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/utils.py +8 -14
  28. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/remote/studio.py +2 -0
  29. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/studio.py +3 -0
  30. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/utils.py +3 -0
  31. {datachain-0.16.3 → datachain-0.16.5/src/datachain.egg-info}/PKG-INFO +1 -1
  32. {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/SOURCES.txt +2 -0
  33. {datachain-0.16.3 → datachain-0.16.5}/tests/conftest.py +35 -8
  34. datachain-0.16.5/tests/func/test_batching.py +243 -0
  35. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_datachain.py +35 -75
  36. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_metrics.py +11 -2
  37. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_query.py +13 -10
  38. {datachain-0.16.3 → datachain-0.16.5}/tests/test_cli_studio.py +3 -0
  39. {datachain-0.16.3 → datachain-0.16.5}/tests/test_query_e2e.py +3 -2
  40. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_datachain.py +6 -2
  41. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_datachain_bootstrap.py +3 -1
  42. datachain-0.16.5/tests/unit/lib/test_udf.py +36 -0
  43. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_dispatch.py +1 -15
  44. {datachain-0.16.3 → datachain-0.16.5}/.cruft.json +0 -0
  45. {datachain-0.16.3 → datachain-0.16.5}/.gitattributes +0 -0
  46. {datachain-0.16.3 → datachain-0.16.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  47. {datachain-0.16.3 → datachain-0.16.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  48. {datachain-0.16.3 → datachain-0.16.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  49. {datachain-0.16.3 → datachain-0.16.5}/.github/codecov.yaml +0 -0
  50. {datachain-0.16.3 → datachain-0.16.5}/.github/dependabot.yml +0 -0
  51. {datachain-0.16.3 → datachain-0.16.5}/.github/workflows/update-template.yaml +0 -0
  52. {datachain-0.16.3 → datachain-0.16.5}/.gitignore +0 -0
  53. {datachain-0.16.3 → datachain-0.16.5}/CODE_OF_CONDUCT.rst +0 -0
  54. {datachain-0.16.3 → datachain-0.16.5}/LICENSE +0 -0
  55. {datachain-0.16.3 → datachain-0.16.5}/README.rst +0 -0
  56. {datachain-0.16.3 → datachain-0.16.5}/docs/assets/captioned_cartoons.png +0 -0
  57. {datachain-0.16.3 → datachain-0.16.5}/docs/assets/datachain-white.svg +0 -0
  58. {datachain-0.16.3 → datachain-0.16.5}/docs/assets/datachain.svg +0 -0
  59. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/login.md +0 -0
  60. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/logout.md +0 -0
  61. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/team.md +0 -0
  62. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/auth/token.md +0 -0
  63. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/index.md +0 -0
  64. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/job/cancel.md +0 -0
  65. {datachain-0.16.3 → datachain-0.16.5}/docs/commands/job/logs.md +0 -0
  66. {datachain-0.16.3 → datachain-0.16.5}/docs/contributing.md +0 -0
  67. {datachain-0.16.3 → datachain-0.16.5}/docs/css/github-permalink-style.css +0 -0
  68. {datachain-0.16.3 → datachain-0.16.5}/docs/examples.md +0 -0
  69. {datachain-0.16.3 → datachain-0.16.5}/docs/index.md +0 -0
  70. {datachain-0.16.3 → datachain-0.16.5}/docs/overrides/main.html +0 -0
  71. {datachain-0.16.3 → datachain-0.16.5}/docs/quick-start.md +0 -0
  72. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/arrowrow.md +0 -0
  73. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/bbox.md +0 -0
  74. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/file.md +0 -0
  75. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/imagefile.md +0 -0
  76. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/index.md +0 -0
  77. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/pose.md +0 -0
  78. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/segment.md +0 -0
  79. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/tarvfile.md +0 -0
  80. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/textfile.md +0 -0
  81. {datachain-0.16.3 → datachain-0.16.5}/docs/references/data-types/videofile.md +0 -0
  82. {datachain-0.16.3 → datachain-0.16.5}/docs/references/datachain.md +0 -0
  83. {datachain-0.16.3 → datachain-0.16.5}/docs/references/func.md +0 -0
  84. {datachain-0.16.3 → datachain-0.16.5}/docs/references/index.md +0 -0
  85. {datachain-0.16.3 → datachain-0.16.5}/docs/references/remotes.md +0 -0
  86. {datachain-0.16.3 → datachain-0.16.5}/docs/references/toolkit.md +0 -0
  87. {datachain-0.16.3 → datachain-0.16.5}/docs/references/torch.md +0 -0
  88. {datachain-0.16.3 → datachain-0.16.5}/docs/references/udf.md +0 -0
  89. {datachain-0.16.3 → datachain-0.16.5}/docs/tutorials.md +0 -0
  90. {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  91. {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  92. {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/openimage-detect.py +0 -0
  93. {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
  94. {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/ultralytics-pose.py +0 -0
  95. {datachain-0.16.3 → datachain-0.16.5}/examples/computer_vision/ultralytics-segment.py +0 -0
  96. {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/common_sql_functions.py +0 -0
  97. {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/json-csv-reader.py +0 -0
  98. {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/torch-loader.py +0 -0
  99. {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/udfs/parallel.py +0 -0
  100. {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/udfs/simple.py +0 -0
  101. {datachain-0.16.3 → datachain-0.16.5}/examples/get_started/udfs/stateful.py +0 -0
  102. {datachain-0.16.3 → datachain-0.16.5}/examples/llm_and_nlp/claude-query.py +0 -0
  103. {datachain-0.16.3 → datachain-0.16.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  104. {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/clip_inference.py +0 -0
  105. {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/hf_pipeline.py +0 -0
  106. {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
  107. {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/wds.py +0 -0
  108. {datachain-0.16.3 → datachain-0.16.5}/examples/multimodal/wds_filtered.py +0 -0
  109. {datachain-0.16.3 → datachain-0.16.5}/mkdocs.yml +0 -0
  110. {datachain-0.16.3 → datachain-0.16.5}/noxfile.py +0 -0
  111. {datachain-0.16.3 → datachain-0.16.5}/pyproject.toml +0 -0
  112. {datachain-0.16.3 → datachain-0.16.5}/setup.cfg +0 -0
  113. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/__init__.py +0 -0
  114. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/__main__.py +0 -0
  115. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/asyn.py +0 -0
  116. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cache.py +0 -0
  117. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/__init__.py +0 -0
  118. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/datasource.py +0 -0
  119. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/catalog/loader.py +0 -0
  120. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/__init__.py +0 -0
  121. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/datasets.py +0 -0
  122. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/du.py +0 -0
  123. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/index.py +0 -0
  124. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/ls.py +0 -0
  125. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/misc.py +0 -0
  126. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/commands/show.py +0 -0
  127. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/studio.py +0 -0
  128. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/parser/utils.py +0 -0
  129. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/cli/utils.py +0 -0
  130. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/__init__.py +0 -0
  131. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/azure.py +0 -0
  132. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/fileslice.py +0 -0
  133. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/fsspec.py +0 -0
  134. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/gcs.py +0 -0
  135. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/hf.py +0 -0
  136. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/local.py +0 -0
  137. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/client/s3.py +0 -0
  138. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/config.py +0 -0
  139. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/__init__.py +0 -0
  140. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/db_engine.py +0 -0
  141. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/schema.py +0 -0
  142. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/serializer.py +0 -0
  143. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/data_storage/sqlite.py +0 -0
  144. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/dataset.py +0 -0
  145. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/diff/__init__.py +0 -0
  146. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/error.py +0 -0
  147. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/fs/__init__.py +0 -0
  148. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/fs/reference.py +0 -0
  149. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/fs/utils.py +0 -0
  150. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/__init__.py +0 -0
  151. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/aggregate.py +0 -0
  152. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/array.py +0 -0
  153. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/base.py +0 -0
  154. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/conditional.py +0 -0
  155. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/func.py +0 -0
  156. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/numeric.py +0 -0
  157. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/path.py +0 -0
  158. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/random.py +0 -0
  159. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/string.py +0 -0
  160. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/func/window.py +0 -0
  161. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/job.py +0 -0
  162. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/__init__.py +0 -0
  163. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/clip.py +0 -0
  164. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/__init__.py +0 -0
  165. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/flatten.py +0 -0
  166. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
  167. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
  168. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/unflatten.py +0 -0
  169. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  170. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/data_model.py +0 -0
  171. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dataset_info.py +0 -0
  172. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/__init__.py +0 -0
  173. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/database.py +0 -0
  174. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/datasets.py +0 -0
  175. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/hf.py +0 -0
  176. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/json.py +0 -0
  177. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/listings.py +0 -0
  178. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/pandas.py +0 -0
  179. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/parquet.py +0 -0
  180. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/records.py +0 -0
  181. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/storage.py +0 -0
  182. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/utils.py +0 -0
  183. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/dc/values.py +0 -0
  184. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/file.py +0 -0
  185. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/hf.py +0 -0
  186. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/image.py +0 -0
  187. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/listing_info.py +0 -0
  188. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/meta_formats.py +0 -0
  189. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/model_store.py +0 -0
  190. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/pytorch.py +0 -0
  191. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/settings.py +0 -0
  192. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/signal_schema.py +0 -0
  193. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/tar.py +0 -0
  194. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/text.py +0 -0
  195. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/udf_signature.py +0 -0
  196. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/utils.py +0 -0
  197. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/video.py +0 -0
  198. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/webdataset.py +0 -0
  199. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/lib/webdataset_laion.py +0 -0
  200. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/listing.py +0 -0
  201. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/__init__.py +0 -0
  202. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/bbox.py +0 -0
  203. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/pose.py +0 -0
  204. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/segment.py +0 -0
  205. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/__init__.py +0 -0
  206. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/bbox.py +0 -0
  207. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/pose.py +0 -0
  208. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/ultralytics/segment.py +0 -0
  209. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/model/utils.py +0 -0
  210. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/node.py +0 -0
  211. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/nodes_fetcher.py +0 -0
  212. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/nodes_thread_pool.py +0 -0
  213. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/progress.py +0 -0
  214. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/py.typed +0 -0
  215. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/__init__.py +0 -0
  216. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/params.py +0 -0
  217. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/schema.py +0 -0
  218. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/query/session.py +0 -0
  219. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/remote/__init__.py +0 -0
  220. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/script_meta.py +0 -0
  221. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/__init__.py +0 -0
  222. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/default/__init__.py +0 -0
  223. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/default/base.py +0 -0
  224. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/__init__.py +0 -0
  225. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/aggregate.py +0 -0
  226. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/array.py +0 -0
  227. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/conditional.py +0 -0
  228. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/numeric.py +0 -0
  229. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/path.py +0 -0
  230. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/random.py +0 -0
  231. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/functions/string.py +0 -0
  232. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/selectable.py +0 -0
  233. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/__init__.py +0 -0
  234. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/base.py +0 -0
  235. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/types.py +0 -0
  236. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/sqlite/vector.py +0 -0
  237. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/types.py +0 -0
  238. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/sql/utils.py +0 -0
  239. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/telemetry.py +0 -0
  240. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/toolkit/__init__.py +0 -0
  241. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/toolkit/split.py +0 -0
  242. {datachain-0.16.3 → datachain-0.16.5}/src/datachain/torch/__init__.py +0 -0
  243. {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/dependency_links.txt +0 -0
  244. {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/entry_points.txt +0 -0
  245. {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/requires.txt +0 -0
  246. {datachain-0.16.3 → datachain-0.16.5}/src/datachain.egg-info/top_level.txt +0 -0
  247. {datachain-0.16.3 → datachain-0.16.5}/tests/__init__.py +0 -0
  248. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/__init__.py +0 -0
  249. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/conftest.py +0 -0
  250. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  251. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/.dvc/config +0 -0
  252. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/.gitignore +0 -0
  253. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  254. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/test_datachain.py +0 -0
  255. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/test_ls.py +0 -0
  256. {datachain-0.16.3 → datachain-0.16.5}/tests/benchmarks/test_version.py +0 -0
  257. {datachain-0.16.3 → datachain-0.16.5}/tests/data.py +0 -0
  258. {datachain-0.16.3 → datachain-0.16.5}/tests/examples/__init__.py +0 -0
  259. {datachain-0.16.3 → datachain-0.16.5}/tests/examples/test_examples.py +0 -0
  260. {datachain-0.16.3 → datachain-0.16.5}/tests/examples/test_wds_e2e.py +0 -0
  261. {datachain-0.16.3 → datachain-0.16.5}/tests/examples/wds_data.py +0 -0
  262. {datachain-0.16.3 → datachain-0.16.5}/tests/func/__init__.py +0 -0
  263. {datachain-0.16.3 → datachain-0.16.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  264. {datachain-0.16.3 → datachain-0.16.5}/tests/func/data/lena.jpg +0 -0
  265. {datachain-0.16.3 → datachain-0.16.5}/tests/func/fake-service-account-credentials.json +0 -0
  266. {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/__init__.py +0 -0
  267. {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/running-mask0.png +0 -0
  268. {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/running-mask1.png +0 -0
  269. {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/running.jpg +0 -0
  270. {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/data/ships.jpg +0 -0
  271. {datachain-0.16.3 → datachain-0.16.5}/tests/func/model/test_yolo.py +0 -0
  272. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_catalog.py +0 -0
  273. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_client.py +0 -0
  274. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_cloud_transfer.py +0 -0
  275. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_data_storage.py +0 -0
  276. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_datachain_merge.py +0 -0
  277. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_dataset_query.py +0 -0
  278. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_datasets.py +0 -0
  279. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_feature_pickling.py +0 -0
  280. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_file.py +0 -0
  281. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_func.py +0 -0
  282. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_hf.py +0 -0
  283. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_hidden_field.py +0 -0
  284. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_image.py +0 -0
  285. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_listing.py +0 -0
  286. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_ls.py +0 -0
  287. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_meta_formats.py +0 -0
  288. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_pull.py +0 -0
  289. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_pytorch.py +0 -0
  290. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_read_database.py +0 -0
  291. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_session.py +0 -0
  292. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_toolkit.py +0 -0
  293. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_video.py +0 -0
  294. {datachain-0.16.3 → datachain-0.16.5}/tests/func/test_warehouse.py +0 -0
  295. {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class.py +0 -0
  296. {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class_exception.py +0 -0
  297. {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class_parallel.py +0 -0
  298. {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  299. {datachain-0.16.3 → datachain-0.16.5}/tests/scripts/name_len_slow.py +0 -0
  300. {datachain-0.16.3 → datachain-0.16.5}/tests/test_atomicity.py +0 -0
  301. {datachain-0.16.3 → datachain-0.16.5}/tests/test_cli_e2e.py +0 -0
  302. {datachain-0.16.3 → datachain-0.16.5}/tests/test_import_time.py +0 -0
  303. {datachain-0.16.3 → datachain-0.16.5}/tests/test_telemetry.py +0 -0
  304. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/__init__.py +0 -0
  305. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/__init__.py +0 -0
  306. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/conftest.py +0 -0
  307. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_arrow.py +0 -0
  308. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_clip.py +0 -0
  309. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_datachain_merge.py +0 -0
  310. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_diff.py +0 -0
  311. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_feature.py +0 -0
  312. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_feature_utils.py +0 -0
  313. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_file.py +0 -0
  314. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_hf.py +0 -0
  315. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_image.py +0 -0
  316. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_listing_info.py +0 -0
  317. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_python_to_sql.py +0 -0
  318. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_schema.py +0 -0
  319. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_signal_schema.py +0 -0
  320. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_sql_to_python.py +0 -0
  321. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_text.py +0 -0
  322. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_udf_signature.py +0 -0
  323. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_utils.py +0 -0
  324. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/lib/test_webdataset.py +0 -0
  325. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/__init__.py +0 -0
  326. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_bbox.py +0 -0
  327. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_pose.py +0 -0
  328. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_segment.py +0 -0
  329. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/model/test_utils.py +0 -0
  330. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/__init__.py +0 -0
  331. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/sqlite/__init__.py +0 -0
  332. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/sqlite/test_types.py +0 -0
  333. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
  334. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_array.py +0 -0
  335. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_conditional.py +0 -0
  336. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_path.py +0 -0
  337. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_random.py +0 -0
  338. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_selectable.py +0 -0
  339. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/sql/test_string.py +0 -0
  340. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_asyn.py +0 -0
  341. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_cache.py +0 -0
  342. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_catalog.py +0 -0
  343. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_catalog_loader.py +0 -0
  344. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_cli_parsing.py +0 -0
  345. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_client.py +0 -0
  346. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_client_gcs.py +0 -0
  347. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_client_s3.py +0 -0
  348. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_config.py +0 -0
  349. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_data_storage.py +0 -0
  350. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_database_engine.py +0 -0
  351. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_dataset.py +0 -0
  352. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_fileslice.py +0 -0
  353. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_func.py +0 -0
  354. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_listing.py +0 -0
  355. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_metastore.py +0 -0
  356. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_module_exports.py +0 -0
  357. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_pytorch.py +0 -0
  358. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_query.py +0 -0
  359. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_query_metrics.py +0 -0
  360. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_query_params.py +0 -0
  361. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_script_meta.py +0 -0
  362. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_serializer.py +0 -0
  363. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_session.py +0 -0
  364. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_utils.py +0 -0
  365. {datachain-0.16.3 → datachain-0.16.5}/tests/unit/test_warehouse.py +0 -0
  366. {datachain-0.16.3 → datachain-0.16.5}/tests/utils.py +0 -0
@@ -25,11 +25,14 @@ jobs:
25
25
  python-version: '3.13'
26
26
 
27
27
  - name: Setup uv
28
- uses: astral-sh/setup-uv@v5
28
+ uses: astral-sh/setup-uv@v6
29
29
  with:
30
30
  enable-cache: true
31
31
  cache-suffix: benchmarks
32
32
  cache-dependency-glob: pyproject.toml
33
+ # revert after this is fixed
34
+ # https://github.com/wntrblm/nox/issues/953
35
+ version: ">=0.6,<0.7"
33
36
 
34
37
  - name: Install nox and dvc
35
38
  run: uv pip install dvc[gs] nox --system
@@ -27,7 +27,7 @@ jobs:
27
27
  python-version: '3.13'
28
28
 
29
29
  - name: Setup uv
30
- uses: astral-sh/setup-uv@v5
30
+ uses: astral-sh/setup-uv@v6
31
31
 
32
32
  - name: Install nox
33
33
  run: uv pip install nox --system
@@ -84,7 +84,7 @@ jobs:
84
84
  python-version: ${{ matrix.pyv }}
85
85
 
86
86
  - name: Setup uv
87
- uses: astral-sh/setup-uv@v5
87
+ uses: astral-sh/setup-uv@v6
88
88
  with:
89
89
  enable-cache: true
90
90
  cache-suffix: studio
@@ -98,6 +98,7 @@ jobs:
98
98
  - name: Run tests
99
99
  # Generate `.test_durations` file with `pytest --store-durations --durations-path ../.github/.test_durations ...`
100
100
  run: >
101
+ PYTHONPATH="$(pwd)/..:${PYTHONPATH}"
101
102
  pytest
102
103
  --config-file=pyproject.toml -rs
103
104
  --splits=6 --group=${{ matrix.group }} --durations-path=../../.github/.test_durations
@@ -29,11 +29,14 @@ jobs:
29
29
  python-version: '3.9'
30
30
 
31
31
  - name: Setup uv
32
- uses: astral-sh/setup-uv@v5
32
+ uses: astral-sh/setup-uv@v6
33
33
  with:
34
34
  enable-cache: true
35
35
  cache-suffix: lint
36
36
  cache-dependency-glob: pyproject.toml
37
+ # revert after this is fixed
38
+ # https://github.com/wntrblm/nox/issues/953
39
+ version: ">=0.6,<0.7"
37
40
 
38
41
  - name: Install nox
39
42
  run: uv pip install nox --system
@@ -87,11 +90,14 @@ jobs:
87
90
  python-version: ${{ matrix.pyv }}
88
91
 
89
92
  - name: Setup uv
90
- uses: astral-sh/setup-uv@v5
93
+ uses: astral-sh/setup-uv@v6
91
94
  with:
92
95
  enable-cache: true
93
96
  cache-suffix: tests-${{ matrix.pyv }}
94
97
  cache-dependency-glob: pyproject.toml
98
+ # revert after this is fixed
99
+ # https://github.com/wntrblm/nox/issues/953
100
+ version: ">=0.6,<0.7"
95
101
 
96
102
  - name: Install nox
97
103
  run: uv pip install nox --system
@@ -154,11 +160,14 @@ jobs:
154
160
  python-version: ${{ matrix.pyv }}
155
161
 
156
162
  - name: Setup uv
157
- uses: astral-sh/setup-uv@v5
163
+ uses: astral-sh/setup-uv@v6
158
164
  with:
159
165
  enable-cache: true
160
166
  cache-suffix: examples-${{ matrix.pyv }}
161
167
  cache-dependency-glob: pyproject.toml
168
+ # revert after this is fixed
169
+ # https://github.com/wntrblm/nox/issues/953
170
+ version: ">=0.6,<0.7"
162
171
 
163
172
  - name: Install nox
164
173
  run: uv pip install nox --system
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.11.6'
27
+ rev: 'v0.11.8'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.16.3
3
+ Version: 0.16.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -60,6 +60,11 @@ datachain job run --workers 4 --files utils.py config.json query.py
60
60
  datachain job run --env API_KEY=123 --req pandas numpy query.py
61
61
  ```
62
62
 
63
+ 6. Run a job with a repository (will be cloned in the job working directory):
64
+ ```bash
65
+ datachain job run --repository https://github.com/iterative/datachain query.py
66
+ ```
67
+
63
68
  ## Notes
64
69
 
65
70
  * Closing the logs command (e.g., with Ctrl+C) will only stop displaying the logs but will not cancel the job execution
@@ -79,6 +79,7 @@ DATASET_INTERNAL_ERROR_MESSAGE = "Internal error on creating dataset"
79
79
  QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE = 10
80
80
  # exit code we use if query script was canceled
81
81
  QUERY_SCRIPT_CANCELED_EXIT_CODE = 11
82
+ QUERY_SCRIPT_SIGTERM_EXIT_CODE = -15 # if query script was terminated by SIGTERM
82
83
 
83
84
  # dataset pull
84
85
  PULL_DATASET_MAX_THREADS = 5
@@ -1645,7 +1646,10 @@ class Catalog:
1645
1646
  thread.join() # wait for the reader thread
1646
1647
 
1647
1648
  logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1648
- if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
1649
+ if proc.returncode in (
1650
+ QUERY_SCRIPT_CANCELED_EXIT_CODE,
1651
+ QUERY_SCRIPT_SIGTERM_EXIT_CODE,
1652
+ ):
1649
1653
  raise QueryScriptCancelError(
1650
1654
  "Query script was canceled by user",
1651
1655
  return_code=proc.returncode,
@@ -34,8 +34,10 @@ def main(argv: Optional[list[str]] = None) -> int:
34
34
  datachain_parser = get_parser()
35
35
  args = datachain_parser.parse_args(argv)
36
36
 
37
- if args.command in ("internal-run-udf", "internal-run-udf-worker"):
38
- return handle_udf(args.command)
37
+ if args.command == "internal-run-udf":
38
+ return handle_udf()
39
+ if args.command == "internal-run-udf-worker":
40
+ return handle_udf_runner(args.fd)
39
41
 
40
42
  if args.command is None:
41
43
  datachain_parser.print_help(sys.stderr)
@@ -303,13 +305,13 @@ def handle_general_exception(exc, args, logging_level):
303
305
  return error, 1
304
306
 
305
307
 
306
- def handle_udf(command):
307
- if command == "internal-run-udf":
308
- from datachain.query.dispatch import udf_entrypoint
308
+ def handle_udf() -> int:
309
+ from datachain.query.dispatch import udf_entrypoint
309
310
 
310
- return udf_entrypoint()
311
+ return udf_entrypoint()
311
312
 
312
- if command == "internal-run-udf-worker":
313
- from datachain.query.dispatch import udf_worker_entrypoint
314
313
 
315
- return udf_worker_entrypoint()
314
+ def handle_udf_runner(fd: Optional[int] = None) -> int:
315
+ from datachain.query.dispatch import udf_worker_entrypoint
316
+
317
+ return udf_worker_entrypoint(fd)
@@ -29,6 +29,7 @@ def query(
29
29
  name=os.path.basename(script),
30
30
  query=script_content,
31
31
  query_type=JobQueryType.PYTHON,
32
+ status=JobStatus.RUNNING,
32
33
  python_version=python_version,
33
34
  params=params,
34
35
  )
@@ -549,7 +549,15 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
549
549
  add_anon_arg(parse_gc)
550
550
 
551
551
  subp.add_parser("internal-run-udf", parents=[parent_parser])
552
- subp.add_parser("internal-run-udf-worker", parents=[parent_parser])
552
+ run_udf_worker = subp.add_parser("internal-run-udf-worker", parents=[parent_parser])
553
+ run_udf_worker.add_argument(
554
+ "--fd",
555
+ type=int,
556
+ action="store",
557
+ default=None,
558
+ help="File descriptor to write results to",
559
+ )
560
+
553
561
  add_completion_parser(subp, [parent_parser])
554
562
  return parser
555
563
 
@@ -13,7 +13,7 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
13
13
  )
14
14
  jobs_subparser = jobs_parser.add_subparsers(
15
15
  dest="cmd",
16
- help="Use `datachain auth CMD --help` to display command-specific help",
16
+ help="Use `datachain job CMD --help` to display command-specific help",
17
17
  )
18
18
 
19
19
  studio_run_help = "Run a job in Studio"
@@ -66,6 +66,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
66
66
  action="store",
67
67
  help="Python version for the job (e.g., 3.9, 3.10, 3.11)",
68
68
  )
69
+ studio_run_parser.add_argument(
70
+ "--repository",
71
+ action="store",
72
+ help="Repository URL to clone before running the job",
73
+ )
69
74
  studio_run_parser.add_argument(
70
75
  "--req-file",
71
76
  action="store",
@@ -3,6 +3,7 @@ from enum import Enum
3
3
 
4
4
  class JobStatus(int, Enum):
5
5
  CREATED = 1
6
+ SCHEDULED = 10
6
7
  QUEUED = 2
7
8
  INIT = 3
8
9
  RUNNING = 4
@@ -254,6 +254,7 @@ class AbstractMetastore(ABC, Serializable):
254
254
  name: str,
255
255
  query: str,
256
256
  query_type: JobQueryType = JobQueryType.PYTHON,
257
+ status: JobStatus = JobStatus.CREATED,
257
258
  workers: int = 1,
258
259
  python_version: Optional[str] = None,
259
260
  params: Optional[dict[str, str]] = None,
@@ -264,33 +265,35 @@ class AbstractMetastore(ABC, Serializable):
264
265
  """
265
266
 
266
267
  @abstractmethod
267
- def set_job_status(
268
+ def get_job(self, job_id: str) -> Optional[Job]:
269
+ """Returns the job with the given ID."""
270
+
271
+ @abstractmethod
272
+ def update_job(
268
273
  self,
269
274
  job_id: str,
270
- status: JobStatus,
275
+ status: Optional[JobStatus] = None,
276
+ exit_code: Optional[int] = None,
271
277
  error_message: Optional[str] = None,
272
278
  error_stack: Optional[str] = None,
279
+ finished_at: Optional[datetime] = None,
273
280
  metrics: Optional[dict[str, Any]] = None,
274
- ) -> None:
275
- """Set the status of the given job."""
281
+ ) -> Optional["Job"]:
282
+ """Updates job fields."""
276
283
 
277
284
  @abstractmethod
278
- def get_job_status(self, job_id: str) -> Optional[JobStatus]:
279
- """Returns the status of the given job."""
280
-
281
- @abstractmethod
282
- def set_job_and_dataset_status(
285
+ def set_job_status(
283
286
  self,
284
287
  job_id: str,
285
- job_status: JobStatus,
286
- dataset_status: DatasetStatus,
288
+ status: JobStatus,
289
+ error_message: Optional[str] = None,
290
+ error_stack: Optional[str] = None,
287
291
  ) -> None:
288
- """Set the status of the given job and dataset."""
292
+ """Set the status of the given job."""
289
293
 
290
294
  @abstractmethod
291
- def get_job_dataset_versions(self, job_id: str) -> list[tuple[str, int]]:
292
- """Returns dataset names and versions for the job."""
293
- raise NotImplementedError
295
+ def get_job_status(self, job_id: str) -> Optional[JobStatus]:
296
+ """Returns the status of the given job."""
294
297
 
295
298
 
296
299
  class AbstractDBMetastore(AbstractMetastore):
@@ -651,30 +654,31 @@ class AbstractDBMetastore(AbstractMetastore):
651
654
  dataset_version = dataset.get_version(version)
652
655
 
653
656
  values = {}
657
+ version_values: dict = {}
654
658
  for field, value in kwargs.items():
655
659
  if field in self._dataset_version_fields[1:]:
656
660
  if field == "schema":
657
- dataset_version.update(**{field: DatasetRecord.parse_schema(value)})
658
661
  values[field] = json.dumps(value) if value else None
662
+ version_values[field] = DatasetRecord.parse_schema(value)
659
663
  elif field == "feature_schema":
660
664
  values[field] = json.dumps(value) if value else None
665
+ version_values[field] = value
661
666
  elif field == "preview" and isinstance(value, list):
662
667
  values[field] = json.dumps(value, cls=JSONSerialize)
668
+ version_values[field] = value
663
669
  else:
664
670
  values[field] = value
665
- dataset_version.update(**{field: value})
666
-
667
- if not values:
668
- # Nothing to update
669
- return dataset_version
671
+ version_values[field] = value
670
672
 
671
- dv = self._datasets_versions
672
- self.db.execute(
673
- self._datasets_versions_update()
674
- .where(dv.c.id == dataset_version.id)
675
- .values(values),
676
- conn=conn,
677
- ) # type: ignore [attr-defined]
673
+ if values:
674
+ dv = self._datasets_versions
675
+ self.db.execute(
676
+ self._datasets_versions_update()
677
+ .where(dv.c.dataset_id == dataset.id and dv.c.version == version)
678
+ .values(values),
679
+ conn=conn,
680
+ ) # type: ignore [attr-defined]
681
+ dataset_version.update(**version_values)
678
682
 
679
683
  return dataset_version
680
684
 
@@ -702,7 +706,7 @@ class AbstractDBMetastore(AbstractMetastore):
702
706
  dataset_fields: list[str],
703
707
  dataset_version_fields: list[str],
704
708
  isouter: bool = True,
705
- ):
709
+ ) -> "Select":
706
710
  if not (
707
711
  self.db.has_table(self._datasets.name)
708
712
  and self.db.has_table(self._datasets_versions.name)
@@ -719,12 +723,12 @@ class AbstractDBMetastore(AbstractMetastore):
719
723
  j = d.join(dv, d.c.id == dv.c.dataset_id, isouter=isouter)
720
724
  return query.select_from(j)
721
725
 
722
- def _base_dataset_query(self):
726
+ def _base_dataset_query(self) -> "Select":
723
727
  return self._get_dataset_query(
724
728
  self._dataset_fields, self._dataset_version_fields
725
729
  )
726
730
 
727
- def _base_list_datasets_query(self):
731
+ def _base_list_datasets_query(self) -> "Select":
728
732
  return self._get_dataset_query(
729
733
  self._dataset_list_fields, self._dataset_list_version_fields, isouter=False
730
734
  )
@@ -1018,6 +1022,7 @@ class AbstractDBMetastore(AbstractMetastore):
1018
1022
  name: str,
1019
1023
  query: str,
1020
1024
  query_type: JobQueryType = JobQueryType.PYTHON,
1025
+ status: JobStatus = JobStatus.CREATED,
1021
1026
  workers: int = 1,
1022
1027
  python_version: Optional[str] = None,
1023
1028
  params: Optional[dict[str, str]] = None,
@@ -1032,7 +1037,7 @@ class AbstractDBMetastore(AbstractMetastore):
1032
1037
  self._jobs_insert().values(
1033
1038
  id=job_id,
1034
1039
  name=name,
1035
- status=JobStatus.CREATED,
1040
+ status=status,
1036
1041
  created_at=datetime.now(timezone.utc),
1037
1042
  query=query,
1038
1043
  query_type=query_type.value,
@@ -1047,25 +1052,65 @@ class AbstractDBMetastore(AbstractMetastore):
1047
1052
  )
1048
1053
  return job_id
1049
1054
 
1055
+ def get_job(self, job_id: str, conn=None) -> Optional[Job]:
1056
+ """Returns the job with the given ID."""
1057
+ query = self._jobs_select(self._jobs).where(self._jobs.c.id == job_id)
1058
+ results = list(self.db.execute(query, conn=conn))
1059
+ if not results:
1060
+ return None
1061
+ return self._parse_job(results[0])
1062
+
1063
+ def update_job(
1064
+ self,
1065
+ job_id: str,
1066
+ status: Optional[JobStatus] = None,
1067
+ exit_code: Optional[int] = None,
1068
+ error_message: Optional[str] = None,
1069
+ error_stack: Optional[str] = None,
1070
+ finished_at: Optional[datetime] = None,
1071
+ metrics: Optional[dict[str, Any]] = None,
1072
+ conn: Optional[Any] = None,
1073
+ ) -> Optional["Job"]:
1074
+ """Updates job fields."""
1075
+ values: dict = {}
1076
+ if status is not None:
1077
+ values["status"] = status
1078
+ if exit_code is not None:
1079
+ values["exit_code"] = exit_code
1080
+ if error_message is not None:
1081
+ values["error_message"] = error_message
1082
+ if error_stack is not None:
1083
+ values["error_stack"] = error_stack
1084
+ if finished_at is not None:
1085
+ values["finished_at"] = finished_at
1086
+ if metrics:
1087
+ values["metrics"] = json.dumps(metrics)
1088
+
1089
+ if values:
1090
+ j = self._jobs
1091
+ self.db.execute(
1092
+ self._jobs_update().where(j.c.id == job_id).values(**values),
1093
+ conn=conn,
1094
+ ) # type: ignore [attr-defined]
1095
+
1096
+ return self.get_job(job_id, conn=conn)
1097
+
1050
1098
  def set_job_status(
1051
1099
  self,
1052
1100
  job_id: str,
1053
1101
  status: JobStatus,
1054
1102
  error_message: Optional[str] = None,
1055
1103
  error_stack: Optional[str] = None,
1056
- metrics: Optional[dict[str, Any]] = None,
1057
1104
  conn: Optional[Any] = None,
1058
1105
  ) -> None:
1059
1106
  """Set the status of the given job."""
1060
- values: dict = {"status": status.value}
1061
- if status.value in JobStatus.finished():
1107
+ values: dict = {"status": status}
1108
+ if status in JobStatus.finished():
1062
1109
  values["finished_at"] = datetime.now(timezone.utc)
1063
1110
  if error_message:
1064
1111
  values["error_message"] = error_message
1065
1112
  if error_stack:
1066
1113
  values["error_stack"] = error_stack
1067
- if metrics:
1068
- values["metrics"] = json.dumps(metrics)
1069
1114
  self.db.execute(
1070
1115
  self._jobs_update(self._jobs.c.id == job_id).values(**values),
1071
1116
  conn=conn,
@@ -1086,37 +1131,3 @@ class AbstractDBMetastore(AbstractMetastore):
1086
1131
  if not results:
1087
1132
  return None
1088
1133
  return results[0][0]
1089
-
1090
- def set_job_and_dataset_status(
1091
- self,
1092
- job_id: str,
1093
- job_status: JobStatus,
1094
- dataset_status: DatasetStatus,
1095
- ) -> None:
1096
- """Set the status of the given job and dataset."""
1097
- with self.db.transaction() as conn:
1098
- self.set_job_status(job_id, status=job_status, conn=conn)
1099
- dv = self._datasets_versions
1100
- query = (
1101
- self._datasets_versions_update()
1102
- .where(
1103
- (dv.c.job_id == job_id) & (dv.c.status != DatasetStatus.COMPLETE)
1104
- )
1105
- .values(status=dataset_status)
1106
- )
1107
- self.db.execute(query, conn=conn) # type: ignore[attr-defined]
1108
-
1109
- def get_job_dataset_versions(self, job_id: str) -> list[tuple[str, int]]:
1110
- """Returns dataset names and versions for the job."""
1111
- dv = self._datasets_versions
1112
- ds = self._datasets
1113
-
1114
- join_condition = dv.c.dataset_id == ds.c.id
1115
-
1116
- query = (
1117
- self._datasets_versions_select(ds.c.name, dv.c.version)
1118
- .select_from(dv.join(ds, join_condition))
1119
- .where(dv.c.job_id == job_id)
1120
- )
1121
-
1122
- return list(self.db.execute(query))