datachain 0.16.4__tar.gz → 0.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (369)
  1. {datachain-0.16.4 → datachain-0.17.0}/.github/workflows/tests-studio.yml +1 -0
  2. {datachain-0.16.4 → datachain-0.17.0}/.pre-commit-config.yaml +1 -1
  3. {datachain-0.16.4/src/datachain.egg-info → datachain-0.17.0}/PKG-INFO +1 -1
  4. {datachain-0.16.4 → datachain-0.17.0}/docs/examples.md +1 -1
  5. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/catalog/catalog.py +25 -92
  6. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/__init__.py +11 -9
  7. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/datasets.py +1 -1
  8. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/query.py +1 -0
  9. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/show.py +1 -1
  10. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/parser/__init__.py +11 -3
  11. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/job.py +1 -0
  12. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/metastore.py +105 -94
  13. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/sqlite.py +8 -7
  14. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/warehouse.py +58 -46
  15. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/dataset.py +88 -45
  16. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/arrow.py +23 -1
  17. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dataset_info.py +2 -1
  18. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/csv.py +1 -0
  19. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/datachain.py +38 -16
  20. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/datasets.py +28 -7
  21. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/storage.py +10 -2
  22. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/listing.py +2 -0
  23. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/pytorch.py +2 -2
  24. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/udf.py +17 -5
  25. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/listing.py +1 -1
  26. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/batch.py +40 -39
  27. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/dataset.py +42 -41
  28. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/dispatch.py +137 -75
  29. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/metrics.py +1 -2
  30. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/queue.py +1 -11
  31. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/session.py +2 -2
  32. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/udf.py +1 -1
  33. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/utils.py +8 -14
  34. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/remote/studio.py +4 -4
  35. datachain-0.17.0/src/datachain/semver.py +58 -0
  36. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/studio.py +1 -1
  37. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/utils.py +3 -0
  38. {datachain-0.16.4 → datachain-0.17.0/src/datachain.egg-info}/PKG-INFO +1 -1
  39. {datachain-0.16.4 → datachain-0.17.0}/src/datachain.egg-info/SOURCES.txt +4 -1
  40. {datachain-0.16.4 → datachain-0.17.0}/tests/conftest.py +39 -12
  41. datachain-0.17.0/tests/func/test_batching.py +243 -0
  42. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_catalog.py +8 -8
  43. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_datachain.py +48 -88
  44. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_dataset_query.py +82 -74
  45. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_datasets.py +59 -247
  46. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_hidden_field.py +1 -1
  47. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_ls.py +1 -1
  48. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_metrics.py +11 -2
  49. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_pull.py +18 -18
  50. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_query.py +13 -10
  51. {datachain-0.16.4 → datachain-0.17.0}/tests/test_cli_e2e.py +4 -4
  52. {datachain-0.16.4 → datachain-0.17.0}/tests/test_cli_studio.py +26 -22
  53. {datachain-0.16.4 → datachain-0.17.0}/tests/test_query_e2e.py +3 -2
  54. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_datachain.py +88 -21
  55. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_datachain_bootstrap.py +3 -1
  56. datachain-0.17.0/tests/unit/lib/test_udf.py +36 -0
  57. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_dataset.py +2 -2
  58. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_dispatch.py +1 -15
  59. datachain-0.17.0/tests/unit/test_semver.py +71 -0
  60. {datachain-0.16.4 → datachain-0.17.0}/tests/utils.py +1 -1
  61. datachain-0.16.4/file +0 -1
  62. {datachain-0.16.4 → datachain-0.17.0}/.cruft.json +0 -0
  63. {datachain-0.16.4 → datachain-0.17.0}/.gitattributes +0 -0
  64. {datachain-0.16.4 → datachain-0.17.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  65. {datachain-0.16.4 → datachain-0.17.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  66. {datachain-0.16.4 → datachain-0.17.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  67. {datachain-0.16.4 → datachain-0.17.0}/.github/codecov.yaml +0 -0
  68. {datachain-0.16.4 → datachain-0.17.0}/.github/dependabot.yml +0 -0
  69. {datachain-0.16.4 → datachain-0.17.0}/.github/workflows/benchmarks.yml +0 -0
  70. {datachain-0.16.4 → datachain-0.17.0}/.github/workflows/release.yml +0 -0
  71. {datachain-0.16.4 → datachain-0.17.0}/.github/workflows/tests.yml +0 -0
  72. {datachain-0.16.4 → datachain-0.17.0}/.github/workflows/update-template.yaml +0 -0
  73. {datachain-0.16.4 → datachain-0.17.0}/.gitignore +0 -0
  74. {datachain-0.16.4 → datachain-0.17.0}/CODE_OF_CONDUCT.rst +0 -0
  75. {datachain-0.16.4 → datachain-0.17.0}/LICENSE +0 -0
  76. {datachain-0.16.4 → datachain-0.17.0}/README.rst +0 -0
  77. {datachain-0.16.4 → datachain-0.17.0}/docs/assets/captioned_cartoons.png +0 -0
  78. {datachain-0.16.4 → datachain-0.17.0}/docs/assets/datachain-white.svg +0 -0
  79. {datachain-0.16.4 → datachain-0.17.0}/docs/assets/datachain.svg +0 -0
  80. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/auth/login.md +0 -0
  81. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/auth/logout.md +0 -0
  82. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/auth/team.md +0 -0
  83. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/auth/token.md +0 -0
  84. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/index.md +0 -0
  85. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/job/cancel.md +0 -0
  86. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/job/logs.md +0 -0
  87. {datachain-0.16.4 → datachain-0.17.0}/docs/commands/job/run.md +0 -0
  88. {datachain-0.16.4 → datachain-0.17.0}/docs/contributing.md +0 -0
  89. {datachain-0.16.4 → datachain-0.17.0}/docs/css/github-permalink-style.css +0 -0
  90. {datachain-0.16.4 → datachain-0.17.0}/docs/index.md +0 -0
  91. {datachain-0.16.4 → datachain-0.17.0}/docs/overrides/main.html +0 -0
  92. {datachain-0.16.4 → datachain-0.17.0}/docs/quick-start.md +0 -0
  93. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/arrowrow.md +0 -0
  94. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/bbox.md +0 -0
  95. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/file.md +0 -0
  96. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/imagefile.md +0 -0
  97. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/index.md +0 -0
  98. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/pose.md +0 -0
  99. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/segment.md +0 -0
  100. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/tarvfile.md +0 -0
  101. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/textfile.md +0 -0
  102. {datachain-0.16.4 → datachain-0.17.0}/docs/references/data-types/videofile.md +0 -0
  103. {datachain-0.16.4 → datachain-0.17.0}/docs/references/datachain.md +0 -0
  104. {datachain-0.16.4 → datachain-0.17.0}/docs/references/func.md +0 -0
  105. {datachain-0.16.4 → datachain-0.17.0}/docs/references/index.md +0 -0
  106. {datachain-0.16.4 → datachain-0.17.0}/docs/references/remotes.md +0 -0
  107. {datachain-0.16.4 → datachain-0.17.0}/docs/references/toolkit.md +0 -0
  108. {datachain-0.16.4 → datachain-0.17.0}/docs/references/torch.md +0 -0
  109. {datachain-0.16.4 → datachain-0.17.0}/docs/references/udf.md +0 -0
  110. {datachain-0.16.4 → datachain-0.17.0}/docs/tutorials.md +0 -0
  111. {datachain-0.16.4 → datachain-0.17.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  112. {datachain-0.16.4 → datachain-0.17.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  113. {datachain-0.16.4 → datachain-0.17.0}/examples/computer_vision/openimage-detect.py +0 -0
  114. {datachain-0.16.4 → datachain-0.17.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  115. {datachain-0.16.4 → datachain-0.17.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  116. {datachain-0.16.4 → datachain-0.17.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  117. {datachain-0.16.4 → datachain-0.17.0}/examples/get_started/common_sql_functions.py +0 -0
  118. {datachain-0.16.4 → datachain-0.17.0}/examples/get_started/json-csv-reader.py +0 -0
  119. {datachain-0.16.4 → datachain-0.17.0}/examples/get_started/torch-loader.py +0 -0
  120. {datachain-0.16.4 → datachain-0.17.0}/examples/get_started/udfs/parallel.py +0 -0
  121. {datachain-0.16.4 → datachain-0.17.0}/examples/get_started/udfs/simple.py +0 -0
  122. {datachain-0.16.4 → datachain-0.17.0}/examples/get_started/udfs/stateful.py +0 -0
  123. {datachain-0.16.4 → datachain-0.17.0}/examples/llm_and_nlp/claude-query.py +0 -0
  124. {datachain-0.16.4 → datachain-0.17.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  125. {datachain-0.16.4 → datachain-0.17.0}/examples/multimodal/clip_inference.py +0 -0
  126. {datachain-0.16.4 → datachain-0.17.0}/examples/multimodal/hf_pipeline.py +0 -0
  127. {datachain-0.16.4 → datachain-0.17.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  128. {datachain-0.16.4 → datachain-0.17.0}/examples/multimodal/wds.py +0 -0
  129. {datachain-0.16.4 → datachain-0.17.0}/examples/multimodal/wds_filtered.py +0 -0
  130. {datachain-0.16.4 → datachain-0.17.0}/mkdocs.yml +0 -0
  131. {datachain-0.16.4 → datachain-0.17.0}/noxfile.py +0 -0
  132. {datachain-0.16.4 → datachain-0.17.0}/pyproject.toml +0 -0
  133. {datachain-0.16.4 → datachain-0.17.0}/setup.cfg +0 -0
  134. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/__init__.py +0 -0
  135. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/__main__.py +0 -0
  136. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/asyn.py +0 -0
  137. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cache.py +0 -0
  138. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/catalog/__init__.py +0 -0
  139. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/catalog/datasource.py +0 -0
  140. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/catalog/loader.py +0 -0
  141. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/__init__.py +0 -0
  142. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/du.py +0 -0
  143. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/index.py +0 -0
  144. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/ls.py +0 -0
  145. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/commands/misc.py +0 -0
  146. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/parser/job.py +0 -0
  147. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/parser/studio.py +0 -0
  148. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/parser/utils.py +0 -0
  149. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/cli/utils.py +0 -0
  150. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/__init__.py +0 -0
  151. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/azure.py +0 -0
  152. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/fileslice.py +0 -0
  153. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/fsspec.py +0 -0
  154. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/gcs.py +0 -0
  155. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/hf.py +0 -0
  156. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/local.py +0 -0
  157. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/client/s3.py +0 -0
  158. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/config.py +0 -0
  159. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/__init__.py +0 -0
  160. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/db_engine.py +0 -0
  161. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/schema.py +0 -0
  162. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/data_storage/serializer.py +0 -0
  163. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/diff/__init__.py +0 -0
  164. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/error.py +0 -0
  165. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/fs/__init__.py +0 -0
  166. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/fs/reference.py +0 -0
  167. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/fs/utils.py +0 -0
  168. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/__init__.py +0 -0
  169. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/aggregate.py +0 -0
  170. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/array.py +0 -0
  171. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/base.py +0 -0
  172. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/conditional.py +0 -0
  173. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/func.py +0 -0
  174. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/numeric.py +0 -0
  175. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/path.py +0 -0
  176. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/random.py +0 -0
  177. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/string.py +0 -0
  178. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/func/window.py +0 -0
  179. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/job.py +0 -0
  180. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/__init__.py +0 -0
  181. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/clip.py +0 -0
  182. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/convert/__init__.py +0 -0
  183. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/convert/flatten.py +0 -0
  184. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  185. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  186. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/convert/unflatten.py +0 -0
  187. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  188. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/data_model.py +0 -0
  189. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/__init__.py +0 -0
  190. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/database.py +0 -0
  191. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/hf.py +0 -0
  192. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/json.py +0 -0
  193. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/listings.py +0 -0
  194. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/pandas.py +0 -0
  195. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/parquet.py +0 -0
  196. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/records.py +0 -0
  197. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/utils.py +0 -0
  198. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/dc/values.py +0 -0
  199. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/file.py +0 -0
  200. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/hf.py +0 -0
  201. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/image.py +0 -0
  202. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/listing_info.py +0 -0
  203. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/meta_formats.py +0 -0
  204. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/model_store.py +0 -0
  205. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/settings.py +0 -0
  206. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/signal_schema.py +0 -0
  207. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/tar.py +0 -0
  208. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/text.py +0 -0
  209. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/udf_signature.py +0 -0
  210. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/utils.py +0 -0
  211. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/video.py +0 -0
  212. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/webdataset.py +0 -0
  213. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/lib/webdataset_laion.py +0 -0
  214. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/__init__.py +0 -0
  215. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/bbox.py +0 -0
  216. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/pose.py +0 -0
  217. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/segment.py +0 -0
  218. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  219. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  220. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/ultralytics/pose.py +0 -0
  221. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/ultralytics/segment.py +0 -0
  222. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/model/utils.py +0 -0
  223. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/node.py +0 -0
  224. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/nodes_fetcher.py +0 -0
  225. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/nodes_thread_pool.py +0 -0
  226. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/progress.py +0 -0
  227. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/py.typed +0 -0
  228. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/__init__.py +0 -0
  229. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/params.py +0 -0
  230. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/query/schema.py +0 -0
  231. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/remote/__init__.py +0 -0
  232. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/script_meta.py +0 -0
  233. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/__init__.py +0 -0
  234. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/default/__init__.py +0 -0
  235. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/default/base.py +0 -0
  236. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/__init__.py +0 -0
  237. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/aggregate.py +0 -0
  238. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/array.py +0 -0
  239. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/conditional.py +0 -0
  240. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/numeric.py +0 -0
  241. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/path.py +0 -0
  242. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/random.py +0 -0
  243. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/functions/string.py +0 -0
  244. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/selectable.py +0 -0
  245. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  246. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/sqlite/base.py +0 -0
  247. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/sqlite/types.py +0 -0
  248. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/sqlite/vector.py +0 -0
  249. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/types.py +0 -0
  250. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/sql/utils.py +0 -0
  251. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/telemetry.py +0 -0
  252. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/toolkit/__init__.py +0 -0
  253. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/toolkit/split.py +0 -0
  254. {datachain-0.16.4 → datachain-0.17.0}/src/datachain/torch/__init__.py +0 -0
  255. {datachain-0.16.4 → datachain-0.17.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  256. {datachain-0.16.4 → datachain-0.17.0}/src/datachain.egg-info/entry_points.txt +0 -0
  257. {datachain-0.16.4 → datachain-0.17.0}/src/datachain.egg-info/requires.txt +0 -0
  258. {datachain-0.16.4 → datachain-0.17.0}/src/datachain.egg-info/top_level.txt +0 -0
  259. {datachain-0.16.4 → datachain-0.17.0}/tests/__init__.py +0 -0
  260. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/__init__.py +0 -0
  261. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/conftest.py +0 -0
  262. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  263. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  264. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/datasets/.gitignore +0 -0
  265. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  266. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/test_datachain.py +0 -0
  267. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/test_ls.py +0 -0
  268. {datachain-0.16.4 → datachain-0.17.0}/tests/benchmarks/test_version.py +0 -0
  269. {datachain-0.16.4 → datachain-0.17.0}/tests/data.py +0 -0
  270. {datachain-0.16.4 → datachain-0.17.0}/tests/examples/__init__.py +0 -0
  271. {datachain-0.16.4 → datachain-0.17.0}/tests/examples/test_examples.py +0 -0
  272. {datachain-0.16.4 → datachain-0.17.0}/tests/examples/test_wds_e2e.py +0 -0
  273. {datachain-0.16.4 → datachain-0.17.0}/tests/examples/wds_data.py +0 -0
  274. {datachain-0.16.4 → datachain-0.17.0}/tests/func/__init__.py +0 -0
  275. {datachain-0.16.4 → datachain-0.17.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  276. {datachain-0.16.4 → datachain-0.17.0}/tests/func/data/lena.jpg +0 -0
  277. {datachain-0.16.4 → datachain-0.17.0}/tests/func/fake-service-account-credentials.json +0 -0
  278. {datachain-0.16.4 → datachain-0.17.0}/tests/func/model/__init__.py +0 -0
  279. {datachain-0.16.4 → datachain-0.17.0}/tests/func/model/data/running-mask0.png +0 -0
  280. {datachain-0.16.4 → datachain-0.17.0}/tests/func/model/data/running-mask1.png +0 -0
  281. {datachain-0.16.4 → datachain-0.17.0}/tests/func/model/data/running.jpg +0 -0
  282. {datachain-0.16.4 → datachain-0.17.0}/tests/func/model/data/ships.jpg +0 -0
  283. {datachain-0.16.4 → datachain-0.17.0}/tests/func/model/test_yolo.py +0 -0
  284. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_client.py +0 -0
  285. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_cloud_transfer.py +0 -0
  286. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_data_storage.py +0 -0
  287. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_datachain_merge.py +0 -0
  288. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_feature_pickling.py +0 -0
  289. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_file.py +0 -0
  290. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_func.py +0 -0
  291. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_hf.py +0 -0
  292. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_image.py +0 -0
  293. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_listing.py +0 -0
  294. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_meta_formats.py +0 -0
  295. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_pytorch.py +0 -0
  296. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_read_database.py +0 -0
  297. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_session.py +0 -0
  298. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_toolkit.py +0 -0
  299. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_video.py +0 -0
  300. {datachain-0.16.4 → datachain-0.17.0}/tests/func/test_warehouse.py +0 -0
  301. {datachain-0.16.4 → datachain-0.17.0}/tests/scripts/feature_class.py +0 -0
  302. {datachain-0.16.4 → datachain-0.17.0}/tests/scripts/feature_class_exception.py +0 -0
  303. {datachain-0.16.4 → datachain-0.17.0}/tests/scripts/feature_class_parallel.py +0 -0
  304. {datachain-0.16.4 → datachain-0.17.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  305. {datachain-0.16.4 → datachain-0.17.0}/tests/scripts/name_len_slow.py +0 -0
  306. {datachain-0.16.4 → datachain-0.17.0}/tests/test_atomicity.py +0 -0
  307. {datachain-0.16.4 → datachain-0.17.0}/tests/test_import_time.py +0 -0
  308. {datachain-0.16.4 → datachain-0.17.0}/tests/test_telemetry.py +0 -0
  309. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/__init__.py +0 -0
  310. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/__init__.py +0 -0
  311. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/conftest.py +0 -0
  312. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_arrow.py +0 -0
  313. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_clip.py +0 -0
  314. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  315. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_diff.py +0 -0
  316. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_feature.py +0 -0
  317. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_feature_utils.py +0 -0
  318. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_file.py +0 -0
  319. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_hf.py +0 -0
  320. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_image.py +0 -0
  321. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_listing_info.py +0 -0
  322. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  323. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_schema.py +0 -0
  324. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_signal_schema.py +0 -0
  325. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  326. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_text.py +0 -0
  327. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_udf_signature.py +0 -0
  328. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_utils.py +0 -0
  329. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/lib/test_webdataset.py +0 -0
  330. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/model/__init__.py +0 -0
  331. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/model/test_bbox.py +0 -0
  332. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/model/test_pose.py +0 -0
  333. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/model/test_segment.py +0 -0
  334. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/model/test_utils.py +0 -0
  335. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/__init__.py +0 -0
  336. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  337. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  338. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  339. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/test_array.py +0 -0
  340. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/test_conditional.py +0 -0
  341. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/test_path.py +0 -0
  342. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/test_random.py +0 -0
  343. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/test_selectable.py +0 -0
  344. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/sql/test_string.py +0 -0
  345. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_asyn.py +0 -0
  346. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_cache.py +0 -0
  347. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_catalog.py +0 -0
  348. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_catalog_loader.py +0 -0
  349. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_cli_parsing.py +0 -0
  350. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_client.py +0 -0
  351. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_client_gcs.py +0 -0
  352. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_client_s3.py +0 -0
  353. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_config.py +0 -0
  354. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_data_storage.py +0 -0
  355. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_database_engine.py +0 -0
  356. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_fileslice.py +0 -0
  357. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_func.py +0 -0
  358. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_listing.py +0 -0
  359. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_metastore.py +0 -0
  360. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_module_exports.py +0 -0
  361. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_pytorch.py +0 -0
  362. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_query.py +0 -0
  363. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_query_metrics.py +0 -0
  364. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_query_params.py +0 -0
  365. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_script_meta.py +0 -0
  366. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_serializer.py +0 -0
  367. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_session.py +0 -0
  368. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_utils.py +0 -0
  369. {datachain-0.16.4 → datachain-0.17.0}/tests/unit/test_warehouse.py +0 -0
@@ -98,6 +98,7 @@ jobs:
98
98
  - name: Run tests
99
99
  # Generate `.test_durations` file with `pytest --store-durations --durations-path ../.github/.test_durations ...`
100
100
  run: >
101
+ PYTHONPATH="$(pwd)/..:${PYTHONPATH}"
101
102
  pytest
102
103
  --config-file=pyproject.toml -rs
103
104
  --splits=6 --group=${{ matrix.group }} --durations-path=../../.github/.test_durations
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.11.7'
27
+ rev: 'v0.11.8'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.16.4
3
+ Version: 0.17.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -167,7 +167,7 @@ dialog-rating@v2
167
167
  By default, when a saved dataset is loaded, the latest version is fetched but another version can be requested:
168
168
 
169
169
  ```python
170
- ds = dc.read_dataset("dialog-rating", version=1)
170
+ ds = dc.read_dataset("dialog-rating", version="1.0.0")
171
171
  ```
172
172
 
173
173
  ### Chain execution, optimization and parallelism
@@ -33,6 +33,7 @@ from datachain.cache import Cache
33
33
  from datachain.client import Client
34
34
  from datachain.dataset import (
35
35
  DATASET_PREFIX,
36
+ DEFAULT_DATASET_VERSION,
36
37
  QUERY_DATASET_PREFIX,
37
38
  DatasetDependency,
38
39
  DatasetListRecord,
@@ -79,6 +80,7 @@ DATASET_INTERNAL_ERROR_MESSAGE = "Internal error on creating dataset"
79
80
  QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE = 10
80
81
  # exit code we use if query script was canceled
81
82
  QUERY_SCRIPT_CANCELED_EXIT_CODE = 11
83
+ QUERY_SCRIPT_SIGTERM_EXIT_CODE = -15 # if query script was terminated by SIGTERM
82
84
 
83
85
  # dataset pull
84
86
  PULL_DATASET_MAX_THREADS = 5
@@ -153,9 +155,9 @@ class DatasetRowsFetcher(NodesThreadPool):
153
155
  metastore: "AbstractMetastore",
154
156
  warehouse: "AbstractWarehouse",
155
157
  remote_ds_name: str,
156
- remote_ds_version: int,
158
+ remote_ds_version: str,
157
159
  local_ds_name: str,
158
- local_ds_version: int,
160
+ local_ds_version: str,
159
161
  schema: dict[str, Union[SQLType, type[SQLType]]],
160
162
  max_threads: int = PULL_DATASET_MAX_THREADS,
161
163
  progress_bar=None,
@@ -285,7 +287,7 @@ class NodeGroup:
285
287
  # (not including the bucket name or s3:// prefix)
286
288
  source_path: str = ""
287
289
  dataset_name: Optional[str] = None
288
- dataset_version: Optional[int] = None
290
+ dataset_version: Optional[str] = None
289
291
  instantiated_nodes: Optional[list[NodeWithPath]] = None
290
292
 
291
293
  @property
@@ -606,7 +608,7 @@ class Catalog:
606
608
  return lst, client, list_path
607
609
 
608
610
  def _remove_dataset_rows_and_warehouse_info(
609
- self, dataset: DatasetRecord, version: int, **kwargs
611
+ self, dataset: DatasetRecord, version: str, **kwargs
610
612
  ):
611
613
  self.warehouse.drop_dataset_rows_table(dataset, version)
612
614
  self.update_dataset_version_with_warehouse_info(
@@ -766,7 +768,7 @@ class Catalog:
766
768
  def create_dataset(
767
769
  self,
768
770
  name: str,
769
- version: Optional[int] = None,
771
+ version: Optional[str] = None,
770
772
  *,
771
773
  columns: Sequence[Column],
772
774
  feature_schema: Optional[dict] = None,
@@ -782,18 +784,17 @@ class Catalog:
782
784
  Creates new dataset of a specific version.
783
785
  If dataset is not yet created, it will create it with version 1
784
786
  If version is None, then next unused version is created.
785
- If version is given, then it must be an unused version number.
787
+ If version is given, then it must be an unused version.
786
788
  """
787
789
  assert [c.name for c in columns if c.name != "sys__id"], f"got {columns=}"
788
790
  if not listing and Client.is_data_source_uri(name):
789
791
  raise RuntimeError(
790
792
  "Cannot create dataset that starts with source prefix, e.g s3://"
791
793
  )
792
- default_version = 1
794
+ default_version = DEFAULT_DATASET_VERSION
793
795
  try:
794
796
  dataset = self.get_dataset(name)
795
- default_version = dataset.next_version
796
-
797
+ default_version = dataset.next_version_patch
797
798
  if (description or attrs) and (
798
799
  dataset.description != description or dataset.attrs != attrs
799
800
  ):
@@ -845,7 +846,7 @@ class Catalog:
845
846
  def create_new_dataset_version(
846
847
  self,
847
848
  dataset: DatasetRecord,
848
- version: int,
849
+ version: str,
849
850
  *,
850
851
  columns: Sequence[Column],
851
852
  sources="",
@@ -891,7 +892,7 @@ class Catalog:
891
892
  return dataset
892
893
 
893
894
  def update_dataset_version_with_warehouse_info(
894
- self, dataset: DatasetRecord, version: int, rows_dropped=False, **kwargs
895
+ self, dataset: DatasetRecord, version: str, rows_dropped=False, **kwargs
895
896
  ) -> None:
896
897
  from datachain.query.dataset import DatasetQuery
897
898
 
@@ -958,7 +959,7 @@ class Catalog:
958
959
  return dataset
959
960
 
960
961
  def remove_dataset_version(
961
- self, dataset: DatasetRecord, version: int, drop_rows: Optional[bool] = True
962
+ self, dataset: DatasetRecord, version: str, drop_rows: Optional[bool] = True
962
963
  ) -> None:
963
964
  """
964
965
  Deletes one single dataset version.
@@ -1036,82 +1037,11 @@ class Catalog:
1036
1037
 
1037
1038
  return self.get_dataset(name)
1038
1039
 
1039
- def register_dataset(
1040
- self,
1041
- dataset: DatasetRecord,
1042
- version: int,
1043
- target_dataset: DatasetRecord,
1044
- target_version: Optional[int] = None,
1045
- ) -> DatasetRecord:
1046
- """
1047
- Registers dataset version of one dataset as dataset version of another
1048
- one (it can be new version of existing one).
1049
- It also removes original dataset version
1050
- """
1051
- target_version = target_version or target_dataset.next_version
1052
-
1053
- if not target_dataset.is_valid_next_version(target_version):
1054
- raise DatasetInvalidVersionError(
1055
- f"Version {target_version} must be higher than the current latest one"
1056
- )
1057
-
1058
- dataset_version = dataset.get_version(version)
1059
- if not dataset_version:
1060
- raise DatasetVersionNotFoundError(
1061
- f"Dataset {dataset.name} does not have version {version}"
1062
- )
1063
-
1064
- if not dataset_version.is_final_status():
1065
- raise ValueError("Cannot register dataset version in non final status")
1066
-
1067
- # copy dataset version
1068
- target_dataset = self.metastore.create_dataset_version(
1069
- target_dataset,
1070
- target_version,
1071
- sources=dataset_version.sources,
1072
- status=dataset_version.status,
1073
- query_script=dataset_version.query_script,
1074
- error_message=dataset_version.error_message,
1075
- error_stack=dataset_version.error_stack,
1076
- script_output=dataset_version.script_output,
1077
- created_at=dataset_version.created_at,
1078
- finished_at=dataset_version.finished_at,
1079
- schema=dataset_version.serialized_schema,
1080
- num_objects=dataset_version.num_objects,
1081
- size=dataset_version.size,
1082
- preview=dataset_version.preview,
1083
- job_id=dataset_version.job_id,
1084
- )
1085
-
1086
- # to avoid re-creating rows table, we are just renaming it for a new version
1087
- # of target dataset
1088
- self.warehouse.rename_dataset_table(
1089
- dataset.name,
1090
- target_dataset.name,
1091
- old_version=version,
1092
- new_version=target_version,
1093
- )
1094
- self.metastore.update_dataset_dependency_source(
1095
- dataset,
1096
- version,
1097
- new_source_dataset=target_dataset,
1098
- new_source_dataset_version=target_version,
1099
- )
1100
-
1101
- if dataset.id == target_dataset.id:
1102
- # we are updating the same dataset so we need to refresh it to have newly
1103
- # added version in step before
1104
- dataset = self.get_dataset(dataset.name)
1105
-
1106
- self.remove_dataset_version(dataset, version, drop_rows=False)
1107
-
1108
- return self.get_dataset(target_dataset.name)
1109
-
1110
1040
  def get_dataset(self, name: str) -> DatasetRecord:
1111
1041
  return self.metastore.get_dataset(name)
1112
1042
 
1113
1043
  def get_dataset_with_remote_fallback(
1114
- self, name: str, version: Optional[int] = None
1044
+ self, name: str, version: Optional[str] = None
1115
1045
  ) -> DatasetRecord:
1116
1046
  try:
1117
1047
  ds = self.get_dataset(name)
@@ -1156,7 +1086,7 @@ class Catalog:
1156
1086
  return DatasetRecord.from_dict(dataset_info)
1157
1087
 
1158
1088
  def get_dataset_dependencies(
1159
- self, name: str, version: int, indirect=False
1089
+ self, name: str, version: str, indirect=False
1160
1090
  ) -> list[Optional[DatasetDependency]]:
1161
1091
  dataset = self.get_dataset(name)
1162
1092
 
@@ -1174,7 +1104,7 @@ class Catalog:
1174
1104
  if d.is_dataset:
1175
1105
  # only datasets can have dependencies
1176
1106
  d.dependencies = self.get_dataset_dependencies(
1177
- d.name, int(d.version), indirect=indirect
1107
+ d.name, d.version, indirect=indirect
1178
1108
  )
1179
1109
 
1180
1110
  return direct_dependencies
@@ -1243,7 +1173,7 @@ class Catalog:
1243
1173
  ]
1244
1174
 
1245
1175
  def ls_dataset_rows(
1246
- self, name: str, version: int, offset=None, limit=None
1176
+ self, name: str, version: str, offset=None, limit=None
1247
1177
  ) -> list[dict]:
1248
1178
  from datachain.query.dataset import DatasetQuery
1249
1179
 
@@ -1281,7 +1211,7 @@ class Catalog:
1281
1211
  self,
1282
1212
  bucket_uri: str,
1283
1213
  name: str,
1284
- version: int,
1214
+ version: str,
1285
1215
  client_config=None,
1286
1216
  ) -> list[str]:
1287
1217
  dataset = self.get_dataset(name)
@@ -1290,14 +1220,14 @@ class Catalog:
1290
1220
  bucket_uri, dataset, version, client_config
1291
1221
  )
1292
1222
 
1293
- def dataset_table_export_file_names(self, name: str, version: int) -> list[str]:
1223
+ def dataset_table_export_file_names(self, name: str, version: str) -> list[str]:
1294
1224
  dataset = self.get_dataset(name)
1295
1225
  return self.warehouse.dataset_table_export_file_names(dataset, version)
1296
1226
 
1297
1227
  def remove_dataset(
1298
1228
  self,
1299
1229
  name: str,
1300
- version: Optional[int] = None,
1230
+ version: Optional[str] = None,
1301
1231
  force: Optional[bool] = False,
1302
1232
  studio: Optional[bool] = False,
1303
1233
  ):
@@ -1371,7 +1301,7 @@ class Catalog:
1371
1301
  remote_ds_uri: str,
1372
1302
  output: Optional[str] = None,
1373
1303
  local_ds_name: Optional[str] = None,
1374
- local_ds_version: Optional[int] = None,
1304
+ local_ds_version: Optional[str] = None,
1375
1305
  cp: bool = False,
1376
1306
  force: bool = False,
1377
1307
  *,
@@ -1645,7 +1575,10 @@ class Catalog:
1645
1575
  thread.join() # wait for the reader thread
1646
1576
 
1647
1577
  logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1648
- if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
1578
+ if proc.returncode in (
1579
+ QUERY_SCRIPT_CANCELED_EXIT_CODE,
1580
+ QUERY_SCRIPT_SIGTERM_EXIT_CODE,
1581
+ ):
1649
1582
  raise QueryScriptCancelError(
1650
1583
  "Query script was canceled by user",
1651
1584
  return_code=proc.returncode,
@@ -34,8 +34,10 @@ def main(argv: Optional[list[str]] = None) -> int:
34
34
  datachain_parser = get_parser()
35
35
  args = datachain_parser.parse_args(argv)
36
36
 
37
- if args.command in ("internal-run-udf", "internal-run-udf-worker"):
38
- return handle_udf(args.command)
37
+ if args.command == "internal-run-udf":
38
+ return handle_udf()
39
+ if args.command == "internal-run-udf-worker":
40
+ return handle_udf_runner(args.fd)
39
41
 
40
42
  if args.command is None:
41
43
  datachain_parser.print_help(sys.stderr)
@@ -303,13 +305,13 @@ def handle_general_exception(exc, args, logging_level):
303
305
  return error, 1
304
306
 
305
307
 
306
- def handle_udf(command):
307
- if command == "internal-run-udf":
308
- from datachain.query.dispatch import udf_entrypoint
308
+ def handle_udf() -> int:
309
+ from datachain.query.dispatch import udf_entrypoint
309
310
 
310
- return udf_entrypoint()
311
+ return udf_entrypoint()
311
312
 
312
- if command == "internal-run-udf-worker":
313
- from datachain.query.dispatch import udf_worker_entrypoint
314
313
 
315
- return udf_worker_entrypoint()
314
+ def handle_udf_runner(fd: Optional[int] = None) -> int:
315
+ from datachain.query.dispatch import udf_worker_entrypoint
316
+
317
+ return udf_worker_entrypoint(fd)
@@ -127,7 +127,7 @@ def _datasets_tabulate_row(name, both, local_version, studio_version):
127
127
  def rm_dataset(
128
128
  catalog: "Catalog",
129
129
  name: str,
130
- version: Optional[int] = None,
130
+ version: Optional[str] = None,
131
131
  force: Optional[bool] = False,
132
132
  studio: bool = False,
133
133
  local: bool = False,
@@ -29,6 +29,7 @@ def query(
29
29
  name=os.path.basename(script),
30
30
  query=script_content,
31
31
  query_type=JobQueryType.PYTHON,
32
+ status=JobStatus.RUNNING,
32
33
  python_version=python_version,
33
34
  params=params,
34
35
  )
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
10
10
  def show(
11
11
  catalog: "Catalog",
12
12
  name: str,
13
- version: Optional[int] = None,
13
+ version: Optional[str] = None,
14
14
  limit: int = 10,
15
15
  offset: int = 0,
16
16
  columns: Sequence[str] = (),
@@ -302,7 +302,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
302
302
  "--version",
303
303
  action="store",
304
304
  default=None,
305
- type=int,
305
+ type=str,
306
306
  help="Dataset version",
307
307
  )
308
308
  rm_dataset_parser.add_argument(
@@ -495,7 +495,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
495
495
  "--version",
496
496
  action="store",
497
497
  default=None,
498
- type=int,
498
+ type=str,
499
499
  help="Dataset version",
500
500
  )
501
501
  show_parser.add_argument("--schema", action="store_true", help="Show schema")
@@ -549,7 +549,15 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
549
549
  add_anon_arg(parse_gc)
550
550
 
551
551
  subp.add_parser("internal-run-udf", parents=[parent_parser])
552
- subp.add_parser("internal-run-udf-worker", parents=[parent_parser])
552
+ run_udf_worker = subp.add_parser("internal-run-udf-worker", parents=[parent_parser])
553
+ run_udf_worker.add_argument(
554
+ "--fd",
555
+ type=int,
556
+ action="store",
557
+ default=None,
558
+ help="File descriptor to write results to",
559
+ )
560
+
553
561
  add_completion_parser(subp, [parent_parser])
554
562
  return parser
555
563
 
@@ -3,6 +3,7 @@ from enum import Enum
3
3
 
4
4
  class JobStatus(int, Enum):
5
5
  CREATED = 1
6
+ SCHEDULED = 10
6
7
  QUEUED = 2
7
8
  INIT = 3
8
9
  RUNNING = 4