datachain 0.33.1__tar.gz → 0.34.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (439)
  1. {datachain-0.33.1 → datachain-0.34.0}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.33.1 → datachain-0.34.0}/PKG-INFO +1 -1
  3. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/metastore.py +22 -1
  4. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/error.py +4 -0
  5. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/job.py +3 -0
  6. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/datachain.py +158 -70
  7. {datachain-0.33.1 → datachain-0.34.0}/src/datachain.egg-info/PKG-INFO +1 -1
  8. {datachain-0.33.1 → datachain-0.34.0}/src/datachain.egg-info/SOURCES.txt +1 -0
  9. {datachain-0.33.1 → datachain-0.34.0}/tests/conftest.py +6 -3
  10. datachain-0.34.0/tests/unit/lib/test_checkpoints.py +200 -0
  11. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_datachain.py +1 -1
  12. {datachain-0.33.1 → datachain-0.34.0}/.cruft.json +0 -0
  13. {datachain-0.33.1 → datachain-0.34.0}/.gitattributes +0 -0
  14. {datachain-0.33.1 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  15. {datachain-0.33.1 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  16. {datachain-0.33.1 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  17. {datachain-0.33.1 → datachain-0.34.0}/.github/codecov.yaml +0 -0
  18. {datachain-0.33.1 → datachain-0.34.0}/.github/dependabot.yml +0 -0
  19. {datachain-0.33.1 → datachain-0.34.0}/.github/workflows/benchmarks.yml +0 -0
  20. {datachain-0.33.1 → datachain-0.34.0}/.github/workflows/release.yml +0 -0
  21. {datachain-0.33.1 → datachain-0.34.0}/.github/workflows/tests-studio.yml +0 -0
  22. {datachain-0.33.1 → datachain-0.34.0}/.github/workflows/tests.yml +0 -0
  23. {datachain-0.33.1 → datachain-0.34.0}/.github/workflows/update-template.yaml +0 -0
  24. {datachain-0.33.1 → datachain-0.34.0}/.gitignore +0 -0
  25. {datachain-0.33.1 → datachain-0.34.0}/CODE_OF_CONDUCT.rst +0 -0
  26. {datachain-0.33.1 → datachain-0.34.0}/LICENSE +0 -0
  27. {datachain-0.33.1 → datachain-0.34.0}/README.rst +0 -0
  28. {datachain-0.33.1 → datachain-0.34.0}/docs/api_hooks.py +0 -0
  29. {datachain-0.33.1 → datachain-0.34.0}/docs/assets/captioned_cartoons.png +0 -0
  30. {datachain-0.33.1 → datachain-0.34.0}/docs/assets/datachain-white.svg +0 -0
  31. {datachain-0.33.1 → datachain-0.34.0}/docs/assets/datachain.svg +0 -0
  32. {datachain-0.33.1 → datachain-0.34.0}/docs/assets/webhook_dialog.png +0 -0
  33. {datachain-0.33.1 → datachain-0.34.0}/docs/assets/webhook_list.png +0 -0
  34. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/auth/login.md +0 -0
  35. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/auth/logout.md +0 -0
  36. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/auth/team.md +0 -0
  37. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/auth/token.md +0 -0
  38. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/index.md +0 -0
  39. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/job/cancel.md +0 -0
  40. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/job/clusters.md +0 -0
  41. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/job/logs.md +0 -0
  42. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/job/ls.md +0 -0
  43. {datachain-0.33.1 → datachain-0.34.0}/docs/commands/job/run.md +0 -0
  44. {datachain-0.33.1 → datachain-0.34.0}/docs/contributing.md +0 -0
  45. {datachain-0.33.1 → datachain-0.34.0}/docs/css/github-permalink-style.css +0 -0
  46. {datachain-0.33.1 → datachain-0.34.0}/docs/examples.md +0 -0
  47. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/db_migrations.md +0 -0
  48. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/delta.md +0 -0
  49. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/env.md +0 -0
  50. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/index.md +0 -0
  51. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/namespaces.md +0 -0
  52. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/processing.md +0 -0
  53. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/remotes.md +0 -0
  54. {datachain-0.33.1 → datachain-0.34.0}/docs/guide/retry.md +0 -0
  55. {datachain-0.33.1 → datachain-0.34.0}/docs/index.md +0 -0
  56. {datachain-0.33.1 → datachain-0.34.0}/docs/overrides/main.html +0 -0
  57. {datachain-0.33.1 → datachain-0.34.0}/docs/quick-start.md +0 -0
  58. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/arrowrow.md +0 -0
  59. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/bbox.md +0 -0
  60. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/file.md +0 -0
  61. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/imagefile.md +0 -0
  62. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/index.md +0 -0
  63. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/pose.md +0 -0
  64. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/segment.md +0 -0
  65. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/tarvfile.md +0 -0
  66. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/textfile.md +0 -0
  67. {datachain-0.33.1 → datachain-0.34.0}/docs/references/data-types/videofile.md +0 -0
  68. {datachain-0.33.1 → datachain-0.34.0}/docs/references/datachain.md +0 -0
  69. {datachain-0.33.1 → datachain-0.34.0}/docs/references/func.md +0 -0
  70. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/aggregate.md +0 -0
  71. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/array.md +0 -0
  72. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/conditional.md +0 -0
  73. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/numeric.md +0 -0
  74. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/path.md +0 -0
  75. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/random.md +0 -0
  76. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/string.md +0 -0
  77. {datachain-0.33.1 → datachain-0.34.0}/docs/references/functions/window.md +0 -0
  78. {datachain-0.33.1 → datachain-0.34.0}/docs/references/index.md +0 -0
  79. {datachain-0.33.1 → datachain-0.34.0}/docs/references/toolkit.md +0 -0
  80. {datachain-0.33.1 → datachain-0.34.0}/docs/references/torch.md +0 -0
  81. {datachain-0.33.1 → datachain-0.34.0}/docs/references/udf.md +0 -0
  82. {datachain-0.33.1 → datachain-0.34.0}/docs/studio/api/.gitkeep +0 -0
  83. {datachain-0.33.1 → datachain-0.34.0}/docs/studio/webhooks.md +0 -0
  84. {datachain-0.33.1 → datachain-0.34.0}/docs/templates/main.dot +0 -0
  85. {datachain-0.33.1 → datachain-0.34.0}/docs/templates/operation.dot +0 -0
  86. {datachain-0.33.1 → datachain-0.34.0}/docs/templates/responses.def +0 -0
  87. {datachain-0.33.1 → datachain-0.34.0}/docs/tutorials.md +0 -0
  88. {datachain-0.33.1 → datachain-0.34.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  89. {datachain-0.33.1 → datachain-0.34.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  90. {datachain-0.33.1 → datachain-0.34.0}/examples/computer_vision/openimage-detect.py +0 -0
  91. {datachain-0.33.1 → datachain-0.34.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  92. {datachain-0.33.1 → datachain-0.34.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  93. {datachain-0.33.1 → datachain-0.34.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  94. {datachain-0.33.1 → datachain-0.34.0}/examples/get_started/common_sql_functions.py +0 -0
  95. {datachain-0.33.1 → datachain-0.34.0}/examples/get_started/json-csv-reader.py +0 -0
  96. {datachain-0.33.1 → datachain-0.34.0}/examples/get_started/nested_datamodel.py +0 -0
  97. {datachain-0.33.1 → datachain-0.34.0}/examples/get_started/torch-loader.py +0 -0
  98. {datachain-0.33.1 → datachain-0.34.0}/examples/get_started/udfs/parallel.py +0 -0
  99. {datachain-0.33.1 → datachain-0.34.0}/examples/get_started/udfs/simple.py +0 -0
  100. {datachain-0.33.1 → datachain-0.34.0}/examples/get_started/udfs/stateful.py +0 -0
  101. {datachain-0.33.1 → datachain-0.34.0}/examples/incremental_processing/delta.py +0 -0
  102. {datachain-0.33.1 → datachain-0.34.0}/examples/incremental_processing/retry.py +0 -0
  103. {datachain-0.33.1 → datachain-0.34.0}/examples/incremental_processing/utils.py +0 -0
  104. {datachain-0.33.1 → datachain-0.34.0}/examples/llm_and_nlp/claude-query.py +0 -0
  105. {datachain-0.33.1 → datachain-0.34.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  106. {datachain-0.33.1 → datachain-0.34.0}/examples/multimodal/audio-to-text.py +0 -0
  107. {datachain-0.33.1 → datachain-0.34.0}/examples/multimodal/clip_inference.py +0 -0
  108. {datachain-0.33.1 → datachain-0.34.0}/examples/multimodal/hf_pipeline.py +0 -0
  109. {datachain-0.33.1 → datachain-0.34.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  110. {datachain-0.33.1 → datachain-0.34.0}/examples/multimodal/wds.py +0 -0
  111. {datachain-0.33.1 → datachain-0.34.0}/examples/multimodal/wds_filtered.py +0 -0
  112. {datachain-0.33.1 → datachain-0.34.0}/mkdocs.yml +0 -0
  113. {datachain-0.33.1 → datachain-0.34.0}/noxfile.py +0 -0
  114. {datachain-0.33.1 → datachain-0.34.0}/pyproject.toml +0 -0
  115. {datachain-0.33.1 → datachain-0.34.0}/setup.cfg +0 -0
  116. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/__init__.py +0 -0
  117. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/__main__.py +0 -0
  118. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/asyn.py +0 -0
  119. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cache.py +0 -0
  120. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/catalog/__init__.py +0 -0
  121. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/catalog/catalog.py +0 -0
  122. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/catalog/datasource.py +0 -0
  123. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/catalog/loader.py +0 -0
  124. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/checkpoint.py +0 -0
  125. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/__init__.py +0 -0
  126. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/__init__.py +0 -0
  127. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/datasets.py +0 -0
  128. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/du.py +0 -0
  129. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/index.py +0 -0
  130. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/ls.py +0 -0
  131. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/misc.py +0 -0
  132. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/query.py +0 -0
  133. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/commands/show.py +0 -0
  134. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/parser/__init__.py +0 -0
  135. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/parser/job.py +0 -0
  136. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/parser/studio.py +0 -0
  137. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/parser/utils.py +0 -0
  138. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/cli/utils.py +0 -0
  139. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/__init__.py +0 -0
  140. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/azure.py +0 -0
  141. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/fileslice.py +0 -0
  142. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/fsspec.py +0 -0
  143. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/gcs.py +0 -0
  144. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/hf.py +0 -0
  145. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/http.py +0 -0
  146. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/local.py +0 -0
  147. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/client/s3.py +0 -0
  148. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/config.py +0 -0
  149. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/__init__.py +0 -0
  150. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/db_engine.py +0 -0
  151. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/job.py +0 -0
  152. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/schema.py +0 -0
  153. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/serializer.py +0 -0
  154. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/sqlite.py +0 -0
  155. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/data_storage/warehouse.py +0 -0
  156. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/dataset.py +0 -0
  157. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/delta.py +0 -0
  158. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/diff/__init__.py +0 -0
  159. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/fs/__init__.py +0 -0
  160. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/fs/reference.py +0 -0
  161. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/fs/utils.py +0 -0
  162. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/__init__.py +0 -0
  163. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/aggregate.py +0 -0
  164. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/array.py +0 -0
  165. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/base.py +0 -0
  166. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/conditional.py +0 -0
  167. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/func.py +0 -0
  168. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/numeric.py +0 -0
  169. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/path.py +0 -0
  170. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/random.py +0 -0
  171. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/string.py +0 -0
  172. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/func/window.py +0 -0
  173. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/hash_utils.py +0 -0
  174. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/__init__.py +0 -0
  175. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/arrow.py +0 -0
  176. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/audio.py +0 -0
  177. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/clip.py +0 -0
  178. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/convert/__init__.py +0 -0
  179. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/convert/flatten.py +0 -0
  180. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  181. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  182. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/convert/unflatten.py +0 -0
  183. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  184. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/data_model.py +0 -0
  185. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dataset_info.py +0 -0
  186. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/__init__.py +0 -0
  187. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/csv.py +0 -0
  188. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/database.py +0 -0
  189. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/datasets.py +0 -0
  190. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/hf.py +0 -0
  191. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/json.py +0 -0
  192. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/listings.py +0 -0
  193. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/pandas.py +0 -0
  194. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/parquet.py +0 -0
  195. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/records.py +0 -0
  196. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/storage.py +0 -0
  197. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/storage_pattern.py +0 -0
  198. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/utils.py +0 -0
  199. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/dc/values.py +0 -0
  200. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/file.py +0 -0
  201. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/hf.py +0 -0
  202. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/image.py +0 -0
  203. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/listing.py +0 -0
  204. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/listing_info.py +0 -0
  205. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/meta_formats.py +0 -0
  206. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/model_store.py +0 -0
  207. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/namespaces.py +0 -0
  208. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/projects.py +0 -0
  209. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/pytorch.py +0 -0
  210. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/settings.py +0 -0
  211. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/signal_schema.py +0 -0
  212. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/tar.py +0 -0
  213. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/text.py +0 -0
  214. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/udf.py +0 -0
  215. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/udf_signature.py +0 -0
  216. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/utils.py +0 -0
  217. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/video.py +0 -0
  218. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/webdataset.py +0 -0
  219. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/lib/webdataset_laion.py +0 -0
  220. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/listing.py +0 -0
  221. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/__init__.py +0 -0
  222. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/bbox.py +0 -0
  223. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/pose.py +0 -0
  224. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/segment.py +0 -0
  225. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  226. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  227. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/ultralytics/pose.py +0 -0
  228. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/ultralytics/segment.py +0 -0
  229. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/model/utils.py +0 -0
  230. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/namespace.py +0 -0
  231. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/node.py +0 -0
  232. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/nodes_fetcher.py +0 -0
  233. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/nodes_thread_pool.py +0 -0
  234. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/progress.py +0 -0
  235. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/project.py +0 -0
  236. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/py.typed +0 -0
  237. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/__init__.py +0 -0
  238. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/batch.py +0 -0
  239. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/dataset.py +0 -0
  240. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/dispatch.py +0 -0
  241. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/metrics.py +0 -0
  242. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/params.py +0 -0
  243. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/queue.py +0 -0
  244. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/schema.py +0 -0
  245. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/session.py +0 -0
  246. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/udf.py +0 -0
  247. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/query/utils.py +0 -0
  248. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/remote/__init__.py +0 -0
  249. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/remote/studio.py +0 -0
  250. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/script_meta.py +0 -0
  251. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/semver.py +0 -0
  252. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/__init__.py +0 -0
  253. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/default/__init__.py +0 -0
  254. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/default/base.py +0 -0
  255. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/__init__.py +0 -0
  256. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/aggregate.py +0 -0
  257. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/array.py +0 -0
  258. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/conditional.py +0 -0
  259. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/numeric.py +0 -0
  260. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/path.py +0 -0
  261. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/random.py +0 -0
  262. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/functions/string.py +0 -0
  263. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/postgresql_dialect.py +0 -0
  264. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/postgresql_types.py +0 -0
  265. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/selectable.py +0 -0
  266. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  267. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/sqlite/base.py +0 -0
  268. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/sqlite/types.py +0 -0
  269. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/sqlite/vector.py +0 -0
  270. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/types.py +0 -0
  271. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/sql/utils.py +0 -0
  272. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/studio.py +0 -0
  273. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/telemetry.py +0 -0
  274. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/toolkit/__init__.py +0 -0
  275. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/toolkit/split.py +0 -0
  276. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/torch/__init__.py +0 -0
  277. {datachain-0.33.1 → datachain-0.34.0}/src/datachain/utils.py +0 -0
  278. {datachain-0.33.1 → datachain-0.34.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  279. {datachain-0.33.1 → datachain-0.34.0}/src/datachain.egg-info/entry_points.txt +0 -0
  280. {datachain-0.33.1 → datachain-0.34.0}/src/datachain.egg-info/requires.txt +0 -0
  281. {datachain-0.33.1 → datachain-0.34.0}/src/datachain.egg-info/top_level.txt +0 -0
  282. {datachain-0.33.1 → datachain-0.34.0}/tests/__init__.py +0 -0
  283. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/__init__.py +0 -0
  284. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/conftest.py +0 -0
  285. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  286. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  287. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/datasets/.gitignore +0 -0
  288. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  289. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/test_datachain.py +0 -0
  290. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/test_ls.py +0 -0
  291. {datachain-0.33.1 → datachain-0.34.0}/tests/benchmarks/test_version.py +0 -0
  292. {datachain-0.33.1 → datachain-0.34.0}/tests/data.py +0 -0
  293. {datachain-0.33.1 → datachain-0.34.0}/tests/examples/__init__.py +0 -0
  294. {datachain-0.33.1 → datachain-0.34.0}/tests/examples/test_examples.py +0 -0
  295. {datachain-0.33.1 → datachain-0.34.0}/tests/examples/test_wds_e2e.py +0 -0
  296. {datachain-0.33.1 → datachain-0.34.0}/tests/examples/wds_data.py +0 -0
  297. {datachain-0.33.1 → datachain-0.34.0}/tests/func/__init__.py +0 -0
  298. {datachain-0.33.1 → datachain-0.34.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  299. {datachain-0.33.1 → datachain-0.34.0}/tests/func/data/lena.jpg +0 -0
  300. {datachain-0.33.1 → datachain-0.34.0}/tests/func/fake-service-account-credentials.json +0 -0
  301. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/__init__.py +0 -0
  302. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/test_aggregate.py +0 -0
  303. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/test_array.py +0 -0
  304. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/test_conditional.py +0 -0
  305. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/test_numeric.py +0 -0
  306. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/test_path.py +0 -0
  307. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/test_random.py +0 -0
  308. {datachain-0.33.1 → datachain-0.34.0}/tests/func/functions/test_string.py +0 -0
  309. {datachain-0.33.1 → datachain-0.34.0}/tests/func/model/__init__.py +0 -0
  310. {datachain-0.33.1 → datachain-0.34.0}/tests/func/model/data/running-mask0.png +0 -0
  311. {datachain-0.33.1 → datachain-0.34.0}/tests/func/model/data/running-mask1.png +0 -0
  312. {datachain-0.33.1 → datachain-0.34.0}/tests/func/model/data/running.jpg +0 -0
  313. {datachain-0.33.1 → datachain-0.34.0}/tests/func/model/data/ships.jpg +0 -0
  314. {datachain-0.33.1 → datachain-0.34.0}/tests/func/model/test_yolo.py +0 -0
  315. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_audio.py +0 -0
  316. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_batching.py +0 -0
  317. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_catalog.py +0 -0
  318. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_client.py +0 -0
  319. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_cloud_transfer.py +0 -0
  320. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_data_storage.py +0 -0
  321. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_datachain.py +0 -0
  322. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_datachain_merge.py +0 -0
  323. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_dataset_query.py +0 -0
  324. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_datasets.py +0 -0
  325. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_delta.py +0 -0
  326. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_feature_pickling.py +0 -0
  327. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_file.py +0 -0
  328. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_hf.py +0 -0
  329. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_hidden_field.py +0 -0
  330. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_image.py +0 -0
  331. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_listing.py +0 -0
  332. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_ls.py +0 -0
  333. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_meta_formats.py +0 -0
  334. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_metastore.py +0 -0
  335. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_metrics.py +0 -0
  336. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_mutate.py +0 -0
  337. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_pull.py +0 -0
  338. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_pytorch.py +0 -0
  339. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_query.py +0 -0
  340. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_read_database.py +0 -0
  341. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_read_dataset_remote.py +0 -0
  342. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  343. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_retry.py +0 -0
  344. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_session.py +0 -0
  345. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_storage_pattern.py +0 -0
  346. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_studio_datetime_parsing.py +0 -0
  347. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_to_database.py +0 -0
  348. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_toolkit.py +0 -0
  349. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_video.py +0 -0
  350. {datachain-0.33.1 → datachain-0.34.0}/tests/func/test_warehouse.py +0 -0
  351. {datachain-0.33.1 → datachain-0.34.0}/tests/scripts/feature_class.py +0 -0
  352. {datachain-0.33.1 → datachain-0.34.0}/tests/scripts/feature_class_exception.py +0 -0
  353. {datachain-0.33.1 → datachain-0.34.0}/tests/scripts/feature_class_parallel.py +0 -0
  354. {datachain-0.33.1 → datachain-0.34.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  355. {datachain-0.33.1 → datachain-0.34.0}/tests/scripts/name_len_slow.py +0 -0
  356. {datachain-0.33.1 → datachain-0.34.0}/tests/test_atomicity.py +0 -0
  357. {datachain-0.33.1 → datachain-0.34.0}/tests/test_cli_e2e.py +0 -0
  358. {datachain-0.33.1 → datachain-0.34.0}/tests/test_cli_studio.py +0 -0
  359. {datachain-0.33.1 → datachain-0.34.0}/tests/test_import_time.py +0 -0
  360. {datachain-0.33.1 → datachain-0.34.0}/tests/test_query_e2e.py +0 -0
  361. {datachain-0.33.1 → datachain-0.34.0}/tests/test_telemetry.py +0 -0
  362. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/__init__.py +0 -0
  363. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/__init__.py +0 -0
  364. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/conftest.py +0 -0
  365. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_arrow.py +0 -0
  366. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_audio.py +0 -0
  367. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_clip.py +0 -0
  368. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  369. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  370. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_diff.py +0 -0
  371. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_feature.py +0 -0
  372. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_feature_utils.py +0 -0
  373. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_file.py +0 -0
  374. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_hf.py +0 -0
  375. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_image.py +0 -0
  376. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_listing_info.py +0 -0
  377. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_namespace.py +0 -0
  378. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_partition_by.py +0 -0
  379. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_project.py +0 -0
  380. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  381. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_schema.py +0 -0
  382. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_settings.py +0 -0
  383. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_signal_schema.py +0 -0
  384. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  385. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_storage_pattern.py +0 -0
  386. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_text.py +0 -0
  387. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_udf.py +0 -0
  388. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_udf_signature.py +0 -0
  389. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_utils.py +0 -0
  390. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/lib/test_webdataset.py +0 -0
  391. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/model/__init__.py +0 -0
  392. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/model/test_bbox.py +0 -0
  393. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/model/test_pose.py +0 -0
  394. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/model/test_segment.py +0 -0
  395. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/model/test_utils.py +0 -0
  396. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/__init__.py +0 -0
  397. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  398. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  399. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  400. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/test_array.py +0 -0
  401. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/test_conditional.py +0 -0
  402. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/test_path.py +0 -0
  403. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/test_random.py +0 -0
  404. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/test_selectable.py +0 -0
  405. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/sql/test_string.py +0 -0
  406. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_asyn.py +0 -0
  407. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_cache.py +0 -0
  408. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_catalog.py +0 -0
  409. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_catalog_loader.py +0 -0
  410. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_cli_datasets.py +0 -0
  411. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_cli_parsing.py +0 -0
  412. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_client.py +0 -0
  413. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_client_gcs.py +0 -0
  414. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_client_http.py +0 -0
  415. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_client_s3.py +0 -0
  416. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_config.py +0 -0
  417. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_data_storage.py +0 -0
  418. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_database_engine.py +0 -0
  419. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_datachain_hash.py +0 -0
  420. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_dataset.py +0 -0
  421. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_dispatch.py +0 -0
  422. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_fileslice.py +0 -0
  423. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_func.py +0 -0
  424. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_hash_utils.py +0 -0
  425. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_listing.py +0 -0
  426. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_metastore.py +0 -0
  427. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_module_exports.py +0 -0
  428. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_pytorch.py +0 -0
  429. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_query.py +0 -0
  430. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_query_metrics.py +0 -0
  431. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_query_params.py +0 -0
  432. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_query_steps_hash.py +0 -0
  433. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_script_meta.py +0 -0
  434. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_semver.py +0 -0
  435. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_serializer.py +0 -0
  436. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_session.py +0 -0
  437. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_utils.py +0 -0
  438. {datachain-0.33.1 → datachain-0.34.0}/tests/unit/test_warehouse.py +0 -0
  439. {datachain-0.33.1 → datachain-0.34.0}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.13.1'
27
+ rev: 'v0.13.2'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.33.1
3
+ Version: 0.34.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -21,6 +21,7 @@ from sqlalchemy import (
21
21
  Table,
22
22
  Text,
23
23
  UniqueConstraint,
24
+ desc,
24
25
  select,
25
26
  )
26
27
  from sqlalchemy.sql import func as f
@@ -399,6 +400,7 @@ class AbstractMetastore(ABC, Serializable):
399
400
  workers: int = 1,
400
401
  python_version: Optional[str] = None,
401
402
  params: Optional[dict[str, str]] = None,
403
+ parent_job_id: Optional[str] = None,
402
404
  ) -> str:
403
405
  """
404
406
  Creates a new job.
@@ -443,6 +445,10 @@ class AbstractMetastore(ABC, Serializable):
443
445
  def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
444
446
  """Returns all checkpoints related to some job"""
445
447
 
448
@abstractmethod
def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
    """Get last created checkpoint for some job.

    Implementations are expected to return `None` when the job has no
    checkpoints (the DB-backed implementation does so).
    """
451
+
446
452
  @abstractmethod
447
453
  def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
448
454
  """Gets single checkpoint by id"""
@@ -1548,6 +1554,7 @@ class AbstractDBMetastore(AbstractMetastore):
1548
1554
  Column("error_stack", Text, nullable=False, default=""),
1549
1555
  Column("params", JSON, nullable=False),
1550
1556
  Column("metrics", JSON, nullable=False),
1557
+ Column("parent_job_id", Text, nullable=True),
1551
1558
  ]
1552
1559
 
1553
1560
  @cached_property
@@ -1595,6 +1602,7 @@ class AbstractDBMetastore(AbstractMetastore):
1595
1602
  workers: int = 1,
1596
1603
  python_version: Optional[str] = None,
1597
1604
  params: Optional[dict[str, str]] = None,
1605
+ parent_job_id: Optional[str] = None,
1598
1606
  conn: Optional[Any] = None,
1599
1607
  ) -> str:
1600
1608
  """
@@ -1616,6 +1624,7 @@ class AbstractDBMetastore(AbstractMetastore):
1616
1624
  error_stack="",
1617
1625
  params=json.dumps(params or {}),
1618
1626
  metrics=json.dumps({}),
1627
+ parent_job_id=parent_job_id,
1619
1628
  ),
1620
1629
  conn=conn,
1621
1630
  )
@@ -1770,7 +1779,7 @@ class AbstractDBMetastore(AbstractMetastore):
1770
1779
  )
1771
1780
  return self.get_checkpoint_by_id(checkpoint_id)
1772
1781
 
1773
- def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
1782
+ def list_checkpoints(self, job_id: str, conn=None) -> Iterator[Checkpoint]:
1774
1783
  """List checkpoints by job id."""
1775
1784
  query = self._checkpoints_query().where(self._checkpoints.c.job_id == job_id)
1776
1785
  rows = list(self.db.execute(query, conn=conn))
@@ -1800,3 +1809,15 @@ class AbstractDBMetastore(AbstractMetastore):
1800
1809
  if not rows:
1801
1810
  return None
1802
1811
  return self.checkpoint_class.parse(*rows[0])
1812
+
1813
def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
    """Return the most recently created checkpoint of a job.

    Checkpoints of `job_id` are ordered by `created_at` descending and only the
    first row is fetched. Returns `None` when the job has no checkpoints.
    """
    newest_first = self._checkpoints_query().where(
        self._checkpoints.c.job_id == job_id
    )
    newest_first = newest_first.order_by(desc(self._checkpoints.c.created_at))
    newest_first = newest_first.limit(1)

    found = list(self.db.execute(newest_first, conn=conn))
    return self.checkpoint_class.parse(*found[0]) if found else None
@@ -101,3 +101,7 @@ class OutdatedDatabaseSchemaError(DataChainError):
101
101
 
102
102
  class CheckpointNotFoundError(NotFoundError):
103
103
  pass
104
+
105
+
106
class JobNotFoundError(NotFoundError):
    """Raised when a job id does not resolve to an existing job."""

    pass
@@ -22,6 +22,7 @@ class Job:
22
22
  python_version: Optional[str] = None
23
23
  error_message: str = ""
24
24
  error_stack: str = ""
25
+ parent_job_id: Optional[str] = None
25
26
 
26
27
  @classmethod
27
28
  def parse(
@@ -39,6 +40,7 @@ class Job:
39
40
  error_stack: str,
40
41
  params: str,
41
42
  metrics: str,
43
+ parent_job_id: Optional[str],
42
44
  ) -> "Job":
43
45
  return cls(
44
46
  str(id),
@@ -54,4 +56,5 @@ class Job:
54
56
  python_version,
55
57
  error_message,
56
58
  error_stack,
59
+ parent_job_id,
57
60
  )
@@ -19,7 +19,6 @@ from typing import (
19
19
  cast,
20
20
  overload,
21
21
  )
22
- from uuid import uuid4
23
22
 
24
23
  import sqlalchemy
25
24
  import ujson as json
@@ -30,10 +29,15 @@ from tqdm import tqdm
30
29
  from datachain import semver
31
30
  from datachain.dataset import DatasetRecord
32
31
  from datachain.delta import delta_disabled
33
- from datachain.error import ProjectCreateNotAllowedError, ProjectNotFoundError
32
+ from datachain.error import (
33
+ JobNotFoundError,
34
+ ProjectCreateNotAllowedError,
35
+ ProjectNotFoundError,
36
+ )
34
37
  from datachain.func import literal
35
38
  from datachain.func.base import Function
36
39
  from datachain.func.func import Func
40
+ from datachain.job import Job
37
41
  from datachain.lib.convert.python_to_sql import python_to_sql
38
42
  from datachain.lib.data_model import (
39
43
  DataModel,
@@ -50,11 +54,12 @@ from datachain.lib.signal_schema import SignalResolvingError, SignalSchema
50
54
  from datachain.lib.udf import Aggregator, BatchMapper, Generator, Mapper, UDFBase
51
55
  from datachain.lib.udf_signature import UdfSignature
52
56
  from datachain.lib.utils import DataChainColumnError, DataChainParamsError
57
+ from datachain.project import Project
53
58
  from datachain.query import Session
54
59
  from datachain.query.dataset import DatasetQuery, PartitionByType
55
60
  from datachain.query.schema import DEFAULT_DELIMITER, Column
56
61
  from datachain.sql.functions import path as pathfunc
57
- from datachain.utils import batched_it, inside_notebook, row_to_nested_dict
62
+ from datachain.utils import batched_it, env2bool, inside_notebook, row_to_nested_dict
58
63
 
59
64
  from .database import DEFAULT_DATABASE_BATCH_SIZE
60
65
  from .utils import (
@@ -578,6 +583,19 @@ class DataChain:
578
583
  query=self._query.save(project=project, feature_schema=schema)
579
584
  )
580
585
 
586
def _calculate_job_hash(self, job_id: str) -> str:
    """
    Return the checkpoint hash for this chain at its `save` point within a job.

    The digest chains the hash of the job's last checkpoint (when one exists)
    with the hash of this chain, so the result depends on every save that was
    checkpointed earlier in the same job.
    """
    metastore = self.session.catalog.metastore
    previous = metastore.get_last_checkpoint(job_id)

    digest = hashlib.sha256()
    if previous:
        # Feeding the parts one after another equals hashing their concatenation.
        digest.update(bytes.fromhex(previous.hash))
    digest.update(bytes.fromhex(self.hash()))
    return digest.hexdigest()
598
+
581
599
  def save( # type: ignore[override]
582
600
  self,
583
601
  name: str,
@@ -602,101 +620,171 @@ class DataChain:
602
620
  update_version: which part of the dataset version to automatically increase.
603
621
  Available values: `major`, `minor` or `patch`. Default is `patch`.
604
622
  """
623
+
605
624
  catalog = self.session.catalog
606
- if version is not None:
607
- semver.validate(version)
608
625
 
609
- if update_version is not None and update_version not in [
610
- "patch",
611
- "major",
612
- "minor",
613
- ]:
614
- raise ValueError(
615
- "update_version can have one of the following values: major, minor or"
616
- " patch"
617
- )
626
+ result = None # result chain that will be returned at the end
627
+
628
+ # Version validation
629
+ self._validate_version(version)
630
+ self._validate_update_version(update_version)
618
631
 
619
632
  namespace_name, project_name, name = catalog.get_full_dataset_name(
620
633
  name,
621
634
  namespace_name=self._settings.namespace,
622
635
  project_name=self._settings.project,
623
636
  )
637
+ project = self._get_or_create_project(namespace_name, project_name)
638
+
639
+ # Checkpoint handling
640
+ job, _hash, result = self._resolve_checkpoint(name, project, kwargs)
641
+
642
+ # Schema preparation
643
+ schema = self.signals_schema.clone_without_sys_signals().serialize()
644
+
645
+ # Handle retry and delta functionality
646
+ if not result:
647
+ result = self._handle_delta(name, version, project, schema, kwargs)
648
+
649
+ if not result:
650
+ # calculate chain if we already don't have result from checkpoint or delta
651
+ result = self._evolve(
652
+ query=self._query.save(
653
+ name=name,
654
+ version=version,
655
+ project=project,
656
+ description=description,
657
+ attrs=attrs,
658
+ feature_schema=schema,
659
+ update_version=update_version,
660
+ **kwargs,
661
+ )
662
+ )
663
+
664
+ if job:
665
+ catalog.metastore.create_checkpoint(job.id, _hash) # type: ignore[arg-type]
666
+
667
+ return result
668
+
669
def _validate_version(self, version: Optional[str]) -> None:
    """Run `semver.validate` on the dataset version; `None` is skipped."""
    if version is None:
        return
    semver.validate(version)
624
673
 
674
def _validate_update_version(self, update_version: Optional[str]) -> None:
    """Validate the `update_version` argument of `save`.

    `None` is accepted, as the annotation allows and as the pre-refactor inline
    check did; any other value must be one of `major`, `minor` or `patch`.

    Raises:
        ValueError: if `update_version` is set to an unsupported value.
    """
    allowed = ["major", "minor", "patch"]
    # Without the explicit None check, `None not in allowed` would reject a
    # value that callers were previously allowed to pass.
    if update_version is not None and update_version not in allowed:
        raise ValueError(f"update_version must be one of {allowed}")
679
+
680
+ def _get_or_create_project(self, namespace: str, project_name: str) -> Project:
681
+ """Get project or raise if creation not allowed."""
625
682
  try:
626
- project = self.session.catalog.metastore.get_project(
683
+ return self.session.catalog.metastore.get_project(
627
684
  project_name,
628
- namespace_name,
685
+ namespace,
629
686
  create=is_studio(),
630
687
  )
631
688
  except ProjectNotFoundError as e:
632
- # not being able to create it as creation is not allowed
633
689
  raise ProjectCreateNotAllowedError("Creating project is not allowed") from e
634
690
 
635
- schema = self.signals_schema.clone_without_sys_signals().serialize()
691
+ def _resolve_checkpoint(
692
+ self,
693
+ name: str,
694
+ project: Project,
695
+ kwargs: dict,
696
+ ) -> tuple[Optional[Job], Optional[str], Optional["DataChain"]]:
697
+ """Check if checkpoint exists and return cached dataset if possible."""
698
+ from .datasets import read_dataset
636
699
 
637
- # Handle retry and delta functionality
638
- if self.delta and name:
639
- from datachain.delta import delta_retry_update
700
+ metastore = self.session.catalog.metastore
640
701
 
641
- # Delta chains must have delta_on defined (ensured by _as_delta method)
642
- assert self._delta_on is not None, "Delta chain must have delta_on defined"
702
+ job_id = os.getenv("DATACHAIN_JOB_ID")
703
+ checkpoints_reset = env2bool("DATACHAIN_CHECKPOINTS_RESET", undefined=True)
643
704
 
644
- result_ds, dependencies, has_changes = delta_retry_update(
645
- self,
646
- namespace_name,
647
- project_name,
648
- name,
649
- on=self._delta_on,
650
- right_on=self._delta_result_on,
651
- compare=self._delta_compare,
652
- delta_retry=self._delta_retry,
705
+ if not job_id:
706
+ return None, None, None
707
+
708
+ job = metastore.get_job(job_id)
709
+ if not job:
710
+ raise JobNotFoundError(f"Job with id {job_id} not found")
711
+
712
+ _hash = self._calculate_job_hash(job.id)
713
+
714
+ if (
715
+ job.parent_job_id
716
+ and not checkpoints_reset
717
+ and metastore.find_checkpoint(job.parent_job_id, _hash)
718
+ ):
719
+ # checkpoint found → reuse dataset
720
+ chain = read_dataset(
721
+ name, namespace=project.namespace.name, project=project.name, **kwargs
653
722
  )
723
+ return job, _hash, chain
654
724
 
655
- if result_ds:
656
- return self._evolve(
657
- query=result_ds._query.save(
658
- name=name,
659
- version=version,
660
- project=project,
661
- feature_schema=schema,
662
- dependencies=dependencies,
663
- **kwargs,
664
- )
665
- )
725
+ return job, _hash, None
666
726
 
667
- if not has_changes:
668
- # sources have not been changed so new version of resulting dataset
669
- # would be the same as previous one. To avoid duplicating exact
670
- # datasets, we won't create new version of it and we will return
671
- # current latest version instead.
672
- from .datasets import read_dataset
727
+ def _handle_delta(
728
+ self,
729
+ name: str,
730
+ version: Optional[str],
731
+ project: Project,
732
+ schema: dict,
733
+ kwargs: dict,
734
+ ) -> Optional["DataChain"]:
735
+ """Try to save as a delta dataset.
736
+ Returns:
737
+ A DataChain if delta logic could handle it, otherwise None to fall back
738
+ to the regular save path (e.g., on first dataset creation).
739
+ """
740
+ from datachain.delta import delta_retry_update
673
741
 
674
- return read_dataset(
675
- name, namespace=namespace_name, project=project_name, **kwargs
676
- )
742
+ from .datasets import read_dataset
677
743
 
678
- result = self._evolve(
679
- query=self._query.save(
680
- name=name,
681
- version=version,
682
- project=project,
683
- description=description,
684
- attrs=attrs,
685
- feature_schema=schema,
686
- update_version=update_version,
687
- **kwargs,
688
- )
744
+ if not self.delta or not name:
745
+ return None
746
+
747
+ assert self._delta_on is not None, "Delta chain must have delta_on defined"
748
+
749
+ result_ds, dependencies, has_changes = delta_retry_update(
750
+ self,
751
+ project.namespace.name,
752
+ project.name,
753
+ name,
754
+ on=self._delta_on,
755
+ right_on=self._delta_result_on,
756
+ compare=self._delta_compare,
757
+ delta_retry=self._delta_retry,
689
758
  )
690
759
 
691
- if job_id := os.getenv("DATACHAIN_JOB_ID"):
692
- catalog.metastore.create_checkpoint(
693
- job_id,
694
- _hash=hashlib.sha256( # TODO this will be replaced with self.hash()
695
- str(uuid4()).encode()
696
- ).hexdigest(),
760
+ # Case 1: delta produced a new dataset
761
+ if result_ds:
762
+ return self._evolve(
763
+ query=result_ds._query.save(
764
+ name=name,
765
+ version=version,
766
+ project=project,
767
+ feature_schema=schema,
768
+ dependencies=dependencies,
769
+ **kwargs,
770
+ )
697
771
  )
698
772
 
699
- return result
773
+ # Case 2: no changes → reuse last version
774
+ if not has_changes:
775
+ # sources have not been changed so new version of resulting dataset
776
+ # would be the same as previous one. To avoid duplicating exact
777
+ # datasets, we won't create new version of it and we will return
778
+ # current latest version instead.
779
+ return read_dataset(
780
+ name,
781
+ namespace=project.namespace.name,
782
+ project=project.name,
783
+ **kwargs,
784
+ )
785
+
786
+ # Case 3: first creation of dataset
787
+ return None
700
788
 
701
789
  def apply(self, func, *args, **kwargs):
702
790
  """Apply any function to the chain.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.33.1
3
+ Version: 0.34.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -394,6 +394,7 @@ tests/unit/lib/__init__.py
394
394
  tests/unit/lib/conftest.py
395
395
  tests/unit/lib/test_arrow.py
396
396
  tests/unit/lib/test_audio.py
397
+ tests/unit/lib/test_checkpoints.py
397
398
  tests/unit/lib/test_clip.py
398
399
  tests/unit/lib/test_datachain.py
399
400
  tests/unit/lib/test_datachain_bootstrap.py
@@ -456,9 +456,12 @@ def cloud_server(request, tmp_upath_factory, cloud_type, version_aware, tree):
456
456
  return make_cloud_server(src_path, cloud_type, tree)
457
457
 
458
458
 
459
- @pytest.fixture()
460
- def datachain_job_id(monkeypatch):
461
- job_id = str(uuid.uuid4())
459
@pytest.fixture
def datachain_job_id(test_session, monkeypatch):
    """Create a job in the test metastore and expose its id via DATACHAIN_JOB_ID.

    Returns the id of the created job.
    """
    job_id = test_session.catalog.metastore.create_job(
        "my-job",
        # The recorded script previously had an unbalanced parenthesis
        # (`[1, 2, 3].save(`); keep it valid Python.
        'import datachain as dc; dc.read_values(num=[1, 2, 3]).save("nums")',
    )
    monkeypatch.setenv("DATACHAIN_JOB_ID", job_id)
    return job_id
464
467
 
@@ -0,0 +1,200 @@
1
+ import pytest
2
+
3
+ import datachain as dc
4
+ from datachain.error import DatasetNotFoundError, JobNotFoundError
5
+ from datachain.lib.utils import DataChainError
6
+
7
+
8
def mapper_fail(num) -> int:
    """Mapper that always raises; used to make a `save` step fail in tests."""
    raise Exception("Error")
10
+
11
+
12
@pytest.fixture
def nums_dataset(test_session):
    """Save a `nums` dataset with values 1, 2, 3 in the test session."""
    return dc.read_values(num=[1, 2, 3], session=test_session).save("nums")
15
+
16
+
17
@pytest.mark.parametrize("reset_checkpoints", [True, False])
@pytest.mark.parametrize("with_delta", [True, False])
def test_checkpoints(
    test_session, monkeypatch, nums_dataset, reset_checkpoints, with_delta
):
    """A re-run of a failed job reuses parent checkpoints unless reset is set."""
    catalog = test_session.catalog

    # Environment values must be strings; a bool makes pytest warn and convert.
    monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))

    if with_delta:
        chain = dc.read_dataset(
            "nums", delta=True, delta_on=["num"], session=test_session
        )
    else:
        chain = dc.read_dataset("nums", session=test_session)

    # -------------- FIRST RUN -------------------
    first_job_id = catalog.metastore.create_job("my-job", "echo 1;")
    monkeypatch.setenv("DATACHAIN_JOB_ID", first_job_id)
    chain.save("nums1")
    chain.save("nums2")
    with pytest.raises(DataChainError):
        chain.map(new=mapper_fail).save("nums3")

    catalog.get_dataset("nums1")
    catalog.get_dataset("nums2")
    with pytest.raises(DatasetNotFoundError):
        catalog.get_dataset("nums3")

    # -------------- SECOND RUN -------------------
    second_job_id = catalog.metastore.create_job(
        "my-job", "echo 1;", parent_job_id=first_job_id
    )
    monkeypatch.setenv("DATACHAIN_JOB_ID", second_job_id)
    chain.save("nums1")
    chain.save("nums2")
    chain.save("nums3")

    # `assert a == 2 if cond else 1` parses as `assert (a == 2) if cond else 1`,
    # which asserts nothing when cond is False; compute the expected count first.
    expected_versions = 2 if reset_checkpoints else 1
    assert len(catalog.get_dataset("nums1").versions) == expected_versions
    assert len(catalog.get_dataset("nums2").versions) == expected_versions
    assert len(catalog.get_dataset("nums3").versions) == 1

    assert len(list(catalog.metastore.list_checkpoints(first_job_id))) == 2
    assert len(list(catalog.metastore.list_checkpoints(second_job_id))) == 3
61
+
62
+
63
@pytest.mark.parametrize("reset_checkpoints", [True, False])
def test_checkpoints_modified_chains(
    test_session, monkeypatch, nums_dataset, reset_checkpoints
):
    """Changing a chain in a re-run invalidates its checkpoint and later ones."""
    catalog = test_session.catalog
    # Environment values must be strings; a bool makes pytest warn and convert.
    monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))

    chain = dc.read_dataset("nums", session=test_session)

    # -------------- FIRST RUN -------------------
    first_job_id = catalog.metastore.create_job("my-job", "echo 1;")
    monkeypatch.setenv("DATACHAIN_JOB_ID", first_job_id)
    chain.save("nums1")
    chain.save("nums2")
    chain.save("nums3")

    # -------------- SECOND RUN -------------------
    second_job_id = catalog.metastore.create_job(
        "my-job", "echo 1;", parent_job_id=first_job_id
    )
    monkeypatch.setenv("DATACHAIN_JOB_ID", second_job_id)
    chain.save("nums1")
    chain.filter(dc.C("num") > 1).save("nums2")  # added change from first run
    chain.save("nums3")

    # `assert a == 2 if cond else 1` parses as `assert (a == 2) if cond else 1`,
    # which asserts nothing when cond is False; compute the expected count first.
    expected_nums1_versions = 2 if reset_checkpoints else 1
    assert len(catalog.get_dataset("nums1").versions) == expected_nums1_versions
    assert len(catalog.get_dataset("nums2").versions) == 2
    assert len(catalog.get_dataset("nums3").versions) == 2

    assert len(list(catalog.metastore.list_checkpoints(first_job_id))) == 3
    assert len(list(catalog.metastore.list_checkpoints(second_job_id))) == 3
94
+
95
+
96
@pytest.mark.parametrize("reset_checkpoints", [True, False])
def test_checkpoints_multiple_runs(
    test_session, monkeypatch, nums_dataset, reset_checkpoints
):
    """Checkpoints chain correctly across four runs, each a child of the last."""
    catalog = test_session.catalog

    # Environment values must be strings; a bool makes pytest warn and convert.
    monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))

    chain = dc.read_dataset("nums", session=test_session)

    # -------------- FIRST RUN -------------------
    first_job_id = catalog.metastore.create_job("my-job", "echo 1;")
    monkeypatch.setenv("DATACHAIN_JOB_ID", first_job_id)
    chain.save("nums1")
    chain.save("nums2")
    with pytest.raises(DataChainError):
        chain.map(new=mapper_fail).save("nums3")

    catalog.get_dataset("nums1")
    catalog.get_dataset("nums2")
    with pytest.raises(DatasetNotFoundError):
        catalog.get_dataset("nums3")

    # -------------- SECOND RUN -------------------
    second_job_id = catalog.metastore.create_job(
        "my-job", "echo 1;", parent_job_id=first_job_id
    )
    monkeypatch.setenv("DATACHAIN_JOB_ID", second_job_id)
    chain.save("nums1")
    chain.save("nums2")
    chain.save("nums3")

    # -------------- THIRD RUN -------------------
    third_job_id = catalog.metastore.create_job(
        "my-job", "echo 1;", parent_job_id=second_job_id
    )
    monkeypatch.setenv("DATACHAIN_JOB_ID", third_job_id)
    chain.save("nums1")
    chain.filter(dc.C("num") > 1).save("nums2")
    with pytest.raises(DataChainError):
        chain.map(new=mapper_fail).save("nums3")

    # -------------- FOURTH RUN -------------------
    fourth_job_id = catalog.metastore.create_job(
        "my-job", "echo 1;", parent_job_id=third_job_id
    )
    monkeypatch.setenv("DATACHAIN_JOB_ID", fourth_job_id)
    chain.save("nums1")
    chain.filter(dc.C("num") > 1).save("nums2")
    chain.save("nums3")

    num1_versions = len(catalog.get_dataset("nums1").versions)
    num2_versions = len(catalog.get_dataset("nums2").versions)
    num3_versions = len(catalog.get_dataset("nums3").versions)

    if reset_checkpoints:
        assert num1_versions == 4
        assert num2_versions == 4
        assert num3_versions == 2

    else:
        assert num1_versions == 1
        assert num2_versions == 2
        assert num3_versions == 2

    assert len(list(catalog.metastore.list_checkpoints(first_job_id))) == 2
    assert len(list(catalog.metastore.list_checkpoints(second_job_id))) == 3
    assert len(list(catalog.metastore.list_checkpoints(third_job_id))) == 2
    assert len(list(catalog.metastore.list_checkpoints(fourth_job_id))) == 3
165
+
166
+
167
def test_checkpoints_check_valid_chain_is_returned(
    test_session,
    monkeypatch,
    nums_dataset,
):
    """`save` returns a usable chain even when the checkpoint skips the work."""
    catalog = test_session.catalog

    # Environment values must be strings; a bool makes pytest warn and convert.
    monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(False))
    chain = dc.read_dataset("nums", session=test_session)

    # -------------- FIRST RUN -------------------
    first_job_id = catalog.metastore.create_job("my-job", "echo 1;")
    monkeypatch.setenv("DATACHAIN_JOB_ID", first_job_id)
    chain.save("nums1")

    # -------------- SECOND RUN -------------------
    second_job_id = catalog.metastore.create_job(
        "my-job", "echo 1;", parent_job_id=first_job_id
    )
    monkeypatch.setenv("DATACHAIN_JOB_ID", second_job_id)
    ds = chain.save("nums1")

    # checking that we return expected DataChain even though we skipped chain creation
    # because of the checkpoints
    assert ds.dataset.name == "nums1"
    assert len(ds.dataset.versions) == 1
    assert ds.order_by("num").to_list("num") == [(1,), (2,), (3,)]
194
+
195
+
196
def test_checkpoints_invalid_parent_job_id(test_session, monkeypatch, nums_dataset):
    """`save` raises JobNotFoundError when DATACHAIN_JOB_ID names an unknown job."""
    # setting wrong job id
    monkeypatch.setenv("DATACHAIN_JOB_ID", "caee6c54-6328-4bcd-8ca6-2b31cb4fff94")
    with pytest.raises(JobNotFoundError):
        dc.read_dataset("nums", session=test_session).save("nums1")
@@ -4019,7 +4019,7 @@ def test_update_versions_wrong_value(test_session):
4019
4019
  chain.save(ds_name, update_version="wrong")
4020
4020
 
4021
4021
  assert str(excinfo.value) == (
4022
- "update_version can have one of the following values: major, minor or patch"
4022
+ "update_version must be one of ['major', 'minor', 'patch']"
4023
4023
  )
4024
4024
 
4025
4025
 
File without changes
File without changes