datachain 0.36.6__tar.gz → 0.37.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (448)
  1. {datachain-0.36.6 → datachain-0.37.0}/PKG-INFO +1 -1
  2. datachain-0.37.0/docs/guide/checkpoints.md +207 -0
  3. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/index.md +1 -0
  4. {datachain-0.36.6 → datachain-0.37.0}/mkdocs.yml +1 -0
  5. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/catalog/catalog.py +2 -0
  6. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/metastore.py +16 -0
  7. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/job.py +1 -1
  8. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/datachain.py +10 -17
  9. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/records.py +0 -2
  10. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/dataset.py +0 -4
  11. datachain-0.37.0/src/datachain/query/session.py +347 -0
  12. {datachain-0.36.6 → datachain-0.37.0}/src/datachain.egg-info/PKG-INFO +1 -1
  13. {datachain-0.36.6 → datachain-0.37.0}/src/datachain.egg-info/SOURCES.txt +4 -0
  14. {datachain-0.36.6 → datachain-0.37.0}/tests/conftest.py +21 -4
  15. datachain-0.37.0/tests/func/test_checkpoints.py +52 -0
  16. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_datachain.py +0 -20
  17. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_datasets.py +6 -4
  18. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_session.py +6 -3
  19. {datachain-0.36.6 → datachain-0.37.0}/tests/scripts/feature_class_exception.py +13 -8
  20. {datachain-0.36.6 → datachain-0.37.0}/tests/test_atomicity.py +7 -4
  21. datachain-0.37.0/tests/test_job_management_e2e.py +158 -0
  22. {datachain-0.36.6 → datachain-0.37.0}/tests/test_query_e2e.py +5 -4
  23. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_checkpoints.py +45 -35
  24. datachain-0.37.0/tests/unit/test_job_management.py +174 -0
  25. {datachain-0.36.6 → datachain-0.37.0}/tests/utils.py +25 -0
  26. datachain-0.36.6/src/datachain/query/session.py +0 -205
  27. {datachain-0.36.6 → datachain-0.37.0}/.cruft.json +0 -0
  28. {datachain-0.36.6 → datachain-0.37.0}/.gitattributes +0 -0
  29. {datachain-0.36.6 → datachain-0.37.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  30. {datachain-0.36.6 → datachain-0.37.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  31. {datachain-0.36.6 → datachain-0.37.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  32. {datachain-0.36.6 → datachain-0.37.0}/.github/codecov.yaml +0 -0
  33. {datachain-0.36.6 → datachain-0.37.0}/.github/dependabot.yml +0 -0
  34. {datachain-0.36.6 → datachain-0.37.0}/.github/workflows/benchmarks.yml +0 -0
  35. {datachain-0.36.6 → datachain-0.37.0}/.github/workflows/release.yml +0 -0
  36. {datachain-0.36.6 → datachain-0.37.0}/.github/workflows/tests-studio.yml +0 -0
  37. {datachain-0.36.6 → datachain-0.37.0}/.github/workflows/tests.yml +0 -0
  38. {datachain-0.36.6 → datachain-0.37.0}/.github/workflows/update-template.yaml +0 -0
  39. {datachain-0.36.6 → datachain-0.37.0}/.gitignore +0 -0
  40. {datachain-0.36.6 → datachain-0.37.0}/.pre-commit-config.yaml +0 -0
  41. {datachain-0.36.6 → datachain-0.37.0}/CODE_OF_CONDUCT.rst +0 -0
  42. {datachain-0.36.6 → datachain-0.37.0}/LICENSE +0 -0
  43. {datachain-0.36.6 → datachain-0.37.0}/README.rst +0 -0
  44. {datachain-0.36.6 → datachain-0.37.0}/docs/api_hooks.py +0 -0
  45. {datachain-0.36.6 → datachain-0.37.0}/docs/assets/captioned_cartoons.png +0 -0
  46. {datachain-0.36.6 → datachain-0.37.0}/docs/assets/datachain-white.svg +0 -0
  47. {datachain-0.36.6 → datachain-0.37.0}/docs/assets/datachain.svg +0 -0
  48. {datachain-0.36.6 → datachain-0.37.0}/docs/assets/webhook_dialog.png +0 -0
  49. {datachain-0.36.6 → datachain-0.37.0}/docs/assets/webhook_list.png +0 -0
  50. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/auth/login.md +0 -0
  51. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/auth/logout.md +0 -0
  52. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/auth/team.md +0 -0
  53. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/auth/token.md +0 -0
  54. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/index.md +0 -0
  55. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/job/cancel.md +0 -0
  56. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/job/clusters.md +0 -0
  57. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/job/logs.md +0 -0
  58. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/job/ls.md +0 -0
  59. {datachain-0.36.6 → datachain-0.37.0}/docs/commands/job/run.md +0 -0
  60. {datachain-0.36.6 → datachain-0.37.0}/docs/contributing.md +0 -0
  61. {datachain-0.36.6 → datachain-0.37.0}/docs/css/github-permalink-style.css +0 -0
  62. {datachain-0.36.6 → datachain-0.37.0}/docs/examples.md +0 -0
  63. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/db_migrations.md +0 -0
  64. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/delta.md +0 -0
  65. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/env.md +0 -0
  66. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/namespaces.md +0 -0
  67. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/processing.md +0 -0
  68. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/remotes.md +0 -0
  69. {datachain-0.36.6 → datachain-0.37.0}/docs/guide/retry.md +0 -0
  70. {datachain-0.36.6 → datachain-0.37.0}/docs/index.md +0 -0
  71. {datachain-0.36.6 → datachain-0.37.0}/docs/overrides/main.html +0 -0
  72. {datachain-0.36.6 → datachain-0.37.0}/docs/quick-start.md +0 -0
  73. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/arrowrow.md +0 -0
  74. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/bbox.md +0 -0
  75. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/file.md +0 -0
  76. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/imagefile.md +0 -0
  77. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/index.md +0 -0
  78. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/pose.md +0 -0
  79. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/segment.md +0 -0
  80. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/tarvfile.md +0 -0
  81. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/textfile.md +0 -0
  82. {datachain-0.36.6 → datachain-0.37.0}/docs/references/data-types/videofile.md +0 -0
  83. {datachain-0.36.6 → datachain-0.37.0}/docs/references/datachain.md +0 -0
  84. {datachain-0.36.6 → datachain-0.37.0}/docs/references/func.md +0 -0
  85. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/aggregate.md +0 -0
  86. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/array.md +0 -0
  87. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/conditional.md +0 -0
  88. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/numeric.md +0 -0
  89. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/path.md +0 -0
  90. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/random.md +0 -0
  91. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/string.md +0 -0
  92. {datachain-0.36.6 → datachain-0.37.0}/docs/references/functions/window.md +0 -0
  93. {datachain-0.36.6 → datachain-0.37.0}/docs/references/index.md +0 -0
  94. {datachain-0.36.6 → datachain-0.37.0}/docs/references/toolkit.md +0 -0
  95. {datachain-0.36.6 → datachain-0.37.0}/docs/references/torch.md +0 -0
  96. {datachain-0.36.6 → datachain-0.37.0}/docs/references/udf.md +0 -0
  97. {datachain-0.36.6 → datachain-0.37.0}/docs/studio/api/.gitkeep +0 -0
  98. {datachain-0.36.6 → datachain-0.37.0}/docs/studio/webhooks.md +0 -0
  99. {datachain-0.36.6 → datachain-0.37.0}/docs/templates/main.dot +0 -0
  100. {datachain-0.36.6 → datachain-0.37.0}/docs/templates/operation.dot +0 -0
  101. {datachain-0.36.6 → datachain-0.37.0}/docs/templates/responses.def +0 -0
  102. {datachain-0.36.6 → datachain-0.37.0}/docs/tutorials.md +0 -0
  103. {datachain-0.36.6 → datachain-0.37.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  104. {datachain-0.36.6 → datachain-0.37.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  105. {datachain-0.36.6 → datachain-0.37.0}/examples/computer_vision/openimage-detect.py +0 -0
  106. {datachain-0.36.6 → datachain-0.37.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  107. {datachain-0.36.6 → datachain-0.37.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  108. {datachain-0.36.6 → datachain-0.37.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  109. {datachain-0.36.6 → datachain-0.37.0}/examples/get_started/common_sql_functions.py +0 -0
  110. {datachain-0.36.6 → datachain-0.37.0}/examples/get_started/json-csv-reader.py +0 -0
  111. {datachain-0.36.6 → datachain-0.37.0}/examples/get_started/nested_datamodel.py +0 -0
  112. {datachain-0.36.6 → datachain-0.37.0}/examples/get_started/torch-loader.py +0 -0
  113. {datachain-0.36.6 → datachain-0.37.0}/examples/get_started/udfs/parallel.py +0 -0
  114. {datachain-0.36.6 → datachain-0.37.0}/examples/get_started/udfs/simple.py +0 -0
  115. {datachain-0.36.6 → datachain-0.37.0}/examples/get_started/udfs/stateful.py +0 -0
  116. {datachain-0.36.6 → datachain-0.37.0}/examples/incremental_processing/delta.py +0 -0
  117. {datachain-0.36.6 → datachain-0.37.0}/examples/incremental_processing/retry.py +0 -0
  118. {datachain-0.36.6 → datachain-0.37.0}/examples/incremental_processing/utils.py +0 -0
  119. {datachain-0.36.6 → datachain-0.37.0}/examples/llm_and_nlp/claude-query.py +0 -0
  120. {datachain-0.36.6 → datachain-0.37.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  121. {datachain-0.36.6 → datachain-0.37.0}/examples/multimodal/audio-to-text.py +0 -0
  122. {datachain-0.36.6 → datachain-0.37.0}/examples/multimodal/clip_inference.py +0 -0
  123. {datachain-0.36.6 → datachain-0.37.0}/examples/multimodal/hf_pipeline.py +0 -0
  124. {datachain-0.36.6 → datachain-0.37.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  125. {datachain-0.36.6 → datachain-0.37.0}/examples/multimodal/wds.py +0 -0
  126. {datachain-0.36.6 → datachain-0.37.0}/examples/multimodal/wds_filtered.py +0 -0
  127. {datachain-0.36.6 → datachain-0.37.0}/noxfile.py +0 -0
  128. {datachain-0.36.6 → datachain-0.37.0}/pyproject.toml +0 -0
  129. {datachain-0.36.6 → datachain-0.37.0}/setup.cfg +0 -0
  130. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/__init__.py +0 -0
  131. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/__main__.py +0 -0
  132. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/asyn.py +0 -0
  133. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cache.py +0 -0
  134. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/catalog/__init__.py +0 -0
  135. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/catalog/datasource.py +0 -0
  136. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/catalog/dependency.py +0 -0
  137. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/catalog/loader.py +0 -0
  138. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/checkpoint.py +0 -0
  139. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/__init__.py +0 -0
  140. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/__init__.py +0 -0
  141. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/datasets.py +0 -0
  142. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/du.py +0 -0
  143. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/index.py +0 -0
  144. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/ls.py +0 -0
  145. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/misc.py +0 -0
  146. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/query.py +0 -0
  147. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/commands/show.py +0 -0
  148. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/parser/__init__.py +0 -0
  149. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/parser/job.py +0 -0
  150. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/parser/studio.py +0 -0
  151. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/parser/utils.py +0 -0
  152. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/cli/utils.py +0 -0
  153. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/__init__.py +0 -0
  154. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/azure.py +0 -0
  155. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/fileslice.py +0 -0
  156. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/fsspec.py +0 -0
  157. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/gcs.py +0 -0
  158. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/hf.py +0 -0
  159. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/http.py +0 -0
  160. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/local.py +0 -0
  161. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/client/s3.py +0 -0
  162. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/config.py +0 -0
  163. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/__init__.py +0 -0
  164. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/db_engine.py +0 -0
  165. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/job.py +0 -0
  166. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/schema.py +0 -0
  167. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/serializer.py +0 -0
  168. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/sqlite.py +0 -0
  169. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/data_storage/warehouse.py +0 -0
  170. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/dataset.py +0 -0
  171. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/delta.py +0 -0
  172. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/diff/__init__.py +0 -0
  173. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/error.py +0 -0
  174. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/fs/__init__.py +0 -0
  175. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/fs/reference.py +0 -0
  176. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/fs/utils.py +0 -0
  177. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/__init__.py +0 -0
  178. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/aggregate.py +0 -0
  179. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/array.py +0 -0
  180. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/base.py +0 -0
  181. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/conditional.py +0 -0
  182. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/func.py +0 -0
  183. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/numeric.py +0 -0
  184. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/path.py +0 -0
  185. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/random.py +0 -0
  186. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/string.py +0 -0
  187. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/func/window.py +0 -0
  188. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/hash_utils.py +0 -0
  189. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/__init__.py +0 -0
  190. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/arrow.py +0 -0
  191. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/audio.py +0 -0
  192. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/clip.py +0 -0
  193. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/convert/__init__.py +0 -0
  194. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/convert/flatten.py +0 -0
  195. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  196. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  197. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/convert/unflatten.py +0 -0
  198. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  199. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/data_model.py +0 -0
  200. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dataset_info.py +0 -0
  201. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/__init__.py +0 -0
  202. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/csv.py +0 -0
  203. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/database.py +0 -0
  204. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/datasets.py +0 -0
  205. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/hf.py +0 -0
  206. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/json.py +0 -0
  207. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/listings.py +0 -0
  208. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/pandas.py +0 -0
  209. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/parquet.py +0 -0
  210. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/storage.py +0 -0
  211. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/storage_pattern.py +0 -0
  212. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/utils.py +0 -0
  213. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/dc/values.py +0 -0
  214. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/file.py +0 -0
  215. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/hf.py +0 -0
  216. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/image.py +0 -0
  217. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/listing.py +0 -0
  218. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/listing_info.py +0 -0
  219. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/meta_formats.py +0 -0
  220. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/model_store.py +0 -0
  221. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/namespaces.py +0 -0
  222. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/projects.py +0 -0
  223. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/pytorch.py +0 -0
  224. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/settings.py +0 -0
  225. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/signal_schema.py +0 -0
  226. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/tar.py +0 -0
  227. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/text.py +0 -0
  228. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/udf.py +0 -0
  229. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/udf_signature.py +0 -0
  230. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/utils.py +0 -0
  231. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/video.py +0 -0
  232. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/webdataset.py +0 -0
  233. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/lib/webdataset_laion.py +0 -0
  234. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/listing.py +0 -0
  235. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/__init__.py +0 -0
  236. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/bbox.py +0 -0
  237. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/pose.py +0 -0
  238. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/segment.py +0 -0
  239. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  240. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  241. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/ultralytics/pose.py +0 -0
  242. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/ultralytics/segment.py +0 -0
  243. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/model/utils.py +0 -0
  244. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/namespace.py +0 -0
  245. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/node.py +0 -0
  246. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/nodes_fetcher.py +0 -0
  247. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/nodes_thread_pool.py +0 -0
  248. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/plugins.py +0 -0
  249. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/progress.py +0 -0
  250. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/project.py +0 -0
  251. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/py.typed +0 -0
  252. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/__init__.py +0 -0
  253. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/batch.py +0 -0
  254. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/dispatch.py +0 -0
  255. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/metrics.py +0 -0
  256. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/params.py +0 -0
  257. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/queue.py +0 -0
  258. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/schema.py +0 -0
  259. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/query/udf.py +0 -0
  260. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/remote/__init__.py +0 -0
  261. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/remote/studio.py +0 -0
  262. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/script_meta.py +0 -0
  263. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/semver.py +0 -0
  264. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/__init__.py +0 -0
  265. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/default/__init__.py +0 -0
  266. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/default/base.py +0 -0
  267. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/__init__.py +0 -0
  268. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/aggregate.py +0 -0
  269. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/array.py +0 -0
  270. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/conditional.py +0 -0
  271. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/numeric.py +0 -0
  272. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/path.py +0 -0
  273. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/random.py +0 -0
  274. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/functions/string.py +0 -0
  275. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/postgresql_dialect.py +0 -0
  276. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/postgresql_types.py +0 -0
  277. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/selectable.py +0 -0
  278. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  279. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/sqlite/base.py +0 -0
  280. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/sqlite/types.py +0 -0
  281. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/sqlite/vector.py +0 -0
  282. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/types.py +0 -0
  283. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/sql/utils.py +0 -0
  284. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/studio.py +0 -0
  285. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/telemetry.py +0 -0
  286. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/toolkit/__init__.py +0 -0
  287. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/toolkit/split.py +0 -0
  288. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/torch/__init__.py +0 -0
  289. {datachain-0.36.6 → datachain-0.37.0}/src/datachain/utils.py +0 -0
  290. {datachain-0.36.6 → datachain-0.37.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  291. {datachain-0.36.6 → datachain-0.37.0}/src/datachain.egg-info/entry_points.txt +0 -0
  292. {datachain-0.36.6 → datachain-0.37.0}/src/datachain.egg-info/requires.txt +0 -0
  293. {datachain-0.36.6 → datachain-0.37.0}/src/datachain.egg-info/top_level.txt +0 -0
  294. {datachain-0.36.6 → datachain-0.37.0}/tests/__init__.py +0 -0
  295. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/__init__.py +0 -0
  296. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/conftest.py +0 -0
  297. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  298. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  299. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/datasets/.gitignore +0 -0
  300. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  301. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/test_datachain.py +0 -0
  302. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/test_ls.py +0 -0
  303. {datachain-0.36.6 → datachain-0.37.0}/tests/benchmarks/test_version.py +0 -0
  304. {datachain-0.36.6 → datachain-0.37.0}/tests/data.py +0 -0
  305. {datachain-0.36.6 → datachain-0.37.0}/tests/examples/__init__.py +0 -0
  306. {datachain-0.36.6 → datachain-0.37.0}/tests/examples/test_examples.py +0 -0
  307. {datachain-0.36.6 → datachain-0.37.0}/tests/examples/test_wds_e2e.py +0 -0
  308. {datachain-0.36.6 → datachain-0.37.0}/tests/examples/wds_data.py +0 -0
  309. {datachain-0.36.6 → datachain-0.37.0}/tests/func/__init__.py +0 -0
  310. {datachain-0.36.6 → datachain-0.37.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  311. {datachain-0.36.6 → datachain-0.37.0}/tests/func/data/lena.jpg +0 -0
  312. {datachain-0.36.6 → datachain-0.37.0}/tests/func/fake-service-account-credentials.json +0 -0
  313. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/__init__.py +0 -0
  314. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/test_aggregate.py +0 -0
  315. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/test_array.py +0 -0
  316. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/test_conditional.py +0 -0
  317. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/test_numeric.py +0 -0
  318. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/test_path.py +0 -0
  319. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/test_random.py +0 -0
  320. {datachain-0.36.6 → datachain-0.37.0}/tests/func/functions/test_string.py +0 -0
  321. {datachain-0.36.6 → datachain-0.37.0}/tests/func/model/__init__.py +0 -0
  322. {datachain-0.36.6 → datachain-0.37.0}/tests/func/model/data/running-mask0.png +0 -0
  323. {datachain-0.36.6 → datachain-0.37.0}/tests/func/model/data/running-mask1.png +0 -0
  324. {datachain-0.36.6 → datachain-0.37.0}/tests/func/model/data/running.jpg +0 -0
  325. {datachain-0.36.6 → datachain-0.37.0}/tests/func/model/data/ships.jpg +0 -0
  326. {datachain-0.36.6 → datachain-0.37.0}/tests/func/model/test_yolo.py +0 -0
  327. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_audio.py +0 -0
  328. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_catalog.py +0 -0
  329. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_client.py +0 -0
  330. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_cloud_transfer.py +0 -0
  331. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_data_storage.py +0 -0
  332. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_datachain_merge.py +0 -0
  333. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_dataset_query.py +0 -0
  334. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_delta.py +0 -0
  335. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_feature_pickling.py +0 -0
  336. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_file.py +0 -0
  337. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_hf.py +0 -0
  338. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_hidden_field.py +0 -0
  339. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_image.py +0 -0
  340. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_listing.py +0 -0
  341. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_ls.py +0 -0
  342. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_meta_formats.py +0 -0
  343. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_metastore.py +0 -0
  344. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_metrics.py +0 -0
  345. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_mutate.py +0 -0
  346. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_pull.py +0 -0
  347. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_pytorch.py +0 -0
  348. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_query.py +0 -0
  349. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_read_database.py +0 -0
  350. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_read_dataset_remote.py +0 -0
  351. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  352. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_retry.py +0 -0
  353. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_storage_pattern.py +0 -0
  354. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_studio_datetime_parsing.py +0 -0
  355. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_temp_table_tracking.py +0 -0
  356. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_to_database.py +0 -0
  357. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_toolkit.py +0 -0
  358. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_udf.py +0 -0
  359. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_union.py +0 -0
  360. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_video.py +0 -0
  361. {datachain-0.36.6 → datachain-0.37.0}/tests/func/test_warehouse.py +0 -0
  362. {datachain-0.36.6 → datachain-0.37.0}/tests/scripts/feature_class.py +0 -0
  363. {datachain-0.36.6 → datachain-0.37.0}/tests/scripts/feature_class_parallel.py +0 -0
  364. {datachain-0.36.6 → datachain-0.37.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  365. {datachain-0.36.6 → datachain-0.37.0}/tests/scripts/name_len_slow.py +0 -0
  366. {datachain-0.36.6 → datachain-0.37.0}/tests/test_cli_e2e.py +0 -0
  367. {datachain-0.36.6 → datachain-0.37.0}/tests/test_cli_studio.py +0 -0
  368. {datachain-0.36.6 → datachain-0.37.0}/tests/test_import_time.py +0 -0
  369. {datachain-0.36.6 → datachain-0.37.0}/tests/test_telemetry.py +0 -0
  370. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/__init__.py +0 -0
  371. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/__init__.py +0 -0
  372. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/conftest.py +0 -0
  373. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_arrow.py +0 -0
  374. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_audio.py +0 -0
  375. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_clip.py +0 -0
  376. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_datachain.py +0 -0
  377. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  378. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  379. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_diff.py +0 -0
  380. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_feature.py +0 -0
  381. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_feature_utils.py +0 -0
  382. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_file.py +0 -0
  383. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_hf.py +0 -0
  384. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_image.py +0 -0
  385. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_listing_info.py +0 -0
  386. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_namespace.py +0 -0
  387. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_partition_by.py +0 -0
  388. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_project.py +0 -0
  389. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  390. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_schema.py +0 -0
  391. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_settings.py +0 -0
  392. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_signal_schema.py +0 -0
  393. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  394. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_storage_pattern.py +0 -0
  395. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_text.py +0 -0
  396. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_udf.py +0 -0
  397. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_udf_signature.py +0 -0
  398. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_utils.py +0 -0
  399. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/lib/test_webdataset.py +0 -0
  400. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/model/__init__.py +0 -0
  401. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/model/test_bbox.py +0 -0
  402. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/model/test_pose.py +0 -0
  403. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/model/test_segment.py +0 -0
  404. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/model/test_utils.py +0 -0
  405. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/__init__.py +0 -0
  406. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  407. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  408. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  409. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/test_array.py +0 -0
  410. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/test_conditional.py +0 -0
  411. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/test_path.py +0 -0
  412. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/test_random.py +0 -0
  413. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/test_selectable.py +0 -0
  414. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/sql/test_string.py +0 -0
  415. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_asyn.py +0 -0
  416. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_batching.py +0 -0
  417. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_cache.py +0 -0
  418. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_catalog.py +0 -0
  419. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_catalog_loader.py +0 -0
  420. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_cli_datasets.py +0 -0
  421. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_cli_parsing.py +0 -0
  422. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_client.py +0 -0
  423. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_client_gcs.py +0 -0
  424. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_client_http.py +0 -0
  425. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_client_s3.py +0 -0
  426. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_config.py +0 -0
  427. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_data_storage.py +0 -0
  428. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_database_engine.py +0 -0
  429. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_datachain_hash.py +0 -0
  430. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_dataset.py +0 -0
  431. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_dispatch.py +0 -0
  432. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_fileslice.py +0 -0
  433. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_func.py +0 -0
  434. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_hash_utils.py +0 -0
  435. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_listing.py +0 -0
  436. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_metastore.py +0 -0
  437. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_module_exports.py +0 -0
  438. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_pytorch.py +0 -0
  439. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_query.py +0 -0
  440. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_query_metrics.py +0 -0
  441. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_query_params.py +0 -0
  442. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_query_steps_hash.py +0 -0
  443. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_script_meta.py +0 -0
  444. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_semver.py +0 -0
  445. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_serializer.py +0 -0
  446. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_session.py +0 -0
  447. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_utils.py +0 -0
  448. {datachain-0.36.6 → datachain-0.37.0}/tests/unit/test_warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.36.6
3
+ Version: 0.37.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -0,0 +1,207 @@
1
+ # Checkpoints
2
+
3
+ Checkpoints allow DataChain to automatically skip re-creating datasets that were successfully saved in previous script runs. When a script fails or is interrupted, you can re-run it and DataChain will resume from where it left off, reusing datasets that were already created.
4
+
5
+ **Note:** Checkpoints are currently available only for local script runs. Support for Studio is planned for future releases.
6
+
7
+ ## How Checkpoints Work
8
+
9
+ When you run a Python script locally (e.g., `python my_script.py`), DataChain automatically:
10
+
11
+ 1. **Creates a job** for the script execution, using the script's absolute path as the job name
12
+ 2. **Tracks parent jobs** by finding the last job with the same script name
13
+ 3. **Calculates hashes** for each dataset save operation based on the DataChain operations chain
14
+ 4. **Creates checkpoints** after each successful `.save()` call, storing the hash
15
+ 5. **Checks for existing checkpoints** on subsequent runs - if a matching checkpoint exists in the parent job, DataChain skips the save and reuses the existing dataset
16
+
17
+ This means that if your script creates multiple datasets and fails partway through, the next run will skip recreating the datasets that were already successfully saved.
18
+
19
+ ## Example
20
+
21
+ Consider this script that processes data in multiple stages:
22
+
23
+ ```python
24
+ import datachain as dc
25
+
26
+ # Stage 1: Load and filter data
27
+ filtered = (
28
+ dc.read_csv("s3://mybucket/data.csv")
29
+ .filter(dc.C("score") > 0.5)
30
+ .save("filtered_data")
31
+ )
32
+
33
+ # Stage 2: Transform data
34
+ transformed = (
35
+ filtered
36
+ .map(value=lambda x: x * 2, output=float)
37
+ .save("transformed_data")
38
+ )
39
+
40
+ # Stage 3: Aggregate results
41
+ result = (
42
+ transformed
43
+ .agg(
44
+ total=lambda values: sum(values),
45
+ partition_by="category",
46
+ )
47
+ .save("final_results")
48
+ )
49
+ ```
50
+
51
+ **First run:** If it completes successfully, the script executes all three stages and creates three datasets: `filtered_data`, `transformed_data`, and `final_results`. If it instead fails during Stage 3, only `filtered_data` and `transformed_data` are saved.
52
+
53
+ **Second run (after a failure in Stage 3):** DataChain detects that `filtered_data` and `transformed_data` were already created in the parent job with matching hashes. It skips recreating them and proceeds directly to Stage 3, creating only `final_results`.
54
+
55
+ ## When Checkpoints Are Used
56
+
57
+ Checkpoints are automatically used when:
58
+
59
+ - Running a Python script locally (e.g., `python my_script.py`)
60
+ - The script has been run before
61
+ - A dataset with the same name is being saved
62
+ - The chain hash matches a checkpoint from the parent job
63
+
64
+ Checkpoints are **not** used when:
65
+
66
+ - Running code interactively (Python REPL, Jupyter notebooks)
67
+ - Running code as a module (e.g., `python -m mymodule`)
68
+ - The `DATACHAIN_CHECKPOINTS_RESET` environment variable is set (see below)
69
+ - Running on Studio (checkpoints support planned for future releases)
70
+
71
+ ## Resetting Checkpoints
72
+
73
+ To ignore existing checkpoints and run your script from scratch, set the `DATACHAIN_CHECKPOINTS_RESET` environment variable:
74
+
75
+ ```bash
76
+ export DATACHAIN_CHECKPOINTS_RESET=1
77
+ python my_script.py
78
+ ```
79
+
80
+ Or set it inline:
81
+
82
+ ```bash
83
+ DATACHAIN_CHECKPOINTS_RESET=1 python my_script.py
84
+ ```
85
+
86
+ This forces DataChain to recreate all datasets, regardless of existing checkpoints.
87
+
88
+ ## How Job Names Are Determined
89
+
90
+ DataChain uses different strategies for naming jobs depending on how the code is executed:
91
+
92
+ ### Script Execution (Checkpoints Enabled)
93
+
94
+ When running `python my_script.py`, DataChain uses the **absolute path** to the script as the job name:
95
+
96
+ ```
97
+ /home/user/projects/my_script.py
98
+ ```
99
+
100
+ This allows DataChain to link runs of the same script together as parent-child jobs, enabling checkpoint lookup.
101
+
102
+ ### Interactive or Module Execution (Checkpoints Disabled)
103
+
104
+ When running code interactively or as a module, DataChain uses a **unique UUID** as the job name:
105
+
106
+ ```
107
+ a1b2c3d4-e5f6-7890-abcd-ef1234567890
108
+ ```
109
+
110
+ This prevents unrelated executions from being linked together, but also means checkpoints cannot be used.
111
+
112
+ ## How Checkpoint Hashes Are Calculated
113
+
114
+ For each `.save()` operation, DataChain calculates a hash based on:
115
+
116
+ 1. The hash of the previous checkpoint in the current job (if any)
117
+ 2. The hash of the current DataChain operations chain
118
+
119
+ This creates a chain of hashes that uniquely identifies each stage of data processing. On subsequent runs, DataChain matches these hashes against the parent job's checkpoints and skips recreating datasets where the hashes match.
120
+
121
+ ### Hash Invalidation
122
+
123
+ **Checkpoints are automatically invalidated when you modify the chain.** Any change to the DataChain operations will result in a different hash, causing DataChain to ignore the stale checkpoint and recompute the dataset.
124
+
125
+ Changes that invalidate checkpoints include:
126
+
127
+ - **Modifying filter conditions:** `.filter(dc.C("score") > 0.5)` → `.filter(dc.C("score") > 0.8)`
128
+ - **Changing map/gen/agg functions:** Any modification to UDF logic
129
+ - **Altering function parameters:** Changes to column names, output types, or other parameters
130
+ - **Adding or removing operations:** Inserting new `.filter()`, `.map()`, or other steps
131
+ - **Reordering operations:** Changing the sequence of transformations
132
+
133
+ ### Example
134
+
135
+ ```python
136
+ # First run - creates three checkpoints
137
+ dc.read_csv("data.csv").save("stage1") # Hash = H1
138
+
139
+ dc.read_dataset("stage1").filter(dc.C("x") > 5).save("stage2") # Hash = H2 = hash(H1 + pipeline_hash)
140
+
141
+ dc.read_dataset("stage2").select("name", "value").save("stage3") # Hash = H3 = hash(H2 + pipeline_hash)
142
+ ```
143
+
144
+ **Second run (no changes):**
145
+ - All three hashes match → all three datasets are reused → no computation
146
+
147
+ **Second run (modified filter):**
148
+ ```python
149
+ dc.read_csv("data.csv").save("stage1") # Hash = H1 matches ✓ → reused
150
+
151
+ dc.read_dataset("stage1").filter(dc.C("x") > 10).save("stage2") # Hash ≠ H2 ✗ → recomputed
152
+
153
+ dc.read_dataset("stage2").select("name", "value").save("stage3") # Hash ≠ H3 ✗ → recomputed
154
+ ```
155
+
156
+ Because the filter changed, `stage2` has a different hash and must be recomputed. Since `stage3` depends on `stage2`, its hash also changes (because it includes H2 in the calculation), so it must be recomputed as well.
157
+
158
+ **Key insight:** Modifying any step in the chain invalidates that checkpoint and all subsequent checkpoints, because the hash chain is broken.
159
+
160
+ ## Dataset Persistence
161
+
162
+ Starting with the checkpoints feature, datasets created during script execution persist even if the script fails or is interrupted. This is essential for checkpoint functionality, as it allows subsequent runs to reuse successfully created datasets.
163
+
164
+ If you need to clean up datasets from failed runs, you can use:
165
+
166
+ ```python
167
+ import datachain as dc
168
+
169
+ # Remove a specific dataset
170
+ dc.delete_dataset("dataset_name")
171
+
172
+ # List all datasets to see what's available
173
+ for ds in dc.datasets():
174
+     print(ds.name)
175
+ ```
176
+
177
+ ## Limitations
178
+
179
+ - **Local only:** Checkpoints currently work only for local script runs. Studio support is planned.
180
+ - **Script-based:** Code must be run as a script (not interactively or as a module).
181
+ - **Hash-based matching:** Any change to the chain will create a different hash, preventing checkpoint reuse.
182
+ - **Same script path:** The script must be run from the same absolute path for parent job linking to work.
183
+
184
+ ## Future Plans
185
+
186
+ ### Studio Support
187
+
188
+ Support for checkpoints on Studio is planned for future releases, which will enable checkpoint functionality for collaborative workflows and cloud-based data processing.
189
+
190
+ ### UDF-Level Checkpoints
191
+
192
+ Currently, checkpoints are created only when datasets are saved using `.save()`. This means that if a script fails during a long-running UDF operation (like `.map()`, `.gen()`, or `.agg()`), the entire UDF computation must be rerun on the next execution.
193
+
194
+ Future versions will support **UDF-level checkpoints**, creating checkpoints after each UDF step in the chain. This will provide much more granular recovery:
195
+
196
+ ```python
197
+ # Future behavior with UDF-level checkpoints
198
+ result = (
199
+ dc.read_csv("data.csv")
200
+ .map(heavy_computation_1) # Checkpoint created after this UDF
201
+ .map(heavy_computation_2) # Checkpoint created after this UDF
202
+ .map(heavy_computation_3) # Checkpoint created after this UDF
203
+ .save("result")
204
+ )
205
+ ```
206
+
207
+ If the script fails during `heavy_computation_3`, the next run will skip re-executing `heavy_computation_1` and `heavy_computation_2`, resuming only the work that wasn't completed.
@@ -10,6 +10,7 @@ Welcome to the DataChain User Guide! This section provides comprehensive documen
10
10
  - [Data Processing Overview](./processing.md) - Discover DataChain's specialized data processing features.
11
11
  - [Delta Processing](./delta.md) - Incremental data processing to efficiently handle large datasets that change over time.
12
12
  - [Error Handling and Retries](./retry.md) - Learn how to handle processing errors and selectively reprocess problematic records.
13
+ - [Checkpoints](./checkpoints.md) - Automatically resume script execution from where it left off after failures.
13
14
  - [Environment Variables](./env.md) - Configure DataChain's behavior using environment variables.
14
15
  - [Namespaces](./namespaces.md) - Learn more about namespaces and projects.
15
16
  - [Local DB Migrations](./db_migrations.md) - Learn how to handle local DB migrations after upgrading datachain.
@@ -114,6 +114,7 @@ nav:
114
114
  - Overview: guide/processing.md
115
115
  - Delta Processing: guide/delta.md
116
116
  - Error Handling and Retries: guide/retry.md
117
+ - Checkpoints: guide/checkpoints.md
117
118
  - Environment Variables: guide/env.md
118
119
  - Namespaces: guide/namespaces.md
119
120
  - Local DB Migrations: guide/db_migrations.md
@@ -793,6 +793,7 @@ class Catalog:
793
793
  description: str | None = None,
794
794
  attrs: list[str] | None = None,
795
795
  update_version: str | None = "patch",
796
+ job_id: str | None = None,
796
797
  ) -> "DatasetRecord":
797
798
  """
798
799
  Creates new dataset of a specific version.
@@ -866,6 +867,7 @@ class Catalog:
866
867
  create_rows_table=create_rows,
867
868
  columns=columns,
868
869
  uuid=uuid,
870
+ job_id=job_id,
869
871
  )
870
872
 
871
873
  def create_new_dataset_version(
@@ -448,6 +448,10 @@ class AbstractMetastore(ABC, Serializable):
448
448
  def get_job_status(self, job_id: str) -> JobStatus | None:
449
449
  """Returns the status of the given job."""
450
450
 
451
+ @abstractmethod
452
+ def get_last_job_by_name(self, name: str, conn=None) -> "Job | None":
453
+ """Returns the most recently created job with the given name, or None if no such job exists."""
454
+
451
455
  #
452
456
  # Checkpoints
453
457
  #
@@ -1685,6 +1689,18 @@ class AbstractDBMetastore(AbstractMetastore):
1685
1689
  query = self._jobs_query().where(self._jobs.c.id.in_(ids))
1686
1690
  yield from self._parse_jobs(self.db.execute(query, conn=conn))
1687
1691
 
1692
+ def get_last_job_by_name(self, name: str, conn=None) -> "Job | None":
1693
+ query = (
1694
+ self._jobs_query()
1695
+ .where(self._jobs.c.name == name)
1696
+ .order_by(self._jobs.c.created_at.desc())
1697
+ .limit(1)
1698
+ )
1699
+ results = list(self.db.execute(query, conn=conn))
1700
+ if not results:
1701
+ return None
1702
+ return self._parse_job(results[0])
1703
+
1688
1704
  def create_job(
1689
1705
  self,
1690
1706
  name: str,
@@ -56,5 +56,5 @@ class Job:
56
56
  python_version,
57
57
  error_message,
58
58
  error_stack,
59
- parent_job_id,
59
+ str(parent_job_id) if parent_job_id else None,
60
60
  )
@@ -27,7 +27,6 @@ from datachain import semver
27
27
  from datachain.dataset import DatasetRecord
28
28
  from datachain.delta import delta_disabled
29
29
  from datachain.error import (
30
- JobNotFoundError,
31
30
  ProjectCreateNotAllowedError,
32
31
  ProjectNotFoundError,
33
32
  )
@@ -627,6 +626,9 @@ class DataChain:
627
626
  self._validate_version(version)
628
627
  self._validate_update_version(update_version)
629
628
 
629
+ # Get the existing job when running in SaaS, or create a new one when running locally
630
+ job = self.session.get_or_create_job()
631
+
630
632
  namespace_name, project_name, name = catalog.get_full_dataset_name(
631
633
  name,
632
634
  namespace_name=self._settings.namespace,
@@ -635,7 +637,7 @@ class DataChain:
635
637
  project = self._get_or_create_project(namespace_name, project_name)
636
638
 
637
639
  # Checkpoint handling
638
- job, _hash, result = self._resolve_checkpoint(name, project, kwargs)
640
+ _hash, result = self._resolve_checkpoint(name, project, job, kwargs)
639
641
 
640
642
  # Schema preparation
641
643
  schema = self.signals_schema.clone_without_sys_signals().serialize()
@@ -655,13 +657,12 @@ class DataChain:
655
657
  attrs=attrs,
656
658
  feature_schema=schema,
657
659
  update_version=update_version,
660
+ job_id=job.id,
658
661
  **kwargs,
659
662
  )
660
663
  )
661
664
 
662
- if job:
663
- catalog.metastore.create_checkpoint(job.id, _hash) # type: ignore[arg-type]
664
-
665
+ catalog.metastore.create_checkpoint(job.id, _hash) # type: ignore[arg-type]
665
666
  return result
666
667
 
667
668
  def _validate_version(self, version: str | None) -> None:
@@ -690,23 +691,15 @@ class DataChain:
690
691
  self,
691
692
  name: str,
692
693
  project: Project,
694
+ job: Job,
693
695
  kwargs: dict,
694
- ) -> tuple[Job | None, str | None, "DataChain | None"]:
696
+ ) -> tuple[str, "DataChain | None"]:
695
697
  """Check if checkpoint exists and return cached dataset if possible."""
696
698
  from .datasets import read_dataset
697
699
 
698
700
  metastore = self.session.catalog.metastore
699
-
700
- job_id = os.getenv("DATACHAIN_JOB_ID")
701
701
  checkpoints_reset = env2bool("DATACHAIN_CHECKPOINTS_RESET", undefined=True)
702
702
 
703
- if not job_id:
704
- return None, None, None
705
-
706
- job = metastore.get_job(job_id)
707
- if not job:
708
- raise JobNotFoundError(f"Job with id {job_id} not found")
709
-
710
703
  _hash = self._calculate_job_hash(job.id)
711
704
 
712
705
  if (
@@ -718,9 +711,9 @@ class DataChain:
718
711
  chain = read_dataset(
719
712
  name, namespace=project.namespace.name, project=project.name, **kwargs
720
713
  )
721
- return job, _hash, chain
714
+ return _hash, chain
722
715
 
723
- return job, _hash, None
716
+ return _hash, None
724
717
 
725
718
  def _handle_delta(
726
719
  self,
@@ -78,8 +78,6 @@ def read_records(
78
78
  ),
79
79
  )
80
80
 
81
- session.add_dataset_version(dsr, dsr.latest_version)
82
-
83
81
  if isinstance(to_insert, dict):
84
82
  to_insert = [to_insert]
85
83
  elif not to_insert:
@@ -1927,10 +1927,6 @@ class DatasetQuery:
1927
1927
  )
1928
1928
  version = version or dataset.latest_version
1929
1929
 
1930
- self.session.add_dataset_version(
1931
- dataset=dataset, version=version, listing=kwargs.get("listing", False)
1932
- )
1933
-
1934
1930
  dr = self.catalog.warehouse.dataset_rows(dataset)
1935
1931
 
1936
1932
  self.catalog.warehouse.copy_table(dr.get_table(), query.select())