datachain 0.32.3__tar.gz → 0.33.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (438)
  1. {datachain-0.32.3 → datachain-0.33.1}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.32.3 → datachain-0.33.1}/PKG-INFO +3 -2
  3. {datachain-0.32.3 → datachain-0.33.1}/pyproject.toml +2 -1
  4. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/catalog/catalog.py +58 -22
  5. datachain-0.33.1/src/datachain/checkpoint.py +44 -0
  6. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/fsspec.py +6 -1
  7. datachain-0.33.1/src/datachain/client/http.py +157 -0
  8. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/job.py +1 -0
  9. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/metastore.py +137 -0
  10. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/schema.py +1 -1
  11. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/sqlite.py +8 -0
  12. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/diff/__init__.py +7 -13
  13. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/error.py +4 -0
  14. datachain-0.33.1/src/datachain/hash_utils.py +147 -0
  15. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/datachain.py +21 -1
  16. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/signal_schema.py +7 -0
  17. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/udf.py +20 -0
  18. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/dataset.py +107 -0
  19. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/utils.py +6 -0
  20. {datachain-0.32.3 → datachain-0.33.1}/src/datachain.egg-info/PKG-INFO +3 -2
  21. {datachain-0.32.3 → datachain-0.33.1}/src/datachain.egg-info/SOURCES.txt +7 -0
  22. {datachain-0.32.3 → datachain-0.33.1}/src/datachain.egg-info/requires.txt +2 -1
  23. {datachain-0.32.3 → datachain-0.33.1}/tests/conftest.py +20 -2
  24. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_datachain.py +12 -0
  25. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_diff.py +41 -0
  26. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_signal_schema.py +24 -0
  27. datachain-0.33.1/tests/unit/test_client_http.py +186 -0
  28. datachain-0.33.1/tests/unit/test_datachain_hash.py +173 -0
  29. datachain-0.33.1/tests/unit/test_hash_utils.py +109 -0
  30. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_query.py +22 -3
  31. datachain-0.33.1/tests/unit/test_query_steps_hash.py +505 -0
  32. {datachain-0.32.3 → datachain-0.33.1}/.cruft.json +0 -0
  33. {datachain-0.32.3 → datachain-0.33.1}/.gitattributes +0 -0
  34. {datachain-0.32.3 → datachain-0.33.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  35. {datachain-0.32.3 → datachain-0.33.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  36. {datachain-0.32.3 → datachain-0.33.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  37. {datachain-0.32.3 → datachain-0.33.1}/.github/codecov.yaml +0 -0
  38. {datachain-0.32.3 → datachain-0.33.1}/.github/dependabot.yml +0 -0
  39. {datachain-0.32.3 → datachain-0.33.1}/.github/workflows/benchmarks.yml +0 -0
  40. {datachain-0.32.3 → datachain-0.33.1}/.github/workflows/release.yml +0 -0
  41. {datachain-0.32.3 → datachain-0.33.1}/.github/workflows/tests-studio.yml +0 -0
  42. {datachain-0.32.3 → datachain-0.33.1}/.github/workflows/tests.yml +0 -0
  43. {datachain-0.32.3 → datachain-0.33.1}/.github/workflows/update-template.yaml +0 -0
  44. {datachain-0.32.3 → datachain-0.33.1}/.gitignore +0 -0
  45. {datachain-0.32.3 → datachain-0.33.1}/CODE_OF_CONDUCT.rst +0 -0
  46. {datachain-0.32.3 → datachain-0.33.1}/LICENSE +0 -0
  47. {datachain-0.32.3 → datachain-0.33.1}/README.rst +0 -0
  48. {datachain-0.32.3 → datachain-0.33.1}/docs/api_hooks.py +0 -0
  49. {datachain-0.32.3 → datachain-0.33.1}/docs/assets/captioned_cartoons.png +0 -0
  50. {datachain-0.32.3 → datachain-0.33.1}/docs/assets/datachain-white.svg +0 -0
  51. {datachain-0.32.3 → datachain-0.33.1}/docs/assets/datachain.svg +0 -0
  52. {datachain-0.32.3 → datachain-0.33.1}/docs/assets/webhook_dialog.png +0 -0
  53. {datachain-0.32.3 → datachain-0.33.1}/docs/assets/webhook_list.png +0 -0
  54. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/auth/login.md +0 -0
  55. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/auth/logout.md +0 -0
  56. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/auth/team.md +0 -0
  57. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/auth/token.md +0 -0
  58. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/index.md +0 -0
  59. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/job/cancel.md +0 -0
  60. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/job/clusters.md +0 -0
  61. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/job/logs.md +0 -0
  62. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/job/ls.md +0 -0
  63. {datachain-0.32.3 → datachain-0.33.1}/docs/commands/job/run.md +0 -0
  64. {datachain-0.32.3 → datachain-0.33.1}/docs/contributing.md +0 -0
  65. {datachain-0.32.3 → datachain-0.33.1}/docs/css/github-permalink-style.css +0 -0
  66. {datachain-0.32.3 → datachain-0.33.1}/docs/examples.md +0 -0
  67. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/db_migrations.md +0 -0
  68. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/delta.md +0 -0
  69. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/env.md +0 -0
  70. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/index.md +0 -0
  71. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/namespaces.md +0 -0
  72. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/processing.md +0 -0
  73. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/remotes.md +0 -0
  74. {datachain-0.32.3 → datachain-0.33.1}/docs/guide/retry.md +0 -0
  75. {datachain-0.32.3 → datachain-0.33.1}/docs/index.md +0 -0
  76. {datachain-0.32.3 → datachain-0.33.1}/docs/overrides/main.html +0 -0
  77. {datachain-0.32.3 → datachain-0.33.1}/docs/quick-start.md +0 -0
  78. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/arrowrow.md +0 -0
  79. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/bbox.md +0 -0
  80. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/file.md +0 -0
  81. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/imagefile.md +0 -0
  82. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/index.md +0 -0
  83. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/pose.md +0 -0
  84. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/segment.md +0 -0
  85. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/tarvfile.md +0 -0
  86. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/textfile.md +0 -0
  87. {datachain-0.32.3 → datachain-0.33.1}/docs/references/data-types/videofile.md +0 -0
  88. {datachain-0.32.3 → datachain-0.33.1}/docs/references/datachain.md +0 -0
  89. {datachain-0.32.3 → datachain-0.33.1}/docs/references/func.md +0 -0
  90. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/aggregate.md +0 -0
  91. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/array.md +0 -0
  92. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/conditional.md +0 -0
  93. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/numeric.md +0 -0
  94. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/path.md +0 -0
  95. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/random.md +0 -0
  96. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/string.md +0 -0
  97. {datachain-0.32.3 → datachain-0.33.1}/docs/references/functions/window.md +0 -0
  98. {datachain-0.32.3 → datachain-0.33.1}/docs/references/index.md +0 -0
  99. {datachain-0.32.3 → datachain-0.33.1}/docs/references/toolkit.md +0 -0
  100. {datachain-0.32.3 → datachain-0.33.1}/docs/references/torch.md +0 -0
  101. {datachain-0.32.3 → datachain-0.33.1}/docs/references/udf.md +0 -0
  102. {datachain-0.32.3 → datachain-0.33.1}/docs/studio/api/.gitkeep +0 -0
  103. {datachain-0.32.3 → datachain-0.33.1}/docs/studio/webhooks.md +0 -0
  104. {datachain-0.32.3 → datachain-0.33.1}/docs/templates/main.dot +0 -0
  105. {datachain-0.32.3 → datachain-0.33.1}/docs/templates/operation.dot +0 -0
  106. {datachain-0.32.3 → datachain-0.33.1}/docs/templates/responses.def +0 -0
  107. {datachain-0.32.3 → datachain-0.33.1}/docs/tutorials.md +0 -0
  108. {datachain-0.32.3 → datachain-0.33.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  109. {datachain-0.32.3 → datachain-0.33.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  110. {datachain-0.32.3 → datachain-0.33.1}/examples/computer_vision/openimage-detect.py +0 -0
  111. {datachain-0.32.3 → datachain-0.33.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
  112. {datachain-0.32.3 → datachain-0.33.1}/examples/computer_vision/ultralytics-pose.py +0 -0
  113. {datachain-0.32.3 → datachain-0.33.1}/examples/computer_vision/ultralytics-segment.py +0 -0
  114. {datachain-0.32.3 → datachain-0.33.1}/examples/get_started/common_sql_functions.py +0 -0
  115. {datachain-0.32.3 → datachain-0.33.1}/examples/get_started/json-csv-reader.py +0 -0
  116. {datachain-0.32.3 → datachain-0.33.1}/examples/get_started/nested_datamodel.py +0 -0
  117. {datachain-0.32.3 → datachain-0.33.1}/examples/get_started/torch-loader.py +0 -0
  118. {datachain-0.32.3 → datachain-0.33.1}/examples/get_started/udfs/parallel.py +0 -0
  119. {datachain-0.32.3 → datachain-0.33.1}/examples/get_started/udfs/simple.py +0 -0
  120. {datachain-0.32.3 → datachain-0.33.1}/examples/get_started/udfs/stateful.py +0 -0
  121. {datachain-0.32.3 → datachain-0.33.1}/examples/incremental_processing/delta.py +0 -0
  122. {datachain-0.32.3 → datachain-0.33.1}/examples/incremental_processing/retry.py +0 -0
  123. {datachain-0.32.3 → datachain-0.33.1}/examples/incremental_processing/utils.py +0 -0
  124. {datachain-0.32.3 → datachain-0.33.1}/examples/llm_and_nlp/claude-query.py +0 -0
  125. {datachain-0.32.3 → datachain-0.33.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  126. {datachain-0.32.3 → datachain-0.33.1}/examples/multimodal/audio-to-text.py +0 -0
  127. {datachain-0.32.3 → datachain-0.33.1}/examples/multimodal/clip_inference.py +0 -0
  128. {datachain-0.32.3 → datachain-0.33.1}/examples/multimodal/hf_pipeline.py +0 -0
  129. {datachain-0.32.3 → datachain-0.33.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  130. {datachain-0.32.3 → datachain-0.33.1}/examples/multimodal/wds.py +0 -0
  131. {datachain-0.32.3 → datachain-0.33.1}/examples/multimodal/wds_filtered.py +0 -0
  132. {datachain-0.32.3 → datachain-0.33.1}/mkdocs.yml +0 -0
  133. {datachain-0.32.3 → datachain-0.33.1}/noxfile.py +0 -0
  134. {datachain-0.32.3 → datachain-0.33.1}/setup.cfg +0 -0
  135. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/__init__.py +0 -0
  136. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/__main__.py +0 -0
  137. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/asyn.py +0 -0
  138. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cache.py +0 -0
  139. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/catalog/__init__.py +0 -0
  140. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/catalog/datasource.py +0 -0
  141. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/catalog/loader.py +0 -0
  142. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/__init__.py +0 -0
  143. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/__init__.py +0 -0
  144. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/datasets.py +0 -0
  145. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/du.py +0 -0
  146. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/index.py +0 -0
  147. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/ls.py +0 -0
  148. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/misc.py +0 -0
  149. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/query.py +0 -0
  150. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/commands/show.py +0 -0
  151. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/parser/__init__.py +0 -0
  152. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/parser/job.py +0 -0
  153. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/parser/studio.py +0 -0
  154. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/parser/utils.py +0 -0
  155. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/cli/utils.py +0 -0
  156. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/__init__.py +0 -0
  157. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/azure.py +0 -0
  158. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/fileslice.py +0 -0
  159. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/gcs.py +0 -0
  160. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/hf.py +0 -0
  161. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/local.py +0 -0
  162. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/client/s3.py +0 -0
  163. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/config.py +0 -0
  164. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/__init__.py +0 -0
  165. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/db_engine.py +0 -0
  166. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/serializer.py +0 -0
  167. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/data_storage/warehouse.py +0 -0
  168. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/dataset.py +0 -0
  169. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/delta.py +0 -0
  170. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/fs/__init__.py +0 -0
  171. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/fs/reference.py +0 -0
  172. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/fs/utils.py +0 -0
  173. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/__init__.py +0 -0
  174. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/aggregate.py +0 -0
  175. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/array.py +0 -0
  176. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/base.py +0 -0
  177. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/conditional.py +0 -0
  178. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/func.py +0 -0
  179. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/numeric.py +0 -0
  180. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/path.py +0 -0
  181. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/random.py +0 -0
  182. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/string.py +0 -0
  183. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/func/window.py +0 -0
  184. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/job.py +0 -0
  185. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/__init__.py +0 -0
  186. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/arrow.py +0 -0
  187. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/audio.py +0 -0
  188. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/clip.py +0 -0
  189. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/convert/__init__.py +0 -0
  190. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/convert/flatten.py +0 -0
  191. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
  192. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
  193. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/convert/unflatten.py +0 -0
  194. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  195. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/data_model.py +0 -0
  196. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dataset_info.py +0 -0
  197. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/__init__.py +0 -0
  198. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/csv.py +0 -0
  199. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/database.py +0 -0
  200. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/datasets.py +0 -0
  201. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/hf.py +0 -0
  202. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/json.py +0 -0
  203. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/listings.py +0 -0
  204. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/pandas.py +0 -0
  205. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/parquet.py +0 -0
  206. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/records.py +0 -0
  207. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/storage.py +0 -0
  208. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/storage_pattern.py +0 -0
  209. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/utils.py +0 -0
  210. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/dc/values.py +0 -0
  211. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/file.py +0 -0
  212. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/hf.py +0 -0
  213. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/image.py +0 -0
  214. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/listing.py +0 -0
  215. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/listing_info.py +0 -0
  216. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/meta_formats.py +0 -0
  217. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/model_store.py +0 -0
  218. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/namespaces.py +0 -0
  219. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/projects.py +0 -0
  220. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/pytorch.py +0 -0
  221. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/settings.py +0 -0
  222. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/tar.py +0 -0
  223. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/text.py +0 -0
  224. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/udf_signature.py +0 -0
  225. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/utils.py +0 -0
  226. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/video.py +0 -0
  227. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/webdataset.py +0 -0
  228. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/lib/webdataset_laion.py +0 -0
  229. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/listing.py +0 -0
  230. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/__init__.py +0 -0
  231. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/bbox.py +0 -0
  232. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/pose.py +0 -0
  233. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/segment.py +0 -0
  234. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/ultralytics/__init__.py +0 -0
  235. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/ultralytics/bbox.py +0 -0
  236. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/ultralytics/pose.py +0 -0
  237. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/ultralytics/segment.py +0 -0
  238. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/model/utils.py +0 -0
  239. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/namespace.py +0 -0
  240. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/node.py +0 -0
  241. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/nodes_fetcher.py +0 -0
  242. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/nodes_thread_pool.py +0 -0
  243. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/progress.py +0 -0
  244. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/project.py +0 -0
  245. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/py.typed +0 -0
  246. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/__init__.py +0 -0
  247. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/batch.py +0 -0
  248. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/dispatch.py +0 -0
  249. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/metrics.py +0 -0
  250. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/params.py +0 -0
  251. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/queue.py +0 -0
  252. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/schema.py +0 -0
  253. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/session.py +0 -0
  254. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/udf.py +0 -0
  255. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/query/utils.py +0 -0
  256. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/remote/__init__.py +0 -0
  257. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/remote/studio.py +0 -0
  258. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/script_meta.py +0 -0
  259. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/semver.py +0 -0
  260. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/__init__.py +0 -0
  261. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/default/__init__.py +0 -0
  262. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/default/base.py +0 -0
  263. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/__init__.py +0 -0
  264. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/aggregate.py +0 -0
  265. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/array.py +0 -0
  266. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/conditional.py +0 -0
  267. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/numeric.py +0 -0
  268. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/path.py +0 -0
  269. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/random.py +0 -0
  270. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/functions/string.py +0 -0
  271. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/postgresql_dialect.py +0 -0
  272. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/postgresql_types.py +0 -0
  273. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/selectable.py +0 -0
  274. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  275. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/sqlite/base.py +0 -0
  276. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/sqlite/types.py +0 -0
  277. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/sqlite/vector.py +0 -0
  278. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/types.py +0 -0
  279. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/sql/utils.py +0 -0
  280. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/studio.py +0 -0
  281. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/telemetry.py +0 -0
  282. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/toolkit/__init__.py +0 -0
  283. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/toolkit/split.py +0 -0
  284. {datachain-0.32.3 → datachain-0.33.1}/src/datachain/torch/__init__.py +0 -0
  285. {datachain-0.32.3 → datachain-0.33.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  286. {datachain-0.32.3 → datachain-0.33.1}/src/datachain.egg-info/entry_points.txt +0 -0
  287. {datachain-0.32.3 → datachain-0.33.1}/src/datachain.egg-info/top_level.txt +0 -0
  288. {datachain-0.32.3 → datachain-0.33.1}/tests/__init__.py +0 -0
  289. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/__init__.py +0 -0
  290. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/conftest.py +0 -0
  291. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  292. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  293. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/datasets/.gitignore +0 -0
  294. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  295. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/test_datachain.py +0 -0
  296. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/test_ls.py +0 -0
  297. {datachain-0.32.3 → datachain-0.33.1}/tests/benchmarks/test_version.py +0 -0
  298. {datachain-0.32.3 → datachain-0.33.1}/tests/data.py +0 -0
  299. {datachain-0.32.3 → datachain-0.33.1}/tests/examples/__init__.py +0 -0
  300. {datachain-0.32.3 → datachain-0.33.1}/tests/examples/test_examples.py +0 -0
  301. {datachain-0.32.3 → datachain-0.33.1}/tests/examples/test_wds_e2e.py +0 -0
  302. {datachain-0.32.3 → datachain-0.33.1}/tests/examples/wds_data.py +0 -0
  303. {datachain-0.32.3 → datachain-0.33.1}/tests/func/__init__.py +0 -0
  304. {datachain-0.32.3 → datachain-0.33.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  305. {datachain-0.32.3 → datachain-0.33.1}/tests/func/data/lena.jpg +0 -0
  306. {datachain-0.32.3 → datachain-0.33.1}/tests/func/fake-service-account-credentials.json +0 -0
  307. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/__init__.py +0 -0
  308. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/test_aggregate.py +0 -0
  309. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/test_array.py +0 -0
  310. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/test_conditional.py +0 -0
  311. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/test_numeric.py +0 -0
  312. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/test_path.py +0 -0
  313. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/test_random.py +0 -0
  314. {datachain-0.32.3 → datachain-0.33.1}/tests/func/functions/test_string.py +0 -0
  315. {datachain-0.32.3 → datachain-0.33.1}/tests/func/model/__init__.py +0 -0
  316. {datachain-0.32.3 → datachain-0.33.1}/tests/func/model/data/running-mask0.png +0 -0
  317. {datachain-0.32.3 → datachain-0.33.1}/tests/func/model/data/running-mask1.png +0 -0
  318. {datachain-0.32.3 → datachain-0.33.1}/tests/func/model/data/running.jpg +0 -0
  319. {datachain-0.32.3 → datachain-0.33.1}/tests/func/model/data/ships.jpg +0 -0
  320. {datachain-0.32.3 → datachain-0.33.1}/tests/func/model/test_yolo.py +0 -0
  321. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_audio.py +0 -0
  322. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_batching.py +0 -0
  323. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_catalog.py +0 -0
  324. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_client.py +0 -0
  325. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_cloud_transfer.py +0 -0
  326. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_data_storage.py +0 -0
  327. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_datachain_merge.py +0 -0
  328. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_dataset_query.py +0 -0
  329. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_datasets.py +0 -0
  330. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_delta.py +0 -0
  331. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_feature_pickling.py +0 -0
  332. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_file.py +0 -0
  333. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_hf.py +0 -0
  334. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_hidden_field.py +0 -0
  335. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_image.py +0 -0
  336. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_listing.py +0 -0
  337. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_ls.py +0 -0
  338. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_meta_formats.py +0 -0
  339. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_metastore.py +0 -0
  340. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_metrics.py +0 -0
  341. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_mutate.py +0 -0
  342. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_pull.py +0 -0
  343. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_pytorch.py +0 -0
  344. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_query.py +0 -0
  345. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_read_database.py +0 -0
  346. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_read_dataset_remote.py +0 -0
  347. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  348. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_retry.py +0 -0
  349. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_session.py +0 -0
  350. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_storage_pattern.py +0 -0
  351. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_studio_datetime_parsing.py +0 -0
  352. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_to_database.py +0 -0
  353. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_toolkit.py +0 -0
  354. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_video.py +0 -0
  355. {datachain-0.32.3 → datachain-0.33.1}/tests/func/test_warehouse.py +0 -0
  356. {datachain-0.32.3 → datachain-0.33.1}/tests/scripts/feature_class.py +0 -0
  357. {datachain-0.32.3 → datachain-0.33.1}/tests/scripts/feature_class_exception.py +0 -0
  358. {datachain-0.32.3 → datachain-0.33.1}/tests/scripts/feature_class_parallel.py +0 -0
  359. {datachain-0.32.3 → datachain-0.33.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  360. {datachain-0.32.3 → datachain-0.33.1}/tests/scripts/name_len_slow.py +0 -0
  361. {datachain-0.32.3 → datachain-0.33.1}/tests/test_atomicity.py +0 -0
  362. {datachain-0.32.3 → datachain-0.33.1}/tests/test_cli_e2e.py +0 -0
  363. {datachain-0.32.3 → datachain-0.33.1}/tests/test_cli_studio.py +0 -0
  364. {datachain-0.32.3 → datachain-0.33.1}/tests/test_import_time.py +0 -0
  365. {datachain-0.32.3 → datachain-0.33.1}/tests/test_query_e2e.py +0 -0
  366. {datachain-0.32.3 → datachain-0.33.1}/tests/test_telemetry.py +0 -0
  367. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/__init__.py +0 -0
  368. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/__init__.py +0 -0
  369. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/conftest.py +0 -0
  370. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_arrow.py +0 -0
  371. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_audio.py +0 -0
  372. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_clip.py +0 -0
  373. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_datachain.py +0 -0
  374. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  375. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  376. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_feature.py +0 -0
  377. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_feature_utils.py +0 -0
  378. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_file.py +0 -0
  379. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_hf.py +0 -0
  380. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_image.py +0 -0
  381. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_listing_info.py +0 -0
  382. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_namespace.py +0 -0
  383. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_partition_by.py +0 -0
  384. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_project.py +0 -0
  385. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_python_to_sql.py +0 -0
  386. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_schema.py +0 -0
  387. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_settings.py +0 -0
  388. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_sql_to_python.py +0 -0
  389. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_storage_pattern.py +0 -0
  390. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_text.py +0 -0
  391. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_udf.py +0 -0
  392. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_udf_signature.py +0 -0
  393. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_utils.py +0 -0
  394. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/lib/test_webdataset.py +0 -0
  395. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/model/__init__.py +0 -0
  396. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/model/test_bbox.py +0 -0
  397. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/model/test_pose.py +0 -0
  398. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/model/test_segment.py +0 -0
  399. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/model/test_utils.py +0 -0
  400. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/__init__.py +0 -0
  401. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  402. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/sqlite/test_types.py +0 -0
  403. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  404. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/test_array.py +0 -0
  405. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/test_conditional.py +0 -0
  406. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/test_path.py +0 -0
  407. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/test_random.py +0 -0
  408. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/test_selectable.py +0 -0
  409. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/sql/test_string.py +0 -0
  410. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_asyn.py +0 -0
  411. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_cache.py +0 -0
  412. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_catalog.py +0 -0
  413. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_catalog_loader.py +0 -0
  414. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_cli_datasets.py +0 -0
  415. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_cli_parsing.py +0 -0
  416. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_client.py +0 -0
  417. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_client_gcs.py +0 -0
  418. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_client_s3.py +0 -0
  419. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_config.py +0 -0
  420. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_data_storage.py +0 -0
  421. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_database_engine.py +0 -0
  422. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_dataset.py +0 -0
  423. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_dispatch.py +0 -0
  424. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_fileslice.py +0 -0
  425. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_func.py +0 -0
  426. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_listing.py +0 -0
  427. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_metastore.py +0 -0
  428. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_module_exports.py +0 -0
  429. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_pytorch.py +0 -0
  430. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_query_metrics.py +0 -0
  431. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_query_params.py +0 -0
  432. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_script_meta.py +0 -0
  433. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_semver.py +0 -0
  434. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_serializer.py +0 -0
  435. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_session.py +0 -0
  436. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_utils.py +0 -0
  437. {datachain-0.32.3 → datachain-0.33.1}/tests/unit/test_warehouse.py +0 -0
  438. {datachain-0.32.3 → datachain-0.33.1}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.13.0'
27
+ rev: 'v0.13.1'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.32.3
3
+ Version: 0.33.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -86,6 +86,7 @@ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
86
86
  Provides-Extra: tests
87
87
  Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
88
88
  Requires-Dist: pytest<9,>=8; extra == "tests"
89
+ Requires-Dist: pytest-asyncio; extra == "tests"
89
90
  Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
90
91
  Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
91
92
  Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
@@ -102,7 +103,7 @@ Requires-Dist: scipy; extra == "tests"
102
103
  Requires-Dist: ultralytics; extra == "tests"
103
104
  Provides-Extra: dev
104
105
  Requires-Dist: datachain[docs,tests]; extra == "dev"
105
- Requires-Dist: mypy==1.18.1; extra == "dev"
106
+ Requires-Dist: mypy==1.18.2; extra == "dev"
106
107
  Requires-Dist: types-python-dateutil; extra == "dev"
107
108
  Requires-Dist: types-dateparser; extra == "dev"
108
109
  Requires-Dist: types-pytz; extra == "dev"
@@ -102,6 +102,7 @@ postgres = [
102
102
  tests = [
103
103
  "datachain[torch,audio,remote,vector,hf,video,postgres]",
104
104
  "pytest>=8,<9",
105
+ "pytest-asyncio",
105
106
  "pytest-sugar>=0.9.6",
106
107
  "pytest-cov>=4.1.0",
107
108
  "pytest-mock>=3.12.0",
@@ -119,7 +120,7 @@ tests = [
119
120
  ]
120
121
  dev = [
121
122
  "datachain[docs,tests]",
122
- "mypy==1.18.1",
123
+ "mypy==1.18.2",
123
124
  "types-python-dateutil",
124
125
  "types-dateparser",
125
126
  "types-pytz",
@@ -144,19 +144,26 @@ def shutdown_process(
144
144
  return proc.wait()
145
145
 
146
146
 
147
- def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
147
+ def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
148
148
  buffer = b""
149
- while byt := stream.read(1): # Read one byte at a time
150
- buffer += byt
151
149
 
152
- if byt in (b"\n", b"\r"): # Check for newline or carriage return
153
- line = buffer.decode("utf-8")
154
- callback(line)
155
- buffer = b"" # Clear buffer for next line
150
+ try:
151
+ while byt := stream.read(1): # Read one byte at a time
152
+ buffer += byt
156
153
 
157
- if buffer: # Handle any remaining data in the buffer
158
- line = buffer.decode("utf-8")
159
- callback(line)
154
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
155
+ line = buffer.decode("utf-8", errors="replace")
156
+ callback(line)
157
+ buffer = b"" # Clear buffer for the next line
158
+
159
+ if buffer: # Handle any remaining data in the buffer
160
+ line = buffer.decode("utf-8", errors="replace")
161
+ callback(line)
162
+ finally:
163
+ try:
164
+ stream.close() # Ensure output is closed
165
+ except Exception: # noqa: BLE001, S110
166
+ pass
160
167
 
161
168
 
162
169
  class DatasetRowsFetcher(NodesThreadPool):
@@ -1760,13 +1767,13 @@ class Catalog:
1760
1767
  recursive=recursive,
1761
1768
  )
1762
1769
 
1770
+ @staticmethod
1763
1771
  def query(
1764
- self,
1765
1772
  query_script: str,
1766
1773
  env: Optional[Mapping[str, str]] = None,
1767
1774
  python_executable: str = sys.executable,
1768
- capture_output: bool = False,
1769
- output_hook: Callable[[str], None] = noop,
1775
+ stdout_callback: Optional[Callable[[str], None]] = None,
1776
+ stderr_callback: Optional[Callable[[str], None]] = None,
1770
1777
  params: Optional[dict[str, str]] = None,
1771
1778
  job_id: Optional[str] = None,
1772
1779
  interrupt_timeout: Optional[int] = None,
@@ -1781,13 +1788,18 @@ class Catalog:
1781
1788
  },
1782
1789
  )
1783
1790
  popen_kwargs: dict[str, Any] = {}
1784
- if capture_output:
1785
- popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
1791
+
1792
+ if stdout_callback is not None:
1793
+ popen_kwargs = {"stdout": subprocess.PIPE}
1794
+ if stderr_callback is not None:
1795
+ popen_kwargs["stderr"] = subprocess.PIPE
1786
1796
 
1787
1797
  def raise_termination_signal(sig: int, _: Any) -> NoReturn:
1788
1798
  raise TerminationSignal(sig)
1789
1799
 
1790
- thread: Optional[Thread] = None
1800
+ stdout_thread: Optional[Thread] = None
1801
+ stderr_thread: Optional[Thread] = None
1802
+
1791
1803
  with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
1792
1804
  logger.info("Starting process %s", proc.pid)
1793
1805
 
@@ -1801,10 +1813,20 @@ class Catalog:
1801
1813
  orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
1802
1814
  signal.signal(signal.SIGTERM, raise_termination_signal)
1803
1815
  try:
1804
- if capture_output:
1805
- args = (proc.stdout, output_hook)
1806
- thread = Thread(target=_process_stream, args=args, daemon=True)
1807
- thread.start()
1816
+ if stdout_callback is not None:
1817
+ stdout_thread = Thread(
1818
+ target=process_output,
1819
+ args=(proc.stdout, stdout_callback),
1820
+ daemon=True,
1821
+ )
1822
+ stdout_thread.start()
1823
+ if stderr_callback is not None:
1824
+ stderr_thread = Thread(
1825
+ target=process_output,
1826
+ args=(proc.stderr, stderr_callback),
1827
+ daemon=True,
1828
+ )
1829
+ stderr_thread.start()
1808
1830
 
1809
1831
  proc.wait()
1810
1832
  except TerminationSignal as exc:
@@ -1822,8 +1844,22 @@ class Catalog:
1822
1844
  finally:
1823
1845
  signal.signal(signal.SIGTERM, orig_sigterm_handler)
1824
1846
  signal.signal(signal.SIGINT, orig_sigint_handler)
1825
- if thread:
1826
- thread.join() # wait for the reader thread
1847
+ # wait for the reader thread
1848
+ thread_join_timeout_seconds = 30
1849
+ if stdout_thread is not None:
1850
+ stdout_thread.join(timeout=thread_join_timeout_seconds)
1851
+ if stdout_thread.is_alive():
1852
+ logger.warning(
1853
+ "stdout thread is still alive after %s seconds",
1854
+ thread_join_timeout_seconds,
1855
+ )
1856
+ if stderr_thread is not None:
1857
+ stderr_thread.join(timeout=thread_join_timeout_seconds)
1858
+ if stderr_thread.is_alive():
1859
+ logger.warning(
1860
+ "stderr thread is still alive after %s seconds",
1861
+ thread_join_timeout_seconds,
1862
+ )
1827
1863
 
1828
1864
  logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1829
1865
  if proc.returncode in (
@@ -0,0 +1,44 @@
1
+ import uuid
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from typing import Union
5
+
6
+
7
+ @dataclass
8
+ class Checkpoint:
9
+ """
10
+ Represents a checkpoint within a job run.
11
+
12
+ A checkpoint marks a successfully completed stage of execution. In the event
13
+ of a failure, the job can resume from the most recent checkpoint rather than
14
+ starting over from the beginning.
15
+
16
+ Checkpoints can also be created in a "partial" mode, which indicates that the
17
+ work at this stage was only partially completed. For example, if a failure
18
+ occurs halfway through running a UDF, already computed results can still be
19
+ saved, allowing the job to resume from that partially completed state on
20
+ restart.
21
+ """
22
+
23
+ id: str
24
+ job_id: str
25
+ hash: str
26
+ partial: bool
27
+ created_at: datetime
28
+
29
+ @classmethod
30
+ def parse(
31
+ cls,
32
+ id: Union[str, uuid.UUID],
33
+ job_id: str,
34
+ _hash: str,
35
+ partial: bool,
36
+ created_at: datetime,
37
+ ) -> "Checkpoint":
38
+ return cls(
39
+ str(id),
40
+ job_id,
41
+ _hash,
42
+ bool(partial),
43
+ created_at,
44
+ )
@@ -93,10 +93,11 @@ class Client(ABC):
93
93
  self.uri = self.get_uri(self.name)
94
94
 
95
95
  @staticmethod
96
- def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]:
96
+ def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]: # noqa: PLR0911
97
97
  from .azure import AzureClient
98
98
  from .gcs import GCSClient
99
99
  from .hf import HfClient
100
+ from .http import HTTPClient, HTTPSClient
100
101
  from .local import FileClient
101
102
  from .s3 import ClientS3
102
103
 
@@ -114,6 +115,10 @@ class Client(ABC):
114
115
  return FileClient
115
116
  if protocol == HfClient.protocol:
116
117
  return HfClient
118
+ if protocol == HTTPClient.protocol:
119
+ return HTTPClient
120
+ if protocol == HTTPSClient.protocol:
121
+ return HTTPSClient
117
122
 
118
123
  raise NotImplementedError(f"Unsupported protocol: {protocol}")
119
124
 
@@ -0,0 +1,157 @@
1
+ from datetime import datetime, timezone
2
+ from typing import TYPE_CHECKING, Any, ClassVar, Optional, cast
3
+ from urllib.parse import urlparse
4
+
5
+ from fsspec.implementations.http import HTTPFileSystem
6
+
7
+ from datachain.dataset import StorageURI
8
+ from datachain.lib.file import File
9
+
10
+ from .fsspec import Client
11
+
12
+ if TYPE_CHECKING:
13
+ from datachain.cache import Cache
14
+
15
+
16
+ class HTTPClient(Client):
17
+ FS_CLASS = HTTPFileSystem
18
+ PREFIX: ClassVar[str] = "http://"
19
+ protocol: ClassVar[str] = "http"
20
+
21
+ @classmethod
22
+ def create_fs(cls, **kwargs) -> HTTPFileSystem:
23
+ # Configure HTTPFileSystem options
24
+ kwargs.setdefault("simple_links", True)
25
+ kwargs.setdefault("same_scheme", True)
26
+ kwargs.setdefault("cache_type", "bytes")
27
+
28
+ kwargs.pop("version_aware", None)
29
+
30
+ fs = cls.FS_CLASS(**kwargs)
31
+ fs.invalidate_cache()
32
+ return cast("HTTPFileSystem", fs)
33
+
34
+ @classmethod
35
+ def from_name(
36
+ cls,
37
+ name: str,
38
+ cache: "Cache",
39
+ kwargs: dict[str, Any],
40
+ ) -> "HTTPClient":
41
+ parsed = urlparse(name)
42
+
43
+ if parsed.scheme:
44
+ name = parsed.netloc + parsed.path
45
+
46
+ return cls(name, kwargs, cache)
47
+
48
+ @classmethod
49
+ def split_url(cls, url: str) -> tuple[str, str]:
50
+ """Split HTTP/HTTPS URL into domain (bucket equivalent) and path."""
51
+ parsed = urlparse(url)
52
+ domain = parsed.netloc
53
+ path = parsed.path.lstrip("/")
54
+
55
+ if parsed.query:
56
+ path += f"?{parsed.query}"
57
+ if parsed.fragment:
58
+ path += f"#{parsed.fragment}"
59
+
60
+ return domain, path
61
+
62
+ @classmethod
63
+ def get_uri(cls, name: str) -> "StorageURI":
64
+ if not name.startswith(("http://", "https://")):
65
+ return StorageURI(f"{cls.PREFIX}{name}")
66
+ return StorageURI(name)
67
+
68
+ @classmethod
69
+ def is_root_url(cls, url: str) -> bool:
70
+ parsed = urlparse(url)
71
+ return parsed.path in ("", "/") and not parsed.query and not parsed.fragment
72
+
73
+ def get_full_path(self, rel_path: str, version_id: Optional[str] = None) -> str:
74
+ if self.name.startswith(("http://", "https://")):
75
+ base_url = self.name
76
+ else:
77
+ if rel_path and "/" in rel_path:
78
+ first_part = rel_path.split("/")[0]
79
+ if "." in first_part and not first_part.startswith("."):
80
+ return f"{self.protocol}://{rel_path}"
81
+
82
+ base_url = f"{self.protocol}://{self.name}"
83
+
84
+ if rel_path:
85
+ if not base_url.endswith("/") and not rel_path.startswith("/"):
86
+ base_url += "/"
87
+ full_url = base_url + rel_path
88
+ else:
89
+ full_url = base_url
90
+
91
+ return full_url
92
+
93
+ def url(self, path: str, expires: int = 3600, **kwargs) -> str:
94
+ """
95
+ Generate URL for the given path.
96
+ Note: HTTP URLs don't support signed/expiring URLs.
97
+ """
98
+ return self.get_full_path(path, kwargs.pop("version_id", None))
99
+
100
+ def info_to_file(self, v: dict[str, Any], path: str) -> File:
101
+ etag = v.get("ETag", "").strip('"')
102
+ last_modified = v.get("last_modified")
103
+ if last_modified:
104
+ if isinstance(last_modified, str):
105
+ try:
106
+ from email.utils import parsedate_to_datetime
107
+
108
+ last_modified = parsedate_to_datetime(last_modified)
109
+ except (ValueError, TypeError):
110
+ last_modified = datetime.now(timezone.utc)
111
+ elif isinstance(last_modified, (int, float)):
112
+ last_modified = datetime.fromtimestamp(last_modified, timezone.utc)
113
+ else:
114
+ last_modified = datetime.now(timezone.utc)
115
+
116
+ return File(
117
+ source=self.uri,
118
+ path=path,
119
+ size=v.get("size", 0),
120
+ etag=etag,
121
+ version="",
122
+ is_latest=True,
123
+ last_modified=last_modified,
124
+ )
125
+
126
+ def upload(self, data: bytes, path: str) -> "File":
127
+ raise NotImplementedError(
128
+ "HTTP/HTTPS client is read-only. Upload operations are not supported."
129
+ )
130
+
131
+ def get_file_info(self, path: str, version_id: Optional[str] = None) -> "File":
132
+ info = self.fs.info(self.get_full_path(path))
133
+ return self.info_to_file(info, path)
134
+
135
+ def open_object(self, file: "File", use_cache: bool = True, cb=None):
136
+ from datachain.client.fileslice import FileWrapper
137
+
138
+ if use_cache and (cache_path := self.cache.get_path(file)):
139
+ return open(cache_path, mode="rb")
140
+
141
+ assert not file.location
142
+ return FileWrapper(
143
+ self.fs.open(self.get_full_path(file.get_path_normalized())),
144
+ cb or (lambda x: None),
145
+ )
146
+
147
+ async def get_file(self, lpath, rpath, callback, version_id: Optional[str] = None):
148
+ return await self.fs._get_file(lpath, rpath, callback=callback)
149
+
150
+ async def _fetch_dir(self, prefix: str, pbar, result_queue) -> set[str]:
151
+ full_url = self.get_full_path(prefix)
152
+ raise NotImplementedError(f"Cannot download file from {full_url}")
153
+
154
+
155
+ class HTTPSClient(HTTPClient):
156
+ protocol = "https"
157
+ PREFIX = "https://"
@@ -4,6 +4,7 @@ from enum import Enum
4
4
  class JobStatus(int, Enum):
5
5
  CREATED = 1
6
6
  SCHEDULED = 10
7
+ PROVISIONING = 12
7
8
  QUEUED = 2
8
9
  INIT = 3
9
10
  RUNNING = 4
@@ -13,6 +13,7 @@ from uuid import uuid4
13
13
  from sqlalchemy import (
14
14
  JSON,
15
15
  BigInteger,
16
+ Boolean,
16
17
  Column,
17
18
  DateTime,
18
19
  ForeignKey,
@@ -24,6 +25,7 @@ from sqlalchemy import (
24
25
  )
25
26
  from sqlalchemy.sql import func as f
26
27
 
28
+ from datachain.checkpoint import Checkpoint
27
29
  from datachain.data_storage import JobQueryType, JobStatus
28
30
  from datachain.data_storage.serializer import Serializable
29
31
  from datachain.dataset import (
@@ -36,6 +38,7 @@ from datachain.dataset import (
36
38
  StorageURI,
37
39
  )
38
40
  from datachain.error import (
41
+ CheckpointNotFoundError,
39
42
  DatasetNotFoundError,
40
43
  DatasetVersionNotFoundError,
41
44
  NamespaceDeleteNotAllowedError,
@@ -75,6 +78,7 @@ class AbstractMetastore(ABC, Serializable):
75
78
  dataset_list_version_class: type[DatasetListVersion] = DatasetListVersion
76
79
  dependency_class: type[DatasetDependency] = DatasetDependency
77
80
  job_class: type[Job] = Job
81
+ checkpoint_class: type[Checkpoint] = Checkpoint
78
82
 
79
83
  def __init__(
80
84
  self,
@@ -431,6 +435,35 @@ class AbstractMetastore(ABC, Serializable):
431
435
  def get_job_status(self, job_id: str) -> Optional[JobStatus]:
432
436
  """Returns the status of the given job."""
433
437
 
438
+ #
439
+ # Checkpoints
440
+ #
441
+
442
+ @abstractmethod
443
+ def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
444
+ """Returns all checkpoints related to some job"""
445
+
446
+ @abstractmethod
447
+ def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
448
+ """Gets single checkpoint by id"""
449
+
450
+ def find_checkpoint(
451
+ self, job_id: str, _hash: str, partial: bool = False, conn=None
452
+ ) -> Optional[Checkpoint]:
453
+ """
454
+ Tries to find checkpoint for a job with specific hash and optionally partial
455
+ """
456
+
457
+ @abstractmethod
458
+ def create_checkpoint(
459
+ self,
460
+ job_id: str,
461
+ _hash: str,
462
+ partial: bool = False,
463
+ conn: Optional[Any] = None,
464
+ ) -> Checkpoint:
465
+ """Creates new checkpoint"""
466
+
434
467
 
435
468
  class AbstractDBMetastore(AbstractMetastore):
436
469
  """
@@ -446,6 +479,7 @@ class AbstractDBMetastore(AbstractMetastore):
446
479
  DATASET_VERSION_TABLE = "datasets_versions"
447
480
  DATASET_DEPENDENCY_TABLE = "datasets_dependencies"
448
481
  JOBS_TABLE = "jobs"
482
+ CHECKPOINTS_TABLE = "checkpoints"
449
483
 
450
484
  db: "DatabaseEngine"
451
485
 
@@ -1663,3 +1697,106 @@ class AbstractDBMetastore(AbstractMetastore):
1663
1697
  if not results:
1664
1698
  return None
1665
1699
  return results[0][0]
1700
+
1701
+ #
1702
+ # Checkpoints
1703
+ #
1704
+
1705
+ @staticmethod
1706
+ def _checkpoints_columns() -> "list[SchemaItem]":
1707
+ return [
1708
+ Column(
1709
+ "id",
1710
+ Text,
1711
+ default=uuid4,
1712
+ primary_key=True,
1713
+ nullable=False,
1714
+ ),
1715
+ Column("job_id", Text, nullable=True),
1716
+ Column("hash", Text, nullable=False),
1717
+ Column("partial", Boolean, default=False),
1718
+ Column("created_at", DateTime(timezone=True), nullable=False),
1719
+ UniqueConstraint("job_id", "hash"),
1720
+ ]
1721
+
1722
+ @cached_property
1723
+ def _checkpoints_fields(self) -> list[str]:
1724
+ return [c.name for c in self._checkpoints_columns() if c.name] # type: ignore[attr-defined]
1725
+
1726
+ @cached_property
1727
+ def _checkpoints(self) -> "Table":
1728
+ return Table(
1729
+ self.CHECKPOINTS_TABLE,
1730
+ self.db.metadata,
1731
+ *self._checkpoints_columns(),
1732
+ )
1733
+
1734
+ @abstractmethod
1735
+ def _checkpoints_insert(self) -> "Insert": ...
1736
+
1737
+ def _checkpoints_select(self, *columns) -> "Select":
1738
+ if not columns:
1739
+ return self._checkpoints.select()
1740
+ return select(*columns)
1741
+
1742
+ def _checkpoints_delete(self) -> "Delete":
1743
+ return self._checkpoints.delete()
1744
+
1745
+ def _checkpoints_query(self):
1746
+ return self._checkpoints_select(
1747
+ *[getattr(self._checkpoints.c, f) for f in self._checkpoints_fields]
1748
+ )
1749
+
1750
+ def create_checkpoint(
1751
+ self,
1752
+ job_id: str,
1753
+ _hash: str,
1754
+ partial: bool = False,
1755
+ conn: Optional[Any] = None,
1756
+ ) -> Checkpoint:
1757
+ """
1758
+ Creates a new job query step.
1759
+ """
1760
+ checkpoint_id = str(uuid4())
1761
+ self.db.execute(
1762
+ self._checkpoints_insert().values(
1763
+ id=checkpoint_id,
1764
+ job_id=job_id,
1765
+ hash=_hash,
1766
+ partial=partial,
1767
+ created_at=datetime.now(timezone.utc),
1768
+ ),
1769
+ conn=conn,
1770
+ )
1771
+ return self.get_checkpoint_by_id(checkpoint_id)
1772
+
1773
+ def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
1774
+ """List checkpoints by job id."""
1775
+ query = self._checkpoints_query().where(self._checkpoints.c.job_id == job_id)
1776
+ rows = list(self.db.execute(query, conn=conn))
1777
+
1778
+ yield from [self.checkpoint_class.parse(*r) for r in rows]
1779
+
1780
+ def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
1781
+ """Returns the checkpoint with the given ID."""
1782
+ ch = self._checkpoints
1783
+ query = self._checkpoints_select(ch).where(ch.c.id == checkpoint_id)
1784
+ rows = list(self.db.execute(query, conn=conn))
1785
+ if not rows:
1786
+ raise CheckpointNotFoundError(f"Checkpoint {checkpoint_id} not found")
1787
+ return self.checkpoint_class.parse(*rows[0])
1788
+
1789
+ def find_checkpoint(
1790
+ self, job_id: str, _hash: str, partial: bool = False, conn=None
1791
+ ) -> Optional[Checkpoint]:
1792
+ """
1793
+ Tries to find checkpoint for a job with specific hash and optionally partial
1794
+ """
1795
+ ch = self._checkpoints
1796
+ query = self._checkpoints_select(ch).where(
1797
+ ch.c.job_id == job_id, ch.c.hash == _hash, ch.c.partial == partial
1798
+ )
1799
+ rows = list(self.db.execute(query, conn=conn))
1800
+ if not rows:
1801
+ return None
1802
+ return self.checkpoint_class.parse(*rows[0])
@@ -51,7 +51,7 @@ def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
51
51
  """
52
52
  c_set: dict[str, sa.Column] = {}
53
53
  for c in columns:
54
- if (ec := c_set.get(c.name, None)) is not None:
54
+ if (ec := c_set.get(c.name)) is not None:
55
55
  if str(ec.type) != str(c.type):
56
56
  raise ValueError(
57
57
  f"conflicting types for column {c.name}:{c.type!s} and {ec.type!s}"
@@ -459,6 +459,8 @@ class SQLiteMetastore(AbstractDBMetastore):
459
459
  self.default_table_names.append(self._datasets_dependencies.name)
460
460
  self.db.create_table(self._jobs, if_not_exists=True)
461
461
  self.default_table_names.append(self._jobs.name)
462
+ self.db.create_table(self._checkpoints, if_not_exists=True)
463
+ self.default_table_names.append(self._checkpoints.name)
462
464
 
463
465
  def _init_namespaces_projects(self) -> None:
464
466
  """
@@ -543,6 +545,12 @@ class SQLiteMetastore(AbstractDBMetastore):
543
545
  def _jobs_insert(self) -> "Insert":
544
546
  return sqlite.insert(self._jobs)
545
547
 
548
+ #
549
+ # Checkpoints
550
+ #
551
+ def _checkpoints_insert(self) -> "Insert":
552
+ return sqlite.insert(self._checkpoints)
553
+
546
554
  #
547
555
  # Namespaces
548
556
  #
@@ -1,5 +1,3 @@
1
- import random
2
- import string
3
1
  from collections.abc import Sequence
4
2
  from enum import Enum
5
3
  from typing import TYPE_CHECKING, Optional, Union
@@ -11,16 +9,12 @@ from datachain.query.schema import Column
11
9
  if TYPE_CHECKING:
12
10
  from datachain.lib.dc import DataChain
13
11
 
14
-
15
12
  C = Column
16
13
 
17
14
 
18
- def get_status_col_name() -> str:
19
- """Returns new unique status col name"""
20
- return "diff_" + "".join(
21
- random.choice(string.ascii_letters) # noqa: S311
22
- for _ in range(10)
23
- )
15
+ STATUS_COL_NAME = "diff_7aeed3aa17ba4d50b8d1c368c76e16a6"
16
+ LEFT_DIFF_COL_NAME = "diff_95f95344064a4b819c8625cd1a5cfc2b"
17
+ RIGHT_DIFF_COL_NAME = "diff_5808838a49b54849aa461d7387376d34"
24
18
 
25
19
 
26
20
  class CompareStatus(str, Enum):
@@ -101,9 +95,9 @@ def _compare( # noqa: C901, PLR0912
101
95
  compare = right_compare = [c for c in cols if c in right_cols and c not in on] # type: ignore[misc]
102
96
 
103
97
  # get diff column names
104
- diff_col = status_col or get_status_col_name()
105
- ldiff_col = get_status_col_name()
106
- rdiff_col = get_status_col_name()
98
+ diff_col = status_col or STATUS_COL_NAME
99
+ ldiff_col = LEFT_DIFF_COL_NAME
100
+ rdiff_col = RIGHT_DIFF_COL_NAME
107
101
 
108
102
  # adding helper diff columns, which will be removed after
109
103
  left = left.mutate(**{ldiff_col: 1})
@@ -227,7 +221,7 @@ def compare_and_split(
227
221
  )
228
222
  ```
229
223
  """
230
- status_col = get_status_col_name()
224
+ status_col = STATUS_COL_NAME
231
225
 
232
226
  res = _compare(
233
227
  left,
@@ -97,3 +97,7 @@ class TableMissingError(DataChainError):
97
97
 
98
98
  class OutdatedDatabaseSchemaError(DataChainError):
99
99
  pass
100
+
101
+
102
+ class CheckpointNotFoundError(NotFoundError):
103
+ pass