datachain 0.33.0__tar.gz → 0.34.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (439)
  1. {datachain-0.33.0 → datachain-0.34.0}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.33.0 → datachain-0.34.0}/PKG-INFO +2 -2
  3. {datachain-0.33.0 → datachain-0.34.0}/pyproject.toml +1 -1
  4. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/catalog.py +58 -22
  5. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/job.py +1 -0
  6. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/metastore.py +22 -1
  7. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/diff/__init__.py +7 -13
  8. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/error.py +4 -0
  9. datachain-0.34.0/src/datachain/hash_utils.py +147 -0
  10. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/job.py +3 -0
  11. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/datachain.py +166 -70
  12. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/signal_schema.py +7 -0
  13. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/udf.py +20 -0
  14. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/dataset.py +107 -0
  15. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/utils.py +6 -0
  16. {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/PKG-INFO +2 -2
  17. {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/SOURCES.txt +5 -0
  18. {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/requires.txt +1 -1
  19. {datachain-0.33.0 → datachain-0.34.0}/tests/conftest.py +26 -5
  20. datachain-0.34.0/tests/unit/lib/test_checkpoints.py +200 -0
  21. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_datachain.py +1 -1
  22. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_diff.py +41 -0
  23. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_signal_schema.py +24 -0
  24. datachain-0.34.0/tests/unit/test_datachain_hash.py +173 -0
  25. datachain-0.34.0/tests/unit/test_hash_utils.py +109 -0
  26. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_query.py +22 -3
  27. datachain-0.34.0/tests/unit/test_query_steps_hash.py +505 -0
  28. {datachain-0.33.0 → datachain-0.34.0}/.cruft.json +0 -0
  29. {datachain-0.33.0 → datachain-0.34.0}/.gitattributes +0 -0
  30. {datachain-0.33.0 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  31. {datachain-0.33.0 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  32. {datachain-0.33.0 → datachain-0.34.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  33. {datachain-0.33.0 → datachain-0.34.0}/.github/codecov.yaml +0 -0
  34. {datachain-0.33.0 → datachain-0.34.0}/.github/dependabot.yml +0 -0
  35. {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/benchmarks.yml +0 -0
  36. {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/release.yml +0 -0
  37. {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/tests-studio.yml +0 -0
  38. {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/tests.yml +0 -0
  39. {datachain-0.33.0 → datachain-0.34.0}/.github/workflows/update-template.yaml +0 -0
  40. {datachain-0.33.0 → datachain-0.34.0}/.gitignore +0 -0
  41. {datachain-0.33.0 → datachain-0.34.0}/CODE_OF_CONDUCT.rst +0 -0
  42. {datachain-0.33.0 → datachain-0.34.0}/LICENSE +0 -0
  43. {datachain-0.33.0 → datachain-0.34.0}/README.rst +0 -0
  44. {datachain-0.33.0 → datachain-0.34.0}/docs/api_hooks.py +0 -0
  45. {datachain-0.33.0 → datachain-0.34.0}/docs/assets/captioned_cartoons.png +0 -0
  46. {datachain-0.33.0 → datachain-0.34.0}/docs/assets/datachain-white.svg +0 -0
  47. {datachain-0.33.0 → datachain-0.34.0}/docs/assets/datachain.svg +0 -0
  48. {datachain-0.33.0 → datachain-0.34.0}/docs/assets/webhook_dialog.png +0 -0
  49. {datachain-0.33.0 → datachain-0.34.0}/docs/assets/webhook_list.png +0 -0
  50. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/login.md +0 -0
  51. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/logout.md +0 -0
  52. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/team.md +0 -0
  53. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/auth/token.md +0 -0
  54. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/index.md +0 -0
  55. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/cancel.md +0 -0
  56. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/clusters.md +0 -0
  57. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/logs.md +0 -0
  58. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/ls.md +0 -0
  59. {datachain-0.33.0 → datachain-0.34.0}/docs/commands/job/run.md +0 -0
  60. {datachain-0.33.0 → datachain-0.34.0}/docs/contributing.md +0 -0
  61. {datachain-0.33.0 → datachain-0.34.0}/docs/css/github-permalink-style.css +0 -0
  62. {datachain-0.33.0 → datachain-0.34.0}/docs/examples.md +0 -0
  63. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/db_migrations.md +0 -0
  64. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/delta.md +0 -0
  65. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/env.md +0 -0
  66. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/index.md +0 -0
  67. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/namespaces.md +0 -0
  68. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/processing.md +0 -0
  69. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/remotes.md +0 -0
  70. {datachain-0.33.0 → datachain-0.34.0}/docs/guide/retry.md +0 -0
  71. {datachain-0.33.0 → datachain-0.34.0}/docs/index.md +0 -0
  72. {datachain-0.33.0 → datachain-0.34.0}/docs/overrides/main.html +0 -0
  73. {datachain-0.33.0 → datachain-0.34.0}/docs/quick-start.md +0 -0
  74. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/arrowrow.md +0 -0
  75. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/bbox.md +0 -0
  76. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/file.md +0 -0
  77. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/imagefile.md +0 -0
  78. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/index.md +0 -0
  79. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/pose.md +0 -0
  80. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/segment.md +0 -0
  81. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/tarvfile.md +0 -0
  82. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/textfile.md +0 -0
  83. {datachain-0.33.0 → datachain-0.34.0}/docs/references/data-types/videofile.md +0 -0
  84. {datachain-0.33.0 → datachain-0.34.0}/docs/references/datachain.md +0 -0
  85. {datachain-0.33.0 → datachain-0.34.0}/docs/references/func.md +0 -0
  86. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/aggregate.md +0 -0
  87. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/array.md +0 -0
  88. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/conditional.md +0 -0
  89. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/numeric.md +0 -0
  90. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/path.md +0 -0
  91. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/random.md +0 -0
  92. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/string.md +0 -0
  93. {datachain-0.33.0 → datachain-0.34.0}/docs/references/functions/window.md +0 -0
  94. {datachain-0.33.0 → datachain-0.34.0}/docs/references/index.md +0 -0
  95. {datachain-0.33.0 → datachain-0.34.0}/docs/references/toolkit.md +0 -0
  96. {datachain-0.33.0 → datachain-0.34.0}/docs/references/torch.md +0 -0
  97. {datachain-0.33.0 → datachain-0.34.0}/docs/references/udf.md +0 -0
  98. {datachain-0.33.0 → datachain-0.34.0}/docs/studio/api/.gitkeep +0 -0
  99. {datachain-0.33.0 → datachain-0.34.0}/docs/studio/webhooks.md +0 -0
  100. {datachain-0.33.0 → datachain-0.34.0}/docs/templates/main.dot +0 -0
  101. {datachain-0.33.0 → datachain-0.34.0}/docs/templates/operation.dot +0 -0
  102. {datachain-0.33.0 → datachain-0.34.0}/docs/templates/responses.def +0 -0
  103. {datachain-0.33.0 → datachain-0.34.0}/docs/tutorials.md +0 -0
  104. {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  105. {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  106. {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/openimage-detect.py +0 -0
  107. {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  108. {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  109. {datachain-0.33.0 → datachain-0.34.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  110. {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/common_sql_functions.py +0 -0
  111. {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/json-csv-reader.py +0 -0
  112. {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/nested_datamodel.py +0 -0
  113. {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/torch-loader.py +0 -0
  114. {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/udfs/parallel.py +0 -0
  115. {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/udfs/simple.py +0 -0
  116. {datachain-0.33.0 → datachain-0.34.0}/examples/get_started/udfs/stateful.py +0 -0
  117. {datachain-0.33.0 → datachain-0.34.0}/examples/incremental_processing/delta.py +0 -0
  118. {datachain-0.33.0 → datachain-0.34.0}/examples/incremental_processing/retry.py +0 -0
  119. {datachain-0.33.0 → datachain-0.34.0}/examples/incremental_processing/utils.py +0 -0
  120. {datachain-0.33.0 → datachain-0.34.0}/examples/llm_and_nlp/claude-query.py +0 -0
  121. {datachain-0.33.0 → datachain-0.34.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  122. {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/audio-to-text.py +0 -0
  123. {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/clip_inference.py +0 -0
  124. {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/hf_pipeline.py +0 -0
  125. {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  126. {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/wds.py +0 -0
  127. {datachain-0.33.0 → datachain-0.34.0}/examples/multimodal/wds_filtered.py +0 -0
  128. {datachain-0.33.0 → datachain-0.34.0}/mkdocs.yml +0 -0
  129. {datachain-0.33.0 → datachain-0.34.0}/noxfile.py +0 -0
  130. {datachain-0.33.0 → datachain-0.34.0}/setup.cfg +0 -0
  131. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/__init__.py +0 -0
  132. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/__main__.py +0 -0
  133. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/asyn.py +0 -0
  134. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cache.py +0 -0
  135. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/__init__.py +0 -0
  136. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/datasource.py +0 -0
  137. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/catalog/loader.py +0 -0
  138. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/checkpoint.py +0 -0
  139. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/__init__.py +0 -0
  140. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/__init__.py +0 -0
  141. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/datasets.py +0 -0
  142. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/du.py +0 -0
  143. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/index.py +0 -0
  144. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/ls.py +0 -0
  145. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/misc.py +0 -0
  146. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/query.py +0 -0
  147. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/commands/show.py +0 -0
  148. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/__init__.py +0 -0
  149. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/job.py +0 -0
  150. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/studio.py +0 -0
  151. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/parser/utils.py +0 -0
  152. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/cli/utils.py +0 -0
  153. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/__init__.py +0 -0
  154. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/azure.py +0 -0
  155. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/fileslice.py +0 -0
  156. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/fsspec.py +0 -0
  157. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/gcs.py +0 -0
  158. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/hf.py +0 -0
  159. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/http.py +0 -0
  160. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/local.py +0 -0
  161. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/client/s3.py +0 -0
  162. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/config.py +0 -0
  163. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/__init__.py +0 -0
  164. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/db_engine.py +0 -0
  165. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/schema.py +0 -0
  166. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/serializer.py +0 -0
  167. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/sqlite.py +0 -0
  168. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/data_storage/warehouse.py +0 -0
  169. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/dataset.py +0 -0
  170. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/delta.py +0 -0
  171. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/fs/__init__.py +0 -0
  172. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/fs/reference.py +0 -0
  173. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/fs/utils.py +0 -0
  174. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/__init__.py +0 -0
  175. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/aggregate.py +0 -0
  176. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/array.py +0 -0
  177. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/base.py +0 -0
  178. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/conditional.py +0 -0
  179. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/func.py +0 -0
  180. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/numeric.py +0 -0
  181. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/path.py +0 -0
  182. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/random.py +0 -0
  183. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/string.py +0 -0
  184. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/func/window.py +0 -0
  185. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/__init__.py +0 -0
  186. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/arrow.py +0 -0
  187. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/audio.py +0 -0
  188. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/clip.py +0 -0
  189. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/__init__.py +0 -0
  190. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/flatten.py +0 -0
  191. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  192. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  193. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/unflatten.py +0 -0
  194. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  195. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/data_model.py +0 -0
  196. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dataset_info.py +0 -0
  197. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/__init__.py +0 -0
  198. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/csv.py +0 -0
  199. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/database.py +0 -0
  200. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/datasets.py +0 -0
  201. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/hf.py +0 -0
  202. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/json.py +0 -0
  203. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/listings.py +0 -0
  204. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/pandas.py +0 -0
  205. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/parquet.py +0 -0
  206. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/records.py +0 -0
  207. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/storage.py +0 -0
  208. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/storage_pattern.py +0 -0
  209. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/utils.py +0 -0
  210. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/dc/values.py +0 -0
  211. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/file.py +0 -0
  212. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/hf.py +0 -0
  213. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/image.py +0 -0
  214. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/listing.py +0 -0
  215. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/listing_info.py +0 -0
  216. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/meta_formats.py +0 -0
  217. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/model_store.py +0 -0
  218. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/namespaces.py +0 -0
  219. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/projects.py +0 -0
  220. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/pytorch.py +0 -0
  221. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/settings.py +0 -0
  222. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/tar.py +0 -0
  223. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/text.py +0 -0
  224. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/udf_signature.py +0 -0
  225. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/utils.py +0 -0
  226. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/video.py +0 -0
  227. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/webdataset.py +0 -0
  228. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/lib/webdataset_laion.py +0 -0
  229. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/listing.py +0 -0
  230. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/__init__.py +0 -0
  231. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/bbox.py +0 -0
  232. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/pose.py +0 -0
  233. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/segment.py +0 -0
  234. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  235. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  236. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/pose.py +0 -0
  237. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/ultralytics/segment.py +0 -0
  238. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/model/utils.py +0 -0
  239. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/namespace.py +0 -0
  240. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/node.py +0 -0
  241. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/nodes_fetcher.py +0 -0
  242. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/nodes_thread_pool.py +0 -0
  243. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/progress.py +0 -0
  244. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/project.py +0 -0
  245. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/py.typed +0 -0
  246. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/__init__.py +0 -0
  247. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/batch.py +0 -0
  248. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/dispatch.py +0 -0
  249. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/metrics.py +0 -0
  250. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/params.py +0 -0
  251. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/queue.py +0 -0
  252. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/schema.py +0 -0
  253. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/session.py +0 -0
  254. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/udf.py +0 -0
  255. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/query/utils.py +0 -0
  256. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/remote/__init__.py +0 -0
  257. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/remote/studio.py +0 -0
  258. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/script_meta.py +0 -0
  259. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/semver.py +0 -0
  260. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/__init__.py +0 -0
  261. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/default/__init__.py +0 -0
  262. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/default/base.py +0 -0
  263. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/__init__.py +0 -0
  264. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/aggregate.py +0 -0
  265. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/array.py +0 -0
  266. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/conditional.py +0 -0
  267. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/numeric.py +0 -0
  268. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/path.py +0 -0
  269. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/random.py +0 -0
  270. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/functions/string.py +0 -0
  271. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/postgresql_dialect.py +0 -0
  272. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/postgresql_types.py +0 -0
  273. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/selectable.py +0 -0
  274. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  275. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/base.py +0 -0
  276. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/types.py +0 -0
  277. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/sqlite/vector.py +0 -0
  278. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/types.py +0 -0
  279. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/sql/utils.py +0 -0
  280. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/studio.py +0 -0
  281. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/telemetry.py +0 -0
  282. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/toolkit/__init__.py +0 -0
  283. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/toolkit/split.py +0 -0
  284. {datachain-0.33.0 → datachain-0.34.0}/src/datachain/torch/__init__.py +0 -0
  285. {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  286. {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/entry_points.txt +0 -0
  287. {datachain-0.33.0 → datachain-0.34.0}/src/datachain.egg-info/top_level.txt +0 -0
  288. {datachain-0.33.0 → datachain-0.34.0}/tests/__init__.py +0 -0
  289. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/__init__.py +0 -0
  290. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/conftest.py +0 -0
  291. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  292. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  293. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/.gitignore +0 -0
  294. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  295. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/test_datachain.py +0 -0
  296. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/test_ls.py +0 -0
  297. {datachain-0.33.0 → datachain-0.34.0}/tests/benchmarks/test_version.py +0 -0
  298. {datachain-0.33.0 → datachain-0.34.0}/tests/data.py +0 -0
  299. {datachain-0.33.0 → datachain-0.34.0}/tests/examples/__init__.py +0 -0
  300. {datachain-0.33.0 → datachain-0.34.0}/tests/examples/test_examples.py +0 -0
  301. {datachain-0.33.0 → datachain-0.34.0}/tests/examples/test_wds_e2e.py +0 -0
  302. {datachain-0.33.0 → datachain-0.34.0}/tests/examples/wds_data.py +0 -0
  303. {datachain-0.33.0 → datachain-0.34.0}/tests/func/__init__.py +0 -0
  304. {datachain-0.33.0 → datachain-0.34.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  305. {datachain-0.33.0 → datachain-0.34.0}/tests/func/data/lena.jpg +0 -0
  306. {datachain-0.33.0 → datachain-0.34.0}/tests/func/fake-service-account-credentials.json +0 -0
  307. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/__init__.py +0 -0
  308. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_aggregate.py +0 -0
  309. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_array.py +0 -0
  310. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_conditional.py +0 -0
  311. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_numeric.py +0 -0
  312. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_path.py +0 -0
  313. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_random.py +0 -0
  314. {datachain-0.33.0 → datachain-0.34.0}/tests/func/functions/test_string.py +0 -0
  315. {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/__init__.py +0 -0
  316. {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/running-mask0.png +0 -0
  317. {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/running-mask1.png +0 -0
  318. {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/running.jpg +0 -0
  319. {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/data/ships.jpg +0 -0
  320. {datachain-0.33.0 → datachain-0.34.0}/tests/func/model/test_yolo.py +0 -0
  321. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_audio.py +0 -0
  322. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_batching.py +0 -0
  323. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_catalog.py +0 -0
  324. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_client.py +0 -0
  325. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_cloud_transfer.py +0 -0
  326. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_data_storage.py +0 -0
  327. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_datachain.py +0 -0
  328. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_datachain_merge.py +0 -0
  329. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_dataset_query.py +0 -0
  330. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_datasets.py +0 -0
  331. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_delta.py +0 -0
  332. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_feature_pickling.py +0 -0
  333. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_file.py +0 -0
  334. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_hf.py +0 -0
  335. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_hidden_field.py +0 -0
  336. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_image.py +0 -0
  337. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_listing.py +0 -0
  338. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_ls.py +0 -0
  339. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_meta_formats.py +0 -0
  340. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_metastore.py +0 -0
  341. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_metrics.py +0 -0
  342. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_mutate.py +0 -0
  343. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_pull.py +0 -0
  344. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_pytorch.py +0 -0
  345. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_query.py +0 -0
  346. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_read_database.py +0 -0
  347. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_read_dataset_remote.py +0 -0
  348. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  349. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_retry.py +0 -0
  350. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_session.py +0 -0
  351. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_storage_pattern.py +0 -0
  352. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_studio_datetime_parsing.py +0 -0
  353. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_to_database.py +0 -0
  354. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_toolkit.py +0 -0
  355. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_video.py +0 -0
  356. {datachain-0.33.0 → datachain-0.34.0}/tests/func/test_warehouse.py +0 -0
  357. {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class.py +0 -0
  358. {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class_exception.py +0 -0
  359. {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class_parallel.py +0 -0
  360. {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  361. {datachain-0.33.0 → datachain-0.34.0}/tests/scripts/name_len_slow.py +0 -0
  362. {datachain-0.33.0 → datachain-0.34.0}/tests/test_atomicity.py +0 -0
  363. {datachain-0.33.0 → datachain-0.34.0}/tests/test_cli_e2e.py +0 -0
  364. {datachain-0.33.0 → datachain-0.34.0}/tests/test_cli_studio.py +0 -0
  365. {datachain-0.33.0 → datachain-0.34.0}/tests/test_import_time.py +0 -0
  366. {datachain-0.33.0 → datachain-0.34.0}/tests/test_query_e2e.py +0 -0
  367. {datachain-0.33.0 → datachain-0.34.0}/tests/test_telemetry.py +0 -0
  368. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/__init__.py +0 -0
  369. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/__init__.py +0 -0
  370. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/conftest.py +0 -0
  371. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_arrow.py +0 -0
  372. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_audio.py +0 -0
  373. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_clip.py +0 -0
  374. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  375. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  376. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_feature.py +0 -0
  377. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_feature_utils.py +0 -0
  378. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_file.py +0 -0
  379. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_hf.py +0 -0
  380. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_image.py +0 -0
  381. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_listing_info.py +0 -0
  382. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_namespace.py +0 -0
  383. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_partition_by.py +0 -0
  384. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_project.py +0 -0
  385. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  386. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_schema.py +0 -0
  387. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_settings.py +0 -0
  388. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  389. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_storage_pattern.py +0 -0
  390. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_text.py +0 -0
  391. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_udf.py +0 -0
  392. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_udf_signature.py +0 -0
  393. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_utils.py +0 -0
  394. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/lib/test_webdataset.py +0 -0
  395. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/__init__.py +0 -0
  396. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_bbox.py +0 -0
  397. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_pose.py +0 -0
  398. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_segment.py +0 -0
  399. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/model/test_utils.py +0 -0
  400. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/__init__.py +0 -0
  401. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  402. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  403. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  404. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_array.py +0 -0
  405. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_conditional.py +0 -0
  406. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_path.py +0 -0
  407. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_random.py +0 -0
  408. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_selectable.py +0 -0
  409. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/sql/test_string.py +0 -0
  410. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_asyn.py +0 -0
  411. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_cache.py +0 -0
  412. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_catalog.py +0 -0
  413. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_catalog_loader.py +0 -0
  414. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_cli_datasets.py +0 -0
  415. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_cli_parsing.py +0 -0
  416. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client.py +0 -0
  417. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client_gcs.py +0 -0
  418. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client_http.py +0 -0
  419. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_client_s3.py +0 -0
  420. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_config.py +0 -0
  421. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_data_storage.py +0 -0
  422. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_database_engine.py +0 -0
  423. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_dataset.py +0 -0
  424. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_dispatch.py +0 -0
  425. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_fileslice.py +0 -0
  426. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_func.py +0 -0
  427. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_listing.py +0 -0
  428. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_metastore.py +0 -0
  429. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_module_exports.py +0 -0
  430. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_pytorch.py +0 -0
  431. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_query_metrics.py +0 -0
  432. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_query_params.py +0 -0
  433. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_script_meta.py +0 -0
  434. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_semver.py +0 -0
  435. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_serializer.py +0 -0
  436. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_session.py +0 -0
  437. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_utils.py +0 -0
  438. {datachain-0.33.0 → datachain-0.34.0}/tests/unit/test_warehouse.py +0 -0
  439. {datachain-0.33.0 → datachain-0.34.0}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.13.1'
27
+ rev: 'v0.13.2'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.33.0
3
+ Version: 0.34.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -103,7 +103,7 @@ Requires-Dist: scipy; extra == "tests"
103
103
  Requires-Dist: ultralytics; extra == "tests"
104
104
  Provides-Extra: dev
105
105
  Requires-Dist: datachain[docs,tests]; extra == "dev"
106
- Requires-Dist: mypy==1.18.1; extra == "dev"
106
+ Requires-Dist: mypy==1.18.2; extra == "dev"
107
107
  Requires-Dist: types-python-dateutil; extra == "dev"
108
108
  Requires-Dist: types-dateparser; extra == "dev"
109
109
  Requires-Dist: types-pytz; extra == "dev"
@@ -120,7 +120,7 @@ tests = [
120
120
  ]
121
121
  dev = [
122
122
  "datachain[docs,tests]",
123
- "mypy==1.18.1",
123
+ "mypy==1.18.2",
124
124
  "types-python-dateutil",
125
125
  "types-dateparser",
126
126
  "types-pytz",
@@ -144,19 +144,26 @@ def shutdown_process(
144
144
  return proc.wait()
145
145
 
146
146
 
147
- def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
147
+ def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
148
148
  buffer = b""
149
- while byt := stream.read(1): # Read one byte at a time
150
- buffer += byt
151
149
 
152
- if byt in (b"\n", b"\r"): # Check for newline or carriage return
153
- line = buffer.decode("utf-8")
154
- callback(line)
155
- buffer = b"" # Clear buffer for next line
150
+ try:
151
+ while byt := stream.read(1): # Read one byte at a time
152
+ buffer += byt
156
153
 
157
- if buffer: # Handle any remaining data in the buffer
158
- line = buffer.decode("utf-8")
159
- callback(line)
154
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
155
+ line = buffer.decode("utf-8", errors="replace")
156
+ callback(line)
157
+ buffer = b"" # Clear buffer for the next line
158
+
159
+ if buffer: # Handle any remaining data in the buffer
160
+ line = buffer.decode("utf-8", errors="replace")
161
+ callback(line)
162
+ finally:
163
+ try:
164
+ stream.close() # Ensure output is closed
165
+ except Exception: # noqa: BLE001, S110
166
+ pass
160
167
 
161
168
 
162
169
  class DatasetRowsFetcher(NodesThreadPool):
@@ -1760,13 +1767,13 @@ class Catalog:
1760
1767
  recursive=recursive,
1761
1768
  )
1762
1769
 
1770
+ @staticmethod
1763
1771
  def query(
1764
- self,
1765
1772
  query_script: str,
1766
1773
  env: Optional[Mapping[str, str]] = None,
1767
1774
  python_executable: str = sys.executable,
1768
- capture_output: bool = False,
1769
- output_hook: Callable[[str], None] = noop,
1775
+ stdout_callback: Optional[Callable[[str], None]] = None,
1776
+ stderr_callback: Optional[Callable[[str], None]] = None,
1770
1777
  params: Optional[dict[str, str]] = None,
1771
1778
  job_id: Optional[str] = None,
1772
1779
  interrupt_timeout: Optional[int] = None,
@@ -1781,13 +1788,18 @@ class Catalog:
1781
1788
  },
1782
1789
  )
1783
1790
  popen_kwargs: dict[str, Any] = {}
1784
- if capture_output:
1785
- popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
1791
+
1792
+ if stdout_callback is not None:
1793
+ popen_kwargs = {"stdout": subprocess.PIPE}
1794
+ if stderr_callback is not None:
1795
+ popen_kwargs["stderr"] = subprocess.PIPE
1786
1796
 
1787
1797
  def raise_termination_signal(sig: int, _: Any) -> NoReturn:
1788
1798
  raise TerminationSignal(sig)
1789
1799
 
1790
- thread: Optional[Thread] = None
1800
+ stdout_thread: Optional[Thread] = None
1801
+ stderr_thread: Optional[Thread] = None
1802
+
1791
1803
  with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
1792
1804
  logger.info("Starting process %s", proc.pid)
1793
1805
 
@@ -1801,10 +1813,20 @@ class Catalog:
1801
1813
  orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
1802
1814
  signal.signal(signal.SIGTERM, raise_termination_signal)
1803
1815
  try:
1804
- if capture_output:
1805
- args = (proc.stdout, output_hook)
1806
- thread = Thread(target=_process_stream, args=args, daemon=True)
1807
- thread.start()
1816
+ if stdout_callback is not None:
1817
+ stdout_thread = Thread(
1818
+ target=process_output,
1819
+ args=(proc.stdout, stdout_callback),
1820
+ daemon=True,
1821
+ )
1822
+ stdout_thread.start()
1823
+ if stderr_callback is not None:
1824
+ stderr_thread = Thread(
1825
+ target=process_output,
1826
+ args=(proc.stderr, stderr_callback),
1827
+ daemon=True,
1828
+ )
1829
+ stderr_thread.start()
1808
1830
 
1809
1831
  proc.wait()
1810
1832
  except TerminationSignal as exc:
@@ -1822,8 +1844,22 @@ class Catalog:
1822
1844
  finally:
1823
1845
  signal.signal(signal.SIGTERM, orig_sigterm_handler)
1824
1846
  signal.signal(signal.SIGINT, orig_sigint_handler)
1825
- if thread:
1826
- thread.join() # wait for the reader thread
1847
+ # wait for the reader thread
1848
+ thread_join_timeout_seconds = 30
1849
+ if stdout_thread is not None:
1850
+ stdout_thread.join(timeout=thread_join_timeout_seconds)
1851
+ if stdout_thread.is_alive():
1852
+ logger.warning(
1853
+ "stdout thread is still alive after %s seconds",
1854
+ thread_join_timeout_seconds,
1855
+ )
1856
+ if stderr_thread is not None:
1857
+ stderr_thread.join(timeout=thread_join_timeout_seconds)
1858
+ if stderr_thread.is_alive():
1859
+ logger.warning(
1860
+ "stderr thread is still alive after %s seconds",
1861
+ thread_join_timeout_seconds,
1862
+ )
1827
1863
 
1828
1864
  logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1829
1865
  if proc.returncode in (
@@ -4,6 +4,7 @@ from enum import Enum
4
4
  class JobStatus(int, Enum):
5
5
  CREATED = 1
6
6
  SCHEDULED = 10
7
+ PROVISIONING = 12
7
8
  QUEUED = 2
8
9
  INIT = 3
9
10
  RUNNING = 4
@@ -21,6 +21,7 @@ from sqlalchemy import (
21
21
  Table,
22
22
  Text,
23
23
  UniqueConstraint,
24
+ desc,
24
25
  select,
25
26
  )
26
27
  from sqlalchemy.sql import func as f
@@ -399,6 +400,7 @@ class AbstractMetastore(ABC, Serializable):
399
400
  workers: int = 1,
400
401
  python_version: Optional[str] = None,
401
402
  params: Optional[dict[str, str]] = None,
403
+ parent_job_id: Optional[str] = None,
402
404
  ) -> str:
403
405
  """
404
406
  Creates a new job.
@@ -443,6 +445,10 @@ class AbstractMetastore(ABC, Serializable):
443
445
  def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
444
446
  """Returns all checkpoints related to some job"""
445
447
 
448
+ @abstractmethod
449
+ def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
450
+ """Get last created checkpoint for some job."""
451
+
446
452
  @abstractmethod
447
453
  def get_checkpoint_by_id(self, checkpoint_id: str, conn=None) -> Checkpoint:
448
454
  """Gets single checkpoint by id"""
@@ -1548,6 +1554,7 @@ class AbstractDBMetastore(AbstractMetastore):
1548
1554
  Column("error_stack", Text, nullable=False, default=""),
1549
1555
  Column("params", JSON, nullable=False),
1550
1556
  Column("metrics", JSON, nullable=False),
1557
+ Column("parent_job_id", Text, nullable=True),
1551
1558
  ]
1552
1559
 
1553
1560
  @cached_property
@@ -1595,6 +1602,7 @@ class AbstractDBMetastore(AbstractMetastore):
1595
1602
  workers: int = 1,
1596
1603
  python_version: Optional[str] = None,
1597
1604
  params: Optional[dict[str, str]] = None,
1605
+ parent_job_id: Optional[str] = None,
1598
1606
  conn: Optional[Any] = None,
1599
1607
  ) -> str:
1600
1608
  """
@@ -1616,6 +1624,7 @@ class AbstractDBMetastore(AbstractMetastore):
1616
1624
  error_stack="",
1617
1625
  params=json.dumps(params or {}),
1618
1626
  metrics=json.dumps({}),
1627
+ parent_job_id=parent_job_id,
1619
1628
  ),
1620
1629
  conn=conn,
1621
1630
  )
@@ -1770,7 +1779,7 @@ class AbstractDBMetastore(AbstractMetastore):
1770
1779
  )
1771
1780
  return self.get_checkpoint_by_id(checkpoint_id)
1772
1781
 
1773
- def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
1782
+ def list_checkpoints(self, job_id: str, conn=None) -> Iterator[Checkpoint]:
1774
1783
  """List checkpoints by job id."""
1775
1784
  query = self._checkpoints_query().where(self._checkpoints.c.job_id == job_id)
1776
1785
  rows = list(self.db.execute(query, conn=conn))
@@ -1800,3 +1809,15 @@ class AbstractDBMetastore(AbstractMetastore):
1800
1809
  if not rows:
1801
1810
  return None
1802
1811
  return self.checkpoint_class.parse(*rows[0])
1812
+
1813
+ def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
1814
+ query = (
1815
+ self._checkpoints_query()
1816
+ .where(self._checkpoints.c.job_id == job_id)
1817
+ .order_by(desc(self._checkpoints.c.created_at))
1818
+ .limit(1)
1819
+ )
1820
+ rows = list(self.db.execute(query, conn=conn))
1821
+ if not rows:
1822
+ return None
1823
+ return self.checkpoint_class.parse(*rows[0])
@@ -1,5 +1,3 @@
1
- import random
2
- import string
3
1
  from collections.abc import Sequence
4
2
  from enum import Enum
5
3
  from typing import TYPE_CHECKING, Optional, Union
@@ -11,16 +9,12 @@ from datachain.query.schema import Column
11
9
  if TYPE_CHECKING:
12
10
  from datachain.lib.dc import DataChain
13
11
 
14
-
15
12
  C = Column
16
13
 
17
14
 
18
- def get_status_col_name() -> str:
19
- """Returns new unique status col name"""
20
- return "diff_" + "".join(
21
- random.choice(string.ascii_letters) # noqa: S311
22
- for _ in range(10)
23
- )
15
+ STATUS_COL_NAME = "diff_7aeed3aa17ba4d50b8d1c368c76e16a6"
16
+ LEFT_DIFF_COL_NAME = "diff_95f95344064a4b819c8625cd1a5cfc2b"
17
+ RIGHT_DIFF_COL_NAME = "diff_5808838a49b54849aa461d7387376d34"
24
18
 
25
19
 
26
20
  class CompareStatus(str, Enum):
@@ -101,9 +95,9 @@ def _compare( # noqa: C901, PLR0912
101
95
  compare = right_compare = [c for c in cols if c in right_cols and c not in on] # type: ignore[misc]
102
96
 
103
97
  # get diff column names
104
- diff_col = status_col or get_status_col_name()
105
- ldiff_col = get_status_col_name()
106
- rdiff_col = get_status_col_name()
98
+ diff_col = status_col or STATUS_COL_NAME
99
+ ldiff_col = LEFT_DIFF_COL_NAME
100
+ rdiff_col = RIGHT_DIFF_COL_NAME
107
101
 
108
102
  # adding helper diff columns, which will be removed after
109
103
  left = left.mutate(**{ldiff_col: 1})
@@ -227,7 +221,7 @@ def compare_and_split(
227
221
  )
228
222
  ```
229
223
  """
230
- status_col = get_status_col_name()
224
+ status_col = STATUS_COL_NAME
231
225
 
232
226
  res = _compare(
233
227
  left,
@@ -101,3 +101,7 @@ class OutdatedDatabaseSchemaError(DataChainError):
101
101
 
102
102
  class CheckpointNotFoundError(NotFoundError):
103
103
  pass
104
+
105
+
106
+ class JobNotFoundError(NotFoundError):
107
+ pass
@@ -0,0 +1,147 @@
1
+ import hashlib
2
+ import inspect
3
+ import json
4
+ import textwrap
5
+ from collections.abc import Sequence
6
+ from typing import TypeVar, Union
7
+
8
+ from sqlalchemy.sql.elements import (
9
+ BinaryExpression,
10
+ BindParameter,
11
+ ColumnElement,
12
+ Label,
13
+ Over,
14
+ UnaryExpression,
15
+ )
16
+ from sqlalchemy.sql.functions import Function
17
+
18
+ T = TypeVar("T", bound=ColumnElement)
19
+ ColumnLike = Union[str, T]
20
+
21
+
22
+ def serialize_column_element(expr: Union[str, ColumnElement]) -> dict: # noqa: PLR0911
23
+ """
24
+ Recursively serialize a SQLAlchemy ColumnElement into a deterministic structure.
25
+ """
26
+
27
+ # Binary operations: col > 5, col1 + col2, etc.
28
+ if isinstance(expr, BinaryExpression):
29
+ op = (
30
+ expr.operator.__name__
31
+ if hasattr(expr.operator, "__name__")
32
+ else str(expr.operator)
33
+ )
34
+ return {
35
+ "type": "binary",
36
+ "op": op,
37
+ "left": serialize_column_element(expr.left),
38
+ "right": serialize_column_element(expr.right),
39
+ }
40
+
41
+ # Unary operations: -col, NOT col, etc.
42
+ if isinstance(expr, UnaryExpression):
43
+ op = (
44
+ expr.operator.__name__
45
+ if expr.operator is not None and hasattr(expr.operator, "__name__")
46
+ else str(expr.operator)
47
+ )
48
+
49
+ return {
50
+ "type": "unary",
51
+ "op": op,
52
+ "element": serialize_column_element(expr.element), # type: ignore[arg-type]
53
+ }
54
+
55
+ # Function calls: func.lower(col), func.count(col), etc.
56
+ if isinstance(expr, Function):
57
+ return {
58
+ "type": "function",
59
+ "name": expr.name,
60
+ "clauses": [serialize_column_element(c) for c in expr.clauses],
61
+ }
62
+
63
+ # Window functions: func.row_number().over(partition_by=..., order_by=...)
64
+ if isinstance(expr, Over):
65
+ return {
66
+ "type": "window",
67
+ "function": serialize_column_element(expr.element),
68
+ "partition_by": [
69
+ serialize_column_element(p) for p in getattr(expr, "partition_by", [])
70
+ ],
71
+ "order_by": [
72
+ serialize_column_element(o) for o in getattr(expr, "order_by", [])
73
+ ],
74
+ }
75
+
76
+ # Labeled expressions: col.label("alias")
77
+ if isinstance(expr, Label):
78
+ return {
79
+ "type": "label",
80
+ "name": expr.name,
81
+ "element": serialize_column_element(expr.element),
82
+ }
83
+
84
+ # Bound values (constants)
85
+ if isinstance(expr, BindParameter):
86
+ return {"type": "bind", "value": expr.value}
87
+
88
+ # Plain columns
89
+ if hasattr(expr, "name"):
90
+ return {"type": "column", "name": expr.name}
91
+
92
+ # Fallback: stringify unknown nodes
93
+ return {"type": "other", "repr": str(expr)}
94
+
95
+
96
+ def hash_column_elements(columns: Sequence[ColumnLike]) -> str:
97
+ """
98
+ Hash a list of ColumnElements deterministically, dialect agnostic.
99
+ Only accepts ordered iterables (like list or tuple).
100
+ """
101
+ serialized = [serialize_column_element(c) for c in columns]
102
+ json_str = json.dumps(serialized, sort_keys=True) # stable JSON
103
+ return hashlib.sha256(json_str.encode("utf-8")).hexdigest()
104
+
105
+
106
+ def hash_callable(func):
107
+ """
108
+ Calculate a hash from a callable.
109
+ Rules:
110
+ - Named functions (def) → use source code for stable, cross-version hashing
111
+ - Lambdas → use bytecode (deterministic in same Python runtime)
112
+ """
113
+ if not callable(func):
114
+ raise TypeError("Expected a callable")
115
+
116
+ # Determine if it is a lambda
117
+ is_lambda = func.__name__ == "<lambda>"
118
+
119
+ if not is_lambda:
120
+ # Try to get exact source of named function
121
+ try:
122
+ lines, _ = inspect.getsourcelines(func)
123
+ payload = textwrap.dedent("".join(lines)).strip()
124
+ except (OSError, TypeError):
125
+ # Fallback: bytecode if source not available
126
+ payload = func.__code__.co_code
127
+ else:
128
+ # For lambdas, fall back directly to bytecode
129
+ payload = func.__code__.co_code
130
+
131
+ # Normalize annotations
132
+ annotations = {
133
+ k: getattr(v, "__name__", str(v)) for k, v in func.__annotations__.items()
134
+ }
135
+
136
+ # Extras to distinguish functions with same code but different metadata
137
+ extras = {
138
+ "name": func.__name__,
139
+ "defaults": func.__defaults__,
140
+ "annotations": annotations,
141
+ }
142
+
143
+ # Compute SHA256
144
+ h = hashlib.sha256()
145
+ h.update(str(payload).encode() if isinstance(payload, str) else payload)
146
+ h.update(str(extras).encode())
147
+ return h.hexdigest()
@@ -22,6 +22,7 @@ class Job:
22
22
  python_version: Optional[str] = None
23
23
  error_message: str = ""
24
24
  error_stack: str = ""
25
+ parent_job_id: Optional[str] = None
25
26
 
26
27
  @classmethod
27
28
  def parse(
@@ -39,6 +40,7 @@ class Job:
39
40
  error_stack: str,
40
41
  params: str,
41
42
  metrics: str,
43
+ parent_job_id: Optional[str],
42
44
  ) -> "Job":
43
45
  return cls(
44
46
  str(id),
@@ -54,4 +56,5 @@ class Job:
54
56
  python_version,
55
57
  error_message,
56
58
  error_stack,
59
+ parent_job_id,
57
60
  )