datachain 0.37.0__tar.gz → 0.37.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (447)
  1. {datachain-0.37.0 → datachain-0.37.2}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.37.0 → datachain-0.37.2}/PKG-INFO +2 -2
  3. {datachain-0.37.0 → datachain-0.37.2}/pyproject.toml +1 -1
  4. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/delta.py +6 -35
  5. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/diff/__init__.py +3 -1
  6. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/datachain.py +2 -3
  7. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/datasets.py +4 -0
  8. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/storage.py +6 -9
  9. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/dataset.py +1 -1
  10. {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/PKG-INFO +2 -2
  11. {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/requires.txt +1 -1
  12. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_delta.py +118 -5
  13. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_checkpoints.py +8 -6
  14. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_datachain_merge.py +41 -3
  15. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_diff.py +19 -1
  16. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_datachain_hash.py +1 -1
  17. {datachain-0.37.0 → datachain-0.37.2}/.cruft.json +0 -0
  18. {datachain-0.37.0 → datachain-0.37.2}/.gitattributes +0 -0
  19. {datachain-0.37.0 → datachain-0.37.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  20. {datachain-0.37.0 → datachain-0.37.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  21. {datachain-0.37.0 → datachain-0.37.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  22. {datachain-0.37.0 → datachain-0.37.2}/.github/codecov.yaml +0 -0
  23. {datachain-0.37.0 → datachain-0.37.2}/.github/dependabot.yml +0 -0
  24. {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/benchmarks.yml +0 -0
  25. {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/release.yml +0 -0
  26. {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/tests-studio.yml +0 -0
  27. {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/tests.yml +0 -0
  28. {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/update-template.yaml +0 -0
  29. {datachain-0.37.0 → datachain-0.37.2}/.gitignore +0 -0
  30. {datachain-0.37.0 → datachain-0.37.2}/CODE_OF_CONDUCT.rst +0 -0
  31. {datachain-0.37.0 → datachain-0.37.2}/LICENSE +0 -0
  32. {datachain-0.37.0 → datachain-0.37.2}/README.rst +0 -0
  33. {datachain-0.37.0 → datachain-0.37.2}/docs/api_hooks.py +0 -0
  34. {datachain-0.37.0 → datachain-0.37.2}/docs/assets/captioned_cartoons.png +0 -0
  35. {datachain-0.37.0 → datachain-0.37.2}/docs/assets/datachain-white.svg +0 -0
  36. {datachain-0.37.0 → datachain-0.37.2}/docs/assets/datachain.svg +0 -0
  37. {datachain-0.37.0 → datachain-0.37.2}/docs/assets/webhook_dialog.png +0 -0
  38. {datachain-0.37.0 → datachain-0.37.2}/docs/assets/webhook_list.png +0 -0
  39. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/login.md +0 -0
  40. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/logout.md +0 -0
  41. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/team.md +0 -0
  42. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/token.md +0 -0
  43. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/index.md +0 -0
  44. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/cancel.md +0 -0
  45. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/clusters.md +0 -0
  46. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/logs.md +0 -0
  47. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/ls.md +0 -0
  48. {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/run.md +0 -0
  49. {datachain-0.37.0 → datachain-0.37.2}/docs/contributing.md +0 -0
  50. {datachain-0.37.0 → datachain-0.37.2}/docs/css/github-permalink-style.css +0 -0
  51. {datachain-0.37.0 → datachain-0.37.2}/docs/examples.md +0 -0
  52. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/checkpoints.md +0 -0
  53. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/db_migrations.md +0 -0
  54. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/delta.md +0 -0
  55. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/env.md +0 -0
  56. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/index.md +0 -0
  57. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/namespaces.md +0 -0
  58. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/processing.md +0 -0
  59. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/remotes.md +0 -0
  60. {datachain-0.37.0 → datachain-0.37.2}/docs/guide/retry.md +0 -0
  61. {datachain-0.37.0 → datachain-0.37.2}/docs/index.md +0 -0
  62. {datachain-0.37.0 → datachain-0.37.2}/docs/overrides/main.html +0 -0
  63. {datachain-0.37.0 → datachain-0.37.2}/docs/quick-start.md +0 -0
  64. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/arrowrow.md +0 -0
  65. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/bbox.md +0 -0
  66. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/file.md +0 -0
  67. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/imagefile.md +0 -0
  68. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/index.md +0 -0
  69. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/pose.md +0 -0
  70. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/segment.md +0 -0
  71. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/tarvfile.md +0 -0
  72. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/textfile.md +0 -0
  73. {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/videofile.md +0 -0
  74. {datachain-0.37.0 → datachain-0.37.2}/docs/references/datachain.md +0 -0
  75. {datachain-0.37.0 → datachain-0.37.2}/docs/references/func.md +0 -0
  76. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/aggregate.md +0 -0
  77. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/array.md +0 -0
  78. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/conditional.md +0 -0
  79. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/numeric.md +0 -0
  80. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/path.md +0 -0
  81. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/random.md +0 -0
  82. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/string.md +0 -0
  83. {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/window.md +0 -0
  84. {datachain-0.37.0 → datachain-0.37.2}/docs/references/index.md +0 -0
  85. {datachain-0.37.0 → datachain-0.37.2}/docs/references/toolkit.md +0 -0
  86. {datachain-0.37.0 → datachain-0.37.2}/docs/references/torch.md +0 -0
  87. {datachain-0.37.0 → datachain-0.37.2}/docs/references/udf.md +0 -0
  88. {datachain-0.37.0 → datachain-0.37.2}/docs/studio/api/.gitkeep +0 -0
  89. {datachain-0.37.0 → datachain-0.37.2}/docs/studio/webhooks.md +0 -0
  90. {datachain-0.37.0 → datachain-0.37.2}/docs/templates/main.dot +0 -0
  91. {datachain-0.37.0 → datachain-0.37.2}/docs/templates/operation.dot +0 -0
  92. {datachain-0.37.0 → datachain-0.37.2}/docs/templates/responses.def +0 -0
  93. {datachain-0.37.0 → datachain-0.37.2}/docs/tutorials.md +0 -0
  94. {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  95. {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  96. {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/openimage-detect.py +0 -0
  97. {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  98. {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  99. {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  100. {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/common_sql_functions.py +0 -0
  101. {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/json-csv-reader.py +0 -0
  102. {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/nested_datamodel.py +0 -0
  103. {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/torch-loader.py +0 -0
  104. {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/udfs/parallel.py +0 -0
  105. {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/udfs/simple.py +0 -0
  106. {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/udfs/stateful.py +0 -0
  107. {datachain-0.37.0 → datachain-0.37.2}/examples/incremental_processing/delta.py +0 -0
  108. {datachain-0.37.0 → datachain-0.37.2}/examples/incremental_processing/retry.py +0 -0
  109. {datachain-0.37.0 → datachain-0.37.2}/examples/incremental_processing/utils.py +0 -0
  110. {datachain-0.37.0 → datachain-0.37.2}/examples/llm_and_nlp/claude-query.py +0 -0
  111. {datachain-0.37.0 → datachain-0.37.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  112. {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/audio-to-text.py +0 -0
  113. {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/clip_inference.py +0 -0
  114. {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/hf_pipeline.py +0 -0
  115. {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  116. {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/wds.py +0 -0
  117. {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/wds_filtered.py +0 -0
  118. {datachain-0.37.0 → datachain-0.37.2}/mkdocs.yml +0 -0
  119. {datachain-0.37.0 → datachain-0.37.2}/noxfile.py +0 -0
  120. {datachain-0.37.0 → datachain-0.37.2}/setup.cfg +0 -0
  121. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/__init__.py +0 -0
  122. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/__main__.py +0 -0
  123. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/asyn.py +0 -0
  124. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cache.py +0 -0
  125. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/__init__.py +0 -0
  126. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/catalog.py +0 -0
  127. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/datasource.py +0 -0
  128. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/dependency.py +0 -0
  129. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/loader.py +0 -0
  130. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/checkpoint.py +0 -0
  131. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/__init__.py +0 -0
  132. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/__init__.py +0 -0
  133. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/datasets.py +0 -0
  134. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/du.py +0 -0
  135. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/index.py +0 -0
  136. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/ls.py +0 -0
  137. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/misc.py +0 -0
  138. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/query.py +0 -0
  139. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/show.py +0 -0
  140. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/__init__.py +0 -0
  141. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/job.py +0 -0
  142. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/studio.py +0 -0
  143. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/utils.py +0 -0
  144. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/utils.py +0 -0
  145. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/__init__.py +0 -0
  146. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/azure.py +0 -0
  147. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/fileslice.py +0 -0
  148. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/fsspec.py +0 -0
  149. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/gcs.py +0 -0
  150. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/hf.py +0 -0
  151. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/http.py +0 -0
  152. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/local.py +0 -0
  153. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/s3.py +0 -0
  154. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/config.py +0 -0
  155. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/__init__.py +0 -0
  156. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/db_engine.py +0 -0
  157. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/job.py +0 -0
  158. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/metastore.py +0 -0
  159. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/schema.py +0 -0
  160. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/serializer.py +0 -0
  161. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/sqlite.py +0 -0
  162. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/warehouse.py +0 -0
  163. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/dataset.py +0 -0
  164. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/error.py +0 -0
  165. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/fs/__init__.py +0 -0
  166. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/fs/reference.py +0 -0
  167. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/fs/utils.py +0 -0
  168. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/__init__.py +0 -0
  169. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/aggregate.py +0 -0
  170. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/array.py +0 -0
  171. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/base.py +0 -0
  172. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/conditional.py +0 -0
  173. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/func.py +0 -0
  174. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/numeric.py +0 -0
  175. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/path.py +0 -0
  176. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/random.py +0 -0
  177. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/string.py +0 -0
  178. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/window.py +0 -0
  179. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/hash_utils.py +0 -0
  180. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/job.py +0 -0
  181. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/__init__.py +0 -0
  182. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/arrow.py +0 -0
  183. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/audio.py +0 -0
  184. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/clip.py +0 -0
  185. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/__init__.py +0 -0
  186. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/flatten.py +0 -0
  187. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  188. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  189. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/unflatten.py +0 -0
  190. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  191. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/data_model.py +0 -0
  192. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dataset_info.py +0 -0
  193. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/__init__.py +0 -0
  194. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/csv.py +0 -0
  195. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/database.py +0 -0
  196. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/hf.py +0 -0
  197. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/json.py +0 -0
  198. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/listings.py +0 -0
  199. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/pandas.py +0 -0
  200. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/parquet.py +0 -0
  201. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/records.py +0 -0
  202. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/storage_pattern.py +0 -0
  203. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/utils.py +0 -0
  204. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/values.py +0 -0
  205. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/file.py +0 -0
  206. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/hf.py +0 -0
  207. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/image.py +0 -0
  208. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/listing.py +0 -0
  209. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/listing_info.py +0 -0
  210. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/meta_formats.py +0 -0
  211. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/model_store.py +0 -0
  212. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/namespaces.py +0 -0
  213. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/projects.py +0 -0
  214. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/pytorch.py +0 -0
  215. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/settings.py +0 -0
  216. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/signal_schema.py +0 -0
  217. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/tar.py +0 -0
  218. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/text.py +0 -0
  219. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/udf.py +0 -0
  220. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/udf_signature.py +0 -0
  221. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/utils.py +0 -0
  222. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/video.py +0 -0
  223. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/webdataset.py +0 -0
  224. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/webdataset_laion.py +0 -0
  225. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/listing.py +0 -0
  226. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/__init__.py +0 -0
  227. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/bbox.py +0 -0
  228. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/pose.py +0 -0
  229. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/segment.py +0 -0
  230. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  231. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  232. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/pose.py +0 -0
  233. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/segment.py +0 -0
  234. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/utils.py +0 -0
  235. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/namespace.py +0 -0
  236. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/node.py +0 -0
  237. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/nodes_fetcher.py +0 -0
  238. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/nodes_thread_pool.py +0 -0
  239. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/plugins.py +0 -0
  240. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/progress.py +0 -0
  241. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/project.py +0 -0
  242. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/py.typed +0 -0
  243. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/__init__.py +0 -0
  244. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/batch.py +0 -0
  245. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/dispatch.py +0 -0
  246. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/metrics.py +0 -0
  247. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/params.py +0 -0
  248. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/queue.py +0 -0
  249. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/schema.py +0 -0
  250. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/session.py +0 -0
  251. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/udf.py +0 -0
  252. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/remote/__init__.py +0 -0
  253. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/remote/studio.py +0 -0
  254. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/script_meta.py +0 -0
  255. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/semver.py +0 -0
  256. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/__init__.py +0 -0
  257. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/default/__init__.py +0 -0
  258. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/default/base.py +0 -0
  259. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/__init__.py +0 -0
  260. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/aggregate.py +0 -0
  261. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/array.py +0 -0
  262. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/conditional.py +0 -0
  263. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/numeric.py +0 -0
  264. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/path.py +0 -0
  265. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/random.py +0 -0
  266. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/string.py +0 -0
  267. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/postgresql_dialect.py +0 -0
  268. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/postgresql_types.py +0 -0
  269. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/selectable.py +0 -0
  270. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  271. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/base.py +0 -0
  272. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/types.py +0 -0
  273. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/vector.py +0 -0
  274. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/types.py +0 -0
  275. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/utils.py +0 -0
  276. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/studio.py +0 -0
  277. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/telemetry.py +0 -0
  278. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/toolkit/__init__.py +0 -0
  279. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/toolkit/split.py +0 -0
  280. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/torch/__init__.py +0 -0
  281. {datachain-0.37.0 → datachain-0.37.2}/src/datachain/utils.py +0 -0
  282. {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/SOURCES.txt +0 -0
  283. {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  284. {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/entry_points.txt +0 -0
  285. {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/top_level.txt +0 -0
  286. {datachain-0.37.0 → datachain-0.37.2}/tests/__init__.py +0 -0
  287. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/__init__.py +0 -0
  288. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/conftest.py +0 -0
  289. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  290. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  291. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/.gitignore +0 -0
  292. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  293. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/test_datachain.py +0 -0
  294. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/test_ls.py +0 -0
  295. {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/test_version.py +0 -0
  296. {datachain-0.37.0 → datachain-0.37.2}/tests/conftest.py +0 -0
  297. {datachain-0.37.0 → datachain-0.37.2}/tests/data.py +0 -0
  298. {datachain-0.37.0 → datachain-0.37.2}/tests/examples/__init__.py +0 -0
  299. {datachain-0.37.0 → datachain-0.37.2}/tests/examples/test_examples.py +0 -0
  300. {datachain-0.37.0 → datachain-0.37.2}/tests/examples/test_wds_e2e.py +0 -0
  301. {datachain-0.37.0 → datachain-0.37.2}/tests/examples/wds_data.py +0 -0
  302. {datachain-0.37.0 → datachain-0.37.2}/tests/func/__init__.py +0 -0
  303. {datachain-0.37.0 → datachain-0.37.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  304. {datachain-0.37.0 → datachain-0.37.2}/tests/func/data/lena.jpg +0 -0
  305. {datachain-0.37.0 → datachain-0.37.2}/tests/func/fake-service-account-credentials.json +0 -0
  306. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/__init__.py +0 -0
  307. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_aggregate.py +0 -0
  308. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_array.py +0 -0
  309. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_conditional.py +0 -0
  310. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_numeric.py +0 -0
  311. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_path.py +0 -0
  312. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_random.py +0 -0
  313. {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_string.py +0 -0
  314. {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/__init__.py +0 -0
  315. {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/running-mask0.png +0 -0
  316. {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/running-mask1.png +0 -0
  317. {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/running.jpg +0 -0
  318. {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/ships.jpg +0 -0
  319. {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/test_yolo.py +0 -0
  320. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_audio.py +0 -0
  321. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_catalog.py +0 -0
  322. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_checkpoints.py +0 -0
  323. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_client.py +0 -0
  324. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_cloud_transfer.py +0 -0
  325. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_data_storage.py +0 -0
  326. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_datachain.py +0 -0
  327. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_datachain_merge.py +0 -0
  328. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_dataset_query.py +0 -0
  329. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_datasets.py +0 -0
  330. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_feature_pickling.py +0 -0
  331. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_file.py +0 -0
  332. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_hf.py +0 -0
  333. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_hidden_field.py +0 -0
  334. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_image.py +0 -0
  335. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_listing.py +0 -0
  336. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_ls.py +0 -0
  337. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_meta_formats.py +0 -0
  338. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_metastore.py +0 -0
  339. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_metrics.py +0 -0
  340. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_mutate.py +0 -0
  341. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_pull.py +0 -0
  342. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_pytorch.py +0 -0
  343. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_query.py +0 -0
  344. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_read_database.py +0 -0
  345. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_read_dataset_remote.py +0 -0
  346. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  347. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_retry.py +0 -0
  348. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_session.py +0 -0
  349. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_storage_pattern.py +0 -0
  350. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_studio_datetime_parsing.py +0 -0
  351. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_temp_table_tracking.py +0 -0
  352. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_to_database.py +0 -0
  353. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_toolkit.py +0 -0
  354. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_udf.py +0 -0
  355. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_union.py +0 -0
  356. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_video.py +0 -0
  357. {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_warehouse.py +0 -0
  358. {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class.py +0 -0
  359. {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class_exception.py +0 -0
  360. {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class_parallel.py +0 -0
  361. {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  362. {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/name_len_slow.py +0 -0
  363. {datachain-0.37.0 → datachain-0.37.2}/tests/test_atomicity.py +0 -0
  364. {datachain-0.37.0 → datachain-0.37.2}/tests/test_cli_e2e.py +0 -0
  365. {datachain-0.37.0 → datachain-0.37.2}/tests/test_cli_studio.py +0 -0
  366. {datachain-0.37.0 → datachain-0.37.2}/tests/test_import_time.py +0 -0
  367. {datachain-0.37.0 → datachain-0.37.2}/tests/test_job_management_e2e.py +0 -0
  368. {datachain-0.37.0 → datachain-0.37.2}/tests/test_query_e2e.py +0 -0
  369. {datachain-0.37.0 → datachain-0.37.2}/tests/test_telemetry.py +0 -0
  370. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/__init__.py +0 -0
  371. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/__init__.py +0 -0
  372. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/conftest.py +0 -0
  373. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_arrow.py +0 -0
  374. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_audio.py +0 -0
  375. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_clip.py +0 -0
  376. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_datachain.py +0 -0
  377. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  378. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_feature.py +0 -0
  379. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_feature_utils.py +0 -0
  380. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_file.py +0 -0
  381. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_hf.py +0 -0
  382. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_image.py +0 -0
  383. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_listing_info.py +0 -0
  384. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_namespace.py +0 -0
  385. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_partition_by.py +0 -0
  386. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_project.py +0 -0
  387. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  388. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_schema.py +0 -0
  389. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_settings.py +0 -0
  390. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_signal_schema.py +0 -0
  391. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  392. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_storage_pattern.py +0 -0
  393. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_text.py +0 -0
  394. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_udf.py +0 -0
  395. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_udf_signature.py +0 -0
  396. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_utils.py +0 -0
  397. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_webdataset.py +0 -0
  398. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/__init__.py +0 -0
  399. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_bbox.py +0 -0
  400. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_pose.py +0 -0
  401. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_segment.py +0 -0
  402. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_utils.py +0 -0
  403. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/__init__.py +0 -0
  404. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  405. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  406. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  407. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_array.py +0 -0
  408. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_conditional.py +0 -0
  409. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_path.py +0 -0
  410. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_random.py +0 -0
  411. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_selectable.py +0 -0
  412. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_string.py +0 -0
  413. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_asyn.py +0 -0
  414. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_batching.py +0 -0
  415. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_cache.py +0 -0
  416. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_catalog.py +0 -0
  417. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_catalog_loader.py +0 -0
  418. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_cli_datasets.py +0 -0
  419. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_cli_parsing.py +0 -0
  420. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client.py +0 -0
  421. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client_gcs.py +0 -0
  422. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client_http.py +0 -0
  423. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client_s3.py +0 -0
  424. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_config.py +0 -0
  425. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_data_storage.py +0 -0
  426. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_database_engine.py +0 -0
  427. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_dataset.py +0 -0
  428. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_dispatch.py +0 -0
  429. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_fileslice.py +0 -0
  430. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_func.py +0 -0
  431. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_hash_utils.py +0 -0
  432. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_job_management.py +0 -0
  433. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_listing.py +0 -0
  434. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_metastore.py +0 -0
  435. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_module_exports.py +0 -0
  436. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_pytorch.py +0 -0
  437. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query.py +0 -0
  438. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query_metrics.py +0 -0
  439. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query_params.py +0 -0
  440. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query_steps_hash.py +0 -0
  441. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_script_meta.py +0 -0
  442. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_semver.py +0 -0
  443. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_serializer.py +0 -0
  444. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_session.py +0 -0
  445. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_utils.py +0 -0
  446. {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_warehouse.py +0 -0
  447. {datachain-0.37.0 → datachain-0.37.2}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.14.0'
27
+ rev: 'v0.14.1'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.0
3
+ Version: 0.37.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -41,7 +41,7 @@ Requires-Dist: cloudpickle
41
41
  Requires-Dist: pydantic
42
42
  Requires-Dist: jmespath>=1.0
43
43
  Requires-Dist: datamodel-code-generator>=0.25
44
- Requires-Dist: Pillow<12,>=10.0.0
44
+ Requires-Dist: Pillow<13,>=10.0.0
45
45
  Requires-Dist: msgpack<2,>=1.0.4
46
46
  Requires-Dist: psutil
47
47
  Requires-Dist: huggingface_hub
@@ -45,7 +45,7 @@ dependencies = [
45
45
  "pydantic",
46
46
  "jmespath>=1.0",
47
47
  "datamodel-code-generator>=0.25",
48
- "Pillow>=10.0.0,<12",
48
+ "Pillow>=10.0.0,<13",
49
49
  "msgpack>=1.0.4,<2",
50
50
  "psutil",
51
51
  "huggingface_hub",
@@ -1,16 +1,12 @@
1
- import hashlib
2
1
  from collections.abc import Sequence
3
2
  from copy import copy
4
3
  from functools import wraps
5
4
  from typing import TYPE_CHECKING, TypeVar
6
5
 
7
- from attrs import frozen
8
-
9
6
  import datachain
10
7
  from datachain.dataset import DatasetDependency, DatasetRecord
11
8
  from datachain.error import DatasetNotFoundError
12
9
  from datachain.project import Project
13
- from datachain.query.dataset import Step, step_result
14
10
 
15
11
  if TYPE_CHECKING:
16
12
  from collections.abc import Callable
@@ -18,9 +14,7 @@ if TYPE_CHECKING:
18
14
 
19
15
  from typing_extensions import ParamSpec
20
16
 
21
- from datachain.catalog import Catalog
22
17
  from datachain.lib.dc import DataChain
23
- from datachain.query.dataset import QueryGenerator
24
18
 
25
19
  P = ParamSpec("P")
26
20
 
@@ -49,38 +43,11 @@ def delta_disabled(
49
43
  return _inner
50
44
 
51
45
 
52
- @frozen
53
- class _RegenerateSystemColumnsStep(Step):
54
- catalog: "Catalog"
55
-
56
- def hash_inputs(self) -> str:
57
- return hashlib.sha256(b"regenerate_sys_columns").hexdigest()
58
-
59
- def apply(self, query_generator: "QueryGenerator", temp_tables: list[str]):
60
- selectable = query_generator.select()
61
- regenerated = self.catalog.warehouse._regenerate_system_columns(
62
- selectable,
63
- keep_existing_columns=True,
64
- regenerate_columns=None,
65
- )
66
-
67
- def q(*columns):
68
- return regenerated.with_only_columns(*columns)
69
-
70
- return step_result(q, regenerated.selected_columns)
71
-
72
-
73
46
  def _append_steps(dc: "DataChain", other: "DataChain"):
74
47
  """Returns cloned chain with appended steps from other chain.
75
48
  Steps are all those modification methods applied like filters, mappers etc.
76
49
  """
77
50
  dc = dc.clone()
78
- dc._query.steps.append(
79
- _RegenerateSystemColumnsStep(
80
- catalog=dc.session.catalog,
81
- )
82
- )
83
-
84
51
  dc._query.steps += other._query.steps.copy()
85
52
  dc.signals_schema = other.signals_schema
86
53
  return dc
@@ -150,7 +117,9 @@ def _get_retry_chain(
150
117
  error_records = result_dataset.filter(C(delta_retry) != "")
151
118
  error_source_records = source_dc.merge(
152
119
  error_records, on=on, right_on=right_on, inner=True
153
- ).select(*list(source_dc.signals_schema.values))
120
+ ).select(
121
+ *list(source_dc.signals_schema.clone_without_sys_signals().values.keys())
122
+ )
154
123
  retry_chain = error_source_records
155
124
 
156
125
  # Handle missing records if delta_retry is True
@@ -200,7 +169,9 @@ def _get_source_info(
200
169
  indirect=False,
201
170
  )
202
171
 
203
- source_ds_dep = next((d for d in dependencies if d.name == source_ds.name), None)
172
+ source_ds_dep = next(
173
+ (d for d in dependencies if d and d.name == source_ds.name), None
174
+ )
204
175
  if not source_ds_dep:
205
176
  # Starting dataset was removed, back off to normal dataset creation
206
177
  return None, None, None, None, None
@@ -103,8 +103,10 @@ def _compare( # noqa: C901
103
103
  left = left.mutate(**{ldiff_col: 1})
104
104
  right = right.mutate(**{rdiff_col: 1})
105
105
 
106
- if not compare:
106
+ if compare is None:
107
107
  modified_cond = True
108
+ elif len(compare) == 0:
109
+ modified_cond = False
108
110
  else:
109
111
  modified_cond = or_( # type: ignore[assignment]
110
112
  *[
@@ -1697,14 +1697,13 @@ class DataChain:
1697
1697
  query.feature_schema = None
1698
1698
  ds = self._evolve(query=query)
1699
1699
 
1700
+ # Note: merge drops sys signals from both sides, make sure to not include it
1701
+ # in the resulting schema
1700
1702
  signals_schema = self.signals_schema.clone_without_sys_signals()
1701
1703
  right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
1702
1704
 
1703
1705
  ds.signals_schema = signals_schema.merge(right_signals_schema, rname)
1704
1706
 
1705
- if not full:
1706
- ds.signals_schema = SignalSchema({"sys": Sys}) | ds.signals_schema
1707
-
1708
1707
  return ds
1709
1708
 
1710
1709
  @delta_disabled
@@ -200,6 +200,10 @@ def read_dataset(
200
200
  signals_schema |= SignalSchema.deserialize(query.feature_schema)
201
201
  else:
202
202
  signals_schema |= SignalSchema.from_column_types(query.column_types or {})
203
+
204
+ if delta:
205
+ signals_schema = signals_schema.clone_without_sys_signals()
206
+
203
207
  chain = DataChain(query, _settings, signals_schema)
204
208
 
205
209
  if delta:
@@ -187,6 +187,12 @@ def read_storage(
187
187
  project=listing_project_name,
188
188
  session=session,
189
189
  settings=settings,
190
+ delta=delta,
191
+ delta_on=delta_on,
192
+ delta_result_on=delta_result_on,
193
+ delta_compare=delta_compare,
194
+ delta_retry=delta_retry,
195
+ delta_unsafe=delta_unsafe,
190
196
  )
191
197
  dc._query.update = update
192
198
  dc.signals_schema = dc.signals_schema.mutate({f"{column}": file_type})
@@ -252,13 +258,4 @@ def read_storage(
252
258
 
253
259
  assert storage_chain is not None
254
260
 
255
- if delta:
256
- storage_chain = storage_chain._as_delta(
257
- on=delta_on,
258
- right_on=delta_result_on,
259
- compare=delta_compare,
260
- delta_retry=delta_retry,
261
- delta_unsafe=delta_unsafe,
262
- )
263
-
264
261
  return storage_chain
@@ -1065,7 +1065,7 @@ class SQLJoin(Step):
1065
1065
  q1 = self.get_query(self.query1, temp_tables)
1066
1066
  q2 = self.get_query(self.query2, temp_tables)
1067
1067
 
1068
- q1_columns = _drop_system_columns(q1.c) if self.full else list(q1.c)
1068
+ q1_columns = _drop_system_columns(q1.c)
1069
1069
  q1_column_names = {c.name for c in q1_columns}
1070
1070
 
1071
1071
  q2_columns = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.0
3
+ Version: 0.37.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -41,7 +41,7 @@ Requires-Dist: cloudpickle
41
41
  Requires-Dist: pydantic
42
42
  Requires-Dist: jmespath>=1.0
43
43
  Requires-Dist: datamodel-code-generator>=0.25
44
- Requires-Dist: Pillow<12,>=10.0.0
44
+ Requires-Dist: Pillow<13,>=10.0.0
45
45
  Requires-Dist: msgpack<2,>=1.0.4
46
46
  Requires-Dist: psutil
47
47
  Requires-Dist: huggingface_hub
@@ -23,7 +23,7 @@ cloudpickle
23
23
  pydantic
24
24
  jmespath>=1.0
25
25
  datamodel-code-generator>=0.25
26
- Pillow<12,>=10.0.0
26
+ Pillow<13,>=10.0.0
27
27
  msgpack<2,>=1.0.4
28
28
  psutil
29
29
  huggingface_hub
@@ -98,6 +98,70 @@ def test_delta_update_from_dataset(test_session, tmp_dir, tmp_path):
98
98
  create_delta_dataset(ds_name)
99
99
 
100
100
 
101
+ def test_delta_falls_back_when_dependency_missing(test_session):
102
+ catalog = test_session.catalog
103
+
104
+ source_ds = "delta_removed_dep_source"
105
+ delta_ds = "delta_removed_dep_result"
106
+ process_log: list[int] = []
107
+
108
+ def record_processing(id: int) -> int:
109
+ process_log.append(id)
110
+ return id
111
+
112
+ # Create first source dataset and initial delta version that depends on it
113
+ dc.read_values(id=[1, 2], session=test_session).save(source_ds)
114
+ dc.read_dataset(
115
+ source_ds,
116
+ session=test_session,
117
+ delta=True,
118
+ delta_on="id",
119
+ ).map(processed_id=record_processing).save(delta_ds)
120
+
121
+ assert _get_dependencies(catalog, delta_ds, "1.0.0") == [(source_ds, "1.0.0")]
122
+ assert set(
123
+ dc.read_dataset(delta_ds, version="1.0.0", session=test_session).to_values("id")
124
+ ) == {1, 2}
125
+ assert sorted(process_log[:2]) == [1, 2]
126
+
127
+ dc.read_values(id=[1, 2, 10, 20, 30], session=test_session).save(source_ds)
128
+
129
+ # Drop the previous version so it is clear the dependency targets 1.0.1
130
+ dc.delete_dataset(source_ds, version="1.0.0", session=test_session)
131
+
132
+ with pytest.raises(DatasetNotFoundError):
133
+ dc.read_dataset(source_ds, session=test_session, version="1.0.0")
134
+
135
+ deps_after_removal = catalog.get_dataset_dependencies(
136
+ delta_ds,
137
+ "1.0.0",
138
+ namespace_name=catalog.metastore.default_project.namespace.name,
139
+ project_name=catalog.metastore.default_project.name,
140
+ indirect=False,
141
+ )
142
+ assert deps_after_removal == [None]
143
+
144
+ dc.read_dataset(
145
+ source_ds,
146
+ session=test_session,
147
+ delta=True,
148
+ delta_on="id",
149
+ ).map(processed_id=record_processing).save(delta_ds)
150
+
151
+ # Delta logic should fall back to rebuilding from scratch with the new dependency
152
+ assert _get_dependencies(catalog, delta_ds, "1.0.1") == [(source_ds, "1.0.1")]
153
+ assert set(
154
+ dc.read_dataset(delta_ds, version="1.0.1", session=test_session).to_values("id")
155
+ ) == {1, 2, 10, 20, 30}
156
+ # Previous version remains intact and still reflects the original source dataset
157
+ assert set(
158
+ dc.read_dataset(delta_ds, version="1.0.0", session=test_session).to_values("id")
159
+ ) == {1, 2}
160
+ # Fallback rebuilds the dataset, so ids 1 and 2 appear twice across both runs.
161
+ assert sorted(process_log[:2]) == [1, 2]
162
+ assert sorted(process_log[2:]) == [1, 2, 10, 20, 30]
163
+
164
+
101
165
  def test_delta_returns_correct_dataset_on_no_changes(test_session):
102
166
  catalog = test_session.catalog
103
167
 
@@ -250,17 +314,66 @@ def test_delta_replay_regenerates_system_columns(test_session):
250
314
 
251
315
  build_chain(delta=False).save(result_name)
252
316
 
253
- build_chain(delta=True).save(
254
- result_name,
255
- delta=True,
256
- delta_on="measurement_id",
257
- )
317
+ build_chain(delta=True).save(result_name)
258
318
 
259
319
  assert set(
260
320
  dc.read_dataset(result_name, session=test_session).to_values("measurement_id")
261
321
  ) == {1, 2}
262
322
 
263
323
 
324
+ def test_storage_delta_replay_regenerates_system_columns(test_session, tmp_dir):
325
+ data_dir = tmp_dir / f"regen_storage_{uuid.uuid4().hex[:8]}"
326
+ data_dir.mkdir()
327
+ storage_uri = data_dir.as_uri()
328
+ result_name = f"regen_storage_result_{uuid.uuid4().hex[:8]}"
329
+
330
+ def write_payload(index: int) -> None:
331
+ (data_dir / f"item{index}.txt").write_text(f"payload-{index}")
332
+
333
+ write_payload(1)
334
+ write_payload(2)
335
+
336
+ def build_chain(delta: bool):
337
+ read_kwargs = {"session": test_session, "update": True}
338
+ if delta:
339
+ read_kwargs |= {
340
+ "delta": True,
341
+ "delta_on": ["file.path"],
342
+ "delta_result_on": ["file.path"],
343
+ }
344
+
345
+ def get_measurement_id(file: File) -> int:
346
+ match = re.search(r"item(\d+)\.txt$", file.path)
347
+ assert match
348
+ return int(match.group(1))
349
+
350
+ def get_num(file: File) -> int:
351
+ return get_measurement_id(file)
352
+
353
+ chain = dc.read_storage(storage_uri, **read_kwargs)
354
+ return (
355
+ chain.mutate(num=1)
356
+ .select_except("num")
357
+ .map(measurement_id=get_measurement_id)
358
+ .map(err=lambda file: "")
359
+ .map(num=get_num)
360
+ .filter(C.err == "")
361
+ .select_except("err")
362
+ .map(double=lambda num: num * 2, output=int)
363
+ .select_except("num")
364
+ )
365
+
366
+ build_chain(delta=False).save(result_name)
367
+
368
+ write_payload(3)
369
+
370
+ build_chain(delta=True).save(result_name)
371
+
372
+ assert set(
373
+ dc.read_dataset(result_name, session=test_session).to_values("measurement_id")
374
+ ) == {1, 2, 3}
375
+
376
+
264
377
  def test_delta_update_from_storage(test_session, tmp_dir, tmp_path):
265
378
  ds_name = "delta_ds"
266
379
  path = tmp_dir.as_uri()
@@ -35,7 +35,7 @@ def test_checkpoints(
35
35
  catalog = test_session.catalog
36
36
  metastore = catalog.metastore
37
37
 
38
- monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", reset_checkpoints)
38
+ monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))
39
39
 
40
40
  if with_delta:
41
41
  chain = dc.read_dataset(
@@ -75,8 +75,9 @@ def test_checkpoints(
75
75
  chain.save("nums3")
76
76
  second_job_id = test_session.get_or_create_job().id
77
77
 
78
- assert len(catalog.get_dataset("nums1").versions) == 2 if reset_checkpoints else 1
79
- assert len(catalog.get_dataset("nums2").versions) == 2 if reset_checkpoints else 1
78
+ expected_versions = 1 if with_delta or not reset_checkpoints else 2
79
+ assert len(catalog.get_dataset("nums1").versions) == expected_versions
80
+ assert len(catalog.get_dataset("nums2").versions) == expected_versions
80
81
  assert len(catalog.get_dataset("nums3").versions) == 1
81
82
 
82
83
  assert len(list(catalog.metastore.list_checkpoints(first_job_id))) == 2
@@ -88,7 +89,7 @@ def test_checkpoints_modified_chains(
88
89
  test_session, monkeypatch, nums_dataset, reset_checkpoints
89
90
  ):
90
91
  catalog = test_session.catalog
91
- monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", reset_checkpoints)
92
+ monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))
92
93
 
93
94
  chain = dc.read_dataset("nums", session=test_session)
94
95
 
@@ -120,7 +121,7 @@ def test_checkpoints_multiple_runs(
120
121
  ):
121
122
  catalog = test_session.catalog
122
123
 
123
- monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", reset_checkpoints)
124
+ monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))
124
125
 
125
126
  chain = dc.read_dataset("nums", session=test_session)
126
127
 
@@ -184,7 +185,7 @@ def test_checkpoints_check_valid_chain_is_returned(
184
185
  monkeypatch,
185
186
  nums_dataset,
186
187
  ):
187
- monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", False)
188
+ monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(False))
188
189
  chain = dc.read_dataset("nums", session=test_session)
189
190
 
190
191
  # -------------- FIRST RUN -------------------
@@ -197,6 +198,7 @@ def test_checkpoints_check_valid_chain_is_returned(
197
198
 
198
199
  # checking that we return expected DataChain even though we skipped chain creation
199
200
  # because of the checkpoints
201
+ assert ds.dataset is not None
200
202
  assert ds.dataset.name == "nums1"
201
203
  assert len(ds.dataset.versions) == 1
202
204
  assert ds.order_by("num").to_list("num") == [(1,), (2,), (3,)]
@@ -140,7 +140,7 @@ def test_merge_similar_objects(test_session):
140
140
  rname = "qq"
141
141
  ch = ch1.merge(ch2, "emp.person.name", rname=rname)
142
142
 
143
- assert list(ch.signals_schema.values.keys()) == ["sys", "emp", rname + "emp"]
143
+ assert list(ch.signals_schema.values.keys()) == ["emp", rname + "emp"]
144
144
 
145
145
  empl = list(ch.to_list())
146
146
  assert len(empl) == 4
@@ -175,7 +175,7 @@ def test_merge_similar_objects_in_memory():
175
175
  assert ch.session.catalog.metastore.db.db_file == ":memory:"
176
176
  assert ch.session.catalog.warehouse.db.db_file == ":memory:"
177
177
 
178
- assert list(ch.signals_schema.values.keys()) == ["sys", "emp", rname + "emp"]
178
+ assert list(ch.signals_schema.values.keys()) == ["emp", rname + "emp"]
179
179
 
180
180
  empl = list(ch.to_list())
181
181
  assert len(empl) == 4
@@ -198,7 +198,6 @@ def test_merge_values(test_session):
198
198
  ch = ch1.merge(ch2, "id")
199
199
 
200
200
  assert list(ch.signals_schema.values.keys()) == [
201
- "sys",
202
201
  "id",
203
202
  "descr",
204
203
  "right_id",
@@ -339,3 +338,42 @@ def test_merge_on_expression(test_session):
339
338
  count += 1
340
339
 
341
340
  assert count == len(team) * len(team)
341
+
342
+
343
+ def test_merge_with_drops_sys_columns(test_session):
344
+ left = dc.read_values(id=[1, 1], lval=[10, 20], session=test_session)
345
+ right = dc.read_values(id=[1, 1], rval=["a", "b"], session=test_session)
346
+
347
+ merged = left.merge(right, on="id")
348
+
349
+ assert "sys" not in merged.signals_schema.values
350
+
351
+ cols = merged.settings(sys=True).to_pandas(flatten=True).columns
352
+ assert all(not str(col).startswith("sys") for col in cols)
353
+
354
+ ds_name = "merge_left_dups_sys_check_sys"
355
+ merged.save(ds_name)
356
+
357
+ df_with_sys = (
358
+ dc.read_dataset(ds_name, session=test_session)
359
+ .settings(sys=True)
360
+ .to_pandas(flatten=True)
361
+ )
362
+
363
+ sys_cols = [c for c in df_with_sys.columns if str(c).startswith("sys")]
364
+ assert sys_cols
365
+
366
+ def _col(name: str) -> str:
367
+ for col in df_with_sys.columns:
368
+ if str(col) == f"sys.{name}":
369
+ return str(col)
370
+ raise AssertionError(f"Missing sys column for {name}")
371
+
372
+ sys_id_col = _col("id")
373
+ sys_rand_col = _col("rand")
374
+
375
+ sys_ids = list(df_with_sys[sys_id_col])
376
+ assert len(sys_ids) == len(set(sys_ids))
377
+
378
+ sys_rand = list(df_with_sys[sys_rand_col])
379
+ assert len(sys_rand) == len(set(sys_rand))
@@ -256,6 +256,24 @@ def test_diff_on_equal_datasets(test_session, on_self):
256
256
  assert diff.order_by("id").to_list(*collect_fields) == expected
257
257
 
258
258
 
259
+ def test_diff_only_on_columns_treated_as_same(test_session):
260
+ ds1 = dc.read_values(
261
+ id=[1, 2],
262
+ session=test_session,
263
+ )
264
+ ds2 = dc.read_values(
265
+ id=[1, 2],
266
+ session=test_session,
267
+ )
268
+
269
+ diff = ds1.diff(ds2, on=["id"], same=True, status_col="diff")
270
+
271
+ assert diff.order_by("id").to_list("diff", "id") == [
272
+ (CompareStatus.SAME, 1),
273
+ (CompareStatus.SAME, 2),
274
+ ]
275
+
276
+
259
277
  def test_diff_multiple_columns(test_session, str_default):
260
278
  ds1 = dc.read_values(
261
279
  id=[1, 2, 4],
@@ -382,7 +400,7 @@ def test_diff_missing_on(test_session):
382
400
  ds2 = dc.read_values(id=[1, 2, 4], session=test_session)
383
401
 
384
402
  with pytest.raises(ValueError) as exc_info:
385
- ds1.diff(ds2, on=None)
403
+ ds1.diff(ds2, on=None) # type: ignore[arg-type]
386
404
 
387
405
  assert str(exc_info.value) == "'on' must be specified"
388
406
 
@@ -218,4 +218,4 @@ def test_diff(test_session):
218
218
  status_col="diff",
219
219
  )
220
220
  .hash()
221
- ) == "4135f2deffa91702259de50b48076dd2f8cdf3be32c167332840209c137977f9"
221
+ ) == "8ffac19b12cf96e2916968914d357c4a9c1b81038c43ab5cf97ba1127fb86567"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes