datachain 0.37.1__tar.gz → 0.37.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (447)
  1. {datachain-0.37.1 → datachain-0.37.3}/PKG-INFO +1 -1
  2. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/sqlite.py +0 -56
  3. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/warehouse.py +0 -15
  4. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/delta.py +3 -34
  5. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/datachain.py +2 -3
  6. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/datasets.py +4 -0
  7. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/storage.py +6 -9
  8. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/dataset.py +1 -1
  9. {datachain-0.37.1 → datachain-0.37.3}/src/datachain.egg-info/PKG-INFO +1 -1
  10. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_delta.py +54 -5
  11. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_datachain_merge.py +41 -3
  12. {datachain-0.37.1 → datachain-0.37.3}/.cruft.json +0 -0
  13. {datachain-0.37.1 → datachain-0.37.3}/.gitattributes +0 -0
  14. {datachain-0.37.1 → datachain-0.37.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  15. {datachain-0.37.1 → datachain-0.37.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  16. {datachain-0.37.1 → datachain-0.37.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  17. {datachain-0.37.1 → datachain-0.37.3}/.github/codecov.yaml +0 -0
  18. {datachain-0.37.1 → datachain-0.37.3}/.github/dependabot.yml +0 -0
  19. {datachain-0.37.1 → datachain-0.37.3}/.github/workflows/benchmarks.yml +0 -0
  20. {datachain-0.37.1 → datachain-0.37.3}/.github/workflows/release.yml +0 -0
  21. {datachain-0.37.1 → datachain-0.37.3}/.github/workflows/tests-studio.yml +0 -0
  22. {datachain-0.37.1 → datachain-0.37.3}/.github/workflows/tests.yml +0 -0
  23. {datachain-0.37.1 → datachain-0.37.3}/.github/workflows/update-template.yaml +0 -0
  24. {datachain-0.37.1 → datachain-0.37.3}/.gitignore +0 -0
  25. {datachain-0.37.1 → datachain-0.37.3}/.pre-commit-config.yaml +0 -0
  26. {datachain-0.37.1 → datachain-0.37.3}/CODE_OF_CONDUCT.rst +0 -0
  27. {datachain-0.37.1 → datachain-0.37.3}/LICENSE +0 -0
  28. {datachain-0.37.1 → datachain-0.37.3}/README.rst +0 -0
  29. {datachain-0.37.1 → datachain-0.37.3}/docs/api_hooks.py +0 -0
  30. {datachain-0.37.1 → datachain-0.37.3}/docs/assets/captioned_cartoons.png +0 -0
  31. {datachain-0.37.1 → datachain-0.37.3}/docs/assets/datachain-white.svg +0 -0
  32. {datachain-0.37.1 → datachain-0.37.3}/docs/assets/datachain.svg +0 -0
  33. {datachain-0.37.1 → datachain-0.37.3}/docs/assets/webhook_dialog.png +0 -0
  34. {datachain-0.37.1 → datachain-0.37.3}/docs/assets/webhook_list.png +0 -0
  35. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/auth/login.md +0 -0
  36. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/auth/logout.md +0 -0
  37. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/auth/team.md +0 -0
  38. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/auth/token.md +0 -0
  39. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/index.md +0 -0
  40. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/job/cancel.md +0 -0
  41. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/job/clusters.md +0 -0
  42. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/job/logs.md +0 -0
  43. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/job/ls.md +0 -0
  44. {datachain-0.37.1 → datachain-0.37.3}/docs/commands/job/run.md +0 -0
  45. {datachain-0.37.1 → datachain-0.37.3}/docs/contributing.md +0 -0
  46. {datachain-0.37.1 → datachain-0.37.3}/docs/css/github-permalink-style.css +0 -0
  47. {datachain-0.37.1 → datachain-0.37.3}/docs/examples.md +0 -0
  48. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/checkpoints.md +0 -0
  49. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/db_migrations.md +0 -0
  50. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/delta.md +0 -0
  51. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/env.md +0 -0
  52. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/index.md +0 -0
  53. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/namespaces.md +0 -0
  54. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/processing.md +0 -0
  55. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/remotes.md +0 -0
  56. {datachain-0.37.1 → datachain-0.37.3}/docs/guide/retry.md +0 -0
  57. {datachain-0.37.1 → datachain-0.37.3}/docs/index.md +0 -0
  58. {datachain-0.37.1 → datachain-0.37.3}/docs/overrides/main.html +0 -0
  59. {datachain-0.37.1 → datachain-0.37.3}/docs/quick-start.md +0 -0
  60. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/arrowrow.md +0 -0
  61. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/bbox.md +0 -0
  62. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/file.md +0 -0
  63. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/imagefile.md +0 -0
  64. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/index.md +0 -0
  65. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/pose.md +0 -0
  66. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/segment.md +0 -0
  67. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/tarvfile.md +0 -0
  68. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/textfile.md +0 -0
  69. {datachain-0.37.1 → datachain-0.37.3}/docs/references/data-types/videofile.md +0 -0
  70. {datachain-0.37.1 → datachain-0.37.3}/docs/references/datachain.md +0 -0
  71. {datachain-0.37.1 → datachain-0.37.3}/docs/references/func.md +0 -0
  72. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/aggregate.md +0 -0
  73. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/array.md +0 -0
  74. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/conditional.md +0 -0
  75. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/numeric.md +0 -0
  76. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/path.md +0 -0
  77. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/random.md +0 -0
  78. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/string.md +0 -0
  79. {datachain-0.37.1 → datachain-0.37.3}/docs/references/functions/window.md +0 -0
  80. {datachain-0.37.1 → datachain-0.37.3}/docs/references/index.md +0 -0
  81. {datachain-0.37.1 → datachain-0.37.3}/docs/references/toolkit.md +0 -0
  82. {datachain-0.37.1 → datachain-0.37.3}/docs/references/torch.md +0 -0
  83. {datachain-0.37.1 → datachain-0.37.3}/docs/references/udf.md +0 -0
  84. {datachain-0.37.1 → datachain-0.37.3}/docs/studio/api/.gitkeep +0 -0
  85. {datachain-0.37.1 → datachain-0.37.3}/docs/studio/webhooks.md +0 -0
  86. {datachain-0.37.1 → datachain-0.37.3}/docs/templates/main.dot +0 -0
  87. {datachain-0.37.1 → datachain-0.37.3}/docs/templates/operation.dot +0 -0
  88. {datachain-0.37.1 → datachain-0.37.3}/docs/templates/responses.def +0 -0
  89. {datachain-0.37.1 → datachain-0.37.3}/docs/tutorials.md +0 -0
  90. {datachain-0.37.1 → datachain-0.37.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  91. {datachain-0.37.1 → datachain-0.37.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  92. {datachain-0.37.1 → datachain-0.37.3}/examples/computer_vision/openimage-detect.py +0 -0
  93. {datachain-0.37.1 → datachain-0.37.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
  94. {datachain-0.37.1 → datachain-0.37.3}/examples/computer_vision/ultralytics-pose.py +0 -0
  95. {datachain-0.37.1 → datachain-0.37.3}/examples/computer_vision/ultralytics-segment.py +0 -0
  96. {datachain-0.37.1 → datachain-0.37.3}/examples/get_started/common_sql_functions.py +0 -0
  97. {datachain-0.37.1 → datachain-0.37.3}/examples/get_started/json-csv-reader.py +0 -0
  98. {datachain-0.37.1 → datachain-0.37.3}/examples/get_started/nested_datamodel.py +0 -0
  99. {datachain-0.37.1 → datachain-0.37.3}/examples/get_started/torch-loader.py +0 -0
  100. {datachain-0.37.1 → datachain-0.37.3}/examples/get_started/udfs/parallel.py +0 -0
  101. {datachain-0.37.1 → datachain-0.37.3}/examples/get_started/udfs/simple.py +0 -0
  102. {datachain-0.37.1 → datachain-0.37.3}/examples/get_started/udfs/stateful.py +0 -0
  103. {datachain-0.37.1 → datachain-0.37.3}/examples/incremental_processing/delta.py +0 -0
  104. {datachain-0.37.1 → datachain-0.37.3}/examples/incremental_processing/retry.py +0 -0
  105. {datachain-0.37.1 → datachain-0.37.3}/examples/incremental_processing/utils.py +0 -0
  106. {datachain-0.37.1 → datachain-0.37.3}/examples/llm_and_nlp/claude-query.py +0 -0
  107. {datachain-0.37.1 → datachain-0.37.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  108. {datachain-0.37.1 → datachain-0.37.3}/examples/multimodal/audio-to-text.py +0 -0
  109. {datachain-0.37.1 → datachain-0.37.3}/examples/multimodal/clip_inference.py +0 -0
  110. {datachain-0.37.1 → datachain-0.37.3}/examples/multimodal/hf_pipeline.py +0 -0
  111. {datachain-0.37.1 → datachain-0.37.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
  112. {datachain-0.37.1 → datachain-0.37.3}/examples/multimodal/wds.py +0 -0
  113. {datachain-0.37.1 → datachain-0.37.3}/examples/multimodal/wds_filtered.py +0 -0
  114. {datachain-0.37.1 → datachain-0.37.3}/mkdocs.yml +0 -0
  115. {datachain-0.37.1 → datachain-0.37.3}/noxfile.py +0 -0
  116. {datachain-0.37.1 → datachain-0.37.3}/pyproject.toml +0 -0
  117. {datachain-0.37.1 → datachain-0.37.3}/setup.cfg +0 -0
  118. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/__init__.py +0 -0
  119. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/__main__.py +0 -0
  120. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/asyn.py +0 -0
  121. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cache.py +0 -0
  122. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/catalog/__init__.py +0 -0
  123. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/catalog/catalog.py +0 -0
  124. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/catalog/datasource.py +0 -0
  125. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/catalog/dependency.py +0 -0
  126. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/catalog/loader.py +0 -0
  127. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/checkpoint.py +0 -0
  128. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/__init__.py +0 -0
  129. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/__init__.py +0 -0
  130. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/datasets.py +0 -0
  131. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/du.py +0 -0
  132. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/index.py +0 -0
  133. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/ls.py +0 -0
  134. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/misc.py +0 -0
  135. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/query.py +0 -0
  136. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/commands/show.py +0 -0
  137. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/parser/__init__.py +0 -0
  138. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/parser/job.py +0 -0
  139. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/parser/studio.py +0 -0
  140. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/parser/utils.py +0 -0
  141. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/cli/utils.py +0 -0
  142. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/__init__.py +0 -0
  143. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/azure.py +0 -0
  144. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/fileslice.py +0 -0
  145. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/fsspec.py +0 -0
  146. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/gcs.py +0 -0
  147. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/hf.py +0 -0
  148. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/http.py +0 -0
  149. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/local.py +0 -0
  150. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/client/s3.py +0 -0
  151. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/config.py +0 -0
  152. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/__init__.py +0 -0
  153. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/db_engine.py +0 -0
  154. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/job.py +0 -0
  155. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/metastore.py +0 -0
  156. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/schema.py +0 -0
  157. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/data_storage/serializer.py +0 -0
  158. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/dataset.py +0 -0
  159. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/diff/__init__.py +0 -0
  160. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/error.py +0 -0
  161. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/fs/__init__.py +0 -0
  162. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/fs/reference.py +0 -0
  163. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/fs/utils.py +0 -0
  164. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/__init__.py +0 -0
  165. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/aggregate.py +0 -0
  166. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/array.py +0 -0
  167. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/base.py +0 -0
  168. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/conditional.py +0 -0
  169. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/func.py +0 -0
  170. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/numeric.py +0 -0
  171. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/path.py +0 -0
  172. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/random.py +0 -0
  173. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/string.py +0 -0
  174. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/func/window.py +0 -0
  175. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/hash_utils.py +0 -0
  176. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/job.py +0 -0
  177. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/__init__.py +0 -0
  178. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/arrow.py +0 -0
  179. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/audio.py +0 -0
  180. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/clip.py +0 -0
  181. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/convert/__init__.py +0 -0
  182. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/convert/flatten.py +0 -0
  183. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
  184. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
  185. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/convert/unflatten.py +0 -0
  186. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  187. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/data_model.py +0 -0
  188. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dataset_info.py +0 -0
  189. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/__init__.py +0 -0
  190. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/csv.py +0 -0
  191. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/database.py +0 -0
  192. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/hf.py +0 -0
  193. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/json.py +0 -0
  194. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/listings.py +0 -0
  195. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/pandas.py +0 -0
  196. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/parquet.py +0 -0
  197. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/records.py +0 -0
  198. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/storage_pattern.py +0 -0
  199. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/utils.py +0 -0
  200. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/dc/values.py +0 -0
  201. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/file.py +0 -0
  202. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/hf.py +0 -0
  203. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/image.py +0 -0
  204. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/listing.py +0 -0
  205. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/listing_info.py +0 -0
  206. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/meta_formats.py +0 -0
  207. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/model_store.py +0 -0
  208. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/namespaces.py +0 -0
  209. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/projects.py +0 -0
  210. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/pytorch.py +0 -0
  211. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/settings.py +0 -0
  212. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/signal_schema.py +0 -0
  213. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/tar.py +0 -0
  214. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/text.py +0 -0
  215. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/udf.py +0 -0
  216. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/udf_signature.py +0 -0
  217. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/utils.py +0 -0
  218. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/video.py +0 -0
  219. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/webdataset.py +0 -0
  220. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/lib/webdataset_laion.py +0 -0
  221. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/listing.py +0 -0
  222. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/__init__.py +0 -0
  223. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/bbox.py +0 -0
  224. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/pose.py +0 -0
  225. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/segment.py +0 -0
  226. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/ultralytics/__init__.py +0 -0
  227. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/ultralytics/bbox.py +0 -0
  228. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/ultralytics/pose.py +0 -0
  229. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/ultralytics/segment.py +0 -0
  230. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/model/utils.py +0 -0
  231. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/namespace.py +0 -0
  232. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/node.py +0 -0
  233. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/nodes_fetcher.py +0 -0
  234. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/nodes_thread_pool.py +0 -0
  235. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/plugins.py +0 -0
  236. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/progress.py +0 -0
  237. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/project.py +0 -0
  238. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/py.typed +0 -0
  239. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/__init__.py +0 -0
  240. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/batch.py +0 -0
  241. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/dispatch.py +0 -0
  242. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/metrics.py +0 -0
  243. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/params.py +0 -0
  244. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/queue.py +0 -0
  245. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/schema.py +0 -0
  246. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/session.py +0 -0
  247. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/query/udf.py +0 -0
  248. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/remote/__init__.py +0 -0
  249. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/remote/studio.py +0 -0
  250. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/script_meta.py +0 -0
  251. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/semver.py +0 -0
  252. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/__init__.py +0 -0
  253. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/default/__init__.py +0 -0
  254. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/default/base.py +0 -0
  255. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/__init__.py +0 -0
  256. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/aggregate.py +0 -0
  257. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/array.py +0 -0
  258. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/conditional.py +0 -0
  259. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/numeric.py +0 -0
  260. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/path.py +0 -0
  261. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/random.py +0 -0
  262. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/functions/string.py +0 -0
  263. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/postgresql_dialect.py +0 -0
  264. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/postgresql_types.py +0 -0
  265. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/selectable.py +0 -0
  266. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  267. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/sqlite/base.py +0 -0
  268. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/sqlite/types.py +0 -0
  269. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/sqlite/vector.py +0 -0
  270. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/types.py +0 -0
  271. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/sql/utils.py +0 -0
  272. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/studio.py +0 -0
  273. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/telemetry.py +0 -0
  274. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/toolkit/__init__.py +0 -0
  275. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/toolkit/split.py +0 -0
  276. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/torch/__init__.py +0 -0
  277. {datachain-0.37.1 → datachain-0.37.3}/src/datachain/utils.py +0 -0
  278. {datachain-0.37.1 → datachain-0.37.3}/src/datachain.egg-info/SOURCES.txt +0 -0
  279. {datachain-0.37.1 → datachain-0.37.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  280. {datachain-0.37.1 → datachain-0.37.3}/src/datachain.egg-info/entry_points.txt +0 -0
  281. {datachain-0.37.1 → datachain-0.37.3}/src/datachain.egg-info/requires.txt +0 -0
  282. {datachain-0.37.1 → datachain-0.37.3}/src/datachain.egg-info/top_level.txt +0 -0
  283. {datachain-0.37.1 → datachain-0.37.3}/tests/__init__.py +0 -0
  284. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/__init__.py +0 -0
  285. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/conftest.py +0 -0
  286. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  287. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/datasets/.dvc/config +0 -0
  288. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/datasets/.gitignore +0 -0
  289. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  290. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/test_datachain.py +0 -0
  291. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/test_ls.py +0 -0
  292. {datachain-0.37.1 → datachain-0.37.3}/tests/benchmarks/test_version.py +0 -0
  293. {datachain-0.37.1 → datachain-0.37.3}/tests/conftest.py +0 -0
  294. {datachain-0.37.1 → datachain-0.37.3}/tests/data.py +0 -0
  295. {datachain-0.37.1 → datachain-0.37.3}/tests/examples/__init__.py +0 -0
  296. {datachain-0.37.1 → datachain-0.37.3}/tests/examples/test_examples.py +0 -0
  297. {datachain-0.37.1 → datachain-0.37.3}/tests/examples/test_wds_e2e.py +0 -0
  298. {datachain-0.37.1 → datachain-0.37.3}/tests/examples/wds_data.py +0 -0
  299. {datachain-0.37.1 → datachain-0.37.3}/tests/func/__init__.py +0 -0
  300. {datachain-0.37.1 → datachain-0.37.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  301. {datachain-0.37.1 → datachain-0.37.3}/tests/func/data/lena.jpg +0 -0
  302. {datachain-0.37.1 → datachain-0.37.3}/tests/func/fake-service-account-credentials.json +0 -0
  303. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/__init__.py +0 -0
  304. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/test_aggregate.py +0 -0
  305. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/test_array.py +0 -0
  306. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/test_conditional.py +0 -0
  307. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/test_numeric.py +0 -0
  308. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/test_path.py +0 -0
  309. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/test_random.py +0 -0
  310. {datachain-0.37.1 → datachain-0.37.3}/tests/func/functions/test_string.py +0 -0
  311. {datachain-0.37.1 → datachain-0.37.3}/tests/func/model/__init__.py +0 -0
  312. {datachain-0.37.1 → datachain-0.37.3}/tests/func/model/data/running-mask0.png +0 -0
  313. {datachain-0.37.1 → datachain-0.37.3}/tests/func/model/data/running-mask1.png +0 -0
  314. {datachain-0.37.1 → datachain-0.37.3}/tests/func/model/data/running.jpg +0 -0
  315. {datachain-0.37.1 → datachain-0.37.3}/tests/func/model/data/ships.jpg +0 -0
  316. {datachain-0.37.1 → datachain-0.37.3}/tests/func/model/test_yolo.py +0 -0
  317. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_audio.py +0 -0
  318. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_catalog.py +0 -0
  319. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_checkpoints.py +0 -0
  320. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_client.py +0 -0
  321. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_cloud_transfer.py +0 -0
  322. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_data_storage.py +0 -0
  323. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_datachain.py +0 -0
  324. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_datachain_merge.py +0 -0
  325. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_dataset_query.py +0 -0
  326. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_datasets.py +0 -0
  327. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_feature_pickling.py +0 -0
  328. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_file.py +0 -0
  329. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_hf.py +0 -0
  330. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_hidden_field.py +0 -0
  331. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_image.py +0 -0
  332. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_listing.py +0 -0
  333. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_ls.py +0 -0
  334. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_meta_formats.py +0 -0
  335. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_metastore.py +0 -0
  336. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_metrics.py +0 -0
  337. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_mutate.py +0 -0
  338. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_pull.py +0 -0
  339. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_pytorch.py +0 -0
  340. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_query.py +0 -0
  341. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_read_database.py +0 -0
  342. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_read_dataset_remote.py +0 -0
  343. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  344. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_retry.py +0 -0
  345. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_session.py +0 -0
  346. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_storage_pattern.py +0 -0
  347. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_studio_datetime_parsing.py +0 -0
  348. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_temp_table_tracking.py +0 -0
  349. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_to_database.py +0 -0
  350. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_toolkit.py +0 -0
  351. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_udf.py +0 -0
  352. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_union.py +0 -0
  353. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_video.py +0 -0
  354. {datachain-0.37.1 → datachain-0.37.3}/tests/func/test_warehouse.py +0 -0
  355. {datachain-0.37.1 → datachain-0.37.3}/tests/scripts/feature_class.py +0 -0
  356. {datachain-0.37.1 → datachain-0.37.3}/tests/scripts/feature_class_exception.py +0 -0
  357. {datachain-0.37.1 → datachain-0.37.3}/tests/scripts/feature_class_parallel.py +0 -0
  358. {datachain-0.37.1 → datachain-0.37.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  359. {datachain-0.37.1 → datachain-0.37.3}/tests/scripts/name_len_slow.py +0 -0
  360. {datachain-0.37.1 → datachain-0.37.3}/tests/test_atomicity.py +0 -0
  361. {datachain-0.37.1 → datachain-0.37.3}/tests/test_cli_e2e.py +0 -0
  362. {datachain-0.37.1 → datachain-0.37.3}/tests/test_cli_studio.py +0 -0
  363. {datachain-0.37.1 → datachain-0.37.3}/tests/test_import_time.py +0 -0
  364. {datachain-0.37.1 → datachain-0.37.3}/tests/test_job_management_e2e.py +0 -0
  365. {datachain-0.37.1 → datachain-0.37.3}/tests/test_query_e2e.py +0 -0
  366. {datachain-0.37.1 → datachain-0.37.3}/tests/test_telemetry.py +0 -0
  367. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/__init__.py +0 -0
  368. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/__init__.py +0 -0
  369. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/conftest.py +0 -0
  370. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_arrow.py +0 -0
  371. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_audio.py +0 -0
  372. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_checkpoints.py +0 -0
  373. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_clip.py +0 -0
  374. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_datachain.py +0 -0
  375. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  376. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_diff.py +0 -0
  377. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_feature.py +0 -0
  378. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_feature_utils.py +0 -0
  379. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_file.py +0 -0
  380. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_hf.py +0 -0
  381. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_image.py +0 -0
  382. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_listing_info.py +0 -0
  383. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_namespace.py +0 -0
  384. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_partition_by.py +0 -0
  385. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_project.py +0 -0
  386. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_python_to_sql.py +0 -0
  387. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_schema.py +0 -0
  388. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_settings.py +0 -0
  389. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_signal_schema.py +0 -0
  390. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_sql_to_python.py +0 -0
  391. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_storage_pattern.py +0 -0
  392. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_text.py +0 -0
  393. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_udf.py +0 -0
  394. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_udf_signature.py +0 -0
  395. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_utils.py +0 -0
  396. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/lib/test_webdataset.py +0 -0
  397. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/model/__init__.py +0 -0
  398. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/model/test_bbox.py +0 -0
  399. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/model/test_pose.py +0 -0
  400. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/model/test_segment.py +0 -0
  401. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/model/test_utils.py +0 -0
  402. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/__init__.py +0 -0
  403. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/sqlite/__init__.py +0 -0
  404. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/sqlite/test_types.py +0 -0
  405. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  406. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/test_array.py +0 -0
  407. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/test_conditional.py +0 -0
  408. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/test_path.py +0 -0
  409. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/test_random.py +0 -0
  410. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/test_selectable.py +0 -0
  411. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/sql/test_string.py +0 -0
  412. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_asyn.py +0 -0
  413. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_batching.py +0 -0
  414. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_cache.py +0 -0
  415. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_catalog.py +0 -0
  416. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_catalog_loader.py +0 -0
  417. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_cli_datasets.py +0 -0
  418. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_cli_parsing.py +0 -0
  419. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_client.py +0 -0
  420. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_client_gcs.py +0 -0
  421. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_client_http.py +0 -0
  422. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_client_s3.py +0 -0
  423. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_config.py +0 -0
  424. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_data_storage.py +0 -0
  425. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_database_engine.py +0 -0
  426. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_datachain_hash.py +0 -0
  427. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_dataset.py +0 -0
  428. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_dispatch.py +0 -0
  429. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_fileslice.py +0 -0
  430. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_func.py +0 -0
  431. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_hash_utils.py +0 -0
  432. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_job_management.py +0 -0
  433. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_listing.py +0 -0
  434. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_metastore.py +0 -0
  435. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_module_exports.py +0 -0
  436. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_pytorch.py +0 -0
  437. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_query.py +0 -0
  438. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_query_metrics.py +0 -0
  439. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_query_params.py +0 -0
  440. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_query_steps_hash.py +0 -0
  441. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_script_meta.py +0 -0
  442. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_semver.py +0 -0
  443. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_serializer.py +0 -0
  444. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_session.py +0 -0
  445. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_utils.py +0 -0
  446. {datachain-0.37.1 → datachain-0.37.3}/tests/unit/test_warehouse.py +0 -0
  447. {datachain-0.37.1 → datachain-0.37.3}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.1
3
+ Version: 0.37.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -29,7 +29,6 @@ from sqlalchemy.sql.selectable import Select
29
29
  from tqdm.auto import tqdm
30
30
 
31
31
  import datachain.sql.sqlite
32
- from datachain import semver
33
32
  from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
34
33
  from datachain.data_storage.db_engine import DatabaseEngine
35
34
  from datachain.data_storage.schema import DefaultSchema
@@ -692,61 +691,6 @@ class SQLiteWarehouse(AbstractWarehouse):
692
691
  for row in self.db.execute(query, cursor=cur)
693
692
  ]
694
693
 
695
- def merge_dataset_rows(
696
- self,
697
- src: DatasetRecord,
698
- dst: DatasetRecord,
699
- src_version: str,
700
- dst_version: str,
701
- ) -> None:
702
- dst_empty = False
703
-
704
- if not self.db.has_table(self.dataset_table_name(src, src_version)):
705
- # source table doesn't exist, nothing to do
706
- return
707
-
708
- src_dr = self.dataset_rows(src, src_version).table
709
-
710
- if not self.db.has_table(self.dataset_table_name(dst, dst_version)):
711
- # destination table doesn't exist, create it
712
- self.create_dataset_rows_table(
713
- self.dataset_table_name(dst, dst_version),
714
- columns=src_dr.columns,
715
- )
716
- dst_empty = True
717
-
718
- dst_dr = self.dataset_rows(dst, dst_version).table
719
- merge_fields = [c.name for c in src_dr.columns if c.name != "sys__id"]
720
- select_src = select(*(getattr(src_dr.columns, f) for f in merge_fields))
721
-
722
- if dst_empty:
723
- # we don't need union, but just select from source to destination
724
- insert_query = sqlite.insert(dst_dr).from_select(merge_fields, select_src)
725
- else:
726
- dst_version_latest = None
727
- # find the previous version of the destination dataset
728
- dst_previous_versions = [
729
- v.version
730
- for v in dst.versions # type: ignore [union-attr]
731
- if semver.compare(v.version, dst_version) == -1
732
- ]
733
- if dst_previous_versions:
734
- dst_version_latest = max(dst_previous_versions)
735
-
736
- dst_dr_latest = self.dataset_rows(dst, dst_version_latest).table
737
-
738
- select_dst_latest = select(
739
- *(getattr(dst_dr_latest.c, f) for f in merge_fields)
740
- )
741
- union_query = sqlalchemy.union(select_src, select_dst_latest)
742
- insert_query = (
743
- sqlite.insert(dst_dr)
744
- .from_select(merge_fields, union_query)
745
- .prefix_with("OR IGNORE")
746
- )
747
-
748
- self.db.execute(insert_query)
749
-
750
694
  def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
751
695
  return (e.model_dump() for e in entries)
752
696
 
@@ -371,21 +371,6 @@ class AbstractWarehouse(ABC, Serializable):
371
371
  table = sa.Table(table_name, self.db.metadata)
372
372
  self.db.drop_table(table, if_exists=if_exists)
373
373
 
374
- @abstractmethod
375
- def merge_dataset_rows(
376
- self,
377
- src: "DatasetRecord",
378
- dst: "DatasetRecord",
379
- src_version: str,
380
- dst_version: str,
381
- ) -> None:
382
- """
383
- Merges source dataset rows and current latest destination dataset rows
384
- into a new rows table created for new destination dataset version.
385
- Note that table for new destination version must be created upfront.
386
- Merge results should not contain duplicates.
387
- """
388
-
389
374
  def dataset_rows_select(
390
375
  self,
391
376
  query: sa.Select,
@@ -1,16 +1,12 @@
1
- import hashlib
2
1
  from collections.abc import Sequence
3
2
  from copy import copy
4
3
  from functools import wraps
5
4
  from typing import TYPE_CHECKING, TypeVar
6
5
 
7
- from attrs import frozen
8
-
9
6
  import datachain
10
7
  from datachain.dataset import DatasetDependency, DatasetRecord
11
8
  from datachain.error import DatasetNotFoundError
12
9
  from datachain.project import Project
13
- from datachain.query.dataset import Step, step_result
14
10
 
15
11
  if TYPE_CHECKING:
16
12
  from collections.abc import Callable
@@ -18,9 +14,7 @@ if TYPE_CHECKING:
18
14
 
19
15
  from typing_extensions import ParamSpec
20
16
 
21
- from datachain.catalog import Catalog
22
17
  from datachain.lib.dc import DataChain
23
- from datachain.query.dataset import QueryGenerator
24
18
 
25
19
  P = ParamSpec("P")
26
20
 
@@ -49,38 +43,11 @@ def delta_disabled(
49
43
  return _inner
50
44
 
51
45
 
52
- @frozen
53
- class _RegenerateSystemColumnsStep(Step):
54
- catalog: "Catalog"
55
-
56
- def hash_inputs(self) -> str:
57
- return hashlib.sha256(b"regenerate_sys_columns").hexdigest()
58
-
59
- def apply(self, query_generator: "QueryGenerator", temp_tables: list[str]):
60
- selectable = query_generator.select()
61
- regenerated = self.catalog.warehouse._regenerate_system_columns(
62
- selectable,
63
- keep_existing_columns=True,
64
- regenerate_columns=None,
65
- )
66
-
67
- def q(*columns):
68
- return regenerated.with_only_columns(*columns)
69
-
70
- return step_result(q, regenerated.selected_columns)
71
-
72
-
73
46
  def _append_steps(dc: "DataChain", other: "DataChain"):
74
47
  """Returns cloned chain with appended steps from other chain.
75
48
  Steps are all those modification methods applied like filters, mappers etc.
76
49
  """
77
50
  dc = dc.clone()
78
- dc._query.steps.append(
79
- _RegenerateSystemColumnsStep(
80
- catalog=dc.session.catalog,
81
- )
82
- )
83
-
84
51
  dc._query.steps += other._query.steps.copy()
85
52
  dc.signals_schema = other.signals_schema
86
53
  return dc
@@ -150,7 +117,9 @@ def _get_retry_chain(
150
117
  error_records = result_dataset.filter(C(delta_retry) != "")
151
118
  error_source_records = source_dc.merge(
152
119
  error_records, on=on, right_on=right_on, inner=True
153
- ).select(*list(source_dc.signals_schema.values))
120
+ ).select(
121
+ *list(source_dc.signals_schema.clone_without_sys_signals().values.keys())
122
+ )
154
123
  retry_chain = error_source_records
155
124
 
156
125
  # Handle missing records if delta_retry is True
@@ -1697,14 +1697,13 @@ class DataChain:
1697
1697
  query.feature_schema = None
1698
1698
  ds = self._evolve(query=query)
1699
1699
 
1700
+ # Note: merge drops sys signals from both sides, make sure to not include it
1701
+ # in the resulting schema
1700
1702
  signals_schema = self.signals_schema.clone_without_sys_signals()
1701
1703
  right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
1702
1704
 
1703
1705
  ds.signals_schema = signals_schema.merge(right_signals_schema, rname)
1704
1706
 
1705
- if not full:
1706
- ds.signals_schema = SignalSchema({"sys": Sys}) | ds.signals_schema
1707
-
1708
1707
  return ds
1709
1708
 
1710
1709
  @delta_disabled
@@ -200,6 +200,10 @@ def read_dataset(
200
200
  signals_schema |= SignalSchema.deserialize(query.feature_schema)
201
201
  else:
202
202
  signals_schema |= SignalSchema.from_column_types(query.column_types or {})
203
+
204
+ if delta:
205
+ signals_schema = signals_schema.clone_without_sys_signals()
206
+
203
207
  chain = DataChain(query, _settings, signals_schema)
204
208
 
205
209
  if delta:
@@ -187,6 +187,12 @@ def read_storage(
187
187
  project=listing_project_name,
188
188
  session=session,
189
189
  settings=settings,
190
+ delta=delta,
191
+ delta_on=delta_on,
192
+ delta_result_on=delta_result_on,
193
+ delta_compare=delta_compare,
194
+ delta_retry=delta_retry,
195
+ delta_unsafe=delta_unsafe,
190
196
  )
191
197
  dc._query.update = update
192
198
  dc.signals_schema = dc.signals_schema.mutate({f"{column}": file_type})
@@ -252,13 +258,4 @@ def read_storage(
252
258
 
253
259
  assert storage_chain is not None
254
260
 
255
- if delta:
256
- storage_chain = storage_chain._as_delta(
257
- on=delta_on,
258
- right_on=delta_result_on,
259
- compare=delta_compare,
260
- delta_retry=delta_retry,
261
- delta_unsafe=delta_unsafe,
262
- )
263
-
264
261
  return storage_chain
@@ -1065,7 +1065,7 @@ class SQLJoin(Step):
1065
1065
  q1 = self.get_query(self.query1, temp_tables)
1066
1066
  q2 = self.get_query(self.query2, temp_tables)
1067
1067
 
1068
- q1_columns = _drop_system_columns(q1.c) if self.full else list(q1.c)
1068
+ q1_columns = _drop_system_columns(q1.c)
1069
1069
  q1_column_names = {c.name for c in q1_columns}
1070
1070
 
1071
1071
  q2_columns = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.37.1
3
+ Version: 0.37.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -314,17 +314,66 @@ def test_delta_replay_regenerates_system_columns(test_session):
314
314
 
315
315
  build_chain(delta=False).save(result_name)
316
316
 
317
- build_chain(delta=True).save(
318
- result_name,
319
- delta=True,
320
- delta_on="measurement_id",
321
- )
317
+ build_chain(delta=True).save(result_name)
322
318
 
323
319
  assert set(
324
320
  dc.read_dataset(result_name, session=test_session).to_values("measurement_id")
325
321
  ) == {1, 2}
326
322
 
327
323
 
324
+ def test_storage_delta_replay_regenerates_system_columns(test_session, tmp_dir):
325
+ data_dir = tmp_dir / f"regen_storage_{uuid.uuid4().hex[:8]}"
326
+ data_dir.mkdir()
327
+ storage_uri = data_dir.as_uri()
328
+ result_name = f"regen_storage_result_{uuid.uuid4().hex[:8]}"
329
+
330
+ def write_payload(index: int) -> None:
331
+ (data_dir / f"item{index}.txt").write_text(f"payload-{index}")
332
+
333
+ write_payload(1)
334
+ write_payload(2)
335
+
336
+ def build_chain(delta: bool):
337
+ read_kwargs = {"session": test_session, "update": True}
338
+ if delta:
339
+ read_kwargs |= {
340
+ "delta": True,
341
+ "delta_on": ["file.path"],
342
+ "delta_result_on": ["file.path"],
343
+ }
344
+
345
+ def get_measurement_id(file: File) -> int:
346
+ match = re.search(r"item(\d+)\.txt$", file.path)
347
+ assert match
348
+ return int(match.group(1))
349
+
350
+ def get_num(file: File) -> int:
351
+ return get_measurement_id(file)
352
+
353
+ chain = dc.read_storage(storage_uri, **read_kwargs)
354
+ return (
355
+ chain.mutate(num=1)
356
+ .select_except("num")
357
+ .map(measurement_id=get_measurement_id)
358
+ .map(err=lambda file: "")
359
+ .map(num=get_num)
360
+ .filter(C.err == "")
361
+ .select_except("err")
362
+ .map(double=lambda num: num * 2, output=int)
363
+ .select_except("num")
364
+ )
365
+
366
+ build_chain(delta=False).save(result_name)
367
+
368
+ write_payload(3)
369
+
370
+ build_chain(delta=True).save(result_name)
371
+
372
+ assert set(
373
+ dc.read_dataset(result_name, session=test_session).to_values("measurement_id")
374
+ ) == {1, 2, 3}
375
+
376
+
328
377
  def test_delta_update_from_storage(test_session, tmp_dir, tmp_path):
329
378
  ds_name = "delta_ds"
330
379
  path = tmp_dir.as_uri()
@@ -140,7 +140,7 @@ def test_merge_similar_objects(test_session):
140
140
  rname = "qq"
141
141
  ch = ch1.merge(ch2, "emp.person.name", rname=rname)
142
142
 
143
- assert list(ch.signals_schema.values.keys()) == ["sys", "emp", rname + "emp"]
143
+ assert list(ch.signals_schema.values.keys()) == ["emp", rname + "emp"]
144
144
 
145
145
  empl = list(ch.to_list())
146
146
  assert len(empl) == 4
@@ -175,7 +175,7 @@ def test_merge_similar_objects_in_memory():
175
175
  assert ch.session.catalog.metastore.db.db_file == ":memory:"
176
176
  assert ch.session.catalog.warehouse.db.db_file == ":memory:"
177
177
 
178
- assert list(ch.signals_schema.values.keys()) == ["sys", "emp", rname + "emp"]
178
+ assert list(ch.signals_schema.values.keys()) == ["emp", rname + "emp"]
179
179
 
180
180
  empl = list(ch.to_list())
181
181
  assert len(empl) == 4
@@ -198,7 +198,6 @@ def test_merge_values(test_session):
198
198
  ch = ch1.merge(ch2, "id")
199
199
 
200
200
  assert list(ch.signals_schema.values.keys()) == [
201
- "sys",
202
201
  "id",
203
202
  "descr",
204
203
  "right_id",
@@ -339,3 +338,42 @@ def test_merge_on_expression(test_session):
339
338
  count += 1
340
339
 
341
340
  assert count == len(team) * len(team)
341
+
342
+
343
+ def test_merge_with_drops_sys_columns(test_session):
344
+ left = dc.read_values(id=[1, 1], lval=[10, 20], session=test_session)
345
+ right = dc.read_values(id=[1, 1], rval=["a", "b"], session=test_session)
346
+
347
+ merged = left.merge(right, on="id")
348
+
349
+ assert "sys" not in merged.signals_schema.values
350
+
351
+ cols = merged.settings(sys=True).to_pandas(flatten=True).columns
352
+ assert all(not str(col).startswith("sys") for col in cols)
353
+
354
+ ds_name = "merge_left_dups_sys_check_sys"
355
+ merged.save(ds_name)
356
+
357
+ df_with_sys = (
358
+ dc.read_dataset(ds_name, session=test_session)
359
+ .settings(sys=True)
360
+ .to_pandas(flatten=True)
361
+ )
362
+
363
+ sys_cols = [c for c in df_with_sys.columns if str(c).startswith("sys")]
364
+ assert sys_cols
365
+
366
+ def _col(name: str) -> str:
367
+ for col in df_with_sys.columns:
368
+ if str(col) == f"sys.{name}":
369
+ return str(col)
370
+ raise AssertionError(f"Missing sys column for {name}")
371
+
372
+ sys_id_col = _col("id")
373
+ sys_rand_col = _col("rand")
374
+
375
+ sys_ids = list(df_with_sys[sys_id_col])
376
+ assert len(sys_ids) == len(set(sys_ids))
377
+
378
+ sys_rand = list(df_with_sys[sys_rand_col])
379
+ assert len(sys_rand) == len(set(sys_rand))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes