datachain 0.34.1__tar.gz → 0.34.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (442)
  1. {datachain-0.34.1 → datachain-0.34.3}/PKG-INFO +1 -1
  2. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/catalog/loader.py +5 -0
  3. datachain-0.34.3/src/datachain/data_storage/serializer.py +119 -0
  4. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/data_storage/sqlite.py +24 -3
  5. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/data_storage/warehouse.py +40 -0
  6. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/datachain.py +7 -2
  7. datachain-0.34.3/src/datachain/plugins.py +30 -0
  8. {datachain-0.34.1 → datachain-0.34.3}/src/datachain.egg-info/PKG-INFO +1 -1
  9. {datachain-0.34.1 → datachain-0.34.3}/src/datachain.egg-info/SOURCES.txt +1 -0
  10. {datachain-0.34.1 → datachain-0.34.3}/tests/conftest.py +1 -0
  11. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_datachain_merge.py +44 -0
  12. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_database_engine.py +13 -11
  13. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_metastore.py +12 -11
  14. datachain-0.34.3/tests/unit/test_serializer.py +218 -0
  15. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_warehouse.py +11 -10
  16. datachain-0.34.1/src/datachain/data_storage/serializer.py +0 -29
  17. datachain-0.34.1/tests/unit/test_serializer.py +0 -92
  18. {datachain-0.34.1 → datachain-0.34.3}/.cruft.json +0 -0
  19. {datachain-0.34.1 → datachain-0.34.3}/.gitattributes +0 -0
  20. {datachain-0.34.1 → datachain-0.34.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  21. {datachain-0.34.1 → datachain-0.34.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  22. {datachain-0.34.1 → datachain-0.34.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  23. {datachain-0.34.1 → datachain-0.34.3}/.github/codecov.yaml +0 -0
  24. {datachain-0.34.1 → datachain-0.34.3}/.github/dependabot.yml +0 -0
  25. {datachain-0.34.1 → datachain-0.34.3}/.github/workflows/benchmarks.yml +0 -0
  26. {datachain-0.34.1 → datachain-0.34.3}/.github/workflows/release.yml +0 -0
  27. {datachain-0.34.1 → datachain-0.34.3}/.github/workflows/tests-studio.yml +0 -0
  28. {datachain-0.34.1 → datachain-0.34.3}/.github/workflows/tests.yml +0 -0
  29. {datachain-0.34.1 → datachain-0.34.3}/.github/workflows/update-template.yaml +0 -0
  30. {datachain-0.34.1 → datachain-0.34.3}/.gitignore +0 -0
  31. {datachain-0.34.1 → datachain-0.34.3}/.pre-commit-config.yaml +0 -0
  32. {datachain-0.34.1 → datachain-0.34.3}/CODE_OF_CONDUCT.rst +0 -0
  33. {datachain-0.34.1 → datachain-0.34.3}/LICENSE +0 -0
  34. {datachain-0.34.1 → datachain-0.34.3}/README.rst +0 -0
  35. {datachain-0.34.1 → datachain-0.34.3}/docs/api_hooks.py +0 -0
  36. {datachain-0.34.1 → datachain-0.34.3}/docs/assets/captioned_cartoons.png +0 -0
  37. {datachain-0.34.1 → datachain-0.34.3}/docs/assets/datachain-white.svg +0 -0
  38. {datachain-0.34.1 → datachain-0.34.3}/docs/assets/datachain.svg +0 -0
  39. {datachain-0.34.1 → datachain-0.34.3}/docs/assets/webhook_dialog.png +0 -0
  40. {datachain-0.34.1 → datachain-0.34.3}/docs/assets/webhook_list.png +0 -0
  41. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/auth/login.md +0 -0
  42. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/auth/logout.md +0 -0
  43. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/auth/team.md +0 -0
  44. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/auth/token.md +0 -0
  45. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/index.md +0 -0
  46. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/job/cancel.md +0 -0
  47. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/job/clusters.md +0 -0
  48. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/job/logs.md +0 -0
  49. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/job/ls.md +0 -0
  50. {datachain-0.34.1 → datachain-0.34.3}/docs/commands/job/run.md +0 -0
  51. {datachain-0.34.1 → datachain-0.34.3}/docs/contributing.md +0 -0
  52. {datachain-0.34.1 → datachain-0.34.3}/docs/css/github-permalink-style.css +0 -0
  53. {datachain-0.34.1 → datachain-0.34.3}/docs/examples.md +0 -0
  54. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/db_migrations.md +0 -0
  55. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/delta.md +0 -0
  56. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/env.md +0 -0
  57. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/index.md +0 -0
  58. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/namespaces.md +0 -0
  59. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/processing.md +0 -0
  60. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/remotes.md +0 -0
  61. {datachain-0.34.1 → datachain-0.34.3}/docs/guide/retry.md +0 -0
  62. {datachain-0.34.1 → datachain-0.34.3}/docs/index.md +0 -0
  63. {datachain-0.34.1 → datachain-0.34.3}/docs/overrides/main.html +0 -0
  64. {datachain-0.34.1 → datachain-0.34.3}/docs/quick-start.md +0 -0
  65. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/arrowrow.md +0 -0
  66. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/bbox.md +0 -0
  67. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/file.md +0 -0
  68. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/imagefile.md +0 -0
  69. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/index.md +0 -0
  70. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/pose.md +0 -0
  71. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/segment.md +0 -0
  72. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/tarvfile.md +0 -0
  73. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/textfile.md +0 -0
  74. {datachain-0.34.1 → datachain-0.34.3}/docs/references/data-types/videofile.md +0 -0
  75. {datachain-0.34.1 → datachain-0.34.3}/docs/references/datachain.md +0 -0
  76. {datachain-0.34.1 → datachain-0.34.3}/docs/references/func.md +0 -0
  77. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/aggregate.md +0 -0
  78. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/array.md +0 -0
  79. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/conditional.md +0 -0
  80. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/numeric.md +0 -0
  81. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/path.md +0 -0
  82. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/random.md +0 -0
  83. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/string.md +0 -0
  84. {datachain-0.34.1 → datachain-0.34.3}/docs/references/functions/window.md +0 -0
  85. {datachain-0.34.1 → datachain-0.34.3}/docs/references/index.md +0 -0
  86. {datachain-0.34.1 → datachain-0.34.3}/docs/references/toolkit.md +0 -0
  87. {datachain-0.34.1 → datachain-0.34.3}/docs/references/torch.md +0 -0
  88. {datachain-0.34.1 → datachain-0.34.3}/docs/references/udf.md +0 -0
  89. {datachain-0.34.1 → datachain-0.34.3}/docs/studio/api/.gitkeep +0 -0
  90. {datachain-0.34.1 → datachain-0.34.3}/docs/studio/webhooks.md +0 -0
  91. {datachain-0.34.1 → datachain-0.34.3}/docs/templates/main.dot +0 -0
  92. {datachain-0.34.1 → datachain-0.34.3}/docs/templates/operation.dot +0 -0
  93. {datachain-0.34.1 → datachain-0.34.3}/docs/templates/responses.def +0 -0
  94. {datachain-0.34.1 → datachain-0.34.3}/docs/tutorials.md +0 -0
  95. {datachain-0.34.1 → datachain-0.34.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  96. {datachain-0.34.1 → datachain-0.34.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  97. {datachain-0.34.1 → datachain-0.34.3}/examples/computer_vision/openimage-detect.py +0 -0
  98. {datachain-0.34.1 → datachain-0.34.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
  99. {datachain-0.34.1 → datachain-0.34.3}/examples/computer_vision/ultralytics-pose.py +0 -0
  100. {datachain-0.34.1 → datachain-0.34.3}/examples/computer_vision/ultralytics-segment.py +0 -0
  101. {datachain-0.34.1 → datachain-0.34.3}/examples/get_started/common_sql_functions.py +0 -0
  102. {datachain-0.34.1 → datachain-0.34.3}/examples/get_started/json-csv-reader.py +0 -0
  103. {datachain-0.34.1 → datachain-0.34.3}/examples/get_started/nested_datamodel.py +0 -0
  104. {datachain-0.34.1 → datachain-0.34.3}/examples/get_started/torch-loader.py +0 -0
  105. {datachain-0.34.1 → datachain-0.34.3}/examples/get_started/udfs/parallel.py +0 -0
  106. {datachain-0.34.1 → datachain-0.34.3}/examples/get_started/udfs/simple.py +0 -0
  107. {datachain-0.34.1 → datachain-0.34.3}/examples/get_started/udfs/stateful.py +0 -0
  108. {datachain-0.34.1 → datachain-0.34.3}/examples/incremental_processing/delta.py +0 -0
  109. {datachain-0.34.1 → datachain-0.34.3}/examples/incremental_processing/retry.py +0 -0
  110. {datachain-0.34.1 → datachain-0.34.3}/examples/incremental_processing/utils.py +0 -0
  111. {datachain-0.34.1 → datachain-0.34.3}/examples/llm_and_nlp/claude-query.py +0 -0
  112. {datachain-0.34.1 → datachain-0.34.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  113. {datachain-0.34.1 → datachain-0.34.3}/examples/multimodal/audio-to-text.py +0 -0
  114. {datachain-0.34.1 → datachain-0.34.3}/examples/multimodal/clip_inference.py +0 -0
  115. {datachain-0.34.1 → datachain-0.34.3}/examples/multimodal/hf_pipeline.py +0 -0
  116. {datachain-0.34.1 → datachain-0.34.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
  117. {datachain-0.34.1 → datachain-0.34.3}/examples/multimodal/wds.py +0 -0
  118. {datachain-0.34.1 → datachain-0.34.3}/examples/multimodal/wds_filtered.py +0 -0
  119. {datachain-0.34.1 → datachain-0.34.3}/mkdocs.yml +0 -0
  120. {datachain-0.34.1 → datachain-0.34.3}/noxfile.py +0 -0
  121. {datachain-0.34.1 → datachain-0.34.3}/pyproject.toml +0 -0
  122. {datachain-0.34.1 → datachain-0.34.3}/setup.cfg +0 -0
  123. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/__init__.py +0 -0
  124. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/__main__.py +0 -0
  125. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/asyn.py +0 -0
  126. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cache.py +0 -0
  127. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/catalog/__init__.py +0 -0
  128. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/catalog/catalog.py +0 -0
  129. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/catalog/datasource.py +0 -0
  130. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/checkpoint.py +0 -0
  131. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/__init__.py +0 -0
  132. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/__init__.py +0 -0
  133. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/datasets.py +0 -0
  134. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/du.py +0 -0
  135. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/index.py +0 -0
  136. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/ls.py +0 -0
  137. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/misc.py +0 -0
  138. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/query.py +0 -0
  139. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/commands/show.py +0 -0
  140. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/parser/__init__.py +0 -0
  141. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/parser/job.py +0 -0
  142. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/parser/studio.py +0 -0
  143. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/parser/utils.py +0 -0
  144. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/cli/utils.py +0 -0
  145. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/__init__.py +0 -0
  146. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/azure.py +0 -0
  147. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/fileslice.py +0 -0
  148. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/fsspec.py +0 -0
  149. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/gcs.py +0 -0
  150. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/hf.py +0 -0
  151. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/http.py +0 -0
  152. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/local.py +0 -0
  153. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/client/s3.py +0 -0
  154. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/config.py +0 -0
  155. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/data_storage/__init__.py +0 -0
  156. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/data_storage/db_engine.py +0 -0
  157. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/data_storage/job.py +0 -0
  158. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/data_storage/metastore.py +0 -0
  159. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/data_storage/schema.py +0 -0
  160. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/dataset.py +0 -0
  161. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/delta.py +0 -0
  162. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/diff/__init__.py +0 -0
  163. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/error.py +0 -0
  164. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/fs/__init__.py +0 -0
  165. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/fs/reference.py +0 -0
  166. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/fs/utils.py +0 -0
  167. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/__init__.py +0 -0
  168. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/aggregate.py +0 -0
  169. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/array.py +0 -0
  170. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/base.py +0 -0
  171. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/conditional.py +0 -0
  172. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/func.py +0 -0
  173. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/numeric.py +0 -0
  174. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/path.py +0 -0
  175. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/random.py +0 -0
  176. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/string.py +0 -0
  177. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/func/window.py +0 -0
  178. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/hash_utils.py +0 -0
  179. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/job.py +0 -0
  180. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/__init__.py +0 -0
  181. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/arrow.py +0 -0
  182. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/audio.py +0 -0
  183. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/clip.py +0 -0
  184. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/convert/__init__.py +0 -0
  185. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/convert/flatten.py +0 -0
  186. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
  187. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
  188. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/convert/unflatten.py +0 -0
  189. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  190. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/data_model.py +0 -0
  191. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dataset_info.py +0 -0
  192. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/__init__.py +0 -0
  193. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/csv.py +0 -0
  194. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/database.py +0 -0
  195. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/datasets.py +0 -0
  196. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/hf.py +0 -0
  197. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/json.py +0 -0
  198. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/listings.py +0 -0
  199. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/pandas.py +0 -0
  200. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/parquet.py +0 -0
  201. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/records.py +0 -0
  202. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/storage.py +0 -0
  203. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/storage_pattern.py +0 -0
  204. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/utils.py +0 -0
  205. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/dc/values.py +0 -0
  206. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/file.py +0 -0
  207. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/hf.py +0 -0
  208. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/image.py +0 -0
  209. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/listing.py +0 -0
  210. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/listing_info.py +0 -0
  211. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/meta_formats.py +0 -0
  212. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/model_store.py +0 -0
  213. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/namespaces.py +0 -0
  214. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/projects.py +0 -0
  215. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/pytorch.py +0 -0
  216. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/settings.py +0 -0
  217. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/signal_schema.py +0 -0
  218. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/tar.py +0 -0
  219. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/text.py +0 -0
  220. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/udf.py +0 -0
  221. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/udf_signature.py +0 -0
  222. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/utils.py +0 -0
  223. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/video.py +0 -0
  224. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/webdataset.py +0 -0
  225. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/lib/webdataset_laion.py +0 -0
  226. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/listing.py +0 -0
  227. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/__init__.py +0 -0
  228. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/bbox.py +0 -0
  229. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/pose.py +0 -0
  230. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/segment.py +0 -0
  231. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/ultralytics/__init__.py +0 -0
  232. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/ultralytics/bbox.py +0 -0
  233. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/ultralytics/pose.py +0 -0
  234. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/ultralytics/segment.py +0 -0
  235. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/model/utils.py +0 -0
  236. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/namespace.py +0 -0
  237. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/node.py +0 -0
  238. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/nodes_fetcher.py +0 -0
  239. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/nodes_thread_pool.py +0 -0
  240. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/progress.py +0 -0
  241. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/project.py +0 -0
  242. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/py.typed +0 -0
  243. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/__init__.py +0 -0
  244. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/batch.py +0 -0
  245. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/dataset.py +0 -0
  246. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/dispatch.py +0 -0
  247. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/metrics.py +0 -0
  248. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/params.py +0 -0
  249. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/queue.py +0 -0
  250. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/schema.py +0 -0
  251. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/session.py +0 -0
  252. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/udf.py +0 -0
  253. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/query/utils.py +0 -0
  254. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/remote/__init__.py +0 -0
  255. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/remote/studio.py +0 -0
  256. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/script_meta.py +0 -0
  257. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/semver.py +0 -0
  258. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/__init__.py +0 -0
  259. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/default/__init__.py +0 -0
  260. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/default/base.py +0 -0
  261. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/__init__.py +0 -0
  262. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/aggregate.py +0 -0
  263. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/array.py +0 -0
  264. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/conditional.py +0 -0
  265. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/numeric.py +0 -0
  266. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/path.py +0 -0
  267. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/random.py +0 -0
  268. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/functions/string.py +0 -0
  269. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/postgresql_dialect.py +0 -0
  270. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/postgresql_types.py +0 -0
  271. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/selectable.py +0 -0
  272. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  273. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/sqlite/base.py +0 -0
  274. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/sqlite/types.py +0 -0
  275. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/sqlite/vector.py +0 -0
  276. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/types.py +0 -0
  277. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/sql/utils.py +0 -0
  278. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/studio.py +0 -0
  279. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/telemetry.py +0 -0
  280. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/toolkit/__init__.py +0 -0
  281. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/toolkit/split.py +0 -0
  282. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/torch/__init__.py +0 -0
  283. {datachain-0.34.1 → datachain-0.34.3}/src/datachain/utils.py +0 -0
  284. {datachain-0.34.1 → datachain-0.34.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  285. {datachain-0.34.1 → datachain-0.34.3}/src/datachain.egg-info/entry_points.txt +0 -0
  286. {datachain-0.34.1 → datachain-0.34.3}/src/datachain.egg-info/requires.txt +0 -0
  287. {datachain-0.34.1 → datachain-0.34.3}/src/datachain.egg-info/top_level.txt +0 -0
  288. {datachain-0.34.1 → datachain-0.34.3}/tests/__init__.py +0 -0
  289. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/__init__.py +0 -0
  290. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/conftest.py +0 -0
  291. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  292. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/datasets/.dvc/config +0 -0
  293. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/datasets/.gitignore +0 -0
  294. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  295. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/test_datachain.py +0 -0
  296. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/test_ls.py +0 -0
  297. {datachain-0.34.1 → datachain-0.34.3}/tests/benchmarks/test_version.py +0 -0
  298. {datachain-0.34.1 → datachain-0.34.3}/tests/data.py +0 -0
  299. {datachain-0.34.1 → datachain-0.34.3}/tests/examples/__init__.py +0 -0
  300. {datachain-0.34.1 → datachain-0.34.3}/tests/examples/test_examples.py +0 -0
  301. {datachain-0.34.1 → datachain-0.34.3}/tests/examples/test_wds_e2e.py +0 -0
  302. {datachain-0.34.1 → datachain-0.34.3}/tests/examples/wds_data.py +0 -0
  303. {datachain-0.34.1 → datachain-0.34.3}/tests/func/__init__.py +0 -0
  304. {datachain-0.34.1 → datachain-0.34.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  305. {datachain-0.34.1 → datachain-0.34.3}/tests/func/data/lena.jpg +0 -0
  306. {datachain-0.34.1 → datachain-0.34.3}/tests/func/fake-service-account-credentials.json +0 -0
  307. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/__init__.py +0 -0
  308. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/test_aggregate.py +0 -0
  309. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/test_array.py +0 -0
  310. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/test_conditional.py +0 -0
  311. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/test_numeric.py +0 -0
  312. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/test_path.py +0 -0
  313. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/test_random.py +0 -0
  314. {datachain-0.34.1 → datachain-0.34.3}/tests/func/functions/test_string.py +0 -0
  315. {datachain-0.34.1 → datachain-0.34.3}/tests/func/model/__init__.py +0 -0
  316. {datachain-0.34.1 → datachain-0.34.3}/tests/func/model/data/running-mask0.png +0 -0
  317. {datachain-0.34.1 → datachain-0.34.3}/tests/func/model/data/running-mask1.png +0 -0
  318. {datachain-0.34.1 → datachain-0.34.3}/tests/func/model/data/running.jpg +0 -0
  319. {datachain-0.34.1 → datachain-0.34.3}/tests/func/model/data/ships.jpg +0 -0
  320. {datachain-0.34.1 → datachain-0.34.3}/tests/func/model/test_yolo.py +0 -0
  321. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_audio.py +0 -0
  322. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_batching.py +0 -0
  323. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_catalog.py +0 -0
  324. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_client.py +0 -0
  325. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_cloud_transfer.py +0 -0
  326. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_data_storage.py +0 -0
  327. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_datachain.py +0 -0
  328. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_dataset_query.py +0 -0
  329. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_datasets.py +0 -0
  330. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_delta.py +0 -0
  331. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_feature_pickling.py +0 -0
  332. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_file.py +0 -0
  333. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_hf.py +0 -0
  334. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_hidden_field.py +0 -0
  335. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_image.py +0 -0
  336. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_listing.py +0 -0
  337. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_ls.py +0 -0
  338. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_meta_formats.py +0 -0
  339. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_metastore.py +0 -0
  340. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_metrics.py +0 -0
  341. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_mutate.py +0 -0
  342. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_pull.py +0 -0
  343. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_pytorch.py +0 -0
  344. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_query.py +0 -0
  345. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_read_database.py +0 -0
  346. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_read_dataset_remote.py +0 -0
  347. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  348. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_retry.py +0 -0
  349. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_session.py +0 -0
  350. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_storage_pattern.py +0 -0
  351. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_studio_datetime_parsing.py +0 -0
  352. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_to_database.py +0 -0
  353. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_toolkit.py +0 -0
  354. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_video.py +0 -0
  355. {datachain-0.34.1 → datachain-0.34.3}/tests/func/test_warehouse.py +0 -0
  356. {datachain-0.34.1 → datachain-0.34.3}/tests/scripts/feature_class.py +0 -0
  357. {datachain-0.34.1 → datachain-0.34.3}/tests/scripts/feature_class_exception.py +0 -0
  358. {datachain-0.34.1 → datachain-0.34.3}/tests/scripts/feature_class_parallel.py +0 -0
  359. {datachain-0.34.1 → datachain-0.34.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  360. {datachain-0.34.1 → datachain-0.34.3}/tests/scripts/name_len_slow.py +0 -0
  361. {datachain-0.34.1 → datachain-0.34.3}/tests/test_atomicity.py +0 -0
  362. {datachain-0.34.1 → datachain-0.34.3}/tests/test_cli_e2e.py +0 -0
  363. {datachain-0.34.1 → datachain-0.34.3}/tests/test_cli_studio.py +0 -0
  364. {datachain-0.34.1 → datachain-0.34.3}/tests/test_import_time.py +0 -0
  365. {datachain-0.34.1 → datachain-0.34.3}/tests/test_query_e2e.py +0 -0
  366. {datachain-0.34.1 → datachain-0.34.3}/tests/test_telemetry.py +0 -0
  367. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/__init__.py +0 -0
  368. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/__init__.py +0 -0
  369. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/conftest.py +0 -0
  370. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_arrow.py +0 -0
  371. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_audio.py +0 -0
  372. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_checkpoints.py +0 -0
  373. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_clip.py +0 -0
  374. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_datachain.py +0 -0
  375. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  376. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_datachain_merge.py +0 -0
  377. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_diff.py +0 -0
  378. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_feature.py +0 -0
  379. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_feature_utils.py +0 -0
  380. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_file.py +0 -0
  381. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_hf.py +0 -0
  382. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_image.py +0 -0
  383. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_listing_info.py +0 -0
  384. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_namespace.py +0 -0
  385. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_partition_by.py +0 -0
  386. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_project.py +0 -0
  387. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_python_to_sql.py +0 -0
  388. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_schema.py +0 -0
  389. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_settings.py +0 -0
  390. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_signal_schema.py +0 -0
  391. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_sql_to_python.py +0 -0
  392. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_storage_pattern.py +0 -0
  393. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_text.py +0 -0
  394. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_udf.py +0 -0
  395. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_udf_signature.py +0 -0
  396. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_utils.py +0 -0
  397. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/lib/test_webdataset.py +0 -0
  398. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/model/__init__.py +0 -0
  399. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/model/test_bbox.py +0 -0
  400. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/model/test_pose.py +0 -0
  401. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/model/test_segment.py +0 -0
  402. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/model/test_utils.py +0 -0
  403. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/__init__.py +0 -0
  404. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/sqlite/__init__.py +0 -0
  405. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/sqlite/test_types.py +0 -0
  406. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  407. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/test_array.py +0 -0
  408. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/test_conditional.py +0 -0
  409. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/test_path.py +0 -0
  410. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/test_random.py +0 -0
  411. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/test_selectable.py +0 -0
  412. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/sql/test_string.py +0 -0
  413. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_asyn.py +0 -0
  414. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_cache.py +0 -0
  415. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_catalog.py +0 -0
  416. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_catalog_loader.py +0 -0
  417. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_cli_datasets.py +0 -0
  418. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_cli_parsing.py +0 -0
  419. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_client.py +0 -0
  420. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_client_gcs.py +0 -0
  421. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_client_http.py +0 -0
  422. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_client_s3.py +0 -0
  423. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_config.py +0 -0
  424. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_data_storage.py +0 -0
  425. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_datachain_hash.py +0 -0
  426. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_dataset.py +0 -0
  427. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_dispatch.py +0 -0
  428. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_fileslice.py +0 -0
  429. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_func.py +0 -0
  430. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_hash_utils.py +0 -0
  431. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_listing.py +0 -0
  432. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_module_exports.py +0 -0
  433. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_pytorch.py +0 -0
  434. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_query.py +0 -0
  435. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_query_metrics.py +0 -0
  436. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_query_params.py +0 -0
  437. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_query_steps_hash.py +0 -0
  438. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_script_meta.py +0 -0
  439. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_semver.py +0 -0
  440. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_session.py +0 -0
  441. {datachain-0.34.1 → datachain-0.34.3}/tests/unit/test_utils.py +0 -0
  442. {datachain-0.34.1 → datachain-0.34.3}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.34.1
3
+ Version: 0.34.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -3,6 +3,7 @@ import sys
3
3
  from importlib import import_module
4
4
  from typing import TYPE_CHECKING, Any, Optional
5
5
 
6
+ from datachain.plugins import ensure_plugins_loaded
6
7
  from datachain.utils import get_envs_by_prefix
7
8
 
8
9
  if TYPE_CHECKING:
@@ -24,6 +25,8 @@ IN_MEMORY_ERROR_MESSAGE = "In-memory is only supported on SQLite"
24
25
 
25
26
 
26
27
  def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
28
+ ensure_plugins_loaded()
29
+
27
30
  from datachain.data_storage import AbstractMetastore
28
31
  from datachain.data_storage.serializer import deserialize
29
32
 
@@ -64,6 +67,8 @@ def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
64
67
 
65
68
 
66
69
  def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
70
+ ensure_plugins_loaded()
71
+
67
72
  from datachain.data_storage import AbstractWarehouse
68
73
  from datachain.data_storage.serializer import deserialize
69
74
 
@@ -0,0 +1,119 @@
1
+ import base64
2
+ import json
3
+ from abc import abstractmethod
4
+ from collections.abc import Callable
5
+ from typing import Any, ClassVar
6
+
7
+ from datachain.plugins import ensure_plugins_loaded
8
+
9
+
10
+ class CallableRegistry:
11
+ _registry: ClassVar[dict[str, Callable]] = {}
12
+
13
+ @classmethod
14
+ def register(cls, callable_obj: Callable, name: str) -> str:
15
+ cls._registry[name] = callable_obj
16
+ return name
17
+
18
+ @classmethod
19
+ def get(cls, name: str) -> Callable:
20
+ return cls._registry[name]
21
+
22
+
23
+ class Serializable:
24
+ @classmethod
25
+ @abstractmethod
26
+ def serialize_callable_name(cls) -> str:
27
+ """Return the registered name used for this class' factory callable."""
28
+
29
+ @abstractmethod
30
+ def clone_params(self) -> tuple[Callable[..., Any], list[Any], dict[str, Any]]:
31
+ """Return (callable, args, kwargs) necessary to recreate this object."""
32
+
33
+ def _prepare(self, params: tuple) -> dict:
34
+ callable, args, kwargs = params
35
+ callable_name = callable.__self__.serialize_callable_name()
36
+ return {
37
+ "callable": callable_name,
38
+ "args": args,
39
+ "kwargs": {
40
+ k: self._prepare(v) if isinstance(v, tuple) else v
41
+ for k, v in kwargs.items()
42
+ },
43
+ }
44
+
45
+ def serialize(self) -> str:
46
+ """Return a base64-encoded JSON string with registered callable + params."""
47
+ _ensure_default_callables_registered()
48
+ data = self.clone_params()
49
+ return base64.b64encode(json.dumps(self._prepare(data)).encode()).decode()
50
+
51
+
52
+ def deserialize(s: str) -> Serializable:
53
+ """Deserialize from base64-encoded JSON using only registered callables.
54
+
55
+ Nested serialized objects are instantiated automatically except for those
56
+ passed via clone parameter tuples (keys ending with ``_clone_params``),
57
+ which must remain as (callable, args, kwargs) for later factory usage.
58
+ """
59
+ ensure_plugins_loaded()
60
+ _ensure_default_callables_registered()
61
+ decoded = base64.b64decode(s.encode())
62
+ data = json.loads(decoded.decode())
63
+
64
+ def _is_serialized(obj: Any) -> bool:
65
+ return isinstance(obj, dict) and {"callable", "args", "kwargs"}.issubset(
66
+ obj.keys()
67
+ )
68
+
69
+ def _reconstruct(obj: Any, nested: bool = False) -> Any:
70
+ if not _is_serialized(obj):
71
+ return obj
72
+ callable_name: str = obj["callable"]
73
+ args: list[Any] = obj["args"]
74
+ kwargs: dict[str, Any] = obj["kwargs"]
75
+ # Recurse only inside kwargs because serialize() only nests through kwargs
76
+ for k, v in list(kwargs.items()):
77
+ if _is_serialized(v):
78
+ kwargs[k] = _reconstruct(v, True)
79
+ callable_obj = CallableRegistry.get(callable_name)
80
+ if nested:
81
+ return (callable_obj, args, kwargs)
82
+ # Otherwise instantiate
83
+ return callable_obj(*args, **kwargs)
84
+
85
+ if not _is_serialized(data):
86
+ raise ValueError("Invalid serialized data format")
87
+ return _reconstruct(data, False)
88
+
89
+
90
+ class _DefaultsState:
91
+ registered = False
92
+
93
+
94
+ def _ensure_default_callables_registered() -> None:
95
+ if _DefaultsState.registered:
96
+ return
97
+
98
+ from datachain.data_storage.sqlite import (
99
+ SQLiteDatabaseEngine,
100
+ SQLiteMetastore,
101
+ SQLiteWarehouse,
102
+ )
103
+
104
+ # Register (idempotent by name overwrite is fine) using class-level
105
+ # serialization names to avoid hard-coded literals here.
106
+ CallableRegistry.register(
107
+ SQLiteDatabaseEngine.from_db_file,
108
+ SQLiteDatabaseEngine.serialize_callable_name(),
109
+ )
110
+ CallableRegistry.register(
111
+ SQLiteMetastore.init_after_clone,
112
+ SQLiteMetastore.serialize_callable_name(),
113
+ )
114
+ CallableRegistry.register(
115
+ SQLiteWarehouse.init_after_clone,
116
+ SQLiteWarehouse.serialize_callable_name(),
117
+ )
118
+
119
+ _DefaultsState.registered = True
@@ -201,10 +201,14 @@ class SQLiteDatabaseEngine(DatabaseEngine):
201
201
  """
202
202
  return (
203
203
  SQLiteDatabaseEngine.from_db_file,
204
- [self.db_file],
204
+ [str(self.db_file)],
205
205
  {},
206
206
  )
207
207
 
208
+ @classmethod
209
+ def serialize_callable_name(cls) -> str:
210
+ return "sqlite.from_db_file"
211
+
208
212
  def _reconnect(self) -> None:
209
213
  if not self.is_closed:
210
214
  raise RuntimeError("Cannot reconnect on still-open DB!")
@@ -403,6 +407,10 @@ class SQLiteMetastore(AbstractDBMetastore):
403
407
  },
404
408
  )
405
409
 
410
+ @classmethod
411
+ def serialize_callable_name(cls) -> str:
412
+ return "sqlite.metastore.init_after_clone"
413
+
406
414
  @classmethod
407
415
  def init_after_clone(
408
416
  cls,
@@ -610,6 +618,10 @@ class SQLiteWarehouse(AbstractWarehouse):
610
618
  {"db_clone_params": self.db.clone_params()},
611
619
  )
612
620
 
621
+ @classmethod
622
+ def serialize_callable_name(cls) -> str:
623
+ return "sqlite.warehouse.init_after_clone"
624
+
613
625
  @classmethod
614
626
  def init_after_clone(
615
627
  cls,
@@ -863,8 +875,17 @@ class SQLiteWarehouse(AbstractWarehouse):
863
875
  if isinstance(c, BinaryExpression):
864
876
  right_left_join = add_left_rows_filter(c)
865
877
 
866
- union = sqlalchemy.union(left_right_join, right_left_join).subquery()
867
- return sqlalchemy.select(*union.c).select_from(union)
878
+ # Use CTE instead of subquery to force SQLite to materialize the result
879
+ # This breaks deep nesting and prevents parser stack overflow.
880
+ union_cte = sqlalchemy.union(left_right_join, right_left_join).cte()
881
+
882
+ return self._regenerate_system_columns(union_cte)
883
+
884
+ def _system_row_number_expr(self):
885
+ return func.row_number().over()
886
+
887
+ def _system_random_expr(self):
888
+ return self._system_row_number_expr() * 1103515245 + 12345
868
889
 
869
890
  def create_pre_udf_table(self, query: "Select") -> "Table":
870
891
  """
@@ -246,6 +246,44 @@ class AbstractWarehouse(ABC, Serializable):
246
246
  break # no more results
247
247
  offset += page_size
248
248
 
249
+ def _regenerate_system_columns(self, selectable):
250
+ """Return a SELECT that regenerates sys__id and sys__rand deterministically."""
251
+
252
+ base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
253
+
254
+ system_types: dict[str, sa.types.TypeEngine] = {
255
+ sys_col.name: sys_col.type
256
+ for sys_col in self.schema.dataset_row_cls.sys_columns()
257
+ }
258
+
259
+ result_columns = []
260
+ for col in base.c:
261
+ if col.name == "sys__id":
262
+ expr = self._system_row_number_expr()
263
+ expr = sa.cast(expr, system_types["sys__id"])
264
+ result_columns.append(expr.label("sys__id"))
265
+ elif col.name == "sys__rand":
266
+ expr = self._system_random_expr()
267
+ expr = sa.cast(expr, system_types["sys__rand"])
268
+ result_columns.append(expr.label("sys__rand"))
269
+ else:
270
+ result_columns.append(col)
271
+
272
+ # Wrap in subquery to materialize window functions, then wrap again in SELECT
273
+ # This ensures window functions are computed before INSERT...FROM SELECT
274
+ inner = sa.select(*result_columns).select_from(base).subquery()
275
+ return sa.select(*inner.c).select_from(inner)
276
+
277
+ def _system_row_number_expr(self):
278
+ """Return an expression that produces deterministic row numbers."""
279
+
280
+ raise NotImplementedError
281
+
282
+ def _system_random_expr(self):
283
+ """Return an expression that produces deterministic random values."""
284
+
285
+ raise NotImplementedError
286
+
249
287
  #
250
288
  # Table Name Internal Functions
251
289
  #
@@ -923,6 +961,8 @@ class AbstractWarehouse(ABC, Serializable):
923
961
  right: "_FromClauseArgument",
924
962
  onclause: "_OnClauseArgument",
925
963
  inner: bool = True,
964
+ full: bool = False,
965
+ columns=None,
926
966
  ) -> sa.Select:
927
967
  """
928
968
  Join two tables together.
@@ -1701,7 +1701,11 @@ class DataChain:
1701
1701
  )
1702
1702
 
1703
1703
  query = self._query.join(
1704
- right_ds._query, sqlalchemy.and_(*ops), inner, full, rname + "{name}"
1704
+ right_ds._query,
1705
+ sqlalchemy.and_(*ops),
1706
+ inner,
1707
+ full,
1708
+ rname + "{name}",
1705
1709
  )
1706
1710
  query.feature_schema = None
1707
1711
  ds = self._evolve(query=query)
@@ -1989,7 +1993,8 @@ class DataChain:
1989
1993
  results = self.results(include_hidden=include_hidden)
1990
1994
  if as_object:
1991
1995
  df = pd.DataFrame(results, columns=columns, dtype=object)
1992
- return df.where(pd.notna(df), None)
1996
+ df.where(pd.notna(df), None, inplace=True)
1997
+ return df
1993
1998
  return pd.DataFrame.from_records(results, columns=columns)
1994
1999
 
1995
2000
  def show(
@@ -0,0 +1,30 @@
1
+ """Plugin loader for DataChain callables.
2
+
3
+ Discovers and invokes entry points in the group "datachain.callables" once
4
+ per process. This enables external packages (e.g., Studio) to register
5
+ their callables with the serializer registry without explicit imports.
6
+ """
7
+
8
+ from importlib import metadata as importlib_metadata
9
+
10
+ _plugins_loaded = False
11
+
12
+
13
+ def ensure_plugins_loaded() -> None:
14
+ global _plugins_loaded # noqa: PLW0603
15
+ if _plugins_loaded:
16
+ return
17
+
18
+ # Compatible across importlib.metadata versions
19
+ eps_obj = importlib_metadata.entry_points()
20
+ if hasattr(eps_obj, "select"):
21
+ eps_list = eps_obj.select(group="datachain.callables")
22
+ else:
23
+ # Compatibility for older versions of importlib_metadata, Python 3.9
24
+ eps_list = eps_obj.get("datachain.callables", []) # type: ignore[attr-defined]
25
+
26
+ for ep in eps_list:
27
+ func = ep.load()
28
+ func()
29
+
30
+ _plugins_loaded = True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.34.1
3
+ Version: 0.34.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -118,6 +118,7 @@ src/datachain/namespace.py
118
118
  src/datachain/node.py
119
119
  src/datachain/nodes_fetcher.py
120
120
  src/datachain/nodes_thread_pool.py
121
+ src/datachain/plugins.py
121
122
  src/datachain/progress.py
122
123
  src/datachain/project.py
123
124
  src/datachain/py.typed
@@ -126,6 +126,7 @@ def clean_environment(
126
126
  working_dir = str(tmp_path_factory.mktemp("default_working_dir"))
127
127
  monkeypatch_session.chdir(working_dir)
128
128
  monkeypatch_session.delenv(DataChainDir.ENV_VAR, raising=False)
129
+ monkeypatch_session.delenv(DataChainDir.ENV_VAR_DATACHAIN_ROOT, raising=False)
129
130
 
130
131
 
131
132
  @pytest.fixture
@@ -1,6 +1,7 @@
1
1
  import pytest
2
2
 
3
3
  import datachain as dc
4
+ from datachain import File
4
5
  from datachain.sql.types import Int
5
6
 
6
7
 
@@ -99,3 +100,46 @@ def test_merge_multiple(cloud_test_catalog, inner1, inner2, inner3):
99
100
  ("dogs/dog3", 1, signal_default_value),
100
101
  ("dogs/others/dog4", 1, signal_default_value),
101
102
  ]
103
+
104
+
105
+ def test_full_outer_join_preserves_all_rows(test_session):
106
+ """Test that full outer join correctly saves all rows including right-only rows.
107
+ This test verifies the fix for the NULL sys__id bug where right-only rows
108
+ from a full outer join had NULL sys__id values
109
+ """
110
+ # Create two datasets with no overlapping file paths
111
+ ds1 = dc.read_values(
112
+ id=[1, 2, 3],
113
+ file=[File(path=str(i)) for i in [1, 2, 3]],
114
+ session=test_session,
115
+ ).persist()
116
+
117
+ ds2 = dc.read_values(
118
+ id=[5, 6, 7],
119
+ file=[File(path=str(i)) for i in [5, 6, 7]],
120
+ session=test_session,
121
+ ).persist()
122
+
123
+ merged = ds1.merge(ds2, on="file.path", full=True)
124
+
125
+ # Use internal method to get all records including sys columns
126
+ records = merged._query.to_db_records()
127
+
128
+ assert len(records) == 6
129
+
130
+ # Extract sys__id and sys__rand from records
131
+ sys_ids = [r["sys__id"] for r in records]
132
+ sys_rands = [r["sys__rand"] for r in records]
133
+
134
+ # All sys__id values should be non-NULL and unique
135
+ assert all(sid is not None for sid in sys_ids)
136
+ assert len(set(sys_ids)) == 6
137
+ assert all(rand is not None for rand in sys_rands)
138
+
139
+ count_before = merged.count()
140
+
141
+ # Save and verify all rows are persisted
142
+ merged.save("test_merge")
143
+ count_after = dc.read_dataset("test_merge", session=test_session).count()
144
+
145
+ assert count_before == count_after == 6
@@ -1,12 +1,15 @@
1
1
  import base64
2
+ import json
2
3
  import os
3
- import pickle
4
4
 
5
5
  import pytest
6
6
  from sqlalchemy import Column, Integer, Table
7
7
 
8
8
  from datachain.data_storage.serializer import deserialize
9
- from datachain.data_storage.sqlite import SQLiteDatabaseEngine, get_db_file_in_memory
9
+ from datachain.data_storage.sqlite import (
10
+ SQLiteDatabaseEngine,
11
+ get_db_file_in_memory,
12
+ )
10
13
  from tests.utils import skip_if_not_sqlite
11
14
 
12
15
 
@@ -24,6 +27,7 @@ def test_init_clone(tmp_dir, db_file, expected_db_file):
24
27
  expected_db_file = os.fspath(tmp_dir / expected_db_file)
25
28
 
26
29
  with SQLiteDatabaseEngine.from_db_file(db_file) as db:
30
+ assert isinstance(db, SQLiteDatabaseEngine)
27
31
  assert db.db_file == expected_db_file
28
32
 
29
33
  # Test clone
@@ -53,17 +57,15 @@ def test_get_db_file_in_memory(db_file, in_memory, expected):
53
57
 
54
58
 
55
59
  def test_serialize(sqlite_db):
56
- # Test serialization
60
+ # JSON serialization format
57
61
  serialized = sqlite_db.serialize()
58
62
  assert serialized
59
- serialized_pickled = base64.b64decode(serialized.encode())
60
- assert serialized_pickled
61
- (f, args, kwargs) = pickle.loads(serialized_pickled) # noqa: S301
62
- assert str(f) == str(SQLiteDatabaseEngine.from_db_file)
63
- assert args == [":memory:"]
64
- assert kwargs == {}
65
-
66
- # Test deserialization
63
+ raw = base64.b64decode(serialized.encode())
64
+ data = json.loads(raw.decode())
65
+ assert data["callable"] == "sqlite.from_db_file"
66
+ assert data["args"] == [":memory:"]
67
+ assert data["kwargs"] == {}
68
+
67
69
  obj3 = deserialize(serialized)
68
70
  assert isinstance(obj3, SQLiteDatabaseEngine)
69
71
  assert obj3.db_file == ":memory:"
@@ -1,5 +1,5 @@
1
1
  import base64
2
- import pickle
2
+ import json
3
3
 
4
4
  import pytest
5
5
 
@@ -24,18 +24,19 @@ def test_sqlite_metastore(sqlite_db):
24
24
  assert obj2.db.db_file == sqlite_db.db_file
25
25
  assert obj2.clone_params() == obj.clone_params()
26
26
 
27
- # Test serialization
27
+ # Test serialization JSON format
28
28
  serialized = obj.serialize()
29
29
  assert serialized
30
- serialized_pickled = base64.b64decode(serialized.encode())
31
- assert serialized_pickled
32
- (f, args, kwargs) = pickle.loads(serialized_pickled) # noqa: S301
33
- assert str(f) == str(SQLiteMetastore.init_after_clone)
34
- assert args == []
35
- assert kwargs["uri"] == uri
36
- assert str(kwargs["db_clone_params"]) == str(sqlite_db.clone_params())
37
-
38
- # Test deserialization
30
+ raw = base64.b64decode(serialized.encode())
31
+ data = json.loads(raw.decode())
32
+ assert data["callable"] == "sqlite.metastore.init_after_clone"
33
+ assert data["args"] == []
34
+ assert data["kwargs"]["uri"] == uri
35
+ nested = data["kwargs"]["db_clone_params"]
36
+ assert nested["callable"] == "sqlite.from_db_file"
37
+ assert nested["args"] == [":memory:"]
38
+ assert nested["kwargs"] == {}
39
+
39
40
  obj3 = deserialize(serialized)
40
41
  assert isinstance(obj3, SQLiteMetastore)
41
42
  assert obj3.uri == uri