datachain 0.34.1__tar.gz → 0.34.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (442)
  1. {datachain-0.34.1 → datachain-0.34.2}/PKG-INFO +1 -1
  2. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/catalog/loader.py +5 -0
  3. datachain-0.34.2/src/datachain/data_storage/serializer.py +119 -0
  4. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/data_storage/sqlite.py +13 -1
  5. datachain-0.34.2/src/datachain/plugins.py +30 -0
  6. {datachain-0.34.1 → datachain-0.34.2}/src/datachain.egg-info/PKG-INFO +1 -1
  7. {datachain-0.34.1 → datachain-0.34.2}/src/datachain.egg-info/SOURCES.txt +1 -0
  8. {datachain-0.34.1 → datachain-0.34.2}/tests/conftest.py +1 -0
  9. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_database_engine.py +13 -11
  10. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_metastore.py +12 -11
  11. datachain-0.34.2/tests/unit/test_serializer.py +218 -0
  12. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_warehouse.py +11 -10
  13. datachain-0.34.1/src/datachain/data_storage/serializer.py +0 -29
  14. datachain-0.34.1/tests/unit/test_serializer.py +0 -92
  15. {datachain-0.34.1 → datachain-0.34.2}/.cruft.json +0 -0
  16. {datachain-0.34.1 → datachain-0.34.2}/.gitattributes +0 -0
  17. {datachain-0.34.1 → datachain-0.34.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  18. {datachain-0.34.1 → datachain-0.34.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  19. {datachain-0.34.1 → datachain-0.34.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  20. {datachain-0.34.1 → datachain-0.34.2}/.github/codecov.yaml +0 -0
  21. {datachain-0.34.1 → datachain-0.34.2}/.github/dependabot.yml +0 -0
  22. {datachain-0.34.1 → datachain-0.34.2}/.github/workflows/benchmarks.yml +0 -0
  23. {datachain-0.34.1 → datachain-0.34.2}/.github/workflows/release.yml +0 -0
  24. {datachain-0.34.1 → datachain-0.34.2}/.github/workflows/tests-studio.yml +0 -0
  25. {datachain-0.34.1 → datachain-0.34.2}/.github/workflows/tests.yml +0 -0
  26. {datachain-0.34.1 → datachain-0.34.2}/.github/workflows/update-template.yaml +0 -0
  27. {datachain-0.34.1 → datachain-0.34.2}/.gitignore +0 -0
  28. {datachain-0.34.1 → datachain-0.34.2}/.pre-commit-config.yaml +0 -0
  29. {datachain-0.34.1 → datachain-0.34.2}/CODE_OF_CONDUCT.rst +0 -0
  30. {datachain-0.34.1 → datachain-0.34.2}/LICENSE +0 -0
  31. {datachain-0.34.1 → datachain-0.34.2}/README.rst +0 -0
  32. {datachain-0.34.1 → datachain-0.34.2}/docs/api_hooks.py +0 -0
  33. {datachain-0.34.1 → datachain-0.34.2}/docs/assets/captioned_cartoons.png +0 -0
  34. {datachain-0.34.1 → datachain-0.34.2}/docs/assets/datachain-white.svg +0 -0
  35. {datachain-0.34.1 → datachain-0.34.2}/docs/assets/datachain.svg +0 -0
  36. {datachain-0.34.1 → datachain-0.34.2}/docs/assets/webhook_dialog.png +0 -0
  37. {datachain-0.34.1 → datachain-0.34.2}/docs/assets/webhook_list.png +0 -0
  38. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/auth/login.md +0 -0
  39. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/auth/logout.md +0 -0
  40. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/auth/team.md +0 -0
  41. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/auth/token.md +0 -0
  42. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/index.md +0 -0
  43. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/job/cancel.md +0 -0
  44. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/job/clusters.md +0 -0
  45. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/job/logs.md +0 -0
  46. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/job/ls.md +0 -0
  47. {datachain-0.34.1 → datachain-0.34.2}/docs/commands/job/run.md +0 -0
  48. {datachain-0.34.1 → datachain-0.34.2}/docs/contributing.md +0 -0
  49. {datachain-0.34.1 → datachain-0.34.2}/docs/css/github-permalink-style.css +0 -0
  50. {datachain-0.34.1 → datachain-0.34.2}/docs/examples.md +0 -0
  51. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/db_migrations.md +0 -0
  52. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/delta.md +0 -0
  53. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/env.md +0 -0
  54. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/index.md +0 -0
  55. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/namespaces.md +0 -0
  56. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/processing.md +0 -0
  57. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/remotes.md +0 -0
  58. {datachain-0.34.1 → datachain-0.34.2}/docs/guide/retry.md +0 -0
  59. {datachain-0.34.1 → datachain-0.34.2}/docs/index.md +0 -0
  60. {datachain-0.34.1 → datachain-0.34.2}/docs/overrides/main.html +0 -0
  61. {datachain-0.34.1 → datachain-0.34.2}/docs/quick-start.md +0 -0
  62. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/arrowrow.md +0 -0
  63. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/bbox.md +0 -0
  64. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/file.md +0 -0
  65. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/imagefile.md +0 -0
  66. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/index.md +0 -0
  67. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/pose.md +0 -0
  68. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/segment.md +0 -0
  69. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/tarvfile.md +0 -0
  70. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/textfile.md +0 -0
  71. {datachain-0.34.1 → datachain-0.34.2}/docs/references/data-types/videofile.md +0 -0
  72. {datachain-0.34.1 → datachain-0.34.2}/docs/references/datachain.md +0 -0
  73. {datachain-0.34.1 → datachain-0.34.2}/docs/references/func.md +0 -0
  74. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/aggregate.md +0 -0
  75. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/array.md +0 -0
  76. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/conditional.md +0 -0
  77. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/numeric.md +0 -0
  78. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/path.md +0 -0
  79. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/random.md +0 -0
  80. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/string.md +0 -0
  81. {datachain-0.34.1 → datachain-0.34.2}/docs/references/functions/window.md +0 -0
  82. {datachain-0.34.1 → datachain-0.34.2}/docs/references/index.md +0 -0
  83. {datachain-0.34.1 → datachain-0.34.2}/docs/references/toolkit.md +0 -0
  84. {datachain-0.34.1 → datachain-0.34.2}/docs/references/torch.md +0 -0
  85. {datachain-0.34.1 → datachain-0.34.2}/docs/references/udf.md +0 -0
  86. {datachain-0.34.1 → datachain-0.34.2}/docs/studio/api/.gitkeep +0 -0
  87. {datachain-0.34.1 → datachain-0.34.2}/docs/studio/webhooks.md +0 -0
  88. {datachain-0.34.1 → datachain-0.34.2}/docs/templates/main.dot +0 -0
  89. {datachain-0.34.1 → datachain-0.34.2}/docs/templates/operation.dot +0 -0
  90. {datachain-0.34.1 → datachain-0.34.2}/docs/templates/responses.def +0 -0
  91. {datachain-0.34.1 → datachain-0.34.2}/docs/tutorials.md +0 -0
  92. {datachain-0.34.1 → datachain-0.34.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  93. {datachain-0.34.1 → datachain-0.34.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  94. {datachain-0.34.1 → datachain-0.34.2}/examples/computer_vision/openimage-detect.py +0 -0
  95. {datachain-0.34.1 → datachain-0.34.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  96. {datachain-0.34.1 → datachain-0.34.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  97. {datachain-0.34.1 → datachain-0.34.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  98. {datachain-0.34.1 → datachain-0.34.2}/examples/get_started/common_sql_functions.py +0 -0
  99. {datachain-0.34.1 → datachain-0.34.2}/examples/get_started/json-csv-reader.py +0 -0
  100. {datachain-0.34.1 → datachain-0.34.2}/examples/get_started/nested_datamodel.py +0 -0
  101. {datachain-0.34.1 → datachain-0.34.2}/examples/get_started/torch-loader.py +0 -0
  102. {datachain-0.34.1 → datachain-0.34.2}/examples/get_started/udfs/parallel.py +0 -0
  103. {datachain-0.34.1 → datachain-0.34.2}/examples/get_started/udfs/simple.py +0 -0
  104. {datachain-0.34.1 → datachain-0.34.2}/examples/get_started/udfs/stateful.py +0 -0
  105. {datachain-0.34.1 → datachain-0.34.2}/examples/incremental_processing/delta.py +0 -0
  106. {datachain-0.34.1 → datachain-0.34.2}/examples/incremental_processing/retry.py +0 -0
  107. {datachain-0.34.1 → datachain-0.34.2}/examples/incremental_processing/utils.py +0 -0
  108. {datachain-0.34.1 → datachain-0.34.2}/examples/llm_and_nlp/claude-query.py +0 -0
  109. {datachain-0.34.1 → datachain-0.34.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  110. {datachain-0.34.1 → datachain-0.34.2}/examples/multimodal/audio-to-text.py +0 -0
  111. {datachain-0.34.1 → datachain-0.34.2}/examples/multimodal/clip_inference.py +0 -0
  112. {datachain-0.34.1 → datachain-0.34.2}/examples/multimodal/hf_pipeline.py +0 -0
  113. {datachain-0.34.1 → datachain-0.34.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  114. {datachain-0.34.1 → datachain-0.34.2}/examples/multimodal/wds.py +0 -0
  115. {datachain-0.34.1 → datachain-0.34.2}/examples/multimodal/wds_filtered.py +0 -0
  116. {datachain-0.34.1 → datachain-0.34.2}/mkdocs.yml +0 -0
  117. {datachain-0.34.1 → datachain-0.34.2}/noxfile.py +0 -0
  118. {datachain-0.34.1 → datachain-0.34.2}/pyproject.toml +0 -0
  119. {datachain-0.34.1 → datachain-0.34.2}/setup.cfg +0 -0
  120. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/__init__.py +0 -0
  121. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/__main__.py +0 -0
  122. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/asyn.py +0 -0
  123. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cache.py +0 -0
  124. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/catalog/__init__.py +0 -0
  125. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/catalog/catalog.py +0 -0
  126. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/catalog/datasource.py +0 -0
  127. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/checkpoint.py +0 -0
  128. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/__init__.py +0 -0
  129. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/__init__.py +0 -0
  130. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/datasets.py +0 -0
  131. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/du.py +0 -0
  132. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/index.py +0 -0
  133. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/ls.py +0 -0
  134. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/misc.py +0 -0
  135. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/query.py +0 -0
  136. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/commands/show.py +0 -0
  137. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/parser/__init__.py +0 -0
  138. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/parser/job.py +0 -0
  139. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/parser/studio.py +0 -0
  140. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/parser/utils.py +0 -0
  141. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/cli/utils.py +0 -0
  142. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/__init__.py +0 -0
  143. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/azure.py +0 -0
  144. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/fileslice.py +0 -0
  145. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/fsspec.py +0 -0
  146. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/gcs.py +0 -0
  147. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/hf.py +0 -0
  148. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/http.py +0 -0
  149. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/local.py +0 -0
  150. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/client/s3.py +0 -0
  151. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/config.py +0 -0
  152. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/data_storage/__init__.py +0 -0
  153. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/data_storage/db_engine.py +0 -0
  154. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/data_storage/job.py +0 -0
  155. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/data_storage/metastore.py +0 -0
  156. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/data_storage/schema.py +0 -0
  157. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/data_storage/warehouse.py +0 -0
  158. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/dataset.py +0 -0
  159. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/delta.py +0 -0
  160. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/diff/__init__.py +0 -0
  161. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/error.py +0 -0
  162. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/fs/__init__.py +0 -0
  163. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/fs/reference.py +0 -0
  164. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/fs/utils.py +0 -0
  165. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/__init__.py +0 -0
  166. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/aggregate.py +0 -0
  167. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/array.py +0 -0
  168. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/base.py +0 -0
  169. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/conditional.py +0 -0
  170. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/func.py +0 -0
  171. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/numeric.py +0 -0
  172. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/path.py +0 -0
  173. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/random.py +0 -0
  174. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/string.py +0 -0
  175. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/func/window.py +0 -0
  176. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/hash_utils.py +0 -0
  177. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/job.py +0 -0
  178. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/__init__.py +0 -0
  179. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/arrow.py +0 -0
  180. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/audio.py +0 -0
  181. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/clip.py +0 -0
  182. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/convert/__init__.py +0 -0
  183. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/convert/flatten.py +0 -0
  184. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  185. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  186. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/convert/unflatten.py +0 -0
  187. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  188. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/data_model.py +0 -0
  189. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dataset_info.py +0 -0
  190. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/__init__.py +0 -0
  191. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/csv.py +0 -0
  192. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/database.py +0 -0
  193. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/datachain.py +0 -0
  194. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/datasets.py +0 -0
  195. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/hf.py +0 -0
  196. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/json.py +0 -0
  197. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/listings.py +0 -0
  198. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/pandas.py +0 -0
  199. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/parquet.py +0 -0
  200. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/records.py +0 -0
  201. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/storage.py +0 -0
  202. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/storage_pattern.py +0 -0
  203. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/utils.py +0 -0
  204. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/dc/values.py +0 -0
  205. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/file.py +0 -0
  206. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/hf.py +0 -0
  207. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/image.py +0 -0
  208. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/listing.py +0 -0
  209. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/listing_info.py +0 -0
  210. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/meta_formats.py +0 -0
  211. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/model_store.py +0 -0
  212. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/namespaces.py +0 -0
  213. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/projects.py +0 -0
  214. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/pytorch.py +0 -0
  215. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/settings.py +0 -0
  216. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/signal_schema.py +0 -0
  217. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/tar.py +0 -0
  218. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/text.py +0 -0
  219. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/udf.py +0 -0
  220. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/udf_signature.py +0 -0
  221. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/utils.py +0 -0
  222. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/video.py +0 -0
  223. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/webdataset.py +0 -0
  224. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/lib/webdataset_laion.py +0 -0
  225. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/listing.py +0 -0
  226. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/__init__.py +0 -0
  227. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/bbox.py +0 -0
  228. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/pose.py +0 -0
  229. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/segment.py +0 -0
  230. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  231. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  232. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/ultralytics/pose.py +0 -0
  233. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/ultralytics/segment.py +0 -0
  234. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/model/utils.py +0 -0
  235. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/namespace.py +0 -0
  236. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/node.py +0 -0
  237. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/nodes_fetcher.py +0 -0
  238. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/nodes_thread_pool.py +0 -0
  239. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/progress.py +0 -0
  240. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/project.py +0 -0
  241. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/py.typed +0 -0
  242. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/__init__.py +0 -0
  243. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/batch.py +0 -0
  244. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/dataset.py +0 -0
  245. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/dispatch.py +0 -0
  246. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/metrics.py +0 -0
  247. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/params.py +0 -0
  248. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/queue.py +0 -0
  249. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/schema.py +0 -0
  250. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/session.py +0 -0
  251. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/udf.py +0 -0
  252. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/query/utils.py +0 -0
  253. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/remote/__init__.py +0 -0
  254. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/remote/studio.py +0 -0
  255. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/script_meta.py +0 -0
  256. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/semver.py +0 -0
  257. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/__init__.py +0 -0
  258. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/default/__init__.py +0 -0
  259. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/default/base.py +0 -0
  260. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/__init__.py +0 -0
  261. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/aggregate.py +0 -0
  262. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/array.py +0 -0
  263. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/conditional.py +0 -0
  264. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/numeric.py +0 -0
  265. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/path.py +0 -0
  266. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/random.py +0 -0
  267. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/functions/string.py +0 -0
  268. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/postgresql_dialect.py +0 -0
  269. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/postgresql_types.py +0 -0
  270. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/selectable.py +0 -0
  271. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  272. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/sqlite/base.py +0 -0
  273. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/sqlite/types.py +0 -0
  274. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/sqlite/vector.py +0 -0
  275. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/types.py +0 -0
  276. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/sql/utils.py +0 -0
  277. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/studio.py +0 -0
  278. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/telemetry.py +0 -0
  279. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/toolkit/__init__.py +0 -0
  280. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/toolkit/split.py +0 -0
  281. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/torch/__init__.py +0 -0
  282. {datachain-0.34.1 → datachain-0.34.2}/src/datachain/utils.py +0 -0
  283. {datachain-0.34.1 → datachain-0.34.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  284. {datachain-0.34.1 → datachain-0.34.2}/src/datachain.egg-info/entry_points.txt +0 -0
  285. {datachain-0.34.1 → datachain-0.34.2}/src/datachain.egg-info/requires.txt +0 -0
  286. {datachain-0.34.1 → datachain-0.34.2}/src/datachain.egg-info/top_level.txt +0 -0
  287. {datachain-0.34.1 → datachain-0.34.2}/tests/__init__.py +0 -0
  288. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/__init__.py +0 -0
  289. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/conftest.py +0 -0
  290. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  291. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  292. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/datasets/.gitignore +0 -0
  293. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  294. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/test_datachain.py +0 -0
  295. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/test_ls.py +0 -0
  296. {datachain-0.34.1 → datachain-0.34.2}/tests/benchmarks/test_version.py +0 -0
  297. {datachain-0.34.1 → datachain-0.34.2}/tests/data.py +0 -0
  298. {datachain-0.34.1 → datachain-0.34.2}/tests/examples/__init__.py +0 -0
  299. {datachain-0.34.1 → datachain-0.34.2}/tests/examples/test_examples.py +0 -0
  300. {datachain-0.34.1 → datachain-0.34.2}/tests/examples/test_wds_e2e.py +0 -0
  301. {datachain-0.34.1 → datachain-0.34.2}/tests/examples/wds_data.py +0 -0
  302. {datachain-0.34.1 → datachain-0.34.2}/tests/func/__init__.py +0 -0
  303. {datachain-0.34.1 → datachain-0.34.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  304. {datachain-0.34.1 → datachain-0.34.2}/tests/func/data/lena.jpg +0 -0
  305. {datachain-0.34.1 → datachain-0.34.2}/tests/func/fake-service-account-credentials.json +0 -0
  306. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/__init__.py +0 -0
  307. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/test_aggregate.py +0 -0
  308. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/test_array.py +0 -0
  309. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/test_conditional.py +0 -0
  310. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/test_numeric.py +0 -0
  311. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/test_path.py +0 -0
  312. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/test_random.py +0 -0
  313. {datachain-0.34.1 → datachain-0.34.2}/tests/func/functions/test_string.py +0 -0
  314. {datachain-0.34.1 → datachain-0.34.2}/tests/func/model/__init__.py +0 -0
  315. {datachain-0.34.1 → datachain-0.34.2}/tests/func/model/data/running-mask0.png +0 -0
  316. {datachain-0.34.1 → datachain-0.34.2}/tests/func/model/data/running-mask1.png +0 -0
  317. {datachain-0.34.1 → datachain-0.34.2}/tests/func/model/data/running.jpg +0 -0
  318. {datachain-0.34.1 → datachain-0.34.2}/tests/func/model/data/ships.jpg +0 -0
  319. {datachain-0.34.1 → datachain-0.34.2}/tests/func/model/test_yolo.py +0 -0
  320. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_audio.py +0 -0
  321. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_batching.py +0 -0
  322. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_catalog.py +0 -0
  323. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_client.py +0 -0
  324. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_cloud_transfer.py +0 -0
  325. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_data_storage.py +0 -0
  326. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_datachain.py +0 -0
  327. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_datachain_merge.py +0 -0
  328. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_dataset_query.py +0 -0
  329. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_datasets.py +0 -0
  330. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_delta.py +0 -0
  331. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_feature_pickling.py +0 -0
  332. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_file.py +0 -0
  333. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_hf.py +0 -0
  334. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_hidden_field.py +0 -0
  335. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_image.py +0 -0
  336. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_listing.py +0 -0
  337. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_ls.py +0 -0
  338. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_meta_formats.py +0 -0
  339. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_metastore.py +0 -0
  340. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_metrics.py +0 -0
  341. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_mutate.py +0 -0
  342. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_pull.py +0 -0
  343. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_pytorch.py +0 -0
  344. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_query.py +0 -0
  345. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_read_database.py +0 -0
  346. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_read_dataset_remote.py +0 -0
  347. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  348. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_retry.py +0 -0
  349. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_session.py +0 -0
  350. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_storage_pattern.py +0 -0
  351. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_studio_datetime_parsing.py +0 -0
  352. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_to_database.py +0 -0
  353. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_toolkit.py +0 -0
  354. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_video.py +0 -0
  355. {datachain-0.34.1 → datachain-0.34.2}/tests/func/test_warehouse.py +0 -0
  356. {datachain-0.34.1 → datachain-0.34.2}/tests/scripts/feature_class.py +0 -0
  357. {datachain-0.34.1 → datachain-0.34.2}/tests/scripts/feature_class_exception.py +0 -0
  358. {datachain-0.34.1 → datachain-0.34.2}/tests/scripts/feature_class_parallel.py +0 -0
  359. {datachain-0.34.1 → datachain-0.34.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  360. {datachain-0.34.1 → datachain-0.34.2}/tests/scripts/name_len_slow.py +0 -0
  361. {datachain-0.34.1 → datachain-0.34.2}/tests/test_atomicity.py +0 -0
  362. {datachain-0.34.1 → datachain-0.34.2}/tests/test_cli_e2e.py +0 -0
  363. {datachain-0.34.1 → datachain-0.34.2}/tests/test_cli_studio.py +0 -0
  364. {datachain-0.34.1 → datachain-0.34.2}/tests/test_import_time.py +0 -0
  365. {datachain-0.34.1 → datachain-0.34.2}/tests/test_query_e2e.py +0 -0
  366. {datachain-0.34.1 → datachain-0.34.2}/tests/test_telemetry.py +0 -0
  367. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/__init__.py +0 -0
  368. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/__init__.py +0 -0
  369. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/conftest.py +0 -0
  370. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_arrow.py +0 -0
  371. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_audio.py +0 -0
  372. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_checkpoints.py +0 -0
  373. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_clip.py +0 -0
  374. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_datachain.py +0 -0
  375. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  376. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  377. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_diff.py +0 -0
  378. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_feature.py +0 -0
  379. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_feature_utils.py +0 -0
  380. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_file.py +0 -0
  381. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_hf.py +0 -0
  382. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_image.py +0 -0
  383. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_listing_info.py +0 -0
  384. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_namespace.py +0 -0
  385. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_partition_by.py +0 -0
  386. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_project.py +0 -0
  387. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  388. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_schema.py +0 -0
  389. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_settings.py +0 -0
  390. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_signal_schema.py +0 -0
  391. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  392. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_storage_pattern.py +0 -0
  393. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_text.py +0 -0
  394. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_udf.py +0 -0
  395. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_udf_signature.py +0 -0
  396. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_utils.py +0 -0
  397. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/lib/test_webdataset.py +0 -0
  398. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/model/__init__.py +0 -0
  399. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/model/test_bbox.py +0 -0
  400. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/model/test_pose.py +0 -0
  401. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/model/test_segment.py +0 -0
  402. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/model/test_utils.py +0 -0
  403. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/__init__.py +0 -0
  404. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  405. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  406. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  407. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/test_array.py +0 -0
  408. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/test_conditional.py +0 -0
  409. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/test_path.py +0 -0
  410. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/test_random.py +0 -0
  411. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/test_selectable.py +0 -0
  412. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/sql/test_string.py +0 -0
  413. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_asyn.py +0 -0
  414. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_cache.py +0 -0
  415. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_catalog.py +0 -0
  416. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_catalog_loader.py +0 -0
  417. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_cli_datasets.py +0 -0
  418. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_cli_parsing.py +0 -0
  419. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_client.py +0 -0
  420. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_client_gcs.py +0 -0
  421. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_client_http.py +0 -0
  422. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_client_s3.py +0 -0
  423. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_config.py +0 -0
  424. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_data_storage.py +0 -0
  425. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_datachain_hash.py +0 -0
  426. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_dataset.py +0 -0
  427. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_dispatch.py +0 -0
  428. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_fileslice.py +0 -0
  429. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_func.py +0 -0
  430. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_hash_utils.py +0 -0
  431. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_listing.py +0 -0
  432. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_module_exports.py +0 -0
  433. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_pytorch.py +0 -0
  434. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_query.py +0 -0
  435. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_query_metrics.py +0 -0
  436. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_query_params.py +0 -0
  437. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_query_steps_hash.py +0 -0
  438. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_script_meta.py +0 -0
  439. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_semver.py +0 -0
  440. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_session.py +0 -0
  441. {datachain-0.34.1 → datachain-0.34.2}/tests/unit/test_utils.py +0 -0
  442. {datachain-0.34.1 → datachain-0.34.2}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.34.1
3
+ Version: 0.34.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -3,6 +3,7 @@ import sys
3
3
  from importlib import import_module
4
4
  from typing import TYPE_CHECKING, Any, Optional
5
5
 
6
+ from datachain.plugins import ensure_plugins_loaded
6
7
  from datachain.utils import get_envs_by_prefix
7
8
 
8
9
  if TYPE_CHECKING:
@@ -24,6 +25,8 @@ IN_MEMORY_ERROR_MESSAGE = "In-memory is only supported on SQLite"
24
25
 
25
26
 
26
27
  def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
28
+ ensure_plugins_loaded()
29
+
27
30
  from datachain.data_storage import AbstractMetastore
28
31
  from datachain.data_storage.serializer import deserialize
29
32
 
@@ -64,6 +67,8 @@ def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
64
67
 
65
68
 
66
69
  def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
70
+ ensure_plugins_loaded()
71
+
67
72
  from datachain.data_storage import AbstractWarehouse
68
73
  from datachain.data_storage.serializer import deserialize
69
74
 
@@ -0,0 +1,119 @@
1
+ import base64
2
+ import json
3
+ from abc import abstractmethod
4
+ from collections.abc import Callable
5
+ from typing import Any, ClassVar
6
+
7
+ from datachain.plugins import ensure_plugins_loaded
8
+
9
+
10
class CallableRegistry:
    """Class-level mapping from a string name to a registered callable."""

    # One dict shared by every user of the class, keyed by registered name.
    _registry: ClassVar[dict[str, Callable]] = {}

    @classmethod
    def register(cls, callable_obj: Callable, name: str) -> str:
        """Store ``callable_obj`` under ``name`` and return ``name``.

        An entry already stored under the same name is replaced.
        """
        cls._registry.update({name: callable_obj})
        return name

    @classmethod
    def get(cls, name: str) -> Callable:
        """Return the callable stored under ``name``.

        Raises:
            KeyError: if nothing is registered under ``name``.
        """
        known = cls._registry
        return known[name]
21
+
22
+
23
class Serializable:
    """Base class for objects that can be rebuilt from a registered factory.

    Subclasses supply ``serialize_callable_name`` and ``clone_params``;
    ``serialize`` turns the clone params into a base64-encoded JSON string.
    """

    @classmethod
    @abstractmethod
    def serialize_callable_name(cls) -> str:
        """Return the registered name used for this class' factory callable."""

    @abstractmethod
    def clone_params(self) -> tuple[Callable[..., Any], list[Any], dict[str, Any]]:
        """Return (callable, args, kwargs) necessary to recreate this object."""

    def _prepare(self, params: tuple) -> dict:
        """Turn a ``(callable, args, kwargs)`` triple into a JSON-ready dict.

        The callable is replaced by the name reported by the class it is
        bound to (``__self__.serialize_callable_name()``). Any tuple found
        among the kwargs values is treated as a nested triple and converted
        the same way; positional args are passed through untouched.
        """
        factory, positional, keyword = params
        # Resolve the name first so a factory that is not a bound classmethod
        # fails before any nested value is processed.
        registered_name = factory.__self__.serialize_callable_name()
        prepared_kwargs = {}
        for key, value in keyword.items():
            if isinstance(value, tuple):
                value = self._prepare(value)
            prepared_kwargs[key] = value
        return {
            "callable": registered_name,
            "args": positional,
            "kwargs": prepared_kwargs,
        }

    def serialize(self) -> str:
        """Return a base64-encoded JSON string with registered callable + params."""
        _ensure_default_callables_registered()
        payload = json.dumps(self._prepare(self.clone_params()))
        return base64.b64encode(payload.encode()).decode()
50
+
51
+
52
def deserialize(s: str) -> Serializable:
    """Rebuild an object from a base64-encoded JSON string.

    Only callables found in ``CallableRegistry`` are used. The top-level
    entry is instantiated by calling its registered callable. Serialized
    entries nested inside ``kwargs`` are not instantiated: they are handed
    to the enclosing callable as ``(callable, args, kwargs)`` tuples.

    Raises:
        ValueError: if the decoded JSON is not a dict holding the
            "callable", "args" and "kwargs" keys.
        KeyError: if a callable name is not present in the registry.
    """
    ensure_plugins_loaded()
    _ensure_default_callables_registered()
    payload = json.loads(base64.b64decode(s.encode()).decode())

    required_keys = {"callable", "args", "kwargs"}

    def _is_serialized(obj: Any) -> bool:
        return isinstance(obj, dict) and obj.keys() >= required_keys

    def _clone_params(node: dict[str, Any]) -> tuple:
        # Only kwargs are searched for nested entries; they are resolved
        # before the enclosing callable is looked up in the registry.
        resolved = {
            key: _clone_params(value) if _is_serialized(value) else value
            for key, value in node["kwargs"].items()
        }
        return CallableRegistry.get(node["callable"]), node["args"], resolved

    if not _is_serialized(payload):
        raise ValueError("Invalid serialized data format")
    factory, args, kwargs = _clone_params(payload)
    return factory(*args, **kwargs)
88
+
89
+
90
class _DefaultsState:
    """Holds the flag recording whether the default callables are registered."""

    # Set to True at the end of _ensure_default_callables_registered().
    registered: bool = False
92
+
93
+
94
def _ensure_default_callables_registered() -> None:
    """Register the built-in SQLite factories with ``CallableRegistry`` once.

    Does nothing when ``_DefaultsState.registered`` is already set. Each
    factory is stored under the name its owning class reports through
    ``serialize_callable_name()``, so no name literal is repeated here.
    """
    if not _DefaultsState.registered:
        from datachain.data_storage.sqlite import (
            SQLiteDatabaseEngine,
            SQLiteMetastore,
            SQLiteWarehouse,
        )

        # (owning class, name of its factory classmethod), in registration order.
        defaults = (
            (SQLiteDatabaseEngine, "from_db_file"),
            (SQLiteMetastore, "init_after_clone"),
            (SQLiteWarehouse, "init_after_clone"),
        )
        for owner, factory_attr in defaults:
            # Re-registering under an existing name simply overwrites it.
            CallableRegistry.register(
                getattr(owner, factory_attr),
                owner.serialize_callable_name(),
            )

        _DefaultsState.registered = True
@@ -201,10 +201,14 @@ class SQLiteDatabaseEngine(DatabaseEngine):
201
201
  """
202
202
  return (
203
203
  SQLiteDatabaseEngine.from_db_file,
204
- [self.db_file],
204
+ [str(self.db_file)],
205
205
  {},
206
206
  )
207
207
 
208
+ @classmethod
209
+ def serialize_callable_name(cls) -> str:
210
+ return "sqlite.from_db_file"
211
+
208
212
  def _reconnect(self) -> None:
209
213
  if not self.is_closed:
210
214
  raise RuntimeError("Cannot reconnect on still-open DB!")
@@ -403,6 +407,10 @@ class SQLiteMetastore(AbstractDBMetastore):
403
407
  },
404
408
  )
405
409
 
410
+ @classmethod
411
+ def serialize_callable_name(cls) -> str:
412
+ return "sqlite.metastore.init_after_clone"
413
+
406
414
  @classmethod
407
415
  def init_after_clone(
408
416
  cls,
@@ -610,6 +618,10 @@ class SQLiteWarehouse(AbstractWarehouse):
610
618
  {"db_clone_params": self.db.clone_params()},
611
619
  )
612
620
 
621
+ @classmethod
622
+ def serialize_callable_name(cls) -> str:
623
+ return "sqlite.warehouse.init_after_clone"
624
+
613
625
  @classmethod
614
626
  def init_after_clone(
615
627
  cls,
@@ -0,0 +1,30 @@
1
+ """Plugin loader for DataChain callables.
2
+
3
+ Discovers and invokes entry points in the group "datachain.callables" once
4
+ per process. This enables external packages (e.g., Studio) to register
5
+ their callables with the serializer registry without explicit imports.
6
+ """
7
+
8
+ from importlib import metadata as importlib_metadata
9
+
10
_plugins_loaded = False


def ensure_plugins_loaded() -> None:
    """Load and call every ``datachain.callables`` entry point, once per process.

    The module-level flag is set only after every entry point has been
    loaded and invoked, so a call that raises part-way through leaves the
    flag unset and the next call starts over.
    """
    global _plugins_loaded  # noqa: PLW0603
    if not _plugins_loaded:
        group = "datachain.callables"

        # Compatible across importlib.metadata versions
        discovered = importlib_metadata.entry_points()
        if hasattr(discovered, "select"):
            plugins = discovered.select(group=group)
        else:
            # Compatibility for older versions of importlib_metadata, Python 3.9
            plugins = discovered.get(group, [])  # type: ignore[attr-defined]

        for entry_point in plugins:
            entry_point.load()()

        _plugins_loaded = True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.34.1
3
+ Version: 0.34.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -118,6 +118,7 @@ src/datachain/namespace.py
118
118
  src/datachain/node.py
119
119
  src/datachain/nodes_fetcher.py
120
120
  src/datachain/nodes_thread_pool.py
121
+ src/datachain/plugins.py
121
122
  src/datachain/progress.py
122
123
  src/datachain/project.py
123
124
  src/datachain/py.typed
@@ -126,6 +126,7 @@ def clean_environment(
126
126
  working_dir = str(tmp_path_factory.mktemp("default_working_dir"))
127
127
  monkeypatch_session.chdir(working_dir)
128
128
  monkeypatch_session.delenv(DataChainDir.ENV_VAR, raising=False)
129
+ monkeypatch_session.delenv(DataChainDir.ENV_VAR_DATACHAIN_ROOT, raising=False)
129
130
 
130
131
 
131
132
  @pytest.fixture
@@ -1,12 +1,15 @@
1
1
  import base64
2
+ import json
2
3
  import os
3
- import pickle
4
4
 
5
5
  import pytest
6
6
  from sqlalchemy import Column, Integer, Table
7
7
 
8
8
  from datachain.data_storage.serializer import deserialize
9
- from datachain.data_storage.sqlite import SQLiteDatabaseEngine, get_db_file_in_memory
9
+ from datachain.data_storage.sqlite import (
10
+ SQLiteDatabaseEngine,
11
+ get_db_file_in_memory,
12
+ )
10
13
  from tests.utils import skip_if_not_sqlite
11
14
 
12
15
 
@@ -24,6 +27,7 @@ def test_init_clone(tmp_dir, db_file, expected_db_file):
24
27
  expected_db_file = os.fspath(tmp_dir / expected_db_file)
25
28
 
26
29
  with SQLiteDatabaseEngine.from_db_file(db_file) as db:
30
+ assert isinstance(db, SQLiteDatabaseEngine)
27
31
  assert db.db_file == expected_db_file
28
32
 
29
33
  # Test clone
@@ -53,17 +57,15 @@ def test_get_db_file_in_memory(db_file, in_memory, expected):
53
57
 
54
58
 
55
59
  def test_serialize(sqlite_db):
56
- # Test serialization
60
+ # JSON serialization format
57
61
  serialized = sqlite_db.serialize()
58
62
  assert serialized
59
- serialized_pickled = base64.b64decode(serialized.encode())
60
- assert serialized_pickled
61
- (f, args, kwargs) = pickle.loads(serialized_pickled) # noqa: S301
62
- assert str(f) == str(SQLiteDatabaseEngine.from_db_file)
63
- assert args == [":memory:"]
64
- assert kwargs == {}
65
-
66
- # Test deserialization
63
+ raw = base64.b64decode(serialized.encode())
64
+ data = json.loads(raw.decode())
65
+ assert data["callable"] == "sqlite.from_db_file"
66
+ assert data["args"] == [":memory:"]
67
+ assert data["kwargs"] == {}
68
+
67
69
  obj3 = deserialize(serialized)
68
70
  assert isinstance(obj3, SQLiteDatabaseEngine)
69
71
  assert obj3.db_file == ":memory:"
@@ -1,5 +1,5 @@
1
1
  import base64
2
- import pickle
2
+ import json
3
3
 
4
4
  import pytest
5
5
 
@@ -24,18 +24,19 @@ def test_sqlite_metastore(sqlite_db):
24
24
  assert obj2.db.db_file == sqlite_db.db_file
25
25
  assert obj2.clone_params() == obj.clone_params()
26
26
 
27
- # Test serialization
27
+ # Test serialization JSON format
28
28
  serialized = obj.serialize()
29
29
  assert serialized
30
- serialized_pickled = base64.b64decode(serialized.encode())
31
- assert serialized_pickled
32
- (f, args, kwargs) = pickle.loads(serialized_pickled) # noqa: S301
33
- assert str(f) == str(SQLiteMetastore.init_after_clone)
34
- assert args == []
35
- assert kwargs["uri"] == uri
36
- assert str(kwargs["db_clone_params"]) == str(sqlite_db.clone_params())
37
-
38
- # Test deserialization
30
+ raw = base64.b64decode(serialized.encode())
31
+ data = json.loads(raw.decode())
32
+ assert data["callable"] == "sqlite.metastore.init_after_clone"
33
+ assert data["args"] == []
34
+ assert data["kwargs"]["uri"] == uri
35
+ nested = data["kwargs"]["db_clone_params"]
36
+ assert nested["callable"] == "sqlite.from_db_file"
37
+ assert nested["args"] == [":memory:"]
38
+ assert nested["kwargs"] == {}
39
+
39
40
  obj3 = deserialize(serialized)
40
41
  assert isinstance(obj3, SQLiteMetastore)
41
42
  assert obj3.uri == uri
@@ -0,0 +1,218 @@
1
+ import base64
2
+ import json
3
+ from collections.abc import Callable
4
+ from typing import Any, Optional
5
+
6
+ import pytest
7
+
8
+ from datachain.data_storage.serializer import (
9
+ CallableRegistry,
10
+ Serializable,
11
+ deserialize,
12
+ )
13
+
14
+
15
class MySerializableInit(Serializable):
    """Test double whose factory is the ``build`` classmethod."""

    def __init__(self, name, optional=None):
        self.name, self.optional = name, optional

    @classmethod
    def serialize_callable_name(cls):
        return "MySerializableInit"

    @classmethod
    def build(cls, name, optional=None):
        instance = cls(name, optional=optional)
        return instance

    def clone_params(self):
        factory = type(self).build
        return factory, [self.name], {"optional": self.optional}

    def get_params(self):
        return (self.name, self.optional)
33
+
34
+
35
class MySerializableFunc(Serializable):
    """Test double whose factory is the ``from_params`` classmethod."""

    def __init__(self, name, optional=None):
        self.name, self.optional = name, optional

    @classmethod
    def from_params(cls, name, optional=None):
        instance = cls(name, optional=optional)
        return instance

    @classmethod
    def serialize_callable_name(cls):
        return "MySerializableFunc.from_params"

    def clone_params(self):
        keyword = {"optional": self.optional}
        return self.from_params, [self.name], keyword

    def get_params(self):
        return (self.name, self.optional)
53
+
54
+
55
class MySerializableNoParams(Serializable):
    """Test double that is rebuilt without any arguments."""

    @classmethod
    def serialize_callable_name(cls):
        return "MySerializableNoParams"

    def clone_params(self):
        factory = type(self).build
        return factory, [], {}

    @classmethod
    def build(cls):
        instance = cls()
        return instance
66
+
67
+
68
# Make the test factories known to the serializer under the same names
# their classes return from serialize_callable_name().
CallableRegistry.register(
    callable_obj=MySerializableInit.build,
    name="MySerializableInit",
)
CallableRegistry.register(
    callable_obj=MySerializableFunc.from_params,
    name="MySerializableFunc.from_params",
)
CallableRegistry.register(
    callable_obj=MySerializableNoParams.build,
    name="MySerializableNoParams",
)
74
+
75
+
76
@pytest.mark.parametrize(
    "cls,call,call_name",
    [
        (MySerializableInit, MySerializableInit.build, "MySerializableInit"),
        (
            MySerializableFunc,
            MySerializableFunc.from_params,
            "MySerializableFunc.from_params",
        ),
    ],
)
@pytest.mark.parametrize(
    "name,optional",
    [
        (None, None),
        ("foo", None),
        (None, 12),
        ("bar", 24),
    ],
)
def test_serializable_json_format(cls, call, call_name, name, optional):
    """Round-trip an object through the JSON-based format and inspect the payload."""
    original = cls(name, optional=optional)
    expected_kwargs = {"optional": optional}
    assert original.clone_params() == (call, [name], expected_kwargs)

    encoded = original.serialize()
    assert encoded

    # The string must be base64 wrapping a JSON document
    payload = json.loads(base64.b64decode(encoded.encode()).decode())
    assert payload["callable"] == call_name
    assert payload["args"] == [name]
    assert payload["kwargs"] == expected_kwargs

    restored = deserialize(encoded)
    assert isinstance(restored, cls)
    assert restored.name == name  # type: ignore[attr-defined]
    assert restored.optional == optional  # type: ignore[attr-defined]
    assert restored.get_params() == (name, optional)  # type: ignore[attr-defined]
117
+
118
+
119
def test_serializable_no_params():
    """Round-trip an object whose factory takes no arguments."""
    original = MySerializableNoParams()
    assert original.clone_params() == (MySerializableNoParams.build, [], {})

    encoded = original.serialize()
    assert encoded

    # The string must be base64 wrapping a JSON document
    payload = json.loads(base64.b64decode(encoded.encode()).decode())
    assert payload["callable"] == "MySerializableNoParams"
    assert payload["args"] == []
    assert payload["kwargs"] == {}

    restored = deserialize(encoded)
    assert isinstance(restored, MySerializableNoParams)
137
+
138
+
139
def test_callable_registry():
    """Test CallableRegistry registration, lookup and unknown-name errors."""

    def dummy_func():
        pass

    # register() hands back the name it stored the callable under
    assert CallableRegistry.register(dummy_func, "dummy_func") == "dummy_func"
    try:
        assert CallableRegistry.get("dummy_func") is dummy_func
    finally:
        # The registry is class-level state; do not leak this entry into
        # tests that run later in the same session.
        CallableRegistry._registry.pop("dummy_func", None)

    # Names that were never registered are rejected
    for missing in ("nonexistent", "unregistered_func"):
        with pytest.raises(KeyError):
            CallableRegistry.get(missing)
158
+
159
+
160
def test_reject_unregistered_callable():
    """A payload naming a callable that is not registered must not deserialize."""
    payload = json.dumps(
        {"callable": "nonexistent_callable", "args": [], "kwargs": {}}
    )
    malicious_serialized = base64.b64encode(payload.encode()).decode()
    with pytest.raises(KeyError):
        deserialize(malicious_serialized)
166
+
167
+
168
class NestedSerializable(Serializable):
    """Test double that may hold another instance of itself as ``child``."""

    def __init__(self, value: int, child: Optional["NestedSerializable"] = None):
        self.value, self.child = value, child

    @classmethod
    def factory(
        cls,
        value: int,
        child: Optional[tuple[Callable, list, dict[str, Any]]] = None,
    ) -> "NestedSerializable":
        """Build an instance, first instantiating ``child`` from its clone params."""
        if child is None:
            return cls(value, None)
        child_factory, child_args, child_kwargs = child
        return cls(value, child_factory(*child_args, **child_kwargs))

    @classmethod
    def serialize_callable_name(cls):
        return "NestedSerializable.factory"

    def clone_params(self):
        child_params = self.child.clone_params() if self.child else None
        return (self.factory, [self.value], {"child": child_params})


CallableRegistry.register(
    callable_obj=NestedSerializable.factory,
    name="NestedSerializable.factory",
)
199
+
200
+
201
def test_nested_recursive_serialization():
    """A parent holding a child survives a serialize/deserialize round trip."""
    root = NestedSerializable(1, child=NestedSerializable(2))

    restored = deserialize(root.serialize())

    assert isinstance(restored, NestedSerializable)
    assert restored.value == 1
    restored_child = restored.child
    assert isinstance(restored_child, NestedSerializable)
    assert restored_child.value == 2
    assert restored_child.child is None
211
+
212
+
213
def test_deserialize_invalid_top_level():
    """Payloads that are not a serialized-object dict are rejected."""
    missing_keys = base64.b64encode(json.dumps({"foo": 1}).encode()).decode()
    not_json = "Zm9vYmFy"  # base64 for 'foobar'
    for payload in (missing_keys, not_json):
        with pytest.raises(ValueError):
            deserialize(payload)
@@ -1,5 +1,5 @@
1
1
  import base64
2
- import pickle
2
+ import json
3
3
 
4
4
  from datachain.data_storage.serializer import deserialize
5
5
  from datachain.data_storage.sqlite import (
@@ -17,17 +17,18 @@ def test_serialize(sqlite_db):
17
17
  assert obj2.db.db_file == sqlite_db.db_file
18
18
  assert obj2.clone_params() == obj.clone_params()
19
19
 
20
- # Test serialization
20
+ # Test serialization JSON format
21
21
  serialized = obj.serialize()
22
22
  assert serialized
23
- serialized_pickled = base64.b64decode(serialized.encode())
24
- assert serialized_pickled
25
- (f, args, kwargs) = pickle.loads(serialized_pickled) # noqa: S301
26
- assert str(f) == str(SQLiteWarehouse.init_after_clone)
27
- assert args == []
28
- assert str(kwargs["db_clone_params"]) == str(sqlite_db.clone_params())
29
-
30
- # Test deserialization
23
+ raw = base64.b64decode(serialized.encode())
24
+ data = json.loads(raw.decode())
25
+ assert data["callable"] == "sqlite.warehouse.init_after_clone"
26
+ assert data["args"] == []
27
+ nested = data["kwargs"]["db_clone_params"]
28
+ assert nested["callable"] == "sqlite.from_db_file"
29
+ assert nested["args"] == [":memory:"]
30
+ assert nested["kwargs"] == {}
31
+
31
32
  obj3 = deserialize(serialized)
32
33
  assert isinstance(obj3, SQLiteWarehouse)
33
34
  assert obj3.db.db_file == sqlite_db.db_file
@@ -1,29 +0,0 @@
1
- import base64
2
- import pickle
3
- from abc import abstractmethod
4
- from collections.abc import Callable
5
- from typing import Any
6
-
7
-
8
- class Serializable:
9
- @abstractmethod
10
- def clone_params(self) -> tuple[Callable[..., Any], list[Any], dict[str, Any]]:
11
- """
12
- Returns the class, args, and kwargs needed to instantiate a cloned copy
13
- of this instance for use in separate processes or machines.
14
- """
15
-
16
- def serialize(self) -> str:
17
- """
18
- Returns a string representation of clone params.
19
- This is useful for storing the state of an object in environment variable.
20
- """
21
- return base64.b64encode(pickle.dumps(self.clone_params())).decode()
22
-
23
-
24
- def deserialize(s: str) -> Serializable:
25
- """
26
- Returns a new instance of the class represented by the string.
27
- """
28
- (f, args, kwargs) = pickle.loads(base64.b64decode(s.encode())) # noqa: S301
29
- return f(*args, **kwargs)