datachain 0.35.0__tar.gz → 0.35.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (444)
  1. {datachain-0.35.0 → datachain-0.35.1}/PKG-INFO +1 -1
  2. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/warehouse.py +34 -18
  3. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/batch.py +1 -2
  4. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/dataset.py +12 -22
  5. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/dispatch.py +25 -35
  6. {datachain-0.35.0 → datachain-0.35.1}/src/datachain.egg-info/PKG-INFO +1 -1
  7. {datachain-0.35.0 → datachain-0.35.1}/src/datachain.egg-info/SOURCES.txt +1 -2
  8. {datachain-0.35.0 → datachain-0.35.1}/tests/conftest.py +46 -0
  9. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_datachain.py +1 -373
  10. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_udf.py +10 -0
  11. datachain-0.35.1/tests/unit/test_batching.py +229 -0
  12. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_datachain_hash.py +1 -1
  13. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_hash_utils.py +6 -6
  14. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_query_steps_hash.py +4 -4
  15. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_utils.py +5 -7
  16. datachain-0.35.1/tests/unit/test_warehouse.py +173 -0
  17. datachain-0.35.0/src/datachain/query/utils.py +0 -38
  18. datachain-0.35.0/tests/func/test_batching.py +0 -242
  19. datachain-0.35.0/tests/unit/test_warehouse.py +0 -43
  20. {datachain-0.35.0 → datachain-0.35.1}/.cruft.json +0 -0
  21. {datachain-0.35.0 → datachain-0.35.1}/.gitattributes +0 -0
  22. {datachain-0.35.0 → datachain-0.35.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  23. {datachain-0.35.0 → datachain-0.35.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  24. {datachain-0.35.0 → datachain-0.35.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  25. {datachain-0.35.0 → datachain-0.35.1}/.github/codecov.yaml +0 -0
  26. {datachain-0.35.0 → datachain-0.35.1}/.github/dependabot.yml +0 -0
  27. {datachain-0.35.0 → datachain-0.35.1}/.github/workflows/benchmarks.yml +0 -0
  28. {datachain-0.35.0 → datachain-0.35.1}/.github/workflows/release.yml +0 -0
  29. {datachain-0.35.0 → datachain-0.35.1}/.github/workflows/tests-studio.yml +0 -0
  30. {datachain-0.35.0 → datachain-0.35.1}/.github/workflows/tests.yml +0 -0
  31. {datachain-0.35.0 → datachain-0.35.1}/.github/workflows/update-template.yaml +0 -0
  32. {datachain-0.35.0 → datachain-0.35.1}/.gitignore +0 -0
  33. {datachain-0.35.0 → datachain-0.35.1}/.pre-commit-config.yaml +0 -0
  34. {datachain-0.35.0 → datachain-0.35.1}/CODE_OF_CONDUCT.rst +0 -0
  35. {datachain-0.35.0 → datachain-0.35.1}/LICENSE +0 -0
  36. {datachain-0.35.0 → datachain-0.35.1}/README.rst +0 -0
  37. {datachain-0.35.0 → datachain-0.35.1}/docs/api_hooks.py +0 -0
  38. {datachain-0.35.0 → datachain-0.35.1}/docs/assets/captioned_cartoons.png +0 -0
  39. {datachain-0.35.0 → datachain-0.35.1}/docs/assets/datachain-white.svg +0 -0
  40. {datachain-0.35.0 → datachain-0.35.1}/docs/assets/datachain.svg +0 -0
  41. {datachain-0.35.0 → datachain-0.35.1}/docs/assets/webhook_dialog.png +0 -0
  42. {datachain-0.35.0 → datachain-0.35.1}/docs/assets/webhook_list.png +0 -0
  43. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/auth/login.md +0 -0
  44. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/auth/logout.md +0 -0
  45. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/auth/team.md +0 -0
  46. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/auth/token.md +0 -0
  47. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/index.md +0 -0
  48. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/job/cancel.md +0 -0
  49. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/job/clusters.md +0 -0
  50. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/job/logs.md +0 -0
  51. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/job/ls.md +0 -0
  52. {datachain-0.35.0 → datachain-0.35.1}/docs/commands/job/run.md +0 -0
  53. {datachain-0.35.0 → datachain-0.35.1}/docs/contributing.md +0 -0
  54. {datachain-0.35.0 → datachain-0.35.1}/docs/css/github-permalink-style.css +0 -0
  55. {datachain-0.35.0 → datachain-0.35.1}/docs/examples.md +0 -0
  56. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/db_migrations.md +0 -0
  57. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/delta.md +0 -0
  58. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/env.md +0 -0
  59. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/index.md +0 -0
  60. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/namespaces.md +0 -0
  61. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/processing.md +0 -0
  62. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/remotes.md +0 -0
  63. {datachain-0.35.0 → datachain-0.35.1}/docs/guide/retry.md +0 -0
  64. {datachain-0.35.0 → datachain-0.35.1}/docs/index.md +0 -0
  65. {datachain-0.35.0 → datachain-0.35.1}/docs/overrides/main.html +0 -0
  66. {datachain-0.35.0 → datachain-0.35.1}/docs/quick-start.md +0 -0
  67. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/arrowrow.md +0 -0
  68. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/bbox.md +0 -0
  69. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/file.md +0 -0
  70. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/imagefile.md +0 -0
  71. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/index.md +0 -0
  72. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/pose.md +0 -0
  73. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/segment.md +0 -0
  74. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/tarvfile.md +0 -0
  75. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/textfile.md +0 -0
  76. {datachain-0.35.0 → datachain-0.35.1}/docs/references/data-types/videofile.md +0 -0
  77. {datachain-0.35.0 → datachain-0.35.1}/docs/references/datachain.md +0 -0
  78. {datachain-0.35.0 → datachain-0.35.1}/docs/references/func.md +0 -0
  79. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/aggregate.md +0 -0
  80. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/array.md +0 -0
  81. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/conditional.md +0 -0
  82. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/numeric.md +0 -0
  83. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/path.md +0 -0
  84. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/random.md +0 -0
  85. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/string.md +0 -0
  86. {datachain-0.35.0 → datachain-0.35.1}/docs/references/functions/window.md +0 -0
  87. {datachain-0.35.0 → datachain-0.35.1}/docs/references/index.md +0 -0
  88. {datachain-0.35.0 → datachain-0.35.1}/docs/references/toolkit.md +0 -0
  89. {datachain-0.35.0 → datachain-0.35.1}/docs/references/torch.md +0 -0
  90. {datachain-0.35.0 → datachain-0.35.1}/docs/references/udf.md +0 -0
  91. {datachain-0.35.0 → datachain-0.35.1}/docs/studio/api/.gitkeep +0 -0
  92. {datachain-0.35.0 → datachain-0.35.1}/docs/studio/webhooks.md +0 -0
  93. {datachain-0.35.0 → datachain-0.35.1}/docs/templates/main.dot +0 -0
  94. {datachain-0.35.0 → datachain-0.35.1}/docs/templates/operation.dot +0 -0
  95. {datachain-0.35.0 → datachain-0.35.1}/docs/templates/responses.def +0 -0
  96. {datachain-0.35.0 → datachain-0.35.1}/docs/tutorials.md +0 -0
  97. {datachain-0.35.0 → datachain-0.35.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  98. {datachain-0.35.0 → datachain-0.35.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  99. {datachain-0.35.0 → datachain-0.35.1}/examples/computer_vision/openimage-detect.py +0 -0
  100. {datachain-0.35.0 → datachain-0.35.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
  101. {datachain-0.35.0 → datachain-0.35.1}/examples/computer_vision/ultralytics-pose.py +0 -0
  102. {datachain-0.35.0 → datachain-0.35.1}/examples/computer_vision/ultralytics-segment.py +0 -0
  103. {datachain-0.35.0 → datachain-0.35.1}/examples/get_started/common_sql_functions.py +0 -0
  104. {datachain-0.35.0 → datachain-0.35.1}/examples/get_started/json-csv-reader.py +0 -0
  105. {datachain-0.35.0 → datachain-0.35.1}/examples/get_started/nested_datamodel.py +0 -0
  106. {datachain-0.35.0 → datachain-0.35.1}/examples/get_started/torch-loader.py +0 -0
  107. {datachain-0.35.0 → datachain-0.35.1}/examples/get_started/udfs/parallel.py +0 -0
  108. {datachain-0.35.0 → datachain-0.35.1}/examples/get_started/udfs/simple.py +0 -0
  109. {datachain-0.35.0 → datachain-0.35.1}/examples/get_started/udfs/stateful.py +0 -0
  110. {datachain-0.35.0 → datachain-0.35.1}/examples/incremental_processing/delta.py +0 -0
  111. {datachain-0.35.0 → datachain-0.35.1}/examples/incremental_processing/retry.py +0 -0
  112. {datachain-0.35.0 → datachain-0.35.1}/examples/incremental_processing/utils.py +0 -0
  113. {datachain-0.35.0 → datachain-0.35.1}/examples/llm_and_nlp/claude-query.py +0 -0
  114. {datachain-0.35.0 → datachain-0.35.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  115. {datachain-0.35.0 → datachain-0.35.1}/examples/multimodal/audio-to-text.py +0 -0
  116. {datachain-0.35.0 → datachain-0.35.1}/examples/multimodal/clip_inference.py +0 -0
  117. {datachain-0.35.0 → datachain-0.35.1}/examples/multimodal/hf_pipeline.py +0 -0
  118. {datachain-0.35.0 → datachain-0.35.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  119. {datachain-0.35.0 → datachain-0.35.1}/examples/multimodal/wds.py +0 -0
  120. {datachain-0.35.0 → datachain-0.35.1}/examples/multimodal/wds_filtered.py +0 -0
  121. {datachain-0.35.0 → datachain-0.35.1}/mkdocs.yml +0 -0
  122. {datachain-0.35.0 → datachain-0.35.1}/noxfile.py +0 -0
  123. {datachain-0.35.0 → datachain-0.35.1}/pyproject.toml +0 -0
  124. {datachain-0.35.0 → datachain-0.35.1}/setup.cfg +0 -0
  125. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/__init__.py +0 -0
  126. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/__main__.py +0 -0
  127. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/asyn.py +0 -0
  128. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cache.py +0 -0
  129. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/catalog/__init__.py +0 -0
  130. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/catalog/catalog.py +0 -0
  131. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/catalog/datasource.py +0 -0
  132. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/catalog/loader.py +0 -0
  133. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/checkpoint.py +0 -0
  134. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/__init__.py +0 -0
  135. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/__init__.py +0 -0
  136. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/datasets.py +0 -0
  137. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/du.py +0 -0
  138. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/index.py +0 -0
  139. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/ls.py +0 -0
  140. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/misc.py +0 -0
  141. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/query.py +0 -0
  142. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/commands/show.py +0 -0
  143. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/parser/__init__.py +0 -0
  144. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/parser/job.py +0 -0
  145. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/parser/studio.py +0 -0
  146. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/parser/utils.py +0 -0
  147. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/cli/utils.py +0 -0
  148. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/__init__.py +0 -0
  149. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/azure.py +0 -0
  150. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/fileslice.py +0 -0
  151. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/fsspec.py +0 -0
  152. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/gcs.py +0 -0
  153. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/hf.py +0 -0
  154. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/http.py +0 -0
  155. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/local.py +0 -0
  156. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/client/s3.py +0 -0
  157. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/config.py +0 -0
  158. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/__init__.py +0 -0
  159. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/db_engine.py +0 -0
  160. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/job.py +0 -0
  161. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/metastore.py +0 -0
  162. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/schema.py +0 -0
  163. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/serializer.py +0 -0
  164. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/data_storage/sqlite.py +0 -0
  165. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/dataset.py +0 -0
  166. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/delta.py +0 -0
  167. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/diff/__init__.py +0 -0
  168. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/error.py +0 -0
  169. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/fs/__init__.py +0 -0
  170. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/fs/reference.py +0 -0
  171. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/fs/utils.py +0 -0
  172. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/__init__.py +0 -0
  173. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/aggregate.py +0 -0
  174. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/array.py +0 -0
  175. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/base.py +0 -0
  176. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/conditional.py +0 -0
  177. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/func.py +0 -0
  178. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/numeric.py +0 -0
  179. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/path.py +0 -0
  180. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/random.py +0 -0
  181. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/string.py +0 -0
  182. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/func/window.py +0 -0
  183. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/hash_utils.py +0 -0
  184. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/job.py +0 -0
  185. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/__init__.py +0 -0
  186. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/arrow.py +0 -0
  187. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/audio.py +0 -0
  188. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/clip.py +0 -0
  189. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/convert/__init__.py +0 -0
  190. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/convert/flatten.py +0 -0
  191. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
  192. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
  193. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/convert/unflatten.py +0 -0
  194. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  195. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/data_model.py +0 -0
  196. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dataset_info.py +0 -0
  197. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/__init__.py +0 -0
  198. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/csv.py +0 -0
  199. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/database.py +0 -0
  200. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/datachain.py +0 -0
  201. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/datasets.py +0 -0
  202. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/hf.py +0 -0
  203. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/json.py +0 -0
  204. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/listings.py +0 -0
  205. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/pandas.py +0 -0
  206. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/parquet.py +0 -0
  207. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/records.py +0 -0
  208. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/storage.py +0 -0
  209. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/storage_pattern.py +0 -0
  210. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/utils.py +0 -0
  211. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/dc/values.py +0 -0
  212. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/file.py +0 -0
  213. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/hf.py +0 -0
  214. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/image.py +0 -0
  215. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/listing.py +0 -0
  216. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/listing_info.py +0 -0
  217. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/meta_formats.py +0 -0
  218. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/model_store.py +0 -0
  219. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/namespaces.py +0 -0
  220. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/projects.py +0 -0
  221. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/pytorch.py +0 -0
  222. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/settings.py +0 -0
  223. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/signal_schema.py +0 -0
  224. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/tar.py +0 -0
  225. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/text.py +0 -0
  226. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/udf.py +0 -0
  227. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/udf_signature.py +0 -0
  228. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/utils.py +0 -0
  229. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/video.py +0 -0
  230. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/webdataset.py +0 -0
  231. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/lib/webdataset_laion.py +0 -0
  232. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/listing.py +0 -0
  233. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/__init__.py +0 -0
  234. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/bbox.py +0 -0
  235. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/pose.py +0 -0
  236. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/segment.py +0 -0
  237. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/ultralytics/__init__.py +0 -0
  238. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/ultralytics/bbox.py +0 -0
  239. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/ultralytics/pose.py +0 -0
  240. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/ultralytics/segment.py +0 -0
  241. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/model/utils.py +0 -0
  242. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/namespace.py +0 -0
  243. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/node.py +0 -0
  244. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/nodes_fetcher.py +0 -0
  245. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/nodes_thread_pool.py +0 -0
  246. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/plugins.py +0 -0
  247. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/progress.py +0 -0
  248. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/project.py +0 -0
  249. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/py.typed +0 -0
  250. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/__init__.py +0 -0
  251. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/metrics.py +0 -0
  252. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/params.py +0 -0
  253. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/queue.py +0 -0
  254. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/schema.py +0 -0
  255. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/session.py +0 -0
  256. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/query/udf.py +0 -0
  257. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/remote/__init__.py +0 -0
  258. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/remote/studio.py +0 -0
  259. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/script_meta.py +0 -0
  260. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/semver.py +0 -0
  261. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/__init__.py +0 -0
  262. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/default/__init__.py +0 -0
  263. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/default/base.py +0 -0
  264. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/__init__.py +0 -0
  265. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/aggregate.py +0 -0
  266. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/array.py +0 -0
  267. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/conditional.py +0 -0
  268. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/numeric.py +0 -0
  269. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/path.py +0 -0
  270. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/random.py +0 -0
  271. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/functions/string.py +0 -0
  272. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/postgresql_dialect.py +0 -0
  273. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/postgresql_types.py +0 -0
  274. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/selectable.py +0 -0
  275. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  276. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/sqlite/base.py +0 -0
  277. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/sqlite/types.py +0 -0
  278. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/sqlite/vector.py +0 -0
  279. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/types.py +0 -0
  280. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/sql/utils.py +0 -0
  281. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/studio.py +0 -0
  282. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/telemetry.py +0 -0
  283. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/toolkit/__init__.py +0 -0
  284. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/toolkit/split.py +0 -0
  285. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/torch/__init__.py +0 -0
  286. {datachain-0.35.0 → datachain-0.35.1}/src/datachain/utils.py +0 -0
  287. {datachain-0.35.0 → datachain-0.35.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  288. {datachain-0.35.0 → datachain-0.35.1}/src/datachain.egg-info/entry_points.txt +0 -0
  289. {datachain-0.35.0 → datachain-0.35.1}/src/datachain.egg-info/requires.txt +0 -0
  290. {datachain-0.35.0 → datachain-0.35.1}/src/datachain.egg-info/top_level.txt +0 -0
  291. {datachain-0.35.0 → datachain-0.35.1}/tests/__init__.py +0 -0
  292. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/__init__.py +0 -0
  293. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/conftest.py +0 -0
  294. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  295. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  296. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/datasets/.gitignore +0 -0
  297. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  298. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/test_datachain.py +0 -0
  299. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/test_ls.py +0 -0
  300. {datachain-0.35.0 → datachain-0.35.1}/tests/benchmarks/test_version.py +0 -0
  301. {datachain-0.35.0 → datachain-0.35.1}/tests/data.py +0 -0
  302. {datachain-0.35.0 → datachain-0.35.1}/tests/examples/__init__.py +0 -0
  303. {datachain-0.35.0 → datachain-0.35.1}/tests/examples/test_examples.py +0 -0
  304. {datachain-0.35.0 → datachain-0.35.1}/tests/examples/test_wds_e2e.py +0 -0
  305. {datachain-0.35.0 → datachain-0.35.1}/tests/examples/wds_data.py +0 -0
  306. {datachain-0.35.0 → datachain-0.35.1}/tests/func/__init__.py +0 -0
  307. {datachain-0.35.0 → datachain-0.35.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  308. {datachain-0.35.0 → datachain-0.35.1}/tests/func/data/lena.jpg +0 -0
  309. {datachain-0.35.0 → datachain-0.35.1}/tests/func/fake-service-account-credentials.json +0 -0
  310. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/__init__.py +0 -0
  311. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/test_aggregate.py +0 -0
  312. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/test_array.py +0 -0
  313. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/test_conditional.py +0 -0
  314. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/test_numeric.py +0 -0
  315. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/test_path.py +0 -0
  316. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/test_random.py +0 -0
  317. {datachain-0.35.0 → datachain-0.35.1}/tests/func/functions/test_string.py +0 -0
  318. {datachain-0.35.0 → datachain-0.35.1}/tests/func/model/__init__.py +0 -0
  319. {datachain-0.35.0 → datachain-0.35.1}/tests/func/model/data/running-mask0.png +0 -0
  320. {datachain-0.35.0 → datachain-0.35.1}/tests/func/model/data/running-mask1.png +0 -0
  321. {datachain-0.35.0 → datachain-0.35.1}/tests/func/model/data/running.jpg +0 -0
  322. {datachain-0.35.0 → datachain-0.35.1}/tests/func/model/data/ships.jpg +0 -0
  323. {datachain-0.35.0 → datachain-0.35.1}/tests/func/model/test_yolo.py +0 -0
  324. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_audio.py +0 -0
  325. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_catalog.py +0 -0
  326. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_client.py +0 -0
  327. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_cloud_transfer.py +0 -0
  328. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_data_storage.py +0 -0
  329. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_datachain_merge.py +0 -0
  330. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_dataset_query.py +0 -0
  331. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_datasets.py +0 -0
  332. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_delta.py +0 -0
  333. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_feature_pickling.py +0 -0
  334. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_file.py +0 -0
  335. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_hf.py +0 -0
  336. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_hidden_field.py +0 -0
  337. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_image.py +0 -0
  338. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_listing.py +0 -0
  339. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_ls.py +0 -0
  340. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_meta_formats.py +0 -0
  341. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_metastore.py +0 -0
  342. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_metrics.py +0 -0
  343. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_mutate.py +0 -0
  344. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_pull.py +0 -0
  345. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_pytorch.py +0 -0
  346. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_query.py +0 -0
  347. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_read_database.py +0 -0
  348. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_read_dataset_remote.py +0 -0
  349. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  350. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_retry.py +0 -0
  351. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_session.py +0 -0
  352. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_storage_pattern.py +0 -0
  353. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_studio_datetime_parsing.py +0 -0
  354. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_to_database.py +0 -0
  355. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_toolkit.py +0 -0
  356. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_union.py +0 -0
  357. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_video.py +0 -0
  358. {datachain-0.35.0 → datachain-0.35.1}/tests/func/test_warehouse.py +0 -0
  359. {datachain-0.35.0 → datachain-0.35.1}/tests/scripts/feature_class.py +0 -0
  360. {datachain-0.35.0 → datachain-0.35.1}/tests/scripts/feature_class_exception.py +0 -0
  361. {datachain-0.35.0 → datachain-0.35.1}/tests/scripts/feature_class_parallel.py +0 -0
  362. {datachain-0.35.0 → datachain-0.35.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  363. {datachain-0.35.0 → datachain-0.35.1}/tests/scripts/name_len_slow.py +0 -0
  364. {datachain-0.35.0 → datachain-0.35.1}/tests/test_atomicity.py +0 -0
  365. {datachain-0.35.0 → datachain-0.35.1}/tests/test_cli_e2e.py +0 -0
  366. {datachain-0.35.0 → datachain-0.35.1}/tests/test_cli_studio.py +0 -0
  367. {datachain-0.35.0 → datachain-0.35.1}/tests/test_import_time.py +0 -0
  368. {datachain-0.35.0 → datachain-0.35.1}/tests/test_query_e2e.py +0 -0
  369. {datachain-0.35.0 → datachain-0.35.1}/tests/test_telemetry.py +0 -0
  370. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/__init__.py +0 -0
  371. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/__init__.py +0 -0
  372. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/conftest.py +0 -0
  373. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_arrow.py +0 -0
  374. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_audio.py +0 -0
  375. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_checkpoints.py +0 -0
  376. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_clip.py +0 -0
  377. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_datachain.py +0 -0
  378. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  379. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  380. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_diff.py +0 -0
  381. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_feature.py +0 -0
  382. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_feature_utils.py +0 -0
  383. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_file.py +0 -0
  384. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_hf.py +0 -0
  385. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_image.py +0 -0
  386. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_listing_info.py +0 -0
  387. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_namespace.py +0 -0
  388. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_partition_by.py +0 -0
  389. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_project.py +0 -0
  390. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_python_to_sql.py +0 -0
  391. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_schema.py +0 -0
  392. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_settings.py +0 -0
  393. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_signal_schema.py +0 -0
  394. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_sql_to_python.py +0 -0
  395. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_storage_pattern.py +0 -0
  396. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_text.py +0 -0
  397. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_udf.py +0 -0
  398. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_udf_signature.py +0 -0
  399. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_utils.py +0 -0
  400. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/lib/test_webdataset.py +0 -0
  401. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/model/__init__.py +0 -0
  402. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/model/test_bbox.py +0 -0
  403. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/model/test_pose.py +0 -0
  404. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/model/test_segment.py +0 -0
  405. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/model/test_utils.py +0 -0
  406. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/__init__.py +0 -0
  407. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  408. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/sqlite/test_types.py +0 -0
  409. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  410. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/test_array.py +0 -0
  411. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/test_conditional.py +0 -0
  412. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/test_path.py +0 -0
  413. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/test_random.py +0 -0
  414. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/test_selectable.py +0 -0
  415. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/sql/test_string.py +0 -0
  416. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_asyn.py +0 -0
  417. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_cache.py +0 -0
  418. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_catalog.py +0 -0
  419. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_catalog_loader.py +0 -0
  420. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_cli_datasets.py +0 -0
  421. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_cli_parsing.py +0 -0
  422. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_client.py +0 -0
  423. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_client_gcs.py +0 -0
  424. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_client_http.py +0 -0
  425. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_client_s3.py +0 -0
  426. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_config.py +0 -0
  427. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_data_storage.py +0 -0
  428. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_database_engine.py +0 -0
  429. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_dataset.py +0 -0
  430. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_dispatch.py +0 -0
  431. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_fileslice.py +0 -0
  432. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_func.py +0 -0
  433. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_listing.py +0 -0
  434. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_metastore.py +0 -0
  435. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_module_exports.py +0 -0
  436. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_pytorch.py +0 -0
  437. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_query.py +0 -0
  438. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_query_metrics.py +0 -0
  439. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_query_params.py +0 -0
  440. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_script_meta.py +0 -0
  441. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_semver.py +0 -0
  442. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_serializer.py +0 -0
  443. {datachain-0.35.0 → datachain-0.35.1}/tests/unit/test_session.py +0 -0
  444. {datachain-0.35.0 → datachain-0.35.1}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.35.0
3
+ Version: 0.35.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -22,7 +22,6 @@ from datachain.lib.signal_schema import SignalSchema
22
22
  from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
23
23
  from datachain.query.batch import RowsOutput
24
24
  from datachain.query.schema import ColumnMeta
25
- from datachain.query.utils import get_query_id_column
26
25
  from datachain.sql.functions import path as pathfunc
27
26
  from datachain.sql.types import Int, SQLType
28
27
  from datachain.utils import sql_escape_like
@@ -228,7 +227,8 @@ class AbstractWarehouse(ABC, Serializable):
228
227
  while True:
229
228
  if limit is not None:
230
229
  limit -= num_yielded
231
- if limit == 0:
230
+ num_yielded = 0
231
+ if limit <= 0:
232
232
  break
233
233
  if limit < page_size:
234
234
  paginated_query = paginated_query.limit(None).limit(limit)
@@ -246,32 +246,48 @@ class AbstractWarehouse(ABC, Serializable):
246
246
  break # no more results
247
247
  offset += page_size
248
248
 
249
- def _regenerate_system_columns(self, selectable):
250
- """Return a SELECT that regenerates sys__id and sys__rand deterministically."""
249
+ def _regenerate_system_columns(
250
+ self,
251
+ selectable: sa.Select | sa.CTE,
252
+ keep_existing_columns: bool = False,
253
+ ) -> sa.Select:
254
+ """
255
+ Return a SELECT that regenerates sys__id and sys__rand deterministically.
251
256
 
257
+ If keep_existing_columns is True, existing sys__id and sys__rand columns
258
+ will be kept as-is if they exist in the input selectable.
259
+ """
252
260
  base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
253
261
 
262
+ result_columns: dict[str, sa.ColumnElement] = {}
263
+ for col in base.c:
264
+ if col.name in result_columns:
265
+ raise ValueError(f"Duplicate column name {col.name} in SELECT")
266
+ if col.name in ("sys__id", "sys__rand"):
267
+ if keep_existing_columns:
268
+ result_columns[col.name] = col
269
+ else:
270
+ result_columns[col.name] = col
271
+
254
272
  system_types: dict[str, sa.types.TypeEngine] = {
255
273
  sys_col.name: sys_col.type
256
274
  for sys_col in self.schema.dataset_row_cls.sys_columns()
257
275
  }
258
276
 
259
- result_columns = []
260
- for col in base.c:
261
- if col.name == "sys__id":
262
- expr = self._system_row_number_expr()
263
- expr = sa.cast(expr, system_types["sys__id"])
264
- result_columns.append(expr.label("sys__id"))
265
- elif col.name == "sys__rand":
266
- expr = self._system_random_expr()
267
- expr = sa.cast(expr, system_types["sys__rand"])
268
- result_columns.append(expr.label("sys__rand"))
269
- else:
270
- result_columns.append(col)
277
+ # Add missing system columns if needed
278
+ if "sys__id" not in result_columns:
279
+ expr = self._system_row_number_expr()
280
+ expr = sa.cast(expr, system_types["sys__id"])
281
+ result_columns["sys__id"] = expr.label("sys__id")
282
+ if "sys__rand" not in result_columns:
283
+ expr = self._system_random_expr()
284
+ expr = sa.cast(expr, system_types["sys__rand"])
285
+ result_columns["sys__rand"] = expr.label("sys__rand")
271
286
 
272
287
  # Wrap in subquery to materialize window functions, then wrap again in SELECT
273
288
  # This ensures window functions are computed before INSERT...FROM SELECT
274
- inner = sa.select(*result_columns).select_from(base).subquery()
289
+ columns = list(result_columns.values())
290
+ inner = sa.select(*columns).select_from(base).subquery()
275
291
  return sa.select(*inner.c).select_from(inner)
276
292
 
277
293
  def _system_row_number_expr(self):
@@ -380,7 +396,7 @@ class AbstractWarehouse(ABC, Serializable):
380
396
  """
381
397
  Fetch dataset rows from database using a list of IDs.
382
398
  """
383
- if (id_col := get_query_id_column(query)) is None:
399
+ if (id_col := query.selected_columns.get("sys__id")) is None:
384
400
  raise RuntimeError("sys__id column not found in query")
385
401
 
386
402
  query = query._clone().offset(None).limit(None).order_by(None)
@@ -6,7 +6,6 @@ from collections.abc import Callable, Generator, Sequence
6
6
  import sqlalchemy as sa
7
7
 
8
8
  from datachain.data_storage.schema import PARTITION_COLUMN_ID
9
- from datachain.query.utils import get_query_column
10
9
 
11
10
  RowsOutputBatch = Sequence[Sequence]
12
11
  RowsOutput = Sequence | RowsOutputBatch
@@ -106,7 +105,7 @@ class Partition(BatchingStrategy):
106
105
  query: sa.Select,
107
106
  id_col: sa.ColumnElement | None = None,
108
107
  ) -> Generator[RowsOutput, None, None]:
109
- if (partition_col := get_query_column(query, PARTITION_COLUMN_ID)) is None:
108
+ if (partition_col := query.selected_columns.get(PARTITION_COLUMN_ID)) is None:
110
109
  raise RuntimeError("partition column not found in query")
111
110
 
112
111
  ids_only = False
@@ -438,6 +438,9 @@ class UDFStep(Step, ABC):
438
438
  """
439
439
 
440
440
  def populate_udf_table(self, udf_table: "Table", query: Select) -> None:
441
+ if "sys__id" not in query.selected_columns:
442
+ raise RuntimeError("Query must have sys__id column to run UDF")
443
+
441
444
  if (rows_total := self.catalog.warehouse.query_count(query)) == 0:
442
445
  return
443
446
 
@@ -580,13 +583,10 @@ class UDFStep(Step, ABC):
580
583
  """
581
584
  Create temporary table with group by partitions.
582
585
  """
583
- # Check if partition_by is set, we need it to create partitions.
584
- assert self.partition_by is not None
585
- # Check if sys__id is in the query, we need it to be able to join
586
- # the partition table with the udf table later.
587
- assert any(c.name == "sys__id" for c in query.selected_columns), (
588
- "Query must have sys__id column to use partitioning."
589
- )
586
+ if self.partition_by is None:
587
+ raise RuntimeError("Query must have partition_by set to use partitioning")
588
+ if (id_col := query.selected_columns.get("sys__id")) is None:
589
+ raise RuntimeError("Query must have sys__id column to use partitioning")
590
590
 
591
591
  if isinstance(self.partition_by, (list, tuple, GeneratorType)):
592
592
  list_partition_by = list(self.partition_by)
@@ -602,7 +602,7 @@ class UDFStep(Step, ABC):
602
602
 
603
603
  # fill table with partitions
604
604
  cols = [
605
- query.selected_columns.sys__id,
605
+ id_col,
606
606
  f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
607
607
  ]
608
608
  self.catalog.warehouse.db.execute(
@@ -634,21 +634,11 @@ class UDFStep(Step, ABC):
634
634
 
635
635
  # Apply partitioning if needed.
636
636
  if self.partition_by is not None:
637
- if not any(c.name == "sys__id" for c in query.selected_columns):
638
- # If sys__id is not in the query, we need to create a temp table
639
- # to hold the query results, so we can join it with the
640
- # partition table later.
641
- columns = [
642
- c if isinstance(c, Column) else Column(c.name, c.type)
643
- for c in query.subquery().columns
644
- ]
645
- temp_table = self.catalog.warehouse.create_dataset_rows_table(
646
- self.catalog.warehouse.temp_table_name(),
647
- columns=columns,
637
+ if "sys__id" not in query.selected_columns:
638
+ _query = query = self.catalog.warehouse._regenerate_system_columns(
639
+ query,
640
+ keep_existing_columns=True,
648
641
  )
649
- temp_tables.append(temp_table.name)
650
- self.catalog.warehouse.copy_table(temp_table, query)
651
- _query = query = temp_table.select()
652
642
 
653
643
  partition_tbl = self.create_partitions_table(query)
654
644
  temp_tables.append(partition_tbl.name)
@@ -22,7 +22,6 @@ from datachain.query.dataset import (
22
22
  )
23
23
  from datachain.query.queue import get_from_queue, put_into_queue
24
24
  from datachain.query.udf import UdfInfo
25
- from datachain.query.utils import get_query_id_column
26
25
  from datachain.utils import batched, flatten, safe_closing
27
26
 
28
27
  if TYPE_CHECKING:
@@ -55,6 +54,9 @@ def udf_entrypoint() -> int:
55
54
  udf_info: UdfInfo = load(stdin.buffer)
56
55
 
57
56
  query = udf_info["query"]
57
+ if "sys__id" not in query.selected_columns:
58
+ raise RuntimeError("sys__id column is required in UDF query")
59
+
58
60
  batching = udf_info["batching"]
59
61
  is_generator = udf_info["is_generator"]
60
62
 
@@ -65,15 +67,16 @@ def udf_entrypoint() -> int:
65
67
  wh_cls, wh_args, wh_kwargs = udf_info["warehouse_clone_params"]
66
68
  warehouse: AbstractWarehouse = wh_cls(*wh_args, **wh_kwargs)
67
69
 
68
- id_col = get_query_id_column(query)
69
-
70
70
  with contextlib.closing(
71
- batching(warehouse.dataset_select_paginated, query, id_col=id_col)
71
+ batching(
72
+ warehouse.dataset_select_paginated,
73
+ query,
74
+ id_col=query.selected_columns.sys__id,
75
+ )
72
76
  ) as udf_inputs:
73
77
  try:
74
78
  UDFDispatcher(udf_info).run_udf(
75
79
  udf_inputs,
76
- ids_only=id_col is not None,
77
80
  download_cb=download_cb,
78
81
  processed_cb=processed_cb,
79
82
  generated_cb=generated_cb,
@@ -147,10 +150,10 @@ class UDFDispatcher:
147
150
  self.udf_fields,
148
151
  )
149
152
 
150
- def _run_worker(self, ids_only: bool) -> None:
153
+ def _run_worker(self) -> None:
151
154
  try:
152
155
  worker = self._create_worker()
153
- worker.run(ids_only)
156
+ worker.run()
154
157
  except (Exception, KeyboardInterrupt) as e:
155
158
  if self.done_queue:
156
159
  put_into_queue(
@@ -164,7 +167,6 @@ class UDFDispatcher:
164
167
  def run_udf(
165
168
  self,
166
169
  input_rows: Iterable["RowsOutput"],
167
- ids_only: bool,
168
170
  download_cb: Callback = DEFAULT_CALLBACK,
169
171
  processed_cb: Callback = DEFAULT_CALLBACK,
170
172
  generated_cb: Callback = DEFAULT_CALLBACK,
@@ -178,9 +180,7 @@ class UDFDispatcher:
178
180
 
179
181
  if n_workers == 1:
180
182
  # no need to spawn worker processes if we are running in a single process
181
- self.run_udf_single(
182
- input_rows, ids_only, download_cb, processed_cb, generated_cb
183
- )
183
+ self.run_udf_single(input_rows, download_cb, processed_cb, generated_cb)
184
184
  else:
185
185
  if self.buffer_size < n_workers:
186
186
  raise RuntimeError(
@@ -189,13 +189,12 @@ class UDFDispatcher:
189
189
  )
190
190
 
191
191
  self.run_udf_parallel(
192
- n_workers, input_rows, ids_only, download_cb, processed_cb, generated_cb
192
+ n_workers, input_rows, download_cb, processed_cb, generated_cb
193
193
  )
194
194
 
195
195
  def run_udf_single(
196
196
  self,
197
197
  input_rows: Iterable["RowsOutput"],
198
- ids_only: bool,
199
198
  download_cb: Callback = DEFAULT_CALLBACK,
200
199
  processed_cb: Callback = DEFAULT_CALLBACK,
201
200
  generated_cb: Callback = DEFAULT_CALLBACK,
@@ -204,18 +203,15 @@ class UDFDispatcher:
204
203
  # Rebuild schemas in single process too for consistency (cheap, idempotent).
205
204
  ModelStore.rebuild_all()
206
205
 
207
- if ids_only and not self.is_batching:
206
+ if not self.is_batching:
208
207
  input_rows = flatten(input_rows)
209
208
 
210
209
  def get_inputs() -> Iterable["RowsOutput"]:
211
210
  warehouse = self.catalog.warehouse.clone()
212
- if ids_only:
213
- for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
214
- yield from warehouse.dataset_rows_select_from_ids(
215
- self.query, ids, self.is_batching
216
- )
217
- else:
218
- yield from input_rows
211
+ for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
212
+ yield from warehouse.dataset_rows_select_from_ids(
213
+ self.query, ids, self.is_batching
214
+ )
219
215
 
220
216
  prefetch = udf.prefetch
221
217
  with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
@@ -249,7 +245,6 @@ class UDFDispatcher:
249
245
  self,
250
246
  n_workers: int,
251
247
  input_rows: Iterable["RowsOutput"],
252
- ids_only: bool,
253
248
  download_cb: Callback = DEFAULT_CALLBACK,
254
249
  processed_cb: Callback = DEFAULT_CALLBACK,
255
250
  generated_cb: Callback = DEFAULT_CALLBACK,
@@ -258,9 +253,7 @@ class UDFDispatcher:
258
253
  self.done_queue = self.ctx.Queue()
259
254
 
260
255
  pool = [
261
- self.ctx.Process(
262
- name=f"Worker-UDF-{i}", target=self._run_worker, args=[ids_only]
263
- )
256
+ self.ctx.Process(name=f"Worker-UDF-{i}", target=self._run_worker)
264
257
  for i in range(n_workers)
265
258
  ]
266
259
  for p in pool:
@@ -406,13 +399,13 @@ class UDFWorker:
406
399
  self.processed_cb = ProcessedCallback("processed", self.done_queue)
407
400
  self.generated_cb = ProcessedCallback("generated", self.done_queue)
408
401
 
409
- def run(self, ids_only: bool) -> None:
402
+ def run(self) -> None:
410
403
  prefetch = self.udf.prefetch
411
404
  with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
412
405
  catalog = clone_catalog_with_cache(self.catalog, _cache)
413
406
  udf_results = self.udf.run(
414
407
  self.udf_fields,
415
- self.get_inputs(ids_only),
408
+ self.get_inputs(),
416
409
  catalog,
417
410
  self.cache,
418
411
  download_cb=self.download_cb,
@@ -434,13 +427,10 @@ class UDFWorker:
434
427
  put_into_queue(self.done_queue, {"status": OK_STATUS})
435
428
  yield row
436
429
 
437
- def get_inputs(self, ids_only: bool) -> Iterable["RowsOutput"]:
430
+ def get_inputs(self) -> Iterable["RowsOutput"]:
438
431
  warehouse = self.catalog.warehouse.clone()
439
432
  while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
440
- if ids_only:
441
- for ids in batched(batch, DEFAULT_BATCH_SIZE):
442
- yield from warehouse.dataset_rows_select_from_ids(
443
- self.query, ids, self.is_batching
444
- )
445
- else:
446
- yield from batch
433
+ for ids in batched(batch, DEFAULT_BATCH_SIZE):
434
+ yield from warehouse.dataset_rows_select_from_ids(
435
+ self.query, ids, self.is_batching
436
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.35.0
3
+ Version: 0.35.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -249,7 +249,6 @@ src/datachain/query/queue.py
249
249
  src/datachain/query/schema.py
250
250
  src/datachain/query/session.py
251
251
  src/datachain/query/udf.py
252
- src/datachain/query/utils.py
253
252
  src/datachain/remote/__init__.py
254
253
  src/datachain/remote/studio.py
255
254
  src/datachain/sql/__init__.py
@@ -301,7 +300,6 @@ tests/examples/wds_data.py
301
300
  tests/func/__init__.py
302
301
  tests/func/fake-service-account-credentials.json
303
302
  tests/func/test_audio.py
304
- tests/func/test_batching.py
305
303
  tests/func/test_catalog.py
306
304
  tests/func/test_client.py
307
305
  tests/func/test_cloud_transfer.py
@@ -361,6 +359,7 @@ tests/scripts/feature_class_parallel_data_model.py
361
359
  tests/scripts/name_len_slow.py
362
360
  tests/unit/__init__.py
363
361
  tests/unit/test_asyn.py
362
+ tests/unit/test_batching.py
364
363
  tests/unit/test_cache.py
365
364
  tests/unit/test_catalog.py
366
365
  tests/unit/test_catalog_loader.py
@@ -1065,6 +1065,52 @@ def dog_entries():
1065
1065
  return _create_dog_entries
1066
1066
 
1067
1067
 
1068
+ PRIMES_UP_TO_73 = (
1069
+ 2,
1070
+ 3,
1071
+ 5,
1072
+ 7,
1073
+ 11,
1074
+ 13,
1075
+ 17,
1076
+ 19,
1077
+ 23,
1078
+ 29,
1079
+ 31,
1080
+ 37,
1081
+ 41,
1082
+ 43,
1083
+ 47,
1084
+ 53,
1085
+ 59,
1086
+ 61,
1087
+ 67,
1088
+ 71,
1089
+ 73,
1090
+ )
1091
+
1092
+
1093
+ @pytest.fixture
1094
+ def numbers_ds(test_session) -> Generator[DatasetRecord, None, None]:
1095
+ numbers = list(range(1, 74))
1096
+ ds = dc.read_values(
1097
+ number=numbers,
1098
+ parity=["odd" if n % 2 else "even" for n in numbers],
1099
+ primality=["prime" if n in PRIMES_UP_TO_73 else "composite" for n in numbers],
1100
+ last_digit=[n % 10 for n in numbers],
1101
+ session=test_session,
1102
+ ).save("numbers_dataset")
1103
+ assert ds.dataset is not None
1104
+ yield ds.dataset
1105
+ dc.delete_dataset(ds.dataset.name, force=True)
1106
+
1107
+
1108
+ @pytest.fixture
1109
+ def numbers_table(warehouse, numbers_ds) -> Generator[sqlalchemy.Table, None, None]:
1110
+ table_name = warehouse.dataset_table_name(numbers_ds, numbers_ds.latest_version)
1111
+ yield warehouse.get_table(table_name)
1112
+
1113
+
1068
1114
  @pytest.fixture
1069
1115
  def mock_parquet_data(compressed_parquet_data, dog_entries, version="1.0.0"):
1070
1116
  return compressed_parquet_data(dog_entries(version))