datachain 0.36.0__tar.gz → 0.36.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (442)
  1. {datachain-0.36.0 → datachain-0.36.2}/PKG-INFO +3 -2
  2. {datachain-0.36.0 → datachain-0.36.2}/pyproject.toml +8 -3
  3. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/metastore.py +35 -23
  4. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/schema.py +1 -2
  5. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/sqlite.py +27 -10
  6. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/warehouse.py +50 -33
  7. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/diff/__init__.py +2 -6
  8. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/audio.py +54 -53
  9. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/datachain.py +13 -14
  10. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/dataset.py +21 -26
  11. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/dispatch.py +64 -42
  12. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/queue.py +2 -1
  13. {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/PKG-INFO +3 -2
  14. {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/requires.txt +2 -1
  15. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_datachain.py +1 -1
  16. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_datachain_merge.py +7 -18
  17. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_retry.py +0 -1
  18. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_udf.py +116 -0
  19. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_audio.py +31 -37
  20. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_datachain.py +15 -13
  21. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_datachain_hash.py +1 -1
  22. {datachain-0.36.0 → datachain-0.36.2}/.cruft.json +0 -0
  23. {datachain-0.36.0 → datachain-0.36.2}/.gitattributes +0 -0
  24. {datachain-0.36.0 → datachain-0.36.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  25. {datachain-0.36.0 → datachain-0.36.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  26. {datachain-0.36.0 → datachain-0.36.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  27. {datachain-0.36.0 → datachain-0.36.2}/.github/codecov.yaml +0 -0
  28. {datachain-0.36.0 → datachain-0.36.2}/.github/dependabot.yml +0 -0
  29. {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/benchmarks.yml +0 -0
  30. {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/release.yml +0 -0
  31. {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/tests-studio.yml +0 -0
  32. {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/tests.yml +0 -0
  33. {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/update-template.yaml +0 -0
  34. {datachain-0.36.0 → datachain-0.36.2}/.gitignore +0 -0
  35. {datachain-0.36.0 → datachain-0.36.2}/.pre-commit-config.yaml +0 -0
  36. {datachain-0.36.0 → datachain-0.36.2}/CODE_OF_CONDUCT.rst +0 -0
  37. {datachain-0.36.0 → datachain-0.36.2}/LICENSE +0 -0
  38. {datachain-0.36.0 → datachain-0.36.2}/README.rst +0 -0
  39. {datachain-0.36.0 → datachain-0.36.2}/docs/api_hooks.py +0 -0
  40. {datachain-0.36.0 → datachain-0.36.2}/docs/assets/captioned_cartoons.png +0 -0
  41. {datachain-0.36.0 → datachain-0.36.2}/docs/assets/datachain-white.svg +0 -0
  42. {datachain-0.36.0 → datachain-0.36.2}/docs/assets/datachain.svg +0 -0
  43. {datachain-0.36.0 → datachain-0.36.2}/docs/assets/webhook_dialog.png +0 -0
  44. {datachain-0.36.0 → datachain-0.36.2}/docs/assets/webhook_list.png +0 -0
  45. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/login.md +0 -0
  46. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/logout.md +0 -0
  47. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/team.md +0 -0
  48. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/token.md +0 -0
  49. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/index.md +0 -0
  50. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/cancel.md +0 -0
  51. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/clusters.md +0 -0
  52. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/logs.md +0 -0
  53. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/ls.md +0 -0
  54. {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/run.md +0 -0
  55. {datachain-0.36.0 → datachain-0.36.2}/docs/contributing.md +0 -0
  56. {datachain-0.36.0 → datachain-0.36.2}/docs/css/github-permalink-style.css +0 -0
  57. {datachain-0.36.0 → datachain-0.36.2}/docs/examples.md +0 -0
  58. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/db_migrations.md +0 -0
  59. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/delta.md +0 -0
  60. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/env.md +0 -0
  61. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/index.md +0 -0
  62. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/namespaces.md +0 -0
  63. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/processing.md +0 -0
  64. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/remotes.md +0 -0
  65. {datachain-0.36.0 → datachain-0.36.2}/docs/guide/retry.md +0 -0
  66. {datachain-0.36.0 → datachain-0.36.2}/docs/index.md +0 -0
  67. {datachain-0.36.0 → datachain-0.36.2}/docs/overrides/main.html +0 -0
  68. {datachain-0.36.0 → datachain-0.36.2}/docs/quick-start.md +0 -0
  69. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/arrowrow.md +0 -0
  70. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/bbox.md +0 -0
  71. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/file.md +0 -0
  72. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/imagefile.md +0 -0
  73. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/index.md +0 -0
  74. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/pose.md +0 -0
  75. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/segment.md +0 -0
  76. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/tarvfile.md +0 -0
  77. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/textfile.md +0 -0
  78. {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/videofile.md +0 -0
  79. {datachain-0.36.0 → datachain-0.36.2}/docs/references/datachain.md +0 -0
  80. {datachain-0.36.0 → datachain-0.36.2}/docs/references/func.md +0 -0
  81. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/aggregate.md +0 -0
  82. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/array.md +0 -0
  83. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/conditional.md +0 -0
  84. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/numeric.md +0 -0
  85. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/path.md +0 -0
  86. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/random.md +0 -0
  87. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/string.md +0 -0
  88. {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/window.md +0 -0
  89. {datachain-0.36.0 → datachain-0.36.2}/docs/references/index.md +0 -0
  90. {datachain-0.36.0 → datachain-0.36.2}/docs/references/toolkit.md +0 -0
  91. {datachain-0.36.0 → datachain-0.36.2}/docs/references/torch.md +0 -0
  92. {datachain-0.36.0 → datachain-0.36.2}/docs/references/udf.md +0 -0
  93. {datachain-0.36.0 → datachain-0.36.2}/docs/studio/api/.gitkeep +0 -0
  94. {datachain-0.36.0 → datachain-0.36.2}/docs/studio/webhooks.md +0 -0
  95. {datachain-0.36.0 → datachain-0.36.2}/docs/templates/main.dot +0 -0
  96. {datachain-0.36.0 → datachain-0.36.2}/docs/templates/operation.dot +0 -0
  97. {datachain-0.36.0 → datachain-0.36.2}/docs/templates/responses.def +0 -0
  98. {datachain-0.36.0 → datachain-0.36.2}/docs/tutorials.md +0 -0
  99. {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  100. {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  101. {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/openimage-detect.py +0 -0
  102. {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  103. {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  104. {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  105. {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/common_sql_functions.py +0 -0
  106. {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/json-csv-reader.py +0 -0
  107. {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/nested_datamodel.py +0 -0
  108. {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/torch-loader.py +0 -0
  109. {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/udfs/parallel.py +0 -0
  110. {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/udfs/simple.py +0 -0
  111. {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/udfs/stateful.py +0 -0
  112. {datachain-0.36.0 → datachain-0.36.2}/examples/incremental_processing/delta.py +0 -0
  113. {datachain-0.36.0 → datachain-0.36.2}/examples/incremental_processing/retry.py +0 -0
  114. {datachain-0.36.0 → datachain-0.36.2}/examples/incremental_processing/utils.py +0 -0
  115. {datachain-0.36.0 → datachain-0.36.2}/examples/llm_and_nlp/claude-query.py +0 -0
  116. {datachain-0.36.0 → datachain-0.36.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  117. {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/audio-to-text.py +0 -0
  118. {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/clip_inference.py +0 -0
  119. {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/hf_pipeline.py +0 -0
  120. {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  121. {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/wds.py +0 -0
  122. {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/wds_filtered.py +0 -0
  123. {datachain-0.36.0 → datachain-0.36.2}/mkdocs.yml +0 -0
  124. {datachain-0.36.0 → datachain-0.36.2}/noxfile.py +0 -0
  125. {datachain-0.36.0 → datachain-0.36.2}/setup.cfg +0 -0
  126. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/__init__.py +0 -0
  127. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/__main__.py +0 -0
  128. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/asyn.py +0 -0
  129. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cache.py +0 -0
  130. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/__init__.py +0 -0
  131. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/catalog.py +0 -0
  132. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/datasource.py +0 -0
  133. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/dependency.py +0 -0
  134. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/loader.py +0 -0
  135. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/checkpoint.py +0 -0
  136. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/__init__.py +0 -0
  137. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/__init__.py +0 -0
  138. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/datasets.py +0 -0
  139. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/du.py +0 -0
  140. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/index.py +0 -0
  141. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/ls.py +0 -0
  142. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/misc.py +0 -0
  143. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/query.py +0 -0
  144. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/show.py +0 -0
  145. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/__init__.py +0 -0
  146. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/job.py +0 -0
  147. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/studio.py +0 -0
  148. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/utils.py +0 -0
  149. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/utils.py +0 -0
  150. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/__init__.py +0 -0
  151. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/azure.py +0 -0
  152. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/fileslice.py +0 -0
  153. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/fsspec.py +0 -0
  154. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/gcs.py +0 -0
  155. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/hf.py +0 -0
  156. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/http.py +0 -0
  157. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/local.py +0 -0
  158. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/s3.py +0 -0
  159. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/config.py +0 -0
  160. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/__init__.py +0 -0
  161. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/db_engine.py +0 -0
  162. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/job.py +0 -0
  163. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/serializer.py +0 -0
  164. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/dataset.py +0 -0
  165. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/delta.py +0 -0
  166. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/error.py +0 -0
  167. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/fs/__init__.py +0 -0
  168. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/fs/reference.py +0 -0
  169. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/fs/utils.py +0 -0
  170. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/__init__.py +0 -0
  171. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/aggregate.py +0 -0
  172. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/array.py +0 -0
  173. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/base.py +0 -0
  174. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/conditional.py +0 -0
  175. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/func.py +0 -0
  176. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/numeric.py +0 -0
  177. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/path.py +0 -0
  178. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/random.py +0 -0
  179. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/string.py +0 -0
  180. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/window.py +0 -0
  181. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/hash_utils.py +0 -0
  182. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/job.py +0 -0
  183. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/__init__.py +0 -0
  184. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/arrow.py +0 -0
  185. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/clip.py +0 -0
  186. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/__init__.py +0 -0
  187. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/flatten.py +0 -0
  188. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  189. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  190. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/unflatten.py +0 -0
  191. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  192. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/data_model.py +0 -0
  193. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dataset_info.py +0 -0
  194. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/__init__.py +0 -0
  195. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/csv.py +0 -0
  196. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/database.py +0 -0
  197. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/datasets.py +0 -0
  198. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/hf.py +0 -0
  199. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/json.py +0 -0
  200. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/listings.py +0 -0
  201. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/pandas.py +0 -0
  202. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/parquet.py +0 -0
  203. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/records.py +0 -0
  204. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/storage.py +0 -0
  205. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/storage_pattern.py +0 -0
  206. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/utils.py +0 -0
  207. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/values.py +0 -0
  208. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/file.py +0 -0
  209. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/hf.py +0 -0
  210. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/image.py +0 -0
  211. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/listing.py +0 -0
  212. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/listing_info.py +0 -0
  213. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/meta_formats.py +0 -0
  214. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/model_store.py +0 -0
  215. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/namespaces.py +0 -0
  216. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/projects.py +0 -0
  217. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/pytorch.py +0 -0
  218. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/settings.py +0 -0
  219. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/signal_schema.py +0 -0
  220. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/tar.py +0 -0
  221. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/text.py +0 -0
  222. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/udf.py +0 -0
  223. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/udf_signature.py +0 -0
  224. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/utils.py +0 -0
  225. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/video.py +0 -0
  226. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/webdataset.py +0 -0
  227. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/webdataset_laion.py +0 -0
  228. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/listing.py +0 -0
  229. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/__init__.py +0 -0
  230. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/bbox.py +0 -0
  231. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/pose.py +0 -0
  232. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/segment.py +0 -0
  233. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  234. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  235. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/pose.py +0 -0
  236. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/segment.py +0 -0
  237. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/utils.py +0 -0
  238. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/namespace.py +0 -0
  239. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/node.py +0 -0
  240. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/nodes_fetcher.py +0 -0
  241. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/nodes_thread_pool.py +0 -0
  242. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/plugins.py +0 -0
  243. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/progress.py +0 -0
  244. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/project.py +0 -0
  245. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/py.typed +0 -0
  246. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/__init__.py +0 -0
  247. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/batch.py +0 -0
  248. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/metrics.py +0 -0
  249. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/params.py +0 -0
  250. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/schema.py +0 -0
  251. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/session.py +0 -0
  252. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/udf.py +0 -0
  253. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/remote/__init__.py +0 -0
  254. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/remote/studio.py +0 -0
  255. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/script_meta.py +0 -0
  256. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/semver.py +0 -0
  257. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/__init__.py +0 -0
  258. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/default/__init__.py +0 -0
  259. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/default/base.py +0 -0
  260. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/__init__.py +0 -0
  261. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/aggregate.py +0 -0
  262. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/array.py +0 -0
  263. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/conditional.py +0 -0
  264. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/numeric.py +0 -0
  265. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/path.py +0 -0
  266. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/random.py +0 -0
  267. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/string.py +0 -0
  268. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/postgresql_dialect.py +0 -0
  269. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/postgresql_types.py +0 -0
  270. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/selectable.py +0 -0
  271. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  272. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/base.py +0 -0
  273. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/types.py +0 -0
  274. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/vector.py +0 -0
  275. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/types.py +0 -0
  276. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/utils.py +0 -0
  277. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/studio.py +0 -0
  278. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/telemetry.py +0 -0
  279. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/toolkit/__init__.py +0 -0
  280. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/toolkit/split.py +0 -0
  281. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/torch/__init__.py +0 -0
  282. {datachain-0.36.0 → datachain-0.36.2}/src/datachain/utils.py +0 -0
  283. {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/SOURCES.txt +0 -0
  284. {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  285. {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/entry_points.txt +0 -0
  286. {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/top_level.txt +0 -0
  287. {datachain-0.36.0 → datachain-0.36.2}/tests/__init__.py +0 -0
  288. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/__init__.py +0 -0
  289. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/conftest.py +0 -0
  290. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  291. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  292. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/.gitignore +0 -0
  293. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  294. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/test_datachain.py +0 -0
  295. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/test_ls.py +0 -0
  296. {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/test_version.py +0 -0
  297. {datachain-0.36.0 → datachain-0.36.2}/tests/conftest.py +0 -0
  298. {datachain-0.36.0 → datachain-0.36.2}/tests/data.py +0 -0
  299. {datachain-0.36.0 → datachain-0.36.2}/tests/examples/__init__.py +0 -0
  300. {datachain-0.36.0 → datachain-0.36.2}/tests/examples/test_examples.py +0 -0
  301. {datachain-0.36.0 → datachain-0.36.2}/tests/examples/test_wds_e2e.py +0 -0
  302. {datachain-0.36.0 → datachain-0.36.2}/tests/examples/wds_data.py +0 -0
  303. {datachain-0.36.0 → datachain-0.36.2}/tests/func/__init__.py +0 -0
  304. {datachain-0.36.0 → datachain-0.36.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  305. {datachain-0.36.0 → datachain-0.36.2}/tests/func/data/lena.jpg +0 -0
  306. {datachain-0.36.0 → datachain-0.36.2}/tests/func/fake-service-account-credentials.json +0 -0
  307. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/__init__.py +0 -0
  308. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_aggregate.py +0 -0
  309. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_array.py +0 -0
  310. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_conditional.py +0 -0
  311. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_numeric.py +0 -0
  312. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_path.py +0 -0
  313. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_random.py +0 -0
  314. {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_string.py +0 -0
  315. {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/__init__.py +0 -0
  316. {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/running-mask0.png +0 -0
  317. {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/running-mask1.png +0 -0
  318. {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/running.jpg +0 -0
  319. {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/ships.jpg +0 -0
  320. {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/test_yolo.py +0 -0
  321. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_audio.py +0 -0
  322. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_catalog.py +0 -0
  323. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_client.py +0 -0
  324. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_cloud_transfer.py +0 -0
  325. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_data_storage.py +0 -0
  326. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_dataset_query.py +0 -0
  327. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_datasets.py +0 -0
  328. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_delta.py +0 -0
  329. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_feature_pickling.py +0 -0
  330. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_file.py +0 -0
  331. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_hf.py +0 -0
  332. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_hidden_field.py +0 -0
  333. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_image.py +0 -0
  334. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_listing.py +0 -0
  335. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_ls.py +0 -0
  336. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_meta_formats.py +0 -0
  337. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_metastore.py +0 -0
  338. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_metrics.py +0 -0
  339. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_mutate.py +0 -0
  340. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_pull.py +0 -0
  341. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_pytorch.py +0 -0
  342. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_query.py +0 -0
  343. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_read_database.py +0 -0
  344. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_read_dataset_remote.py +0 -0
  345. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  346. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_session.py +0 -0
  347. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_storage_pattern.py +0 -0
  348. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_studio_datetime_parsing.py +0 -0
  349. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_to_database.py +0 -0
  350. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_toolkit.py +0 -0
  351. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_union.py +0 -0
  352. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_video.py +0 -0
  353. {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_warehouse.py +0 -0
  354. {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class.py +0 -0
  355. {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class_exception.py +0 -0
  356. {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class_parallel.py +0 -0
  357. {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  358. {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/name_len_slow.py +0 -0
  359. {datachain-0.36.0 → datachain-0.36.2}/tests/test_atomicity.py +0 -0
  360. {datachain-0.36.0 → datachain-0.36.2}/tests/test_cli_e2e.py +0 -0
  361. {datachain-0.36.0 → datachain-0.36.2}/tests/test_cli_studio.py +0 -0
  362. {datachain-0.36.0 → datachain-0.36.2}/tests/test_import_time.py +0 -0
  363. {datachain-0.36.0 → datachain-0.36.2}/tests/test_query_e2e.py +0 -0
  364. {datachain-0.36.0 → datachain-0.36.2}/tests/test_telemetry.py +0 -0
  365. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/__init__.py +0 -0
  366. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/__init__.py +0 -0
  367. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/conftest.py +0 -0
  368. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_arrow.py +0 -0
  369. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_checkpoints.py +0 -0
  370. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_clip.py +0 -0
  371. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  372. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  373. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_diff.py +0 -0
  374. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_feature.py +0 -0
  375. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_feature_utils.py +0 -0
  376. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_file.py +0 -0
  377. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_hf.py +0 -0
  378. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_image.py +0 -0
  379. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_listing_info.py +0 -0
  380. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_namespace.py +0 -0
  381. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_partition_by.py +0 -0
  382. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_project.py +0 -0
  383. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  384. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_schema.py +0 -0
  385. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_settings.py +0 -0
  386. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_signal_schema.py +0 -0
  387. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  388. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_storage_pattern.py +0 -0
  389. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_text.py +0 -0
  390. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_udf.py +0 -0
  391. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_udf_signature.py +0 -0
  392. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_utils.py +0 -0
  393. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_webdataset.py +0 -0
  394. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/__init__.py +0 -0
  395. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_bbox.py +0 -0
  396. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_pose.py +0 -0
  397. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_segment.py +0 -0
  398. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_utils.py +0 -0
  399. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/__init__.py +0 -0
  400. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  401. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  402. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  403. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_array.py +0 -0
  404. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_conditional.py +0 -0
  405. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_path.py +0 -0
  406. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_random.py +0 -0
  407. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_selectable.py +0 -0
  408. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_string.py +0 -0
  409. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_asyn.py +0 -0
  410. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_batching.py +0 -0
  411. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_cache.py +0 -0
  412. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_catalog.py +0 -0
  413. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_catalog_loader.py +0 -0
  414. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_cli_datasets.py +0 -0
  415. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_cli_parsing.py +0 -0
  416. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client.py +0 -0
  417. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client_gcs.py +0 -0
  418. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client_http.py +0 -0
  419. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client_s3.py +0 -0
  420. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_config.py +0 -0
  421. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_data_storage.py +0 -0
  422. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_database_engine.py +0 -0
  423. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_dataset.py +0 -0
  424. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_dispatch.py +0 -0
  425. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_fileslice.py +0 -0
  426. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_func.py +0 -0
  427. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_hash_utils.py +0 -0
  428. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_listing.py +0 -0
  429. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_metastore.py +0 -0
  430. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_module_exports.py +0 -0
  431. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_pytorch.py +0 -0
  432. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query.py +0 -0
  433. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query_metrics.py +0 -0
  434. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query_params.py +0 -0
  435. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query_steps_hash.py +0 -0
  436. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_script_meta.py +0 -0
  437. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_semver.py +0 -0
  438. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_serializer.py +0 -0
  439. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_session.py +0 -0
  440. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_utils.py +0 -0
  441. {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_warehouse.py +0 -0
  442. {datachain-0.36.0 → datachain-0.36.2}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.36.0
3
+ Version: 0.36.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -64,7 +64,6 @@ Requires-Dist: torch>=2.1.0; extra == "torch"
64
64
  Requires-Dist: torchvision; extra == "torch"
65
65
  Requires-Dist: transformers>=4.36.0; extra == "torch"
66
66
  Provides-Extra: audio
67
- Requires-Dist: torchaudio; extra == "audio"
68
67
  Requires-Dist: soundfile; extra == "audio"
69
68
  Provides-Extra: remote
70
69
  Requires-Dist: lz4; extra == "remote"
@@ -76,6 +75,7 @@ Requires-Dist: numba>=0.60.0; extra == "hf"
76
75
  Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
77
76
  Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
78
77
  Requires-Dist: fsspec>=2024.12.0; extra == "hf"
78
+ Requires-Dist: torch<2.9.0; extra == "hf"
79
79
  Provides-Extra: video
80
80
  Requires-Dist: ffmpeg-python; extra == "video"
81
81
  Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
@@ -117,6 +117,7 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
117
117
  Requires-Dist: ultralytics; extra == "examples"
118
118
  Requires-Dist: open_clip_torch; extra == "examples"
119
119
  Requires-Dist: openai; extra == "examples"
120
+ Requires-Dist: torchaudio<2.9.0; extra == "examples"
120
121
  Dynamic: license-file
121
122
 
122
123
  ================
@@ -73,7 +73,6 @@ torch = [
73
73
  "transformers>=4.36.0"
74
74
  ]
75
75
  audio = [
76
- "torchaudio",
77
76
  "soundfile"
78
77
  ]
79
78
  remote = [
@@ -88,7 +87,11 @@ hf = [
88
87
  "datasets[vision]>=4.0.0",
89
88
  # https://github.com/pytorch/torchcodec/issues/640
90
89
  "datasets[audio]>=4.0.0 ; (sys_platform == 'linux' or sys_platform == 'darwin')",
91
- "fsspec>=2024.12.0"
90
+ "fsspec>=2024.12.0",
91
+ # Until datasets solve the issue, run test_hf_audio test to see if this can be removed
92
+ # https://github.com/meta-pytorch/torchcodec/issues/912
93
+ # https://github.com/huggingface/transformers/pull/41610
94
+ "torch<2.9.0"
92
95
  ]
93
96
  video = [
94
97
  "ffmpeg-python",
@@ -134,7 +137,9 @@ examples = [
134
137
  "huggingface_hub[hf_transfer]",
135
138
  "ultralytics",
136
139
  "open_clip_torch",
137
- "openai"
140
+ "openai",
141
+ # Transformers still require it
142
+ "torchaudio<2.9.0"
138
143
  ]
139
144
 
140
145
  [project.urls]
@@ -56,13 +56,15 @@ from datachain.project import Project
56
56
  from datachain.utils import JSONSerialize
57
57
 
58
58
  if TYPE_CHECKING:
59
- from sqlalchemy import Delete, Insert, Select, Update
59
+ from sqlalchemy import CTE, Delete, Insert, Select, Subquery, Update
60
60
  from sqlalchemy.schema import SchemaItem
61
+ from sqlalchemy.sql.elements import ColumnElement
61
62
 
62
63
  from datachain.data_storage import schema
63
64
  from datachain.data_storage.db_engine import DatabaseEngine
64
65
 
65
66
  logger = logging.getLogger("datachain")
67
+ DEPTH_LIMIT_DEFAULT = 100
66
68
 
67
69
 
68
70
  class AbstractMetastore(ABC, Serializable):
@@ -1463,6 +1465,18 @@ class AbstractDBMetastore(AbstractMetastore):
1463
1465
  Returns a list of columns to select in a query for fetching dataset dependencies
1464
1466
  """
1465
1467
 
1468
+ @abstractmethod
1469
+ def _dataset_dependency_nodes_select_columns(
1470
+ self,
1471
+ namespaces_subquery: "Subquery",
1472
+ dependency_tree_cte: "CTE",
1473
+ datasets_subquery: "Subquery",
1474
+ ) -> list["ColumnElement"]:
1475
+ """
1476
+ Returns a list of columns to select in a query for fetching
1477
+ dataset dependency nodes.
1478
+ """
1479
+
1466
1480
  def get_direct_dataset_dependencies(
1467
1481
  self, dataset: DatasetRecord, version: str
1468
1482
  ) -> list[DatasetDependency | None]:
@@ -1493,7 +1507,7 @@ class AbstractDBMetastore(AbstractMetastore):
1493
1507
  return [self.dependency_class.parse(*r) for r in self.db.execute(query)]
1494
1508
 
1495
1509
  def get_dataset_dependency_nodes(
1496
- self, dataset_id: int, version_id: int
1510
+ self, dataset_id: int, version_id: int, depth_limit: int = DEPTH_LIMIT_DEFAULT
1497
1511
  ) -> list[DatasetDependencyNode | None]:
1498
1512
  n = self._namespaces_select().subquery()
1499
1513
  p = self._projects
@@ -1522,33 +1536,31 @@ class AbstractDBMetastore(AbstractMetastore):
1522
1536
  cte = base_query.cte(name="dependency_tree", recursive=True)
1523
1537
 
1524
1538
  # Recursive case: dependencies of dependencies
1525
- recursive_query = select(
1526
- *dep_fields,
1527
- (cte.c.depth + 1).label("depth"),
1528
- ).select_from(
1529
- cte.join(
1530
- dd,
1531
- (cte.c.dataset_id == dd.c.source_dataset_id)
1532
- & (cte.c.dataset_version_id == dd.c.source_dataset_version_id),
1539
+ # Limit depth to 100 to prevent infinite loops in case of circular dependencies
1540
+ recursive_query = (
1541
+ select(
1542
+ *dep_fields,
1543
+ (cte.c.depth + 1).label("depth"),
1533
1544
  )
1545
+ .select_from(
1546
+ cte.join(
1547
+ dd,
1548
+ (cte.c.dataset_id == dd.c.source_dataset_id)
1549
+ & (cte.c.dataset_version_id == dd.c.source_dataset_version_id),
1550
+ )
1551
+ )
1552
+ .where(cte.c.depth < depth_limit)
1534
1553
  )
1535
1554
 
1536
1555
  cte = cte.union(recursive_query)
1537
1556
 
1538
1557
  # Fetch all with full details
1539
- final_query = select(
1540
- n.c.name,
1541
- p.c.name,
1542
- cte.c.id,
1543
- cte.c.dataset_id,
1544
- cte.c.dataset_version_id,
1545
- d.c.name,
1546
- dv.c.version,
1547
- dv.c.created_at,
1548
- cte.c.source_dataset_id,
1549
- cte.c.source_dataset_version_id,
1550
- cte.c.depth,
1551
- ).select_from(
1558
+ select_cols = self._dataset_dependency_nodes_select_columns(
1559
+ namespaces_subquery=n,
1560
+ dependency_tree_cte=cte,
1561
+ datasets_subquery=d,
1562
+ )
1563
+ final_query = self._datasets_dependencies_select(*select_cols).select_from(
1552
1564
  # Use outer joins to handle cases where dependent datasets have been
1553
1565
  # physically deleted. This allows us to return dependency records with
1554
1566
  # None values instead of silently omitting them, making broken
@@ -11,7 +11,6 @@ from datachain.sql.types import (
11
11
  JSON,
12
12
  Boolean,
13
13
  DateTime,
14
- Int,
15
14
  Int64,
16
15
  SQLType,
17
16
  String,
@@ -269,7 +268,7 @@ class DataTable:
269
268
  @classmethod
270
269
  def sys_columns(cls):
271
270
  return [
272
- sa.Column("sys__id", Int, primary_key=True),
271
+ sa.Column("sys__id", UInt64, primary_key=True),
273
272
  sa.Column(
274
273
  "sys__rand", UInt64, nullable=False, server_default=f.abs(f.random())
275
274
  ),
@@ -20,7 +20,10 @@ from sqlalchemy import (
20
20
  from sqlalchemy.dialects import sqlite
21
21
  from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
22
22
  from sqlalchemy.sql import func
23
- from sqlalchemy.sql.elements import BinaryExpression, BooleanClauseList
23
+ from sqlalchemy.sql.elements import (
24
+ BinaryExpression,
25
+ BooleanClauseList,
26
+ )
24
27
  from sqlalchemy.sql.expression import bindparam, cast
25
28
  from sqlalchemy.sql.selectable import Select
26
29
  from tqdm.auto import tqdm
@@ -41,6 +44,7 @@ from datachain.sql.types import SQLType
41
44
  from datachain.utils import DataChainDir, batched, batched_it
42
45
 
43
46
  if TYPE_CHECKING:
47
+ from sqlalchemy import CTE, Subquery
44
48
  from sqlalchemy.dialects.sqlite import Insert
45
49
  from sqlalchemy.engine.base import Engine
46
50
  from sqlalchemy.schema import SchemaItem
@@ -539,6 +543,26 @@ class SQLiteMetastore(AbstractDBMetastore):
539
543
  self._datasets_versions.c.created_at,
540
544
  ]
541
545
 
546
+ def _dataset_dependency_nodes_select_columns(
547
+ self,
548
+ namespaces_subquery: "Subquery",
549
+ dependency_tree_cte: "CTE",
550
+ datasets_subquery: "Subquery",
551
+ ) -> list["ColumnElement"]:
552
+ return [
553
+ namespaces_subquery.c.name,
554
+ self._projects.c.name,
555
+ dependency_tree_cte.c.id,
556
+ dependency_tree_cte.c.dataset_id,
557
+ dependency_tree_cte.c.dataset_version_id,
558
+ datasets_subquery.c.name,
559
+ self._datasets_versions.c.version,
560
+ self._datasets_versions.c.created_at,
561
+ dependency_tree_cte.c.source_dataset_id,
562
+ dependency_tree_cte.c.source_dataset_version_id,
563
+ dependency_tree_cte.c.depth,
564
+ ]
565
+
542
566
  #
543
567
  # Jobs
544
568
  #
@@ -868,11 +892,8 @@ class SQLiteWarehouse(AbstractWarehouse):
868
892
  if isinstance(c, BinaryExpression):
869
893
  right_left_join = add_left_rows_filter(c)
870
894
 
871
- # Use CTE instead of subquery to force SQLite to materialize the result
872
- # This breaks deep nesting and prevents parser stack overflow.
873
895
  union_cte = sqlalchemy.union(left_right_join, right_left_join).cte()
874
-
875
- return self._regenerate_system_columns(union_cte)
896
+ return sqlalchemy.select(*union_cte.c).select_from(union_cte)
876
897
 
877
898
  def _system_row_number_expr(self):
878
899
  return func.row_number().over()
@@ -884,11 +905,7 @@ class SQLiteWarehouse(AbstractWarehouse):
884
905
  """
885
906
  Create a temporary table from a query for use in a UDF.
886
907
  """
887
- columns = [
888
- sqlalchemy.Column(c.name, c.type)
889
- for c in query.selected_columns
890
- if c.name != "sys__id"
891
- ]
908
+ columns = [sqlalchemy.Column(c.name, c.type) for c in query.selected_columns]
892
909
  table = self.create_udf_table(columns)
893
910
 
894
911
  with tqdm(desc="Preparing", unit=" rows", leave=False) as pbar:
@@ -5,7 +5,7 @@ import random
5
5
  import string
6
6
  from abc import ABC, abstractmethod
7
7
  from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
8
- from typing import TYPE_CHECKING, Any, Union
8
+ from typing import TYPE_CHECKING, Any, Union, cast
9
9
  from urllib.parse import urlparse
10
10
 
11
11
  import attrs
@@ -23,7 +23,7 @@ from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
23
23
  from datachain.query.batch import RowsOutput
24
24
  from datachain.query.schema import ColumnMeta
25
25
  from datachain.sql.functions import path as pathfunc
26
- from datachain.sql.types import Int, SQLType
26
+ from datachain.sql.types import SQLType
27
27
  from datachain.utils import sql_escape_like
28
28
 
29
29
  if TYPE_CHECKING:
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
32
32
  _FromClauseArgument,
33
33
  _OnClauseArgument,
34
34
  )
35
+ from sqlalchemy.sql.selectable import FromClause
35
36
  from sqlalchemy.types import TypeEngine
36
37
 
37
38
  from datachain.data_storage import schema
@@ -248,45 +249,56 @@ class AbstractWarehouse(ABC, Serializable):
248
249
 
249
250
  def _regenerate_system_columns(
250
251
  self,
251
- selectable: sa.Select | sa.CTE,
252
+ selectable: sa.Select,
252
253
  keep_existing_columns: bool = False,
254
+ regenerate_columns: Iterable[str] | None = None,
253
255
  ) -> sa.Select:
254
256
  """
255
- Return a SELECT that regenerates sys__id and sys__rand deterministically.
257
+ Return a SELECT that regenerates system columns deterministically.
256
258
 
257
- If keep_existing_columns is True, existing sys__id and sys__rand columns
258
- will be kept as-is if they exist in the input selectable.
259
- """
260
- base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
261
-
262
- result_columns: dict[str, sa.ColumnElement] = {}
263
- for col in base.c:
264
- if col.name in result_columns:
265
- raise ValueError(f"Duplicate column name {col.name} in SELECT")
266
- if col.name in ("sys__id", "sys__rand"):
267
- if keep_existing_columns:
268
- result_columns[col.name] = col
269
- else:
270
- result_columns[col.name] = col
259
+ If keep_existing_columns is True, existing system columns will be kept as-is
260
+ even when they are listed in ``regenerate_columns``.
271
261
 
272
- system_types: dict[str, sa.types.TypeEngine] = {
262
+ Args:
263
+ selectable: Base SELECT
264
+ keep_existing_columns: When True, reuse existing system columns even if
265
+ they are part of the regeneration set.
266
+ regenerate_columns: Names of system columns to regenerate. Defaults to
267
+ {"sys__id", "sys__rand"}. Columns not listed are left untouched.
268
+ """
269
+ system_columns = {
273
270
  sys_col.name: sys_col.type
274
271
  for sys_col in self.schema.dataset_row_cls.sys_columns()
275
272
  }
273
+ regenerate = set(regenerate_columns or system_columns)
274
+ generators = {
275
+ "sys__id": self._system_row_number_expr,
276
+ "sys__rand": self._system_random_expr,
277
+ }
278
+
279
+ base = cast("FromClause", selectable.subquery())
280
+
281
+ def build(name: str) -> sa.ColumnElement:
282
+ expr = generators[name]()
283
+ return sa.cast(expr, system_columns[name]).label(name)
284
+
285
+ columns: list[sa.ColumnElement] = []
286
+ present: set[str] = set()
287
+ changed = False
288
+
289
+ for col in base.c:
290
+ present.add(col.name)
291
+ regen = col.name in regenerate and not keep_existing_columns
292
+ columns.append(build(col.name) if regen else col)
293
+ changed |= regen
294
+
295
+ for name in regenerate - present:
296
+ columns.append(build(name))
297
+ changed = True
298
+
299
+ if not changed:
300
+ return selectable
276
301
 
277
- # Add missing system columns if needed
278
- if "sys__id" not in result_columns:
279
- expr = self._system_row_number_expr()
280
- expr = sa.cast(expr, system_types["sys__id"])
281
- result_columns["sys__id"] = expr.label("sys__id")
282
- if "sys__rand" not in result_columns:
283
- expr = self._system_random_expr()
284
- expr = sa.cast(expr, system_types["sys__rand"])
285
- result_columns["sys__rand"] = expr.label("sys__rand")
286
-
287
- # Wrap in subquery to materialize window functions, then wrap again in SELECT
288
- # This ensures window functions are computed before INSERT...FROM SELECT
289
- columns = list(result_columns.values())
290
302
  inner = sa.select(*columns).select_from(base).subquery()
291
303
  return sa.select(*inner.c).select_from(inner)
292
304
 
@@ -950,10 +962,15 @@ class AbstractWarehouse(ABC, Serializable):
950
962
  SQLite TEMPORARY tables cannot be directly used as they are process-specific,
951
963
  and UDFs are run in other processes when run in parallel.
952
964
  """
965
+ columns = [
966
+ c
967
+ for c in columns
968
+ if c.name not in [col.name for col in self.dataset_row_cls.sys_columns()]
969
+ ]
953
970
  tbl = sa.Table(
954
971
  name or self.udf_table_name(),
955
972
  sa.MetaData(),
956
- sa.Column("sys__id", Int, primary_key=True),
973
+ *self.dataset_row_cls.sys_columns(),
957
974
  *columns,
958
975
  )
959
976
  self.db.create_table(tbl, if_not_exists=True)
@@ -24,7 +24,7 @@ class CompareStatus(str, Enum):
24
24
  SAME = "S"
25
25
 
26
26
 
27
- def _compare( # noqa: C901, PLR0912
27
+ def _compare( # noqa: C901
28
28
  left: "DataChain",
29
29
  right: "DataChain",
30
30
  on: str | Sequence[str],
@@ -151,11 +151,7 @@ def _compare( # noqa: C901, PLR0912
151
151
  if status_col:
152
152
  cols_select.append(diff_col)
153
153
 
154
- if not dc_diff._sys:
155
- # TODO workaround when sys signal is not available in diff
156
- dc_diff = dc_diff.settings(sys=True).select(*cols_select).settings(sys=False)
157
- else:
158
- dc_diff = dc_diff.select(*cols_select)
154
+ dc_diff = dc_diff.select(*cols_select)
159
155
 
160
156
  # final schema is schema from the left chain with status column added if needed
161
157
  dc_diff.signals_schema = (
@@ -1,4 +1,5 @@
1
1
  import posixpath
2
+ import re
2
3
  from typing import TYPE_CHECKING
3
4
 
4
5
  from datachain.lib.file import FileError
@@ -9,7 +10,7 @@ if TYPE_CHECKING:
9
10
  from datachain.lib.file import Audio, AudioFile, File
10
11
 
11
12
  try:
12
- import torchaudio
13
+ import soundfile as sf
13
14
  except ImportError as exc:
14
15
  raise ImportError(
15
16
  "Missing dependencies for processing audio.\n"
@@ -26,18 +27,25 @@ def audio_info(file: "File | AudioFile") -> "Audio":
26
27
 
27
28
  try:
28
29
  with file.open() as f:
29
- info = torchaudio.info(f)
30
+ info = sf.info(f)
31
+
32
+ sample_rate = int(info.samplerate)
33
+ channels = int(info.channels)
34
+ frames = int(info.frames)
35
+ duration = float(info.duration)
30
36
 
31
- sample_rate = int(info.sample_rate)
32
- channels = int(info.num_channels)
33
- frames = int(info.num_frames)
34
- duration = float(frames / sample_rate) if sample_rate > 0 else 0.0
37
+ # soundfile provides format and subtype
38
+ if info.format:
39
+ format_name = info.format.lower()
40
+ else:
41
+ format_name = file.get_file_ext().lower()
35
42
 
36
- codec_name = getattr(info, "encoding", "")
37
- file_ext = file.get_file_ext().lower()
38
- format_name = _encoding_to_format(codec_name, file_ext)
43
+ if not format_name:
44
+ format_name = "unknown"
45
+ codec_name = info.subtype if info.subtype else ""
39
46
 
40
- bits_per_sample = getattr(info, "bits_per_sample", 0)
47
+ # Calculate bit rate from subtype
48
+ bits_per_sample = _get_bits_per_sample(info.subtype)
41
49
  bit_rate = (
42
50
  bits_per_sample * sample_rate * channels if bits_per_sample > 0 else -1
43
51
  )
@@ -58,44 +66,39 @@ def audio_info(file: "File | AudioFile") -> "Audio":
58
66
  )
59
67
 
60
68
 
61
- def _encoding_to_format(encoding: str, file_ext: str) -> str:
69
+ def _get_bits_per_sample(subtype: str) -> int:
62
70
  """
63
- Map torchaudio encoding to a format name.
71
+ Map soundfile subtype to bits per sample.
64
72
 
65
73
  Args:
66
- encoding: The encoding string from torchaudio.info()
67
- file_ext: The file extension as a fallback
74
+ subtype: The subtype string from soundfile
68
75
 
69
76
  Returns:
70
- Format name as a string
77
+ Bits per sample, or 0 if unknown
71
78
  """
72
- # Direct mapping for formats that match exactly
73
- encoding_map = {
74
- "FLAC": "flac",
75
- "MP3": "mp3",
76
- "VORBIS": "ogg",
77
- "AMR_WB": "amr",
78
- "AMR_NB": "amr",
79
- "OPUS": "opus",
80
- "GSM": "gsm",
79
+ if not subtype:
80
+ return 0
81
+
82
+ # Common PCM and floating-point subtypes
83
+ pcm_bits = {
84
+ "PCM_16": 16,
85
+ "PCM_24": 24,
86
+ "PCM_32": 32,
87
+ "PCM_S8": 8,
88
+ "PCM_U8": 8,
89
+ "FLOAT": 32,
90
+ "DOUBLE": 64,
81
91
  }
82
92
 
83
- if encoding in encoding_map:
84
- return encoding_map[encoding]
93
+ if subtype in pcm_bits:
94
+ return pcm_bits[subtype]
85
95
 
86
- # For PCM variants, use file extension to determine format
87
- if encoding.startswith("PCM_"):
88
- # Common PCM formats by extension
89
- pcm_formats = {
90
- "wav": "wav",
91
- "aiff": "aiff",
92
- "au": "au",
93
- "raw": "raw",
94
- }
95
- return pcm_formats.get(file_ext, "wav") # Default to wav for PCM
96
+ # Handle variants such as PCM_S16LE, PCM_F32LE, etc.
97
+ match = re.search(r"PCM_(?:[A-Z]*?)(\d+)", subtype)
98
+ if match:
99
+ return int(match.group(1))
96
100
 
97
- # Fallback to file extension if encoding is unknown
98
- return file_ext if file_ext else "unknown"
101
+ return 0
99
102
 
100
103
 
101
104
  def audio_to_np(
@@ -114,27 +117,27 @@ def audio_to_np(
114
117
 
115
118
  try:
116
119
  with audio.open() as f:
117
- info = torchaudio.info(f)
118
- sample_rate = info.sample_rate
120
+ info = sf.info(f)
121
+ sample_rate = info.samplerate
119
122
 
120
123
  frame_offset = int(start * sample_rate)
121
124
  num_frames = int(duration * sample_rate) if duration is not None else -1
122
125
 
123
126
  # Reset file pointer to the beginning
124
- # This is important to ensure we read from the correct position later
125
127
  f.seek(0)
126
128
 
127
- waveform, sr = torchaudio.load(
128
- f, frame_offset=frame_offset, num_frames=num_frames
129
+ # Read audio data with offset and frame count
130
+ audio_np, sr = sf.read(
131
+ f,
132
+ start=frame_offset,
133
+ frames=num_frames,
134
+ always_2d=False,
135
+ dtype="float32",
129
136
  )
130
137
 
131
- audio_np = waveform.numpy()
132
-
133
- if audio_np.shape[0] > 1:
134
- audio_np = audio_np.T
135
- else:
136
- audio_np = audio_np.squeeze()
137
-
138
+ # soundfile returns shape (frames,) for mono or
139
+ # (frames, channels) for multi-channel
140
+ # We keep this format as it matches expected output
138
141
  return audio_np, int(sr)
139
142
  except Exception as exc:
140
143
  raise FileError(
@@ -152,11 +155,9 @@ def audio_to_bytes(
152
155
 
153
156
  If duration is None, converts from start to end of file.
154
157
  If start is 0 and duration is None, converts entire file."""
155
- y, sr = audio_to_np(audio, start, duration)
156
-
157
158
  import io
158
159
 
159
- import soundfile as sf
160
+ y, sr = audio_to_np(audio, start, duration)
160
161
 
161
162
  buffer = io.BytesIO()
162
163
  sf.write(buffer, y, sr, format=format)
@@ -856,7 +856,9 @@ class DataChain:
856
856
  udf_obj.to_udf_wrapper(self._settings.batch_size),
857
857
  **self._settings.to_dict(),
858
858
  ),
859
- signal_schema=self.signals_schema | udf_obj.output,
859
+ signal_schema=SignalSchema({"sys": Sys})
860
+ | self.signals_schema
861
+ | udf_obj.output,
860
862
  )
861
863
 
862
864
  def gen(
@@ -894,7 +896,7 @@ class DataChain:
894
896
  udf_obj.to_udf_wrapper(self._settings.batch_size),
895
897
  **self._settings.to_dict(),
896
898
  ),
897
- signal_schema=udf_obj.output,
899
+ signal_schema=SignalSchema({"sys": Sys}) | udf_obj.output,
898
900
  )
899
901
 
900
902
  @delta_disabled
@@ -1031,7 +1033,7 @@ class DataChain:
1031
1033
  partition_by=processed_partition_by,
1032
1034
  **self._settings.to_dict(),
1033
1035
  ),
1034
- signal_schema=udf_obj.output,
1036
+ signal_schema=SignalSchema({"sys": Sys}) | udf_obj.output,
1035
1037
  )
1036
1038
 
1037
1039
  def batch_map(
@@ -1097,11 +1099,7 @@ class DataChain:
1097
1099
  sign = UdfSignature.parse(name, signal_map, func, params, output, is_generator)
1098
1100
  DataModel.register(list(sign.output_schema.values.values()))
1099
1101
 
1100
- signals_schema = self.signals_schema
1101
- if self._sys:
1102
- signals_schema = SignalSchema({"sys": Sys}) | signals_schema
1103
-
1104
- params_schema = signals_schema.slice(
1102
+ params_schema = self.signals_schema.slice(
1105
1103
  sign.params, self._setup, is_batch=is_batch
1106
1104
  )
1107
1105
 
@@ -1156,11 +1154,9 @@ class DataChain:
1156
1154
  )
1157
1155
  )
1158
1156
 
1159
- def select(self, *args: str, _sys: bool = True) -> "Self":
1157
+ def select(self, *args: str) -> "Self":
1160
1158
  """Select only a specified set of signals."""
1161
1159
  new_schema = self.signals_schema.resolve(*args)
1162
- if self._sys and _sys:
1163
- new_schema = SignalSchema({"sys": Sys}) | new_schema
1164
1160
  columns = new_schema.db_signals()
1165
1161
  return self._evolve(
1166
1162
  query=self._query.select(*columns), signal_schema=new_schema
@@ -1710,9 +1706,11 @@ class DataChain:
1710
1706
 
1711
1707
  signals_schema = self.signals_schema.clone_without_sys_signals()
1712
1708
  right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
1713
- ds.signals_schema = SignalSchema({"sys": Sys}) | signals_schema.merge(
1714
- right_signals_schema, rname
1715
- )
1709
+
1710
+ ds.signals_schema = signals_schema.merge(right_signals_schema, rname)
1711
+
1712
+ if not full:
1713
+ ds.signals_schema = SignalSchema({"sys": Sys}) | ds.signals_schema
1716
1714
 
1717
1715
  return ds
1718
1716
 
@@ -1723,6 +1721,7 @@ class DataChain:
1723
1721
  Parameters:
1724
1722
  other: chain whose rows will be added to `self`.
1725
1723
  """
1724
+ self.signals_schema = self.signals_schema.clone_without_sys_signals()
1726
1725
  return self._evolve(query=self._query.union(other._query))
1727
1726
 
1728
1727
  def subtract( # type: ignore[override]