datachain 0.35.2__tar.gz → 0.36.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (442)
  1. {datachain-0.35.2 → datachain-0.36.1}/.github/workflows/benchmarks.yml +1 -1
  2. {datachain-0.35.2 → datachain-0.36.1}/.github/workflows/release.yml +1 -1
  3. {datachain-0.35.2 → datachain-0.36.1}/.github/workflows/tests-studio.yml +1 -1
  4. {datachain-0.35.2 → datachain-0.36.1}/.github/workflows/tests.yml +4 -4
  5. {datachain-0.35.2 → datachain-0.36.1}/.pre-commit-config.yaml +1 -1
  6. {datachain-0.35.2 → datachain-0.36.1}/PKG-INFO +3 -2
  7. {datachain-0.35.2 → datachain-0.36.1}/pyproject.toml +8 -3
  8. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/catalog/catalog.py +45 -20
  9. datachain-0.36.1/src/datachain/catalog/dependency.py +164 -0
  10. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/metastore.py +80 -0
  11. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/schema.py +1 -2
  12. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/sqlite.py +2 -9
  13. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/warehouse.py +50 -33
  14. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/diff/__init__.py +2 -6
  15. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/audio.py +54 -53
  16. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/datachain.py +13 -14
  17. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/dataset.py +21 -26
  18. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/dispatch.py +64 -42
  19. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/queue.py +2 -1
  20. {datachain-0.35.2 → datachain-0.36.1}/src/datachain.egg-info/PKG-INFO +3 -2
  21. {datachain-0.35.2 → datachain-0.36.1}/src/datachain.egg-info/SOURCES.txt +1 -0
  22. {datachain-0.35.2 → datachain-0.36.1}/src/datachain.egg-info/requires.txt +2 -1
  23. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_datachain.py +1 -1
  24. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_datachain_merge.py +7 -18
  25. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_retry.py +0 -1
  26. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_udf.py +116 -0
  27. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_audio.py +31 -37
  28. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_datachain.py +15 -13
  29. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_datachain_hash.py +1 -1
  30. {datachain-0.35.2 → datachain-0.36.1}/.cruft.json +0 -0
  31. {datachain-0.35.2 → datachain-0.36.1}/.gitattributes +0 -0
  32. {datachain-0.35.2 → datachain-0.36.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  33. {datachain-0.35.2 → datachain-0.36.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  34. {datachain-0.35.2 → datachain-0.36.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  35. {datachain-0.35.2 → datachain-0.36.1}/.github/codecov.yaml +0 -0
  36. {datachain-0.35.2 → datachain-0.36.1}/.github/dependabot.yml +0 -0
  37. {datachain-0.35.2 → datachain-0.36.1}/.github/workflows/update-template.yaml +0 -0
  38. {datachain-0.35.2 → datachain-0.36.1}/.gitignore +0 -0
  39. {datachain-0.35.2 → datachain-0.36.1}/CODE_OF_CONDUCT.rst +0 -0
  40. {datachain-0.35.2 → datachain-0.36.1}/LICENSE +0 -0
  41. {datachain-0.35.2 → datachain-0.36.1}/README.rst +0 -0
  42. {datachain-0.35.2 → datachain-0.36.1}/docs/api_hooks.py +0 -0
  43. {datachain-0.35.2 → datachain-0.36.1}/docs/assets/captioned_cartoons.png +0 -0
  44. {datachain-0.35.2 → datachain-0.36.1}/docs/assets/datachain-white.svg +0 -0
  45. {datachain-0.35.2 → datachain-0.36.1}/docs/assets/datachain.svg +0 -0
  46. {datachain-0.35.2 → datachain-0.36.1}/docs/assets/webhook_dialog.png +0 -0
  47. {datachain-0.35.2 → datachain-0.36.1}/docs/assets/webhook_list.png +0 -0
  48. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/auth/login.md +0 -0
  49. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/auth/logout.md +0 -0
  50. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/auth/team.md +0 -0
  51. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/auth/token.md +0 -0
  52. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/index.md +0 -0
  53. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/job/cancel.md +0 -0
  54. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/job/clusters.md +0 -0
  55. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/job/logs.md +0 -0
  56. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/job/ls.md +0 -0
  57. {datachain-0.35.2 → datachain-0.36.1}/docs/commands/job/run.md +0 -0
  58. {datachain-0.35.2 → datachain-0.36.1}/docs/contributing.md +0 -0
  59. {datachain-0.35.2 → datachain-0.36.1}/docs/css/github-permalink-style.css +0 -0
  60. {datachain-0.35.2 → datachain-0.36.1}/docs/examples.md +0 -0
  61. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/db_migrations.md +0 -0
  62. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/delta.md +0 -0
  63. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/env.md +0 -0
  64. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/index.md +0 -0
  65. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/namespaces.md +0 -0
  66. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/processing.md +0 -0
  67. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/remotes.md +0 -0
  68. {datachain-0.35.2 → datachain-0.36.1}/docs/guide/retry.md +0 -0
  69. {datachain-0.35.2 → datachain-0.36.1}/docs/index.md +0 -0
  70. {datachain-0.35.2 → datachain-0.36.1}/docs/overrides/main.html +0 -0
  71. {datachain-0.35.2 → datachain-0.36.1}/docs/quick-start.md +0 -0
  72. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/arrowrow.md +0 -0
  73. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/bbox.md +0 -0
  74. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/file.md +0 -0
  75. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/imagefile.md +0 -0
  76. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/index.md +0 -0
  77. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/pose.md +0 -0
  78. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/segment.md +0 -0
  79. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/tarvfile.md +0 -0
  80. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/textfile.md +0 -0
  81. {datachain-0.35.2 → datachain-0.36.1}/docs/references/data-types/videofile.md +0 -0
  82. {datachain-0.35.2 → datachain-0.36.1}/docs/references/datachain.md +0 -0
  83. {datachain-0.35.2 → datachain-0.36.1}/docs/references/func.md +0 -0
  84. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/aggregate.md +0 -0
  85. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/array.md +0 -0
  86. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/conditional.md +0 -0
  87. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/numeric.md +0 -0
  88. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/path.md +0 -0
  89. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/random.md +0 -0
  90. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/string.md +0 -0
  91. {datachain-0.35.2 → datachain-0.36.1}/docs/references/functions/window.md +0 -0
  92. {datachain-0.35.2 → datachain-0.36.1}/docs/references/index.md +0 -0
  93. {datachain-0.35.2 → datachain-0.36.1}/docs/references/toolkit.md +0 -0
  94. {datachain-0.35.2 → datachain-0.36.1}/docs/references/torch.md +0 -0
  95. {datachain-0.35.2 → datachain-0.36.1}/docs/references/udf.md +0 -0
  96. {datachain-0.35.2 → datachain-0.36.1}/docs/studio/api/.gitkeep +0 -0
  97. {datachain-0.35.2 → datachain-0.36.1}/docs/studio/webhooks.md +0 -0
  98. {datachain-0.35.2 → datachain-0.36.1}/docs/templates/main.dot +0 -0
  99. {datachain-0.35.2 → datachain-0.36.1}/docs/templates/operation.dot +0 -0
  100. {datachain-0.35.2 → datachain-0.36.1}/docs/templates/responses.def +0 -0
  101. {datachain-0.35.2 → datachain-0.36.1}/docs/tutorials.md +0 -0
  102. {datachain-0.35.2 → datachain-0.36.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  103. {datachain-0.35.2 → datachain-0.36.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  104. {datachain-0.35.2 → datachain-0.36.1}/examples/computer_vision/openimage-detect.py +0 -0
  105. {datachain-0.35.2 → datachain-0.36.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
  106. {datachain-0.35.2 → datachain-0.36.1}/examples/computer_vision/ultralytics-pose.py +0 -0
  107. {datachain-0.35.2 → datachain-0.36.1}/examples/computer_vision/ultralytics-segment.py +0 -0
  108. {datachain-0.35.2 → datachain-0.36.1}/examples/get_started/common_sql_functions.py +0 -0
  109. {datachain-0.35.2 → datachain-0.36.1}/examples/get_started/json-csv-reader.py +0 -0
  110. {datachain-0.35.2 → datachain-0.36.1}/examples/get_started/nested_datamodel.py +0 -0
  111. {datachain-0.35.2 → datachain-0.36.1}/examples/get_started/torch-loader.py +0 -0
  112. {datachain-0.35.2 → datachain-0.36.1}/examples/get_started/udfs/parallel.py +0 -0
  113. {datachain-0.35.2 → datachain-0.36.1}/examples/get_started/udfs/simple.py +0 -0
  114. {datachain-0.35.2 → datachain-0.36.1}/examples/get_started/udfs/stateful.py +0 -0
  115. {datachain-0.35.2 → datachain-0.36.1}/examples/incremental_processing/delta.py +0 -0
  116. {datachain-0.35.2 → datachain-0.36.1}/examples/incremental_processing/retry.py +0 -0
  117. {datachain-0.35.2 → datachain-0.36.1}/examples/incremental_processing/utils.py +0 -0
  118. {datachain-0.35.2 → datachain-0.36.1}/examples/llm_and_nlp/claude-query.py +0 -0
  119. {datachain-0.35.2 → datachain-0.36.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  120. {datachain-0.35.2 → datachain-0.36.1}/examples/multimodal/audio-to-text.py +0 -0
  121. {datachain-0.35.2 → datachain-0.36.1}/examples/multimodal/clip_inference.py +0 -0
  122. {datachain-0.35.2 → datachain-0.36.1}/examples/multimodal/hf_pipeline.py +0 -0
  123. {datachain-0.35.2 → datachain-0.36.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  124. {datachain-0.35.2 → datachain-0.36.1}/examples/multimodal/wds.py +0 -0
  125. {datachain-0.35.2 → datachain-0.36.1}/examples/multimodal/wds_filtered.py +0 -0
  126. {datachain-0.35.2 → datachain-0.36.1}/mkdocs.yml +0 -0
  127. {datachain-0.35.2 → datachain-0.36.1}/noxfile.py +0 -0
  128. {datachain-0.35.2 → datachain-0.36.1}/setup.cfg +0 -0
  129. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/__init__.py +0 -0
  130. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/__main__.py +0 -0
  131. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/asyn.py +0 -0
  132. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cache.py +0 -0
  133. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/catalog/__init__.py +0 -0
  134. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/catalog/datasource.py +0 -0
  135. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/catalog/loader.py +0 -0
  136. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/checkpoint.py +0 -0
  137. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/__init__.py +0 -0
  138. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/__init__.py +0 -0
  139. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/datasets.py +0 -0
  140. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/du.py +0 -0
  141. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/index.py +0 -0
  142. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/ls.py +0 -0
  143. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/misc.py +0 -0
  144. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/query.py +0 -0
  145. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/commands/show.py +0 -0
  146. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/parser/__init__.py +0 -0
  147. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/parser/job.py +0 -0
  148. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/parser/studio.py +0 -0
  149. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/parser/utils.py +0 -0
  150. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/cli/utils.py +0 -0
  151. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/__init__.py +0 -0
  152. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/azure.py +0 -0
  153. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/fileslice.py +0 -0
  154. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/fsspec.py +0 -0
  155. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/gcs.py +0 -0
  156. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/hf.py +0 -0
  157. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/http.py +0 -0
  158. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/local.py +0 -0
  159. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/client/s3.py +0 -0
  160. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/config.py +0 -0
  161. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/__init__.py +0 -0
  162. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/db_engine.py +0 -0
  163. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/job.py +0 -0
  164. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/data_storage/serializer.py +0 -0
  165. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/dataset.py +0 -0
  166. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/delta.py +0 -0
  167. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/error.py +0 -0
  168. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/fs/__init__.py +0 -0
  169. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/fs/reference.py +0 -0
  170. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/fs/utils.py +0 -0
  171. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/__init__.py +0 -0
  172. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/aggregate.py +0 -0
  173. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/array.py +0 -0
  174. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/base.py +0 -0
  175. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/conditional.py +0 -0
  176. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/func.py +0 -0
  177. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/numeric.py +0 -0
  178. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/path.py +0 -0
  179. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/random.py +0 -0
  180. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/string.py +0 -0
  181. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/func/window.py +0 -0
  182. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/hash_utils.py +0 -0
  183. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/job.py +0 -0
  184. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/__init__.py +0 -0
  185. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/arrow.py +0 -0
  186. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/clip.py +0 -0
  187. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/convert/__init__.py +0 -0
  188. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/convert/flatten.py +0 -0
  189. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
  190. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
  191. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/convert/unflatten.py +0 -0
  192. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  193. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/data_model.py +0 -0
  194. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dataset_info.py +0 -0
  195. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/__init__.py +0 -0
  196. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/csv.py +0 -0
  197. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/database.py +0 -0
  198. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/datasets.py +0 -0
  199. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/hf.py +0 -0
  200. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/json.py +0 -0
  201. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/listings.py +0 -0
  202. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/pandas.py +0 -0
  203. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/parquet.py +0 -0
  204. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/records.py +0 -0
  205. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/storage.py +0 -0
  206. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/storage_pattern.py +0 -0
  207. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/utils.py +0 -0
  208. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/dc/values.py +0 -0
  209. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/file.py +0 -0
  210. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/hf.py +0 -0
  211. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/image.py +0 -0
  212. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/listing.py +0 -0
  213. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/listing_info.py +0 -0
  214. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/meta_formats.py +0 -0
  215. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/model_store.py +0 -0
  216. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/namespaces.py +0 -0
  217. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/projects.py +0 -0
  218. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/pytorch.py +0 -0
  219. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/settings.py +0 -0
  220. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/signal_schema.py +0 -0
  221. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/tar.py +0 -0
  222. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/text.py +0 -0
  223. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/udf.py +0 -0
  224. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/udf_signature.py +0 -0
  225. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/utils.py +0 -0
  226. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/video.py +0 -0
  227. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/webdataset.py +0 -0
  228. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/lib/webdataset_laion.py +0 -0
  229. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/listing.py +0 -0
  230. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/__init__.py +0 -0
  231. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/bbox.py +0 -0
  232. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/pose.py +0 -0
  233. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/segment.py +0 -0
  234. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/ultralytics/__init__.py +0 -0
  235. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/ultralytics/bbox.py +0 -0
  236. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/ultralytics/pose.py +0 -0
  237. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/ultralytics/segment.py +0 -0
  238. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/model/utils.py +0 -0
  239. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/namespace.py +0 -0
  240. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/node.py +0 -0
  241. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/nodes_fetcher.py +0 -0
  242. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/nodes_thread_pool.py +0 -0
  243. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/plugins.py +0 -0
  244. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/progress.py +0 -0
  245. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/project.py +0 -0
  246. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/py.typed +0 -0
  247. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/__init__.py +0 -0
  248. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/batch.py +0 -0
  249. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/metrics.py +0 -0
  250. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/params.py +0 -0
  251. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/schema.py +0 -0
  252. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/session.py +0 -0
  253. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/query/udf.py +0 -0
  254. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/remote/__init__.py +0 -0
  255. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/remote/studio.py +0 -0
  256. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/script_meta.py +0 -0
  257. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/semver.py +0 -0
  258. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/__init__.py +0 -0
  259. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/default/__init__.py +0 -0
  260. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/default/base.py +0 -0
  261. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/__init__.py +0 -0
  262. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/aggregate.py +0 -0
  263. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/array.py +0 -0
  264. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/conditional.py +0 -0
  265. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/numeric.py +0 -0
  266. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/path.py +0 -0
  267. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/random.py +0 -0
  268. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/functions/string.py +0 -0
  269. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/postgresql_dialect.py +0 -0
  270. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/postgresql_types.py +0 -0
  271. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/selectable.py +0 -0
  272. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  273. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/sqlite/base.py +0 -0
  274. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/sqlite/types.py +0 -0
  275. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/sqlite/vector.py +0 -0
  276. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/types.py +0 -0
  277. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/sql/utils.py +0 -0
  278. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/studio.py +0 -0
  279. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/telemetry.py +0 -0
  280. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/toolkit/__init__.py +0 -0
  281. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/toolkit/split.py +0 -0
  282. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/torch/__init__.py +0 -0
  283. {datachain-0.35.2 → datachain-0.36.1}/src/datachain/utils.py +0 -0
  284. {datachain-0.35.2 → datachain-0.36.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  285. {datachain-0.35.2 → datachain-0.36.1}/src/datachain.egg-info/entry_points.txt +0 -0
  286. {datachain-0.35.2 → datachain-0.36.1}/src/datachain.egg-info/top_level.txt +0 -0
  287. {datachain-0.35.2 → datachain-0.36.1}/tests/__init__.py +0 -0
  288. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/__init__.py +0 -0
  289. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/conftest.py +0 -0
  290. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  291. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  292. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/datasets/.gitignore +0 -0
  293. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  294. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/test_datachain.py +0 -0
  295. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/test_ls.py +0 -0
  296. {datachain-0.35.2 → datachain-0.36.1}/tests/benchmarks/test_version.py +0 -0
  297. {datachain-0.35.2 → datachain-0.36.1}/tests/conftest.py +0 -0
  298. {datachain-0.35.2 → datachain-0.36.1}/tests/data.py +0 -0
  299. {datachain-0.35.2 → datachain-0.36.1}/tests/examples/__init__.py +0 -0
  300. {datachain-0.35.2 → datachain-0.36.1}/tests/examples/test_examples.py +0 -0
  301. {datachain-0.35.2 → datachain-0.36.1}/tests/examples/test_wds_e2e.py +0 -0
  302. {datachain-0.35.2 → datachain-0.36.1}/tests/examples/wds_data.py +0 -0
  303. {datachain-0.35.2 → datachain-0.36.1}/tests/func/__init__.py +0 -0
  304. {datachain-0.35.2 → datachain-0.36.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  305. {datachain-0.35.2 → datachain-0.36.1}/tests/func/data/lena.jpg +0 -0
  306. {datachain-0.35.2 → datachain-0.36.1}/tests/func/fake-service-account-credentials.json +0 -0
  307. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/__init__.py +0 -0
  308. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/test_aggregate.py +0 -0
  309. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/test_array.py +0 -0
  310. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/test_conditional.py +0 -0
  311. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/test_numeric.py +0 -0
  312. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/test_path.py +0 -0
  313. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/test_random.py +0 -0
  314. {datachain-0.35.2 → datachain-0.36.1}/tests/func/functions/test_string.py +0 -0
  315. {datachain-0.35.2 → datachain-0.36.1}/tests/func/model/__init__.py +0 -0
  316. {datachain-0.35.2 → datachain-0.36.1}/tests/func/model/data/running-mask0.png +0 -0
  317. {datachain-0.35.2 → datachain-0.36.1}/tests/func/model/data/running-mask1.png +0 -0
  318. {datachain-0.35.2 → datachain-0.36.1}/tests/func/model/data/running.jpg +0 -0
  319. {datachain-0.35.2 → datachain-0.36.1}/tests/func/model/data/ships.jpg +0 -0
  320. {datachain-0.35.2 → datachain-0.36.1}/tests/func/model/test_yolo.py +0 -0
  321. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_audio.py +0 -0
  322. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_catalog.py +0 -0
  323. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_client.py +0 -0
  324. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_cloud_transfer.py +0 -0
  325. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_data_storage.py +0 -0
  326. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_dataset_query.py +0 -0
  327. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_datasets.py +0 -0
  328. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_delta.py +0 -0
  329. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_feature_pickling.py +0 -0
  330. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_file.py +0 -0
  331. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_hf.py +0 -0
  332. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_hidden_field.py +0 -0
  333. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_image.py +0 -0
  334. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_listing.py +0 -0
  335. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_ls.py +0 -0
  336. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_meta_formats.py +0 -0
  337. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_metastore.py +0 -0
  338. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_metrics.py +0 -0
  339. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_mutate.py +0 -0
  340. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_pull.py +0 -0
  341. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_pytorch.py +0 -0
  342. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_query.py +0 -0
  343. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_read_database.py +0 -0
  344. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_read_dataset_remote.py +0 -0
  345. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  346. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_session.py +0 -0
  347. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_storage_pattern.py +0 -0
  348. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_studio_datetime_parsing.py +0 -0
  349. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_to_database.py +0 -0
  350. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_toolkit.py +0 -0
  351. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_union.py +0 -0
  352. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_video.py +0 -0
  353. {datachain-0.35.2 → datachain-0.36.1}/tests/func/test_warehouse.py +0 -0
  354. {datachain-0.35.2 → datachain-0.36.1}/tests/scripts/feature_class.py +0 -0
  355. {datachain-0.35.2 → datachain-0.36.1}/tests/scripts/feature_class_exception.py +0 -0
  356. {datachain-0.35.2 → datachain-0.36.1}/tests/scripts/feature_class_parallel.py +0 -0
  357. {datachain-0.35.2 → datachain-0.36.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  358. {datachain-0.35.2 → datachain-0.36.1}/tests/scripts/name_len_slow.py +0 -0
  359. {datachain-0.35.2 → datachain-0.36.1}/tests/test_atomicity.py +0 -0
  360. {datachain-0.35.2 → datachain-0.36.1}/tests/test_cli_e2e.py +0 -0
  361. {datachain-0.35.2 → datachain-0.36.1}/tests/test_cli_studio.py +0 -0
  362. {datachain-0.35.2 → datachain-0.36.1}/tests/test_import_time.py +0 -0
  363. {datachain-0.35.2 → datachain-0.36.1}/tests/test_query_e2e.py +0 -0
  364. {datachain-0.35.2 → datachain-0.36.1}/tests/test_telemetry.py +0 -0
  365. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/__init__.py +0 -0
  366. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/__init__.py +0 -0
  367. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/conftest.py +0 -0
  368. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_arrow.py +0 -0
  369. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_checkpoints.py +0 -0
  370. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_clip.py +0 -0
  371. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  372. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  373. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_diff.py +0 -0
  374. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_feature.py +0 -0
  375. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_feature_utils.py +0 -0
  376. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_file.py +0 -0
  377. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_hf.py +0 -0
  378. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_image.py +0 -0
  379. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_listing_info.py +0 -0
  380. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_namespace.py +0 -0
  381. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_partition_by.py +0 -0
  382. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_project.py +0 -0
  383. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_python_to_sql.py +0 -0
  384. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_schema.py +0 -0
  385. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_settings.py +0 -0
  386. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_signal_schema.py +0 -0
  387. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_sql_to_python.py +0 -0
  388. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_storage_pattern.py +0 -0
  389. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_text.py +0 -0
  390. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_udf.py +0 -0
  391. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_udf_signature.py +0 -0
  392. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_utils.py +0 -0
  393. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/lib/test_webdataset.py +0 -0
  394. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/model/__init__.py +0 -0
  395. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/model/test_bbox.py +0 -0
  396. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/model/test_pose.py +0 -0
  397. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/model/test_segment.py +0 -0
  398. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/model/test_utils.py +0 -0
  399. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/__init__.py +0 -0
  400. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  401. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/sqlite/test_types.py +0 -0
  402. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  403. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/test_array.py +0 -0
  404. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/test_conditional.py +0 -0
  405. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/test_path.py +0 -0
  406. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/test_random.py +0 -0
  407. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/test_selectable.py +0 -0
  408. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/sql/test_string.py +0 -0
  409. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_asyn.py +0 -0
  410. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_batching.py +0 -0
  411. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_cache.py +0 -0
  412. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_catalog.py +0 -0
  413. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_catalog_loader.py +0 -0
  414. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_cli_datasets.py +0 -0
  415. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_cli_parsing.py +0 -0
  416. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_client.py +0 -0
  417. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_client_gcs.py +0 -0
  418. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_client_http.py +0 -0
  419. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_client_s3.py +0 -0
  420. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_config.py +0 -0
  421. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_data_storage.py +0 -0
  422. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_database_engine.py +0 -0
  423. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_dataset.py +0 -0
  424. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_dispatch.py +0 -0
  425. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_fileslice.py +0 -0
  426. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_func.py +0 -0
  427. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_hash_utils.py +0 -0
  428. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_listing.py +0 -0
  429. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_metastore.py +0 -0
  430. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_module_exports.py +0 -0
  431. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_pytorch.py +0 -0
  432. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_query.py +0 -0
  433. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_query_metrics.py +0 -0
  434. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_query_params.py +0 -0
  435. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_query_steps_hash.py +0 -0
  436. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_script_meta.py +0 -0
  437. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_semver.py +0 -0
  438. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_serializer.py +0 -0
  439. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_session.py +0 -0
  440. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_utils.py +0 -0
  441. {datachain-0.35.2 → datachain-0.36.1}/tests/unit/test_warehouse.py +0 -0
  442. {datachain-0.35.2 → datachain-0.36.1}/tests/utils.py +0 -0
@@ -25,7 +25,7 @@ jobs:
25
25
  python-version: '3.13'
26
26
 
27
27
  - name: Setup uv
28
- uses: astral-sh/setup-uv@v6
28
+ uses: astral-sh/setup-uv@v7
29
29
  with:
30
30
  enable-cache: true
31
31
  cache-suffix: benchmarks
@@ -27,7 +27,7 @@ jobs:
27
27
  python-version: '3.13'
28
28
 
29
29
  - name: Setup uv
30
- uses: astral-sh/setup-uv@v6
30
+ uses: astral-sh/setup-uv@v7
31
31
 
32
32
  - name: Install nox
33
33
  run: uv pip install nox --system
@@ -97,7 +97,7 @@ jobs:
97
97
  python-version: ${{ matrix.pyv }}
98
98
 
99
99
  - name: Setup uv
100
- uses: astral-sh/setup-uv@v6
100
+ uses: astral-sh/setup-uv@v7
101
101
  with:
102
102
  enable-cache: true
103
103
  cache-suffix: studio
@@ -29,7 +29,7 @@ jobs:
29
29
  python-version: '3.10'
30
30
 
31
31
  - name: Setup uv
32
- uses: astral-sh/setup-uv@v6
32
+ uses: astral-sh/setup-uv@v7
33
33
  with:
34
34
  enable-cache: true
35
35
  cache-suffix: lint
@@ -80,7 +80,7 @@ jobs:
80
80
 
81
81
  - name: Setup PostgreSQL
82
82
  if: runner.os != 'Windows'
83
- uses: ikalnytskyi/action-setup-postgres@10ab8a56cc77b4823c2bfa57b1d4dd5605ef0481 # v7
83
+ uses: ikalnytskyi/action-setup-postgres@c4dda34aae1c821e3a771b68b73b13af3198a7ee # v8
84
84
  with:
85
85
  username: test
86
86
  password: test
@@ -102,7 +102,7 @@ jobs:
102
102
  python-version: ${{ matrix.pyv }}
103
103
 
104
104
  - name: Setup uv
105
- uses: astral-sh/setup-uv@v6
105
+ uses: astral-sh/setup-uv@v7
106
106
  with:
107
107
  enable-cache: true
108
108
  cache-suffix: tests-${{ matrix.pyv }}
@@ -188,7 +188,7 @@ jobs:
188
188
  python-version: ${{ matrix.pyv }}
189
189
 
190
190
  - name: Setup uv
191
- uses: astral-sh/setup-uv@v6
191
+ uses: astral-sh/setup-uv@v7
192
192
  with:
193
193
  enable-cache: true
194
194
  cache-suffix: examples-${{ matrix.pyv }}
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.13.3'
27
+ rev: 'v0.14.0'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.35.2
3
+ Version: 0.36.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -64,7 +64,6 @@ Requires-Dist: torch>=2.1.0; extra == "torch"
64
64
  Requires-Dist: torchvision; extra == "torch"
65
65
  Requires-Dist: transformers>=4.36.0; extra == "torch"
66
66
  Provides-Extra: audio
67
- Requires-Dist: torchaudio; extra == "audio"
68
67
  Requires-Dist: soundfile; extra == "audio"
69
68
  Provides-Extra: remote
70
69
  Requires-Dist: lz4; extra == "remote"
@@ -76,6 +75,7 @@ Requires-Dist: numba>=0.60.0; extra == "hf"
76
75
  Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
77
76
  Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
78
77
  Requires-Dist: fsspec>=2024.12.0; extra == "hf"
78
+ Requires-Dist: torch<2.9.0; extra == "hf"
79
79
  Provides-Extra: video
80
80
  Requires-Dist: ffmpeg-python; extra == "video"
81
81
  Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
@@ -117,6 +117,7 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
117
117
  Requires-Dist: ultralytics; extra == "examples"
118
118
  Requires-Dist: open_clip_torch; extra == "examples"
119
119
  Requires-Dist: openai; extra == "examples"
120
+ Requires-Dist: torchaudio<2.9.0; extra == "examples"
120
121
  Dynamic: license-file
121
122
 
122
123
  ================
@@ -73,7 +73,6 @@ torch = [
73
73
  "transformers>=4.36.0"
74
74
  ]
75
75
  audio = [
76
- "torchaudio",
77
76
  "soundfile"
78
77
  ]
79
78
  remote = [
@@ -88,7 +87,11 @@ hf = [
88
87
  "datasets[vision]>=4.0.0",
89
88
  # https://github.com/pytorch/torchcodec/issues/640
90
89
  "datasets[audio]>=4.0.0 ; (sys_platform == 'linux' or sys_platform == 'darwin')",
91
- "fsspec>=2024.12.0"
90
+ "fsspec>=2024.12.0",
91
+ # Until datasets solve the issue, run test_hf_audio test to see if this can be removed
92
+ # https://github.com/meta-pytorch/torchcodec/issues/912
93
+ # https://github.com/huggingface/transformers/pull/41610
94
+ "torch<2.9.0"
92
95
  ]
93
96
  video = [
94
97
  "ffmpeg-python",
@@ -134,7 +137,9 @@ examples = [
134
137
  "huggingface_hub[hf_transfer]",
135
138
  "ultralytics",
136
139
  "open_clip_torch",
137
- "openai"
140
+ "openai",
141
+ # Transformers still require it
142
+ "torchaudio<2.9.0"
138
143
  ]
139
144
 
140
145
  [project.urls]
@@ -54,6 +54,7 @@ from datachain.sql.types import DateTime, SQLType
54
54
  from datachain.utils import DataChainDir
55
55
 
56
56
  from .datasource import DataSource
57
+ from .dependency import build_dependency_hierarchy, populate_nested_dependencies
57
58
 
58
59
  if TYPE_CHECKING:
59
60
  from datachain.data_storage import AbstractMetastore, AbstractWarehouse
@@ -1203,6 +1204,38 @@ class Catalog:
1203
1204
  assert isinstance(dataset_info, dict)
1204
1205
  return DatasetRecord.from_dict(dataset_info)
1205
1206
 
1207
+ def get_dataset_dependencies_by_ids(
1208
+ self,
1209
+ dataset_id: int,
1210
+ version_id: int,
1211
+ indirect: bool = True,
1212
+ ) -> list[DatasetDependency | None]:
1213
+ dependency_nodes = self.metastore.get_dataset_dependency_nodes(
1214
+ dataset_id=dataset_id,
1215
+ version_id=version_id,
1216
+ )
1217
+
1218
+ if not dependency_nodes:
1219
+ return []
1220
+
1221
+ dependency_map, children_map = build_dependency_hierarchy(dependency_nodes)
1222
+
1223
+ root_key = (dataset_id, version_id)
1224
+ if root_key not in children_map:
1225
+ return []
1226
+
1227
+ root_dependency_ids = children_map[root_key]
1228
+ root_dependencies = [dependency_map[dep_id] for dep_id in root_dependency_ids]
1229
+
1230
+ if indirect:
1231
+ for dependency in root_dependencies:
1232
+ if dependency is not None:
1233
+ populate_nested_dependencies(
1234
+ dependency, dependency_nodes, dependency_map, children_map
1235
+ )
1236
+
1237
+ return root_dependencies
1238
+
1206
1239
  def get_dataset_dependencies(
1207
1240
  self,
1208
1241
  name: str,
@@ -1216,29 +1249,21 @@ class Catalog:
1216
1249
  namespace_name=namespace_name,
1217
1250
  project_name=project_name,
1218
1251
  )
1219
-
1220
- direct_dependencies = self.metastore.get_direct_dataset_dependencies(
1221
- dataset, version
1222
- )
1252
+ dataset_version = dataset.get_version(version)
1253
+ dataset_id = dataset.id
1254
+ dataset_version_id = dataset_version.id
1223
1255
 
1224
1256
  if not indirect:
1225
- return direct_dependencies
1226
-
1227
- for d in direct_dependencies:
1228
- if not d:
1229
- # dependency has been removed
1230
- continue
1231
- if d.is_dataset:
1232
- # only datasets can have dependencies
1233
- d.dependencies = self.get_dataset_dependencies(
1234
- d.name,
1235
- d.version,
1236
- namespace_name=d.namespace,
1237
- project_name=d.project,
1238
- indirect=indirect,
1239
- )
1257
+ return self.metastore.get_direct_dataset_dependencies(
1258
+ dataset,
1259
+ version,
1260
+ )
1240
1261
 
1241
- return direct_dependencies
1262
+ return self.get_dataset_dependencies_by_ids(
1263
+ dataset_id,
1264
+ dataset_version_id,
1265
+ indirect,
1266
+ )
1242
1267
 
1243
1268
  def ls_datasets(
1244
1269
  self,
@@ -0,0 +1,164 @@
1
+ import builtins
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from typing import TypeVar
5
+
6
+ from datachain.dataset import DatasetDependency
7
+
8
+ DDN = TypeVar("DDN", bound="DatasetDependencyNode")
9
+
10
+
11
+ @dataclass
12
+ class DatasetDependencyNode:
13
+ namespace: str
14
+ project: str
15
+ id: int
16
+ dataset_id: int | None
17
+ dataset_version_id: int | None
18
+ dataset_name: str | None
19
+ dataset_version: str | None
20
+ created_at: datetime
21
+ source_dataset_id: int
22
+ source_dataset_version_id: int | None
23
+ depth: int
24
+
25
+ @classmethod
26
+ def parse(
27
+ cls: builtins.type[DDN],
28
+ namespace: str,
29
+ project: str,
30
+ id: int,
31
+ dataset_id: int | None,
32
+ dataset_version_id: int | None,
33
+ dataset_name: str | None,
34
+ dataset_version: str | None,
35
+ created_at: datetime,
36
+ source_dataset_id: int,
37
+ source_dataset_version_id: int | None,
38
+ depth: int,
39
+ ) -> "DatasetDependencyNode | None":
40
+ return cls(
41
+ namespace,
42
+ project,
43
+ id,
44
+ dataset_id,
45
+ dataset_version_id,
46
+ dataset_name,
47
+ dataset_version,
48
+ created_at,
49
+ source_dataset_id,
50
+ source_dataset_version_id,
51
+ depth,
52
+ )
53
+
54
+ def to_dependency(self) -> "DatasetDependency | None":
55
+ return DatasetDependency.parse(
56
+ namespace_name=self.namespace,
57
+ project_name=self.project,
58
+ id=self.id,
59
+ dataset_id=self.dataset_id,
60
+ dataset_version_id=self.dataset_version_id,
61
+ dataset_name=self.dataset_name,
62
+ dataset_version=self.dataset_version,
63
+ dataset_version_created_at=self.created_at,
64
+ )
65
+
66
+
67
+ def build_dependency_hierarchy(
68
+ dependency_nodes: list[DatasetDependencyNode | None],
69
+ ) -> tuple[
70
+ dict[int, DatasetDependency | None], dict[tuple[int, int | None], list[int]]
71
+ ]:
72
+ """
73
+ Build dependency hierarchy from dependency nodes.
74
+
75
+ Args:
76
+ dependency_nodes: List of DatasetDependencyNode objects from the database
77
+
78
+ Returns:
79
+ Tuple of (dependency_map, children_map) where:
80
+ - dependency_map: Maps dependency_id -> DatasetDependency
81
+ - children_map: Maps (source_dataset_id, source_version_id) ->
82
+ list of dependency_ids
83
+ """
84
+ dependency_map: dict[int, DatasetDependency | None] = {}
85
+ children_map: dict[tuple[int, int | None], list[int]] = {}
86
+
87
+ for node in dependency_nodes:
88
+ if node is None:
89
+ continue
90
+ dependency = node.to_dependency()
91
+ parent_key = (node.source_dataset_id, node.source_dataset_version_id)
92
+
93
+ if dependency is not None:
94
+ dependency_map[dependency.id] = dependency
95
+ children_map.setdefault(parent_key, []).append(dependency.id)
96
+ else:
97
+ # Handle case where dependency creation failed (e.g., deleted dependency)
98
+ dependency_map[node.id] = None
99
+ children_map.setdefault(parent_key, []).append(node.id)
100
+
101
+ return dependency_map, children_map
102
+
103
+
104
+ def populate_nested_dependencies(
105
+ dependency: DatasetDependency,
106
+ dependency_nodes: list[DatasetDependencyNode | None],
107
+ dependency_map: dict[int, DatasetDependency | None],
108
+ children_map: dict[tuple[int, int | None], list[int]],
109
+ ) -> None:
110
+ """
111
+ Recursively populate nested dependencies for a given dependency.
112
+
113
+ Args:
114
+ dependency: The dependency to populate nested dependencies for
115
+ dependency_nodes: All dependency nodes from the database
116
+ dependency_map: Maps dependency_id -> DatasetDependency
117
+ children_map: Maps (source_dataset_id, source_version_id) ->
118
+ list of dependency_ids
119
+ """
120
+ # Find the target dataset and version for this dependency
121
+ target_dataset_id, target_version_id = find_target_dataset_version(
122
+ dependency, dependency_nodes
123
+ )
124
+
125
+ if target_dataset_id is None or target_version_id is None:
126
+ return
127
+
128
+ # Get children for this target
129
+ target_key = (target_dataset_id, target_version_id)
130
+ if target_key not in children_map:
131
+ dependency.dependencies = []
132
+ return
133
+
134
+ child_dependency_ids = children_map[target_key]
135
+ child_dependencies = [dependency_map[child_id] for child_id in child_dependency_ids]
136
+
137
+ dependency.dependencies = child_dependencies
138
+
139
+ # Recursively populate children
140
+ for child_dependency in child_dependencies:
141
+ if child_dependency is not None:
142
+ populate_nested_dependencies(
143
+ child_dependency, dependency_nodes, dependency_map, children_map
144
+ )
145
+
146
+
147
+ def find_target_dataset_version(
148
+ dependency: DatasetDependency,
149
+ dependency_nodes: list[DatasetDependencyNode | None],
150
+ ) -> tuple[int | None, int | None]:
151
+ """
152
+ Find the target dataset ID and version ID for a given dependency.
153
+
154
+ Args:
155
+ dependency: The dependency to find target for
156
+ dependency_nodes: All dependency nodes from the database
157
+
158
+ Returns:
159
+ Tuple of (target_dataset_id, target_version_id) or (None, None) if not found
160
+ """
161
+ for node in dependency_nodes:
162
+ if node is not None and node.id == dependency.id:
163
+ return node.dataset_id, node.dataset_version_id
164
+ return None, None
@@ -22,10 +22,12 @@ from sqlalchemy import (
22
22
  Text,
23
23
  UniqueConstraint,
24
24
  desc,
25
+ literal,
25
26
  select,
26
27
  )
27
28
  from sqlalchemy.sql import func as f
28
29
 
30
+ from datachain.catalog.dependency import DatasetDependencyNode
29
31
  from datachain.checkpoint import Checkpoint
30
32
  from datachain.data_storage import JobQueryType, JobStatus
31
33
  from datachain.data_storage.serializer import Serializable
@@ -78,6 +80,7 @@ class AbstractMetastore(ABC, Serializable):
78
80
  dataset_list_class: type[DatasetListRecord] = DatasetListRecord
79
81
  dataset_list_version_class: type[DatasetListVersion] = DatasetListVersion
80
82
  dependency_class: type[DatasetDependency] = DatasetDependency
83
+ dependency_node_class: type[DatasetDependencyNode] = DatasetDependencyNode
81
84
  job_class: type[Job] = Job
82
85
  checkpoint_class: type[Checkpoint] = Checkpoint
83
86
 
@@ -366,6 +369,12 @@ class AbstractMetastore(ABC, Serializable):
366
369
  ) -> list[DatasetDependency | None]:
367
370
  """Gets direct dataset dependencies."""
368
371
 
372
+ @abstractmethod
373
+ def get_dataset_dependency_nodes(
374
+ self, dataset_id: int, version_id: int
375
+ ) -> list[DatasetDependencyNode | None]:
376
+ """Gets dataset dependency node from database."""
377
+
369
378
  @abstractmethod
370
379
  def remove_dataset_dependencies(
371
380
  self, dataset: DatasetRecord, version: str | None = None
@@ -1483,6 +1492,77 @@ class AbstractDBMetastore(AbstractMetastore):
1483
1492
 
1484
1493
  return [self.dependency_class.parse(*r) for r in self.db.execute(query)]
1485
1494
 
1495
+ def get_dataset_dependency_nodes(
1496
+ self, dataset_id: int, version_id: int
1497
+ ) -> list[DatasetDependencyNode | None]:
1498
+ n = self._namespaces_select().subquery()
1499
+ p = self._projects
1500
+ d = self._datasets_select().subquery()
1501
+ dd = self._datasets_dependencies
1502
+ dv = self._datasets_versions
1503
+
1504
+ # Common dependency fields for CTE
1505
+ dep_fields = [
1506
+ dd.c.id,
1507
+ dd.c.source_dataset_id,
1508
+ dd.c.source_dataset_version_id,
1509
+ dd.c.dataset_id,
1510
+ dd.c.dataset_version_id,
1511
+ ]
1512
+
1513
+ # Base case: direct dependencies
1514
+ base_query = select(
1515
+ *dep_fields,
1516
+ literal(0).label("depth"),
1517
+ ).where(
1518
+ (dd.c.source_dataset_id == dataset_id)
1519
+ & (dd.c.source_dataset_version_id == version_id)
1520
+ )
1521
+
1522
+ cte = base_query.cte(name="dependency_tree", recursive=True)
1523
+
1524
+ # Recursive case: dependencies of dependencies
1525
+ recursive_query = select(
1526
+ *dep_fields,
1527
+ (cte.c.depth + 1).label("depth"),
1528
+ ).select_from(
1529
+ cte.join(
1530
+ dd,
1531
+ (cte.c.dataset_id == dd.c.source_dataset_id)
1532
+ & (cte.c.dataset_version_id == dd.c.source_dataset_version_id),
1533
+ )
1534
+ )
1535
+
1536
+ cte = cte.union(recursive_query)
1537
+
1538
+ # Fetch all with full details
1539
+ final_query = select(
1540
+ n.c.name,
1541
+ p.c.name,
1542
+ cte.c.id,
1543
+ cte.c.dataset_id,
1544
+ cte.c.dataset_version_id,
1545
+ d.c.name,
1546
+ dv.c.version,
1547
+ dv.c.created_at,
1548
+ cte.c.source_dataset_id,
1549
+ cte.c.source_dataset_version_id,
1550
+ cte.c.depth,
1551
+ ).select_from(
1552
+ # Use outer joins to handle cases where dependent datasets have been
1553
+ # physically deleted. This allows us to return dependency records with
1554
+ # None values instead of silently omitting them, making broken
1555
+ # dependencies visible to callers.
1556
+ cte.join(d, cte.c.dataset_id == d.c.id, isouter=True)
1557
+ .join(dv, cte.c.dataset_version_id == dv.c.id, isouter=True)
1558
+ .join(p, d.c.project_id == p.c.id, isouter=True)
1559
+ .join(n, p.c.namespace_id == n.c.id, isouter=True)
1560
+ )
1561
+
1562
+ return [
1563
+ self.dependency_node_class.parse(*r) for r in self.db.execute(final_query)
1564
+ ]
1565
+
1486
1566
  def remove_dataset_dependencies(
1487
1567
  self, dataset: DatasetRecord, version: str | None = None
1488
1568
  ) -> None:
@@ -11,7 +11,6 @@ from datachain.sql.types import (
11
11
  JSON,
12
12
  Boolean,
13
13
  DateTime,
14
- Int,
15
14
  Int64,
16
15
  SQLType,
17
16
  String,
@@ -269,7 +268,7 @@ class DataTable:
269
268
  @classmethod
270
269
  def sys_columns(cls):
271
270
  return [
272
- sa.Column("sys__id", Int, primary_key=True),
271
+ sa.Column("sys__id", UInt64, primary_key=True),
273
272
  sa.Column(
274
273
  "sys__rand", UInt64, nullable=False, server_default=f.abs(f.random())
275
274
  ),
@@ -868,11 +868,8 @@ class SQLiteWarehouse(AbstractWarehouse):
868
868
  if isinstance(c, BinaryExpression):
869
869
  right_left_join = add_left_rows_filter(c)
870
870
 
871
- # Use CTE instead of subquery to force SQLite to materialize the result
872
- # This breaks deep nesting and prevents parser stack overflow.
873
871
  union_cte = sqlalchemy.union(left_right_join, right_left_join).cte()
874
-
875
- return self._regenerate_system_columns(union_cte)
872
+ return sqlalchemy.select(*union_cte.c).select_from(union_cte)
876
873
 
877
874
  def _system_row_number_expr(self):
878
875
  return func.row_number().over()
@@ -884,11 +881,7 @@ class SQLiteWarehouse(AbstractWarehouse):
884
881
  """
885
882
  Create a temporary table from a query for use in a UDF.
886
883
  """
887
- columns = [
888
- sqlalchemy.Column(c.name, c.type)
889
- for c in query.selected_columns
890
- if c.name != "sys__id"
891
- ]
884
+ columns = [sqlalchemy.Column(c.name, c.type) for c in query.selected_columns]
892
885
  table = self.create_udf_table(columns)
893
886
 
894
887
  with tqdm(desc="Preparing", unit=" rows", leave=False) as pbar:
@@ -5,7 +5,7 @@ import random
5
5
  import string
6
6
  from abc import ABC, abstractmethod
7
7
  from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
8
- from typing import TYPE_CHECKING, Any, Union
8
+ from typing import TYPE_CHECKING, Any, Union, cast
9
9
  from urllib.parse import urlparse
10
10
 
11
11
  import attrs
@@ -23,7 +23,7 @@ from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
23
23
  from datachain.query.batch import RowsOutput
24
24
  from datachain.query.schema import ColumnMeta
25
25
  from datachain.sql.functions import path as pathfunc
26
- from datachain.sql.types import Int, SQLType
26
+ from datachain.sql.types import SQLType
27
27
  from datachain.utils import sql_escape_like
28
28
 
29
29
  if TYPE_CHECKING:
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
32
32
  _FromClauseArgument,
33
33
  _OnClauseArgument,
34
34
  )
35
+ from sqlalchemy.sql.selectable import FromClause
35
36
  from sqlalchemy.types import TypeEngine
36
37
 
37
38
  from datachain.data_storage import schema
@@ -248,45 +249,56 @@ class AbstractWarehouse(ABC, Serializable):
248
249
 
249
250
  def _regenerate_system_columns(
250
251
  self,
251
- selectable: sa.Select | sa.CTE,
252
+ selectable: sa.Select,
252
253
  keep_existing_columns: bool = False,
254
+ regenerate_columns: Iterable[str] | None = None,
253
255
  ) -> sa.Select:
254
256
  """
255
- Return a SELECT that regenerates sys__id and sys__rand deterministically.
257
+ Return a SELECT that regenerates system columns deterministically.
256
258
 
257
- If keep_existing_columns is True, existing sys__id and sys__rand columns
258
- will be kept as-is if they exist in the input selectable.
259
- """
260
- base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
261
-
262
- result_columns: dict[str, sa.ColumnElement] = {}
263
- for col in base.c:
264
- if col.name in result_columns:
265
- raise ValueError(f"Duplicate column name {col.name} in SELECT")
266
- if col.name in ("sys__id", "sys__rand"):
267
- if keep_existing_columns:
268
- result_columns[col.name] = col
269
- else:
270
- result_columns[col.name] = col
259
+ If keep_existing_columns is True, existing system columns will be kept as-is
260
+ even when they are listed in ``regenerate_columns``.
271
261
 
272
- system_types: dict[str, sa.types.TypeEngine] = {
262
+ Args:
263
+ selectable: Base SELECT
264
+ keep_existing_columns: When True, reuse existing system columns even if
265
+ they are part of the regeneration set.
266
+ regenerate_columns: Names of system columns to regenerate. Defaults to
267
+ {"sys__id", "sys__rand"}. Columns not listed are left untouched.
268
+ """
269
+ system_columns = {
273
270
  sys_col.name: sys_col.type
274
271
  for sys_col in self.schema.dataset_row_cls.sys_columns()
275
272
  }
273
+ regenerate = set(regenerate_columns or system_columns)
274
+ generators = {
275
+ "sys__id": self._system_row_number_expr,
276
+ "sys__rand": self._system_random_expr,
277
+ }
278
+
279
+ base = cast("FromClause", selectable.subquery())
280
+
281
+ def build(name: str) -> sa.ColumnElement:
282
+ expr = generators[name]()
283
+ return sa.cast(expr, system_columns[name]).label(name)
284
+
285
+ columns: list[sa.ColumnElement] = []
286
+ present: set[str] = set()
287
+ changed = False
288
+
289
+ for col in base.c:
290
+ present.add(col.name)
291
+ regen = col.name in regenerate and not keep_existing_columns
292
+ columns.append(build(col.name) if regen else col)
293
+ changed |= regen
294
+
295
+ for name in regenerate - present:
296
+ columns.append(build(name))
297
+ changed = True
298
+
299
+ if not changed:
300
+ return selectable
276
301
 
277
- # Add missing system columns if needed
278
- if "sys__id" not in result_columns:
279
- expr = self._system_row_number_expr()
280
- expr = sa.cast(expr, system_types["sys__id"])
281
- result_columns["sys__id"] = expr.label("sys__id")
282
- if "sys__rand" not in result_columns:
283
- expr = self._system_random_expr()
284
- expr = sa.cast(expr, system_types["sys__rand"])
285
- result_columns["sys__rand"] = expr.label("sys__rand")
286
-
287
- # Wrap in subquery to materialize window functions, then wrap again in SELECT
288
- # This ensures window functions are computed before INSERT...FROM SELECT
289
- columns = list(result_columns.values())
290
302
  inner = sa.select(*columns).select_from(base).subquery()
291
303
  return sa.select(*inner.c).select_from(inner)
292
304
 
@@ -950,10 +962,15 @@ class AbstractWarehouse(ABC, Serializable):
950
962
  SQLite TEMPORARY tables cannot be directly used as they are process-specific,
951
963
  and UDFs are run in other processes when run in parallel.
952
964
  """
965
+ columns = [
966
+ c
967
+ for c in columns
968
+ if c.name not in [col.name for col in self.dataset_row_cls.sys_columns()]
969
+ ]
953
970
  tbl = sa.Table(
954
971
  name or self.udf_table_name(),
955
972
  sa.MetaData(),
956
- sa.Column("sys__id", Int, primary_key=True),
973
+ *self.dataset_row_cls.sys_columns(),
957
974
  *columns,
958
975
  )
959
976
  self.db.create_table(tbl, if_not_exists=True)