datachain 0.26.0__tar.gz → 0.26.2__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (404)
  1. {datachain-0.26.0 → datachain-0.26.2}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.26.0 → datachain-0.26.2}/PKG-INFO +2 -2
  3. {datachain-0.26.0 → datachain-0.26.2}/pyproject.toml +1 -1
  4. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/catalog/loader.py +4 -0
  5. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/__init__.py +2 -1
  6. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/conditional.py +34 -0
  7. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/arrow.py +1 -1
  8. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/data_model.py +11 -1
  9. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/datachain.py +102 -44
  10. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/hf.py +4 -2
  11. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/hf.py +31 -10
  12. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/pytorch.py +4 -1
  13. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/signal_schema.py +9 -4
  14. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/dataset.py +22 -0
  15. {datachain-0.26.0 → datachain-0.26.2}/src/datachain.egg-info/PKG-INFO +2 -2
  16. {datachain-0.26.0 → datachain-0.26.2}/src/datachain.egg-info/requires.txt +1 -1
  17. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/test_conditional.py +4 -3
  18. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_dataset_query.py +1 -1
  19. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_hf.py +6 -4
  20. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_datachain.py +716 -1
  21. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_hf.py +23 -17
  22. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_partition_by.py +38 -0
  23. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_signal_schema.py +12 -6
  24. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/test_conditional.py +15 -0
  25. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_func.py +13 -0
  26. {datachain-0.26.0 → datachain-0.26.2}/.cruft.json +0 -0
  27. {datachain-0.26.0 → datachain-0.26.2}/.gitattributes +0 -0
  28. {datachain-0.26.0 → datachain-0.26.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  29. {datachain-0.26.0 → datachain-0.26.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  30. {datachain-0.26.0 → datachain-0.26.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  31. {datachain-0.26.0 → datachain-0.26.2}/.github/codecov.yaml +0 -0
  32. {datachain-0.26.0 → datachain-0.26.2}/.github/dependabot.yml +0 -0
  33. {datachain-0.26.0 → datachain-0.26.2}/.github/workflows/benchmarks.yml +0 -0
  34. {datachain-0.26.0 → datachain-0.26.2}/.github/workflows/release.yml +0 -0
  35. {datachain-0.26.0 → datachain-0.26.2}/.github/workflows/tests-studio.yml +0 -0
  36. {datachain-0.26.0 → datachain-0.26.2}/.github/workflows/tests.yml +0 -0
  37. {datachain-0.26.0 → datachain-0.26.2}/.github/workflows/update-template.yaml +0 -0
  38. {datachain-0.26.0 → datachain-0.26.2}/.gitignore +0 -0
  39. {datachain-0.26.0 → datachain-0.26.2}/CODE_OF_CONDUCT.rst +0 -0
  40. {datachain-0.26.0 → datachain-0.26.2}/LICENSE +0 -0
  41. {datachain-0.26.0 → datachain-0.26.2}/README.rst +0 -0
  42. {datachain-0.26.0 → datachain-0.26.2}/docs/assets/captioned_cartoons.png +0 -0
  43. {datachain-0.26.0 → datachain-0.26.2}/docs/assets/datachain-white.svg +0 -0
  44. {datachain-0.26.0 → datachain-0.26.2}/docs/assets/datachain.svg +0 -0
  45. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/auth/login.md +0 -0
  46. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/auth/logout.md +0 -0
  47. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/auth/team.md +0 -0
  48. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/auth/token.md +0 -0
  49. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/index.md +0 -0
  50. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/job/cancel.md +0 -0
  51. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/job/clusters.md +0 -0
  52. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/job/logs.md +0 -0
  53. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/job/ls.md +0 -0
  54. {datachain-0.26.0 → datachain-0.26.2}/docs/commands/job/run.md +0 -0
  55. {datachain-0.26.0 → datachain-0.26.2}/docs/contributing.md +0 -0
  56. {datachain-0.26.0 → datachain-0.26.2}/docs/css/github-permalink-style.css +0 -0
  57. {datachain-0.26.0 → datachain-0.26.2}/docs/examples.md +0 -0
  58. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/db_migrations.md +0 -0
  59. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/delta.md +0 -0
  60. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/env.md +0 -0
  61. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/index.md +0 -0
  62. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/namespaces.md +0 -0
  63. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/processing.md +0 -0
  64. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/remotes.md +0 -0
  65. {datachain-0.26.0 → datachain-0.26.2}/docs/guide/retry.md +0 -0
  66. {datachain-0.26.0 → datachain-0.26.2}/docs/index.md +0 -0
  67. {datachain-0.26.0 → datachain-0.26.2}/docs/overrides/main.html +0 -0
  68. {datachain-0.26.0 → datachain-0.26.2}/docs/quick-start.md +0 -0
  69. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/arrowrow.md +0 -0
  70. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/bbox.md +0 -0
  71. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/file.md +0 -0
  72. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/imagefile.md +0 -0
  73. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/index.md +0 -0
  74. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/pose.md +0 -0
  75. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/segment.md +0 -0
  76. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/tarvfile.md +0 -0
  77. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/textfile.md +0 -0
  78. {datachain-0.26.0 → datachain-0.26.2}/docs/references/data-types/videofile.md +0 -0
  79. {datachain-0.26.0 → datachain-0.26.2}/docs/references/datachain.md +0 -0
  80. {datachain-0.26.0 → datachain-0.26.2}/docs/references/func.md +0 -0
  81. {datachain-0.26.0 → datachain-0.26.2}/docs/references/index.md +0 -0
  82. {datachain-0.26.0 → datachain-0.26.2}/docs/references/toolkit.md +0 -0
  83. {datachain-0.26.0 → datachain-0.26.2}/docs/references/torch.md +0 -0
  84. {datachain-0.26.0 → datachain-0.26.2}/docs/references/udf.md +0 -0
  85. {datachain-0.26.0 → datachain-0.26.2}/docs/tutorials.md +0 -0
  86. {datachain-0.26.0 → datachain-0.26.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  87. {datachain-0.26.0 → datachain-0.26.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  88. {datachain-0.26.0 → datachain-0.26.2}/examples/computer_vision/openimage-detect.py +0 -0
  89. {datachain-0.26.0 → datachain-0.26.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  90. {datachain-0.26.0 → datachain-0.26.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  91. {datachain-0.26.0 → datachain-0.26.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  92. {datachain-0.26.0 → datachain-0.26.2}/examples/get_started/common_sql_functions.py +0 -0
  93. {datachain-0.26.0 → datachain-0.26.2}/examples/get_started/json-csv-reader.py +0 -0
  94. {datachain-0.26.0 → datachain-0.26.2}/examples/get_started/torch-loader.py +0 -0
  95. {datachain-0.26.0 → datachain-0.26.2}/examples/get_started/udfs/parallel.py +0 -0
  96. {datachain-0.26.0 → datachain-0.26.2}/examples/get_started/udfs/simple.py +0 -0
  97. {datachain-0.26.0 → datachain-0.26.2}/examples/get_started/udfs/stateful.py +0 -0
  98. {datachain-0.26.0 → datachain-0.26.2}/examples/incremental_processing/delta.py +0 -0
  99. {datachain-0.26.0 → datachain-0.26.2}/examples/incremental_processing/retry.py +0 -0
  100. {datachain-0.26.0 → datachain-0.26.2}/examples/incremental_processing/utils.py +0 -0
  101. {datachain-0.26.0 → datachain-0.26.2}/examples/llm_and_nlp/claude-query.py +0 -0
  102. {datachain-0.26.0 → datachain-0.26.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  103. {datachain-0.26.0 → datachain-0.26.2}/examples/multimodal/audio-to-text.py +0 -0
  104. {datachain-0.26.0 → datachain-0.26.2}/examples/multimodal/clip_inference.py +0 -0
  105. {datachain-0.26.0 → datachain-0.26.2}/examples/multimodal/hf_pipeline.py +0 -0
  106. {datachain-0.26.0 → datachain-0.26.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  107. {datachain-0.26.0 → datachain-0.26.2}/examples/multimodal/wds.py +0 -0
  108. {datachain-0.26.0 → datachain-0.26.2}/examples/multimodal/wds_filtered.py +0 -0
  109. {datachain-0.26.0 → datachain-0.26.2}/mkdocs.yml +0 -0
  110. {datachain-0.26.0 → datachain-0.26.2}/noxfile.py +0 -0
  111. {datachain-0.26.0 → datachain-0.26.2}/setup.cfg +0 -0
  112. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/__init__.py +0 -0
  113. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/__main__.py +0 -0
  114. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/asyn.py +0 -0
  115. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cache.py +0 -0
  116. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/catalog/__init__.py +0 -0
  117. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/catalog/catalog.py +0 -0
  118. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/catalog/datasource.py +0 -0
  119. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/__init__.py +0 -0
  120. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/__init__.py +0 -0
  121. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/datasets.py +0 -0
  122. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/du.py +0 -0
  123. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/index.py +0 -0
  124. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/ls.py +0 -0
  125. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/misc.py +0 -0
  126. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/query.py +0 -0
  127. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/commands/show.py +0 -0
  128. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/parser/__init__.py +0 -0
  129. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/parser/job.py +0 -0
  130. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/parser/studio.py +0 -0
  131. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/parser/utils.py +0 -0
  132. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/cli/utils.py +0 -0
  133. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/__init__.py +0 -0
  134. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/azure.py +0 -0
  135. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/fileslice.py +0 -0
  136. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/fsspec.py +0 -0
  137. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/gcs.py +0 -0
  138. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/hf.py +0 -0
  139. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/local.py +0 -0
  140. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/client/s3.py +0 -0
  141. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/config.py +0 -0
  142. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/__init__.py +0 -0
  143. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/db_engine.py +0 -0
  144. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/job.py +0 -0
  145. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/metastore.py +0 -0
  146. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/schema.py +0 -0
  147. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/serializer.py +0 -0
  148. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/sqlite.py +0 -0
  149. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/data_storage/warehouse.py +0 -0
  150. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/dataset.py +0 -0
  151. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/delta.py +0 -0
  152. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/diff/__init__.py +0 -0
  153. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/error.py +0 -0
  154. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/fs/__init__.py +0 -0
  155. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/fs/reference.py +0 -0
  156. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/fs/utils.py +0 -0
  157. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/aggregate.py +0 -0
  158. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/array.py +0 -0
  159. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/base.py +0 -0
  160. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/func.py +0 -0
  161. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/numeric.py +0 -0
  162. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/path.py +0 -0
  163. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/random.py +0 -0
  164. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/string.py +0 -0
  165. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/func/window.py +0 -0
  166. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/job.py +0 -0
  167. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/__init__.py +0 -0
  168. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/audio.py +0 -0
  169. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/clip.py +0 -0
  170. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/convert/__init__.py +0 -0
  171. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/convert/flatten.py +0 -0
  172. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  173. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  174. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/convert/unflatten.py +0 -0
  175. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  176. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dataset_info.py +0 -0
  177. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/__init__.py +0 -0
  178. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/csv.py +0 -0
  179. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/database.py +0 -0
  180. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/datasets.py +0 -0
  181. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/json.py +0 -0
  182. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/listings.py +0 -0
  183. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/pandas.py +0 -0
  184. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/parquet.py +0 -0
  185. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/records.py +0 -0
  186. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/storage.py +0 -0
  187. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/utils.py +0 -0
  188. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/dc/values.py +0 -0
  189. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/file.py +0 -0
  190. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/image.py +0 -0
  191. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/listing.py +0 -0
  192. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/listing_info.py +0 -0
  193. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/meta_formats.py +0 -0
  194. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/model_store.py +0 -0
  195. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/namespaces.py +0 -0
  196. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/projects.py +0 -0
  197. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/settings.py +0 -0
  198. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/tar.py +0 -0
  199. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/text.py +0 -0
  200. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/udf.py +0 -0
  201. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/udf_signature.py +0 -0
  202. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/utils.py +0 -0
  203. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/video.py +0 -0
  204. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/webdataset.py +0 -0
  205. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/lib/webdataset_laion.py +0 -0
  206. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/listing.py +0 -0
  207. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/__init__.py +0 -0
  208. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/bbox.py +0 -0
  209. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/pose.py +0 -0
  210. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/segment.py +0 -0
  211. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  212. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  213. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/ultralytics/pose.py +0 -0
  214. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/ultralytics/segment.py +0 -0
  215. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/model/utils.py +0 -0
  216. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/namespace.py +0 -0
  217. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/node.py +0 -0
  218. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/nodes_fetcher.py +0 -0
  219. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/nodes_thread_pool.py +0 -0
  220. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/progress.py +0 -0
  221. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/project.py +0 -0
  222. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/py.typed +0 -0
  223. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/__init__.py +0 -0
  224. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/batch.py +0 -0
  225. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/dispatch.py +0 -0
  226. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/metrics.py +0 -0
  227. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/params.py +0 -0
  228. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/queue.py +0 -0
  229. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/schema.py +0 -0
  230. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/session.py +0 -0
  231. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/udf.py +0 -0
  232. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/query/utils.py +0 -0
  233. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/remote/__init__.py +0 -0
  234. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/remote/studio.py +0 -0
  235. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/script_meta.py +0 -0
  236. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/semver.py +0 -0
  237. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/__init__.py +0 -0
  238. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/default/__init__.py +0 -0
  239. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/default/base.py +0 -0
  240. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/__init__.py +0 -0
  241. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/aggregate.py +0 -0
  242. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/array.py +0 -0
  243. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/conditional.py +0 -0
  244. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/numeric.py +0 -0
  245. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/path.py +0 -0
  246. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/random.py +0 -0
  247. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/functions/string.py +0 -0
  248. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/selectable.py +0 -0
  249. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  250. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/sqlite/base.py +0 -0
  251. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/sqlite/types.py +0 -0
  252. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/sqlite/vector.py +0 -0
  253. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/types.py +0 -0
  254. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/sql/utils.py +0 -0
  255. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/studio.py +0 -0
  256. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/telemetry.py +0 -0
  257. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/toolkit/__init__.py +0 -0
  258. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/toolkit/split.py +0 -0
  259. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/torch/__init__.py +0 -0
  260. {datachain-0.26.0 → datachain-0.26.2}/src/datachain/utils.py +0 -0
  261. {datachain-0.26.0 → datachain-0.26.2}/src/datachain.egg-info/SOURCES.txt +0 -0
  262. {datachain-0.26.0 → datachain-0.26.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  263. {datachain-0.26.0 → datachain-0.26.2}/src/datachain.egg-info/entry_points.txt +0 -0
  264. {datachain-0.26.0 → datachain-0.26.2}/src/datachain.egg-info/top_level.txt +0 -0
  265. {datachain-0.26.0 → datachain-0.26.2}/tests/__init__.py +0 -0
  266. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/__init__.py +0 -0
  267. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/conftest.py +0 -0
  268. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  269. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  270. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/datasets/.gitignore +0 -0
  271. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  272. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/test_datachain.py +0 -0
  273. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/test_ls.py +0 -0
  274. {datachain-0.26.0 → datachain-0.26.2}/tests/benchmarks/test_version.py +0 -0
  275. {datachain-0.26.0 → datachain-0.26.2}/tests/conftest.py +0 -0
  276. {datachain-0.26.0 → datachain-0.26.2}/tests/data.py +0 -0
  277. {datachain-0.26.0 → datachain-0.26.2}/tests/examples/__init__.py +0 -0
  278. {datachain-0.26.0 → datachain-0.26.2}/tests/examples/test_examples.py +0 -0
  279. {datachain-0.26.0 → datachain-0.26.2}/tests/examples/test_wds_e2e.py +0 -0
  280. {datachain-0.26.0 → datachain-0.26.2}/tests/examples/wds_data.py +0 -0
  281. {datachain-0.26.0 → datachain-0.26.2}/tests/func/__init__.py +0 -0
  282. {datachain-0.26.0 → datachain-0.26.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  283. {datachain-0.26.0 → datachain-0.26.2}/tests/func/data/lena.jpg +0 -0
  284. {datachain-0.26.0 → datachain-0.26.2}/tests/func/fake-service-account-credentials.json +0 -0
  285. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/__init__.py +0 -0
  286. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/test_aggregate.py +0 -0
  287. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/test_array.py +0 -0
  288. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/test_numeric.py +0 -0
  289. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/test_path.py +0 -0
  290. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/test_random.py +0 -0
  291. {datachain-0.26.0 → datachain-0.26.2}/tests/func/functions/test_string.py +0 -0
  292. {datachain-0.26.0 → datachain-0.26.2}/tests/func/model/__init__.py +0 -0
  293. {datachain-0.26.0 → datachain-0.26.2}/tests/func/model/data/running-mask0.png +0 -0
  294. {datachain-0.26.0 → datachain-0.26.2}/tests/func/model/data/running-mask1.png +0 -0
  295. {datachain-0.26.0 → datachain-0.26.2}/tests/func/model/data/running.jpg +0 -0
  296. {datachain-0.26.0 → datachain-0.26.2}/tests/func/model/data/ships.jpg +0 -0
  297. {datachain-0.26.0 → datachain-0.26.2}/tests/func/model/test_yolo.py +0 -0
  298. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_audio.py +0 -0
  299. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_batching.py +0 -0
  300. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_catalog.py +0 -0
  301. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_client.py +0 -0
  302. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_cloud_transfer.py +0 -0
  303. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_data_storage.py +0 -0
  304. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_datachain.py +0 -0
  305. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_datachain_merge.py +0 -0
  306. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_datasets.py +0 -0
  307. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_delta.py +0 -0
  308. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_feature_pickling.py +0 -0
  309. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_file.py +0 -0
  310. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_hidden_field.py +0 -0
  311. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_image.py +0 -0
  312. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_listing.py +0 -0
  313. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_ls.py +0 -0
  314. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_meta_formats.py +0 -0
  315. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_metastore.py +0 -0
  316. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_metrics.py +0 -0
  317. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_pull.py +0 -0
  318. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_pytorch.py +0 -0
  319. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_query.py +0 -0
  320. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_read_database.py +0 -0
  321. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_read_dataset_remote.py +0 -0
  322. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  323. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_retry.py +0 -0
  324. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_session.py +0 -0
  325. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_toolkit.py +0 -0
  326. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_video.py +0 -0
  327. {datachain-0.26.0 → datachain-0.26.2}/tests/func/test_warehouse.py +0 -0
  328. {datachain-0.26.0 → datachain-0.26.2}/tests/scripts/feature_class.py +0 -0
  329. {datachain-0.26.0 → datachain-0.26.2}/tests/scripts/feature_class_exception.py +0 -0
  330. {datachain-0.26.0 → datachain-0.26.2}/tests/scripts/feature_class_parallel.py +0 -0
  331. {datachain-0.26.0 → datachain-0.26.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  332. {datachain-0.26.0 → datachain-0.26.2}/tests/scripts/name_len_slow.py +0 -0
  333. {datachain-0.26.0 → datachain-0.26.2}/tests/test_atomicity.py +0 -0
  334. {datachain-0.26.0 → datachain-0.26.2}/tests/test_cli_e2e.py +0 -0
  335. {datachain-0.26.0 → datachain-0.26.2}/tests/test_cli_studio.py +0 -0
  336. {datachain-0.26.0 → datachain-0.26.2}/tests/test_import_time.py +0 -0
  337. {datachain-0.26.0 → datachain-0.26.2}/tests/test_query_e2e.py +0 -0
  338. {datachain-0.26.0 → datachain-0.26.2}/tests/test_telemetry.py +0 -0
  339. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/__init__.py +0 -0
  340. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/__init__.py +0 -0
  341. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/conftest.py +0 -0
  342. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_arrow.py +0 -0
  343. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_audio.py +0 -0
  344. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_clip.py +0 -0
  345. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  346. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  347. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_diff.py +0 -0
  348. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_feature.py +0 -0
  349. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_feature_utils.py +0 -0
  350. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_file.py +0 -0
  351. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_image.py +0 -0
  352. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_listing_info.py +0 -0
  353. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_namespace.py +0 -0
  354. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_project.py +0 -0
  355. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  356. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_schema.py +0 -0
  357. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  358. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_text.py +0 -0
  359. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_udf.py +0 -0
  360. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_udf_signature.py +0 -0
  361. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_utils.py +0 -0
  362. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/lib/test_webdataset.py +0 -0
  363. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/model/__init__.py +0 -0
  364. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/model/test_bbox.py +0 -0
  365. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/model/test_pose.py +0 -0
  366. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/model/test_segment.py +0 -0
  367. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/model/test_utils.py +0 -0
  368. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/__init__.py +0 -0
  369. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  370. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  371. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  372. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/test_array.py +0 -0
  373. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/test_path.py +0 -0
  374. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/test_random.py +0 -0
  375. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/test_selectable.py +0 -0
  376. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/sql/test_string.py +0 -0
  377. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_asyn.py +0 -0
  378. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_cache.py +0 -0
  379. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_catalog.py +0 -0
  380. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_catalog_loader.py +0 -0
  381. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_cli_parsing.py +0 -0
  382. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_client.py +0 -0
  383. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_client_gcs.py +0 -0
  384. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_client_s3.py +0 -0
  385. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_config.py +0 -0
  386. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_data_storage.py +0 -0
  387. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_database_engine.py +0 -0
  388. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_dataset.py +0 -0
  389. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_dispatch.py +0 -0
  390. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_fileslice.py +0 -0
  391. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_listing.py +0 -0
  392. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_metastore.py +0 -0
  393. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_module_exports.py +0 -0
  394. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_pytorch.py +0 -0
  395. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_query.py +0 -0
  396. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_query_metrics.py +0 -0
  397. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_query_params.py +0 -0
  398. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_script_meta.py +0 -0
  399. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_semver.py +0 -0
  400. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_serializer.py +0 -0
  401. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_session.py +0 -0
  402. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_utils.py +0 -0
  403. {datachain-0.26.0 → datachain-0.26.2}/tests/unit/test_warehouse.py +0 -0
  404. {datachain-0.26.0 → datachain-0.26.2}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.2'
27
+ rev: 'v0.12.3'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.26.0
3
+ Version: 0.26.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -98,7 +98,7 @@ Requires-Dist: scipy; extra == "tests"
98
98
  Requires-Dist: ultralytics; extra == "tests"
99
99
  Provides-Extra: dev
100
100
  Requires-Dist: datachain[docs,tests]; extra == "dev"
101
- Requires-Dist: mypy==1.16.1; extra == "dev"
101
+ Requires-Dist: mypy==1.17.0; extra == "dev"
102
102
  Requires-Dist: types-python-dateutil; extra == "dev"
103
103
  Requires-Dist: types-pytz; extra == "dev"
104
104
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -114,7 +114,7 @@ tests = [
114
114
  ]
115
115
  dev = [
116
116
  "datachain[docs,tests]",
117
- "mypy==1.16.1",
117
+ "mypy==1.17.0",
118
118
  "types-python-dateutil",
119
119
  "types-pytz",
120
120
  "types-PyYAML",
@@ -18,6 +18,7 @@ WAREHOUSE_IMPORT_PATH = "DATACHAIN_WAREHOUSE"
18
18
  WAREHOUSE_ARG_PREFIX = "DATACHAIN_WAREHOUSE_ARG_"
19
19
  DISTRIBUTED_IMPORT_PYTHONPATH = "DATACHAIN_DISTRIBUTED_PYTHONPATH"
20
20
  DISTRIBUTED_IMPORT_PATH = "DATACHAIN_DISTRIBUTED"
21
+ DISTRIBUTED_DISABLED = "DATACHAIN_DISTRIBUTED_DISABLED"
21
22
 
22
23
  IN_MEMORY_ERROR_MESSAGE = "In-memory is only supported on SQLite"
23
24
 
@@ -103,6 +104,9 @@ def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
103
104
 
104
105
 
105
106
  def get_udf_distributor_class() -> Optional[type["AbstractUDFDistributor"]]:
107
+ if os.environ.get(DISTRIBUTED_DISABLED) == "True":
108
+ return None
109
+
106
110
  if not (distributed_import_path := os.environ.get(DISTRIBUTED_IMPORT_PATH)):
107
111
  return None
108
112
 
@@ -16,7 +16,7 @@ from .aggregate import (
16
16
  sum,
17
17
  )
18
18
  from .array import contains, cosine_distance, euclidean_distance, length, sip_hash_64
19
- from .conditional import and_, case, greatest, ifelse, isnone, least, or_
19
+ from .conditional import and_, case, greatest, ifelse, isnone, least, not_, or_
20
20
  from .numeric import bit_and, bit_hamming_distance, bit_or, bit_xor, int_hash_64
21
21
  from .path import file_ext, file_stem, name, parent
22
22
  from .random import rand
@@ -54,6 +54,7 @@ __all__ = [
54
54
  "max",
55
55
  "min",
56
56
  "name",
57
+ "not_",
57
58
  "or_",
58
59
  "parent",
59
60
  "path",
@@ -3,6 +3,7 @@ from typing import Optional, Union
3
3
  from sqlalchemy import ColumnElement
4
4
  from sqlalchemy import and_ as sql_and
5
5
  from sqlalchemy import case as sql_case
6
+ from sqlalchemy import not_ as sql_not
6
7
  from sqlalchemy import or_ as sql_or
7
8
 
8
9
  from datachain.lib.utils import DataChainParamsError
@@ -288,3 +289,36 @@ def and_(*args: Union[ColumnElement, Func]) -> Func:
288
289
  func_args.append(arg)
289
290
 
290
291
  return Func("and", inner=sql_and, cols=cols, args=func_args, result_type=bool)
292
+
293
+
294
+ def not_(arg: Union[ColumnElement, Func]) -> Func:
295
+ """
296
+ Returns the function that produces NOT of the given expressions.
297
+
298
+ Args:
299
+ arg (ColumnElement | Func): The expression for NOT statement.
300
+ If a string is provided, it is assumed to be the name of the column.
301
+ If a Column is provided, it is assumed to be a column in the dataset.
302
+ If a Func is provided, it is assumed to be a function returning a value.
303
+
304
+ Returns:
305
+ Func: A `Func` object that represents the NOT function.
306
+
307
+ Example:
308
+ ```py
309
+ dc.mutate(
310
+ test=not_(C("value") == 5)
311
+ )
312
+ ```
313
+
314
+ Notes:
315
+ - The result column will always be of type bool.
316
+ """
317
+ cols, func_args = [], []
318
+
319
+ if isinstance(arg, (str, Func)):
320
+ cols.append(arg)
321
+ else:
322
+ func_args.append(arg)
323
+
324
+ return Func("not", inner=sql_not, cols=cols, args=func_args, result_type=bool)
@@ -262,7 +262,7 @@ def _get_hf_schema(
262
262
  from datachain.lib.hf import get_output_schema, schema_from_arrow
263
263
 
264
264
  features = schema_from_arrow(schema)
265
- return features, get_output_schema(features)
265
+ return features, get_output_schema(features)[0]
266
266
  return None
267
267
 
268
268
 
@@ -3,6 +3,7 @@ from datetime import datetime
3
3
  from typing import ClassVar, Optional, Union, get_args, get_origin
4
4
 
5
5
  from pydantic import AliasChoices, BaseModel, Field, create_model
6
+ from pydantic.fields import FieldInfo
6
7
 
7
8
  from datachain.lib.model_store import ModelStore
8
9
  from datachain.lib.utils import normalize_col_names
@@ -89,7 +90,16 @@ def dict_to_data_model(
89
90
  }
90
91
 
91
92
  class _DataModelStrict(BaseModel, extra="forbid"):
92
- pass
93
+ @classmethod
94
+ def _model_fields_by_aliases(cls) -> dict[str, tuple[str, FieldInfo]]:
95
+ """Returns a map of aliases to original field names and info."""
96
+ field_info = {}
97
+ for _name, field in cls.model_fields.items():
98
+ assert isinstance(field.validation_alias, AliasChoices)
99
+ # Add mapping for all aliases (both normalized and original names)
100
+ for alias in field.validation_alias.choices:
101
+ field_info[str(alias)] = (_name, field)
102
+ return field_info
93
103
 
94
104
  return create_model(
95
105
  name,
@@ -33,7 +33,13 @@ from datachain.func import literal
33
33
  from datachain.func.base import Function
34
34
  from datachain.func.func import Func
35
35
  from datachain.lib.convert.python_to_sql import python_to_sql
36
- from datachain.lib.data_model import DataModel, DataType, DataValue, dict_to_data_model
36
+ from datachain.lib.data_model import (
37
+ DataModel,
38
+ DataType,
39
+ DataValue,
40
+ StandardType,
41
+ dict_to_data_model,
42
+ )
37
43
  from datachain.lib.file import (
38
44
  EXPORT_FILES_MAX_THREADS,
39
45
  ArrowRow,
@@ -360,14 +366,6 @@ class DataChain:
360
366
  self._settings = settings if settings else Settings()
361
367
  return self
362
368
 
363
- def reset_schema(self, signals_schema: SignalSchema) -> "Self":
364
- self.signals_schema = signals_schema
365
- return self
366
-
367
- def add_schema(self, signals_schema: SignalSchema) -> "Self":
368
- self.signals_schema |= signals_schema
369
- return self
370
-
371
369
  @classmethod
372
370
  def from_storage(
373
371
  cls,
@@ -958,7 +956,7 @@ class DataChain:
958
956
  query_func = getattr(self._query, method_name)
959
957
 
960
958
  new_schema = self.signals_schema.resolve(*args)
961
- columns = [C(col) for col in new_schema.db_signals()]
959
+ columns = new_schema.db_signals(as_columns=True)
962
960
  return query_func(*columns, **kwargs)
963
961
 
964
962
  @resolve_columns
@@ -1445,10 +1443,6 @@ class DataChain:
1445
1443
  remove_prefetched=remove_prefetched,
1446
1444
  )
1447
1445
 
1448
- def remove_file_signals(self) -> "Self":
1449
- schema = self.signals_schema.clone_without_file_signals()
1450
- return self.select(*schema.values.keys())
1451
-
1452
1446
  @delta_disabled
1453
1447
  def merge(
1454
1448
  self,
@@ -1803,12 +1797,19 @@ class DataChain:
1803
1797
  )
1804
1798
  return read_pandas(*args, **kwargs)
1805
1799
 
1806
- def to_pandas(self, flatten=False, include_hidden=True) -> "pd.DataFrame":
1800
+ def to_pandas(
1801
+ self,
1802
+ flatten: bool = False,
1803
+ include_hidden: bool = True,
1804
+ ) -> "pd.DataFrame":
1807
1805
  """Return a pandas DataFrame from the chain.
1808
1806
 
1809
1807
  Parameters:
1810
- flatten : Whether to use a multiindex or flatten column names.
1811
- include_hidden : Whether to include hidden columns.
1808
+ flatten: Whether to use a multiindex or flatten column names.
1809
+ include_hidden: Whether to include hidden columns.
1810
+
1811
+ Returns:
1812
+ pd.DataFrame: A pandas DataFrame representation of the chain.
1812
1813
  """
1813
1814
  import pandas as pd
1814
1815
 
@@ -1826,19 +1827,19 @@ class DataChain:
1826
1827
  def show(
1827
1828
  self,
1828
1829
  limit: int = 20,
1829
- flatten=False,
1830
- transpose=False,
1831
- truncate=True,
1832
- include_hidden=False,
1830
+ flatten: bool = False,
1831
+ transpose: bool = False,
1832
+ truncate: bool = True,
1833
+ include_hidden: bool = False,
1833
1834
  ) -> None:
1834
1835
  """Show a preview of the chain results.
1835
1836
 
1836
1837
  Parameters:
1837
- limit : How many rows to show.
1838
- flatten : Whether to use a multiindex or flatten column names.
1839
- transpose : Whether to transpose rows and columns.
1840
- truncate : Whether or not to truncate the contents of columns.
1841
- include_hidden : Whether to include hidden columns.
1838
+ limit: How many rows to show.
1839
+ flatten: Whether to use a multiindex or flatten column names.
1840
+ transpose: Whether to transpose rows and columns.
1841
+ truncate: Whether or not to truncate the contents of columns.
1842
+ include_hidden: Whether to include hidden columns.
1842
1843
  """
1843
1844
  import pandas as pd
1844
1845
 
@@ -2268,21 +2269,73 @@ class DataChain:
2268
2269
  )
2269
2270
  return read_records(*args, **kwargs)
2270
2271
 
2271
- def sum(self, fr: DataType): # type: ignore[override]
2272
- """Compute the sum of a column."""
2273
- return self._extend_to_data_model("sum", fr)
2272
+ def sum(self, col: str) -> StandardType: # type: ignore[override]
2273
+ """Compute the sum of a column.
2274
+
2275
+ Parameters:
2276
+ col: The column to compute the sum for.
2277
+
2278
+ Returns:
2279
+ The sum of the column values.
2280
+
2281
+ Example:
2282
+ ```py
2283
+ total_size = chain.sum("file.size")
2284
+ print(f"Total size: {total_size}")
2285
+ ```
2286
+ """
2287
+ return self._extend_to_data_model("sum", col)
2288
+
2289
+ def avg(self, col: str) -> StandardType: # type: ignore[override]
2290
+ """Compute the average of a column.
2291
+
2292
+ Parameters:
2293
+ col: The column to compute the average for.
2294
+
2295
+ Returns:
2296
+ The average of the column values.
2297
+
2298
+ Example:
2299
+ ```py
2300
+ average_size = chain.avg("file.size")
2301
+ print(f"Average size: {average_size}")
2302
+ ```
2303
+ """
2304
+ return self._extend_to_data_model("avg", col)
2305
+
2306
+ def min(self, col: str) -> StandardType: # type: ignore[override]
2307
+ """Compute the minimum of a column.
2308
+
2309
+ Parameters:
2310
+ col: The column to compute the minimum for.
2311
+
2312
+ Returns:
2313
+ The minimum value in the column.
2314
+
2315
+ Example:
2316
+ ```py
2317
+ min_size = chain.min("file.size")
2318
+ print(f"Minimum size: {min_size}")
2319
+ ```
2320
+ """
2321
+ return self._extend_to_data_model("min", col)
2322
+
2323
+ def max(self, col: str) -> StandardType: # type: ignore[override]
2324
+ """Compute the maximum of a column.
2274
2325
 
2275
- def avg(self, fr: DataType): # type: ignore[override]
2276
- """Compute the average of a column."""
2277
- return self._extend_to_data_model("avg", fr)
2326
+ Parameters:
2327
+ col: The column to compute the maximum for.
2278
2328
 
2279
- def min(self, fr: DataType): # type: ignore[override]
2280
- """Compute the minimum of a column."""
2281
- return self._extend_to_data_model("min", fr)
2329
+ Returns:
2330
+ The maximum value in the column.
2282
2331
 
2283
- def max(self, fr: DataType): # type: ignore[override]
2284
- """Compute the maximum of a column."""
2285
- return self._extend_to_data_model("max", fr)
2332
+ Example:
2333
+ ```py
2334
+ max_size = chain.max("file.size")
2335
+ print(f"Maximum size: {max_size}")
2336
+ ```
2337
+ """
2338
+ return self._extend_to_data_model("max", col)
2286
2339
 
2287
2340
  def setup(self, **kwargs) -> "Self":
2288
2341
  """Setup variables to pass to UDF functions.
@@ -2393,14 +2446,15 @@ class DataChain:
2393
2446
  """Shuffle the rows of the chain deterministically."""
2394
2447
  return self.order_by("sys.rand")
2395
2448
 
2396
- def sample(self, n) -> "Self":
2449
+ def sample(self, n: int) -> "Self":
2397
2450
  """Return a random sample from the chain.
2398
2451
 
2399
2452
  Parameters:
2400
- n (int): Number of samples to draw.
2453
+ n: Number of samples to draw.
2401
2454
 
2402
- NOTE: Samples are not deterministic, and streamed/paginated queries or
2403
- multiple workers will draw samples with replacement.
2455
+ Note:
2456
+ Samples are not deterministic, and streamed/paginated queries or
2457
+ multiple workers will draw samples with replacement.
2404
2458
  """
2405
2459
  return self._evolve(query=self._query.sample(n))
2406
2460
 
@@ -2507,6 +2561,10 @@ class DataChain:
2507
2561
  def chunk(self, index: int, total: int) -> "Self":
2508
2562
  """Split a chain into smaller chunks for e.g. parallelization.
2509
2563
 
2564
+ Parameters:
2565
+ index: The index of the chunk (0-indexed).
2566
+ total: The total number of chunks.
2567
+
2510
2568
  Example:
2511
2569
  ```py
2512
2570
  import datachain as dc
@@ -2526,7 +2584,7 @@ class DataChain:
2526
2584
  """Returns a list of rows of values, optionally limited to the specified
2527
2585
  columns.
2528
2586
 
2529
- Args:
2587
+ Parameters:
2530
2588
  *cols: Limit to the specified columns. By default, all columns are selected.
2531
2589
 
2532
2590
  Returns:
@@ -2556,7 +2614,7 @@ class DataChain:
2556
2614
  def to_values(self, col: str) -> list[DataValue]:
2557
2615
  """Returns a flat list of values from a single column.
2558
2616
 
2559
- Args:
2617
+ Parameters:
2560
2618
  col: The name of the column to extract values from.
2561
2619
 
2562
2620
  Returns:
@@ -32,6 +32,7 @@ def read_hf(
32
32
  Parameters:
33
33
  dataset : Path or name of the dataset to read from Hugging Face Hub,
34
34
  or an instance of `datasets.Dataset`-like object.
35
+ args : Additional positional arguments to pass to datasets.load_dataset.
35
36
  session : Session to use for the chain.
36
37
  settings : Settings to use for the chain.
37
38
  column : Generated object column name.
@@ -64,8 +65,9 @@ def read_hf(
64
65
 
65
66
  model_name = model_name or column or ""
66
67
  hf_features = next(iter(ds_dict.values())).features
67
- output = output | get_output_schema(hf_features)
68
- model = dict_to_data_model(model_name, output)
68
+ hf_output, normalized_names = get_output_schema(hf_features, list(output.keys()))
69
+ output = output | hf_output
70
+ model = dict_to_data_model(model_name, output, list(normalized_names.values()))
69
71
  if column:
70
72
  output = {column: model}
71
73
 
@@ -26,7 +26,7 @@ except ImportError as exc:
26
26
  ) from exc
27
27
 
28
28
  from io import BytesIO
29
- from typing import TYPE_CHECKING, Any, Union
29
+ from typing import TYPE_CHECKING, Any, Optional, Union
30
30
 
31
31
  import PIL
32
32
  from tqdm.auto import tqdm
@@ -34,6 +34,7 @@ from tqdm.auto import tqdm
34
34
  from datachain.lib.arrow import arrow_type_mapper
35
35
  from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
36
36
  from datachain.lib.udf import Generator
37
+ from datachain.lib.utils import normalize_col_names
37
38
 
38
39
  if TYPE_CHECKING:
39
40
  import pyarrow as pa
@@ -94,14 +95,18 @@ class HFGenerator(Generator):
94
95
  ds = self.ds_dict[split]
95
96
  if split:
96
97
  desc += f" split '{split}'"
98
+ model_fields = self.output_schema._model_fields_by_aliases() # type: ignore[attr-defined]
97
99
  with tqdm(desc=desc, unit=" rows", leave=False) as pbar:
98
100
  for row in ds:
99
101
  output_dict = {}
100
102
  if split and "split" in self.output_schema.model_fields:
101
103
  output_dict["split"] = split
102
104
  for name, feat in ds.features.items():
103
- anno = self.output_schema.model_fields[name].annotation
104
- output_dict[name] = convert_feature(row[name], feat, anno)
105
+ normalized_name, info = model_fields[name]
106
+ anno = info.annotation
107
+ output_dict[normalized_name] = convert_feature(
108
+ row[name], feat, anno
109
+ )
105
110
  yield self.output_schema(**output_dict)
106
111
  pbar.update(1)
107
112
 
@@ -122,10 +127,12 @@ def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
122
127
  return HFClassLabel(string=feat.names[val], integer=val)
123
128
  if isinstance(feat, dict):
124
129
  sdict = {}
130
+ model_fields = anno._model_fields_by_aliases() # type: ignore[attr-defined]
125
131
  for sname in val:
126
132
  sfeat = feat[sname]
127
- sanno = anno.model_fields[sname].annotation
128
- sdict[sname] = [convert_feature(v, sfeat, sanno) for v in val[sname]]
133
+ norm_name, info = model_fields[sname]
134
+ sanno = info.annotation
135
+ sdict[norm_name] = [convert_feature(v, sfeat, sanno) for v in val[sname]]
129
136
  return anno(**sdict)
130
137
  if isinstance(feat, Image):
131
138
  if isinstance(val, dict):
@@ -135,12 +142,26 @@ def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
135
142
  return HFAudio(array=val["array"], sampling_rate=val["sampling_rate"])
136
143
 
137
144
 
138
- def get_output_schema(features: Features) -> dict[str, DataType]:
139
- """Generate UDF output schema from huggingface datasets features."""
145
+ def get_output_schema(
146
+ features: Features, existing_column_names: Optional[list[str]] = None
147
+ ) -> tuple[dict[str, DataType], dict[str, str]]:
148
+ """
149
+ Generate UDF output schema from Hugging Face datasets features. It normalizes the
150
+ column names and returns a mapping of normalized names to original names along with
151
+ the data types. `existing_column_names` is the list of column names that already
152
+ exist in the dataset (to avoid name collisions due to normalization).
153
+ """
154
+ existing_column_names = existing_column_names or []
140
155
  fields_dict = {}
141
- for name, val in features.items():
142
- fields_dict[name] = _feature_to_chain_type(name, val)
143
- return fields_dict
156
+ normalized_names = normalize_col_names(
157
+ existing_column_names + list(features.keys())
158
+ )
159
+ # List of tuple(str, str) for HF dataset feature names, (normalized, original)
160
+ new_feature_names = list(normalized_names.items())[len(existing_column_names) :]
161
+ for idx, feat in enumerate(features.items()):
162
+ name, val = feat
163
+ fields_dict[new_feature_names[idx][0]] = _feature_to_chain_type(name, val)
164
+ return fields_dict, normalized_names
144
165
 
145
166
 
146
167
  def _feature_to_chain_type(name: str, val: Any) -> DataType: # noqa: PLR0911
@@ -125,7 +125,10 @@ class PytorchDataset(IterableDataset):
125
125
  ds = read_dataset(
126
126
  name=self.name, version=self.version, session=session
127
127
  ).settings(cache=self.cache, prefetch=self.prefetch)
128
- ds = ds.remove_file_signals()
128
+
129
+ # remove file signals from dataset
130
+ schema = ds.signals_schema.clone_without_file_signals()
131
+ ds = ds.select(*schema.values.keys())
129
132
 
130
133
  if self.num_samples > 0:
131
134
  ds = ds.sample(self.num_samples)
@@ -610,20 +610,25 @@ class SignalSchema:
610
610
  return SignalSchema(schema)
611
611
 
612
612
  def _find_in_tree(self, path: list[str]) -> DataType:
613
+ if val := self.tree.get(".".join(path)):
614
+ # If the path is a single string, we can directly access it
615
+ # without traversing the tree.
616
+ return val[0]
617
+
613
618
  curr_tree = self.tree
614
619
  curr_type = None
615
620
  i = 0
616
621
  while curr_tree is not None and i < len(path):
617
622
  if val := curr_tree.get(path[i]):
618
623
  curr_type, curr_tree = val
619
- elif i == 0 and len(path) > 1 and (val := curr_tree.get(".".join(path))):
620
- curr_type, curr_tree = val
621
- break
622
624
  else:
623
625
  curr_type = None
626
+ break
624
627
  i += 1
625
628
 
626
- if curr_type is None:
629
+ if curr_type is None or i < len(path):
630
+ # If we reached the end of the path and didn't find a type,
631
+ # or if we didn't traverse the entire path, raise an error.
627
632
  raise SignalResolvingError(path, "is not found")
628
633
 
629
634
  return curr_type
@@ -559,7 +559,13 @@ class UDFStep(Step, ABC):
559
559
  """
560
560
  Create temporary table with group by partitions.
561
561
  """
562
+ # Check if partition_by is set, we need it to create partitions.
562
563
  assert self.partition_by is not None
564
+ # Check if sys__id is in the query, we need it to be able to join
565
+ # the partition table with the udf table later.
566
+ assert any(c.name == "sys__id" for c in query.selected_columns), (
567
+ "Query must have sys__id column to use partitioning."
568
+ )
563
569
 
564
570
  if isinstance(self.partition_by, (list, tuple, GeneratorType)):
565
571
  list_partition_by = list(self.partition_by)
@@ -606,6 +612,22 @@ class UDFStep(Step, ABC):
606
612
 
607
613
  # Apply partitioning if needed.
608
614
  if self.partition_by is not None:
615
+ if not any(c.name == "sys__id" for c in query.selected_columns):
616
+ # If sys__id is not in the query, we need to create a temp table
617
+ # to hold the query results, so we can join it with the
618
+ # partition table later.
619
+ columns = [
620
+ c if isinstance(c, Column) else Column(c.name, c.type)
621
+ for c in query.subquery().columns
622
+ ]
623
+ temp_table = self.catalog.warehouse.create_dataset_rows_table(
624
+ self.catalog.warehouse.temp_table_name(),
625
+ columns=columns,
626
+ )
627
+ temp_tables.append(temp_table.name)
628
+ self.catalog.warehouse.copy_table(temp_table, query)
629
+ _query = query = temp_table.select()
630
+
609
631
  partition_tbl = self.create_partitions_table(query)
610
632
  temp_tables.append(partition_tbl.name)
611
633
  query = query.outerjoin(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.26.0
3
+ Version: 0.26.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -98,7 +98,7 @@ Requires-Dist: scipy; extra == "tests"
98
98
  Requires-Dist: ultralytics; extra == "tests"
99
99
  Provides-Extra: dev
100
100
  Requires-Dist: datachain[docs,tests]; extra == "dev"
101
- Requires-Dist: mypy==1.16.1; extra == "dev"
101
+ Requires-Dist: mypy==1.17.0; extra == "dev"
102
102
  Requires-Dist: types-python-dateutil; extra == "dev"
103
103
  Requires-Dist: types-pytz; extra == "dev"
104
104
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -41,7 +41,7 @@ soundfile
41
41
 
42
42
  [dev]
43
43
  datachain[docs,tests]
44
- mypy==1.16.1
44
+ mypy==1.17.0
45
45
  types-python-dateutil
46
46
  types-pytz
47
47
  types-PyYAML
@@ -5,7 +5,7 @@ from datachain import func
5
5
  from tests.utils import skip_if_not_sqlite
6
6
 
7
7
 
8
- def test_conditional_and_or(test_session):
8
+ def test_conditional_and_or_not(test_session):
9
9
  class Data(dc.DataModel):
10
10
  i: int
11
11
  f: float
@@ -25,11 +25,12 @@ def test_conditional_and_or(test_session):
25
25
  t2=func.and_(dc.C("data.i") > 15, dc.C("data.f") > 2.5),
26
26
  t3=func.or_(dc.C("data.i") > 15, dc.C("data.f") > 1.5),
27
27
  t4=func.or_(dc.C("data.i") > 15, dc.C("data.f") > 2.5),
28
+ t5=func.not_(dc.C("data.i") > 15),
28
29
  )
29
30
  .order_by("id")
30
- ).to_list("t1", "t2", "t3", "t4")
31
+ ).to_list("t1", "t2", "t3", "t4", "t5")
31
32
 
32
- assert ds == [(0, 0, 0, 0), (1, 0, 1, 1), (1, 1, 1, 1)]
33
+ assert ds == [(0, 0, 0, 0, 1), (1, 0, 1, 1, 0), (1, 1, 1, 1, 0)]
33
34
 
34
35
 
35
36
  def test_conditional_case(test_session):
@@ -227,7 +227,7 @@ def test_select_missing_column(cloud_test_catalog, animal_dataset):
227
227
  ds1 = ds.select(C.missing_column_name)
228
228
  ds2 = ds.select("missing_column_name")
229
229
  # The exception type varies by database backend
230
- exc1 = pytest.raises(Exception, ds1.db_results)
230
+ exc1 = pytest.raises(Exception, ds1.db_results) # noqa: B017
231
231
  assert "missing_column_name" in str(exc1.value)
232
232
  exc2 = pytest.raises(KeyError, ds2.db_results)
233
233
  assert "missing_column_name" in str(exc2.value)
@@ -34,10 +34,11 @@ def test_hf_image(tmp_path):
34
34
  img.save(train_dir / "img1.png")
35
35
 
36
36
  ds = load_dataset("imagefolder", data_dir=tmp_path)
37
- schema = {"split": str} | get_output_schema(ds["train"].features)
37
+ hf_schema, norm_names = get_output_schema(ds["train"].features, ["split"])
38
+ schema = {"split": str} | hf_schema
38
39
  assert schema["image"] is HFImage
39
40
 
40
- gen = HFGenerator(ds, dict_to_data_model("", schema))
41
+ gen = HFGenerator(ds, dict_to_data_model("", schema, list(norm_names.values())))
41
42
  gen.setup()
42
43
  row = next(iter(gen.process("train")))
43
44
  assert row.image.img == image_to_bytes(img)
@@ -56,9 +57,10 @@ def test_hf_audio(tmp_path):
56
57
  write(train_dir / "example.wav", samplerate, data.astype(np.int16))
57
58
 
58
59
  ds = load_dataset("audiofolder", data_dir=tmp_path)
59
- schema = {"split": str} | get_output_schema(ds["train"].features)
60
+ hf_schema, norm_names = get_output_schema(ds["train"].features, ["split"])
61
+ schema = {"split": str} | hf_schema
60
62
 
61
- gen = HFGenerator(ds, dict_to_data_model("", schema))
63
+ gen = HFGenerator(ds, dict_to_data_model("", schema, list(norm_names.values())))
62
64
  gen.setup()
63
65
  row = next(iter(gen.process("train")))
64
66
  assert np.allclose(row.audio.array, data / amplitude, atol=1e-4)