datachain 0.30.2__tar.gz → 0.30.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (418)
  1. {datachain-0.30.2 → datachain-0.30.3}/.github/workflows/benchmarks.yml +1 -1
  2. {datachain-0.30.2 → datachain-0.30.3}/.github/workflows/release.yml +1 -1
  3. {datachain-0.30.2 → datachain-0.30.3}/.github/workflows/tests-studio.yml +2 -2
  4. {datachain-0.30.2 → datachain-0.30.3}/.github/workflows/tests.yml +19 -3
  5. {datachain-0.30.2 → datachain-0.30.3}/.github/workflows/update-template.yaml +1 -1
  6. {datachain-0.30.2 → datachain-0.30.3}/.pre-commit-config.yaml +1 -1
  7. {datachain-0.30.2 → datachain-0.30.3}/PKG-INFO +2 -2
  8. datachain-0.30.3/docs/references/func.md +38 -0
  9. datachain-0.30.3/docs/references/functions/aggregate.md +5 -0
  10. datachain-0.30.3/docs/references/functions/array.md +5 -0
  11. datachain-0.30.3/docs/references/functions/conditional.md +5 -0
  12. datachain-0.30.3/docs/references/functions/numeric.md +5 -0
  13. datachain-0.30.3/docs/references/functions/path.md +5 -0
  14. datachain-0.30.3/docs/references/functions/random.md +5 -0
  15. datachain-0.30.3/docs/references/functions/string.md +22 -0
  16. datachain-0.30.3/docs/references/functions/window.md +5 -0
  17. {datachain-0.30.2 → datachain-0.30.3}/examples/multimodal/audio-to-text.py +4 -1
  18. {datachain-0.30.2 → datachain-0.30.3}/mkdocs.yml +11 -2
  19. {datachain-0.30.2 → datachain-0.30.3}/pyproject.toml +1 -1
  20. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/catalog/catalog.py +86 -29
  21. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/datasets.py +3 -2
  22. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/metastore.py +34 -9
  23. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/delta.py +23 -12
  24. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/string.py +8 -0
  25. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/database.py +50 -6
  26. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/datachain.py +31 -9
  27. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/datasets.py +9 -4
  28. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/listing.py +5 -9
  29. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/ultralytics/bbox.py +14 -12
  30. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/ultralytics/pose.py +14 -12
  31. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/ultralytics/segment.py +14 -12
  32. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/dataset.py +20 -10
  33. {datachain-0.30.2 → datachain-0.30.3}/src/datachain.egg-info/PKG-INFO +2 -2
  34. {datachain-0.30.2 → datachain-0.30.3}/src/datachain.egg-info/SOURCES.txt +8 -0
  35. {datachain-0.30.2 → datachain-0.30.3}/src/datachain.egg-info/requires.txt +1 -1
  36. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_datasets.py +11 -18
  37. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_pull.py +14 -4
  38. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_to_database.py +62 -5
  39. datachain-0.30.2/docs/references/func.md +0 -5
  40. {datachain-0.30.2 → datachain-0.30.3}/.cruft.json +0 -0
  41. {datachain-0.30.2 → datachain-0.30.3}/.gitattributes +0 -0
  42. {datachain-0.30.2 → datachain-0.30.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  43. {datachain-0.30.2 → datachain-0.30.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  44. {datachain-0.30.2 → datachain-0.30.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  45. {datachain-0.30.2 → datachain-0.30.3}/.github/codecov.yaml +0 -0
  46. {datachain-0.30.2 → datachain-0.30.3}/.github/dependabot.yml +0 -0
  47. {datachain-0.30.2 → datachain-0.30.3}/.gitignore +0 -0
  48. {datachain-0.30.2 → datachain-0.30.3}/CODE_OF_CONDUCT.rst +0 -0
  49. {datachain-0.30.2 → datachain-0.30.3}/LICENSE +0 -0
  50. {datachain-0.30.2 → datachain-0.30.3}/README.rst +0 -0
  51. {datachain-0.30.2 → datachain-0.30.3}/docs/assets/captioned_cartoons.png +0 -0
  52. {datachain-0.30.2 → datachain-0.30.3}/docs/assets/datachain-white.svg +0 -0
  53. {datachain-0.30.2 → datachain-0.30.3}/docs/assets/datachain.svg +0 -0
  54. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/auth/login.md +0 -0
  55. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/auth/logout.md +0 -0
  56. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/auth/team.md +0 -0
  57. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/auth/token.md +0 -0
  58. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/index.md +0 -0
  59. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/job/cancel.md +0 -0
  60. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/job/clusters.md +0 -0
  61. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/job/logs.md +0 -0
  62. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/job/ls.md +0 -0
  63. {datachain-0.30.2 → datachain-0.30.3}/docs/commands/job/run.md +0 -0
  64. {datachain-0.30.2 → datachain-0.30.3}/docs/contributing.md +0 -0
  65. {datachain-0.30.2 → datachain-0.30.3}/docs/css/github-permalink-style.css +0 -0
  66. {datachain-0.30.2 → datachain-0.30.3}/docs/examples.md +0 -0
  67. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/db_migrations.md +0 -0
  68. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/delta.md +0 -0
  69. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/env.md +0 -0
  70. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/index.md +0 -0
  71. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/namespaces.md +0 -0
  72. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/processing.md +0 -0
  73. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/remotes.md +0 -0
  74. {datachain-0.30.2 → datachain-0.30.3}/docs/guide/retry.md +0 -0
  75. {datachain-0.30.2 → datachain-0.30.3}/docs/index.md +0 -0
  76. {datachain-0.30.2 → datachain-0.30.3}/docs/overrides/main.html +0 -0
  77. {datachain-0.30.2 → datachain-0.30.3}/docs/quick-start.md +0 -0
  78. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/arrowrow.md +0 -0
  79. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/bbox.md +0 -0
  80. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/file.md +0 -0
  81. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/imagefile.md +0 -0
  82. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/index.md +0 -0
  83. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/pose.md +0 -0
  84. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/segment.md +0 -0
  85. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/tarvfile.md +0 -0
  86. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/textfile.md +0 -0
  87. {datachain-0.30.2 → datachain-0.30.3}/docs/references/data-types/videofile.md +0 -0
  88. {datachain-0.30.2 → datachain-0.30.3}/docs/references/datachain.md +0 -0
  89. {datachain-0.30.2 → datachain-0.30.3}/docs/references/index.md +0 -0
  90. {datachain-0.30.2 → datachain-0.30.3}/docs/references/toolkit.md +0 -0
  91. {datachain-0.30.2 → datachain-0.30.3}/docs/references/torch.md +0 -0
  92. {datachain-0.30.2 → datachain-0.30.3}/docs/references/udf.md +0 -0
  93. {datachain-0.30.2 → datachain-0.30.3}/docs/tutorials.md +0 -0
  94. {datachain-0.30.2 → datachain-0.30.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  95. {datachain-0.30.2 → datachain-0.30.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  96. {datachain-0.30.2 → datachain-0.30.3}/examples/computer_vision/openimage-detect.py +0 -0
  97. {datachain-0.30.2 → datachain-0.30.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
  98. {datachain-0.30.2 → datachain-0.30.3}/examples/computer_vision/ultralytics-pose.py +0 -0
  99. {datachain-0.30.2 → datachain-0.30.3}/examples/computer_vision/ultralytics-segment.py +0 -0
  100. {datachain-0.30.2 → datachain-0.30.3}/examples/get_started/common_sql_functions.py +0 -0
  101. {datachain-0.30.2 → datachain-0.30.3}/examples/get_started/json-csv-reader.py +0 -0
  102. {datachain-0.30.2 → datachain-0.30.3}/examples/get_started/torch-loader.py +0 -0
  103. {datachain-0.30.2 → datachain-0.30.3}/examples/get_started/udfs/parallel.py +0 -0
  104. {datachain-0.30.2 → datachain-0.30.3}/examples/get_started/udfs/simple.py +0 -0
  105. {datachain-0.30.2 → datachain-0.30.3}/examples/get_started/udfs/stateful.py +0 -0
  106. {datachain-0.30.2 → datachain-0.30.3}/examples/incremental_processing/delta.py +0 -0
  107. {datachain-0.30.2 → datachain-0.30.3}/examples/incremental_processing/retry.py +0 -0
  108. {datachain-0.30.2 → datachain-0.30.3}/examples/incremental_processing/utils.py +0 -0
  109. {datachain-0.30.2 → datachain-0.30.3}/examples/llm_and_nlp/claude-query.py +0 -0
  110. {datachain-0.30.2 → datachain-0.30.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  111. {datachain-0.30.2 → datachain-0.30.3}/examples/multimodal/clip_inference.py +0 -0
  112. {datachain-0.30.2 → datachain-0.30.3}/examples/multimodal/hf_pipeline.py +0 -0
  113. {datachain-0.30.2 → datachain-0.30.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
  114. {datachain-0.30.2 → datachain-0.30.3}/examples/multimodal/wds.py +0 -0
  115. {datachain-0.30.2 → datachain-0.30.3}/examples/multimodal/wds_filtered.py +0 -0
  116. {datachain-0.30.2 → datachain-0.30.3}/noxfile.py +0 -0
  117. {datachain-0.30.2 → datachain-0.30.3}/setup.cfg +0 -0
  118. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/__init__.py +0 -0
  119. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/__main__.py +0 -0
  120. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/asyn.py +0 -0
  121. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cache.py +0 -0
  122. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/catalog/__init__.py +0 -0
  123. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/catalog/datasource.py +0 -0
  124. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/catalog/loader.py +0 -0
  125. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/__init__.py +0 -0
  126. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/__init__.py +0 -0
  127. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/du.py +0 -0
  128. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/index.py +0 -0
  129. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/ls.py +0 -0
  130. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/misc.py +0 -0
  131. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/query.py +0 -0
  132. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/commands/show.py +0 -0
  133. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/parser/__init__.py +0 -0
  134. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/parser/job.py +0 -0
  135. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/parser/studio.py +0 -0
  136. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/parser/utils.py +0 -0
  137. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/cli/utils.py +0 -0
  138. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/__init__.py +0 -0
  139. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/azure.py +0 -0
  140. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/fileslice.py +0 -0
  141. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/fsspec.py +0 -0
  142. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/gcs.py +0 -0
  143. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/hf.py +0 -0
  144. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/local.py +0 -0
  145. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/client/s3.py +0 -0
  146. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/config.py +0 -0
  147. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/__init__.py +0 -0
  148. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/db_engine.py +0 -0
  149. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/job.py +0 -0
  150. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/schema.py +0 -0
  151. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/serializer.py +0 -0
  152. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/sqlite.py +0 -0
  153. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/data_storage/warehouse.py +0 -0
  154. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/dataset.py +0 -0
  155. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/diff/__init__.py +0 -0
  156. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/error.py +0 -0
  157. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/fs/__init__.py +0 -0
  158. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/fs/reference.py +0 -0
  159. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/fs/utils.py +0 -0
  160. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/__init__.py +0 -0
  161. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/aggregate.py +0 -0
  162. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/array.py +0 -0
  163. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/base.py +0 -0
  164. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/conditional.py +0 -0
  165. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/func.py +0 -0
  166. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/numeric.py +0 -0
  167. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/path.py +0 -0
  168. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/random.py +0 -0
  169. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/func/window.py +0 -0
  170. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/job.py +0 -0
  171. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/__init__.py +0 -0
  172. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/arrow.py +0 -0
  173. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/audio.py +0 -0
  174. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/clip.py +0 -0
  175. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/convert/__init__.py +0 -0
  176. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/convert/flatten.py +0 -0
  177. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
  178. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
  179. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/convert/unflatten.py +0 -0
  180. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  181. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/data_model.py +0 -0
  182. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dataset_info.py +0 -0
  183. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/__init__.py +0 -0
  184. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/csv.py +0 -0
  185. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/hf.py +0 -0
  186. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/json.py +0 -0
  187. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/listings.py +0 -0
  188. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/pandas.py +0 -0
  189. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/parquet.py +0 -0
  190. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/records.py +0 -0
  191. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/storage.py +0 -0
  192. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/utils.py +0 -0
  193. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/dc/values.py +0 -0
  194. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/file.py +0 -0
  195. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/hf.py +0 -0
  196. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/image.py +0 -0
  197. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/listing.py +0 -0
  198. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/listing_info.py +0 -0
  199. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/meta_formats.py +0 -0
  200. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/model_store.py +0 -0
  201. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/namespaces.py +0 -0
  202. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/projects.py +0 -0
  203. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/pytorch.py +0 -0
  204. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/settings.py +0 -0
  205. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/signal_schema.py +0 -0
  206. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/tar.py +0 -0
  207. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/text.py +0 -0
  208. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/udf.py +0 -0
  209. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/udf_signature.py +0 -0
  210. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/utils.py +0 -0
  211. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/video.py +0 -0
  212. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/webdataset.py +0 -0
  213. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/lib/webdataset_laion.py +0 -0
  214. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/__init__.py +0 -0
  215. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/bbox.py +0 -0
  216. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/pose.py +0 -0
  217. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/segment.py +0 -0
  218. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/ultralytics/__init__.py +0 -0
  219. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/model/utils.py +0 -0
  220. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/namespace.py +0 -0
  221. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/node.py +0 -0
  222. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/nodes_fetcher.py +0 -0
  223. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/nodes_thread_pool.py +0 -0
  224. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/progress.py +0 -0
  225. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/project.py +0 -0
  226. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/py.typed +0 -0
  227. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/__init__.py +0 -0
  228. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/batch.py +0 -0
  229. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/dispatch.py +0 -0
  230. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/metrics.py +0 -0
  231. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/params.py +0 -0
  232. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/queue.py +0 -0
  233. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/schema.py +0 -0
  234. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/session.py +0 -0
  235. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/udf.py +0 -0
  236. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/query/utils.py +0 -0
  237. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/remote/__init__.py +0 -0
  238. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/remote/studio.py +0 -0
  239. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/script_meta.py +0 -0
  240. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/semver.py +0 -0
  241. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/__init__.py +0 -0
  242. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/default/__init__.py +0 -0
  243. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/default/base.py +0 -0
  244. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/__init__.py +0 -0
  245. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/aggregate.py +0 -0
  246. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/array.py +0 -0
  247. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/conditional.py +0 -0
  248. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/numeric.py +0 -0
  249. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/path.py +0 -0
  250. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/random.py +0 -0
  251. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/functions/string.py +0 -0
  252. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/postgresql_dialect.py +0 -0
  253. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/postgresql_types.py +0 -0
  254. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/selectable.py +0 -0
  255. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  256. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/sqlite/base.py +0 -0
  257. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/sqlite/types.py +0 -0
  258. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/sqlite/vector.py +0 -0
  259. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/types.py +0 -0
  260. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/sql/utils.py +0 -0
  261. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/studio.py +0 -0
  262. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/telemetry.py +0 -0
  263. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/toolkit/__init__.py +0 -0
  264. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/toolkit/split.py +0 -0
  265. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/torch/__init__.py +0 -0
  266. {datachain-0.30.2 → datachain-0.30.3}/src/datachain/utils.py +0 -0
  267. {datachain-0.30.2 → datachain-0.30.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  268. {datachain-0.30.2 → datachain-0.30.3}/src/datachain.egg-info/entry_points.txt +0 -0
  269. {datachain-0.30.2 → datachain-0.30.3}/src/datachain.egg-info/top_level.txt +0 -0
  270. {datachain-0.30.2 → datachain-0.30.3}/tests/__init__.py +0 -0
  271. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/__init__.py +0 -0
  272. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/conftest.py +0 -0
  273. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  274. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/datasets/.dvc/config +0 -0
  275. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/datasets/.gitignore +0 -0
  276. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  277. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/test_datachain.py +0 -0
  278. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/test_ls.py +0 -0
  279. {datachain-0.30.2 → datachain-0.30.3}/tests/benchmarks/test_version.py +0 -0
  280. {datachain-0.30.2 → datachain-0.30.3}/tests/conftest.py +0 -0
  281. {datachain-0.30.2 → datachain-0.30.3}/tests/data.py +0 -0
  282. {datachain-0.30.2 → datachain-0.30.3}/tests/examples/__init__.py +0 -0
  283. {datachain-0.30.2 → datachain-0.30.3}/tests/examples/test_examples.py +0 -0
  284. {datachain-0.30.2 → datachain-0.30.3}/tests/examples/test_wds_e2e.py +0 -0
  285. {datachain-0.30.2 → datachain-0.30.3}/tests/examples/wds_data.py +0 -0
  286. {datachain-0.30.2 → datachain-0.30.3}/tests/func/__init__.py +0 -0
  287. {datachain-0.30.2 → datachain-0.30.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  288. {datachain-0.30.2 → datachain-0.30.3}/tests/func/data/lena.jpg +0 -0
  289. {datachain-0.30.2 → datachain-0.30.3}/tests/func/fake-service-account-credentials.json +0 -0
  290. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/__init__.py +0 -0
  291. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/test_aggregate.py +0 -0
  292. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/test_array.py +0 -0
  293. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/test_conditional.py +0 -0
  294. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/test_numeric.py +0 -0
  295. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/test_path.py +0 -0
  296. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/test_random.py +0 -0
  297. {datachain-0.30.2 → datachain-0.30.3}/tests/func/functions/test_string.py +0 -0
  298. {datachain-0.30.2 → datachain-0.30.3}/tests/func/model/__init__.py +0 -0
  299. {datachain-0.30.2 → datachain-0.30.3}/tests/func/model/data/running-mask0.png +0 -0
  300. {datachain-0.30.2 → datachain-0.30.3}/tests/func/model/data/running-mask1.png +0 -0
  301. {datachain-0.30.2 → datachain-0.30.3}/tests/func/model/data/running.jpg +0 -0
  302. {datachain-0.30.2 → datachain-0.30.3}/tests/func/model/data/ships.jpg +0 -0
  303. {datachain-0.30.2 → datachain-0.30.3}/tests/func/model/test_yolo.py +0 -0
  304. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_audio.py +0 -0
  305. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_batching.py +0 -0
  306. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_catalog.py +0 -0
  307. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_client.py +0 -0
  308. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_cloud_transfer.py +0 -0
  309. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_data_storage.py +0 -0
  310. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_datachain.py +0 -0
  311. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_datachain_merge.py +0 -0
  312. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_dataset_query.py +0 -0
  313. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_delta.py +0 -0
  314. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_feature_pickling.py +0 -0
  315. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_file.py +0 -0
  316. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_hf.py +0 -0
  317. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_hidden_field.py +0 -0
  318. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_image.py +0 -0
  319. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_listing.py +0 -0
  320. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_ls.py +0 -0
  321. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_meta_formats.py +0 -0
  322. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_metastore.py +0 -0
  323. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_metrics.py +0 -0
  324. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_pytorch.py +0 -0
  325. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_query.py +0 -0
  326. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_read_database.py +0 -0
  327. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_read_dataset_remote.py +0 -0
  328. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  329. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_retry.py +0 -0
  330. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_session.py +0 -0
  331. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_studio_datetime_parsing.py +0 -0
  332. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_toolkit.py +0 -0
  333. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_video.py +0 -0
  334. {datachain-0.30.2 → datachain-0.30.3}/tests/func/test_warehouse.py +0 -0
  335. {datachain-0.30.2 → datachain-0.30.3}/tests/scripts/feature_class.py +0 -0
  336. {datachain-0.30.2 → datachain-0.30.3}/tests/scripts/feature_class_exception.py +0 -0
  337. {datachain-0.30.2 → datachain-0.30.3}/tests/scripts/feature_class_parallel.py +0 -0
  338. {datachain-0.30.2 → datachain-0.30.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  339. {datachain-0.30.2 → datachain-0.30.3}/tests/scripts/name_len_slow.py +0 -0
  340. {datachain-0.30.2 → datachain-0.30.3}/tests/test_atomicity.py +0 -0
  341. {datachain-0.30.2 → datachain-0.30.3}/tests/test_cli_e2e.py +0 -0
  342. {datachain-0.30.2 → datachain-0.30.3}/tests/test_cli_studio.py +0 -0
  343. {datachain-0.30.2 → datachain-0.30.3}/tests/test_import_time.py +0 -0
  344. {datachain-0.30.2 → datachain-0.30.3}/tests/test_query_e2e.py +0 -0
  345. {datachain-0.30.2 → datachain-0.30.3}/tests/test_telemetry.py +0 -0
  346. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/__init__.py +0 -0
  347. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/__init__.py +0 -0
  348. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/conftest.py +0 -0
  349. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_arrow.py +0 -0
  350. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_audio.py +0 -0
  351. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_clip.py +0 -0
  352. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_datachain.py +0 -0
  353. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  354. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_datachain_merge.py +0 -0
  355. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_diff.py +0 -0
  356. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_feature.py +0 -0
  357. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_feature_utils.py +0 -0
  358. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_file.py +0 -0
  359. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_hf.py +0 -0
  360. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_image.py +0 -0
  361. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_listing_info.py +0 -0
  362. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_namespace.py +0 -0
  363. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_partition_by.py +0 -0
  364. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_project.py +0 -0
  365. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_python_to_sql.py +0 -0
  366. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_schema.py +0 -0
  367. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_settings.py +0 -0
  368. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_signal_schema.py +0 -0
  369. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_sql_to_python.py +0 -0
  370. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_text.py +0 -0
  371. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_udf.py +0 -0
  372. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_udf_signature.py +0 -0
  373. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_utils.py +0 -0
  374. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/lib/test_webdataset.py +0 -0
  375. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/model/__init__.py +0 -0
  376. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/model/test_bbox.py +0 -0
  377. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/model/test_pose.py +0 -0
  378. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/model/test_segment.py +0 -0
  379. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/model/test_utils.py +0 -0
  380. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/__init__.py +0 -0
  381. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/sqlite/__init__.py +0 -0
  382. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/sqlite/test_types.py +0 -0
  383. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  384. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/test_array.py +0 -0
  385. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/test_conditional.py +0 -0
  386. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/test_path.py +0 -0
  387. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/test_random.py +0 -0
  388. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/test_selectable.py +0 -0
  389. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/sql/test_string.py +0 -0
  390. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_asyn.py +0 -0
  391. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_cache.py +0 -0
  392. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_catalog.py +0 -0
  393. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_catalog_loader.py +0 -0
  394. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_cli_parsing.py +0 -0
  395. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_client.py +0 -0
  396. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_client_gcs.py +0 -0
  397. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_client_s3.py +0 -0
  398. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_config.py +0 -0
  399. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_data_storage.py +0 -0
  400. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_database_engine.py +0 -0
  401. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_dataset.py +0 -0
  402. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_dispatch.py +0 -0
  403. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_fileslice.py +0 -0
  404. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_func.py +0 -0
  405. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_listing.py +0 -0
  406. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_metastore.py +0 -0
  407. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_module_exports.py +0 -0
  408. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_pytorch.py +0 -0
  409. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_query.py +0 -0
  410. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_query_metrics.py +0 -0
  411. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_query_params.py +0 -0
  412. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_script_meta.py +0 -0
  413. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_semver.py +0 -0
  414. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_serializer.py +0 -0
  415. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_session.py +0 -0
  416. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_utils.py +0 -0
  417. {datachain-0.30.2 → datachain-0.30.3}/tests/unit/test_warehouse.py +0 -0
  418. {datachain-0.30.2 → datachain-0.30.3}/tests/utils.py +0 -0
@@ -18,7 +18,7 @@ jobs:
18
18
  run:
19
19
  runs-on: ubuntu-latest
20
20
  steps:
21
- - uses: actions/checkout@v4
21
+ - uses: actions/checkout@v5
22
22
  - name: Set up Python 3.13
23
23
  uses: actions/setup-python@v5
24
24
  with:
@@ -17,7 +17,7 @@ jobs:
17
17
  runs-on: ubuntu-latest
18
18
  steps:
19
19
  - name: Check out the repository
20
- uses: actions/checkout@v4
20
+ uses: actions/checkout@v5
21
21
  with:
22
22
  fetch-depth: 0
23
23
 
@@ -62,7 +62,7 @@ jobs:
62
62
  echo "Studio branch: $STUDIO_BRANCH"
63
63
 
64
64
  - name: Check out Studio
65
- uses: actions/checkout@v4
65
+ uses: actions/checkout@v5
66
66
  with:
67
67
  fetch-depth: 0
68
68
  repository: iterative/studio
@@ -70,7 +70,7 @@ jobs:
70
70
  token: ${{ secrets.ITERATIVE_STUDIO_READ_ACCESS_TOKEN }}
71
71
 
72
72
  - name: Check out repository
73
- uses: actions/checkout@v4
73
+ uses: actions/checkout@v5
74
74
  with:
75
75
  path: './backend/datachain'
76
76
  fetch-depth: 0
@@ -18,7 +18,7 @@ jobs:
18
18
  runs-on: ubuntu-latest
19
19
  steps:
20
20
  - name: Check out the repository
21
- uses: actions/checkout@v4
21
+ uses: actions/checkout@v5
22
22
  with:
23
23
  fetch-depth: 0
24
24
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
@@ -73,7 +73,7 @@ jobs:
73
73
 
74
74
  steps:
75
75
  - name: Check out the repository
76
- uses: actions/checkout@v4
76
+ uses: actions/checkout@v5
77
77
  with:
78
78
  fetch-depth: 0
79
79
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
@@ -175,7 +175,7 @@ jobs:
175
175
  - {os: ubuntu-latest-4-cores, pyv: "3.13", group: multimodal}
176
176
 
177
177
  steps:
178
- - uses: actions/checkout@v4
178
+ - uses: actions/checkout@v5
179
179
  with:
180
180
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
181
181
 
@@ -194,6 +194,22 @@ jobs:
194
194
  - name: Install nox
195
195
  run: uv pip install nox --system
196
196
 
197
+ - name: Install FFmpeg on Windows
198
+ if: runner.os == 'Windows'
199
+ run: choco install ffmpeg
200
+
201
+ - name: Install FFmpeg on macOS
202
+ if: runner.os == 'macOS'
203
+ run: |
204
+ brew install ffmpeg
205
+ echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
206
+
207
+ - name: Install FFmpeg on Ubuntu
208
+ if: runner.os == 'Linux'
209
+ run: |
210
+ sudo apt update
211
+ sudo apt install -y ffmpeg
212
+
197
213
  - name: Set hf token
198
214
  if: matrix.group == 'llm_and_nlp'
199
215
  run: echo 'HF_TOKEN=${{ secrets.HF_TOKEN }}' >> "$GITHUB_ENV"
@@ -11,7 +11,7 @@ jobs:
11
11
  runs-on: ubuntu-latest
12
12
  steps:
13
13
  - name: Check out the repository
14
- uses: actions/checkout@v4
14
+ uses: actions/checkout@v5
15
15
 
16
16
  - name: Update template
17
17
  uses: iterative/py-template@main
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.8'
27
+ rev: 'v0.12.9'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.2
3
+ Version: 0.30.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
45
45
  Requires-Dist: Pillow<12,>=10.0.0
46
46
  Requires-Dist: msgpack<2,>=1.0.4
47
47
  Requires-Dist: psutil
48
- Requires-Dist: huggingface_hub<0.34.0
48
+ Requires-Dist: huggingface_hub
49
49
  Requires-Dist: iterative-telemetry>=0.0.10
50
50
  Requires-Dist: platformdirs
51
51
  Requires-Dist: dvc-studio-client<1,>=0.21
@@ -0,0 +1,38 @@
1
+ # Functions
2
+
3
+ Use built-in functions for data manipulation and analysis. They operate on the underlying database that stores the chain data and are useful in operations such as [`DataChain.filter`](datachain.md#datachain.lib.dc.DataChain.filter) and [`DataChain.mutate`](datachain.md#datachain.lib.dc.DataChain.mutate).
4
+
5
+ Functions are organized by category and accessed through their respective modules. For example, string functions are accessed via `func.string.length()`, array functions via `func.array.contains()`, etc.
6
+
7
+ !!! note "Global Function Access"
8
+ Only a subset of functions is available directly from `datachain.func` (e.g., `func.length`). Most functions should be accessed through their specific module namespace (e.g., `func.string.length`) to avoid naming conflicts.
9
+
10
+ ## Function Categories
11
+
12
+ DataChain provides several categories of functions for different types of operations:
13
+
14
+ - **[Aggregate Functions](functions/aggregate.md)** - Functions for aggregating data like `sum`, `count`, `avg`, etc.
15
+ - **[Array Functions](functions/array.md)** - Functions for working with arrays and lists
16
+ - **[Conditional Functions](functions/conditional.md)** - Functions for conditional logic like `ifelse`, `case`, etc.
17
+ - **[Numeric Functions](functions/numeric.md)** - Functions for numeric operations and computations
18
+ - **[Path Functions](functions/path.md)** - Functions for working with file paths
19
+ - **[Random Functions](functions/random.md)** - Functions for generating random values
20
+ - **[String Functions](functions/string.md)** - Functions for string manipulation and processing
21
+ - **[Window Functions](functions/window.md)** - Functions for window operations
22
+
23
+ ## Usage
24
+
25
+ ```python
26
+ from datachain.func import aggregate, array, conditional, numeric, path, random, string, window
27
+
28
+ # Access functions through their module namespaces
29
+ dc.mutate(
30
+ text_length=string.length("text_column"),
31
+ contains_item=array.contains("array_column", "value"),
32
+ file_extension=path.file_ext("file_path")
33
+ )
34
+
35
+ # Some commonly used functions are also available directly
36
+ from datachain.func import sum, count, length, ifelse
37
+ dc.mutate(total=sum("amount"))
38
+ ```
@@ -0,0 +1,5 @@
1
+ # Aggregate Functions
2
+
3
+ Aggregate functions perform calculations on sets of values and return a single result.
4
+
5
+ ::: datachain.func.aggregate
@@ -0,0 +1,5 @@
1
+ # Array Functions
2
+
3
+ Functions for working with arrays and lists, including operations like distance calculations and array manipulation.
4
+
5
+ ::: datachain.func.array
@@ -0,0 +1,5 @@
1
+ # Conditional Functions
2
+
3
+ Functions for conditional logic and control flow in data processing.
4
+
5
+ ::: datachain.func.conditional
@@ -0,0 +1,5 @@
1
+ # Numeric Functions
2
+
3
+ Functions for numeric operations, bit manipulation, and mathematical computations.
4
+
5
+ ::: datachain.func.numeric
@@ -0,0 +1,5 @@
1
+ # Path Functions
2
+
3
+ Functions for working with file paths and extracting path components.
4
+
5
+ ::: datachain.func.path
@@ -0,0 +1,5 @@
1
+ # Random Functions
2
+
3
+ Functions for generating random values and sampling.
4
+
5
+ ::: datachain.func.random
@@ -0,0 +1,22 @@
1
+ # String Functions
2
+
3
+ Functions for string manipulation, text processing, and string analysis.
4
+
5
+ ## Usage
6
+
7
+ String functions are available under the `func.string` namespace to avoid name collisions with other functions:
8
+
9
+ ```python
10
+ from datachain.func import string
11
+
12
+ # Use string functions with the string namespace
13
+ dc.mutate(
14
+ str_len=string.length("text_column"),
15
+ parts=string.split("text_column", ","),
16
+ cleaned=string.replace("text_column", "old", "new"),
17
+ regex_cleaned=string.regexp_replace("text_column", r"\d+", "X"),
18
+ distance=string.byte_hamming_distance("col1", "col2")
19
+ )
20
+ ```
21
+
22
+ ::: datachain.func.string
@@ -0,0 +1,5 @@
1
+ # Window Functions
2
+
3
+ Functions for window operations and analytical processing.
4
+
5
+ ::: datachain.func.window
@@ -35,7 +35,10 @@ def process(fragment: AudioFragment, pipeline: Pipeline) -> str:
35
35
  audio_array = audio_array.mean(axis=1)
36
36
 
37
37
  # Pass the numpy array with exact sampling rate from fragment
38
- result = pipeline({"raw": audio_array, "sampling_rate": sample_rate})
38
+ result = pipeline(
39
+ {"raw": audio_array, "sampling_rate": sample_rate},
40
+ generate_kwargs={"language": "en"},
41
+ )
39
42
  return str(result["text"])
40
43
 
41
44
 
@@ -82,7 +82,16 @@ nav:
82
82
  - Segment: references/data-types/segment.md
83
83
  - UDF: references/udf.md
84
84
  - Torch: references/torch.md
85
- - Functions: references/func.md
85
+ - Functions:
86
+ - Overview: references/func.md
87
+ - Aggregate: references/functions/aggregate.md
88
+ - Array: references/functions/array.md
89
+ - Conditional: references/functions/conditional.md
90
+ - Numeric: references/functions/numeric.md
91
+ - Path: references/functions/path.md
92
+ - Random: references/functions/random.md
93
+ - String: references/functions/string.md
94
+ - Window: references/functions/window.md
86
95
  - Toolkit: references/toolkit.md
87
96
  - 📖 CLI Reference:
88
97
  - Overview: commands/index.md
@@ -177,7 +186,7 @@ plugins:
177
186
  - https://numpy.org/doc/stable/objects.inv
178
187
  - https://pandas.pydata.org/docs/objects.inv
179
188
  - https://arrow.apache.org/docs/objects.inv
180
- # - https://docs.sqlalchemy.org/objects.inv # SSL certificate issue
189
+ - https://docs.sqlalchemy.org/objects.inv # re-enabled (previously disabled due to an SSL certificate issue)
181
190
  - https://docs.pydantic.dev/latest/objects.inv
182
191
 
183
192
  watch:
@@ -49,7 +49,7 @@ dependencies = [
49
49
  "Pillow>=10.0.0,<12",
50
50
  "msgpack>=1.0.4,<2",
51
51
  "psutil",
52
- "huggingface_hub<0.34.0",
52
+ "huggingface_hub",
53
53
  "iterative-telemetry>=0.0.10",
54
54
  "platformdirs",
55
55
  "dvc-studio-client>=0.21,<1",
@@ -680,8 +680,9 @@ class Catalog:
680
680
  ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name)
681
681
  assert ds_namespace
682
682
  assert ds_project
683
- project = self.metastore.get_project(ds_project, ds_namespace)
684
- dataset = self.get_dataset(ds_name, project)
683
+ dataset = self.get_dataset(
684
+ ds_name, namespace_name=ds_namespace, project_name=ds_project
685
+ )
685
686
  if not ds_version:
686
687
  ds_version = dataset.latest_version
687
688
  dataset_sources = self.warehouse.get_dataset_sources(
@@ -807,7 +808,11 @@ class Catalog:
807
808
  )
808
809
  default_version = DEFAULT_DATASET_VERSION
809
810
  try:
810
- dataset = self.get_dataset(name, project)
811
+ dataset = self.get_dataset(
812
+ name,
813
+ namespace_name=project.namespace.name if project else None,
814
+ project_name=project.name if project else None,
815
+ )
811
816
  default_version = dataset.next_version_patch
812
817
  if update_version == "major":
813
818
  default_version = dataset.next_version_major
@@ -1016,7 +1021,11 @@ class Catalog:
1016
1021
  dc.save(name)
1017
1022
  except Exception as e: # noqa: BLE001
1018
1023
  try:
1019
- ds = self.get_dataset(name, project)
1024
+ ds = self.get_dataset(
1025
+ name,
1026
+ namespace_name=project.namespace.name,
1027
+ project_name=project.name,
1028
+ )
1020
1029
  self.metastore.update_dataset_status(
1021
1030
  ds,
1022
1031
  DatasetStatus.FAILED,
@@ -1033,7 +1042,11 @@ class Catalog:
1033
1042
  except DatasetNotFoundError:
1034
1043
  raise e from None
1035
1044
 
1036
- ds = self.get_dataset(name, project)
1045
+ ds = self.get_dataset(
1046
+ name,
1047
+ namespace_name=project.namespace.name,
1048
+ project_name=project.name,
1049
+ )
1037
1050
 
1038
1051
  self.update_dataset_version_with_warehouse_info(
1039
1052
  ds,
@@ -1041,7 +1054,11 @@ class Catalog:
1041
1054
  sources="\n".join(sources),
1042
1055
  )
1043
1056
 
1044
- return self.get_dataset(name, project)
1057
+ return self.get_dataset(
1058
+ name,
1059
+ namespace_name=project.namespace.name,
1060
+ project_name=project.name,
1061
+ )
1045
1062
 
1046
1063
  def get_full_dataset_name(
1047
1064
  self,
@@ -1077,22 +1094,23 @@ class Catalog:
1077
1094
  return namespace_name, project_name, name
1078
1095
 
1079
1096
  def get_dataset(
1080
- self, name: str, project: Optional[Project] = None
1097
+ self,
1098
+ name: str,
1099
+ namespace_name: Optional[str] = None,
1100
+ project_name: Optional[str] = None,
1081
1101
  ) -> DatasetRecord:
1082
1102
  from datachain.lib.listing import is_listing_dataset
1083
1103
 
1084
- project = project or self.metastore.default_project
1104
+ namespace_name = namespace_name or self.metastore.default_namespace_name
1105
+ project_name = project_name or self.metastore.default_project_name
1085
1106
 
1086
1107
  if is_listing_dataset(name):
1087
- project = self.metastore.listing_project
1108
+ namespace_name = self.metastore.system_namespace_name
1109
+ project_name = self.metastore.listing_project_name
1088
1110
 
1089
- try:
1090
- return self.metastore.get_dataset(name, project.id if project else None)
1091
- except DatasetNotFoundError:
1092
- raise DatasetNotFoundError(
1093
- f"Dataset {name} not found in namespace {project.namespace.name}"
1094
- f" and project {project.name}"
1095
- ) from None
1111
+ return self.metastore.get_dataset(
1112
+ name, namespace_name=namespace_name, project_name=project_name
1113
+ )
1096
1114
 
1097
1115
  def get_dataset_with_remote_fallback(
1098
1116
  self,
@@ -1113,8 +1131,11 @@ class Catalog:
1113
1131
 
1114
1132
  if self.metastore.is_local_dataset(namespace_name) or not update:
1115
1133
  try:
1116
- project = self.metastore.get_project(project_name, namespace_name)
1117
- ds = self.get_dataset(name, project)
1134
+ ds = self.get_dataset(
1135
+ name,
1136
+ namespace_name=namespace_name,
1137
+ project_name=project_name,
1138
+ )
1118
1139
  if not version or ds.has_version(version):
1119
1140
  return ds
1120
1141
  except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
@@ -1139,7 +1160,9 @@ class Catalog:
1139
1160
  local_ds_version=version,
1140
1161
  )
1141
1162
  return self.get_dataset(
1142
- name, self.metastore.get_project(project_name, namespace_name)
1163
+ name,
1164
+ namespace_name=namespace_name,
1165
+ project_name=project_name,
1143
1166
  )
1144
1167
 
1145
1168
  return self.get_remote_dataset(namespace_name, project_name, name)
@@ -1148,7 +1171,11 @@ class Catalog:
1148
1171
  """Returns dataset that contains version with specific uuid"""
1149
1172
  for dataset in self.ls_datasets():
1150
1173
  if dataset.has_version_with_uuid(uuid):
1151
- return self.get_dataset(dataset.name, dataset.project)
1174
+ return self.get_dataset(
1175
+ dataset.name,
1176
+ namespace_name=dataset.project.namespace.name,
1177
+ project_name=dataset.project.name,
1178
+ )
1152
1179
  raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")
1153
1180
 
1154
1181
  def get_remote_dataset(
@@ -1171,9 +1198,18 @@ class Catalog:
1171
1198
  return DatasetRecord.from_dict(dataset_info)
1172
1199
 
1173
1200
  def get_dataset_dependencies(
1174
- self, name: str, version: str, project: Optional[Project] = None, indirect=False
1201
+ self,
1202
+ name: str,
1203
+ version: str,
1204
+ namespace_name: Optional[str] = None,
1205
+ project_name: Optional[str] = None,
1206
+ indirect=False,
1175
1207
  ) -> list[Optional[DatasetDependency]]:
1176
- dataset = self.get_dataset(name, project)
1208
+ dataset = self.get_dataset(
1209
+ name,
1210
+ namespace_name=namespace_name,
1211
+ project_name=project_name,
1212
+ )
1177
1213
 
1178
1214
  direct_dependencies = self.metastore.get_direct_dataset_dependencies(
1179
1215
  dataset, version
@@ -1187,10 +1223,13 @@ class Catalog:
1187
1223
  # dependency has been removed
1188
1224
  continue
1189
1225
  if d.is_dataset:
1190
- project = self.metastore.get_project(d.project, d.namespace)
1191
1226
  # only datasets can have dependencies
1192
1227
  d.dependencies = self.get_dataset_dependencies(
1193
- d.name, d.version, project, indirect=indirect
1228
+ d.name,
1229
+ d.version,
1230
+ namespace_name=d.namespace,
1231
+ project_name=d.project,
1232
+ indirect=indirect,
1194
1233
  )
1195
1234
 
1196
1235
  return direct_dependencies
@@ -1340,7 +1379,11 @@ class Catalog:
1340
1379
  project: Optional[Project] = None,
1341
1380
  client_config=None,
1342
1381
  ) -> list[str]:
1343
- dataset = self.get_dataset(name, project)
1382
+ dataset = self.get_dataset(
1383
+ name,
1384
+ namespace_name=project.namespace.name if project else None,
1385
+ project_name=project.name if project else None,
1386
+ )
1344
1387
 
1345
1388
  return self.warehouse.export_dataset_table(
1346
1389
  bucket_uri, dataset, version, client_config
@@ -1349,7 +1392,11 @@ class Catalog:
1349
1392
  def dataset_table_export_file_names(
1350
1393
  self, name: str, version: str, project: Optional[Project] = None
1351
1394
  ) -> list[str]:
1352
- dataset = self.get_dataset(name, project)
1395
+ dataset = self.get_dataset(
1396
+ name,
1397
+ namespace_name=project.namespace.name if project else None,
1398
+ project_name=project.name if project else None,
1399
+ )
1353
1400
  return self.warehouse.dataset_table_export_file_names(dataset, version)
1354
1401
 
1355
1402
  def remove_dataset(
@@ -1359,7 +1406,11 @@ class Catalog:
1359
1406
  version: Optional[str] = None,
1360
1407
  force: Optional[bool] = False,
1361
1408
  ):
1362
- dataset = self.get_dataset(name, project)
1409
+ dataset = self.get_dataset(
1410
+ name,
1411
+ namespace_name=project.namespace.name if project else None,
1412
+ project_name=project.name if project else None,
1413
+ )
1363
1414
  if not version and not force:
1364
1415
  raise ValueError(f"Missing dataset version from input for dataset {name}")
1365
1416
  if version and not dataset.has_version(version):
@@ -1395,7 +1446,11 @@ class Catalog:
1395
1446
  if attrs is not None:
1396
1447
  update_data["attrs"] = attrs # type: ignore[assignment]
1397
1448
 
1398
- dataset = self.get_dataset(name, project)
1449
+ dataset = self.get_dataset(
1450
+ name,
1451
+ namespace_name=project.namespace.name if project else None,
1452
+ project_name=project.name if project else None,
1453
+ )
1399
1454
  return self.update_dataset(dataset, **update_data)
1400
1455
 
1401
1456
  def ls(
@@ -1549,7 +1604,9 @@ class Catalog:
1549
1604
  )
1550
1605
 
1551
1606
  try:
1552
- local_dataset = self.get_dataset(local_ds_name, project=project)
1607
+ local_dataset = self.get_dataset(
1608
+ local_ds_name, namespace_name=namespace.name, project_name=project.name
1609
+ )
1553
1610
  if local_dataset and local_dataset.has_version(local_ds_version):
1554
1611
  raise DataChainError(
1555
1612
  f"Local dataset {local_ds_uri} already exists with different uuid,"
@@ -107,8 +107,9 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
107
107
  def list_datasets_local_versions(catalog: "Catalog", name: str):
108
108
  namespace_name, project_name, name = catalog.get_full_dataset_name(name)
109
109
 
110
- project = catalog.metastore.get_project(project_name, namespace_name)
111
- ds = catalog.get_dataset(name, project)
110
+ ds = catalog.get_dataset(
111
+ name, namespace_name=namespace_name, project_name=project_name
112
+ )
112
113
  for v in ds.versions:
113
114
  yield (name, v.version)
114
115
 
@@ -301,7 +301,13 @@ class AbstractMetastore(ABC, Serializable):
301
301
  """
302
302
 
303
303
  @abstractmethod
304
- def get_dataset(self, name: str, project_id: Optional[int] = None) -> DatasetRecord:
304
+ def get_dataset(
305
+ self,
306
+ name: str, # normal, not full dataset name
307
+ namespace_name: Optional[str] = None,
308
+ project_name: Optional[str] = None,
309
+ conn=None,
310
+ ) -> DatasetRecord:
305
311
  """Gets a single dataset by name."""
306
312
 
307
313
  @abstractmethod
@@ -912,11 +918,14 @@ class AbstractDBMetastore(AbstractMetastore):
912
918
  **kwargs, # TODO registered = True / False
913
919
  ) -> DatasetRecord:
914
920
  """Creates new dataset."""
915
- project_id = project_id or self.default_project.id
921
+ if not project_id:
922
+ project = self.default_project
923
+ else:
924
+ project = self.get_project_by_id(project_id)
916
925
 
917
926
  query = self._datasets_insert().values(
918
927
  name=name,
919
- project_id=project_id,
928
+ project_id=project.id,
920
929
  status=status,
921
930
  feature_schema=json.dumps(feature_schema or {}),
922
931
  created_at=datetime.now(timezone.utc),
@@ -935,7 +944,9 @@ class AbstractDBMetastore(AbstractMetastore):
935
944
  query = query.on_conflict_do_nothing(index_elements=["project_id", "name"])
936
945
  self.db.execute(query)
937
946
 
938
- return self.get_dataset(name, project_id)
947
+ return self.get_dataset(
948
+ name, namespace_name=project.namespace.name, project_name=project.name
949
+ )
939
950
 
940
951
  def create_dataset_version( # noqa: PLR0913
941
952
  self,
@@ -992,7 +1003,12 @@ class AbstractDBMetastore(AbstractMetastore):
992
1003
  )
993
1004
  self.db.execute(query, conn=conn)
994
1005
 
995
- return self.get_dataset(dataset.name, dataset.project.id, conn=conn)
1006
+ return self.get_dataset(
1007
+ dataset.name,
1008
+ namespace_name=dataset.project.namespace.name,
1009
+ project_name=dataset.project.name,
1010
+ conn=conn,
1011
+ )
996
1012
 
997
1013
  def remove_dataset(self, dataset: DatasetRecord) -> None:
998
1014
  """Removes dataset."""
@@ -1216,21 +1232,30 @@ class AbstractDBMetastore(AbstractMetastore):
1216
1232
  def get_dataset(
1217
1233
  self,
1218
1234
  name: str, # normal, not full dataset name
1219
- project_id: Optional[int] = None,
1235
+ namespace_name: Optional[str] = None,
1236
+ project_name: Optional[str] = None,
1220
1237
  conn=None,
1221
1238
  ) -> DatasetRecord:
1222
1239
  """
1223
1240
  Gets a single dataset in project by dataset name.
1224
1241
  """
1225
- project_id = project_id or self.default_project.id
1242
+ namespace_name = namespace_name or self.default_namespace_name
1243
+ project_name = project_name or self.default_project_name
1226
1244
 
1227
1245
  d = self._datasets
1246
+ n = self._namespaces
1247
+ p = self._projects
1228
1248
  query = self._base_dataset_query()
1229
- query = query.where(d.c.name == name, d.c.project_id == project_id) # type: ignore [attr-defined]
1249
+ query = query.where(
1250
+ d.c.name == name,
1251
+ n.c.name == namespace_name,
1252
+ p.c.name == project_name,
1253
+ ) # type: ignore [attr-defined]
1230
1254
  ds = self._parse_dataset(self.db.execute(query, conn=conn))
1231
1255
  if not ds:
1232
1256
  raise DatasetNotFoundError(
1233
- f"Dataset {name} not found in project with id {project_id}"
1257
+ f"Dataset {name} not found in namespace {namespace_name}"
1258
+ f" and project {project_name}"
1234
1259
  )
1235
1260
 
1236
1261
  return ds