datachain 0.30.2__tar.gz → 0.30.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (419)
  1. {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/benchmarks.yml +1 -1
  2. {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/release.yml +1 -1
  3. {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/tests-studio.yml +2 -2
  4. {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/tests.yml +19 -3
  5. {datachain-0.30.2 → datachain-0.30.4}/.github/workflows/update-template.yaml +1 -1
  6. {datachain-0.30.2 → datachain-0.30.4}/.pre-commit-config.yaml +1 -1
  7. {datachain-0.30.2 → datachain-0.30.4}/PKG-INFO +2 -2
  8. datachain-0.30.4/docs/references/func.md +38 -0
  9. datachain-0.30.4/docs/references/functions/aggregate.md +5 -0
  10. datachain-0.30.4/docs/references/functions/array.md +5 -0
  11. datachain-0.30.4/docs/references/functions/conditional.md +5 -0
  12. datachain-0.30.4/docs/references/functions/numeric.md +5 -0
  13. datachain-0.30.4/docs/references/functions/path.md +5 -0
  14. datachain-0.30.4/docs/references/functions/random.md +5 -0
  15. datachain-0.30.4/docs/references/functions/string.md +22 -0
  16. datachain-0.30.4/docs/references/functions/window.md +5 -0
  17. {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/audio-to-text.py +4 -1
  18. {datachain-0.30.2 → datachain-0.30.4}/mkdocs.yml +11 -2
  19. {datachain-0.30.2 → datachain-0.30.4}/pyproject.toml +1 -1
  20. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/__init__.py +2 -0
  21. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/__init__.py +2 -0
  22. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/catalog.py +100 -31
  23. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/loader.py +4 -2
  24. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/__init__.py +1 -0
  25. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/datasets.py +19 -12
  26. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/metastore.py +34 -30
  27. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/sqlite.py +0 -4
  28. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/delta.py +23 -12
  29. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/string.py +8 -0
  30. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/__init__.py +2 -1
  31. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/database.py +50 -6
  32. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/datachain.py +48 -20
  33. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/datasets.py +12 -7
  34. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/utils.py +5 -0
  35. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/namespaces.py +3 -1
  36. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/projects.py +3 -1
  37. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/signal_schema.py +28 -17
  38. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/listing.py +5 -9
  39. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/bbox.py +14 -12
  40. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/pose.py +14 -12
  41. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/segment.py +14 -12
  42. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/dataset.py +42 -28
  43. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/schema.py +4 -0
  44. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/utils.py +7 -0
  45. {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/PKG-INFO +2 -2
  46. {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/SOURCES.txt +9 -0
  47. {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/requires.txt +1 -1
  48. {datachain-0.30.2 → datachain-0.30.4}/tests/conftest.py +4 -32
  49. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_catalog.py +2 -0
  50. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_data_storage.py +2 -2
  51. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_datachain.py +0 -70
  52. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_dataset_query.py +19 -6
  53. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_datasets.py +11 -19
  54. datachain-0.30.4/tests/func/test_mutate.py +284 -0
  55. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_pull.py +15 -4
  56. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_read_dataset_remote.py +10 -0
  57. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_to_database.py +62 -5
  58. {datachain-0.30.2 → datachain-0.30.4}/tests/test_cli_studio.py +1 -0
  59. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_datachain.py +12 -15
  60. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_namespace.py +2 -2
  61. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_project.py +1 -1
  62. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_signal_schema.py +4 -2
  63. {datachain-0.30.2 → datachain-0.30.4}/tests/utils.py +2 -14
  64. datachain-0.30.2/docs/references/func.md +0 -5
  65. {datachain-0.30.2 → datachain-0.30.4}/.cruft.json +0 -0
  66. {datachain-0.30.2 → datachain-0.30.4}/.gitattributes +0 -0
  67. {datachain-0.30.2 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  68. {datachain-0.30.2 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  69. {datachain-0.30.2 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  70. {datachain-0.30.2 → datachain-0.30.4}/.github/codecov.yaml +0 -0
  71. {datachain-0.30.2 → datachain-0.30.4}/.github/dependabot.yml +0 -0
  72. {datachain-0.30.2 → datachain-0.30.4}/.gitignore +0 -0
  73. {datachain-0.30.2 → datachain-0.30.4}/CODE_OF_CONDUCT.rst +0 -0
  74. {datachain-0.30.2 → datachain-0.30.4}/LICENSE +0 -0
  75. {datachain-0.30.2 → datachain-0.30.4}/README.rst +0 -0
  76. {datachain-0.30.2 → datachain-0.30.4}/docs/assets/captioned_cartoons.png +0 -0
  77. {datachain-0.30.2 → datachain-0.30.4}/docs/assets/datachain-white.svg +0 -0
  78. {datachain-0.30.2 → datachain-0.30.4}/docs/assets/datachain.svg +0 -0
  79. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/login.md +0 -0
  80. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/logout.md +0 -0
  81. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/team.md +0 -0
  82. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/auth/token.md +0 -0
  83. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/index.md +0 -0
  84. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/cancel.md +0 -0
  85. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/clusters.md +0 -0
  86. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/logs.md +0 -0
  87. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/ls.md +0 -0
  88. {datachain-0.30.2 → datachain-0.30.4}/docs/commands/job/run.md +0 -0
  89. {datachain-0.30.2 → datachain-0.30.4}/docs/contributing.md +0 -0
  90. {datachain-0.30.2 → datachain-0.30.4}/docs/css/github-permalink-style.css +0 -0
  91. {datachain-0.30.2 → datachain-0.30.4}/docs/examples.md +0 -0
  92. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/db_migrations.md +0 -0
  93. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/delta.md +0 -0
  94. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/env.md +0 -0
  95. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/index.md +0 -0
  96. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/namespaces.md +0 -0
  97. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/processing.md +0 -0
  98. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/remotes.md +0 -0
  99. {datachain-0.30.2 → datachain-0.30.4}/docs/guide/retry.md +0 -0
  100. {datachain-0.30.2 → datachain-0.30.4}/docs/index.md +0 -0
  101. {datachain-0.30.2 → datachain-0.30.4}/docs/overrides/main.html +0 -0
  102. {datachain-0.30.2 → datachain-0.30.4}/docs/quick-start.md +0 -0
  103. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/arrowrow.md +0 -0
  104. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/bbox.md +0 -0
  105. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/file.md +0 -0
  106. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/imagefile.md +0 -0
  107. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/index.md +0 -0
  108. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/pose.md +0 -0
  109. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/segment.md +0 -0
  110. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/tarvfile.md +0 -0
  111. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/textfile.md +0 -0
  112. {datachain-0.30.2 → datachain-0.30.4}/docs/references/data-types/videofile.md +0 -0
  113. {datachain-0.30.2 → datachain-0.30.4}/docs/references/datachain.md +0 -0
  114. {datachain-0.30.2 → datachain-0.30.4}/docs/references/index.md +0 -0
  115. {datachain-0.30.2 → datachain-0.30.4}/docs/references/toolkit.md +0 -0
  116. {datachain-0.30.2 → datachain-0.30.4}/docs/references/torch.md +0 -0
  117. {datachain-0.30.2 → datachain-0.30.4}/docs/references/udf.md +0 -0
  118. {datachain-0.30.2 → datachain-0.30.4}/docs/tutorials.md +0 -0
  119. {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  120. {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  121. {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/openimage-detect.py +0 -0
  122. {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
  123. {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/ultralytics-pose.py +0 -0
  124. {datachain-0.30.2 → datachain-0.30.4}/examples/computer_vision/ultralytics-segment.py +0 -0
  125. {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/common_sql_functions.py +0 -0
  126. {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/json-csv-reader.py +0 -0
  127. {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/torch-loader.py +0 -0
  128. {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/udfs/parallel.py +0 -0
  129. {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/udfs/simple.py +0 -0
  130. {datachain-0.30.2 → datachain-0.30.4}/examples/get_started/udfs/stateful.py +0 -0
  131. {datachain-0.30.2 → datachain-0.30.4}/examples/incremental_processing/delta.py +0 -0
  132. {datachain-0.30.2 → datachain-0.30.4}/examples/incremental_processing/retry.py +0 -0
  133. {datachain-0.30.2 → datachain-0.30.4}/examples/incremental_processing/utils.py +0 -0
  134. {datachain-0.30.2 → datachain-0.30.4}/examples/llm_and_nlp/claude-query.py +0 -0
  135. {datachain-0.30.2 → datachain-0.30.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  136. {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/clip_inference.py +0 -0
  137. {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/hf_pipeline.py +0 -0
  138. {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
  139. {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/wds.py +0 -0
  140. {datachain-0.30.2 → datachain-0.30.4}/examples/multimodal/wds_filtered.py +0 -0
  141. {datachain-0.30.2 → datachain-0.30.4}/noxfile.py +0 -0
  142. {datachain-0.30.2 → datachain-0.30.4}/setup.cfg +0 -0
  143. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/__main__.py +0 -0
  144. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/asyn.py +0 -0
  145. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cache.py +0 -0
  146. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/catalog/datasource.py +0 -0
  147. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/__init__.py +0 -0
  148. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/du.py +0 -0
  149. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/index.py +0 -0
  150. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/ls.py +0 -0
  151. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/misc.py +0 -0
  152. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/query.py +0 -0
  153. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/commands/show.py +0 -0
  154. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/__init__.py +0 -0
  155. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/job.py +0 -0
  156. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/studio.py +0 -0
  157. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/parser/utils.py +0 -0
  158. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/cli/utils.py +0 -0
  159. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/__init__.py +0 -0
  160. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/azure.py +0 -0
  161. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/fileslice.py +0 -0
  162. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/fsspec.py +0 -0
  163. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/gcs.py +0 -0
  164. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/hf.py +0 -0
  165. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/local.py +0 -0
  166. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/client/s3.py +0 -0
  167. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/config.py +0 -0
  168. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/__init__.py +0 -0
  169. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/db_engine.py +0 -0
  170. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/job.py +0 -0
  171. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/schema.py +0 -0
  172. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/serializer.py +0 -0
  173. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/data_storage/warehouse.py +0 -0
  174. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/dataset.py +0 -0
  175. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/diff/__init__.py +0 -0
  176. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/error.py +0 -0
  177. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/fs/__init__.py +0 -0
  178. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/fs/reference.py +0 -0
  179. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/fs/utils.py +0 -0
  180. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/__init__.py +0 -0
  181. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/aggregate.py +0 -0
  182. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/array.py +0 -0
  183. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/base.py +0 -0
  184. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/conditional.py +0 -0
  185. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/func.py +0 -0
  186. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/numeric.py +0 -0
  187. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/path.py +0 -0
  188. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/random.py +0 -0
  189. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/func/window.py +0 -0
  190. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/job.py +0 -0
  191. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/__init__.py +0 -0
  192. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/arrow.py +0 -0
  193. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/audio.py +0 -0
  194. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/clip.py +0 -0
  195. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/__init__.py +0 -0
  196. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/flatten.py +0 -0
  197. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
  198. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
  199. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/unflatten.py +0 -0
  200. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  201. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/data_model.py +0 -0
  202. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dataset_info.py +0 -0
  203. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/csv.py +0 -0
  204. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/hf.py +0 -0
  205. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/json.py +0 -0
  206. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/listings.py +0 -0
  207. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/pandas.py +0 -0
  208. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/parquet.py +0 -0
  209. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/records.py +0 -0
  210. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/storage.py +0 -0
  211. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/dc/values.py +0 -0
  212. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/file.py +0 -0
  213. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/hf.py +0 -0
  214. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/image.py +0 -0
  215. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/listing.py +0 -0
  216. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/listing_info.py +0 -0
  217. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/meta_formats.py +0 -0
  218. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/model_store.py +0 -0
  219. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/pytorch.py +0 -0
  220. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/settings.py +0 -0
  221. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/tar.py +0 -0
  222. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/text.py +0 -0
  223. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/udf.py +0 -0
  224. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/udf_signature.py +0 -0
  225. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/utils.py +0 -0
  226. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/video.py +0 -0
  227. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/webdataset.py +0 -0
  228. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/lib/webdataset_laion.py +0 -0
  229. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/__init__.py +0 -0
  230. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/bbox.py +0 -0
  231. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/pose.py +0 -0
  232. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/segment.py +0 -0
  233. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/ultralytics/__init__.py +0 -0
  234. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/model/utils.py +0 -0
  235. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/namespace.py +0 -0
  236. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/node.py +0 -0
  237. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/nodes_fetcher.py +0 -0
  238. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/nodes_thread_pool.py +0 -0
  239. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/progress.py +0 -0
  240. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/project.py +0 -0
  241. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/py.typed +0 -0
  242. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/__init__.py +0 -0
  243. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/batch.py +0 -0
  244. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/dispatch.py +0 -0
  245. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/metrics.py +0 -0
  246. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/params.py +0 -0
  247. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/queue.py +0 -0
  248. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/session.py +0 -0
  249. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/udf.py +0 -0
  250. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/query/utils.py +0 -0
  251. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/remote/__init__.py +0 -0
  252. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/remote/studio.py +0 -0
  253. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/script_meta.py +0 -0
  254. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/semver.py +0 -0
  255. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/__init__.py +0 -0
  256. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/default/__init__.py +0 -0
  257. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/default/base.py +0 -0
  258. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/__init__.py +0 -0
  259. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/aggregate.py +0 -0
  260. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/array.py +0 -0
  261. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/conditional.py +0 -0
  262. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/numeric.py +0 -0
  263. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/path.py +0 -0
  264. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/random.py +0 -0
  265. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/functions/string.py +0 -0
  266. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/postgresql_dialect.py +0 -0
  267. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/postgresql_types.py +0 -0
  268. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/selectable.py +0 -0
  269. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/__init__.py +0 -0
  270. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/base.py +0 -0
  271. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/types.py +0 -0
  272. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/sqlite/vector.py +0 -0
  273. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/types.py +0 -0
  274. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/sql/utils.py +0 -0
  275. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/studio.py +0 -0
  276. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/telemetry.py +0 -0
  277. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/toolkit/__init__.py +0 -0
  278. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/toolkit/split.py +0 -0
  279. {datachain-0.30.2 → datachain-0.30.4}/src/datachain/torch/__init__.py +0 -0
  280. {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/dependency_links.txt +0 -0
  281. {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/entry_points.txt +0 -0
  282. {datachain-0.30.2 → datachain-0.30.4}/src/datachain.egg-info/top_level.txt +0 -0
  283. {datachain-0.30.2 → datachain-0.30.4}/tests/__init__.py +0 -0
  284. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/__init__.py +0 -0
  285. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/conftest.py +0 -0
  286. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  287. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/.dvc/config +0 -0
  288. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/.gitignore +0 -0
  289. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  290. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/test_datachain.py +0 -0
  291. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/test_ls.py +0 -0
  292. {datachain-0.30.2 → datachain-0.30.4}/tests/benchmarks/test_version.py +0 -0
  293. {datachain-0.30.2 → datachain-0.30.4}/tests/data.py +0 -0
  294. {datachain-0.30.2 → datachain-0.30.4}/tests/examples/__init__.py +0 -0
  295. {datachain-0.30.2 → datachain-0.30.4}/tests/examples/test_examples.py +0 -0
  296. {datachain-0.30.2 → datachain-0.30.4}/tests/examples/test_wds_e2e.py +0 -0
  297. {datachain-0.30.2 → datachain-0.30.4}/tests/examples/wds_data.py +0 -0
  298. {datachain-0.30.2 → datachain-0.30.4}/tests/func/__init__.py +0 -0
  299. {datachain-0.30.2 → datachain-0.30.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  300. {datachain-0.30.2 → datachain-0.30.4}/tests/func/data/lena.jpg +0 -0
  301. {datachain-0.30.2 → datachain-0.30.4}/tests/func/fake-service-account-credentials.json +0 -0
  302. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/__init__.py +0 -0
  303. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_aggregate.py +0 -0
  304. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_array.py +0 -0
  305. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_conditional.py +0 -0
  306. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_numeric.py +0 -0
  307. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_path.py +0 -0
  308. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_random.py +0 -0
  309. {datachain-0.30.2 → datachain-0.30.4}/tests/func/functions/test_string.py +0 -0
  310. {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/__init__.py +0 -0
  311. {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/running-mask0.png +0 -0
  312. {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/running-mask1.png +0 -0
  313. {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/running.jpg +0 -0
  314. {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/data/ships.jpg +0 -0
  315. {datachain-0.30.2 → datachain-0.30.4}/tests/func/model/test_yolo.py +0 -0
  316. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_audio.py +0 -0
  317. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_batching.py +0 -0
  318. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_client.py +0 -0
  319. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_cloud_transfer.py +0 -0
  320. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_datachain_merge.py +0 -0
  321. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_delta.py +0 -0
  322. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_feature_pickling.py +0 -0
  323. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_file.py +0 -0
  324. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_hf.py +0 -0
  325. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_hidden_field.py +0 -0
  326. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_image.py +0 -0
  327. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_listing.py +0 -0
  328. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_ls.py +0 -0
  329. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_meta_formats.py +0 -0
  330. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_metastore.py +0 -0
  331. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_metrics.py +0 -0
  332. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_pytorch.py +0 -0
  333. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_query.py +0 -0
  334. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_read_database.py +0 -0
  335. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  336. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_retry.py +0 -0
  337. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_session.py +0 -0
  338. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_studio_datetime_parsing.py +0 -0
  339. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_toolkit.py +0 -0
  340. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_video.py +0 -0
  341. {datachain-0.30.2 → datachain-0.30.4}/tests/func/test_warehouse.py +0 -0
  342. {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class.py +0 -0
  343. {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class_exception.py +0 -0
  344. {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class_parallel.py +0 -0
  345. {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  346. {datachain-0.30.2 → datachain-0.30.4}/tests/scripts/name_len_slow.py +0 -0
  347. {datachain-0.30.2 → datachain-0.30.4}/tests/test_atomicity.py +0 -0
  348. {datachain-0.30.2 → datachain-0.30.4}/tests/test_cli_e2e.py +0 -0
  349. {datachain-0.30.2 → datachain-0.30.4}/tests/test_import_time.py +0 -0
  350. {datachain-0.30.2 → datachain-0.30.4}/tests/test_query_e2e.py +0 -0
  351. {datachain-0.30.2 → datachain-0.30.4}/tests/test_telemetry.py +0 -0
  352. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/__init__.py +0 -0
  353. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/__init__.py +0 -0
  354. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/conftest.py +0 -0
  355. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_arrow.py +0 -0
  356. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_audio.py +0 -0
  357. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_clip.py +0 -0
  358. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  359. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_datachain_merge.py +0 -0
  360. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_diff.py +0 -0
  361. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_feature.py +0 -0
  362. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_feature_utils.py +0 -0
  363. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_file.py +0 -0
  364. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_hf.py +0 -0
  365. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_image.py +0 -0
  366. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_listing_info.py +0 -0
  367. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_partition_by.py +0 -0
  368. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_python_to_sql.py +0 -0
  369. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_schema.py +0 -0
  370. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_settings.py +0 -0
  371. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_sql_to_python.py +0 -0
  372. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_text.py +0 -0
  373. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_udf.py +0 -0
  374. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_udf_signature.py +0 -0
  375. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_utils.py +0 -0
  376. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/lib/test_webdataset.py +0 -0
  377. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/__init__.py +0 -0
  378. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_bbox.py +0 -0
  379. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_pose.py +0 -0
  380. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_segment.py +0 -0
  381. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/model/test_utils.py +0 -0
  382. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/__init__.py +0 -0
  383. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/sqlite/__init__.py +0 -0
  384. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/sqlite/test_types.py +0 -0
  385. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
  386. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_array.py +0 -0
  387. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_conditional.py +0 -0
  388. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_path.py +0 -0
  389. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_random.py +0 -0
  390. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_selectable.py +0 -0
  391. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/sql/test_string.py +0 -0
  392. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_asyn.py +0 -0
  393. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_cache.py +0 -0
  394. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_catalog.py +0 -0
  395. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_catalog_loader.py +0 -0
  396. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_cli_parsing.py +0 -0
  397. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_client.py +0 -0
  398. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_client_gcs.py +0 -0
  399. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_client_s3.py +0 -0
  400. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_config.py +0 -0
  401. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_data_storage.py +0 -0
  402. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_database_engine.py +0 -0
  403. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_dataset.py +0 -0
  404. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_dispatch.py +0 -0
  405. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_fileslice.py +0 -0
  406. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_func.py +0 -0
  407. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_listing.py +0 -0
  408. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_metastore.py +0 -0
  409. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_module_exports.py +0 -0
  410. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_pytorch.py +0 -0
  411. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_query.py +0 -0
  412. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_query_metrics.py +0 -0
  413. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_query_params.py +0 -0
  414. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_script_meta.py +0 -0
  415. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_semver.py +0 -0
  416. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_serializer.py +0 -0
  417. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_session.py +0 -0
  418. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_utils.py +0 -0
  419. {datachain-0.30.2 → datachain-0.30.4}/tests/unit/test_warehouse.py +0 -0
@@ -18,7 +18,7 @@ jobs:
18
18
  run:
19
19
  runs-on: ubuntu-latest
20
20
  steps:
21
- - uses: actions/checkout@v4
21
+ - uses: actions/checkout@v5
22
22
  - name: Set up Python 3.13
23
23
  uses: actions/setup-python@v5
24
24
  with:
@@ -17,7 +17,7 @@ jobs:
17
17
  runs-on: ubuntu-latest
18
18
  steps:
19
19
  - name: Check out the repository
20
- uses: actions/checkout@v4
20
+ uses: actions/checkout@v5
21
21
  with:
22
22
  fetch-depth: 0
23
23
 
@@ -62,7 +62,7 @@ jobs:
62
62
  echo "Studio branch: $STUDIO_BRANCH"
63
63
 
64
64
  - name: Check out Studio
65
- uses: actions/checkout@v4
65
+ uses: actions/checkout@v5
66
66
  with:
67
67
  fetch-depth: 0
68
68
  repository: iterative/studio
@@ -70,7 +70,7 @@ jobs:
70
70
  token: ${{ secrets.ITERATIVE_STUDIO_READ_ACCESS_TOKEN }}
71
71
 
72
72
  - name: Check out repository
73
- uses: actions/checkout@v4
73
+ uses: actions/checkout@v5
74
74
  with:
75
75
  path: './backend/datachain'
76
76
  fetch-depth: 0
@@ -18,7 +18,7 @@ jobs:
18
18
  runs-on: ubuntu-latest
19
19
  steps:
20
20
  - name: Check out the repository
21
- uses: actions/checkout@v4
21
+ uses: actions/checkout@v5
22
22
  with:
23
23
  fetch-depth: 0
24
24
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
@@ -73,7 +73,7 @@ jobs:
73
73
 
74
74
  steps:
75
75
  - name: Check out the repository
76
- uses: actions/checkout@v4
76
+ uses: actions/checkout@v5
77
77
  with:
78
78
  fetch-depth: 0
79
79
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
@@ -175,7 +175,7 @@ jobs:
175
175
  - {os: ubuntu-latest-4-cores, pyv: "3.13", group: multimodal}
176
176
 
177
177
  steps:
178
- - uses: actions/checkout@v4
178
+ - uses: actions/checkout@v5
179
179
  with:
180
180
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
181
181
 
@@ -194,6 +194,22 @@ jobs:
194
194
  - name: Install nox
195
195
  run: uv pip install nox --system
196
196
 
197
+ - name: Install FFmpeg on Windows
198
+ if: runner.os == 'Windows'
199
+ run: choco install ffmpeg
200
+
201
+ - name: Install FFmpeg on macOS
202
+ if: runner.os == 'macOS'
203
+ run: |
204
+ brew install ffmpeg
205
+ echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
206
+
207
+ - name: Install FFmpeg on Ubuntu
208
+ if: runner.os == 'Linux'
209
+ run: |
210
+ sudo apt update
211
+ sudo apt install -y ffmpeg
212
+
197
213
  - name: Set hf token
198
214
  if: matrix.group == 'llm_and_nlp'
199
215
  run: echo 'HF_TOKEN=${{ secrets.HF_TOKEN }}' >> "$GITHUB_ENV"
@@ -11,7 +11,7 @@ jobs:
11
11
  runs-on: ubuntu-latest
12
12
  steps:
13
13
  - name: Check out the repository
14
- uses: actions/checkout@v4
14
+ uses: actions/checkout@v5
15
15
 
16
16
  - name: Update template
17
17
  uses: iterative/py-template@main
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.8'
27
+ rev: 'v0.12.10'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.2
3
+ Version: 0.30.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
45
45
  Requires-Dist: Pillow<12,>=10.0.0
46
46
  Requires-Dist: msgpack<2,>=1.0.4
47
47
  Requires-Dist: psutil
48
- Requires-Dist: huggingface_hub<0.34.0
48
+ Requires-Dist: huggingface_hub
49
49
  Requires-Dist: iterative-telemetry>=0.0.10
50
50
  Requires-Dist: platformdirs
51
51
  Requires-Dist: dvc-studio-client<1,>=0.21
@@ -0,0 +1,38 @@
1
+ # Functions
2
+
3
+ Use built-in functions for data manipulation and analysis to operate on the underlying database storing the chain data. These functions are useful for operations like [`DataChain.filter`](datachain.md#datachain.lib.dc.DataChain.filter) and [`DataChain.mutate`](datachain.md#datachain.lib.dc.DataChain.mutate).
4
+
5
+ Functions are organized by category and accessed through their respective modules. For example, string functions are accessed via `func.string.length()`, array functions via `func.array.contains()`, etc.
6
+
7
+ !!! note "Global Function Access"
8
+ Only a subset of functions are available directly from `datachain.func` (e.g., `func.length`). Most functions should be accessed through their specific module namespace (e.g., `func.string.length`) to avoid naming conflicts.
9
+
10
+ ## Function Categories
11
+
12
+ DataChain provides several categories of functions for different types of operations:
13
+
14
+ - **[Aggregate Functions](functions/aggregate.md)** - Functions for aggregating data like `sum`, `count`, `avg`, etc.
15
+ - **[Array Functions](functions/array.md)** - Functions for working with arrays and lists
16
+ - **[Conditional Functions](functions/conditional.md)** - Functions for conditional logic like `ifelse`, `case`, etc.
17
+ - **[Numeric Functions](functions/numeric.md)** - Functions for numeric operations and computations
18
+ - **[Path Functions](functions/path.md)** - Functions for working with file paths
19
+ - **[Random Functions](functions/random.md)** - Functions for generating random values
20
+ - **[String Functions](functions/string.md)** - Functions for string manipulation and processing
21
+ - **[Window Functions](functions/window.md)** - Functions for window operations
22
+
23
+ ## Usage
24
+
25
+ ```python
26
+ from datachain.func import aggregate, array, conditional, numeric, path, random, string, window
27
+
28
+ # Access functions through their module namespaces
29
+ dc.mutate(
30
+ text_length=string.length("text_column"),
31
+ contains_item=array.contains("array_column", "value"),
32
+ file_extension=path.file_ext("file_path")
33
+ )
34
+
35
+ # Some commonly used functions are also available directly
36
+ from datachain.func import sum, count, length, ifelse
37
+ dc.mutate(total=sum("amount"))
38
+ ```
@@ -0,0 +1,5 @@
1
+ # Aggregate Functions
2
+
3
+ Aggregate functions perform calculations on sets of values and return a single result.
4
+
5
+ ::: datachain.func.aggregate
@@ -0,0 +1,5 @@
1
+ # Array Functions
2
+
3
+ Functions for working with arrays and lists, including operations like distance calculations and array manipulation.
4
+
5
+ ::: datachain.func.array
@@ -0,0 +1,5 @@
1
+ # Conditional Functions
2
+
3
+ Functions for conditional logic and control flow in data processing.
4
+
5
+ ::: datachain.func.conditional
@@ -0,0 +1,5 @@
1
+ # Numeric Functions
2
+
3
+ Functions for numeric operations, bit manipulation, and mathematical computations.
4
+
5
+ ::: datachain.func.numeric
@@ -0,0 +1,5 @@
1
+ # Path Functions
2
+
3
+ Functions for working with file paths and extracting path components.
4
+
5
+ ::: datachain.func.path
@@ -0,0 +1,5 @@
1
+ # Random Functions
2
+
3
+ Functions for generating random values and sampling.
4
+
5
+ ::: datachain.func.random
@@ -0,0 +1,22 @@
1
+ # String Functions
2
+
3
+ Functions for string manipulation, text processing, and string analysis.
4
+
5
+ ## Usage
6
+
7
+ String functions are available under the `func.string` namespace to avoid name collisions with other functions:
8
+
9
+ ```python
10
+ from datachain.func import string
11
+
12
+ # Use string functions with the string namespace
13
+ dc.mutate(
14
+ str_len=string.length("text_column"),
15
+ parts=string.split("text_column", ","),
16
+ cleaned=string.replace("text_column", "old", "new"),
17
+ regex_cleaned=string.regexp_replace("text_column", r"\d+", "X"),
18
+ distance=string.byte_hamming_distance("col1", "col2")
19
+ )
20
+ ```
21
+
22
+ ::: datachain.func.string
@@ -0,0 +1,5 @@
1
+ # Window Functions
2
+
3
+ Functions for window operations and analytical processing.
4
+
5
+ ::: datachain.func.window
@@ -35,7 +35,10 @@ def process(fragment: AudioFragment, pipeline: Pipeline) -> str:
35
35
  audio_array = audio_array.mean(axis=1)
36
36
 
37
37
  # Pass the numpy array with exact sampling rate from fragment
38
- result = pipeline({"raw": audio_array, "sampling_rate": sample_rate})
38
+ result = pipeline(
39
+ {"raw": audio_array, "sampling_rate": sample_rate},
40
+ generate_kwargs={"language": "en"},
41
+ )
39
42
  return str(result["text"])
40
43
 
41
44
 
@@ -82,7 +82,16 @@ nav:
82
82
  - Segment: references/data-types/segment.md
83
83
  - UDF: references/udf.md
84
84
  - Torch: references/torch.md
85
- - Functions: references/func.md
85
+ - Functions:
86
+ - Overview: references/func.md
87
+ - Aggregate: references/functions/aggregate.md
88
+ - Array: references/functions/array.md
89
+ - Conditional: references/functions/conditional.md
90
+ - Numeric: references/functions/numeric.md
91
+ - Path: references/functions/path.md
92
+ - Random: references/functions/random.md
93
+ - String: references/functions/string.md
94
+ - Window: references/functions/window.md
86
95
  - Toolkit: references/toolkit.md
87
96
  - 📖 CLI Reference:
88
97
  - Overview: commands/index.md
@@ -177,7 +186,7 @@ plugins:
177
186
  - https://numpy.org/doc/stable/objects.inv
178
187
  - https://pandas.pydata.org/docs/objects.inv
179
188
  - https://arrow.apache.org/docs/objects.inv
180
- # - https://docs.sqlalchemy.org/objects.inv # SSL certificate issue
189
+ - https://docs.sqlalchemy.org/objects.inv # SSL certificate issue
181
190
  - https://docs.pydantic.dev/latest/objects.inv
182
191
 
183
192
  watch:
@@ -49,7 +49,7 @@ dependencies = [
49
49
  "Pillow>=10.0.0,<12",
50
50
  "msgpack>=1.0.4,<2",
51
51
  "psutil",
52
- "huggingface_hub<0.34.0",
52
+ "huggingface_hub",
53
53
  "iterative-telemetry>=0.0.10",
54
54
  "platformdirs",
55
55
  "dvc-studio-client>=0.21,<1",
@@ -6,6 +6,7 @@ from datachain.lib.dc import (
6
6
  Sys,
7
7
  datasets,
8
8
  delete_dataset,
9
+ is_studio,
9
10
  listings,
10
11
  move_dataset,
11
12
  read_csv,
@@ -74,6 +75,7 @@ __all__ = [
74
75
  "datasets",
75
76
  "delete_dataset",
76
77
  "is_chain_type",
78
+ "is_studio",
77
79
  "listings",
78
80
  "metrics",
79
81
  "move_dataset",
@@ -3,6 +3,7 @@ from .catalog import (
3
3
  QUERY_SCRIPT_CANCELED_EXIT_CODE,
4
4
  QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE,
5
5
  Catalog,
6
+ is_namespace_local,
6
7
  )
7
8
  from .loader import get_catalog
8
9
 
@@ -12,4 +13,5 @@ __all__ = [
12
13
  "QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE",
13
14
  "Catalog",
14
15
  "get_catalog",
16
+ "is_namespace_local",
15
17
  ]
@@ -113,6 +113,11 @@ else:
113
113
  SIGINT = signal.SIGINT
114
114
 
115
115
 
116
+ def is_namespace_local(namespace_name) -> bool:
117
+ """Checks if namespace is from local environment, i.e. is `local`"""
118
+ return namespace_name == "local"
119
+
120
+
116
121
  def shutdown_process(
117
122
  proc: subprocess.Popen,
118
123
  interrupt_timeout: Optional[int] = None,
@@ -680,8 +685,9 @@ class Catalog:
680
685
  ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name)
681
686
  assert ds_namespace
682
687
  assert ds_project
683
- project = self.metastore.get_project(ds_project, ds_namespace)
684
- dataset = self.get_dataset(ds_name, project)
688
+ dataset = self.get_dataset(
689
+ ds_name, namespace_name=ds_namespace, project_name=ds_project
690
+ )
685
691
  if not ds_version:
686
692
  ds_version = dataset.latest_version
687
693
  dataset_sources = self.warehouse.get_dataset_sources(
@@ -807,7 +813,11 @@ class Catalog:
807
813
  )
808
814
  default_version = DEFAULT_DATASET_VERSION
809
815
  try:
810
- dataset = self.get_dataset(name, project)
816
+ dataset = self.get_dataset(
817
+ name,
818
+ namespace_name=project.namespace.name if project else None,
819
+ project_name=project.name if project else None,
820
+ )
811
821
  default_version = dataset.next_version_patch
812
822
  if update_version == "major":
813
823
  default_version = dataset.next_version_major
@@ -1016,7 +1026,11 @@ class Catalog:
1016
1026
  dc.save(name)
1017
1027
  except Exception as e: # noqa: BLE001
1018
1028
  try:
1019
- ds = self.get_dataset(name, project)
1029
+ ds = self.get_dataset(
1030
+ name,
1031
+ namespace_name=project.namespace.name,
1032
+ project_name=project.name,
1033
+ )
1020
1034
  self.metastore.update_dataset_status(
1021
1035
  ds,
1022
1036
  DatasetStatus.FAILED,
@@ -1033,7 +1047,11 @@ class Catalog:
1033
1047
  except DatasetNotFoundError:
1034
1048
  raise e from None
1035
1049
 
1036
- ds = self.get_dataset(name, project)
1050
+ ds = self.get_dataset(
1051
+ name,
1052
+ namespace_name=project.namespace.name,
1053
+ project_name=project.name,
1054
+ )
1037
1055
 
1038
1056
  self.update_dataset_version_with_warehouse_info(
1039
1057
  ds,
@@ -1041,7 +1059,11 @@ class Catalog:
1041
1059
  sources="\n".join(sources),
1042
1060
  )
1043
1061
 
1044
- return self.get_dataset(name, project)
1062
+ return self.get_dataset(
1063
+ name,
1064
+ namespace_name=project.namespace.name,
1065
+ project_name=project.name,
1066
+ )
1045
1067
 
1046
1068
  def get_full_dataset_name(
1047
1069
  self,
@@ -1077,22 +1099,23 @@ class Catalog:
1077
1099
  return namespace_name, project_name, name
1078
1100
 
1079
1101
  def get_dataset(
1080
- self, name: str, project: Optional[Project] = None
1102
+ self,
1103
+ name: str,
1104
+ namespace_name: Optional[str] = None,
1105
+ project_name: Optional[str] = None,
1081
1106
  ) -> DatasetRecord:
1082
1107
  from datachain.lib.listing import is_listing_dataset
1083
1108
 
1084
- project = project or self.metastore.default_project
1109
+ namespace_name = namespace_name or self.metastore.default_namespace_name
1110
+ project_name = project_name or self.metastore.default_project_name
1085
1111
 
1086
1112
  if is_listing_dataset(name):
1087
- project = self.metastore.listing_project
1113
+ namespace_name = self.metastore.system_namespace_name
1114
+ project_name = self.metastore.listing_project_name
1088
1115
 
1089
- try:
1090
- return self.metastore.get_dataset(name, project.id if project else None)
1091
- except DatasetNotFoundError:
1092
- raise DatasetNotFoundError(
1093
- f"Dataset {name} not found in namespace {project.namespace.name}"
1094
- f" and project {project.name}"
1095
- ) from None
1116
+ return self.metastore.get_dataset(
1117
+ name, namespace_name=namespace_name, project_name=project_name
1118
+ )
1096
1119
 
1097
1120
  def get_dataset_with_remote_fallback(
1098
1121
  self,
@@ -1103,6 +1126,8 @@ class Catalog:
1103
1126
  pull_dataset: bool = False,
1104
1127
  update: bool = False,
1105
1128
  ) -> DatasetRecord:
1129
+ from datachain.lib.dc.utils import is_studio
1130
+
1106
1131
  # Intentionally ignore update flag if version is provided. Here only exact
1107
1132
  # version can be provided and update then doesn't make sense.
1108
1133
  # It corresponds to a query like this for example:
@@ -1111,16 +1136,24 @@ class Catalog:
1111
1136
  if version:
1112
1137
  update = False
1113
1138
 
1114
- if self.metastore.is_local_dataset(namespace_name) or not update:
1139
+ # we don't do Studio fallback if script is already running in Studio, or if we try
1140
+ # to fetch dataset with local namespace as that one cannot
1141
+ # exist in Studio in the first place
1142
+ no_fallback = is_studio() or is_namespace_local(namespace_name)
1143
+
1144
+ if no_fallback or not update:
1115
1145
  try:
1116
- project = self.metastore.get_project(project_name, namespace_name)
1117
- ds = self.get_dataset(name, project)
1146
+ ds = self.get_dataset(
1147
+ name,
1148
+ namespace_name=namespace_name,
1149
+ project_name=project_name,
1150
+ )
1118
1151
  if not version or ds.has_version(version):
1119
1152
  return ds
1120
1153
  except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
1121
1154
  pass
1122
1155
 
1123
- if self.metastore.is_local_dataset(namespace_name):
1156
+ if no_fallback:
1124
1157
  raise DatasetNotFoundError(
1125
1158
  f"Dataset {name}"
1126
1159
  + (f" version {version} " if version else " ")
@@ -1139,7 +1172,9 @@ class Catalog:
1139
1172
  local_ds_version=version,
1140
1173
  )
1141
1174
  return self.get_dataset(
1142
- name, self.metastore.get_project(project_name, namespace_name)
1175
+ name,
1176
+ namespace_name=namespace_name,
1177
+ project_name=project_name,
1143
1178
  )
1144
1179
 
1145
1180
  return self.get_remote_dataset(namespace_name, project_name, name)
@@ -1148,7 +1183,11 @@ class Catalog:
1148
1183
  """Returns dataset that contains version with specific uuid"""
1149
1184
  for dataset in self.ls_datasets():
1150
1185
  if dataset.has_version_with_uuid(uuid):
1151
- return self.get_dataset(dataset.name, dataset.project)
1186
+ return self.get_dataset(
1187
+ dataset.name,
1188
+ namespace_name=dataset.project.namespace.name,
1189
+ project_name=dataset.project.name,
1190
+ )
1152
1191
  raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")
1153
1192
 
1154
1193
  def get_remote_dataset(
@@ -1171,9 +1210,18 @@ class Catalog:
1171
1210
  return DatasetRecord.from_dict(dataset_info)
1172
1211
 
1173
1212
  def get_dataset_dependencies(
1174
- self, name: str, version: str, project: Optional[Project] = None, indirect=False
1213
+ self,
1214
+ name: str,
1215
+ version: str,
1216
+ namespace_name: Optional[str] = None,
1217
+ project_name: Optional[str] = None,
1218
+ indirect=False,
1175
1219
  ) -> list[Optional[DatasetDependency]]:
1176
- dataset = self.get_dataset(name, project)
1220
+ dataset = self.get_dataset(
1221
+ name,
1222
+ namespace_name=namespace_name,
1223
+ project_name=project_name,
1224
+ )
1177
1225
 
1178
1226
  direct_dependencies = self.metastore.get_direct_dataset_dependencies(
1179
1227
  dataset, version
@@ -1187,10 +1235,13 @@ class Catalog:
1187
1235
  # dependency has been removed
1188
1236
  continue
1189
1237
  if d.is_dataset:
1190
- project = self.metastore.get_project(d.project, d.namespace)
1191
1238
  # only datasets can have dependencies
1192
1239
  d.dependencies = self.get_dataset_dependencies(
1193
- d.name, d.version, project, indirect=indirect
1240
+ d.name,
1241
+ d.version,
1242
+ namespace_name=d.namespace,
1243
+ project_name=d.project,
1244
+ indirect=indirect,
1194
1245
  )
1195
1246
 
1196
1247
  return direct_dependencies
@@ -1340,7 +1391,11 @@ class Catalog:
1340
1391
  project: Optional[Project] = None,
1341
1392
  client_config=None,
1342
1393
  ) -> list[str]:
1343
- dataset = self.get_dataset(name, project)
1394
+ dataset = self.get_dataset(
1395
+ name,
1396
+ namespace_name=project.namespace.name if project else None,
1397
+ project_name=project.name if project else None,
1398
+ )
1344
1399
 
1345
1400
  return self.warehouse.export_dataset_table(
1346
1401
  bucket_uri, dataset, version, client_config
@@ -1349,7 +1404,11 @@ class Catalog:
1349
1404
  def dataset_table_export_file_names(
1350
1405
  self, name: str, version: str, project: Optional[Project] = None
1351
1406
  ) -> list[str]:
1352
- dataset = self.get_dataset(name, project)
1407
+ dataset = self.get_dataset(
1408
+ name,
1409
+ namespace_name=project.namespace.name if project else None,
1410
+ project_name=project.name if project else None,
1411
+ )
1353
1412
  return self.warehouse.dataset_table_export_file_names(dataset, version)
1354
1413
 
1355
1414
  def remove_dataset(
@@ -1359,7 +1418,11 @@ class Catalog:
1359
1418
  version: Optional[str] = None,
1360
1419
  force: Optional[bool] = False,
1361
1420
  ):
1362
- dataset = self.get_dataset(name, project)
1421
+ dataset = self.get_dataset(
1422
+ name,
1423
+ namespace_name=project.namespace.name if project else None,
1424
+ project_name=project.name if project else None,
1425
+ )
1363
1426
  if not version and not force:
1364
1427
  raise ValueError(f"Missing dataset version from input for dataset {name}")
1365
1428
  if version and not dataset.has_version(version):
@@ -1395,7 +1458,11 @@ class Catalog:
1395
1458
  if attrs is not None:
1396
1459
  update_data["attrs"] = attrs # type: ignore[assignment]
1397
1460
 
1398
- dataset = self.get_dataset(name, project)
1461
+ dataset = self.get_dataset(
1462
+ name,
1463
+ namespace_name=project.namespace.name if project else None,
1464
+ project_name=project.name if project else None,
1465
+ )
1399
1466
  return self.update_dataset(dataset, **update_data)
1400
1467
 
1401
1468
  def ls(
@@ -1549,7 +1616,9 @@ class Catalog:
1549
1616
  )
1550
1617
 
1551
1618
  try:
1552
- local_dataset = self.get_dataset(local_ds_name, project=project)
1619
+ local_dataset = self.get_dataset(
1620
+ local_ds_name, namespace_name=namespace.name, project_name=project.name
1621
+ )
1553
1622
  if local_dataset and local_dataset.has_version(local_ds_version):
1554
1623
  raise DataChainError(
1555
1624
  f"Local dataset {local_ds_uri} already exists with different uuid,"
@@ -127,7 +127,8 @@ def get_udf_distributor_class() -> Optional[type["AbstractUDFDistributor"]]:
127
127
 
128
128
 
129
129
  def get_catalog(
130
- client_config: Optional[dict[str, Any]] = None, in_memory: bool = False
130
+ client_config: Optional[dict[str, Any]] = None,
131
+ in_memory: bool = False,
131
132
  ) -> "Catalog":
132
133
  """
133
134
  Function that creates Catalog instance with appropriate metastore
@@ -142,8 +143,9 @@ def get_catalog(
142
143
  """
143
144
  from datachain.catalog import Catalog
144
145
 
146
+ metastore = get_metastore(in_memory=in_memory)
145
147
  return Catalog(
146
- metastore=get_metastore(in_memory=in_memory),
148
+ metastore=metastore,
147
149
  warehouse=get_warehouse(in_memory=in_memory),
148
150
  client_config=client_config,
149
151
  in_memory=in_memory,
@@ -6,6 +6,7 @@ from multiprocessing import freeze_support
6
6
  from typing import Optional
7
7
 
8
8
  from datachain.cli.utils import get_logging_level
9
+ from datachain.error import DataChainError as DataChainError
9
10
 
10
11
  from .commands import (
11
12
  clear_cache,