datachain 0.25.0__tar.gz → 0.25.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (399)
  1. {datachain-0.25.0 → datachain-0.25.2}/.github/workflows/benchmarks.yml +0 -3
  2. {datachain-0.25.0 → datachain-0.25.2}/.github/workflows/tests-studio.yml +15 -2
  3. {datachain-0.25.0 → datachain-0.25.2}/.github/workflows/tests.yml +16 -12
  4. {datachain-0.25.0 → datachain-0.25.2}/PKG-INFO +3 -2
  5. {datachain-0.25.0 → datachain-0.25.2}/docs/references/datachain.md +4 -0
  6. {datachain-0.25.0 → datachain-0.25.2}/pyproject.toml +3 -1
  7. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/arrow.py +9 -0
  8. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/datasets.py +1 -1
  9. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/hf.py +18 -21
  10. {datachain-0.25.0 → datachain-0.25.2}/src/datachain.egg-info/PKG-INFO +3 -2
  11. {datachain-0.25.0 → datachain-0.25.2}/src/datachain.egg-info/requires.txt +4 -1
  12. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_hf.py +16 -1
  13. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_datachain.py +16 -0
  14. {datachain-0.25.0 → datachain-0.25.2}/.cruft.json +0 -0
  15. {datachain-0.25.0 → datachain-0.25.2}/.gitattributes +0 -0
  16. {datachain-0.25.0 → datachain-0.25.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  17. {datachain-0.25.0 → datachain-0.25.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  18. {datachain-0.25.0 → datachain-0.25.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  19. {datachain-0.25.0 → datachain-0.25.2}/.github/codecov.yaml +0 -0
  20. {datachain-0.25.0 → datachain-0.25.2}/.github/dependabot.yml +0 -0
  21. {datachain-0.25.0 → datachain-0.25.2}/.github/workflows/release.yml +0 -0
  22. {datachain-0.25.0 → datachain-0.25.2}/.github/workflows/update-template.yaml +0 -0
  23. {datachain-0.25.0 → datachain-0.25.2}/.gitignore +0 -0
  24. {datachain-0.25.0 → datachain-0.25.2}/.pre-commit-config.yaml +0 -0
  25. {datachain-0.25.0 → datachain-0.25.2}/CODE_OF_CONDUCT.rst +0 -0
  26. {datachain-0.25.0 → datachain-0.25.2}/LICENSE +0 -0
  27. {datachain-0.25.0 → datachain-0.25.2}/README.rst +0 -0
  28. {datachain-0.25.0 → datachain-0.25.2}/docs/assets/captioned_cartoons.png +0 -0
  29. {datachain-0.25.0 → datachain-0.25.2}/docs/assets/datachain-white.svg +0 -0
  30. {datachain-0.25.0 → datachain-0.25.2}/docs/assets/datachain.svg +0 -0
  31. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/auth/login.md +0 -0
  32. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/auth/logout.md +0 -0
  33. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/auth/team.md +0 -0
  34. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/auth/token.md +0 -0
  35. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/index.md +0 -0
  36. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/job/cancel.md +0 -0
  37. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/job/clusters.md +0 -0
  38. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/job/logs.md +0 -0
  39. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/job/ls.md +0 -0
  40. {datachain-0.25.0 → datachain-0.25.2}/docs/commands/job/run.md +0 -0
  41. {datachain-0.25.0 → datachain-0.25.2}/docs/contributing.md +0 -0
  42. {datachain-0.25.0 → datachain-0.25.2}/docs/css/github-permalink-style.css +0 -0
  43. {datachain-0.25.0 → datachain-0.25.2}/docs/examples.md +0 -0
  44. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/db_migrations.md +0 -0
  45. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/delta.md +0 -0
  46. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/env.md +0 -0
  47. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/index.md +0 -0
  48. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/namespaces.md +0 -0
  49. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/processing.md +0 -0
  50. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/remotes.md +0 -0
  51. {datachain-0.25.0 → datachain-0.25.2}/docs/guide/retry.md +0 -0
  52. {datachain-0.25.0 → datachain-0.25.2}/docs/index.md +0 -0
  53. {datachain-0.25.0 → datachain-0.25.2}/docs/overrides/main.html +0 -0
  54. {datachain-0.25.0 → datachain-0.25.2}/docs/quick-start.md +0 -0
  55. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/arrowrow.md +0 -0
  56. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/bbox.md +0 -0
  57. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/file.md +0 -0
  58. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/imagefile.md +0 -0
  59. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/index.md +0 -0
  60. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/pose.md +0 -0
  61. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/segment.md +0 -0
  62. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/tarvfile.md +0 -0
  63. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/textfile.md +0 -0
  64. {datachain-0.25.0 → datachain-0.25.2}/docs/references/data-types/videofile.md +0 -0
  65. {datachain-0.25.0 → datachain-0.25.2}/docs/references/func.md +0 -0
  66. {datachain-0.25.0 → datachain-0.25.2}/docs/references/index.md +0 -0
  67. {datachain-0.25.0 → datachain-0.25.2}/docs/references/toolkit.md +0 -0
  68. {datachain-0.25.0 → datachain-0.25.2}/docs/references/torch.md +0 -0
  69. {datachain-0.25.0 → datachain-0.25.2}/docs/references/udf.md +0 -0
  70. {datachain-0.25.0 → datachain-0.25.2}/docs/tutorials.md +0 -0
  71. {datachain-0.25.0 → datachain-0.25.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  72. {datachain-0.25.0 → datachain-0.25.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  73. {datachain-0.25.0 → datachain-0.25.2}/examples/computer_vision/openimage-detect.py +0 -0
  74. {datachain-0.25.0 → datachain-0.25.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  75. {datachain-0.25.0 → datachain-0.25.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  76. {datachain-0.25.0 → datachain-0.25.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  77. {datachain-0.25.0 → datachain-0.25.2}/examples/get_started/common_sql_functions.py +0 -0
  78. {datachain-0.25.0 → datachain-0.25.2}/examples/get_started/json-csv-reader.py +0 -0
  79. {datachain-0.25.0 → datachain-0.25.2}/examples/get_started/torch-loader.py +0 -0
  80. {datachain-0.25.0 → datachain-0.25.2}/examples/get_started/udfs/parallel.py +0 -0
  81. {datachain-0.25.0 → datachain-0.25.2}/examples/get_started/udfs/simple.py +0 -0
  82. {datachain-0.25.0 → datachain-0.25.2}/examples/get_started/udfs/stateful.py +0 -0
  83. {datachain-0.25.0 → datachain-0.25.2}/examples/incremental_processing/delta.py +0 -0
  84. {datachain-0.25.0 → datachain-0.25.2}/examples/incremental_processing/retry.py +0 -0
  85. {datachain-0.25.0 → datachain-0.25.2}/examples/incremental_processing/utils.py +0 -0
  86. {datachain-0.25.0 → datachain-0.25.2}/examples/llm_and_nlp/claude-query.py +0 -0
  87. {datachain-0.25.0 → datachain-0.25.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  88. {datachain-0.25.0 → datachain-0.25.2}/examples/multimodal/clip_inference.py +0 -0
  89. {datachain-0.25.0 → datachain-0.25.2}/examples/multimodal/hf_pipeline.py +0 -0
  90. {datachain-0.25.0 → datachain-0.25.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  91. {datachain-0.25.0 → datachain-0.25.2}/examples/multimodal/wds.py +0 -0
  92. {datachain-0.25.0 → datachain-0.25.2}/examples/multimodal/wds_filtered.py +0 -0
  93. {datachain-0.25.0 → datachain-0.25.2}/mkdocs.yml +0 -0
  94. {datachain-0.25.0 → datachain-0.25.2}/noxfile.py +0 -0
  95. {datachain-0.25.0 → datachain-0.25.2}/setup.cfg +0 -0
  96. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/__init__.py +0 -0
  97. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/__main__.py +0 -0
  98. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/asyn.py +0 -0
  99. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cache.py +0 -0
  100. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/catalog/__init__.py +0 -0
  101. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/catalog/catalog.py +0 -0
  102. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/catalog/datasource.py +0 -0
  103. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/catalog/loader.py +0 -0
  104. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/__init__.py +0 -0
  105. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/__init__.py +0 -0
  106. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/datasets.py +0 -0
  107. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/du.py +0 -0
  108. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/index.py +0 -0
  109. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/ls.py +0 -0
  110. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/misc.py +0 -0
  111. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/query.py +0 -0
  112. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/commands/show.py +0 -0
  113. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/parser/__init__.py +0 -0
  114. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/parser/job.py +0 -0
  115. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/parser/studio.py +0 -0
  116. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/parser/utils.py +0 -0
  117. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/cli/utils.py +0 -0
  118. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/__init__.py +0 -0
  119. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/azure.py +0 -0
  120. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/fileslice.py +0 -0
  121. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/fsspec.py +0 -0
  122. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/gcs.py +0 -0
  123. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/hf.py +0 -0
  124. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/local.py +0 -0
  125. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/client/s3.py +0 -0
  126. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/config.py +0 -0
  127. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/__init__.py +0 -0
  128. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/db_engine.py +0 -0
  129. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/job.py +0 -0
  130. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/metastore.py +0 -0
  131. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/schema.py +0 -0
  132. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/serializer.py +0 -0
  133. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/sqlite.py +0 -0
  134. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/data_storage/warehouse.py +0 -0
  135. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/dataset.py +0 -0
  136. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/delta.py +0 -0
  137. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/diff/__init__.py +0 -0
  138. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/error.py +0 -0
  139. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/fs/__init__.py +0 -0
  140. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/fs/reference.py +0 -0
  141. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/fs/utils.py +0 -0
  142. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/__init__.py +0 -0
  143. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/aggregate.py +0 -0
  144. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/array.py +0 -0
  145. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/base.py +0 -0
  146. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/conditional.py +0 -0
  147. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/func.py +0 -0
  148. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/numeric.py +0 -0
  149. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/path.py +0 -0
  150. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/random.py +0 -0
  151. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/string.py +0 -0
  152. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/func/window.py +0 -0
  153. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/job.py +0 -0
  154. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/__init__.py +0 -0
  155. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/clip.py +0 -0
  156. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/convert/__init__.py +0 -0
  157. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/convert/flatten.py +0 -0
  158. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  159. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  160. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/convert/unflatten.py +0 -0
  161. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  162. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/data_model.py +0 -0
  163. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dataset_info.py +0 -0
  164. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/__init__.py +0 -0
  165. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/csv.py +0 -0
  166. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/database.py +0 -0
  167. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/datachain.py +0 -0
  168. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/hf.py +0 -0
  169. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/json.py +0 -0
  170. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/listings.py +0 -0
  171. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/pandas.py +0 -0
  172. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/parquet.py +0 -0
  173. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/records.py +0 -0
  174. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/storage.py +0 -0
  175. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/utils.py +0 -0
  176. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/dc/values.py +0 -0
  177. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/file.py +0 -0
  178. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/image.py +0 -0
  179. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/listing.py +0 -0
  180. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/listing_info.py +0 -0
  181. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/meta_formats.py +0 -0
  182. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/model_store.py +0 -0
  183. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/namespaces.py +0 -0
  184. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/projects.py +0 -0
  185. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/pytorch.py +0 -0
  186. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/settings.py +0 -0
  187. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/signal_schema.py +0 -0
  188. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/tar.py +0 -0
  189. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/text.py +0 -0
  190. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/udf.py +0 -0
  191. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/udf_signature.py +0 -0
  192. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/utils.py +0 -0
  193. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/video.py +0 -0
  194. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/webdataset.py +0 -0
  195. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/lib/webdataset_laion.py +0 -0
  196. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/listing.py +0 -0
  197. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/__init__.py +0 -0
  198. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/bbox.py +0 -0
  199. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/pose.py +0 -0
  200. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/segment.py +0 -0
  201. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  202. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  203. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/ultralytics/pose.py +0 -0
  204. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/ultralytics/segment.py +0 -0
  205. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/model/utils.py +0 -0
  206. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/namespace.py +0 -0
  207. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/node.py +0 -0
  208. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/nodes_fetcher.py +0 -0
  209. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/nodes_thread_pool.py +0 -0
  210. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/progress.py +0 -0
  211. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/project.py +0 -0
  212. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/py.typed +0 -0
  213. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/__init__.py +0 -0
  214. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/batch.py +0 -0
  215. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/dataset.py +0 -0
  216. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/dispatch.py +0 -0
  217. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/metrics.py +0 -0
  218. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/params.py +0 -0
  219. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/queue.py +0 -0
  220. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/schema.py +0 -0
  221. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/session.py +0 -0
  222. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/udf.py +0 -0
  223. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/query/utils.py +0 -0
  224. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/remote/__init__.py +0 -0
  225. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/remote/studio.py +0 -0
  226. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/script_meta.py +0 -0
  227. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/semver.py +0 -0
  228. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/__init__.py +0 -0
  229. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/default/__init__.py +0 -0
  230. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/default/base.py +0 -0
  231. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/__init__.py +0 -0
  232. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/aggregate.py +0 -0
  233. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/array.py +0 -0
  234. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/conditional.py +0 -0
  235. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/numeric.py +0 -0
  236. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/path.py +0 -0
  237. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/random.py +0 -0
  238. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/functions/string.py +0 -0
  239. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/selectable.py +0 -0
  240. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  241. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/sqlite/base.py +0 -0
  242. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/sqlite/types.py +0 -0
  243. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/sqlite/vector.py +0 -0
  244. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/types.py +0 -0
  245. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/sql/utils.py +0 -0
  246. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/studio.py +0 -0
  247. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/telemetry.py +0 -0
  248. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/toolkit/__init__.py +0 -0
  249. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/toolkit/split.py +0 -0
  250. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/torch/__init__.py +0 -0
  251. {datachain-0.25.0 → datachain-0.25.2}/src/datachain/utils.py +0 -0
  252. {datachain-0.25.0 → datachain-0.25.2}/src/datachain.egg-info/SOURCES.txt +0 -0
  253. {datachain-0.25.0 → datachain-0.25.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  254. {datachain-0.25.0 → datachain-0.25.2}/src/datachain.egg-info/entry_points.txt +0 -0
  255. {datachain-0.25.0 → datachain-0.25.2}/src/datachain.egg-info/top_level.txt +0 -0
  256. {datachain-0.25.0 → datachain-0.25.2}/tests/__init__.py +0 -0
  257. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/__init__.py +0 -0
  258. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/conftest.py +0 -0
  259. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  260. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  261. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/datasets/.gitignore +0 -0
  262. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  263. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/test_datachain.py +0 -0
  264. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/test_ls.py +0 -0
  265. {datachain-0.25.0 → datachain-0.25.2}/tests/benchmarks/test_version.py +0 -0
  266. {datachain-0.25.0 → datachain-0.25.2}/tests/conftest.py +0 -0
  267. {datachain-0.25.0 → datachain-0.25.2}/tests/data.py +0 -0
  268. {datachain-0.25.0 → datachain-0.25.2}/tests/examples/__init__.py +0 -0
  269. {datachain-0.25.0 → datachain-0.25.2}/tests/examples/test_examples.py +0 -0
  270. {datachain-0.25.0 → datachain-0.25.2}/tests/examples/test_wds_e2e.py +0 -0
  271. {datachain-0.25.0 → datachain-0.25.2}/tests/examples/wds_data.py +0 -0
  272. {datachain-0.25.0 → datachain-0.25.2}/tests/func/__init__.py +0 -0
  273. {datachain-0.25.0 → datachain-0.25.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  274. {datachain-0.25.0 → datachain-0.25.2}/tests/func/data/lena.jpg +0 -0
  275. {datachain-0.25.0 → datachain-0.25.2}/tests/func/fake-service-account-credentials.json +0 -0
  276. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/__init__.py +0 -0
  277. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/test_aggregate.py +0 -0
  278. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/test_array.py +0 -0
  279. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/test_conditional.py +0 -0
  280. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/test_numeric.py +0 -0
  281. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/test_path.py +0 -0
  282. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/test_random.py +0 -0
  283. {datachain-0.25.0 → datachain-0.25.2}/tests/func/functions/test_string.py +0 -0
  284. {datachain-0.25.0 → datachain-0.25.2}/tests/func/model/__init__.py +0 -0
  285. {datachain-0.25.0 → datachain-0.25.2}/tests/func/model/data/running-mask0.png +0 -0
  286. {datachain-0.25.0 → datachain-0.25.2}/tests/func/model/data/running-mask1.png +0 -0
  287. {datachain-0.25.0 → datachain-0.25.2}/tests/func/model/data/running.jpg +0 -0
  288. {datachain-0.25.0 → datachain-0.25.2}/tests/func/model/data/ships.jpg +0 -0
  289. {datachain-0.25.0 → datachain-0.25.2}/tests/func/model/test_yolo.py +0 -0
  290. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_batching.py +0 -0
  291. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_catalog.py +0 -0
  292. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_client.py +0 -0
  293. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_cloud_transfer.py +0 -0
  294. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_data_storage.py +0 -0
  295. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_datachain.py +0 -0
  296. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_datachain_merge.py +0 -0
  297. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_dataset_query.py +0 -0
  298. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_datasets.py +0 -0
  299. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_delta.py +0 -0
  300. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_feature_pickling.py +0 -0
  301. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_file.py +0 -0
  302. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_hidden_field.py +0 -0
  303. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_image.py +0 -0
  304. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_listing.py +0 -0
  305. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_ls.py +0 -0
  306. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_meta_formats.py +0 -0
  307. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_metastore.py +0 -0
  308. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_metrics.py +0 -0
  309. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_pull.py +0 -0
  310. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_pytorch.py +0 -0
  311. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_query.py +0 -0
  312. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_read_database.py +0 -0
  313. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_read_dataset_remote.py +0 -0
  314. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  315. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_retry.py +0 -0
  316. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_session.py +0 -0
  317. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_toolkit.py +0 -0
  318. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_video.py +0 -0
  319. {datachain-0.25.0 → datachain-0.25.2}/tests/func/test_warehouse.py +0 -0
  320. {datachain-0.25.0 → datachain-0.25.2}/tests/scripts/feature_class.py +0 -0
  321. {datachain-0.25.0 → datachain-0.25.2}/tests/scripts/feature_class_exception.py +0 -0
  322. {datachain-0.25.0 → datachain-0.25.2}/tests/scripts/feature_class_parallel.py +0 -0
  323. {datachain-0.25.0 → datachain-0.25.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  324. {datachain-0.25.0 → datachain-0.25.2}/tests/scripts/name_len_slow.py +0 -0
  325. {datachain-0.25.0 → datachain-0.25.2}/tests/test_atomicity.py +0 -0
  326. {datachain-0.25.0 → datachain-0.25.2}/tests/test_cli_e2e.py +0 -0
  327. {datachain-0.25.0 → datachain-0.25.2}/tests/test_cli_studio.py +0 -0
  328. {datachain-0.25.0 → datachain-0.25.2}/tests/test_import_time.py +0 -0
  329. {datachain-0.25.0 → datachain-0.25.2}/tests/test_query_e2e.py +0 -0
  330. {datachain-0.25.0 → datachain-0.25.2}/tests/test_telemetry.py +0 -0
  331. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/__init__.py +0 -0
  332. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/__init__.py +0 -0
  333. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/conftest.py +0 -0
  334. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_arrow.py +0 -0
  335. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_clip.py +0 -0
  336. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  337. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  338. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_diff.py +0 -0
  339. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_feature.py +0 -0
  340. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_feature_utils.py +0 -0
  341. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_file.py +0 -0
  342. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_hf.py +0 -0
  343. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_image.py +0 -0
  344. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_listing_info.py +0 -0
  345. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_namespace.py +0 -0
  346. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_project.py +0 -0
  347. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  348. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_schema.py +0 -0
  349. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_signal_schema.py +0 -0
  350. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  351. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_text.py +0 -0
  352. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_udf.py +0 -0
  353. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_udf_signature.py +0 -0
  354. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_utils.py +0 -0
  355. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/lib/test_webdataset.py +0 -0
  356. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/model/__init__.py +0 -0
  357. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/model/test_bbox.py +0 -0
  358. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/model/test_pose.py +0 -0
  359. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/model/test_segment.py +0 -0
  360. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/model/test_utils.py +0 -0
  361. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/__init__.py +0 -0
  362. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  363. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  364. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  365. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/test_array.py +0 -0
  366. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/test_conditional.py +0 -0
  367. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/test_path.py +0 -0
  368. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/test_random.py +0 -0
  369. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/test_selectable.py +0 -0
  370. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/sql/test_string.py +0 -0
  371. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_asyn.py +0 -0
  372. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_cache.py +0 -0
  373. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_catalog.py +0 -0
  374. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_catalog_loader.py +0 -0
  375. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_cli_parsing.py +0 -0
  376. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_client.py +0 -0
  377. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_client_gcs.py +0 -0
  378. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_client_s3.py +0 -0
  379. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_config.py +0 -0
  380. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_data_storage.py +0 -0
  381. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_database_engine.py +0 -0
  382. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_dataset.py +0 -0
  383. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_dispatch.py +0 -0
  384. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_fileslice.py +0 -0
  385. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_func.py +0 -0
  386. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_listing.py +0 -0
  387. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_metastore.py +0 -0
  388. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_module_exports.py +0 -0
  389. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_pytorch.py +0 -0
  390. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_query.py +0 -0
  391. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_query_metrics.py +0 -0
  392. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_query_params.py +0 -0
  393. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_script_meta.py +0 -0
  394. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_semver.py +0 -0
  395. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_serializer.py +0 -0
  396. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_session.py +0 -0
  397. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_utils.py +0 -0
  398. {datachain-0.25.0 → datachain-0.25.2}/tests/unit/test_warehouse.py +0 -0
  399. {datachain-0.25.0 → datachain-0.25.2}/tests/utils.py +0 -0
@@ -30,9 +30,6 @@ jobs:
30
30
  enable-cache: true
31
31
  cache-suffix: benchmarks
32
32
  cache-dependency-glob: pyproject.toml
33
- # revert after this is fixed
34
- # https://github.com/wntrblm/nox/issues/953
35
- version: ">=0.6,<0.7"
36
33
 
37
34
  - name: Install nox and dvc
38
35
  run: uv pip install dvc[gs] nox --system
@@ -75,8 +75,21 @@ jobs:
75
75
  path: './backend/datachain'
76
76
  fetch-depth: 0
77
77
 
78
- - name: Set up FFmpeg
79
- uses: AnimMouse/setup-ffmpeg@v1
78
+ - name: Install FFmpeg on Windows
79
+ if: runner.os == 'Windows'
80
+ run: choco install ffmpeg
81
+
82
+ - name: Install FFmpeg on macOS
83
+ if: runner.os == 'macOS'
84
+ run: |
85
+ brew install ffmpeg
86
+ echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
87
+
88
+ - name: Install FFmpeg on Ubuntu
89
+ if: runner.os == 'Linux'
90
+ run: |
91
+ sudo apt update
92
+ sudo apt install -y ffmpeg
80
93
 
81
94
  - name: Set up Python ${{ matrix.pyv }}
82
95
  uses: actions/setup-python@v5
@@ -34,9 +34,6 @@ jobs:
34
34
  enable-cache: true
35
35
  cache-suffix: lint
36
36
  cache-dependency-glob: pyproject.toml
37
- # revert after this is fixed
38
- # https://github.com/wntrblm/nox/issues/953
39
- version: ">=0.6,<0.7"
40
37
 
41
38
  - name: Install nox
42
39
  run: uv pip install nox --system
@@ -81,9 +78,6 @@ jobs:
81
78
  fetch-depth: 0
82
79
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
83
80
 
84
- - name: Set up FFmpeg
85
- uses: AnimMouse/setup-ffmpeg@v1
86
-
87
81
  - name: Set up Python ${{ matrix.pyv }}
88
82
  uses: actions/setup-python@v5
89
83
  with:
@@ -95,9 +89,22 @@ jobs:
95
89
  enable-cache: true
96
90
  cache-suffix: tests-${{ matrix.pyv }}
97
91
  cache-dependency-glob: pyproject.toml
98
- # revert after this is fixed
99
- # https://github.com/wntrblm/nox/issues/953
100
- version: ">=0.6,<0.7"
92
+
93
+ - name: Install FFmpeg on Windows
94
+ if: runner.os == 'Windows'
95
+ run: choco install ffmpeg
96
+
97
+ - name: Install FFmpeg on macOS
98
+ if: runner.os == 'macOS'
99
+ run: |
100
+ brew install ffmpeg
101
+ echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
102
+
103
+ - name: Install FFmpeg on Ubuntu
104
+ if: runner.os == 'Linux'
105
+ run: |
106
+ sudo apt update
107
+ sudo apt install -y ffmpeg
101
108
 
102
109
  - name: Install nox
103
110
  run: uv pip install nox --system
@@ -165,9 +172,6 @@ jobs:
165
172
  enable-cache: true
166
173
  cache-suffix: examples-${{ matrix.pyv }}
167
174
  cache-dependency-glob: pyproject.toml
168
- # revert after this is fixed
169
- # https://github.com/wntrblm/nox/issues/953
170
- version: ">=0.6,<0.7"
171
175
 
172
176
  - name: Install nox
173
177
  run: uv pip install nox --system
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.25.0
3
+ Version: 0.25.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -70,7 +70,8 @@ Provides-Extra: vector
70
70
  Requires-Dist: usearch; extra == "vector"
71
71
  Provides-Extra: hf
72
72
  Requires-Dist: numba>=0.60.0; extra == "hf"
73
- Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
73
+ Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
74
+ Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
74
75
  Requires-Dist: fsspec>=2024.12.0; extra == "hf"
75
76
  Provides-Extra: video
76
77
  Requires-Dist: ffmpeg-python; extra == "video"
@@ -15,6 +15,10 @@ for examples of how to create a chain.
15
15
 
16
16
  ::: datachain.lib.dc.datasets.datasets
17
17
 
18
+ ::: datachain.lib.dc.datasets.delete_dataset
19
+
20
+ ::: datachain.lib.dc.datasets.move_dataset
21
+
18
22
  ::: datachain.lib.dc.hf.read_hf
19
23
 
20
24
  ::: datachain.lib.dc.json.read_json
@@ -81,7 +81,9 @@ vector = [
81
81
  ]
82
82
  hf = [
83
83
  "numba>=0.60.0",
84
- "datasets[audio,vision]>=2.21.0",
84
+ "datasets[vision]>=4.0.0",
85
+ # https://github.com/pytorch/torchcodec/issues/640
86
+ "datasets[audio]>=4.0.0 ; (sys_platform == 'linux' or sys_platform == 'darwin')",
85
87
  "fsspec>=2024.12.0"
86
88
  ]
87
89
  video = [
@@ -126,7 +126,16 @@ class ArrowGenerator(Generator):
126
126
  if isinstance(kwargs.get("format"), CsvFileFormat):
127
127
  kwargs["format"] = "csv"
128
128
  arrow_file = ArrowRow(file=file, index=index, kwargs=kwargs)
129
+
130
+ if self.output_schema and hasattr(vals[0], "source"):
131
+ # if we are reading parquet file written by datachain it might have
132
+ # source inside of it already, so we should not duplicate it, instead
133
+ # we are re-creating it based on the self.source flag
134
+ vals[0].source = arrow_file # type: ignore[attr-defined]
135
+
136
+ return vals
129
137
  return [arrow_file, *vals]
138
+
130
139
  return vals
131
140
 
132
141
  def _process_non_datachain_record(
@@ -376,7 +376,7 @@ def move_dataset(
376
376
  the namespace and project, or a regular name. If a regular name is used,
377
377
  default values will be applied. The source dataset will no longer exist
378
378
  after the move.
379
- dst: The destination dataset name. This can also be a fully qualified
379
+ dest: The destination dataset name. This can also be a fully qualified
380
380
  name with a namespace and project, or just a regular name (default values
381
381
  will be used in that case). The original dataset will be moved here.
382
382
  session: An optional session instance. If not provided, the default session
@@ -11,7 +11,7 @@ try:
11
11
  Image,
12
12
  IterableDataset,
13
13
  IterableDatasetDict,
14
- Sequence,
14
+ List,
15
15
  Value,
16
16
  load_dataset,
17
17
  )
@@ -59,7 +59,6 @@ class HFImage(DataModel):
59
59
 
60
60
 
61
61
  class HFAudio(DataModel):
62
- path: str
63
62
  array: list[float]
64
63
  sampling_rate: int
65
64
 
@@ -116,26 +115,24 @@ def stream_splits(ds: Union[str, HFDatasetType], *args, **kwargs):
116
115
  return {"": ds}
117
116
 
118
117
 
119
- def convert_feature(val: Any, feat: Any, anno: Any) -> Any: # noqa: PLR0911
120
- if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D)):
118
+ def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
119
+ if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D, List)):
121
120
  return val
122
121
  if isinstance(feat, ClassLabel):
123
122
  return HFClassLabel(string=feat.names[val], integer=val)
124
- if isinstance(feat, Sequence):
125
- if isinstance(feat.feature, dict):
126
- sdict = {}
127
- for sname in val:
128
- sfeat = feat.feature[sname]
129
- sanno = anno.model_fields[sname].annotation
130
- sdict[sname] = [convert_feature(v, sfeat, sanno) for v in val[sname]]
131
- return anno(**sdict)
132
- return val
123
+ if isinstance(feat, dict):
124
+ sdict = {}
125
+ for sname in val:
126
+ sfeat = feat[sname]
127
+ sanno = anno.model_fields[sname].annotation
128
+ sdict[sname] = [convert_feature(v, sfeat, sanno) for v in val[sname]]
129
+ return anno(**sdict)
133
130
  if isinstance(feat, Image):
134
131
  if isinstance(val, dict):
135
132
  return HFImage(img=val["bytes"])
136
133
  return HFImage(img=image_to_bytes(val))
137
134
  if isinstance(feat, Audio):
138
- return HFAudio(**val)
135
+ return HFAudio(array=val["array"], sampling_rate=val["sampling_rate"])
139
136
 
140
137
 
141
138
  def get_output_schema(features: Features) -> dict[str, DataType]:
@@ -151,13 +148,13 @@ def _feature_to_chain_type(name: str, val: Any) -> DataType: # noqa: PLR0911
151
148
  return arrow_type_mapper(val.pa_type)
152
149
  if isinstance(val, ClassLabel):
153
150
  return HFClassLabel
154
- if isinstance(val, Sequence):
155
- if isinstance(val.feature, dict):
156
- sequence_dict = {}
157
- for sname, sval in val.feature.items():
158
- dtype = _feature_to_chain_type(sname, sval)
159
- sequence_dict[sname] = list[dtype] # type: ignore[valid-type]
160
- return dict_to_data_model(name, sequence_dict) # type: ignore[arg-type]
151
+ if isinstance(val, dict):
152
+ sequence_dict = {}
153
+ for sname, sval in val.items():
154
+ dtype = _feature_to_chain_type(sname, sval)
155
+ sequence_dict[sname] = dtype # type: ignore[valid-type]
156
+ return dict_to_data_model(name, sequence_dict) # type: ignore[arg-type]
157
+ if isinstance(val, List):
161
158
  return list[_feature_to_chain_type(name, val.feature)] # type: ignore[arg-type,misc,return-value]
162
159
  if isinstance(val, Array2D):
163
160
  dtype = arrow_type_mapper(string_to_arrow(val.dtype))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.25.0
3
+ Version: 0.25.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -70,7 +70,8 @@ Provides-Extra: vector
70
70
  Requires-Dist: usearch; extra == "vector"
71
71
  Provides-Extra: hf
72
72
  Requires-Dist: numba>=0.60.0; extra == "hf"
73
- Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
73
+ Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
74
+ Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
74
75
  Requires-Dist: fsspec>=2024.12.0; extra == "hf"
75
76
  Provides-Extra: video
76
77
  Requires-Dist: ffmpeg-python; extra == "video"
@@ -63,9 +63,12 @@ open_clip_torch
63
63
 
64
64
  [hf]
65
65
  numba>=0.60.0
66
- datasets[audio,vision]>=2.21.0
66
+ datasets[vision]>=4.0.0
67
67
  fsspec>=2024.12.0
68
68
 
69
+ [hf:sys_platform == "linux" or sys_platform == "darwin"]
70
+ datasets[audio]>=4.0.0
71
+
69
72
  [remote]
70
73
  lz4
71
74
  requests>=2.22.0
@@ -1,4 +1,7 @@
1
+ import importlib
2
+
1
3
  import numpy as np
4
+ import pytest
2
5
  from datasets import load_dataset
3
6
  from datasets.features.image import image_to_bytes
4
7
  from PIL import Image
@@ -12,6 +15,18 @@ from datachain.lib.hf import (
12
15
  )
13
16
 
14
17
 
18
+ def require_torchcodec(test_case):
19
+ """
20
+ Decorator marking a test that requires torchcodec (not available on Windows).
21
+ These tests are skipped when torchcodec isn't installed.
22
+ """
23
+ if not importlib.util.find_spec("torchcodec"):
24
+ test_case = pytest.mark.skip(
25
+ "test requires torchcodec, not available on Windows yet"
26
+ )(test_case)
27
+ return test_case
28
+
29
+
15
30
  def test_hf_image(tmp_path):
16
31
  train_dir = tmp_path / "train"
17
32
  train_dir.mkdir()
@@ -28,6 +43,7 @@ def test_hf_image(tmp_path):
28
43
  assert row.image.img == image_to_bytes(img)
29
44
 
30
45
 
46
+ @require_torchcodec
31
47
  def test_hf_audio(tmp_path):
32
48
  # See https://stackoverflow.com/questions/66191480/how-to-convert-a-numpy-array-to-a-mp3-file
33
49
  samplerate = 44100
@@ -45,6 +61,5 @@ def test_hf_audio(tmp_path):
45
61
  gen = HFGenerator(ds, dict_to_data_model("", schema))
46
62
  gen.setup()
47
63
  row = next(iter(gen.process("train")))
48
- assert row.audio.path == str(train_dir / "example.wav")
49
64
  assert np.allclose(row.audio.array, data / amplitude, atol=1e-4)
50
65
  assert row.audio.sampling_rate == samplerate
@@ -1760,6 +1760,22 @@ def test_read_parquet(tmp_dir, test_session):
1760
1760
  assert df_equal(df1, df)
1761
1761
 
1762
1762
 
1763
+ def test_read_parquet_exported_with_source(test_session, tmp_dir):
1764
+ path = tmp_dir / "df.parquet"
1765
+ path2 = tmp_dir / "df2.parquet"
1766
+ df = pd.DataFrame(DF_DATA)
1767
+
1768
+ df.to_parquet(path)
1769
+ dc.read_parquet(path, source=True).to_parquet(path2)
1770
+ df1 = (
1771
+ dc.read_parquet(path2, source=True)
1772
+ .select("first_name", "age", "city")
1773
+ .to_pandas()
1774
+ )
1775
+
1776
+ assert df_equal(df1, df)
1777
+
1778
+
1763
1779
  @skip_if_not_sqlite
1764
1780
  def test_read_parquet_in_memory(tmp_dir):
1765
1781
  df = pd.DataFrame(DF_DATA)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes