datachain 0.31.0__tar.gz → 0.31.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (423)
  1. {datachain-0.31.0 → datachain-0.31.2}/.github/workflows/benchmarks.yml +1 -1
  2. {datachain-0.31.0 → datachain-0.31.2}/.github/workflows/release.yml +1 -1
  3. {datachain-0.31.0 → datachain-0.31.2}/.github/workflows/tests-studio.yml +1 -1
  4. {datachain-0.31.0 → datachain-0.31.2}/.github/workflows/tests.yml +8 -5
  5. {datachain-0.31.0 → datachain-0.31.2}/.pre-commit-config.yaml +1 -1
  6. {datachain-0.31.0 → datachain-0.31.2}/PKG-INFO +1 -1
  7. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/fsspec.py +11 -0
  8. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/clip.py +9 -9
  9. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/csv.py +15 -19
  10. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/datachain.py +64 -62
  11. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/datasets.py +8 -8
  12. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/hf.py +13 -17
  13. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/json.py +9 -9
  14. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/parquet.py +11 -14
  15. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/records.py +2 -2
  16. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/storage.py +74 -44
  17. datachain-0.31.2/src/datachain/lib/dc/storage_pattern.py +300 -0
  18. {datachain-0.31.0 → datachain-0.31.2}/src/datachain.egg-info/PKG-INFO +1 -1
  19. {datachain-0.31.0 → datachain-0.31.2}/src/datachain.egg-info/SOURCES.txt +3 -0
  20. {datachain-0.31.0 → datachain-0.31.2}/tests/conftest.py +4 -1
  21. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_datachain.py +5 -4
  22. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_datachain_merge.py +4 -4
  23. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_datasets.py +3 -3
  24. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_delta.py +86 -4
  25. datachain-0.31.2/tests/func/test_storage_pattern.py +261 -0
  26. datachain-0.31.2/tests/unit/lib/test_storage_pattern.py +121 -0
  27. {datachain-0.31.0 → datachain-0.31.2}/.cruft.json +0 -0
  28. {datachain-0.31.0 → datachain-0.31.2}/.gitattributes +0 -0
  29. {datachain-0.31.0 → datachain-0.31.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  30. {datachain-0.31.0 → datachain-0.31.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  31. {datachain-0.31.0 → datachain-0.31.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  32. {datachain-0.31.0 → datachain-0.31.2}/.github/codecov.yaml +0 -0
  33. {datachain-0.31.0 → datachain-0.31.2}/.github/dependabot.yml +0 -0
  34. {datachain-0.31.0 → datachain-0.31.2}/.github/workflows/update-template.yaml +0 -0
  35. {datachain-0.31.0 → datachain-0.31.2}/.gitignore +0 -0
  36. {datachain-0.31.0 → datachain-0.31.2}/CODE_OF_CONDUCT.rst +0 -0
  37. {datachain-0.31.0 → datachain-0.31.2}/LICENSE +0 -0
  38. {datachain-0.31.0 → datachain-0.31.2}/README.rst +0 -0
  39. {datachain-0.31.0 → datachain-0.31.2}/docs/assets/captioned_cartoons.png +0 -0
  40. {datachain-0.31.0 → datachain-0.31.2}/docs/assets/datachain-white.svg +0 -0
  41. {datachain-0.31.0 → datachain-0.31.2}/docs/assets/datachain.svg +0 -0
  42. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/auth/login.md +0 -0
  43. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/auth/logout.md +0 -0
  44. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/auth/team.md +0 -0
  45. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/auth/token.md +0 -0
  46. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/index.md +0 -0
  47. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/job/cancel.md +0 -0
  48. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/job/clusters.md +0 -0
  49. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/job/logs.md +0 -0
  50. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/job/ls.md +0 -0
  51. {datachain-0.31.0 → datachain-0.31.2}/docs/commands/job/run.md +0 -0
  52. {datachain-0.31.0 → datachain-0.31.2}/docs/contributing.md +0 -0
  53. {datachain-0.31.0 → datachain-0.31.2}/docs/css/github-permalink-style.css +0 -0
  54. {datachain-0.31.0 → datachain-0.31.2}/docs/examples.md +0 -0
  55. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/db_migrations.md +0 -0
  56. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/delta.md +0 -0
  57. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/env.md +0 -0
  58. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/index.md +0 -0
  59. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/namespaces.md +0 -0
  60. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/processing.md +0 -0
  61. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/remotes.md +0 -0
  62. {datachain-0.31.0 → datachain-0.31.2}/docs/guide/retry.md +0 -0
  63. {datachain-0.31.0 → datachain-0.31.2}/docs/index.md +0 -0
  64. {datachain-0.31.0 → datachain-0.31.2}/docs/overrides/main.html +0 -0
  65. {datachain-0.31.0 → datachain-0.31.2}/docs/quick-start.md +0 -0
  66. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/arrowrow.md +0 -0
  67. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/bbox.md +0 -0
  68. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/file.md +0 -0
  69. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/imagefile.md +0 -0
  70. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/index.md +0 -0
  71. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/pose.md +0 -0
  72. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/segment.md +0 -0
  73. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/tarvfile.md +0 -0
  74. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/textfile.md +0 -0
  75. {datachain-0.31.0 → datachain-0.31.2}/docs/references/data-types/videofile.md +0 -0
  76. {datachain-0.31.0 → datachain-0.31.2}/docs/references/datachain.md +0 -0
  77. {datachain-0.31.0 → datachain-0.31.2}/docs/references/func.md +0 -0
  78. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/aggregate.md +0 -0
  79. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/array.md +0 -0
  80. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/conditional.md +0 -0
  81. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/numeric.md +0 -0
  82. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/path.md +0 -0
  83. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/random.md +0 -0
  84. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/string.md +0 -0
  85. {datachain-0.31.0 → datachain-0.31.2}/docs/references/functions/window.md +0 -0
  86. {datachain-0.31.0 → datachain-0.31.2}/docs/references/index.md +0 -0
  87. {datachain-0.31.0 → datachain-0.31.2}/docs/references/toolkit.md +0 -0
  88. {datachain-0.31.0 → datachain-0.31.2}/docs/references/torch.md +0 -0
  89. {datachain-0.31.0 → datachain-0.31.2}/docs/references/udf.md +0 -0
  90. {datachain-0.31.0 → datachain-0.31.2}/docs/tutorials.md +0 -0
  91. {datachain-0.31.0 → datachain-0.31.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  92. {datachain-0.31.0 → datachain-0.31.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  93. {datachain-0.31.0 → datachain-0.31.2}/examples/computer_vision/openimage-detect.py +0 -0
  94. {datachain-0.31.0 → datachain-0.31.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  95. {datachain-0.31.0 → datachain-0.31.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  96. {datachain-0.31.0 → datachain-0.31.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  97. {datachain-0.31.0 → datachain-0.31.2}/examples/get_started/common_sql_functions.py +0 -0
  98. {datachain-0.31.0 → datachain-0.31.2}/examples/get_started/json-csv-reader.py +0 -0
  99. {datachain-0.31.0 → datachain-0.31.2}/examples/get_started/nested_datamodel.py +0 -0
  100. {datachain-0.31.0 → datachain-0.31.2}/examples/get_started/torch-loader.py +0 -0
  101. {datachain-0.31.0 → datachain-0.31.2}/examples/get_started/udfs/parallel.py +0 -0
  102. {datachain-0.31.0 → datachain-0.31.2}/examples/get_started/udfs/simple.py +0 -0
  103. {datachain-0.31.0 → datachain-0.31.2}/examples/get_started/udfs/stateful.py +0 -0
  104. {datachain-0.31.0 → datachain-0.31.2}/examples/incremental_processing/delta.py +0 -0
  105. {datachain-0.31.0 → datachain-0.31.2}/examples/incremental_processing/retry.py +0 -0
  106. {datachain-0.31.0 → datachain-0.31.2}/examples/incremental_processing/utils.py +0 -0
  107. {datachain-0.31.0 → datachain-0.31.2}/examples/llm_and_nlp/claude-query.py +0 -0
  108. {datachain-0.31.0 → datachain-0.31.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  109. {datachain-0.31.0 → datachain-0.31.2}/examples/multimodal/audio-to-text.py +0 -0
  110. {datachain-0.31.0 → datachain-0.31.2}/examples/multimodal/clip_inference.py +0 -0
  111. {datachain-0.31.0 → datachain-0.31.2}/examples/multimodal/hf_pipeline.py +0 -0
  112. {datachain-0.31.0 → datachain-0.31.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  113. {datachain-0.31.0 → datachain-0.31.2}/examples/multimodal/wds.py +0 -0
  114. {datachain-0.31.0 → datachain-0.31.2}/examples/multimodal/wds_filtered.py +0 -0
  115. {datachain-0.31.0 → datachain-0.31.2}/mkdocs.yml +0 -0
  116. {datachain-0.31.0 → datachain-0.31.2}/noxfile.py +0 -0
  117. {datachain-0.31.0 → datachain-0.31.2}/pyproject.toml +0 -0
  118. {datachain-0.31.0 → datachain-0.31.2}/setup.cfg +0 -0
  119. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/__init__.py +0 -0
  120. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/__main__.py +0 -0
  121. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/asyn.py +0 -0
  122. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cache.py +0 -0
  123. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/catalog/__init__.py +0 -0
  124. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/catalog/catalog.py +0 -0
  125. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/catalog/datasource.py +0 -0
  126. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/catalog/loader.py +0 -0
  127. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/__init__.py +0 -0
  128. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/__init__.py +0 -0
  129. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/datasets.py +0 -0
  130. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/du.py +0 -0
  131. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/index.py +0 -0
  132. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/ls.py +0 -0
  133. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/misc.py +0 -0
  134. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/query.py +0 -0
  135. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/commands/show.py +0 -0
  136. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/parser/__init__.py +0 -0
  137. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/parser/job.py +0 -0
  138. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/parser/studio.py +0 -0
  139. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/parser/utils.py +0 -0
  140. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/cli/utils.py +0 -0
  141. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/__init__.py +0 -0
  142. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/azure.py +0 -0
  143. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/fileslice.py +0 -0
  144. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/gcs.py +0 -0
  145. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/hf.py +0 -0
  146. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/local.py +0 -0
  147. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/client/s3.py +0 -0
  148. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/config.py +0 -0
  149. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/__init__.py +0 -0
  150. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/db_engine.py +0 -0
  151. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/job.py +0 -0
  152. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/metastore.py +0 -0
  153. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/schema.py +0 -0
  154. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/serializer.py +0 -0
  155. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/sqlite.py +0 -0
  156. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/data_storage/warehouse.py +0 -0
  157. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/dataset.py +0 -0
  158. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/delta.py +0 -0
  159. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/diff/__init__.py +0 -0
  160. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/error.py +0 -0
  161. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/fs/__init__.py +0 -0
  162. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/fs/reference.py +0 -0
  163. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/fs/utils.py +0 -0
  164. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/__init__.py +0 -0
  165. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/aggregate.py +0 -0
  166. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/array.py +0 -0
  167. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/base.py +0 -0
  168. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/conditional.py +0 -0
  169. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/func.py +0 -0
  170. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/numeric.py +0 -0
  171. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/path.py +0 -0
  172. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/random.py +0 -0
  173. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/string.py +0 -0
  174. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/func/window.py +0 -0
  175. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/job.py +0 -0
  176. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/__init__.py +0 -0
  177. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/arrow.py +0 -0
  178. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/audio.py +0 -0
  179. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/convert/__init__.py +0 -0
  180. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/convert/flatten.py +0 -0
  181. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  182. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  183. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/convert/unflatten.py +0 -0
  184. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  185. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/data_model.py +0 -0
  186. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dataset_info.py +0 -0
  187. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/__init__.py +0 -0
  188. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/database.py +0 -0
  189. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/listings.py +0 -0
  190. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/pandas.py +0 -0
  191. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/utils.py +0 -0
  192. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/dc/values.py +0 -0
  193. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/file.py +0 -0
  194. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/hf.py +0 -0
  195. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/image.py +0 -0
  196. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/listing.py +0 -0
  197. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/listing_info.py +0 -0
  198. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/meta_formats.py +0 -0
  199. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/model_store.py +0 -0
  200. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/namespaces.py +0 -0
  201. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/projects.py +0 -0
  202. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/pytorch.py +0 -0
  203. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/settings.py +0 -0
  204. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/signal_schema.py +0 -0
  205. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/tar.py +0 -0
  206. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/text.py +0 -0
  207. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/udf.py +0 -0
  208. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/udf_signature.py +0 -0
  209. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/utils.py +0 -0
  210. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/video.py +0 -0
  211. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/webdataset.py +0 -0
  212. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/lib/webdataset_laion.py +0 -0
  213. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/listing.py +0 -0
  214. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/__init__.py +0 -0
  215. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/bbox.py +0 -0
  216. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/pose.py +0 -0
  217. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/segment.py +0 -0
  218. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  219. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  220. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/ultralytics/pose.py +0 -0
  221. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/ultralytics/segment.py +0 -0
  222. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/model/utils.py +0 -0
  223. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/namespace.py +0 -0
  224. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/node.py +0 -0
  225. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/nodes_fetcher.py +0 -0
  226. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/nodes_thread_pool.py +0 -0
  227. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/progress.py +0 -0
  228. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/project.py +0 -0
  229. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/py.typed +0 -0
  230. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/__init__.py +0 -0
  231. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/batch.py +0 -0
  232. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/dataset.py +0 -0
  233. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/dispatch.py +0 -0
  234. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/metrics.py +0 -0
  235. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/params.py +0 -0
  236. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/queue.py +0 -0
  237. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/schema.py +0 -0
  238. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/session.py +0 -0
  239. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/udf.py +0 -0
  240. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/query/utils.py +0 -0
  241. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/remote/__init__.py +0 -0
  242. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/remote/studio.py +0 -0
  243. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/script_meta.py +0 -0
  244. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/semver.py +0 -0
  245. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/__init__.py +0 -0
  246. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/default/__init__.py +0 -0
  247. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/default/base.py +0 -0
  248. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/__init__.py +0 -0
  249. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/aggregate.py +0 -0
  250. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/array.py +0 -0
  251. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/conditional.py +0 -0
  252. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/numeric.py +0 -0
  253. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/path.py +0 -0
  254. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/random.py +0 -0
  255. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/functions/string.py +0 -0
  256. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/postgresql_dialect.py +0 -0
  257. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/postgresql_types.py +0 -0
  258. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/selectable.py +0 -0
  259. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  260. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/sqlite/base.py +0 -0
  261. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/sqlite/types.py +0 -0
  262. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/sqlite/vector.py +0 -0
  263. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/types.py +0 -0
  264. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/sql/utils.py +0 -0
  265. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/studio.py +0 -0
  266. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/telemetry.py +0 -0
  267. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/toolkit/__init__.py +0 -0
  268. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/toolkit/split.py +0 -0
  269. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/torch/__init__.py +0 -0
  270. {datachain-0.31.0 → datachain-0.31.2}/src/datachain/utils.py +0 -0
  271. {datachain-0.31.0 → datachain-0.31.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  272. {datachain-0.31.0 → datachain-0.31.2}/src/datachain.egg-info/entry_points.txt +0 -0
  273. {datachain-0.31.0 → datachain-0.31.2}/src/datachain.egg-info/requires.txt +0 -0
  274. {datachain-0.31.0 → datachain-0.31.2}/src/datachain.egg-info/top_level.txt +0 -0
  275. {datachain-0.31.0 → datachain-0.31.2}/tests/__init__.py +0 -0
  276. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/__init__.py +0 -0
  277. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/conftest.py +0 -0
  278. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  279. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  280. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/datasets/.gitignore +0 -0
  281. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  282. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/test_datachain.py +0 -0
  283. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/test_ls.py +0 -0
  284. {datachain-0.31.0 → datachain-0.31.2}/tests/benchmarks/test_version.py +0 -0
  285. {datachain-0.31.0 → datachain-0.31.2}/tests/data.py +0 -0
  286. {datachain-0.31.0 → datachain-0.31.2}/tests/examples/__init__.py +0 -0
  287. {datachain-0.31.0 → datachain-0.31.2}/tests/examples/test_examples.py +0 -0
  288. {datachain-0.31.0 → datachain-0.31.2}/tests/examples/test_wds_e2e.py +0 -0
  289. {datachain-0.31.0 → datachain-0.31.2}/tests/examples/wds_data.py +0 -0
  290. {datachain-0.31.0 → datachain-0.31.2}/tests/func/__init__.py +0 -0
  291. {datachain-0.31.0 → datachain-0.31.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  292. {datachain-0.31.0 → datachain-0.31.2}/tests/func/data/lena.jpg +0 -0
  293. {datachain-0.31.0 → datachain-0.31.2}/tests/func/fake-service-account-credentials.json +0 -0
  294. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/__init__.py +0 -0
  295. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/test_aggregate.py +0 -0
  296. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/test_array.py +0 -0
  297. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/test_conditional.py +0 -0
  298. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/test_numeric.py +0 -0
  299. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/test_path.py +0 -0
  300. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/test_random.py +0 -0
  301. {datachain-0.31.0 → datachain-0.31.2}/tests/func/functions/test_string.py +0 -0
  302. {datachain-0.31.0 → datachain-0.31.2}/tests/func/model/__init__.py +0 -0
  303. {datachain-0.31.0 → datachain-0.31.2}/tests/func/model/data/running-mask0.png +0 -0
  304. {datachain-0.31.0 → datachain-0.31.2}/tests/func/model/data/running-mask1.png +0 -0
  305. {datachain-0.31.0 → datachain-0.31.2}/tests/func/model/data/running.jpg +0 -0
  306. {datachain-0.31.0 → datachain-0.31.2}/tests/func/model/data/ships.jpg +0 -0
  307. {datachain-0.31.0 → datachain-0.31.2}/tests/func/model/test_yolo.py +0 -0
  308. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_audio.py +0 -0
  309. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_batching.py +0 -0
  310. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_catalog.py +0 -0
  311. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_client.py +0 -0
  312. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_cloud_transfer.py +0 -0
  313. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_data_storage.py +0 -0
  314. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_dataset_query.py +0 -0
  315. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_feature_pickling.py +0 -0
  316. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_file.py +0 -0
  317. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_hf.py +0 -0
  318. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_hidden_field.py +0 -0
  319. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_image.py +0 -0
  320. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_listing.py +0 -0
  321. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_ls.py +0 -0
  322. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_meta_formats.py +0 -0
  323. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_metastore.py +0 -0
  324. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_metrics.py +0 -0
  325. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_mutate.py +0 -0
  326. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_pull.py +0 -0
  327. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_pytorch.py +0 -0
  328. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_query.py +0 -0
  329. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_read_database.py +0 -0
  330. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_read_dataset_remote.py +0 -0
  331. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  332. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_retry.py +0 -0
  333. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_session.py +0 -0
  334. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_studio_datetime_parsing.py +0 -0
  335. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_to_database.py +0 -0
  336. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_toolkit.py +0 -0
  337. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_video.py +0 -0
  338. {datachain-0.31.0 → datachain-0.31.2}/tests/func/test_warehouse.py +0 -0
  339. {datachain-0.31.0 → datachain-0.31.2}/tests/scripts/feature_class.py +0 -0
  340. {datachain-0.31.0 → datachain-0.31.2}/tests/scripts/feature_class_exception.py +0 -0
  341. {datachain-0.31.0 → datachain-0.31.2}/tests/scripts/feature_class_parallel.py +0 -0
  342. {datachain-0.31.0 → datachain-0.31.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  343. {datachain-0.31.0 → datachain-0.31.2}/tests/scripts/name_len_slow.py +0 -0
  344. {datachain-0.31.0 → datachain-0.31.2}/tests/test_atomicity.py +0 -0
  345. {datachain-0.31.0 → datachain-0.31.2}/tests/test_cli_e2e.py +0 -0
  346. {datachain-0.31.0 → datachain-0.31.2}/tests/test_cli_studio.py +0 -0
  347. {datachain-0.31.0 → datachain-0.31.2}/tests/test_import_time.py +0 -0
  348. {datachain-0.31.0 → datachain-0.31.2}/tests/test_query_e2e.py +0 -0
  349. {datachain-0.31.0 → datachain-0.31.2}/tests/test_telemetry.py +0 -0
  350. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/__init__.py +0 -0
  351. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/__init__.py +0 -0
  352. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/conftest.py +0 -0
  353. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_arrow.py +0 -0
  354. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_audio.py +0 -0
  355. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_clip.py +0 -0
  356. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_datachain.py +0 -0
  357. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  358. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  359. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_diff.py +0 -0
  360. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_feature.py +0 -0
  361. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_feature_utils.py +0 -0
  362. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_file.py +0 -0
  363. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_hf.py +0 -0
  364. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_image.py +0 -0
  365. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_listing_info.py +0 -0
  366. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_namespace.py +0 -0
  367. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_partition_by.py +0 -0
  368. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_project.py +0 -0
  369. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  370. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_schema.py +0 -0
  371. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_settings.py +0 -0
  372. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_signal_schema.py +0 -0
  373. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  374. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_text.py +0 -0
  375. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_udf.py +0 -0
  376. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_udf_signature.py +0 -0
  377. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_utils.py +0 -0
  378. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/lib/test_webdataset.py +0 -0
  379. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/model/__init__.py +0 -0
  380. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/model/test_bbox.py +0 -0
  381. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/model/test_pose.py +0 -0
  382. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/model/test_segment.py +0 -0
  383. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/model/test_utils.py +0 -0
  384. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/__init__.py +0 -0
  385. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  386. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  387. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  388. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/test_array.py +0 -0
  389. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/test_conditional.py +0 -0
  390. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/test_path.py +0 -0
  391. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/test_random.py +0 -0
  392. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/test_selectable.py +0 -0
  393. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/sql/test_string.py +0 -0
  394. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_asyn.py +0 -0
  395. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_cache.py +0 -0
  396. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_catalog.py +0 -0
  397. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_catalog_loader.py +0 -0
  398. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_cli_datasets.py +0 -0
  399. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_cli_parsing.py +0 -0
  400. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_client.py +0 -0
  401. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_client_gcs.py +0 -0
  402. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_client_s3.py +0 -0
  403. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_config.py +0 -0
  404. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_data_storage.py +0 -0
  405. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_database_engine.py +0 -0
  406. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_dataset.py +0 -0
  407. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_dispatch.py +0 -0
  408. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_fileslice.py +0 -0
  409. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_func.py +0 -0
  410. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_listing.py +0 -0
  411. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_metastore.py +0 -0
  412. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_module_exports.py +0 -0
  413. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_pytorch.py +0 -0
  414. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_query.py +0 -0
  415. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_query_metrics.py +0 -0
  416. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_query_params.py +0 -0
  417. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_script_meta.py +0 -0
  418. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_semver.py +0 -0
  419. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_serializer.py +0 -0
  420. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_session.py +0 -0
  421. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_utils.py +0 -0
  422. {datachain-0.31.0 → datachain-0.31.2}/tests/unit/test_warehouse.py +0 -0
  423. {datachain-0.31.0 → datachain-0.31.2}/tests/utils.py +0 -0
@@ -20,7 +20,7 @@ jobs:
20
20
  steps:
21
21
  - uses: actions/checkout@v5
22
22
  - name: Set up Python 3.13
23
- uses: actions/setup-python@v5
23
+ uses: actions/setup-python@v6
24
24
  with:
25
25
  python-version: '3.13'
26
26
 
@@ -22,7 +22,7 @@ jobs:
22
22
  fetch-depth: 0
23
23
 
24
24
  - name: Set up Python 3.13
25
- uses: actions/setup-python@v5
25
+ uses: actions/setup-python@v6
26
26
  with:
27
27
  python-version: '3.13'
28
28
 
@@ -92,7 +92,7 @@ jobs:
92
92
  sudo apt install -y ffmpeg
93
93
 
94
94
  - name: Set up Python ${{ matrix.pyv }}
95
- uses: actions/setup-python@v5
95
+ uses: actions/setup-python@v6
96
96
  with:
97
97
  python-version: ${{ matrix.pyv }}
98
98
 
@@ -24,7 +24,7 @@ jobs:
24
24
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
25
25
 
26
26
  - name: Set up Python 3.9
27
- uses: actions/setup-python@v5
27
+ uses: actions/setup-python@v6
28
28
  with:
29
29
  python-version: '3.9'
30
30
 
@@ -97,7 +97,7 @@ jobs:
97
97
  shell: bash
98
98
 
99
99
  - name: Set up Python ${{ matrix.pyv }}
100
- uses: actions/setup-python@v5
100
+ uses: actions/setup-python@v6
101
101
  with:
102
102
  python-version: ${{ matrix.pyv }}
103
103
 
@@ -114,9 +114,12 @@ jobs:
114
114
 
115
115
  - name: Install FFmpeg on macOS
116
116
  if: runner.os == 'macOS'
117
+ # Using ffmpeg@7 since torchcodec does not support ffmpeg 8 yet
118
+ # See: https://github.com/pytorch/torchcodec/issues/839
119
+ # On Windows and Linux ffmpeg < 8 is installed by default
117
120
  run: |
118
- brew install ffmpeg
119
- echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
121
+ brew install ffmpeg@7
122
+ echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/opt/ffmpeg@7/lib' >> "$GITHUB_ENV"
120
123
 
121
124
  - name: Install FFmpeg on Ubuntu
122
125
  if: runner.os == 'Linux'
@@ -180,7 +183,7 @@ jobs:
180
183
  ref: ${{ github.event.pull_request.head.sha || github.ref }}
181
184
 
182
185
  - name: Set up Python ${{ matrix.pyv }}
183
- uses: actions/setup-python@v5
186
+ uses: actions/setup-python@v6
184
187
  with:
185
188
  python-version: ${{ matrix.pyv }}
186
189
 
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.11'
27
+ rev: 'v0.12.12'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.31.0
3
+ Version: 0.31.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -44,6 +44,7 @@ FETCH_WORKERS = 100
44
44
  DELIMITER = "/" # Path delimiter.
45
45
 
46
46
  DATA_SOURCE_URI_PATTERN = re.compile(r"^[\w]+:\/\/.*$")
47
+ CLOUD_STORAGE_PROTOCOLS = {"s3", "gs", "az", "hf"}
47
48
 
48
49
  ResultQueue = asyncio.Queue[Optional[Sequence["File"]]]
49
50
 
@@ -62,6 +63,16 @@ def _is_win_local_path(uri: str) -> bool:
62
63
  return False
63
64
 
64
65
 
66
+ def is_cloud_uri(uri: str) -> bool:
67
+ protocol = urlparse(uri).scheme
68
+ return protocol in CLOUD_STORAGE_PROTOCOLS
69
+
70
+
71
+ def get_cloud_schemes() -> list[str]:
72
+ """Get list of cloud storage scheme prefixes."""
73
+ return [f"{p}://" for p in CLOUD_STORAGE_PROTOCOLS]
74
+
75
+
65
76
  class Bucket(NamedTuple):
66
77
  name: str
67
78
  uri: "StorageURI"
@@ -45,15 +45,15 @@ def clip_similarity_scores(
45
45
  Calculate CLIP similarity scores between one or more images and/or text.
46
46
 
47
47
  Parameters:
48
- images : Images to use as inputs.
49
- text : Text to use as inputs.
50
- model : Model from clip or open_clip packages.
51
- preprocess : Image preprocessor to apply.
52
- tokenizer : Text tokenizer.
53
- prob : Compute softmax probabilities.
54
- image_to_text : Whether to compute for image-to-text or text-to-image. Ignored
55
- if only one of images or text provided.
56
- device : Device to use. Defaults is None - use model's device.
48
+ images: Images to use as inputs.
49
+ text: Text to use as inputs.
50
+ model: Model from clip or open_clip packages.
51
+ preprocess: Image preprocessor to apply.
52
+ tokenizer: Text tokenizer.
53
+ prob: Compute softmax probabilities.
54
+ image_to_text: Whether to compute for image-to-text or text-to-image. Ignored
55
+ if only one of the images or text provided.
56
+ device: Device to use. Default is None - use model's device.
57
57
 
58
58
 
59
59
  Example:
@@ -1,10 +1,6 @@
1
+ import os
1
2
  from collections.abc import Sequence
2
- from typing import (
3
- TYPE_CHECKING,
4
- Callable,
5
- Optional,
6
- Union,
7
- )
3
+ from typing import TYPE_CHECKING, Callable, Optional, Union
8
4
 
9
5
  from datachain.lib.dc.utils import DatasetPrepareError, OutputType
10
6
  from datachain.lib.model_store import ModelStore
@@ -17,14 +13,14 @@ if TYPE_CHECKING:
17
13
 
18
14
 
19
15
  def read_csv(
20
- path,
16
+ path: Union[str, os.PathLike[str], list[str], list[os.PathLike[str]]],
21
17
  delimiter: Optional[str] = None,
22
18
  header: bool = True,
23
19
  output: OutputType = None,
24
20
  column: str = "",
25
21
  model_name: str = "",
26
22
  source: bool = True,
27
- nrows=None,
23
+ nrows: Optional[int] = None,
28
24
  session: Optional[Session] = None,
29
25
  settings: Optional[dict] = None,
30
26
  column_types: Optional[dict[str, "Union[str, ArrowDataType]"]] = None,
@@ -34,21 +30,21 @@ def read_csv(
34
30
  """Generate chain from csv files.
35
31
 
36
32
  Parameters:
37
- path : Storage URI with directory. URI must start with storage prefix such
33
+ path: Storage URI with directory. URI must start with storage prefix such
38
34
  as `s3://`, `gs://`, `az://` or "file:///".
39
- delimiter : Character for delimiting columns. Takes precedence if also
35
+ delimiter: Character for delimiting columns. Takes precedence if also
40
36
  specified in `parse_options`. Defaults to ",".
41
- header : Whether the files include a header row.
42
- output : Dictionary or feature class defining column names and their
37
+ header: Whether the files include a header row.
38
+ output: Dictionary or feature class defining column names and their
43
39
  corresponding types. List of column names is also accepted, in which
44
40
  case types will be inferred.
45
- column : Created column name.
46
- model_name : Generated model name.
47
- source : Whether to include info about the source file.
48
- nrows : Optional row limit.
49
- session : Session to use for the chain.
50
- settings : Settings to use for the chain.
51
- column_types : Dictionary of column names and their corresponding types.
41
+ column: Created column name.
42
+ model_name: Generated model name.
43
+ source: Whether to include info about the source file.
44
+ nrows: Optional row limit.
45
+ session: Session to use for the chain.
46
+ settings: Settings to use for the chain.
47
+ column_types: Dictionary of column names and their corresponding types.
52
48
  It is passed to CSV reader and for each column specified type auto
53
49
  inference is disabled.
54
50
  parse_options: Tells the parser how to process lines.
@@ -40,11 +40,7 @@ from datachain.lib.data_model import (
40
40
  StandardType,
41
41
  dict_to_data_model,
42
42
  )
43
- from datachain.lib.file import (
44
- EXPORT_FILES_MAX_THREADS,
45
- ArrowRow,
46
- FileExporter,
47
- )
43
+ from datachain.lib.file import EXPORT_FILES_MAX_THREADS, ArrowRow, FileExporter
48
44
  from datachain.lib.file import ExportPlacement as FileExportPlacement
49
45
  from datachain.lib.model_store import ModelStore
50
46
  from datachain.lib.settings import Settings
@@ -352,24 +348,28 @@ class DataChain:
352
348
  batch_size: Optional[int] = None,
353
349
  sys: Optional[bool] = None,
354
350
  ) -> "Self":
355
- """Change settings for chain.
356
-
357
- This function changes specified settings without changing not specified ones.
358
- It returns chain, so, it can be chained later with next operation.
351
+ """
352
+ Set chain execution parameters. Returns the chain itself, allowing method
353
+ chaining for subsequent operations. To restore all settings to their default
354
+ values, use `reset_settings()`.
359
355
 
360
356
  Parameters:
361
- cache : data caching. (default=False)
362
- prefetch : number of workers to use for downloading files in advance.
363
- This is enabled by default and uses 2 workers.
364
- To disable prefetching, set it to 0 or False.
365
- parallel : number of thread for processors. True is a special value to
366
- enable all available CPUs. (default=1)
367
- workers : number of distributed workers. Only for Studio mode. (default=1)
368
- namespace : namespace name.
369
- project : project name.
370
- min_task_size : minimum number of tasks. (default=1)
371
- batch_size : row limit per insert to balance speed and memory usage.
372
- (default=2000)
357
+ cache: Enable files caching to speed up subsequent accesses to the same
358
+ files from the same or different chains. Defaults to False.
359
+ prefetch: Enable prefetching of files. This will download files in
360
+ advance in parallel. If an integer is provided, it specifies the number
361
+ of files to prefetch concurrently for each process on each worker.
362
+ Defaults to 2. Set to 0 or False to disable prefetching.
363
+ parallel: Number of processes to use for processing user-defined functions
364
+ (UDFs) in parallel. If an integer is provided, it specifies the number
365
+ of CPUs to use. If True, all available CPUs are used. Defaults to 1.
366
+ namespace: Namespace to use for the chain by default.
367
+ project: Project to use for the chain by default.
368
+ min_task_size: Minimum number of rows per worker/process for parallel
369
+ processing by UDFs. Defaults to 1.
370
+ batch_size: Number of rows per insert by UDF to fine tune and balance speed
371
+ and memory usage. This might be useful when processing large rows
372
+ or when running into memory issues. Defaults to 2000.
373
373
 
374
374
  Example:
375
375
  ```py
@@ -398,7 +398,7 @@ class DataChain:
398
398
  return self._evolve(settings=settings, _sys=sys)
399
399
 
400
400
  def reset_settings(self, settings: Optional[Settings] = None) -> "Self":
401
- """Reset all settings to default values."""
401
+ """Reset all chain settings to default values."""
402
402
  self._settings = settings if settings else Settings()
403
403
  return self
404
404
 
@@ -580,14 +580,14 @@ class DataChain:
580
580
  """Save to a Dataset. It returns the chain itself.
581
581
 
582
582
  Parameters:
583
- name : dataset name. It can be full name consisting of namespace and
584
- project, but it can also be just a regular dataset name in which
585
- case we are taking namespace and project from settings, if they
586
- are defined there, or default ones instead.
587
- version : version of a dataset. If version is not specified and dataset
583
+ name: dataset name. This can be either a fully qualified name, including
584
+ the namespace and project, or just a regular dataset name. In the latter
585
+ case, the namespace and project will be taken from the settings
586
+ (if specified) or from the default values otherwise.
587
+ version: version of a dataset. If version is not specified and dataset
588
588
  already exists, version patch increment will happen e.g 1.2.1 -> 1.2.2.
589
- description : description of a dataset.
590
- attrs : attributes of a dataset. They can be without value, e.g "NLP",
589
+ description: description of a dataset.
590
+ attrs: attributes of a dataset. They can be without value, e.g "NLP",
591
591
  or with a value, e.g "location=US".
592
592
  update_version: which part of the dataset version to automatically increase.
593
593
  Available values: `major`, `minor` or `patch`. Default is `patch`.
@@ -661,7 +661,9 @@ class DataChain:
661
661
  # current latest version instead.
662
662
  from .datasets import read_dataset
663
663
 
664
- return read_dataset(name, **kwargs)
664
+ return read_dataset(
665
+ name, namespace=namespace_name, project=project_name, **kwargs
666
+ )
665
667
 
666
668
  return self._evolve(
667
669
  query=self._query.save(
@@ -704,7 +706,7 @@ class DataChain:
704
706
  func: Optional[Callable] = None,
705
707
  params: Union[None, str, Sequence[str]] = None,
706
708
  output: OutputType = None,
707
- **signal_map,
709
+ **signal_map: Any,
708
710
  ) -> "Self":
709
711
  """Apply a function to each row to create new signals. The function should
710
712
  return a new object for each row. It returns a chain itself with new signals.
@@ -712,17 +714,17 @@ class DataChain:
712
714
  Input-output relationship: 1:1
713
715
 
714
716
  Parameters:
715
- func : Function applied to each row.
716
- params : List of column names used as input for the function. Default
717
+ func: Function applied to each row.
718
+ params: List of column names used as input for the function. Default
717
719
  is taken from function signature.
718
- output : Dictionary defining new signals and their corresponding types.
720
+ output: Dictionary defining new signals and their corresponding types.
719
721
  Default type is taken from function signature. Default can be also
720
722
  taken from kwargs - **signal_map (see below).
721
723
  If signal name is defined using signal_map (see below) only a single
722
724
  type value can be used.
723
- **signal_map : kwargs can be used to define `func` together with it's return
725
+ **signal_map: kwargs can be used to define `func` together with its return
724
726
  signal name in format of `map(my_sign=my_func)`. This helps define
725
- signal names and function in a nicer way.
727
+ signal names and functions in a nicer way.
726
728
 
727
729
  Example:
728
730
  Using signal_map and single type in output:
@@ -941,7 +943,7 @@ class DataChain:
941
943
  It accepts the same parameters plus an
942
944
  additional parameter:
943
945
 
944
- batch : Size of each batch passed to `func`. Defaults to 1000.
946
+ batch: Size of each batch passed to `func`. Defaults to 1000.
945
947
 
946
948
  Example:
947
949
  ```py
@@ -1309,9 +1311,9 @@ class DataChain:
1309
1311
  """Yields flattened rows of values as a tuple.
1310
1312
 
1311
1313
  Args:
1312
- row_factory : A callable to convert row to a custom format.
1313
- It should accept two arguments: a list of column names and
1314
- a tuple of row values.
1314
+ row_factory: A callable to convert row to a custom format.
1315
+ It should accept two arguments: a list of column names and
1316
+ a tuple of row values.
1315
1317
  include_hidden: Whether to include hidden signals from the schema.
1316
1318
  """
1317
1319
  db_signals = self._effective_signals_schema.db_signals(
@@ -1956,19 +1958,19 @@ class DataChain:
1956
1958
  model_name: str = "",
1957
1959
  source: bool = True,
1958
1960
  nrows: Optional[int] = None,
1959
- **kwargs,
1961
+ **kwargs: Any,
1960
1962
  ) -> "Self":
1961
1963
  """Generate chain from list of tabular files.
1962
1964
 
1963
1965
  Parameters:
1964
- output : Dictionary or feature class defining column names and their
1966
+ output: Dictionary or feature class defining column names and their
1965
1967
  corresponding types. List of column names is also accepted, in which
1966
1968
  case types will be inferred.
1967
- column : Generated column name.
1968
- model_name : Generated model name.
1969
- source : Whether to include info about the source file.
1970
- nrows : Optional row limit.
1971
- kwargs : Parameters to pass to pyarrow.dataset.dataset.
1969
+ column: Generated column name.
1970
+ model_name: Generated model name.
1971
+ source: Whether to include info about the source file.
1972
+ nrows: Optional row limit.
1973
+ kwargs: Parameters to pass to pyarrow.dataset.dataset.
1972
1974
 
1973
1975
  Example:
1974
1976
  Reading a json lines file:
@@ -2098,12 +2100,12 @@ class DataChain:
2098
2100
  """Save chain to parquet file with SignalSchema metadata.
2099
2101
 
2100
2102
  Parameters:
2101
- path : Path or a file-like binary object to save the file. This supports
2103
+ path: Path or a file-like binary object to save the file. This supports
2102
2104
  local paths as well as remote paths, such as s3:// or hf:// with fsspec.
2103
- partition_cols : Column names by which to partition the dataset.
2104
- chunk_size : The chunk size of results to read and convert to columnar
2105
+ partition_cols: Column names by which to partition the dataset.
2106
+ chunk_size: The chunk size of results to read and convert to columnar
2105
2107
  data, to avoid running out of memory.
2106
- fs_kwargs : Optional kwargs to pass to the fsspec filesystem, used only for
2108
+ fs_kwargs: Optional kwargs to pass to the fsspec filesystem, used only for
2107
2109
  write, for fsspec-type URLs, such as s3:// or hf:// when
2108
2110
  provided as the destination path.
2109
2111
  """
@@ -2195,10 +2197,10 @@ class DataChain:
2195
2197
  """Save chain to a csv (comma-separated values) file.
2196
2198
 
2197
2199
  Parameters:
2198
- path : Path to save the file. This supports local paths as well as
2200
+ path: Path to save the file. This supports local paths as well as
2199
2201
  remote paths, such as s3:// or hf:// with fsspec.
2200
- delimiter : Delimiter to use for the resulting file.
2201
- fs_kwargs : Optional kwargs to pass to the fsspec filesystem, used only for
2202
+ delimiter: Delimiter to use for the resulting file.
2203
+ fs_kwargs: Optional kwargs to pass to the fsspec filesystem, used only for
2202
2204
  write, for fsspec-type URLs, such as s3:// or hf:// when
2203
2205
  provided as the destination path.
2204
2206
  """
@@ -2241,12 +2243,12 @@ class DataChain:
2241
2243
  """Save chain to a JSON file.
2242
2244
 
2243
2245
  Parameters:
2244
- path : Path to save the file. This supports local paths as well as
2246
+ path: Path to save the file. This supports local paths as well as
2245
2247
  remote paths, such as s3:// or hf:// with fsspec.
2246
- fs_kwargs : Optional kwargs to pass to the fsspec filesystem, used only for
2248
+ fs_kwargs: Optional kwargs to pass to the fsspec filesystem, used only for
2247
2249
  write, for fsspec-type URLs, such as s3:// or hf:// when
2248
2250
  provided as the destination path.
2249
- include_outer_list : Sets whether to include an outer list for all rows.
2251
+ include_outer_list: Sets whether to include an outer list for all rows.
2250
2252
  Setting this to True makes the file valid JSON, while False instead
2251
2253
  writes in the JSON lines format.
2252
2254
  """
@@ -2301,9 +2303,9 @@ class DataChain:
2301
2303
  """Save chain to a JSON lines file.
2302
2304
 
2303
2305
  Parameters:
2304
- path : Path to save the file. This supports local paths as well as
2306
+ path: Path to save the file. This supports local paths as well as
2305
2307
  remote paths, such as s3:// or hf:// with fsspec.
2306
- fs_kwargs : Optional kwargs to pass to the fsspec filesystem, used only for
2308
+ fs_kwargs: Optional kwargs to pass to the fsspec filesystem, used only for
2307
2309
  write, for fsspec-type URLs, such as s3:// or hf:// when
2308
2310
  provided as the destination path.
2309
2311
  """
@@ -2571,9 +2573,9 @@ class DataChain:
2571
2573
  The possible values are: "filename", "etag", "fullpath", and "checksum".
2572
2574
  link_type: Method to use for exporting files.
2573
2575
  Falls back to `'copy'` if symlinking fails.
2574
- num_threads : number of threads to use for exporting files.
2575
- By default it uses 5 threads.
2576
- anon: If True, we will treat cloud bucket as public one. Default behavior
2576
+ num_threads: number of threads to use for exporting files.
2577
+ By default, it uses 5 threads.
2578
+ anon: If True, we will treat cloud bucket as a public one. Default behavior
2577
2579
  depends on the previous session configuration (e.g. happens in the
2578
2580
  initial `read_storage`) and particular cloud storage client
2579
2581
  implementation (e.g. S3 fallbacks to anonymous access if no credentials
@@ -51,14 +51,14 @@ def read_dataset(
51
51
  namespace and project. Alternatively, it can be a regular name, in which
52
52
  case the explicitly defined namespace and project will be used if they are
53
53
  set; otherwise, default values will be applied.
54
- namespace : optional name of namespace in which dataset to read is created
55
- project : optional name of project in which dataset to read is created
56
- version : dataset version. Supports:
54
+ namespace: optional name of namespace in which dataset to read is created
55
+ project: optional name of project in which dataset to read is created
56
+ version: dataset version. Supports:
57
57
  - Exact version strings: "1.2.3"
58
58
  - Legacy integer versions: 1, 2, 3 (finds latest major version)
59
59
  - Version specifiers (PEP 440): ">=1.0.0,<2.0.0", "~=1.4.2", "==1.2.*", etc.
60
- session : Session to use for the chain.
61
- settings : Settings to use for the chain.
60
+ session: Session to use for the chain.
61
+ settings: Settings to use for the chain.
62
62
  delta: If True, only process new or changed files instead of reprocessing
63
63
  everything. This saves time by skipping files that were already processed in
64
64
  previous versions. The optimization is working when a new version of the
@@ -314,9 +314,9 @@ def delete_dataset(
314
314
  namespace and project. Alternatively, it can be a regular name, in which
315
315
  case the explicitly defined namespace and project will be used if they are
316
316
  set; otherwise, default values will be applied.
317
- namespace : optional name of namespace in which dataset to delete is created
318
- project : optional name of project in which dataset to delete is created
319
- version : Optional dataset version
317
+ namespace: optional name of namespace in which dataset to delete is created
318
+ project: optional name of project in which dataset to delete is created
319
+ version: Optional dataset version
320
320
  force: If true, all datasets versions will be removed. Defaults to False.
321
321
  studio: If True, removes dataset from Studio only, otherwise removes local
322
322
  dataset. Defaults to False.
@@ -1,8 +1,4 @@
1
- from typing import (
2
- TYPE_CHECKING,
3
- Optional,
4
- Union,
5
- )
1
+ from typing import TYPE_CHECKING, Any, Optional, Union
6
2
 
7
3
  from datachain.lib.data_model import dict_to_data_model
8
4
  from datachain.query import Session
@@ -20,28 +16,28 @@ if TYPE_CHECKING:
20
16
 
21
17
  def read_hf(
22
18
  dataset: Union[str, "HFDatasetType"],
23
- *args,
19
+ *args: Any,
24
20
  session: Optional[Session] = None,
25
21
  settings: Optional[dict] = None,
26
22
  column: str = "",
27
23
  model_name: str = "",
28
24
  limit: int = 0,
29
- **kwargs,
25
+ **kwargs: Any,
30
26
  ) -> "DataChain":
31
27
  """Generate chain from Hugging Face Hub dataset.
32
28
 
33
29
  Parameters:
34
- dataset : Path or name of the dataset to read from Hugging Face Hub,
30
+ dataset: Path or name of the dataset to read from Hugging Face Hub,
35
31
  or an instance of `datasets.Dataset`-like object.
36
- args : Additional positional arguments to pass to `datasets.load_dataset`.
37
- session : Session to use for the chain.
38
- settings : Settings to use for the chain.
39
- column : Generated object column name.
40
- model_name : Generated model name.
41
- limit : Limit the number of items to read from the HF dataset.
42
- Adds `take(limit)` to the `datasets.load_dataset`.
43
- Defaults to 0 (no limit).
44
- kwargs : Parameters to pass to `datasets.load_dataset`.
32
+ args: Additional positional arguments to pass to `datasets.load_dataset`.
33
+ session: Session to use for the chain.
34
+ settings: Settings to use for the chain.
35
+ column: Generated object column name.
36
+ model_name: Generated model name.
37
+ limit: The maximum number of items to read from the HF dataset.
38
+ Applies `take(limit)` to `datasets.load_dataset`.
39
+ Defaults to 0 (no limit).
40
+ kwargs: Parameters to pass to `datasets.load_dataset`.
45
41
 
46
42
  Example:
47
43
  Load from Hugging Face Hub:
@@ -26,22 +26,22 @@ def read_json(
26
26
  column: Optional[str] = "",
27
27
  model_name: Optional[str] = None,
28
28
  format: Optional[str] = "json",
29
- nrows=None,
29
+ nrows: Optional[int] = None,
30
30
  **kwargs,
31
31
  ) -> "DataChain":
32
32
  """Get data from JSON. It returns the chain itself.
33
33
 
34
34
  Parameters:
35
- path : storage URI with directory. URI must start with storage prefix such
35
+ path: storage URI with directory. URI must start with storage prefix such
36
36
  as `s3://`, `gs://`, `az://` or "file:///"
37
- type : read file as "binary", "text", or "image" data. Default is "text".
38
- spec : optional Data Model
39
- schema_from : path to sample to infer spec (if schema not provided)
40
- column : generated column name
41
- model_name : optional generated model name
37
+ type: read file as "binary", "text", or "image" data. Default is "text".
38
+ spec: optional Data Model
39
+ schema_from: path to sample to infer spec (if schema not provided)
40
+ column: generated column name
41
+ model_name: optional generated model name
42
42
  format: "json", "jsonl"
43
- jmespath : optional JMESPATH expression to reduce JSON
44
- nrows : optional row limit for jsonl and JSON arrays
43
+ jmespath: optional JMESPATH expression to reduce JSON
44
+ nrows: optional row limit for jsonl and JSON arrays
45
45
 
46
46
  Example:
47
47
  infer JSON schema from data, reduce using JMESPATH
@@ -1,8 +1,5 @@
1
- from typing import (
2
- TYPE_CHECKING,
3
- Any,
4
- Optional,
5
- )
1
+ import os
2
+ from typing import TYPE_CHECKING, Any, Optional, Union
6
3
 
7
4
  from datachain.lib.data_model import DataType
8
5
  from datachain.query import Session
@@ -16,7 +13,7 @@ if TYPE_CHECKING:
16
13
 
17
14
 
18
15
  def read_parquet(
19
- path,
16
+ path: Union[str, os.PathLike[str], list[str], list[os.PathLike[str]]],
20
17
  partitioning: Any = "hive",
21
18
  output: Optional[dict[str, DataType]] = None,
22
19
  column: str = "",
@@ -29,15 +26,15 @@ def read_parquet(
29
26
  """Generate chain from parquet files.
30
27
 
31
28
  Parameters:
32
- path : Storage URI with directory. URI must start with storage prefix such
29
+ path: Storage URI with directory. URI must start with storage prefix such
33
30
  as `s3://`, `gs://`, `az://` or "file:///".
34
- partitioning : Any pyarrow partitioning schema.
35
- output : Dictionary defining column names and their corresponding types.
36
- column : Created column name.
37
- model_name : Generated model name.
38
- source : Whether to include info about the source file.
39
- session : Session to use for the chain.
40
- settings : Settings to use for the chain.
31
+ partitioning: Any pyarrow partitioning schema.
32
+ output: Dictionary defining column names and their corresponding types.
33
+ column: Created column name.
34
+ model_name: Generated model name.
35
+ source: Whether to include info about the source file.
36
+ session: Session to use for the chain.
37
+ settings: Settings to use for the chain.
41
38
 
42
39
  Example:
43
40
  Reading a single file:
@@ -30,9 +30,9 @@ def read_records(
30
30
  or other sources.
31
31
 
32
32
  Parameters:
33
- to_insert : records (or a single record) to insert. Each record is
33
+ to_insert: records (or a single record) to insert. Each record is
34
34
  a dictionary of signals and their values.
35
- schema : describes chain signals and their corresponding types
35
+ schema: describes chain signals and their corresponding types
36
36
 
37
37
  Example:
38
38
  ```py