datachain 0.32.0__tar.gz → 0.32.2__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (431)
  1. {datachain-0.32.0 → datachain-0.32.2}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.32.0 → datachain-0.32.2}/PKG-INFO +12 -24
  3. {datachain-0.32.0 → datachain-0.32.2}/README.rst +10 -22
  4. {datachain-0.32.0 → datachain-0.32.2}/docs/api_hooks.py +7 -0
  5. datachain-0.32.2/docs/assets/webhook_dialog.png +0 -0
  6. datachain-0.32.2/docs/assets/webhook_list.png +0 -0
  7. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/namespaces.md +23 -0
  8. {datachain-0.32.0 → datachain-0.32.2}/docs/references/datachain.md +2 -0
  9. datachain-0.32.2/docs/studio/webhooks.md +276 -0
  10. {datachain-0.32.0 → datachain-0.32.2}/mkdocs.yml +1 -0
  11. {datachain-0.32.0 → datachain-0.32.2}/pyproject.toml +1 -1
  12. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/__init__.py +1 -1
  13. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/dataset.py +2 -2
  14. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/convert/python_to_sql.py +18 -4
  15. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/parquet.py +20 -5
  16. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/storage.py +12 -6
  17. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/storage_pattern.py +50 -99
  18. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/namespaces.py +4 -5
  19. {datachain-0.32.0 → datachain-0.32.2}/src/datachain.egg-info/PKG-INFO +12 -24
  20. {datachain-0.32.0 → datachain-0.32.2}/src/datachain.egg-info/SOURCES.txt +3 -0
  21. {datachain-0.32.0 → datachain-0.32.2}/src/datachain.egg-info/requires.txt +1 -1
  22. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_metastore.py +1 -1
  23. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_storage_pattern.py +61 -5
  24. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_audio.py +1 -1
  25. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_datachain.py +5 -5
  26. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_namespace.py +8 -1
  27. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_project.py +1 -1
  28. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_python_to_sql.py +19 -0
  29. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_storage_pattern.py +88 -22
  30. {datachain-0.32.0 → datachain-0.32.2}/.cruft.json +0 -0
  31. {datachain-0.32.0 → datachain-0.32.2}/.gitattributes +0 -0
  32. {datachain-0.32.0 → datachain-0.32.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  33. {datachain-0.32.0 → datachain-0.32.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  34. {datachain-0.32.0 → datachain-0.32.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  35. {datachain-0.32.0 → datachain-0.32.2}/.github/codecov.yaml +0 -0
  36. {datachain-0.32.0 → datachain-0.32.2}/.github/dependabot.yml +0 -0
  37. {datachain-0.32.0 → datachain-0.32.2}/.github/workflows/benchmarks.yml +0 -0
  38. {datachain-0.32.0 → datachain-0.32.2}/.github/workflows/release.yml +0 -0
  39. {datachain-0.32.0 → datachain-0.32.2}/.github/workflows/tests-studio.yml +0 -0
  40. {datachain-0.32.0 → datachain-0.32.2}/.github/workflows/tests.yml +0 -0
  41. {datachain-0.32.0 → datachain-0.32.2}/.github/workflows/update-template.yaml +0 -0
  42. {datachain-0.32.0 → datachain-0.32.2}/.gitignore +0 -0
  43. {datachain-0.32.0 → datachain-0.32.2}/CODE_OF_CONDUCT.rst +0 -0
  44. {datachain-0.32.0 → datachain-0.32.2}/LICENSE +0 -0
  45. {datachain-0.32.0 → datachain-0.32.2}/docs/assets/captioned_cartoons.png +0 -0
  46. {datachain-0.32.0 → datachain-0.32.2}/docs/assets/datachain-white.svg +0 -0
  47. {datachain-0.32.0 → datachain-0.32.2}/docs/assets/datachain.svg +0 -0
  48. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/auth/login.md +0 -0
  49. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/auth/logout.md +0 -0
  50. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/auth/team.md +0 -0
  51. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/auth/token.md +0 -0
  52. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/index.md +0 -0
  53. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/job/cancel.md +0 -0
  54. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/job/clusters.md +0 -0
  55. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/job/logs.md +0 -0
  56. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/job/ls.md +0 -0
  57. {datachain-0.32.0 → datachain-0.32.2}/docs/commands/job/run.md +0 -0
  58. {datachain-0.32.0 → datachain-0.32.2}/docs/contributing.md +0 -0
  59. {datachain-0.32.0 → datachain-0.32.2}/docs/css/github-permalink-style.css +0 -0
  60. {datachain-0.32.0 → datachain-0.32.2}/docs/examples.md +0 -0
  61. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/db_migrations.md +0 -0
  62. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/delta.md +0 -0
  63. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/env.md +0 -0
  64. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/index.md +0 -0
  65. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/processing.md +0 -0
  66. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/remotes.md +0 -0
  67. {datachain-0.32.0 → datachain-0.32.2}/docs/guide/retry.md +0 -0
  68. {datachain-0.32.0 → datachain-0.32.2}/docs/index.md +0 -0
  69. {datachain-0.32.0 → datachain-0.32.2}/docs/overrides/main.html +0 -0
  70. {datachain-0.32.0 → datachain-0.32.2}/docs/quick-start.md +0 -0
  71. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/arrowrow.md +0 -0
  72. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/bbox.md +0 -0
  73. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/file.md +0 -0
  74. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/imagefile.md +0 -0
  75. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/index.md +0 -0
  76. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/pose.md +0 -0
  77. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/segment.md +0 -0
  78. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/tarvfile.md +0 -0
  79. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/textfile.md +0 -0
  80. {datachain-0.32.0 → datachain-0.32.2}/docs/references/data-types/videofile.md +0 -0
  81. {datachain-0.32.0 → datachain-0.32.2}/docs/references/func.md +0 -0
  82. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/aggregate.md +0 -0
  83. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/array.md +0 -0
  84. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/conditional.md +0 -0
  85. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/numeric.md +0 -0
  86. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/path.md +0 -0
  87. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/random.md +0 -0
  88. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/string.md +0 -0
  89. {datachain-0.32.0 → datachain-0.32.2}/docs/references/functions/window.md +0 -0
  90. {datachain-0.32.0 → datachain-0.32.2}/docs/references/index.md +0 -0
  91. {datachain-0.32.0 → datachain-0.32.2}/docs/references/toolkit.md +0 -0
  92. {datachain-0.32.0 → datachain-0.32.2}/docs/references/torch.md +0 -0
  93. {datachain-0.32.0 → datachain-0.32.2}/docs/references/udf.md +0 -0
  94. {datachain-0.32.0 → datachain-0.32.2}/docs/studio/api/.gitkeep +0 -0
  95. {datachain-0.32.0 → datachain-0.32.2}/docs/templates/main.dot +0 -0
  96. {datachain-0.32.0 → datachain-0.32.2}/docs/templates/operation.dot +0 -0
  97. {datachain-0.32.0 → datachain-0.32.2}/docs/templates/responses.def +0 -0
  98. {datachain-0.32.0 → datachain-0.32.2}/docs/tutorials.md +0 -0
  99. {datachain-0.32.0 → datachain-0.32.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  100. {datachain-0.32.0 → datachain-0.32.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  101. {datachain-0.32.0 → datachain-0.32.2}/examples/computer_vision/openimage-detect.py +0 -0
  102. {datachain-0.32.0 → datachain-0.32.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
  103. {datachain-0.32.0 → datachain-0.32.2}/examples/computer_vision/ultralytics-pose.py +0 -0
  104. {datachain-0.32.0 → datachain-0.32.2}/examples/computer_vision/ultralytics-segment.py +0 -0
  105. {datachain-0.32.0 → datachain-0.32.2}/examples/get_started/common_sql_functions.py +0 -0
  106. {datachain-0.32.0 → datachain-0.32.2}/examples/get_started/json-csv-reader.py +0 -0
  107. {datachain-0.32.0 → datachain-0.32.2}/examples/get_started/nested_datamodel.py +0 -0
  108. {datachain-0.32.0 → datachain-0.32.2}/examples/get_started/torch-loader.py +0 -0
  109. {datachain-0.32.0 → datachain-0.32.2}/examples/get_started/udfs/parallel.py +0 -0
  110. {datachain-0.32.0 → datachain-0.32.2}/examples/get_started/udfs/simple.py +0 -0
  111. {datachain-0.32.0 → datachain-0.32.2}/examples/get_started/udfs/stateful.py +0 -0
  112. {datachain-0.32.0 → datachain-0.32.2}/examples/incremental_processing/delta.py +0 -0
  113. {datachain-0.32.0 → datachain-0.32.2}/examples/incremental_processing/retry.py +0 -0
  114. {datachain-0.32.0 → datachain-0.32.2}/examples/incremental_processing/utils.py +0 -0
  115. {datachain-0.32.0 → datachain-0.32.2}/examples/llm_and_nlp/claude-query.py +0 -0
  116. {datachain-0.32.0 → datachain-0.32.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  117. {datachain-0.32.0 → datachain-0.32.2}/examples/multimodal/audio-to-text.py +0 -0
  118. {datachain-0.32.0 → datachain-0.32.2}/examples/multimodal/clip_inference.py +0 -0
  119. {datachain-0.32.0 → datachain-0.32.2}/examples/multimodal/hf_pipeline.py +0 -0
  120. {datachain-0.32.0 → datachain-0.32.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
  121. {datachain-0.32.0 → datachain-0.32.2}/examples/multimodal/wds.py +0 -0
  122. {datachain-0.32.0 → datachain-0.32.2}/examples/multimodal/wds_filtered.py +0 -0
  123. {datachain-0.32.0 → datachain-0.32.2}/noxfile.py +0 -0
  124. {datachain-0.32.0 → datachain-0.32.2}/setup.cfg +0 -0
  125. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/__main__.py +0 -0
  126. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/asyn.py +0 -0
  127. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cache.py +0 -0
  128. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/catalog/__init__.py +0 -0
  129. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/catalog/catalog.py +0 -0
  130. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/catalog/datasource.py +0 -0
  131. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/catalog/loader.py +0 -0
  132. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/__init__.py +0 -0
  133. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/__init__.py +0 -0
  134. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/datasets.py +0 -0
  135. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/du.py +0 -0
  136. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/index.py +0 -0
  137. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/ls.py +0 -0
  138. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/misc.py +0 -0
  139. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/query.py +0 -0
  140. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/commands/show.py +0 -0
  141. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/parser/__init__.py +0 -0
  142. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/parser/job.py +0 -0
  143. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/parser/studio.py +0 -0
  144. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/parser/utils.py +0 -0
  145. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/cli/utils.py +0 -0
  146. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/__init__.py +0 -0
  147. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/azure.py +0 -0
  148. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/fileslice.py +0 -0
  149. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/fsspec.py +0 -0
  150. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/gcs.py +0 -0
  151. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/hf.py +0 -0
  152. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/local.py +0 -0
  153. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/client/s3.py +0 -0
  154. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/config.py +0 -0
  155. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/__init__.py +0 -0
  156. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/db_engine.py +0 -0
  157. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/job.py +0 -0
  158. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/metastore.py +0 -0
  159. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/schema.py +0 -0
  160. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/serializer.py +0 -0
  161. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/sqlite.py +0 -0
  162. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/data_storage/warehouse.py +0 -0
  163. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/delta.py +0 -0
  164. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/diff/__init__.py +0 -0
  165. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/error.py +0 -0
  166. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/fs/__init__.py +0 -0
  167. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/fs/reference.py +0 -0
  168. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/fs/utils.py +0 -0
  169. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/__init__.py +0 -0
  170. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/aggregate.py +0 -0
  171. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/array.py +0 -0
  172. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/base.py +0 -0
  173. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/conditional.py +0 -0
  174. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/func.py +0 -0
  175. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/numeric.py +0 -0
  176. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/path.py +0 -0
  177. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/random.py +0 -0
  178. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/string.py +0 -0
  179. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/func/window.py +0 -0
  180. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/job.py +0 -0
  181. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/__init__.py +0 -0
  182. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/arrow.py +0 -0
  183. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/audio.py +0 -0
  184. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/clip.py +0 -0
  185. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/convert/__init__.py +0 -0
  186. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/convert/flatten.py +0 -0
  187. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  188. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/convert/unflatten.py +0 -0
  189. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  190. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/data_model.py +0 -0
  191. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dataset_info.py +0 -0
  192. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/__init__.py +0 -0
  193. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/csv.py +0 -0
  194. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/database.py +0 -0
  195. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/datachain.py +0 -0
  196. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/datasets.py +0 -0
  197. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/hf.py +0 -0
  198. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/json.py +0 -0
  199. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/listings.py +0 -0
  200. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/pandas.py +0 -0
  201. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/records.py +0 -0
  202. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/utils.py +0 -0
  203. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/dc/values.py +0 -0
  204. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/file.py +0 -0
  205. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/hf.py +0 -0
  206. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/image.py +0 -0
  207. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/listing.py +0 -0
  208. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/listing_info.py +0 -0
  209. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/meta_formats.py +0 -0
  210. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/model_store.py +0 -0
  211. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/projects.py +0 -0
  212. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/pytorch.py +0 -0
  213. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/settings.py +0 -0
  214. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/signal_schema.py +0 -0
  215. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/tar.py +0 -0
  216. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/text.py +0 -0
  217. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/udf.py +0 -0
  218. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/udf_signature.py +0 -0
  219. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/utils.py +0 -0
  220. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/video.py +0 -0
  221. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/webdataset.py +0 -0
  222. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/lib/webdataset_laion.py +0 -0
  223. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/listing.py +0 -0
  224. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/__init__.py +0 -0
  225. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/bbox.py +0 -0
  226. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/pose.py +0 -0
  227. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/segment.py +0 -0
  228. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  229. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  230. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/ultralytics/pose.py +0 -0
  231. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/ultralytics/segment.py +0 -0
  232. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/model/utils.py +0 -0
  233. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/namespace.py +0 -0
  234. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/node.py +0 -0
  235. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/nodes_fetcher.py +0 -0
  236. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/nodes_thread_pool.py +0 -0
  237. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/progress.py +0 -0
  238. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/project.py +0 -0
  239. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/py.typed +0 -0
  240. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/__init__.py +0 -0
  241. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/batch.py +0 -0
  242. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/dataset.py +0 -0
  243. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/dispatch.py +0 -0
  244. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/metrics.py +0 -0
  245. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/params.py +0 -0
  246. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/queue.py +0 -0
  247. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/schema.py +0 -0
  248. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/session.py +0 -0
  249. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/udf.py +0 -0
  250. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/query/utils.py +0 -0
  251. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/remote/__init__.py +0 -0
  252. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/remote/studio.py +0 -0
  253. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/script_meta.py +0 -0
  254. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/semver.py +0 -0
  255. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/__init__.py +0 -0
  256. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/default/__init__.py +0 -0
  257. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/default/base.py +0 -0
  258. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/__init__.py +0 -0
  259. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/aggregate.py +0 -0
  260. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/array.py +0 -0
  261. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/conditional.py +0 -0
  262. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/numeric.py +0 -0
  263. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/path.py +0 -0
  264. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/random.py +0 -0
  265. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/functions/string.py +0 -0
  266. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/postgresql_dialect.py +0 -0
  267. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/postgresql_types.py +0 -0
  268. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/selectable.py +0 -0
  269. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  270. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/sqlite/base.py +0 -0
  271. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/sqlite/types.py +0 -0
  272. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/sqlite/vector.py +0 -0
  273. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/types.py +0 -0
  274. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/sql/utils.py +0 -0
  275. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/studio.py +0 -0
  276. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/telemetry.py +0 -0
  277. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/toolkit/__init__.py +0 -0
  278. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/toolkit/split.py +0 -0
  279. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/torch/__init__.py +0 -0
  280. {datachain-0.32.0 → datachain-0.32.2}/src/datachain/utils.py +0 -0
  281. {datachain-0.32.0 → datachain-0.32.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  282. {datachain-0.32.0 → datachain-0.32.2}/src/datachain.egg-info/entry_points.txt +0 -0
  283. {datachain-0.32.0 → datachain-0.32.2}/src/datachain.egg-info/top_level.txt +0 -0
  284. {datachain-0.32.0 → datachain-0.32.2}/tests/__init__.py +0 -0
  285. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/__init__.py +0 -0
  286. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/conftest.py +0 -0
  287. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  288. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  289. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/datasets/.gitignore +0 -0
  290. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  291. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/test_datachain.py +0 -0
  292. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/test_ls.py +0 -0
  293. {datachain-0.32.0 → datachain-0.32.2}/tests/benchmarks/test_version.py +0 -0
  294. {datachain-0.32.0 → datachain-0.32.2}/tests/conftest.py +0 -0
  295. {datachain-0.32.0 → datachain-0.32.2}/tests/data.py +0 -0
  296. {datachain-0.32.0 → datachain-0.32.2}/tests/examples/__init__.py +0 -0
  297. {datachain-0.32.0 → datachain-0.32.2}/tests/examples/test_examples.py +0 -0
  298. {datachain-0.32.0 → datachain-0.32.2}/tests/examples/test_wds_e2e.py +0 -0
  299. {datachain-0.32.0 → datachain-0.32.2}/tests/examples/wds_data.py +0 -0
  300. {datachain-0.32.0 → datachain-0.32.2}/tests/func/__init__.py +0 -0
  301. {datachain-0.32.0 → datachain-0.32.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  302. {datachain-0.32.0 → datachain-0.32.2}/tests/func/data/lena.jpg +0 -0
  303. {datachain-0.32.0 → datachain-0.32.2}/tests/func/fake-service-account-credentials.json +0 -0
  304. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/__init__.py +0 -0
  305. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/test_aggregate.py +0 -0
  306. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/test_array.py +0 -0
  307. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/test_conditional.py +0 -0
  308. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/test_numeric.py +0 -0
  309. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/test_path.py +0 -0
  310. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/test_random.py +0 -0
  311. {datachain-0.32.0 → datachain-0.32.2}/tests/func/functions/test_string.py +0 -0
  312. {datachain-0.32.0 → datachain-0.32.2}/tests/func/model/__init__.py +0 -0
  313. {datachain-0.32.0 → datachain-0.32.2}/tests/func/model/data/running-mask0.png +0 -0
  314. {datachain-0.32.0 → datachain-0.32.2}/tests/func/model/data/running-mask1.png +0 -0
  315. {datachain-0.32.0 → datachain-0.32.2}/tests/func/model/data/running.jpg +0 -0
  316. {datachain-0.32.0 → datachain-0.32.2}/tests/func/model/data/ships.jpg +0 -0
  317. {datachain-0.32.0 → datachain-0.32.2}/tests/func/model/test_yolo.py +0 -0
  318. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_audio.py +0 -0
  319. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_batching.py +0 -0
  320. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_catalog.py +0 -0
  321. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_client.py +0 -0
  322. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_cloud_transfer.py +0 -0
  323. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_data_storage.py +0 -0
  324. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_datachain.py +0 -0
  325. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_datachain_merge.py +0 -0
  326. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_dataset_query.py +0 -0
  327. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_datasets.py +0 -0
  328. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_delta.py +0 -0
  329. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_feature_pickling.py +0 -0
  330. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_file.py +0 -0
  331. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_hf.py +0 -0
  332. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_hidden_field.py +0 -0
  333. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_image.py +0 -0
  334. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_listing.py +0 -0
  335. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_ls.py +0 -0
  336. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_meta_formats.py +0 -0
  337. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_metrics.py +0 -0
  338. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_mutate.py +0 -0
  339. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_pull.py +0 -0
  340. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_pytorch.py +0 -0
  341. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_query.py +0 -0
  342. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_read_database.py +0 -0
  343. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_read_dataset_remote.py +0 -0
  344. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  345. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_retry.py +0 -0
  346. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_session.py +0 -0
  347. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_studio_datetime_parsing.py +0 -0
  348. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_to_database.py +0 -0
  349. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_toolkit.py +0 -0
  350. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_video.py +0 -0
  351. {datachain-0.32.0 → datachain-0.32.2}/tests/func/test_warehouse.py +0 -0
  352. {datachain-0.32.0 → datachain-0.32.2}/tests/scripts/feature_class.py +0 -0
  353. {datachain-0.32.0 → datachain-0.32.2}/tests/scripts/feature_class_exception.py +0 -0
  354. {datachain-0.32.0 → datachain-0.32.2}/tests/scripts/feature_class_parallel.py +0 -0
  355. {datachain-0.32.0 → datachain-0.32.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  356. {datachain-0.32.0 → datachain-0.32.2}/tests/scripts/name_len_slow.py +0 -0
  357. {datachain-0.32.0 → datachain-0.32.2}/tests/test_atomicity.py +0 -0
  358. {datachain-0.32.0 → datachain-0.32.2}/tests/test_cli_e2e.py +0 -0
  359. {datachain-0.32.0 → datachain-0.32.2}/tests/test_cli_studio.py +0 -0
  360. {datachain-0.32.0 → datachain-0.32.2}/tests/test_import_time.py +0 -0
  361. {datachain-0.32.0 → datachain-0.32.2}/tests/test_query_e2e.py +0 -0
  362. {datachain-0.32.0 → datachain-0.32.2}/tests/test_telemetry.py +0 -0
  363. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/__init__.py +0 -0
  364. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/__init__.py +0 -0
  365. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/conftest.py +0 -0
  366. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_arrow.py +0 -0
  367. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_clip.py +0 -0
  368. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  369. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_datachain_merge.py +0 -0
  370. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_diff.py +0 -0
  371. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_feature.py +0 -0
  372. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_feature_utils.py +0 -0
  373. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_file.py +0 -0
  374. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_hf.py +0 -0
  375. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_image.py +0 -0
  376. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_listing_info.py +0 -0
  377. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_partition_by.py +0 -0
  378. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_schema.py +0 -0
  379. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_settings.py +0 -0
  380. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_signal_schema.py +0 -0
  381. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  382. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_text.py +0 -0
  383. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_udf.py +0 -0
  384. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_udf_signature.py +0 -0
  385. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_utils.py +0 -0
  386. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/lib/test_webdataset.py +0 -0
  387. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/model/__init__.py +0 -0
  388. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/model/test_bbox.py +0 -0
  389. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/model/test_pose.py +0 -0
  390. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/model/test_segment.py +0 -0
  391. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/model/test_utils.py +0 -0
  392. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/__init__.py +0 -0
  393. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  394. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  395. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  396. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/test_array.py +0 -0
  397. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/test_conditional.py +0 -0
  398. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/test_path.py +0 -0
  399. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/test_random.py +0 -0
  400. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/test_selectable.py +0 -0
  401. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/sql/test_string.py +0 -0
  402. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_asyn.py +0 -0
  403. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_cache.py +0 -0
  404. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_catalog.py +0 -0
  405. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_catalog_loader.py +0 -0
  406. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_cli_datasets.py +0 -0
  407. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_cli_parsing.py +0 -0
  408. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_client.py +0 -0
  409. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_client_gcs.py +0 -0
  410. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_client_s3.py +0 -0
  411. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_config.py +0 -0
  412. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_data_storage.py +0 -0
  413. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_database_engine.py +0 -0
  414. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_dataset.py +0 -0
  415. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_dispatch.py +0 -0
  416. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_fileslice.py +0 -0
  417. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_func.py +0 -0
  418. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_listing.py +0 -0
  419. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_metastore.py +0 -0
  420. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_module_exports.py +0 -0
  421. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_pytorch.py +0 -0
  422. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_query.py +0 -0
  423. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_query_metrics.py +0 -0
  424. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_query_params.py +0 -0
  425. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_script_meta.py +0 -0
  426. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_semver.py +0 -0
  427. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_serializer.py +0 -0
  428. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_session.py +0 -0
  429. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_utils.py +0 -0
  430. {datachain-0.32.0 → datachain-0.32.2}/tests/unit/test_warehouse.py +0 -0
  431. {datachain-0.32.0 → datachain-0.32.2}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.12'
27
+ rev: 'v0.13.0'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.32.0
3
+ Version: 0.32.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -102,7 +102,7 @@ Requires-Dist: scipy; extra == "tests"
102
102
  Requires-Dist: ultralytics; extra == "tests"
103
103
  Provides-Extra: dev
104
104
  Requires-Dist: datachain[docs,tests]; extra == "dev"
105
- Requires-Dist: mypy==1.17.0; extra == "dev"
105
+ Requires-Dist: mypy==1.18.1; extra == "dev"
106
106
  Requires-Dist: types-python-dateutil; extra == "dev"
107
107
  Requires-Dist: types-dateparser; extra == "dev"
108
108
  Requires-Dist: types-pytz; extra == "dev"
@@ -210,45 +210,33 @@ datasets that evolve over time and may occasionally have processing errors.
210
210
  .. code:: py
211
211
 
212
212
  import datachain as dc
213
- from datachain import C, File
214
213
 
215
- def process_file(file: File):
216
- """Process a file, which may occasionally fail."""
214
+ def process_file(file: dc.File) -> tuple[str, str, str]:
215
+ """Analyze a file, may occasionally fail."""
217
216
  try:
218
217
  # Your processing logic here
219
218
  content = file.read_text()
220
- result = analyze_content(content)
221
- return {
222
- "content": content,
223
- "result": result,
224
- "error": None # No error
225
- }
219
+ result = content.upper()
220
+ return content, result, "" # No error
226
221
  except Exception as e:
227
222
  # Return an error that will trigger reprocessing next time
228
- return {
229
- "content": None,
230
- "result": None,
231
- "error": str(e) # Error field will trigger retry
232
- }
223
+ return "", "", str(e) # Error field will trigger retry
233
224
 
234
225
  # Process files efficiently with delta and retry
226
+ # Run it many times, keep adding files, to see delta and retry in action
235
227
  chain = (
236
228
  dc.read_storage(
237
229
  "data/",
238
230
  update=True,
239
231
  delta=True, # Process only new/changed files
240
232
  delta_on="file.path", # Identify files by path
241
- retry_on="error" # Field that indicates errors
233
+ delta_retry="error", # Process files with error again
242
234
  )
243
- .map(processed_result=process_file)
244
- .mutate(
245
- content=C("processed_result.content"),
246
- result=C("processed_result.result"),
247
- error=C("processed_result.error")
248
- )
249
- .save(name="processed_data")
235
+ .map(process_file, output=("content", "result", "error"))
236
+ .save("processed-data")
250
237
  )
251
238
 
239
+
252
240
  Example: LLM based text-file evaluation
253
241
  ---------------------------------------
254
242
 
@@ -89,45 +89,33 @@ datasets that evolve over time and may occasionally have processing errors.
89
89
  .. code:: py
90
90
 
91
91
  import datachain as dc
92
- from datachain import C, File
93
92
 
94
- def process_file(file: File):
95
- """Process a file, which may occasionally fail."""
93
+ def process_file(file: dc.File) -> tuple[str, str, str]:
94
+ """Analyze a file, may occasionally fail."""
96
95
  try:
97
96
  # Your processing logic here
98
97
  content = file.read_text()
99
- result = analyze_content(content)
100
- return {
101
- "content": content,
102
- "result": result,
103
- "error": None # No error
104
- }
98
+ result = content.upper()
99
+ return content, result, "" # No error
105
100
  except Exception as e:
106
101
  # Return an error that will trigger reprocessing next time
107
- return {
108
- "content": None,
109
- "result": None,
110
- "error": str(e) # Error field will trigger retry
111
- }
102
+ return "", "", str(e) # Error field will trigger retry
112
103
 
113
104
  # Process files efficiently with delta and retry
105
+ # Run it many times, keep adding files, to see delta and retry in action
114
106
  chain = (
115
107
  dc.read_storage(
116
108
  "data/",
117
109
  update=True,
118
110
  delta=True, # Process only new/changed files
119
111
  delta_on="file.path", # Identify files by path
120
- retry_on="error" # Field that indicates errors
112
+ delta_retry="error", # Process files with error again
121
113
  )
122
- .map(processed_result=process_file)
123
- .mutate(
124
- content=C("processed_result.content"),
125
- result=C("processed_result.result"),
126
- error=C("processed_result.error")
127
- )
128
- .save(name="processed_data")
114
+ .map(process_file, output=("content", "result", "error"))
115
+ .save("processed-data")
129
116
  )
130
117
 
118
+
131
119
  Example: LLM based text-file evaluation
132
120
  ---------------------------------------
133
121
 
@@ -4,6 +4,13 @@ def on_pre_build(**kwargs):
4
4
 
5
5
  import requests
6
6
 
7
+ # Skip if files already exist
8
+ if os.path.exists("docs/openapi.json") and os.path.exists(
9
+ "docs/studio/api/index.md"
10
+ ):
11
+ print("API docs already exist, skipping generation")
12
+ return
13
+
7
14
  # Download OpenAPI spec
8
15
  response = requests.get(
9
16
  "https://studio.datachain.ai/api/openapi.json",
@@ -159,3 +159,26 @@ dc.read_values(scores=[0.8, 1.5, 2.1]).save("metrics")
159
159
 
160
160
  ds = dc.read_dataset("local.local.metrics")
161
161
  ds.show()
162
+ ```
163
+
164
+ ## Removing Namespaces and Projects
165
+
166
+ Use `delete_namespace` to remove an empty namespace or an empty project within a namespace. Delete will fail if the target is not empty.
167
+
168
+ ### Signature
169
+
170
+ ```python
171
+ def delete_namespace(name: str, session: Optional[Session]) -> None:
172
+ ```
173
+
174
+ - **`<namespace>`** — deletes the namespace (must contain no projects or datasets).
175
+ - **`<namespace>.<project>`** — deletes the project (must contain no datasets).
176
+
177
+ ### Examples
178
+
179
+ ```python
180
+ import datachain as dc
181
+
182
+ dc.delete_namespace("dev.my-project") # delete project
183
+ dc.delete_namespace("dev") # delete namespace
184
+ ```
@@ -19,6 +19,8 @@ for examples of how to create a chain.
19
19
 
20
20
  ::: datachain.lib.dc.datasets.move_dataset
21
21
 
22
+ ::: datachain.lib.namespaces.delete_namespace
23
+
22
24
  ::: datachain.lib.dc.hf.read_hf
23
25
 
24
26
  ::: datachain.lib.dc.json.read_json
@@ -0,0 +1,276 @@
1
+ # Webhooks in Studio
2
+
3
+ ## About webhooks
4
+
5
+ Webhooks provide a way for notifications to be delivered to an external web server whenever certain events occur in [Studio](https://studio.datachain.ai). With webhooks, you configure once which events or activities you want to hear about.
6
+
7
+ When you create a webhook, you specify a URL, any additional information you want us to send, and the events that you want to listen for in Datachain. When the event occurs, Datachain Studio will send an HTTP request with the data about the event to the URL that you specified. If your server is set up to listen for webhook deliveries at that URL, it can take action when it receives one.
8
+
9
+ For example, you can subscribe your webhook to events that occur when a job is created, completes, fails, starts running, and so on. You can then use this webhook to monitor whenever a job fails.
10
+
11
+ ### Alternative
12
+ As an alternative to webhooks, you can use the [CLI commands](../commands/index.md) or some of our available [API endpoints](api/index.md) to get job information, but webhooks require less effort than polling an API since they allow near-real-time updates.
13
+
14
+ ## Available event type
15
+ As of now, your server can receive two different types of events.
16
+
17
+ ### JOB
18
+
19
+ Whenever a job is created or its status changes, you will receive the JOB webhook event. The payload delivered with the job webhook looks like this:
20
+
21
+ Header: `http-x-datachain-event`: `JOB`
22
+
23
+ Payload:
24
+ ```json
25
+ {
26
+ "action": "job_status",
27
+ "job": {
28
+ "id": "da59df47-d121-4eb6-aa76-dc452755544e",
29
+ "status": "COMPLETE",
30
+ "error_message": "",
31
+ "name": "job_query.py",
32
+ "created_at": "2021-07-27T16:02:08.070557",
33
+ "updated_at": "2021-07-27T16:22:08.070557",
34
+ "finished_at": "2021-07-27T16:22:08.070557",
35
+ "url": "https://studio.datachain.ai/team/TeamName/datasets/jobs/da59df47-d121-4eb6-aa76-dc452755544e"
36
+ },
37
+ "timestamp": "2021-07-27T16:22:08.070557",
38
+ "text": "Job job_query.py (da59df47-d121-4eb6-aa76-dc452755544e) changed its status to COMPLETE"
39
+ }
40
+ ```
41
+
42
+ ### PING
43
+ Whenever you add a webhook to your team, Studio sends a PING event to test delivery to your server. You can review the recent deliveries to confirm that the webhook is successfully connected.
44
+
45
+ Header: `http-x-datachain-event`: `PING`.
46
+
47
+ Payload:
48
+ ```json
49
+ {
50
+ "action": "PING",
51
+ "message": "Webhook connection test successful"
52
+ }
53
+ ```
54
+
55
+
56
+ ## Creating webhooks
57
+
58
+ You need admin access to a team to create webhooks for it. To create a webhook, go to the team settings and, under the Webhooks section, click Add new Webhook.
59
+ ![Webhook Settings](../assets/webhook_list.png)
60
+
61
+ Enter the necessary information to create the webhooks.
62
+
63
+ - **URL:** Enter the valid URL where you’d like to receive the webhook payload.
64
+ - **Secret:** A string to use as a secret key. You should choose a random string of text with high entropy. You can use the webhook secret to [validate incoming requests](#validating-webhook-deliveries) to those only originating from Datachain Studio.
65
+ - **Events:** Under events, select the events you would like to trigger the webhook.
66
+ - **JOB:**
67
+ - CREATED: When a job is created but not yet scheduled to run
68
+ - SCHEDULED: Job has been scheduled to run
69
+ - QUEUED: Job is in the queue waiting to be processed
70
+ - INIT: Job is initializing (starting up)
71
+ - RUNNING: When a job starts running
72
+ - COMPLETE: Job has completed successfully
73
+ - FAILED: Job failed with error
74
+ - CANCELED: Job has been canceled successfully
75
+ - CANCELING: Job has been scheduled to cancel
76
+ - TASK: A scheduled task is created.
77
+
78
+ - SSL Verification: By default, we verify SSL certificates when delivering payloads. SSL verification helps ensure that hook payloads are delivered to your URL endpoint securely, keeping your data away from prying eyes. Disabling this option is **not recommended**.
79
+ - HTTP Method: By default, we make a POST request, but you can specify another HTTP method if necessary.
80
+ - Content Type: Optionally, select the data format you want to receive the webhook payload in
81
+ - **application/json** will deliver the JSON payload directly as the body of the `POST` request.
82
+ - **application/x-www-form-urlencoded** will send the JSON payload as a form parameter called `payload`.
83
+
84
+ ![Add webhook](../assets/webhook_dialog.png)
85
+
86
+
87
+ ## Handling webhook deliveries
88
+
89
+ When you create a webhook, you specify a URL and subscribe to event types. When any event that your webhook is subscribed to occurs, Datachain Studio will send an HTTP request with the data about the event to the URL that you specified. If your server is set up at that URL, it can take action when it receives one.
90
+
91
+ ### Setup
92
+
93
+ In order to test your webhook locally, you can use a webhook proxy URL to forward the webhooks from Studio to your computer or codespace. We are using [smee.io](http://smee.io) to provide a webhook proxy url and forward webhooks.
94
+
95
+ 1. Go to [smee.io](http://smee.io)
96
+ 2. Start a new channel
97
+ 3. Copy the full URL under the webhook proxy URL. We will use this URL in the following setup steps.
98
+ 4. Install smee-client if it is not already installed using `npm install --global smee-client`
99
+ 5. To receive forwarded webhooks from smee.io, run the following command in your terminal. Replace the `WEBHOOK_PROXY_URL` with your webhook proxy URL from earlier.
100
+
101
+ ```bash
102
+ smee --path /webhook --port 3000 --url WEBHOOK_PROXY_URL
103
+ ```
104
+
105
+ 6. Keep this running while you test out your webhook. When you want to stop forwarding the webhooks, enter Ctrl + C
106
+ 7. Create a webhook using the steps described above, or edit an existing one, using the URL from earlier.
107
+ 8. Write code to handle webhook deliveries
108
+ 1. Initialize your server to listen for requests to your webhook URL
109
+ 2. Read HTTP headers and body from request
110
+ 3. Take desired action in response to the request.
111
+
112
+ You can use any programming language that you can run on your server.
113
+
114
+ ### Example Code
115
+
116
+ #### Python
117
+
118
+ This example uses Python and the Flask library to handle the routes and HTTP requests.
119
+
120
+ To use this, you must install the Flask library in your project. For example:
121
+
122
+ ```bash
123
+ pip install Flask
124
+ ```
125
+
126
+ Create a Python file with the following contents. Modify the code to handle only the event types that your webhook is subscribed to as well as the ping event that Studio sends when you create a webhook. This example handles job and ping events.
127
+
128
+ ```python
129
+ # You installed the `flask` library earlier.
130
+ from flask import Flask, request
131
+
132
+ # This defines the port where your server should listen.
133
+ # 3000 matches the port that you specified for webhook forwarding.
134
+ #
135
+ # Once you deploy your code to a server,
136
+ # Change this to match the port where your server is listening.
137
+ port = 3000
138
+ secret = "secretString"
139
+
140
+ # This initializes a new Flask application.
141
+ app = Flask(__name__)
142
+
143
+ # This defines a POST route at the `/webhook` path.
144
+ # It matches the path you specified for the smee.io forwarding.
145
+ #
146
+ # Once you deploy your code to a server and update your webhook URL,
147
+ # Change this to match the path portion of the URL for your webhook.
148
+ @app.route('/webhook', methods=['POST'])
149
+ def webhook():
150
+ # Respond to indicate that delivery was successfully received.
151
+ # Your server should respond with a 2XX response
152
+ # within 10 seconds of receiving a webhook delivery.
153
+ # If your server takes longer than that to respond,
154
+ # then Studio terminates the connection.
155
+
156
+ # Check `http-x-datachain-event` header for the event type.
157
+ datachain_event = request.headers.get('http-x-datachain-event')
158
+
159
+ # You should add logic to handle each event type
160
+ # that your webhook is subscribed to.
161
+ # For example, this code handles the `JOB` and `PING` events.
162
+ if datachain_event == 'JOB':
163
+ data = request.get_json()
164
+ action = data.get('action')
165
+ if action == 'job_status':
166
+ print(
167
+ f"Job status for job {data['job']['id']} was" \
168
+ f" changed to {data['job']['status']}"
169
+ )
170
+ else:
171
+ print(f"Unhandled action for the job event: {action}")
172
+ elif datachain_event == 'PING':
173
+ print('Ping event received')
174
+ else:
175
+ print(f"Unhandled event: {datachain_event}")
176
+
177
+ return '', 202 # 202 Accepted status code
178
+
179
+ # This starts the server.
180
+ if __name__ == '__main__':
181
+ app.run(host='0.0.0.0', port=port, debug=True)
182
+ print(f"Server is running on port {port}")
183
+
184
+ ```
185
+
186
+ To test the code, run the file using `python FILENAME`. Make sure that you are forwarding the webhooks in a separate terminal.
187
+
188
+ When you run a job in Studio, you will see output similar to the following:
189
+
190
+ ```prolog
191
+ Ping event received
192
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to CREATED
193
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to SCHEDULED
194
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to QUEUED
195
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to INIT
196
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to RUNNING
197
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to COMPLETE
198
+ ```
199
+
200
+ ## Validating webhook deliveries
201
+
202
+ Once your server is configured to receive payloads, it will listen for any delivery that’s sent to the endpoint you configured. To ensure that your server only processes webhook deliveries that were sent by Datachain Studio and to ensure that the delivery was not tampered with, you should validate webhook signature before processing the delivery further.
203
+
204
+ Studio will use the secret you added when creating your webhook to create a hash signature that’s sent to you with each payload. The hash signature will appear in each delivery as the value of `X-datachain-signature-256` header.
205
+
206
+ In your code that handles webhook deliveries, you should calculate a hash using your secret token and compare the hash Studio sent with the expected hash that you calculate and ensure they match.
207
+
208
+ Notes:
209
+
210
+ - Studio uses HMAC hex digest to compute the hash
211
+ - The hash signature always starts with `sha256=`
212
+ - The hash signature is generated using webhook’s secret token and payload contents.
213
+ - Never use a plain `==` operator. Instead consider using a method like [`secure_compare`](https://www.rubydoc.info/gems/rack/Rack%2FUtils:secure_compare) or [`crypto.timingSafeEqual`](https://nodejs.org/api/crypto.html#cryptotimingsafeequala-b), which performs a "constant time" string comparison to help mitigate certain timing attacks against regular equality operators, or regular loops in JIT-optimized languages.
214
+
215
+ Updating the example above:
216
+
217
+ ```python
218
+ import hashlib
219
+ import hmac
220
+ from flask import abort
221
+
222
+ def verify_signature(payload_body, secret_token, signature_header):
223
+ """Verify the payload was sent from Studio by validating SHA256.
224
+
225
+ Raise and return 403 if not authorized.
226
+
227
+ Args:
228
+ payload_body: request body to verify (request.body())
229
+ secret_token: Studio webhook token (WEBHOOK_SECRET)
230
+ signature_header: header (x-datachain-signature-256)
231
+ """
232
+ if not signature_header:
233
+ abort(403, "X-datachain-signature-256 is missing!")
234
+ hash_object = hmac.new(
235
+ secret_token.encode('utf-8'),
236
+ msg=payload_body,
237
+ digestmod=hashlib.sha256
238
+ )
239
+ expected_signature = "sha256=" + hash_object.hexdigest()
240
+ if not hmac.compare_digest(
241
+ expected_signature, signature_header
242
+ ):
243
+ abort(403, "Request signatures didn't match!")
244
+ ```
245
+
246
+ Add the following call in the api receiver.
247
+
248
+ ```python
249
+ # Get the signature header
250
+ signature = request.headers.get('X-Datachain-Signature-256')
251
+
252
+ # Verify the signature against the raw request body before processing the payload
253
+ if signature:
254
+ verify_signature(request.get_data(), secret, signature)
255
+ else:
256
+ print("Warning: No signature header found")
257
+ ```
258
+
259
+ ## Slack Integration
260
+
261
+ You can also use this webhook feature to send messages to Slack. To integrate Slack with Studio:
262
+
263
+ 1. Following the guide in the [Slack documentation](https://docs.slack.dev/messaging/sending-messages-using-incoming-webhooks/), create an incoming webhook and copy the webhook address, which has the following format: `https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX`.
264
+ 2. Use the address to create a webhook in Studio as described [above](#creating-webhooks)
265
+ 3. You should now be able to view the messages in the connected Slack channel.
266
+
267
+ ## Best practices for using Webhooks
268
+
269
+ 1. You should only subscribe to the webhook events that you need. This will reduce the amount of work your server needs to do.
270
+ 2. The webhook secret should be a random string of text with high entropy. You should securely store your webhook secret in a way that your server can access.
271
+ 3. You should ensure that your server uses an HTTPS connection. By default, Studio will verify SSL certificates when delivering webhooks. Studio recommends that you leave SSL verification enabled.
272
+ 4. Your server should respond with a 2XX response within 10 seconds of receiving a webhook delivery. If your server takes longer than that to respond, then Studio terminates the connection and considers the delivery a failure.
273
+ 5. Check the event header and action type before processing the event.
274
+ 6. Make sure the endpoints are idempotent, meaning that if multiple requests for the same event are received, the server handles them correctly.
275
+ 7. Datachain Studio may deliver webhooks in a different order than the order in which the events took place. If you need to know when the event occurred relative to another event, you should use the timestamps that are included in the delivery payload.
276
+ 8. Ten consecutive delivery failures will disable the webhook.
@@ -119,6 +119,7 @@ nav:
119
119
  - Local DB Migrations: guide/db_migrations.md
120
120
  - 🔗 Studio:
121
121
  - API: studio/api/index.md
122
+ - Webhooks: studio/webhooks.md
122
123
  - 🤝 Contributing: contributing.md
123
124
 
124
125
  - DataChain Website ↗: https://datachain.ai" target="_blank"
@@ -119,7 +119,7 @@ tests = [
119
119
  ]
120
120
  dev = [
121
121
  "datachain[docs,tests]",
122
- "mypy==1.17.0",
122
+ "mypy==1.18.1",
123
123
  "types-python-dateutil",
124
124
  "types-dateparser",
125
125
  "types-pytz",
@@ -37,7 +37,7 @@ from datachain.lib.file import (
37
37
  VideoFrame,
38
38
  )
39
39
  from datachain.lib.model_store import ModelStore
40
- from datachain.lib.namespaces import delete as delete_namespace
40
+ from datachain.lib.namespaces import delete_namespace
41
41
  from datachain.lib.projects import create as create_project
42
42
  from datachain.lib.udf import Aggregator, Generator, Mapper
43
43
  from datachain.lib.utils import AbstractUDF, DataChainError
@@ -619,7 +619,7 @@ class DatasetRecord:
619
619
  if not self.versions:
620
620
  return "1.0.0"
621
621
 
622
- major, minor, patch = semver.parse(self.latest_version)
622
+ major, _, _ = semver.parse(self.latest_version)
623
623
  return semver.create(major + 1, 0, 0)
624
624
 
625
625
  @property
@@ -630,7 +630,7 @@ class DatasetRecord:
630
630
  if not self.versions:
631
631
  return "1.0.0"
632
632
 
633
- major, minor, patch = semver.parse(self.latest_version)
633
+ major, minor, _ = semver.parse(self.latest_version)
634
634
  return semver.create(major, minor + 1, 0)
635
635
 
636
636
  @property
@@ -1,8 +1,14 @@
1
1
  import inspect
2
+ import sys
2
3
  from datetime import datetime
3
4
  from enum import Enum
4
5
  from typing import Annotated, Literal, Union, get_args, get_origin
5
6
 
7
+ if sys.version_info >= (3, 10):
8
+ from types import UnionType
9
+ else:
10
+ UnionType = None
11
+
6
12
  from pydantic import BaseModel
7
13
  from typing_extensions import Literal as LiteralEx
8
14
 
@@ -34,6 +40,13 @@ PYTHON_TO_SQL = {
34
40
  }
35
41
 
36
42
 
43
+ def _is_union(orig) -> bool:
44
+ if orig == Union:
45
+ return True
46
+ # some code is unreachable in python<3.10
47
+ return UnionType is not None and orig is UnionType # type: ignore[unreachable]
48
+
49
+
37
50
  def python_to_sql(typ): # noqa: PLR0911
38
51
  if inspect.isclass(typ):
39
52
  if issubclass(typ, SQLType):
@@ -69,9 +82,10 @@ def python_to_sql(typ): # noqa: PLR0911
69
82
  if inspect.isclass(orig) and issubclass(dict, orig):
70
83
  return JSON
71
84
 
72
- if orig == Union:
85
+ if _is_union(orig):
73
86
  if len(args) == 2 and (type(None) in args):
74
- return python_to_sql(args[0])
87
+ non_none_arg = args[0] if args[0] is not type(None) else args[1]
88
+ return python_to_sql(non_none_arg)
75
89
 
76
90
  if _is_union_str_literal(orig, args):
77
91
  return String
@@ -95,7 +109,7 @@ def list_of_args_to_type(args) -> SQLType:
95
109
 
96
110
 
97
111
  def _is_json_inside_union(orig, args) -> bool:
98
- if orig == Union and len(args) >= 2:
112
+ if _is_union(orig) and len(args) >= 2:
99
113
  # List in JSON: Union[dict, list[dict]]
100
114
  args_no_nones = [arg for arg in args if arg != type(None)] # noqa: E721
101
115
  if len(args_no_nones) == 2:
@@ -112,6 +126,6 @@ def _is_json_inside_union(orig, args) -> bool:
112
126
 
113
127
 
114
128
  def _is_union_str_literal(orig, args) -> bool:
115
- if orig != Union:
129
+ if not _is_union(orig):
116
130
  return False
117
131
  return all(arg is str or get_origin(arg) in (Literal, LiteralEx) for arg in args)
@@ -26,8 +26,14 @@ def read_parquet(
26
26
  """Generate chain from parquet files.
27
27
 
28
28
  Parameters:
29
- path: Storage URI with directory. URI must start with storage prefix such
30
- as `s3://`, `gs://`, `az://` or "file:///".
29
+ path: Storage path(s) or URI(s). Can be a local path or start with a
30
+ storage prefix like `s3://`, `gs://`, `az://`, `hf://` or "file:///".
31
+ Supports glob patterns:
32
+ - `*` : wildcard
33
+ - `**` : recursive wildcard
34
+ - `?` : single character
35
+ - `{a,b}` : brace expansion list
36
+ - `{1..9}` : brace numeric or alphabetic range
31
37
  partitioning: Any pyarrow partitioning schema.
32
38
  output: Dictionary defining column names and their corresponding types.
33
39
  column: Created column name.
@@ -43,10 +49,19 @@ def read_parquet(
43
49
  dc.read_parquet("s3://mybucket/file.parquet")
44
50
  ```
45
51
 
46
- Reading a partitioned dataset from a directory:
52
+ All files from a directory:
47
53
  ```py
48
- import datachain as dc
49
- dc.read_parquet("s3://mybucket/dir")
54
+ dc.read_parquet("s3://mybucket/dir/")
55
+ ```
56
+
57
+ Only parquet files from a directory, and all its subdirectories:
58
+ ```py
59
+ dc.read_parquet("s3://mybucket/dir/**/*.parquet")
60
+ ```
61
+
62
+ Using filename patterns - numeric, list, starting with zeros:
63
+ ```py
64
+ dc.read_parquet("s3://mybucket/202{1..4}/{yellow,green}-{01..12}.parquet")
50
65
  ```
51
66
  """
52
67
  from .storage import read_storage
@@ -51,7 +51,8 @@ def read_storage(
51
51
  - `*` : wildcard
52
52
  - `**` : recursive wildcard
53
53
  - `?` : single character
54
- - `{a,b}` : brace expansion
54
+ - `{a,b}` : brace expansion list
55
+ - `{1..9}` : brace numeric or alphabetic range
55
56
  type: read file as "binary", "text", or "image" data. Default is "binary".
56
57
  recursive: search recursively for the given path.
57
58
  column: Column name that will contain File objects. Default is "file".
@@ -88,27 +89,32 @@ def read_storage(
88
89
  Simple call from s3:
89
90
  ```python
90
91
  import datachain as dc
91
- chain = dc.read_storage("s3://my-bucket/my-dir")
92
+ dc.read_storage("s3://my-bucket/my-dir")
92
93
  ```
93
94
 
94
95
  Match all .json files recursively using glob pattern
95
96
  ```py
96
- chain = dc.read_storage("gs://bucket/meta/**/*.json")
97
+ dc.read_storage("gs://bucket/meta/**/*.json")
97
98
  ```
98
99
 
99
100
  Match image file extensions for directories with pattern
100
101
  ```py
101
- chain = dc.read_storage("s3://bucket/202?/**/*.{jpg,jpeg,png}")
102
+ dc.read_storage("s3://bucket/202?/**/*.{jpg,jpeg,png}")
103
+ ```
104
+
105
+ By ranges in filenames:
106
+ ```py
107
+ dc.read_storage("s3://bucket/202{1..4}/**/*.{jpg,jpeg,png}")
102
108
  ```
103
109
 
104
110
  Multiple URIs:
105
111
  ```python
106
- chain = dc.read_storage(["s3://my-bkt/dir1", "s3://bucket2/dir2/dir3"])
112
+ dc.read_storage(["s3://my-bkt/dir1", "s3://bucket2/dir2/dir3"])
107
113
  ```
108
114
 
109
115
  With AWS S3-compatible storage:
110
116
  ```python
111
- chain = dc.read_storage(
117
+ dc.read_storage(
112
118
  "s3://my-bucket/my-dir",
113
119
  client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
114
120
  )