datachain 0.31.1__tar.gz → 0.31.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (428) hide show
  1. {datachain-0.31.1 → datachain-0.31.3}/.gitignore +2 -0
  2. {datachain-0.31.1 → datachain-0.31.3}/PKG-INFO +1 -1
  3. datachain-0.31.3/docs/api_hooks.py +35 -0
  4. datachain-0.31.3/docs/templates/main.dot +81 -0
  5. datachain-0.31.3/docs/templates/operation.dot +47 -0
  6. datachain-0.31.3/docs/templates/responses.def +61 -0
  7. {datachain-0.31.1 → datachain-0.31.3}/mkdocs.yml +4 -0
  8. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/catalog/catalog.py +58 -22
  9. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/fsspec.py +11 -0
  10. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/storage.py +67 -25
  11. datachain-0.31.3/src/datachain/lib/dc/storage_pattern.py +300 -0
  12. {datachain-0.31.1 → datachain-0.31.3}/src/datachain.egg-info/PKG-INFO +1 -1
  13. {datachain-0.31.1 → datachain-0.31.3}/src/datachain.egg-info/SOURCES.txt +8 -0
  14. {datachain-0.31.1 → datachain-0.31.3}/tests/conftest.py +4 -1
  15. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_datachain.py +5 -4
  16. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_datachain_merge.py +4 -4
  17. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_datasets.py +3 -3
  18. datachain-0.31.3/tests/func/test_storage_pattern.py +261 -0
  19. datachain-0.31.3/tests/unit/lib/test_storage_pattern.py +121 -0
  20. datachain-0.31.3/tests/unit/sql/sqlite/__init__.py +0 -0
  21. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_query.py +22 -3
  22. {datachain-0.31.1 → datachain-0.31.3}/.cruft.json +0 -0
  23. {datachain-0.31.1 → datachain-0.31.3}/.gitattributes +0 -0
  24. {datachain-0.31.1 → datachain-0.31.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  25. {datachain-0.31.1 → datachain-0.31.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  26. {datachain-0.31.1 → datachain-0.31.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  27. {datachain-0.31.1 → datachain-0.31.3}/.github/codecov.yaml +0 -0
  28. {datachain-0.31.1 → datachain-0.31.3}/.github/dependabot.yml +0 -0
  29. {datachain-0.31.1 → datachain-0.31.3}/.github/workflows/benchmarks.yml +0 -0
  30. {datachain-0.31.1 → datachain-0.31.3}/.github/workflows/release.yml +0 -0
  31. {datachain-0.31.1 → datachain-0.31.3}/.github/workflows/tests-studio.yml +0 -0
  32. {datachain-0.31.1 → datachain-0.31.3}/.github/workflows/tests.yml +0 -0
  33. {datachain-0.31.1 → datachain-0.31.3}/.github/workflows/update-template.yaml +0 -0
  34. {datachain-0.31.1 → datachain-0.31.3}/.pre-commit-config.yaml +0 -0
  35. {datachain-0.31.1 → datachain-0.31.3}/CODE_OF_CONDUCT.rst +0 -0
  36. {datachain-0.31.1 → datachain-0.31.3}/LICENSE +0 -0
  37. {datachain-0.31.1 → datachain-0.31.3}/README.rst +0 -0
  38. {datachain-0.31.1 → datachain-0.31.3}/docs/assets/captioned_cartoons.png +0 -0
  39. {datachain-0.31.1 → datachain-0.31.3}/docs/assets/datachain-white.svg +0 -0
  40. {datachain-0.31.1 → datachain-0.31.3}/docs/assets/datachain.svg +0 -0
  41. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/auth/login.md +0 -0
  42. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/auth/logout.md +0 -0
  43. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/auth/team.md +0 -0
  44. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/auth/token.md +0 -0
  45. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/index.md +0 -0
  46. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/job/cancel.md +0 -0
  47. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/job/clusters.md +0 -0
  48. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/job/logs.md +0 -0
  49. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/job/ls.md +0 -0
  50. {datachain-0.31.1 → datachain-0.31.3}/docs/commands/job/run.md +0 -0
  51. {datachain-0.31.1 → datachain-0.31.3}/docs/contributing.md +0 -0
  52. {datachain-0.31.1 → datachain-0.31.3}/docs/css/github-permalink-style.css +0 -0
  53. {datachain-0.31.1 → datachain-0.31.3}/docs/examples.md +0 -0
  54. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/db_migrations.md +0 -0
  55. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/delta.md +0 -0
  56. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/env.md +0 -0
  57. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/index.md +0 -0
  58. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/namespaces.md +0 -0
  59. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/processing.md +0 -0
  60. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/remotes.md +0 -0
  61. {datachain-0.31.1 → datachain-0.31.3}/docs/guide/retry.md +0 -0
  62. {datachain-0.31.1 → datachain-0.31.3}/docs/index.md +0 -0
  63. {datachain-0.31.1 → datachain-0.31.3}/docs/overrides/main.html +0 -0
  64. {datachain-0.31.1 → datachain-0.31.3}/docs/quick-start.md +0 -0
  65. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/arrowrow.md +0 -0
  66. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/bbox.md +0 -0
  67. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/file.md +0 -0
  68. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/imagefile.md +0 -0
  69. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/index.md +0 -0
  70. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/pose.md +0 -0
  71. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/segment.md +0 -0
  72. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/tarvfile.md +0 -0
  73. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/textfile.md +0 -0
  74. {datachain-0.31.1 → datachain-0.31.3}/docs/references/data-types/videofile.md +0 -0
  75. {datachain-0.31.1 → datachain-0.31.3}/docs/references/datachain.md +0 -0
  76. {datachain-0.31.1 → datachain-0.31.3}/docs/references/func.md +0 -0
  77. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/aggregate.md +0 -0
  78. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/array.md +0 -0
  79. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/conditional.md +0 -0
  80. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/numeric.md +0 -0
  81. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/path.md +0 -0
  82. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/random.md +0 -0
  83. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/string.md +0 -0
  84. {datachain-0.31.1 → datachain-0.31.3}/docs/references/functions/window.md +0 -0
  85. {datachain-0.31.1 → datachain-0.31.3}/docs/references/index.md +0 -0
  86. {datachain-0.31.1 → datachain-0.31.3}/docs/references/toolkit.md +0 -0
  87. {datachain-0.31.1 → datachain-0.31.3}/docs/references/torch.md +0 -0
  88. {datachain-0.31.1 → datachain-0.31.3}/docs/references/udf.md +0 -0
  89. /datachain-0.31.1/src/datachain/fs/__init__.py → /datachain-0.31.3/docs/studio/api/.gitkeep +0 -0
  90. {datachain-0.31.1 → datachain-0.31.3}/docs/tutorials.md +0 -0
  91. {datachain-0.31.1 → datachain-0.31.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  92. {datachain-0.31.1 → datachain-0.31.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  93. {datachain-0.31.1 → datachain-0.31.3}/examples/computer_vision/openimage-detect.py +0 -0
  94. {datachain-0.31.1 → datachain-0.31.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
  95. {datachain-0.31.1 → datachain-0.31.3}/examples/computer_vision/ultralytics-pose.py +0 -0
  96. {datachain-0.31.1 → datachain-0.31.3}/examples/computer_vision/ultralytics-segment.py +0 -0
  97. {datachain-0.31.1 → datachain-0.31.3}/examples/get_started/common_sql_functions.py +0 -0
  98. {datachain-0.31.1 → datachain-0.31.3}/examples/get_started/json-csv-reader.py +0 -0
  99. {datachain-0.31.1 → datachain-0.31.3}/examples/get_started/nested_datamodel.py +0 -0
  100. {datachain-0.31.1 → datachain-0.31.3}/examples/get_started/torch-loader.py +0 -0
  101. {datachain-0.31.1 → datachain-0.31.3}/examples/get_started/udfs/parallel.py +0 -0
  102. {datachain-0.31.1 → datachain-0.31.3}/examples/get_started/udfs/simple.py +0 -0
  103. {datachain-0.31.1 → datachain-0.31.3}/examples/get_started/udfs/stateful.py +0 -0
  104. {datachain-0.31.1 → datachain-0.31.3}/examples/incremental_processing/delta.py +0 -0
  105. {datachain-0.31.1 → datachain-0.31.3}/examples/incremental_processing/retry.py +0 -0
  106. {datachain-0.31.1 → datachain-0.31.3}/examples/incremental_processing/utils.py +0 -0
  107. {datachain-0.31.1 → datachain-0.31.3}/examples/llm_and_nlp/claude-query.py +0 -0
  108. {datachain-0.31.1 → datachain-0.31.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  109. {datachain-0.31.1 → datachain-0.31.3}/examples/multimodal/audio-to-text.py +0 -0
  110. {datachain-0.31.1 → datachain-0.31.3}/examples/multimodal/clip_inference.py +0 -0
  111. {datachain-0.31.1 → datachain-0.31.3}/examples/multimodal/hf_pipeline.py +0 -0
  112. {datachain-0.31.1 → datachain-0.31.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
  113. {datachain-0.31.1 → datachain-0.31.3}/examples/multimodal/wds.py +0 -0
  114. {datachain-0.31.1 → datachain-0.31.3}/examples/multimodal/wds_filtered.py +0 -0
  115. {datachain-0.31.1 → datachain-0.31.3}/noxfile.py +0 -0
  116. {datachain-0.31.1 → datachain-0.31.3}/pyproject.toml +0 -0
  117. {datachain-0.31.1 → datachain-0.31.3}/setup.cfg +0 -0
  118. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/__init__.py +0 -0
  119. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/__main__.py +0 -0
  120. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/asyn.py +0 -0
  121. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cache.py +0 -0
  122. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/catalog/__init__.py +0 -0
  123. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/catalog/datasource.py +0 -0
  124. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/catalog/loader.py +0 -0
  125. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/__init__.py +0 -0
  126. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/__init__.py +0 -0
  127. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/datasets.py +0 -0
  128. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/du.py +0 -0
  129. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/index.py +0 -0
  130. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/ls.py +0 -0
  131. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/misc.py +0 -0
  132. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/query.py +0 -0
  133. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/commands/show.py +0 -0
  134. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/parser/__init__.py +0 -0
  135. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/parser/job.py +0 -0
  136. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/parser/studio.py +0 -0
  137. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/parser/utils.py +0 -0
  138. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/cli/utils.py +0 -0
  139. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/__init__.py +0 -0
  140. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/azure.py +0 -0
  141. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/fileslice.py +0 -0
  142. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/gcs.py +0 -0
  143. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/hf.py +0 -0
  144. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/local.py +0 -0
  145. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/client/s3.py +0 -0
  146. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/config.py +0 -0
  147. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/__init__.py +0 -0
  148. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/db_engine.py +0 -0
  149. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/job.py +0 -0
  150. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/metastore.py +0 -0
  151. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/schema.py +0 -0
  152. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/serializer.py +0 -0
  153. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/sqlite.py +0 -0
  154. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/data_storage/warehouse.py +0 -0
  155. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/dataset.py +0 -0
  156. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/delta.py +0 -0
  157. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/diff/__init__.py +0 -0
  158. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/error.py +0 -0
  159. {datachain-0.31.1/src/datachain/lib → datachain-0.31.3/src/datachain/fs}/__init__.py +0 -0
  160. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/fs/reference.py +0 -0
  161. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/fs/utils.py +0 -0
  162. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/__init__.py +0 -0
  163. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/aggregate.py +0 -0
  164. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/array.py +0 -0
  165. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/base.py +0 -0
  166. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/conditional.py +0 -0
  167. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/func.py +0 -0
  168. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/numeric.py +0 -0
  169. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/path.py +0 -0
  170. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/random.py +0 -0
  171. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/string.py +0 -0
  172. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/func/window.py +0 -0
  173. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/job.py +0 -0
  174. {datachain-0.31.1/src/datachain/lib/convert → datachain-0.31.3/src/datachain/lib}/__init__.py +0 -0
  175. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/arrow.py +0 -0
  176. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/audio.py +0 -0
  177. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/clip.py +0 -0
  178. {datachain-0.31.1/src/datachain/remote → datachain-0.31.3/src/datachain/lib/convert}/__init__.py +0 -0
  179. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/convert/flatten.py +0 -0
  180. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
  181. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
  182. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/convert/unflatten.py +0 -0
  183. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  184. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/data_model.py +0 -0
  185. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dataset_info.py +0 -0
  186. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/__init__.py +0 -0
  187. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/csv.py +0 -0
  188. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/database.py +0 -0
  189. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/datachain.py +0 -0
  190. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/datasets.py +0 -0
  191. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/hf.py +0 -0
  192. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/json.py +0 -0
  193. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/listings.py +0 -0
  194. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/pandas.py +0 -0
  195. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/parquet.py +0 -0
  196. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/records.py +0 -0
  197. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/utils.py +0 -0
  198. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/dc/values.py +0 -0
  199. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/file.py +0 -0
  200. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/hf.py +0 -0
  201. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/image.py +0 -0
  202. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/listing.py +0 -0
  203. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/listing_info.py +0 -0
  204. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/meta_formats.py +0 -0
  205. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/model_store.py +0 -0
  206. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/namespaces.py +0 -0
  207. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/projects.py +0 -0
  208. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/pytorch.py +0 -0
  209. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/settings.py +0 -0
  210. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/signal_schema.py +0 -0
  211. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/tar.py +0 -0
  212. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/text.py +0 -0
  213. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/udf.py +0 -0
  214. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/udf_signature.py +0 -0
  215. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/utils.py +0 -0
  216. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/video.py +0 -0
  217. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/webdataset.py +0 -0
  218. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/lib/webdataset_laion.py +0 -0
  219. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/listing.py +0 -0
  220. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/__init__.py +0 -0
  221. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/bbox.py +0 -0
  222. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/pose.py +0 -0
  223. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/segment.py +0 -0
  224. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/ultralytics/__init__.py +0 -0
  225. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/ultralytics/bbox.py +0 -0
  226. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/ultralytics/pose.py +0 -0
  227. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/ultralytics/segment.py +0 -0
  228. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/model/utils.py +0 -0
  229. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/namespace.py +0 -0
  230. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/node.py +0 -0
  231. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/nodes_fetcher.py +0 -0
  232. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/nodes_thread_pool.py +0 -0
  233. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/progress.py +0 -0
  234. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/project.py +0 -0
  235. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/py.typed +0 -0
  236. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/__init__.py +0 -0
  237. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/batch.py +0 -0
  238. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/dataset.py +0 -0
  239. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/dispatch.py +0 -0
  240. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/metrics.py +0 -0
  241. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/params.py +0 -0
  242. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/queue.py +0 -0
  243. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/schema.py +0 -0
  244. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/session.py +0 -0
  245. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/udf.py +0 -0
  246. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/query/utils.py +0 -0
  247. {datachain-0.31.1/src/datachain/sql/functions → datachain-0.31.3/src/datachain/remote}/__init__.py +0 -0
  248. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/remote/studio.py +0 -0
  249. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/script_meta.py +0 -0
  250. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/semver.py +0 -0
  251. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/__init__.py +0 -0
  252. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/default/__init__.py +0 -0
  253. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/default/base.py +0 -0
  254. {datachain-0.31.1/tests/benchmarks → datachain-0.31.3/src/datachain/sql/functions}/__init__.py +0 -0
  255. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/functions/aggregate.py +0 -0
  256. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/functions/array.py +0 -0
  257. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/functions/conditional.py +0 -0
  258. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/functions/numeric.py +0 -0
  259. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/functions/path.py +0 -0
  260. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/functions/random.py +0 -0
  261. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/functions/string.py +0 -0
  262. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/postgresql_dialect.py +0 -0
  263. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/postgresql_types.py +0 -0
  264. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/selectable.py +0 -0
  265. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  266. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/sqlite/base.py +0 -0
  267. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/sqlite/types.py +0 -0
  268. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/sqlite/vector.py +0 -0
  269. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/types.py +0 -0
  270. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/sql/utils.py +0 -0
  271. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/studio.py +0 -0
  272. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/telemetry.py +0 -0
  273. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/toolkit/__init__.py +0 -0
  274. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/toolkit/split.py +0 -0
  275. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/torch/__init__.py +0 -0
  276. {datachain-0.31.1 → datachain-0.31.3}/src/datachain/utils.py +0 -0
  277. {datachain-0.31.1 → datachain-0.31.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  278. {datachain-0.31.1 → datachain-0.31.3}/src/datachain.egg-info/entry_points.txt +0 -0
  279. {datachain-0.31.1 → datachain-0.31.3}/src/datachain.egg-info/requires.txt +0 -0
  280. {datachain-0.31.1 → datachain-0.31.3}/src/datachain.egg-info/top_level.txt +0 -0
  281. {datachain-0.31.1 → datachain-0.31.3}/tests/__init__.py +0 -0
  282. {datachain-0.31.1/tests/examples → datachain-0.31.3/tests/benchmarks}/__init__.py +0 -0
  283. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/conftest.py +0 -0
  284. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  285. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/datasets/.dvc/config +0 -0
  286. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/datasets/.gitignore +0 -0
  287. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  288. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/test_datachain.py +0 -0
  289. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/test_ls.py +0 -0
  290. {datachain-0.31.1 → datachain-0.31.3}/tests/benchmarks/test_version.py +0 -0
  291. {datachain-0.31.1 → datachain-0.31.3}/tests/data.py +0 -0
  292. {datachain-0.31.1/tests/func → datachain-0.31.3/tests/examples}/__init__.py +0 -0
  293. {datachain-0.31.1 → datachain-0.31.3}/tests/examples/test_examples.py +0 -0
  294. {datachain-0.31.1 → datachain-0.31.3}/tests/examples/test_wds_e2e.py +0 -0
  295. {datachain-0.31.1 → datachain-0.31.3}/tests/examples/wds_data.py +0 -0
  296. {datachain-0.31.1/tests/func/functions → datachain-0.31.3/tests/func}/__init__.py +0 -0
  297. {datachain-0.31.1 → datachain-0.31.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  298. {datachain-0.31.1 → datachain-0.31.3}/tests/func/data/lena.jpg +0 -0
  299. {datachain-0.31.1 → datachain-0.31.3}/tests/func/fake-service-account-credentials.json +0 -0
  300. {datachain-0.31.1/tests/func/model → datachain-0.31.3/tests/func/functions}/__init__.py +0 -0
  301. {datachain-0.31.1 → datachain-0.31.3}/tests/func/functions/test_aggregate.py +0 -0
  302. {datachain-0.31.1 → datachain-0.31.3}/tests/func/functions/test_array.py +0 -0
  303. {datachain-0.31.1 → datachain-0.31.3}/tests/func/functions/test_conditional.py +0 -0
  304. {datachain-0.31.1 → datachain-0.31.3}/tests/func/functions/test_numeric.py +0 -0
  305. {datachain-0.31.1 → datachain-0.31.3}/tests/func/functions/test_path.py +0 -0
  306. {datachain-0.31.1 → datachain-0.31.3}/tests/func/functions/test_random.py +0 -0
  307. {datachain-0.31.1 → datachain-0.31.3}/tests/func/functions/test_string.py +0 -0
  308. {datachain-0.31.1/tests/unit → datachain-0.31.3/tests/func/model}/__init__.py +0 -0
  309. {datachain-0.31.1 → datachain-0.31.3}/tests/func/model/data/running-mask0.png +0 -0
  310. {datachain-0.31.1 → datachain-0.31.3}/tests/func/model/data/running-mask1.png +0 -0
  311. {datachain-0.31.1 → datachain-0.31.3}/tests/func/model/data/running.jpg +0 -0
  312. {datachain-0.31.1 → datachain-0.31.3}/tests/func/model/data/ships.jpg +0 -0
  313. {datachain-0.31.1 → datachain-0.31.3}/tests/func/model/test_yolo.py +0 -0
  314. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_audio.py +0 -0
  315. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_batching.py +0 -0
  316. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_catalog.py +0 -0
  317. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_client.py +0 -0
  318. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_cloud_transfer.py +0 -0
  319. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_data_storage.py +0 -0
  320. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_dataset_query.py +0 -0
  321. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_delta.py +0 -0
  322. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_feature_pickling.py +0 -0
  323. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_file.py +0 -0
  324. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_hf.py +0 -0
  325. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_hidden_field.py +0 -0
  326. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_image.py +0 -0
  327. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_listing.py +0 -0
  328. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_ls.py +0 -0
  329. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_meta_formats.py +0 -0
  330. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_metastore.py +0 -0
  331. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_metrics.py +0 -0
  332. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_mutate.py +0 -0
  333. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_pull.py +0 -0
  334. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_pytorch.py +0 -0
  335. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_query.py +0 -0
  336. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_read_database.py +0 -0
  337. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_read_dataset_remote.py +0 -0
  338. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  339. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_retry.py +0 -0
  340. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_session.py +0 -0
  341. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_studio_datetime_parsing.py +0 -0
  342. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_to_database.py +0 -0
  343. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_toolkit.py +0 -0
  344. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_video.py +0 -0
  345. {datachain-0.31.1 → datachain-0.31.3}/tests/func/test_warehouse.py +0 -0
  346. {datachain-0.31.1 → datachain-0.31.3}/tests/scripts/feature_class.py +0 -0
  347. {datachain-0.31.1 → datachain-0.31.3}/tests/scripts/feature_class_exception.py +0 -0
  348. {datachain-0.31.1 → datachain-0.31.3}/tests/scripts/feature_class_parallel.py +0 -0
  349. {datachain-0.31.1 → datachain-0.31.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  350. {datachain-0.31.1 → datachain-0.31.3}/tests/scripts/name_len_slow.py +0 -0
  351. {datachain-0.31.1 → datachain-0.31.3}/tests/test_atomicity.py +0 -0
  352. {datachain-0.31.1 → datachain-0.31.3}/tests/test_cli_e2e.py +0 -0
  353. {datachain-0.31.1 → datachain-0.31.3}/tests/test_cli_studio.py +0 -0
  354. {datachain-0.31.1 → datachain-0.31.3}/tests/test_import_time.py +0 -0
  355. {datachain-0.31.1 → datachain-0.31.3}/tests/test_query_e2e.py +0 -0
  356. {datachain-0.31.1 → datachain-0.31.3}/tests/test_telemetry.py +0 -0
  357. {datachain-0.31.1/tests/unit/lib → datachain-0.31.3/tests/unit}/__init__.py +0 -0
  358. {datachain-0.31.1/tests/unit/model → datachain-0.31.3/tests/unit/lib}/__init__.py +0 -0
  359. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/conftest.py +0 -0
  360. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_arrow.py +0 -0
  361. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_audio.py +0 -0
  362. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_clip.py +0 -0
  363. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_datachain.py +0 -0
  364. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  365. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_datachain_merge.py +0 -0
  366. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_diff.py +0 -0
  367. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_feature.py +0 -0
  368. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_feature_utils.py +0 -0
  369. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_file.py +0 -0
  370. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_hf.py +0 -0
  371. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_image.py +0 -0
  372. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_listing_info.py +0 -0
  373. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_namespace.py +0 -0
  374. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_partition_by.py +0 -0
  375. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_project.py +0 -0
  376. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_python_to_sql.py +0 -0
  377. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_schema.py +0 -0
  378. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_settings.py +0 -0
  379. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_signal_schema.py +0 -0
  380. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_sql_to_python.py +0 -0
  381. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_text.py +0 -0
  382. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_udf.py +0 -0
  383. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_udf_signature.py +0 -0
  384. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_utils.py +0 -0
  385. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/lib/test_webdataset.py +0 -0
  386. {datachain-0.31.1/tests/unit/sql → datachain-0.31.3/tests/unit/model}/__init__.py +0 -0
  387. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/model/test_bbox.py +0 -0
  388. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/model/test_pose.py +0 -0
  389. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/model/test_segment.py +0 -0
  390. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/model/test_utils.py +0 -0
  391. {datachain-0.31.1/tests/unit/sql/sqlite → datachain-0.31.3/tests/unit/sql}/__init__.py +0 -0
  392. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/sqlite/test_types.py +0 -0
  393. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  394. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/test_array.py +0 -0
  395. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/test_conditional.py +0 -0
  396. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/test_path.py +0 -0
  397. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/test_random.py +0 -0
  398. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/test_selectable.py +0 -0
  399. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/sql/test_string.py +0 -0
  400. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_asyn.py +0 -0
  401. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_cache.py +0 -0
  402. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_catalog.py +0 -0
  403. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_catalog_loader.py +0 -0
  404. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_cli_datasets.py +0 -0
  405. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_cli_parsing.py +0 -0
  406. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_client.py +0 -0
  407. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_client_gcs.py +0 -0
  408. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_client_s3.py +0 -0
  409. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_config.py +0 -0
  410. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_data_storage.py +0 -0
  411. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_database_engine.py +0 -0
  412. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_dataset.py +0 -0
  413. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_dispatch.py +0 -0
  414. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_fileslice.py +0 -0
  415. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_func.py +0 -0
  416. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_listing.py +0 -0
  417. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_metastore.py +0 -0
  418. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_module_exports.py +0 -0
  419. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_pytorch.py +0 -0
  420. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_query_metrics.py +0 -0
  421. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_query_params.py +0 -0
  422. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_script_meta.py +0 -0
  423. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_semver.py +0 -0
  424. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_serializer.py +0 -0
  425. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_session.py +0 -0
  426. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_utils.py +0 -0
  427. {datachain-0.31.1 → datachain-0.31.3}/tests/unit/test_warehouse.py +0 -0
  428. {datachain-0.31.1 → datachain-0.31.3}/tests/utils.py +0 -0
@@ -149,3 +149,5 @@ cython_debug/
149
149
  # for local dev, e.g. LLM generated files, .env.test to override
150
150
  # test variables, local scripts to try, etc
151
151
  local/
152
+ docs/openapi.json
153
+ docs/studio/api/index.md
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.31.1
3
+ Version: 0.31.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -0,0 +1,35 @@
1
+ # ruff: noqa: INP001
2
def on_pre_build(**kwargs):
    """MkDocs ``on_pre_build`` hook that regenerates the Studio API page.

    Downloads the Studio OpenAPI spec to ``docs/openapi.json`` and then runs
    widdershins (through ``npx``) to render it into
    ``docs/studio/api/index.md`` using the templates in ``docs/templates``.

    Parameters:
        **kwargs: Keyword arguments passed by the hook caller; unused.

    Raises:
        requests.HTTPError: If the spec download returns an error status.
    """
    import os
    import shutil
    import subprocess

    import requests

    spec_path = "docs/openapi.json"
    output_path = "docs/studio/api/index.md"

    # Download OpenAPI spec
    response = requests.get(
        "https://studio.datachain.ai/api/openapi.json",
        timeout=30,
    )
    # Fail here rather than writing an HTML/JSON error page out as the spec.
    response.raise_for_status()

    # Write to file; pin the encoding so the result does not depend on locale.
    print("Writing OpenAPI spec to docs/openapi.json")
    with open(spec_path, "w", encoding="utf-8") as f:
        f.write(response.text)

    # The generated page is git-ignored, so its directory may not exist yet.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Generate API docs using widdershins
    print("Generating API docs using widdershins")
    cmd = [
        # Resolve the launcher explicitly (e.g. ``npx.cmd`` on Windows).
        shutil.which("npx") or "npx",
        "widdershins",
        spec_path,
        "-o",
        output_path,
        "--language_tabs",
        "python:Python",
        "--language_tabs",
        "shell:curl",
        "--expandBody",
        "true",
        "--summary",
        "--shallowSchemas",
        "--omitBody",
        "--resolve",
        "--httpsnippet",
        "-u",
        "docs/templates",
        "--omitHeader",
    ]
    # An argument list with no shell avoids quoting issues. Generation stays
    # best-effort as before, but a failure is now reported instead of ignored.
    try:
        returncode = subprocess.run(cmd, check=False).returncode  # noqa: S603
    except OSError as exc:
        print(f"Could not run widdershins: {exc}")
        return
    if returncode != 0:
        print(f"widdershins exited with code {returncode}")
@@ -0,0 +1,81 @@
1
+ <h1 id="{{=data.title_prefix||''}}">{{=data.api.info && data.api.info.title}} {{=data.version||''}}</h1>
2
+
3
+ > Scroll down for {{? data.header.language_tabs.length}}code samples, {{?}}example requests and responses.
4
+
5
+ {{? data.api.info && data.api.info.description}}{{=data.api.info.description||''}}{{?}}
6
+
7
+
8
+ {{? data.api.info && data.api.info.termsOfService}}<a href="{{=data.api.info.termsOfService||''}}">Terms of service</a>{{?}}
9
+ {{? data.api.info && data.api.info.contact}}{{? data.api.info.contact.email}}Email: <a href="mailto:{{=data.api.info.contact.email||''}}">{{=data.api.info.contact.name || 'Support'}}</a> {{?}}{{? data.api.info.contact.url}}Web: <a href="{{=data.api.info.contact.url||''}}">{{= data.api.info.contact.name || 'Support'}}</a> {{?}}{{?}}
10
+ {{? data.api.info && data.api.info.license}}{{? data.api.info.license.url}}License: <a href="{{=data.api.info.license.url||''}}">{{=data.api.info.license.name||''}}</a>{{??}} License: {{=data.api.info.license.name||''}}{{?}}{{?}}
11
+ {{= data.tags.endSection }}
12
+
13
+ {{? data.api.components && data.api.components.securitySchemes && data.def && data.def.security}}
14
+ {{#def.security}}
15
+ {{?}}
16
+
17
+ {{ for (var r in data.resources) { }}
18
+ {{ data.resource = data.resources[r]; }}
19
+
20
+ {{= data.tags.section }}
21
+ <h1 id="{{=data.title_prefix||''+'-'+data.utils.slugify(r)}}">{{= r||''}}</h1>
22
+
23
+ {{? data.resource.description }}{{= data.resource.description||''}}{{?}}
24
+
25
+ {{? data.resource.externalDocs}}
26
+ <a href="{{=data.resource.externalDocs.url||''}}">{{=data.resource.externalDocs.description||'External documentation'}}</a>
27
+ {{?}}
28
+
29
+ {{ for (var m in data.resource.methods) { }}
30
+ {{ data.operationUniqueName = m; }}
31
+ {{ data.method = data.resource.methods[m]; }}
32
+ {{ data.operationUniqueSlug = data.method.slug; }}
33
+ {{ data.operation = data.method.operation; }}
34
+ {{= data.templates.operation(data) }}
35
+ {{ } /* of methods */ }}
36
+
37
+ {{= data.tags.endSection }}
38
+ {{ } /* of resources */ }}
39
+
40
+ {{? data.api.components && data.api.components.schemas }}
41
+ {{= data.tags.section }}
42
+
43
+ # Schemas
44
+
45
+ {{ for (var s in data.components.schemas) { }}
46
+ {{ var origSchema = data.components.schemas[s]; }}
47
+ {{ var schema = data.api.components.schemas[s]; }}
48
+
49
+ {{= data.tags.section }}
50
+ <h2 id="tocS_{{=s||''}}">{{=s||''}}</h2>
51
+ {{ /* backwards compatibility */ }}
52
+ <a id="schema{{=s.toLowerCase()||''}}"></a>
53
+ <a id="schema_{{=s||''}}"></a>
54
+ <a id="tocS{{=s.toLowerCase()||''}}"></a>
55
+ <a id="tocs{{=s.toLowerCase()||''}}"></a>
56
+
57
+ {{? data.options.yaml }}
58
+ ```yaml
59
+ {{=data.utils.yaml.stringify(data.utils.getSample(schema,data.options,{quiet:true},data.api))}}
60
+ {{??}}
61
+ ```json
62
+ {{=data.utils.safejson(data.utils.getSample(schema,data.options,{quiet:true},data.api),null,2)}}
63
+ {{?}}
64
+ ```
65
+
66
+
67
+
68
+ {{= data.tags.endSection }}
69
+ {{= data.tags.endSection }}
70
+
71
+ {{ } /* of schemas */ }}
72
+
73
+ {{?}}
74
+
75
+ {{? data.options.footer}}
76
+ {{#def.footer}}
77
+ {{?}}
78
+
79
+ {{? data.options.discovery}}
80
+ {{#def.discovery}}
81
+ {{?}}
@@ -0,0 +1,47 @@
1
+ {{= data.tags.section }}
2
+
3
+ ## {{= data.operationUniqueName||''}}
4
+
5
+ {{? data.operation.operationId}}
6
+ <a id="opId{{=data.operation.operationId||''}}"></a>
7
+ {{?}}
8
+
9
+ {{ data.methodUpper = data.method.verb.toUpperCase(); }}
10
+ {{ data.url = data.utils.slashes(data.baseUrl + data.method.path); }}
11
+ {{ data.parameters = data.operation.parameters; }}
12
+ {{ data.enums = []; }}
13
+ {{ data.utils.fakeProdCons(data); }}
14
+ {{ data.utils.fakeBodyParameter(data); }}
15
+ {{ data.utils.mergePathParameters(data); }}
16
+ {{ data.utils.getParameters(data); }}
17
+
18
+ {{? data.options.codeSamples || data.operation["x-code-samples"] }}
19
+ > Code samples
20
+
21
+ {{? data.utils.getCodeSamples(data) }}{{= data.utils.getCodeSamples(data) }}{{?}}
22
+ {{?}}
23
+
24
+ `{{= data.methodUpper||''}} {{=data.method.path||''}}`
25
+
26
+ {{? data.operation.summary && !data.options.tocSummary}}*{{= data.operation.summary||'' }}*{{?}}
27
+
28
+ {{? data.operation.description}}{{= data.operation.description||'' }}{{?}}
29
+
30
+ {{? data.operation.requestBody}}
31
+ > Body parameter
32
+
33
+ {{? data.bodyParameter.exampleValues.description }}
34
+ > {{= data.bodyParameter.exampleValues.description||'' }}
35
+ {{?}}
36
+
37
+ {{? data.utils.getBodyParameterExamples(data) }}{{= data.utils.getBodyParameterExamples(data) }}{{?}}
38
+ {{?}}
39
+
40
+ {{#def.responses}}
41
+
42
+ {{ data.security = data.operation.security ? data.operation.security : data.api.security; }}
43
+ {{? data.security && data.security.length }}
44
+ {{??}}
45
+ {{#def.authentication_none}}
46
+ {{?}}
47
+ {{= data.tags.endSection }}
@@ -0,0 +1,61 @@
1
+ {{ data.responses = data.utils.getResponses(data); }}
2
+ {{ data.responseSchemas = false; }}
3
+ {{~ data.responses :response }}
4
+ {{ if (response.content) data.responseSchemas = true; }}
5
+ {{~}}
6
+
7
+ {{? data.responseSchemas }}
8
+ > Example responses
9
+
10
+ {{? data.utils.getResponseExamples(data) }}{{= data.utils.getResponseExamples(data) }}{{?}}
11
+ {{?}}
12
+
13
+ {{= data.tags.section }}
14
+ <h3 id="{{=data.operationUniqueSlug||''}}-responses">Responses</h3>
15
+
16
+ |Status|Meaning|Description|Schema|
17
+ |---|---|---|---|
18
+ {{~ data.responses :r}}|{{=r.status}}|{{=r.meaning}}|{{=r.description || 'none'}}|{{=r.schema||'none'}}|
19
+ {{~}}
20
+
21
+ {{ data.responseSchemas = false; }}
22
+ {{~ data.responses :response }}
23
+ {{ if (response.content && !response.$ref && !data.utils.isPrimitive(response.type)) data.responseSchemas = true; }}
24
+ {{~}}
25
+ {{? data.responseSchemas }}
26
+ <h3 id="{{=data.operationUniqueSlug||''}}-responseschema">Response Schema</h3>
27
+ {{~ data.responses :response}}
28
+ {{? response.content && !response.$ref && !data.utils.isPrimitive(response.type)}}
29
+ {{? Object.keys(response.content).length }}
30
+ {{ var responseKey = Object.keys(response.content)[0]; }}
31
+ {{ var responseSchema = response.content[responseKey].schema; }}
32
+ {{ var enums = []; }}
33
+ {{ var blocks = data.utils.schemaToArray(responseSchema,0,{trim:true,join:true},data); }}
34
+ {{ for (var block of blocks) {
35
+ for (var p of block.rows) {
36
+ if (p.schema && p.schema.enum) {
37
+ for (var e of p.schema.enum) {
38
+ enums.push({name:p.name,value:e});
39
+ }
40
+ }
41
+ }
42
+ }
43
+ }}
44
+
45
+ Status Code **{{=response.status||''}}**
46
+
47
+ {{ data.response = response; }}
48
+
49
+ {{?}}
50
+ {{?}}
51
+ {{~}}
52
+ {{?}}
53
+ {{ data.responseHeaders = data.utils.getResponseHeaders(data); }}
54
+ {{? data.responseHeaders.length }}
55
+ ### Response Headers
56
+ |Status|Header|Type|Format|Description|
57
+ |---|---|---|---|---|
58
+ {{~ data.responseHeaders :h}}|{{=h.status}}|{{=h.header}}|{{=h.type}}|{{=h.format||''}}|{{=h.description||'none'}}|
59
+ {{~}}
60
+ {{?}}
61
+ {{= data.tags.endSection }}
@@ -117,6 +117,8 @@ nav:
117
117
  - Environment Variables: guide/env.md
118
118
  - Namespaces: guide/namespaces.md
119
119
  - Local DB Migrations: guide/db_migrations.md
120
+ - 🔗 Studio:
121
+ - API: studio/api/index.md
120
122
  - 🤝 Contributing: contributing.md
121
123
 
122
124
  - DataChain Website ↗: https://datachain.ai" target="_blank"
@@ -161,6 +163,8 @@ extra:
161
163
  - icon: fontawesome/brands/linkedin
162
164
  link: https://www.linkedin.com/company/dvc-ai
163
165
 
166
+ hooks:
167
+ - docs/api_hooks.py
164
168
  plugins:
165
169
  - search
166
170
  - section-index
@@ -144,19 +144,26 @@ def shutdown_process(
144
144
  return proc.wait()
145
145
 
146
146
 
147
- def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
147
+ def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
148
148
  buffer = b""
149
- while byt := stream.read(1): # Read one byte at a time
150
- buffer += byt
151
149
 
152
- if byt in (b"\n", b"\r"): # Check for newline or carriage return
153
- line = buffer.decode("utf-8")
154
- callback(line)
155
- buffer = b"" # Clear buffer for next line
150
+ try:
151
+ while byt := stream.read(1): # Read one byte at a time
152
+ buffer += byt
156
153
 
157
- if buffer: # Handle any remaining data in the buffer
158
- line = buffer.decode("utf-8")
159
- callback(line)
154
+ if byt in (b"\n", b"\r"): # Check for newline or carriage return
155
+ line = buffer.decode("utf-8", errors="replace")
156
+ callback(line)
157
+ buffer = b"" # Clear buffer for the next line
158
+
159
+ if buffer: # Handle any remaining data in the buffer
160
+ line = buffer.decode("utf-8", errors="replace")
161
+ callback(line)
162
+ finally:
163
+ try:
164
+ stream.close() # Ensure output is closed
165
+ except Exception: # noqa: BLE001, S110
166
+ pass
160
167
 
161
168
 
162
169
  class DatasetRowsFetcher(NodesThreadPool):
@@ -1760,13 +1767,13 @@ class Catalog:
1760
1767
  recursive=recursive,
1761
1768
  )
1762
1769
 
1770
+ @staticmethod
1763
1771
  def query(
1764
- self,
1765
1772
  query_script: str,
1766
1773
  env: Optional[Mapping[str, str]] = None,
1767
1774
  python_executable: str = sys.executable,
1768
- capture_output: bool = False,
1769
- output_hook: Callable[[str], None] = noop,
1775
+ stdout_callback: Optional[Callable[[str], None]] = None,
1776
+ stderr_callback: Optional[Callable[[str], None]] = None,
1770
1777
  params: Optional[dict[str, str]] = None,
1771
1778
  job_id: Optional[str] = None,
1772
1779
  interrupt_timeout: Optional[int] = None,
@@ -1781,13 +1788,18 @@ class Catalog:
1781
1788
  },
1782
1789
  )
1783
1790
  popen_kwargs: dict[str, Any] = {}
1784
- if capture_output:
1785
- popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
1791
+
1792
+ if stdout_callback is not None:
1793
+ popen_kwargs = {"stdout": subprocess.PIPE}
1794
+ if stderr_callback is not None:
1795
+ popen_kwargs["stderr"] = subprocess.PIPE
1786
1796
 
1787
1797
  def raise_termination_signal(sig: int, _: Any) -> NoReturn:
1788
1798
  raise TerminationSignal(sig)
1789
1799
 
1790
- thread: Optional[Thread] = None
1800
+ stdout_thread: Optional[Thread] = None
1801
+ stderr_thread: Optional[Thread] = None
1802
+
1791
1803
  with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
1792
1804
  logger.info("Starting process %s", proc.pid)
1793
1805
 
@@ -1801,10 +1813,20 @@ class Catalog:
1801
1813
  orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
1802
1814
  signal.signal(signal.SIGTERM, raise_termination_signal)
1803
1815
  try:
1804
- if capture_output:
1805
- args = (proc.stdout, output_hook)
1806
- thread = Thread(target=_process_stream, args=args, daemon=True)
1807
- thread.start()
1816
+ if stdout_callback is not None:
1817
+ stdout_thread = Thread(
1818
+ target=process_output,
1819
+ args=(proc.stdout, stdout_callback),
1820
+ daemon=True,
1821
+ )
1822
+ stdout_thread.start()
1823
+ if stderr_callback is not None:
1824
+ stderr_thread = Thread(
1825
+ target=process_output,
1826
+ args=(proc.stderr, stderr_callback),
1827
+ daemon=True,
1828
+ )
1829
+ stderr_thread.start()
1808
1830
 
1809
1831
  proc.wait()
1810
1832
  except TerminationSignal as exc:
@@ -1822,8 +1844,22 @@ class Catalog:
1822
1844
  finally:
1823
1845
  signal.signal(signal.SIGTERM, orig_sigterm_handler)
1824
1846
  signal.signal(signal.SIGINT, orig_sigint_handler)
1825
- if thread:
1826
- thread.join() # wait for the reader thread
1847
+ # wait for the reader thread
1848
+ thread_join_timeout_seconds = 30
1849
+ if stdout_thread is not None:
1850
+ stdout_thread.join(timeout=thread_join_timeout_seconds)
1851
+ if stdout_thread.is_alive():
1852
+ logger.warning(
1853
+ "stdout thread is still alive after %s seconds",
1854
+ thread_join_timeout_seconds,
1855
+ )
1856
+ if stderr_thread is not None:
1857
+ stderr_thread.join(timeout=thread_join_timeout_seconds)
1858
+ if stderr_thread.is_alive():
1859
+ logger.warning(
1860
+ "stderr thread is still alive after %s seconds",
1861
+ thread_join_timeout_seconds,
1862
+ )
1827
1863
 
1828
1864
  logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
1829
1865
  if proc.returncode in (
@@ -44,6 +44,7 @@ FETCH_WORKERS = 100
44
44
  DELIMITER = "/" # Path delimiter.
45
45
 
46
46
  DATA_SOURCE_URI_PATTERN = re.compile(r"^[\w]+:\/\/.*$")
47
+ CLOUD_STORAGE_PROTOCOLS = {"s3", "gs", "az", "hf"}
47
48
 
48
49
  ResultQueue = asyncio.Queue[Optional[Sequence["File"]]]
49
50
 
@@ -62,6 +63,16 @@ def _is_win_local_path(uri: str) -> bool:
62
63
  return False
63
64
 
64
65
 
66
def is_cloud_uri(uri: str) -> bool:
    """Return True if the scheme of *uri* is in CLOUD_STORAGE_PROTOCOLS."""
    return urlparse(uri).scheme in CLOUD_STORAGE_PROTOCOLS
69
+
70
+
71
def get_cloud_schemes() -> list[str]:
    """Get list of cloud storage scheme prefixes.

    Each protocol in CLOUD_STORAGE_PROTOCOLS is returned with a ``://``
    suffix. The protocols are sorted first because iterating a set of
    strings yields a different order from one interpreter run to the next.
    """
    return [f"{p}://" for p in sorted(CLOUD_STORAGE_PROTOCOLS)]
74
+
75
+
65
76
  class Bucket(NamedTuple):
66
77
  name: str
67
78
  uri: "StorageURI"
@@ -3,6 +3,13 @@ from collections.abc import Sequence
3
3
  from functools import reduce
4
4
  from typing import TYPE_CHECKING, Optional, Union
5
5
 
6
+ from datachain.lib.dc.storage_pattern import (
7
+ apply_glob_filter,
8
+ expand_brace_pattern,
9
+ should_use_recursion,
10
+ split_uri_pattern,
11
+ validate_cloud_bucket_name,
12
+ )
6
13
  from datachain.lib.file import FileType, get_file_type
7
14
  from datachain.lib.listing import get_file_info, get_listing, list_bucket, ls
8
15
  from datachain.query import Session
@@ -38,14 +45,18 @@ def read_storage(
38
45
  It returns the chain itself as usual.
39
46
 
40
47
  Parameters:
41
- uri: storage URI with directory or list of URIs.
42
- URIs must start with storage prefix such
43
- as `s3://`, `gs://`, `az://` or "file:///"
48
+ uri: Storage path(s) or URI(s). Can be a local path or start with a
49
+ storage prefix like `s3://`, `gs://`, `az://`, `hf://` or "file:///".
50
+ Supports glob patterns:
51
+ - `*` : wildcard
52
+ - `**` : recursive wildcard
53
+ - `?` : single character
54
+ - `{a,b}` : brace expansion
44
55
  type: read file as "binary", "text", or "image" data. Default is "binary".
45
56
  recursive: search recursively for the given path.
46
- column: Created column name.
57
+ column: Column name that will contain File objects. Default is "file".
47
58
  update: force storage reindexing. Default is False.
48
- anon: If True, we will treat cloud bucket as public one
59
+ anon: If True, we will treat cloud bucket as public one.
49
60
  client_config: Optional client configuration for the storage client.
50
61
  delta: If True, only process new or changed files instead of reprocessing
51
62
  everything. This saves time by skipping files that were already processed in
@@ -80,12 +91,19 @@ def read_storage(
80
91
  chain = dc.read_storage("s3://my-bucket/my-dir")
81
92
  ```
82
93
 
94
+ Match all .json files recursively using glob pattern
95
+ ```py
96
+ chain = dc.read_storage("gs://bucket/meta/**/*.json")
97
+ ```
98
+
99
+ Match image file extensions for directories with pattern
100
+ ```py
101
+ chain = dc.read_storage("s3://bucket/202?/**/*.{jpg,jpeg,png}")
102
+ ```
103
+
83
104
  Multiple URIs:
84
105
  ```python
85
- chain = dc.read_storage([
86
- "s3://bucket1/dir1",
87
- "s3://bucket2/dir2"
88
- ])
106
+ chain = dc.read_storage(["s3://my-bkt/dir1", "s3://bucket2/dir2/dir3"])
89
107
  ```
90
108
 
91
109
  With AWS S3-compatible storage:
@@ -95,19 +113,6 @@ def read_storage(
95
113
  client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
96
114
  )
97
115
  ```
98
-
99
- Pass existing session
100
- ```py
101
- session = Session.get()
102
- chain = dc.read_storage([
103
- "path/to/dir1",
104
- "path/to/dir2"
105
- ], session=session, recursive=True)
106
- ```
107
-
108
- Note:
109
- When using multiple URIs with `update=True`, the function optimizes by
110
- avoiding redundant updates for URIs pointing to the same storage location.
111
116
  """
112
117
  from .datachain import DataChain
113
118
  from .datasets import read_dataset
@@ -130,13 +135,36 @@ def read_storage(
130
135
  if not uris:
131
136
  raise ValueError("No URIs provided")
132
137
 
138
+ # Then expand all URIs that contain brace patterns
139
+ expanded_uris = []
140
+ for single_uri in uris:
141
+ uri_str = str(single_uri)
142
+ validate_cloud_bucket_name(uri_str)
143
+ expanded_uris.extend(expand_brace_pattern(uri_str))
144
+
145
+ # Now process each expanded URI
133
146
  chains = []
134
147
  listed_ds_name = set()
135
148
  file_values = []
136
149
 
137
- for single_uri in uris:
150
+ updated_uris = set()
151
+
152
+ for single_uri in expanded_uris:
153
+ # Check if URI contains glob patterns and split them
154
+ base_uri, glob_pattern = split_uri_pattern(single_uri)
155
+
156
+ # If a pattern is found, use the base_uri for listing
157
+ # The pattern will be used for filtering later
158
+ list_uri_to_use = base_uri if glob_pattern else single_uri
159
+
160
+ # Avoid double updates for the same URI
161
+ update_single_uri = False
162
+ if update and (list_uri_to_use not in updated_uris):
163
+ updated_uris.add(list_uri_to_use)
164
+ update_single_uri = True
165
+
138
166
  list_ds_name, list_uri, list_path, list_ds_exists = get_listing(
139
- single_uri, session, update=update
167
+ list_uri_to_use, session, update=update_single_uri
140
168
  )
141
169
 
142
170
  # list_ds_name is None if object is a file, we don't want to use cache
@@ -185,7 +213,21 @@ def read_storage(
185
213
  lambda ds_name=list_ds_name, lst_uri=list_uri: lst_fn(ds_name, lst_uri)
186
214
  )
187
215
 
188
- chains.append(ls(dc, list_path, recursive=recursive, column=column))
216
+ # If a glob pattern was detected, use it for filtering
217
+ # Otherwise, use the original list_path from get_listing
218
+ if glob_pattern:
219
+ # Determine if we should use recursive listing based on the pattern
220
+ use_recursive = should_use_recursion(glob_pattern, recursive or False)
221
+
222
+ # Apply glob filter - no need for brace expansion here as it's done above
223
+ chain = apply_glob_filter(
224
+ dc, glob_pattern, list_path, use_recursive, column
225
+ )
226
+ chains.append(chain)
227
+ else:
228
+ # No glob pattern detected, use normal ls behavior
229
+ chains.append(ls(dc, list_path, recursive=recursive, column=column))
230
+
189
231
  listed_ds_name.add(list_ds_name)
190
232
 
191
233
  storage_chain = None if not chains else reduce(lambda x, y: x.union(y), chains)