datachain 0.31.4__tar.gz → 0.32.1__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (431) hide show
  1. {datachain-0.31.4 → datachain-0.32.1}/PKG-INFO +11 -23
  2. {datachain-0.31.4 → datachain-0.32.1}/README.rst +10 -22
  3. {datachain-0.31.4 → datachain-0.32.1}/docs/api_hooks.py +7 -0
  4. datachain-0.32.1/docs/assets/webhook_dialog.png +0 -0
  5. datachain-0.32.1/docs/assets/webhook_list.png +0 -0
  6. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/namespaces.md +23 -0
  7. {datachain-0.31.4 → datachain-0.32.1}/docs/references/datachain.md +2 -0
  8. datachain-0.32.1/docs/studio/webhooks.md +265 -0
  9. {datachain-0.31.4 → datachain-0.32.1}/mkdocs.yml +1 -0
  10. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/__init__.py +2 -0
  11. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/metastore.py +79 -15
  12. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/error.py +8 -0
  13. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/python_to_sql.py +18 -4
  14. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/namespaces.py +56 -2
  15. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/projects.py +47 -1
  16. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/namespace.py +19 -0
  17. {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/PKG-INFO +11 -23
  18. {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/SOURCES.txt +3 -0
  19. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_namespace.py +45 -0
  20. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_project.py +60 -0
  21. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_python_to_sql.py +19 -0
  22. {datachain-0.31.4 → datachain-0.32.1}/.cruft.json +0 -0
  23. {datachain-0.31.4 → datachain-0.32.1}/.gitattributes +0 -0
  24. {datachain-0.31.4 → datachain-0.32.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  25. {datachain-0.31.4 → datachain-0.32.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  26. {datachain-0.31.4 → datachain-0.32.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  27. {datachain-0.31.4 → datachain-0.32.1}/.github/codecov.yaml +0 -0
  28. {datachain-0.31.4 → datachain-0.32.1}/.github/dependabot.yml +0 -0
  29. {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/benchmarks.yml +0 -0
  30. {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/release.yml +0 -0
  31. {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/tests-studio.yml +0 -0
  32. {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/tests.yml +0 -0
  33. {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/update-template.yaml +0 -0
  34. {datachain-0.31.4 → datachain-0.32.1}/.gitignore +0 -0
  35. {datachain-0.31.4 → datachain-0.32.1}/.pre-commit-config.yaml +0 -0
  36. {datachain-0.31.4 → datachain-0.32.1}/CODE_OF_CONDUCT.rst +0 -0
  37. {datachain-0.31.4 → datachain-0.32.1}/LICENSE +0 -0
  38. {datachain-0.31.4 → datachain-0.32.1}/docs/assets/captioned_cartoons.png +0 -0
  39. {datachain-0.31.4 → datachain-0.32.1}/docs/assets/datachain-white.svg +0 -0
  40. {datachain-0.31.4 → datachain-0.32.1}/docs/assets/datachain.svg +0 -0
  41. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/login.md +0 -0
  42. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/logout.md +0 -0
  43. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/team.md +0 -0
  44. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/token.md +0 -0
  45. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/index.md +0 -0
  46. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/cancel.md +0 -0
  47. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/clusters.md +0 -0
  48. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/logs.md +0 -0
  49. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/ls.md +0 -0
  50. {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/run.md +0 -0
  51. {datachain-0.31.4 → datachain-0.32.1}/docs/contributing.md +0 -0
  52. {datachain-0.31.4 → datachain-0.32.1}/docs/css/github-permalink-style.css +0 -0
  53. {datachain-0.31.4 → datachain-0.32.1}/docs/examples.md +0 -0
  54. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/db_migrations.md +0 -0
  55. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/delta.md +0 -0
  56. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/env.md +0 -0
  57. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/index.md +0 -0
  58. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/processing.md +0 -0
  59. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/remotes.md +0 -0
  60. {datachain-0.31.4 → datachain-0.32.1}/docs/guide/retry.md +0 -0
  61. {datachain-0.31.4 → datachain-0.32.1}/docs/index.md +0 -0
  62. {datachain-0.31.4 → datachain-0.32.1}/docs/overrides/main.html +0 -0
  63. {datachain-0.31.4 → datachain-0.32.1}/docs/quick-start.md +0 -0
  64. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/arrowrow.md +0 -0
  65. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/bbox.md +0 -0
  66. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/file.md +0 -0
  67. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/imagefile.md +0 -0
  68. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/index.md +0 -0
  69. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/pose.md +0 -0
  70. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/segment.md +0 -0
  71. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/tarvfile.md +0 -0
  72. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/textfile.md +0 -0
  73. {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/videofile.md +0 -0
  74. {datachain-0.31.4 → datachain-0.32.1}/docs/references/func.md +0 -0
  75. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/aggregate.md +0 -0
  76. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/array.md +0 -0
  77. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/conditional.md +0 -0
  78. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/numeric.md +0 -0
  79. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/path.md +0 -0
  80. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/random.md +0 -0
  81. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/string.md +0 -0
  82. {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/window.md +0 -0
  83. {datachain-0.31.4 → datachain-0.32.1}/docs/references/index.md +0 -0
  84. {datachain-0.31.4 → datachain-0.32.1}/docs/references/toolkit.md +0 -0
  85. {datachain-0.31.4 → datachain-0.32.1}/docs/references/torch.md +0 -0
  86. {datachain-0.31.4 → datachain-0.32.1}/docs/references/udf.md +0 -0
  87. {datachain-0.31.4 → datachain-0.32.1}/docs/studio/api/.gitkeep +0 -0
  88. {datachain-0.31.4 → datachain-0.32.1}/docs/templates/main.dot +0 -0
  89. {datachain-0.31.4 → datachain-0.32.1}/docs/templates/operation.dot +0 -0
  90. {datachain-0.31.4 → datachain-0.32.1}/docs/templates/responses.def +0 -0
  91. {datachain-0.31.4 → datachain-0.32.1}/docs/tutorials.md +0 -0
  92. {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  93. {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  94. {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/openimage-detect.py +0 -0
  95. {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
  96. {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/ultralytics-pose.py +0 -0
  97. {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/ultralytics-segment.py +0 -0
  98. {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/common_sql_functions.py +0 -0
  99. {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/json-csv-reader.py +0 -0
  100. {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/nested_datamodel.py +0 -0
  101. {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/torch-loader.py +0 -0
  102. {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/udfs/parallel.py +0 -0
  103. {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/udfs/simple.py +0 -0
  104. {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/udfs/stateful.py +0 -0
  105. {datachain-0.31.4 → datachain-0.32.1}/examples/incremental_processing/delta.py +0 -0
  106. {datachain-0.31.4 → datachain-0.32.1}/examples/incremental_processing/retry.py +0 -0
  107. {datachain-0.31.4 → datachain-0.32.1}/examples/incremental_processing/utils.py +0 -0
  108. {datachain-0.31.4 → datachain-0.32.1}/examples/llm_and_nlp/claude-query.py +0 -0
  109. {datachain-0.31.4 → datachain-0.32.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  110. {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/audio-to-text.py +0 -0
  111. {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/clip_inference.py +0 -0
  112. {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/hf_pipeline.py +0 -0
  113. {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
  114. {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/wds.py +0 -0
  115. {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/wds_filtered.py +0 -0
  116. {datachain-0.31.4 → datachain-0.32.1}/noxfile.py +0 -0
  117. {datachain-0.31.4 → datachain-0.32.1}/pyproject.toml +0 -0
  118. {datachain-0.31.4 → datachain-0.32.1}/setup.cfg +0 -0
  119. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/__main__.py +0 -0
  120. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/asyn.py +0 -0
  121. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cache.py +0 -0
  122. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/__init__.py +0 -0
  123. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/catalog.py +0 -0
  124. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/datasource.py +0 -0
  125. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/loader.py +0 -0
  126. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/__init__.py +0 -0
  127. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/__init__.py +0 -0
  128. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/datasets.py +0 -0
  129. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/du.py +0 -0
  130. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/index.py +0 -0
  131. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/ls.py +0 -0
  132. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/misc.py +0 -0
  133. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/query.py +0 -0
  134. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/show.py +0 -0
  135. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/__init__.py +0 -0
  136. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/job.py +0 -0
  137. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/studio.py +0 -0
  138. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/utils.py +0 -0
  139. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/utils.py +0 -0
  140. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/__init__.py +0 -0
  141. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/azure.py +0 -0
  142. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/fileslice.py +0 -0
  143. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/fsspec.py +0 -0
  144. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/gcs.py +0 -0
  145. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/hf.py +0 -0
  146. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/local.py +0 -0
  147. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/s3.py +0 -0
  148. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/config.py +0 -0
  149. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/__init__.py +0 -0
  150. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/db_engine.py +0 -0
  151. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/job.py +0 -0
  152. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/schema.py +0 -0
  153. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/serializer.py +0 -0
  154. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/sqlite.py +0 -0
  155. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/warehouse.py +0 -0
  156. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/dataset.py +0 -0
  157. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/delta.py +0 -0
  158. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/diff/__init__.py +0 -0
  159. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/fs/__init__.py +0 -0
  160. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/fs/reference.py +0 -0
  161. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/fs/utils.py +0 -0
  162. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/__init__.py +0 -0
  163. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/aggregate.py +0 -0
  164. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/array.py +0 -0
  165. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/base.py +0 -0
  166. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/conditional.py +0 -0
  167. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/func.py +0 -0
  168. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/numeric.py +0 -0
  169. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/path.py +0 -0
  170. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/random.py +0 -0
  171. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/string.py +0 -0
  172. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/window.py +0 -0
  173. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/job.py +0 -0
  174. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/__init__.py +0 -0
  175. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/arrow.py +0 -0
  176. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/audio.py +0 -0
  177. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/clip.py +0 -0
  178. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/__init__.py +0 -0
  179. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/flatten.py +0 -0
  180. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
  181. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/unflatten.py +0 -0
  182. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  183. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/data_model.py +0 -0
  184. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dataset_info.py +0 -0
  185. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/__init__.py +0 -0
  186. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/csv.py +0 -0
  187. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/database.py +0 -0
  188. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/datachain.py +0 -0
  189. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/datasets.py +0 -0
  190. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/hf.py +0 -0
  191. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/json.py +0 -0
  192. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/listings.py +0 -0
  193. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/pandas.py +0 -0
  194. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/parquet.py +0 -0
  195. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/records.py +0 -0
  196. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/storage.py +0 -0
  197. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/storage_pattern.py +0 -0
  198. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/utils.py +0 -0
  199. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/values.py +0 -0
  200. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/file.py +0 -0
  201. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/hf.py +0 -0
  202. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/image.py +0 -0
  203. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/listing.py +0 -0
  204. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/listing_info.py +0 -0
  205. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/meta_formats.py +0 -0
  206. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/model_store.py +0 -0
  207. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/pytorch.py +0 -0
  208. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/settings.py +0 -0
  209. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/signal_schema.py +0 -0
  210. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/tar.py +0 -0
  211. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/text.py +0 -0
  212. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/udf.py +0 -0
  213. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/udf_signature.py +0 -0
  214. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/utils.py +0 -0
  215. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/video.py +0 -0
  216. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/webdataset.py +0 -0
  217. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/webdataset_laion.py +0 -0
  218. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/listing.py +0 -0
  219. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/__init__.py +0 -0
  220. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/bbox.py +0 -0
  221. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/pose.py +0 -0
  222. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/segment.py +0 -0
  223. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/__init__.py +0 -0
  224. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/bbox.py +0 -0
  225. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/pose.py +0 -0
  226. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/segment.py +0 -0
  227. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/utils.py +0 -0
  228. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/node.py +0 -0
  229. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/nodes_fetcher.py +0 -0
  230. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/nodes_thread_pool.py +0 -0
  231. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/progress.py +0 -0
  232. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/project.py +0 -0
  233. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/py.typed +0 -0
  234. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/__init__.py +0 -0
  235. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/batch.py +0 -0
  236. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/dataset.py +0 -0
  237. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/dispatch.py +0 -0
  238. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/metrics.py +0 -0
  239. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/params.py +0 -0
  240. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/queue.py +0 -0
  241. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/schema.py +0 -0
  242. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/session.py +0 -0
  243. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/udf.py +0 -0
  244. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/utils.py +0 -0
  245. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/remote/__init__.py +0 -0
  246. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/remote/studio.py +0 -0
  247. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/script_meta.py +0 -0
  248. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/semver.py +0 -0
  249. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/__init__.py +0 -0
  250. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/default/__init__.py +0 -0
  251. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/default/base.py +0 -0
  252. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/__init__.py +0 -0
  253. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/aggregate.py +0 -0
  254. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/array.py +0 -0
  255. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/conditional.py +0 -0
  256. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/numeric.py +0 -0
  257. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/path.py +0 -0
  258. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/random.py +0 -0
  259. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/string.py +0 -0
  260. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/postgresql_dialect.py +0 -0
  261. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/postgresql_types.py +0 -0
  262. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/selectable.py +0 -0
  263. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/__init__.py +0 -0
  264. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/base.py +0 -0
  265. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/types.py +0 -0
  266. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/vector.py +0 -0
  267. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/types.py +0 -0
  268. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/utils.py +0 -0
  269. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/studio.py +0 -0
  270. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/telemetry.py +0 -0
  271. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/toolkit/__init__.py +0 -0
  272. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/toolkit/split.py +0 -0
  273. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/torch/__init__.py +0 -0
  274. {datachain-0.31.4 → datachain-0.32.1}/src/datachain/utils.py +0 -0
  275. {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/dependency_links.txt +0 -0
  276. {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/entry_points.txt +0 -0
  277. {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/requires.txt +0 -0
  278. {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/top_level.txt +0 -0
  279. {datachain-0.31.4 → datachain-0.32.1}/tests/__init__.py +0 -0
  280. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/__init__.py +0 -0
  281. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/conftest.py +0 -0
  282. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  283. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/.dvc/config +0 -0
  284. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/.gitignore +0 -0
  285. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  286. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/test_datachain.py +0 -0
  287. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/test_ls.py +0 -0
  288. {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/test_version.py +0 -0
  289. {datachain-0.31.4 → datachain-0.32.1}/tests/conftest.py +0 -0
  290. {datachain-0.31.4 → datachain-0.32.1}/tests/data.py +0 -0
  291. {datachain-0.31.4 → datachain-0.32.1}/tests/examples/__init__.py +0 -0
  292. {datachain-0.31.4 → datachain-0.32.1}/tests/examples/test_examples.py +0 -0
  293. {datachain-0.31.4 → datachain-0.32.1}/tests/examples/test_wds_e2e.py +0 -0
  294. {datachain-0.31.4 → datachain-0.32.1}/tests/examples/wds_data.py +0 -0
  295. {datachain-0.31.4 → datachain-0.32.1}/tests/func/__init__.py +0 -0
  296. {datachain-0.31.4 → datachain-0.32.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  297. {datachain-0.31.4 → datachain-0.32.1}/tests/func/data/lena.jpg +0 -0
  298. {datachain-0.31.4 → datachain-0.32.1}/tests/func/fake-service-account-credentials.json +0 -0
  299. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/__init__.py +0 -0
  300. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_aggregate.py +0 -0
  301. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_array.py +0 -0
  302. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_conditional.py +0 -0
  303. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_numeric.py +0 -0
  304. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_path.py +0 -0
  305. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_random.py +0 -0
  306. {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_string.py +0 -0
  307. {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/__init__.py +0 -0
  308. {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/running-mask0.png +0 -0
  309. {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/running-mask1.png +0 -0
  310. {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/running.jpg +0 -0
  311. {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/ships.jpg +0 -0
  312. {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/test_yolo.py +0 -0
  313. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_audio.py +0 -0
  314. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_batching.py +0 -0
  315. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_catalog.py +0 -0
  316. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_client.py +0 -0
  317. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_cloud_transfer.py +0 -0
  318. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_data_storage.py +0 -0
  319. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_datachain.py +0 -0
  320. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_datachain_merge.py +0 -0
  321. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_dataset_query.py +0 -0
  322. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_datasets.py +0 -0
  323. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_delta.py +0 -0
  324. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_feature_pickling.py +0 -0
  325. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_file.py +0 -0
  326. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_hf.py +0 -0
  327. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_hidden_field.py +0 -0
  328. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_image.py +0 -0
  329. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_listing.py +0 -0
  330. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_ls.py +0 -0
  331. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_meta_formats.py +0 -0
  332. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_metastore.py +0 -0
  333. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_metrics.py +0 -0
  334. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_mutate.py +0 -0
  335. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_pull.py +0 -0
  336. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_pytorch.py +0 -0
  337. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_query.py +0 -0
  338. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_read_database.py +0 -0
  339. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_read_dataset_remote.py +0 -0
  340. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  341. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_retry.py +0 -0
  342. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_session.py +0 -0
  343. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_storage_pattern.py +0 -0
  344. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_studio_datetime_parsing.py +0 -0
  345. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_to_database.py +0 -0
  346. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_toolkit.py +0 -0
  347. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_video.py +0 -0
  348. {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_warehouse.py +0 -0
  349. {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class.py +0 -0
  350. {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class_exception.py +0 -0
  351. {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class_parallel.py +0 -0
  352. {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  353. {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/name_len_slow.py +0 -0
  354. {datachain-0.31.4 → datachain-0.32.1}/tests/test_atomicity.py +0 -0
  355. {datachain-0.31.4 → datachain-0.32.1}/tests/test_cli_e2e.py +0 -0
  356. {datachain-0.31.4 → datachain-0.32.1}/tests/test_cli_studio.py +0 -0
  357. {datachain-0.31.4 → datachain-0.32.1}/tests/test_import_time.py +0 -0
  358. {datachain-0.31.4 → datachain-0.32.1}/tests/test_query_e2e.py +0 -0
  359. {datachain-0.31.4 → datachain-0.32.1}/tests/test_telemetry.py +0 -0
  360. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/__init__.py +0 -0
  361. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/__init__.py +0 -0
  362. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/conftest.py +0 -0
  363. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_arrow.py +0 -0
  364. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_audio.py +0 -0
  365. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_clip.py +0 -0
  366. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_datachain.py +0 -0
  367. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  368. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_datachain_merge.py +0 -0
  369. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_diff.py +0 -0
  370. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_feature.py +0 -0
  371. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_feature_utils.py +0 -0
  372. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_file.py +0 -0
  373. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_hf.py +0 -0
  374. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_image.py +0 -0
  375. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_listing_info.py +0 -0
  376. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_partition_by.py +0 -0
  377. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_schema.py +0 -0
  378. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_settings.py +0 -0
  379. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_signal_schema.py +0 -0
  380. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_sql_to_python.py +0 -0
  381. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_storage_pattern.py +0 -0
  382. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_text.py +0 -0
  383. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_udf.py +0 -0
  384. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_udf_signature.py +0 -0
  385. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_utils.py +0 -0
  386. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_webdataset.py +0 -0
  387. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/__init__.py +0 -0
  388. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_bbox.py +0 -0
  389. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_pose.py +0 -0
  390. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_segment.py +0 -0
  391. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_utils.py +0 -0
  392. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/__init__.py +0 -0
  393. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/sqlite/__init__.py +0 -0
  394. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/sqlite/test_types.py +0 -0
  395. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
  396. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_array.py +0 -0
  397. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_conditional.py +0 -0
  398. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_path.py +0 -0
  399. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_random.py +0 -0
  400. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_selectable.py +0 -0
  401. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_string.py +0 -0
  402. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_asyn.py +0 -0
  403. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_cache.py +0 -0
  404. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_catalog.py +0 -0
  405. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_catalog_loader.py +0 -0
  406. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_cli_datasets.py +0 -0
  407. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_cli_parsing.py +0 -0
  408. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_client.py +0 -0
  409. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_client_gcs.py +0 -0
  410. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_client_s3.py +0 -0
  411. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_config.py +0 -0
  412. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_data_storage.py +0 -0
  413. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_database_engine.py +0 -0
  414. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_dataset.py +0 -0
  415. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_dispatch.py +0 -0
  416. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_fileslice.py +0 -0
  417. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_func.py +0 -0
  418. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_listing.py +0 -0
  419. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_metastore.py +0 -0
  420. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_module_exports.py +0 -0
  421. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_pytorch.py +0 -0
  422. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_query.py +0 -0
  423. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_query_metrics.py +0 -0
  424. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_query_params.py +0 -0
  425. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_script_meta.py +0 -0
  426. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_semver.py +0 -0
  427. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_serializer.py +0 -0
  428. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_session.py +0 -0
  429. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_utils.py +0 -0
  430. {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_warehouse.py +0 -0
  431. {datachain-0.31.4 → datachain-0.32.1}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.31.4
3
+ Version: 0.32.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -210,45 +210,33 @@ datasets that evolve over time and may occasionally have processing errors.
210
210
  .. code:: py
211
211
 
212
212
  import datachain as dc
213
- from datachain import C, File
214
213
 
215
- def process_file(file: File):
216
- """Process a file, which may occasionally fail."""
214
+ def process_file(file: dc.File) -> tuple[str, str, str]:
215
+ """Analyze a file, may occasionally fail."""
217
216
  try:
218
217
  # Your processing logic here
219
218
  content = file.read_text()
220
- result = analyze_content(content)
221
- return {
222
- "content": content,
223
- "result": result,
224
- "error": None # No error
225
- }
219
+ result = content.upper()
220
+ return content, result, "" # No error
226
221
  except Exception as e:
227
222
  # Return an error that will trigger reprocessing next time
228
- return {
229
- "content": None,
230
- "result": None,
231
- "error": str(e) # Error field will trigger retry
232
- }
223
+ return "", "", str(e) # Error field will trigger retry
233
224
 
234
225
  # Process files efficiently with delta and retry
226
+ # Run it many times, keep adding files, to see delta and retry in action
235
227
  chain = (
236
228
  dc.read_storage(
237
229
  "data/",
238
230
  update=True,
239
231
  delta=True, # Process only new/changed files
240
232
  delta_on="file.path", # Identify files by path
241
- retry_on="error" # Field that indicates errors
233
+ delta_retry="error", # Process files with error again
242
234
  )
243
- .map(processed_result=process_file)
244
- .mutate(
245
- content=C("processed_result.content"),
246
- result=C("processed_result.result"),
247
- error=C("processed_result.error")
248
- )
249
- .save(name="processed_data")
235
+ .map(process_file, output=("content", "result", "error"))
236
+ .save("processed-data")
250
237
  )
251
238
 
239
+
252
240
  Example: LLM based text-file evaluation
253
241
  ---------------------------------------
254
242
 
@@ -89,45 +89,33 @@ datasets that evolve over time and may occasionally have processing errors.
89
89
  .. code:: py
90
90
 
91
91
  import datachain as dc
92
- from datachain import C, File
93
92
 
94
- def process_file(file: File):
95
- """Process a file, which may occasionally fail."""
93
+ def process_file(file: dc.File) -> tuple[str, str, str]:
94
+ """Analyze a file, may occasionally fail."""
96
95
  try:
97
96
  # Your processing logic here
98
97
  content = file.read_text()
99
- result = analyze_content(content)
100
- return {
101
- "content": content,
102
- "result": result,
103
- "error": None # No error
104
- }
98
+ result = content.upper()
99
+ return content, result, "" # No error
105
100
  except Exception as e:
106
101
  # Return an error that will trigger reprocessing next time
107
- return {
108
- "content": None,
109
- "result": None,
110
- "error": str(e) # Error field will trigger retry
111
- }
102
+ return "", "", str(e) # Error field will trigger retry
112
103
 
113
104
  # Process files efficiently with delta and retry
105
+ # Run it many times, keep adding files, to see delta and retry in action
114
106
  chain = (
115
107
  dc.read_storage(
116
108
  "data/",
117
109
  update=True,
118
110
  delta=True, # Process only new/changed files
119
111
  delta_on="file.path", # Identify files by path
120
- retry_on="error" # Field that indicates errors
112
+ delta_retry="error", # Process files with error again
121
113
  )
122
- .map(processed_result=process_file)
123
- .mutate(
124
- content=C("processed_result.content"),
125
- result=C("processed_result.result"),
126
- error=C("processed_result.error")
127
- )
128
- .save(name="processed_data")
114
+ .map(process_file, output=("content", "result", "error"))
115
+ .save("processed-data")
129
116
  )
130
117
 
118
+
131
119
  Example: LLM based text-file evaluation
132
120
  ---------------------------------------
133
121
 
@@ -4,6 +4,13 @@ def on_pre_build(**kwargs):
4
4
 
5
5
  import requests
6
6
 
7
+ # Skip if files already exist
8
+ if os.path.exists("docs/openapi.json") and os.path.exists(
9
+ "docs/studio/api/index.md"
10
+ ):
11
+ print("API docs already exist, skipping generation")
12
+ return
13
+
7
14
  # Download OpenAPI spec
8
15
  response = requests.get(
9
16
  "https://studio.datachain.ai/api/openapi.json",
@@ -159,3 +159,26 @@ dc.read_values(scores=[0.8, 1.5, 2.1]).save("metrics")
159
159
 
160
160
  ds = dc.read_dataset("local.local.metrics")
161
161
  ds.show()
162
+ ```
163
+
164
+ ## Removing Namespaces and Projects
165
+
166
+ Use `delete_namespace` to remove an empty namespace or an empty project within a namespace. Deletion will fail if the target is not empty.
167
+
168
+ ### Signature
169
+
170
+ ```python
171
+ def delete_namespace(name: str, session: Optional[Session] = None) -> None:
172
+ ```
173
+
174
+ - **`<namespace>`** — deletes the namespace (must contain no projects or datasets).
175
+ - **`<namespace>.<project>`** — deletes the project (must contain no datasets).
176
+
177
+ ### Examples
178
+
179
+ ```python
180
+ import datachain as dc
181
+
182
+ dc.delete_namespace("dev.my-project") # delete project
183
+ dc.delete_namespace("dev") # delete namespace
184
+ ```
@@ -19,6 +19,8 @@ for examples of how to create a chain.
19
19
 
20
20
  ::: datachain.lib.dc.datasets.move_dataset
21
21
 
22
+ ::: datachain.lib.namespaces.delete_namespace
23
+
22
24
  ::: datachain.lib.dc.hf.read_hf
23
25
 
24
26
  ::: datachain.lib.dc.json.read_json
@@ -0,0 +1,265 @@
1
+ # Webhooks in Studio
2
+
3
+ ## About webhooks
4
+
5
+ Webhooks provide a way for notifications to be delivered to an external web server whenever certain events occur in [Studio](https://studio.datachain.ai). With webhooks, you configure once which events or activities you want to hear about.
6
+
7
+ When you create a webhook, you specify a URL, any additional information you want us to send, and the events in Datachain Studio that you want to listen for. When an event occurs, Datachain Studio will send an HTTP request with the data about the event to the URL that you specified. If your server is set up to listen for webhook deliveries at that URL, it can take action when it receives one.
8
+
9
+ For example, you can subscribe your webhook to events that occur when a job is created, completes, fails, starts running, and so on. You can then use this webhook to be notified whenever a job fails.
10
+
11
+ ### Alternative
12
+ As an alternative to webhooks, you can use the [CLI commands](../commands/index.md) or some of our available [API endpoints](api/index.md) to get the job information. However, a webhook requires less effort than polling an API, and it delivers updates in near real time.
13
+
14
+ ## Available event type
15
+ As of now, your server can receive two different types of events.
16
+
17
+ ### JOB
18
+
19
+ Whenever a job is created or its status changes, you will receive the JOB webhook event. The payload delivered with the JOB webhook looks like this:
20
+
21
+ Header: `http-x-datachain-event`: `JOB`
22
+
23
+ Payload:
24
+ ```json
25
+ {
26
+ "action": "job_status",
27
+ "job": {
28
+ "id": "da59df47-d121-4eb6-aa76-dc452755544e",
29
+ "status": "COMPLETE",
30
+ "error_message": "",
31
+ "created_at": "2021-07-27T16:02:08.070557",
32
+ "updated_at": "2021-07-27T16:22:08.070557",
33
+ "finished_at": "2021-07-27T16:22:08.070557",
34
+ "url": "https://studio.datachain.ai/team/TeamName/datasets/jobs/da59df47-d121-4eb6-aa76-dc452755544e"
35
+ },
36
+ "timestamp": "2021-07-27T16:22:08.070557",
37
+ }
38
+ ```
39
+
40
+ ### PING
41
+ Whenever you add your webhook to your team, Studio sends a PING event to check the delivery to the server. You can check the recent deliveries to check if the webhook is successfully connected.
42
+
43
+ Header: `http-x-datachain-event`: `PING`.
44
+
45
+ Payload:
46
+ ```json
47
+ {
48
+ "action": "PING"
49
+ }
50
+ ```
51
+
52
+
53
+ ## Creating webhooks
54
+
55
+ You should have admin access to a team to create the webhooks in the team. To create a webhook, go to settings for the team and under the section Webhooks, click on Add new Webhook.
56
+ ![Webhook Settings](../assets/webhook_list.png)
57
+
58
+ Enter the necessary information to create the webhooks.
59
+
60
+ - **URL:** Enter the valid URL where you’d like to receive the webhook payload.
61
+ - **Secret:** A string to use as a secret key. You should choose a random string of text with high entropy. You can use the webhook secret to [validate incoming requests](#validating-webhook-deliveries) and accept only those originating from Datachain Studio.
62
+ - **Events:** Under events, select the events you would like to trigger the webhook.
63
+ - **JOB:**
64
+ - CREATED: When a job is created but not yet scheduled to run
65
+ - SCHEDULED: Job has been scheduled to run
66
+ - QUEUED: Job is in the queue waiting to be processed
67
+ - INIT: Job is initializing (starting up)
68
+ - RUNNING: When a job starts running
69
+ - COMPLETE: Job has completed successfully
70
+ - FAILED: Job failed with error
71
+ - CANCELED: Job has been canceled successfully
72
+ - CANCELING: Job has been scheduled to cancel
73
+ - TASK: A scheduled task is created.
74
+
75
+ - SSL Verification: By default, we verify SSL certificates when delivering payloads. SSL verification helps ensure that hook payloads are delivered to your URL endpoint securely, keeping your data away from prying eyes. Disabling this option is **not recommended**.
76
+ - HTTP Method: By default, we make a `POST` request, but you can specify another HTTP method if necessary.
77
+ - Content Type: Optionally, select the data format you want to receive the webhook payload in
78
+ - **application/json** will deliver the JSON payload directly as the body of the `POST` request.
79
+ - **application/x-www-form-urlencoded** will send the JSON payload as a form parameter called `payload`.
80
+
81
+ ![Add webhook](../assets/webhook_dialog.png)
82
+
83
+
84
+ ## Handling webhook deliveries
85
+
86
+ When you create a webhook, you specify a URL and subscribe to event types. When any event that your webhook is subscribed to occurs, Datachain Studio will send an HTTP request with the data about the event to the URL that you specified. If your server is set up to listen at that URL, it can take action when it receives one.
87
+
88
+ ### Setup
89
+
90
+ In order to test your webhook locally, you can use a webhook proxy URL to forward the webhooks from Studio to your computer or codespace. We are using [smee.io](http://smee.io) to provide a webhook proxy url and forward webhooks.
91
+
92
+ 1. Go to [smee.io](http://smee.io)
93
+ 2. Start a new channel
94
+ 3. Copy the full URL under the webhook proxy URL. We will use this URL in the following setup steps.
95
+ 4. Install smee-client if it is not already installed using `npm install --global smee-client`
96
+ 5. To receive forwarded webhooks from smee.io, run the following command in your terminal. Replace the `WEBHOOK_PROXY_URL` with your webhook proxy URL from earlier.
97
+
98
+ ```bash
99
+ smee --path /webhook --port 3000 --url WEBHOOK_PROXY_URL
100
+ ```
101
+
102
+ 6. Keep this running while you test out your webhook. When you want to stop forwarding the webhooks, enter Ctrl + C
103
+ 7. Create a webhook using the steps described above, or edit an existing one, using the webhook proxy URL from earlier.
104
+ 8. Write code to handle webhook deliveries
105
+ 1. Initialize your server to listen for requests to your webhook URL
106
+ 2. Read HTTP headers and body from request
107
+ 3. Take desired action in response to the request.
108
+
109
+ You can use any programming language that you can run on your server.
110
+
111
+ ### Example Code
112
+
113
+ #### Python
114
+
115
+ This example uses Python and the Flask library to handle the routes and HTTP requests.
116
+
117
+ To use it, you must install the Flask library in your project. For example:
118
+
119
+ ```bash
120
+ pip install Flask
121
+ ```
122
+
123
+ Create a Python file with the following contents. Modify the code to handle only the event types that your webhook is subscribed to as well as the ping event that Studio sends when you create a webhook. This example handles job and ping events.
124
+
125
+ ```python
126
+ # You installed the `flask` library earlier.
127
+ from flask import Flask, request
128
+
129
+ # This defines the port where your server should listen.
130
+ # 3000 matches the port that you specified for webhook forwarding.
131
+ #
132
+ # Once you deploy your code to a server,
133
+ # Change this to match the port where your server is listening.
134
+ port = 3000
135
+ secret = "secretString"
136
+
137
+ # This initializes a new Flask application.
138
+ app = Flask(__name__)
139
+
140
+ # This defines a POST route at the `/webhook` path.
141
+ # It matches the path you specified for the smee.io forwarding.
142
+ #
143
+ # Once you deploy your code to a server and update your webhook URL,
144
+ # Change this to match the path portion of the URL for your webhook.
145
+ @app.route('/webhook', methods=['POST'])
146
+ def webhook():
147
+ # Respond to indicate that delivery was successfully received.
148
+ # Your server should respond with a 2XX response
149
+ # within 10 seconds of receiving a webhook delivery.
150
+ # If your server takes longer than that to respond,
151
+ # then Studio terminates the connection.
152
+
153
+ # Check `http-x-datachain-event` header for the event type.
154
+ datachain_event = request.headers.get('http-x-datachain-event')
155
+
156
+ # You should add logic to handle each event type
157
+ # that your webhook is subscribed to.
158
+ # For example, this code handles the `JOB` and `PING` events.
159
+ if datachain_event == 'JOB':
160
+ data = request.get_json()
161
+ action = data.get('action')
162
+ if action == 'job_status':
163
+ print(
164
+ f"Job status for job {data['job']['id']} was" \
165
+ f" changed to {data['job']['status']}"
166
+ )
167
+ else:
168
+ print(f"Unhandled action for the job event: {action}")
169
+ elif datachain_event == 'PING':
170
+ print('Ping event received')
171
+ else:
172
+ print(f"Unhandled event: {datachain_event}")
173
+
174
+ return '', 202 # 202 Accepted status code
175
+
176
+ # This starts the server.
177
+ if __name__ == '__main__':
178
+ app.run(host='0.0.0.0', port=port, debug=True)
179
+ print(f"Server is running on port {port}")
180
+
181
+ ```
182
+
183
+ To test the code, run the file using `python FILENAME`. Make sure that you are forwarding the webhooks in a separate terminal.
184
+
185
+ When you run a job in Studio, you will see output similar to the following:
186
+
187
+ ```prolog
188
+ Ping event received
189
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to CREATED
190
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to SCHEDULED
191
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to QUEUED
192
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to INIT
193
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to RUNNING
194
+ Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to COMPLETE
195
+ ```
196
+
197
+ ## Validating webhook deliveries
198
+
199
+ Once your server is configured to receive payloads, it will listen for any delivery that’s sent to the endpoint you configured. To ensure that your server only processes webhook deliveries that were sent by Datachain Studio and that the delivery was not tampered with, you should validate the webhook signature before processing the delivery further.
200
+
201
+ Studio will use the secret you added when creating your webhook to create a hash signature that’s sent to you with each payload. The hash signature will appear in each delivery as the value of `X-datachain-signature-256` header.
202
+
203
+ In your code that handles webhook deliveries, you should calculate a hash using your secret token and compare the hash Studio sent with the expected hash that you calculate and ensure they match.
204
+
205
+ Notes:
206
+
207
+ - Studio uses HMAC hex digest to compute the hash
208
+ - The hash signature always starts with `sha256=`
209
+ - The hash signature is generated using webhook’s secret token and payload contents.
210
+ - Never use a plain `==` operator. Instead consider using a method like [`secure_compare`](https://www.rubydoc.info/gems/rack/Rack%2FUtils:secure_compare) or [`crypto.timingSafeEqual`](https://nodejs.org/api/crypto.html#cryptotimingsafeequala-b), which performs a "constant time" string comparison to help mitigate certain timing attacks against regular equality operators, or regular loops in JIT-optimized languages.
211
+
212
+ Updating the example above:
213
+
214
+ ```python
215
+ import hashlib
216
+ import hmac
217
+ from flask import abort
218
+
219
+ def verify_signature(payload_body, secret_token, signature_header):
220
+ """Verify the payload was sent from Studio by validating SHA256.
221
+
222
+ Raise and return 403 if not authorized.
223
+
224
+ Args:
225
+ payload_body: request body to verify (request.body())
226
+ secret_token: Studio webhook token (WEBHOOK_SECRET)
227
+ signature_header: header (x-datachain-signature-256)
228
+ """
229
+ if not signature_header:
230
+ abort(403, "X-datachain-signature-256 is missing!")
231
+ hash_object = hmac.new(
232
+ secret_token.encode('utf-8'),
233
+ msg=payload_body,
234
+ digestmod=hashlib.sha256
235
+ )
236
+ expected_signature = "sha256=" + hash_object.hexdigest()
237
+ if not hmac.compare_digest(
238
+ expected_signature, signature_header
239
+ ):
240
+ abort(403, "Request signatures didn't match!")
241
+ ```
242
+
243
+ Add the following call in the api receiver.
244
+
245
+ ```python
246
+ # Get the signature header
247
+ signature = request.headers.get('X-Datachain-Signature-256')
248
+
249
+ # Re-enable signature verification with improved JSON handling
250
+ if signature:
251
+ verify_signature(request.get_data(), secret, signature)
252
+ else:
253
+ print("Warning: No signature header found")
254
+ ```
255
+
256
+ ## Best practices for using Webhooks
257
+
258
+ 1. You should only subscribe to the webhook events that you need. This will reduce the amount of work your server needs to do.
259
+ 2. The webhook secret should be a random string of text with high entropy. You should securely store your webhook secret in a way that your server can access.
260
+ 3. You should ensure that your server uses an HTTPS connection. By default, Studio will verify SSL certificates when delivering webhooks. Studio recommends that you leave SSL verification enabled.
261
+ 4. Your server should respond with a 2XX response within 10 seconds of receiving a webhook delivery. If your server takes longer than that to respond, then Studio terminates the connection and considers the delivery a failure.
262
+ 5. Check the event header and action type before processing the event.
263
+ 6. Make sure the endpoints are idempotent, meaning that if multiple requests for the same event are received, the server handles them without duplicating their effects.
264
+ 7. Datachain Studio may deliver webhooks in a different order than the order in which the events took place. If you need to know when the event occurred relative to another event, you should use the timestamps that are included in the delivery payload.
265
+ 8. Ten consecutive failed deliveries to a webhook will disable deliveries for that webhook.
@@ -119,6 +119,7 @@ nav:
119
119
  - Local DB Migrations: guide/db_migrations.md
120
120
  - 🔗 Studio:
121
121
  - API: studio/api/index.md
122
+ - Webhooks: studio/webhooks.md
122
123
  - 🤝 Contributing: contributing.md
123
124
 
124
125
  - DataChain Website ↗: https://datachain.ai" target="_blank"
@@ -37,6 +37,7 @@ from datachain.lib.file import (
37
37
  VideoFrame,
38
38
  )
39
39
  from datachain.lib.model_store import ModelStore
40
+ from datachain.lib.namespaces import delete_namespace
40
41
  from datachain.lib.projects import create as create_project
41
42
  from datachain.lib.udf import Aggregator, Generator, Mapper
42
43
  from datachain.lib.utils import AbstractUDF, DataChainError
@@ -74,6 +75,7 @@ __all__ = [
74
75
  "create_project",
75
76
  "datasets",
76
77
  "delete_dataset",
78
+ "delete_namespace",
77
79
  "is_chain_type",
78
80
  "is_studio",
79
81
  "listings",
@@ -22,6 +22,7 @@ from sqlalchemy import (
22
22
  UniqueConstraint,
23
23
  select,
24
24
  )
25
+ from sqlalchemy.sql import func as f
25
26
 
26
27
  from datachain.data_storage import JobQueryType, JobStatus
27
28
  from datachain.data_storage.serializer import Serializable
@@ -37,7 +38,9 @@ from datachain.dataset import (
37
38
  from datachain.error import (
38
39
  DatasetNotFoundError,
39
40
  DatasetVersionNotFoundError,
41
+ NamespaceDeleteNotAllowedError,
40
42
  NamespaceNotFoundError,
43
+ ProjectDeleteNotAllowedError,
41
44
  ProjectNotFoundError,
42
45
  TableMissingError,
43
46
  )
@@ -141,6 +144,10 @@ class AbstractMetastore(ABC, Serializable):
141
144
  def get_namespace(self, name: str, conn=None) -> Namespace:
142
145
  """Gets a single namespace by name"""
143
146
 
147
+ @abstractmethod
148
+ def remove_namespace(self, namespace_id: int, conn=None) -> None:
149
+ """Removes a single namespace by id"""
150
+
144
151
  @abstractmethod
145
152
  def list_namespaces(self, conn=None) -> list[Namespace]:
146
153
  """Gets a list of all namespaces"""
@@ -190,10 +197,30 @@ class AbstractMetastore(ABC, Serializable):
190
197
  It also creates project if not found and create flag is set to True.
191
198
  """
192
199
 
200
+ def is_default_project(self, project_name: str, namespace_name: str) -> bool:
201
+ return (
202
+ project_name == self.default_project_name
203
+ and namespace_name == self.default_namespace_name
204
+ )
205
+
206
+ def is_listing_project(self, project_name: str, namespace_name: str) -> bool:
207
+ return (
208
+ project_name == self.listing_project_name
209
+ and namespace_name == self.system_namespace_name
210
+ )
211
+
193
212
  @abstractmethod
194
213
  def get_project_by_id(self, project_id: int, conn=None) -> Project:
195
214
  """Gets a single project by id"""
196
215
 
216
+ @abstractmethod
217
+ def count_projects(self, namespace_id: Optional[int] = None) -> int:
218
+ """Counts projects in some namespace or in general."""
219
+
220
+ @abstractmethod
221
+ def remove_project(self, project_id: int, conn=None) -> None:
222
+ """Removes a single project by id"""
223
+
197
224
  @abstractmethod
198
225
  def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
199
226
  """Gets list of projects in some namespace or in general (in all namespaces)"""
@@ -270,6 +297,10 @@ class AbstractMetastore(ABC, Serializable):
270
297
  ) -> Iterator[DatasetListRecord]:
271
298
  """Lists all datasets in some project or in all projects."""
272
299
 
300
+ @abstractmethod
301
+ def count_datasets(self, project_id: Optional[int] = None) -> int:
302
+ """Counts datasets in some project or in all projects."""
303
+
273
304
  @abstractmethod
274
305
  def list_datasets_by_prefix(
275
306
  self, prefix: str, project_id: Optional[int] = None
@@ -735,6 +766,18 @@ class AbstractDBMetastore(AbstractMetastore):
735
766
 
736
767
  return self.get_namespace(name)
737
768
 
769
+ def remove_namespace(self, namespace_id: int, conn=None) -> None:
770
+ num_projects = self.count_projects(namespace_id)
771
+ if num_projects > 0:
772
+ raise NamespaceDeleteNotAllowedError(
773
+ f"Namespace cannot be removed. It contains {num_projects} project(s). "
774
+ "Please remove the project(s) first."
775
+ )
776
+
777
+ n = self._namespaces
778
+ with self.db.transaction():
779
+ self.db.execute(self._namespaces_delete().where(n.c.id == namespace_id))
780
+
738
781
  def get_namespace(self, name: str, conn=None) -> Namespace:
739
782
  """Gets a single namespace by name"""
740
783
  n = self._namespaces
@@ -796,18 +839,6 @@ class AbstractDBMetastore(AbstractMetastore):
796
839
 
797
840
  return self.get_project(name, namespace.name)
798
841
 
799
- def _is_listing_project(self, project_name: str, namespace_name: str) -> bool:
800
- return (
801
- project_name == self.listing_project_name
802
- and namespace_name == self.system_namespace_name
803
- )
804
-
805
- def _is_default_project(self, project_name: str, namespace_name: str) -> bool:
806
- return (
807
- project_name == self.default_project_name
808
- and namespace_name == self.default_namespace_name
809
- )
810
-
811
842
  def get_project(
812
843
  self, name: str, namespace_name: str, create: bool = False, conn=None
813
844
  ) -> Project:
@@ -816,7 +847,7 @@ class AbstractDBMetastore(AbstractMetastore):
816
847
  p = self._projects
817
848
  validate = True
818
849
 
819
- if self._is_listing_project(name, namespace_name) or self._is_default_project(
850
+ if self.is_listing_project(name, namespace_name) or self.is_default_project(
820
851
  name, namespace_name
821
852
  ):
822
853
  # we are always creating default and listing projects if they don't exist
@@ -858,7 +889,31 @@ class AbstractDBMetastore(AbstractMetastore):
858
889
  raise ProjectNotFoundError(f"Project with id {project_id} not found.")
859
890
  return self.project_class.parse(*rows[0])
860
891
 
861
- def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
892
+ def count_projects(self, namespace_id: Optional[int] = None) -> int:
893
+ p = self._projects
894
+ query = self._projects_select()
895
+ if namespace_id:
896
+ query = query.where(p.c.namespace_id == namespace_id)
897
+
898
+ query = select(f.count(1)).select_from(query.subquery())
899
+
900
+ return next(self.db.execute(query))[0]
901
+
902
+ def remove_project(self, project_id: int, conn=None) -> None:
903
+ num_datasets = self.count_datasets(project_id)
904
+ if num_datasets > 0:
905
+ raise ProjectDeleteNotAllowedError(
906
+ f"Project cannot be removed. It contains {num_datasets} dataset(s). "
907
+ "Please remove the dataset(s) first."
908
+ )
909
+
910
+ p = self._projects
911
+ with self.db.transaction():
912
+ self.db.execute(self._projects_delete().where(p.c.id == project_id))
913
+
914
+ def list_projects(
915
+ self, namespace_id: Optional[int] = None, conn=None
916
+ ) -> list[Project]:
862
917
  """
863
918
  Gets a list of projects inside some namespace, or in all namespaces
864
919
  """
@@ -1189,7 +1244,6 @@ class AbstractDBMetastore(AbstractMetastore):
1189
1244
  def list_datasets(
1190
1245
  self, project_id: Optional[int] = None
1191
1246
  ) -> Iterator["DatasetListRecord"]:
1192
- """Lists all datasets."""
1193
1247
  d = self._datasets
1194
1248
  query = self._base_list_datasets_query().order_by(
1195
1249
  self._datasets.c.name, self._datasets_versions.c.version
@@ -1198,6 +1252,16 @@ class AbstractDBMetastore(AbstractMetastore):
1198
1252
  query = query.where(d.c.project_id == project_id)
1199
1253
  yield from self._parse_dataset_list(self.db.execute(query))
1200
1254
 
1255
+ def count_datasets(self, project_id: Optional[int] = None) -> int:
1256
+ d = self._datasets
1257
+ query = self._datasets_select()
1258
+ if project_id:
1259
+ query = query.where(d.c.project_id == project_id)
1260
+
1261
+ query = select(f.count(1)).select_from(query.subquery())
1262
+
1263
+ return next(self.db.execute(query))[0]
1264
+
1201
1265
  def list_datasets_by_prefix(
1202
1266
  self, prefix: str, project_id: Optional[int] = None, conn=None
1203
1267
  ) -> Iterator["DatasetListRecord"]:
@@ -34,6 +34,14 @@ class ProjectCreateNotAllowedError(NotAllowedError):
34
34
  pass
35
35
 
36
36
 
37
+ class ProjectDeleteNotAllowedError(NotAllowedError):
38
+ pass
39
+
40
+
41
+ class NamespaceDeleteNotAllowedError(NotAllowedError):
42
+ pass
43
+
44
+
37
45
  class ProjectNotFoundError(NotFoundError):
38
46
  pass
39
47