datachain 0.30.3__tar.gz → 0.30.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (420) hide show
  1. {datachain-0.30.3 → datachain-0.30.5}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.30.3 → datachain-0.30.5}/PKG-INFO +3 -3
  3. datachain-0.30.5/examples/get_started/nested_datamodel.py +70 -0
  4. {datachain-0.30.3 → datachain-0.30.5}/pyproject.toml +2 -2
  5. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/__init__.py +2 -0
  6. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/catalog/__init__.py +2 -0
  7. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/catalog/catalog.py +14 -2
  8. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/catalog/loader.py +4 -2
  9. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/__init__.py +1 -0
  10. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/datasets.py +16 -10
  11. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/metastore.py +0 -21
  12. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/sqlite.py +0 -4
  13. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/warehouse.py +2 -2
  14. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/arrow.py +2 -2
  15. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/__init__.py +2 -1
  16. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/datachain.py +26 -18
  17. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/datasets.py +3 -3
  18. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/utils.py +5 -0
  19. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/model_store.py +12 -0
  20. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/namespaces.py +3 -1
  21. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/projects.py +3 -1
  22. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/signal_schema.py +28 -17
  23. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/dataset.py +22 -18
  24. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/dispatch.py +5 -0
  25. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/schema.py +4 -0
  26. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/sqlite/base.py +12 -11
  27. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/sqlite/types.py +8 -13
  28. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/types.py +3 -3
  29. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/utils.py +8 -1
  30. {datachain-0.30.3 → datachain-0.30.5}/src/datachain.egg-info/PKG-INFO +3 -3
  31. {datachain-0.30.3 → datachain-0.30.5}/src/datachain.egg-info/SOURCES.txt +2 -0
  32. {datachain-0.30.3 → datachain-0.30.5}/src/datachain.egg-info/requires.txt +2 -2
  33. {datachain-0.30.3 → datachain-0.30.5}/tests/conftest.py +4 -32
  34. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/test_array.py +82 -4
  35. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_catalog.py +2 -0
  36. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_data_storage.py +4 -4
  37. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_datachain.py +0 -70
  38. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_dataset_query.py +19 -6
  39. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_datasets.py +0 -1
  40. datachain-0.30.5/tests/func/test_mutate.py +284 -0
  41. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_pull.py +1 -0
  42. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_read_dataset_remote.py +10 -0
  43. {datachain-0.30.3 → datachain-0.30.5}/tests/test_cli_studio.py +1 -0
  44. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_datachain.py +41 -15
  45. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_namespace.py +2 -2
  46. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_project.py +1 -1
  47. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_signal_schema.py +4 -2
  48. datachain-0.30.5/tests/unit/sql/sqlite/test_types.py +40 -0
  49. {datachain-0.30.3 → datachain-0.30.5}/tests/utils.py +2 -14
  50. datachain-0.30.3/tests/unit/sql/sqlite/test_types.py +0 -19
  51. {datachain-0.30.3 → datachain-0.30.5}/.cruft.json +0 -0
  52. {datachain-0.30.3 → datachain-0.30.5}/.gitattributes +0 -0
  53. {datachain-0.30.3 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  54. {datachain-0.30.3 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  55. {datachain-0.30.3 → datachain-0.30.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  56. {datachain-0.30.3 → datachain-0.30.5}/.github/codecov.yaml +0 -0
  57. {datachain-0.30.3 → datachain-0.30.5}/.github/dependabot.yml +0 -0
  58. {datachain-0.30.3 → datachain-0.30.5}/.github/workflows/benchmarks.yml +0 -0
  59. {datachain-0.30.3 → datachain-0.30.5}/.github/workflows/release.yml +0 -0
  60. {datachain-0.30.3 → datachain-0.30.5}/.github/workflows/tests-studio.yml +0 -0
  61. {datachain-0.30.3 → datachain-0.30.5}/.github/workflows/tests.yml +0 -0
  62. {datachain-0.30.3 → datachain-0.30.5}/.github/workflows/update-template.yaml +0 -0
  63. {datachain-0.30.3 → datachain-0.30.5}/.gitignore +0 -0
  64. {datachain-0.30.3 → datachain-0.30.5}/CODE_OF_CONDUCT.rst +0 -0
  65. {datachain-0.30.3 → datachain-0.30.5}/LICENSE +0 -0
  66. {datachain-0.30.3 → datachain-0.30.5}/README.rst +0 -0
  67. {datachain-0.30.3 → datachain-0.30.5}/docs/assets/captioned_cartoons.png +0 -0
  68. {datachain-0.30.3 → datachain-0.30.5}/docs/assets/datachain-white.svg +0 -0
  69. {datachain-0.30.3 → datachain-0.30.5}/docs/assets/datachain.svg +0 -0
  70. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/auth/login.md +0 -0
  71. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/auth/logout.md +0 -0
  72. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/auth/team.md +0 -0
  73. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/auth/token.md +0 -0
  74. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/index.md +0 -0
  75. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/job/cancel.md +0 -0
  76. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/job/clusters.md +0 -0
  77. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/job/logs.md +0 -0
  78. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/job/ls.md +0 -0
  79. {datachain-0.30.3 → datachain-0.30.5}/docs/commands/job/run.md +0 -0
  80. {datachain-0.30.3 → datachain-0.30.5}/docs/contributing.md +0 -0
  81. {datachain-0.30.3 → datachain-0.30.5}/docs/css/github-permalink-style.css +0 -0
  82. {datachain-0.30.3 → datachain-0.30.5}/docs/examples.md +0 -0
  83. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/db_migrations.md +0 -0
  84. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/delta.md +0 -0
  85. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/env.md +0 -0
  86. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/index.md +0 -0
  87. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/namespaces.md +0 -0
  88. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/processing.md +0 -0
  89. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/remotes.md +0 -0
  90. {datachain-0.30.3 → datachain-0.30.5}/docs/guide/retry.md +0 -0
  91. {datachain-0.30.3 → datachain-0.30.5}/docs/index.md +0 -0
  92. {datachain-0.30.3 → datachain-0.30.5}/docs/overrides/main.html +0 -0
  93. {datachain-0.30.3 → datachain-0.30.5}/docs/quick-start.md +0 -0
  94. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/arrowrow.md +0 -0
  95. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/bbox.md +0 -0
  96. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/file.md +0 -0
  97. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/imagefile.md +0 -0
  98. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/index.md +0 -0
  99. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/pose.md +0 -0
  100. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/segment.md +0 -0
  101. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/tarvfile.md +0 -0
  102. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/textfile.md +0 -0
  103. {datachain-0.30.3 → datachain-0.30.5}/docs/references/data-types/videofile.md +0 -0
  104. {datachain-0.30.3 → datachain-0.30.5}/docs/references/datachain.md +0 -0
  105. {datachain-0.30.3 → datachain-0.30.5}/docs/references/func.md +0 -0
  106. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/aggregate.md +0 -0
  107. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/array.md +0 -0
  108. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/conditional.md +0 -0
  109. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/numeric.md +0 -0
  110. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/path.md +0 -0
  111. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/random.md +0 -0
  112. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/string.md +0 -0
  113. {datachain-0.30.3 → datachain-0.30.5}/docs/references/functions/window.md +0 -0
  114. {datachain-0.30.3 → datachain-0.30.5}/docs/references/index.md +0 -0
  115. {datachain-0.30.3 → datachain-0.30.5}/docs/references/toolkit.md +0 -0
  116. {datachain-0.30.3 → datachain-0.30.5}/docs/references/torch.md +0 -0
  117. {datachain-0.30.3 → datachain-0.30.5}/docs/references/udf.md +0 -0
  118. {datachain-0.30.3 → datachain-0.30.5}/docs/tutorials.md +0 -0
  119. {datachain-0.30.3 → datachain-0.30.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  120. {datachain-0.30.3 → datachain-0.30.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  121. {datachain-0.30.3 → datachain-0.30.5}/examples/computer_vision/openimage-detect.py +0 -0
  122. {datachain-0.30.3 → datachain-0.30.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
  123. {datachain-0.30.3 → datachain-0.30.5}/examples/computer_vision/ultralytics-pose.py +0 -0
  124. {datachain-0.30.3 → datachain-0.30.5}/examples/computer_vision/ultralytics-segment.py +0 -0
  125. {datachain-0.30.3 → datachain-0.30.5}/examples/get_started/common_sql_functions.py +0 -0
  126. {datachain-0.30.3 → datachain-0.30.5}/examples/get_started/json-csv-reader.py +0 -0
  127. {datachain-0.30.3 → datachain-0.30.5}/examples/get_started/torch-loader.py +0 -0
  128. {datachain-0.30.3 → datachain-0.30.5}/examples/get_started/udfs/parallel.py +0 -0
  129. {datachain-0.30.3 → datachain-0.30.5}/examples/get_started/udfs/simple.py +0 -0
  130. {datachain-0.30.3 → datachain-0.30.5}/examples/get_started/udfs/stateful.py +0 -0
  131. {datachain-0.30.3 → datachain-0.30.5}/examples/incremental_processing/delta.py +0 -0
  132. {datachain-0.30.3 → datachain-0.30.5}/examples/incremental_processing/retry.py +0 -0
  133. {datachain-0.30.3 → datachain-0.30.5}/examples/incremental_processing/utils.py +0 -0
  134. {datachain-0.30.3 → datachain-0.30.5}/examples/llm_and_nlp/claude-query.py +0 -0
  135. {datachain-0.30.3 → datachain-0.30.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  136. {datachain-0.30.3 → datachain-0.30.5}/examples/multimodal/audio-to-text.py +0 -0
  137. {datachain-0.30.3 → datachain-0.30.5}/examples/multimodal/clip_inference.py +0 -0
  138. {datachain-0.30.3 → datachain-0.30.5}/examples/multimodal/hf_pipeline.py +0 -0
  139. {datachain-0.30.3 → datachain-0.30.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
  140. {datachain-0.30.3 → datachain-0.30.5}/examples/multimodal/wds.py +0 -0
  141. {datachain-0.30.3 → datachain-0.30.5}/examples/multimodal/wds_filtered.py +0 -0
  142. {datachain-0.30.3 → datachain-0.30.5}/mkdocs.yml +0 -0
  143. {datachain-0.30.3 → datachain-0.30.5}/noxfile.py +0 -0
  144. {datachain-0.30.3 → datachain-0.30.5}/setup.cfg +0 -0
  145. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/__main__.py +0 -0
  146. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/asyn.py +0 -0
  147. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cache.py +0 -0
  148. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/catalog/datasource.py +0 -0
  149. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/__init__.py +0 -0
  150. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/du.py +0 -0
  151. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/index.py +0 -0
  152. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/ls.py +0 -0
  153. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/misc.py +0 -0
  154. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/query.py +0 -0
  155. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/commands/show.py +0 -0
  156. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/parser/__init__.py +0 -0
  157. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/parser/job.py +0 -0
  158. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/parser/studio.py +0 -0
  159. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/parser/utils.py +0 -0
  160. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/cli/utils.py +0 -0
  161. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/__init__.py +0 -0
  162. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/azure.py +0 -0
  163. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/fileslice.py +0 -0
  164. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/fsspec.py +0 -0
  165. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/gcs.py +0 -0
  166. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/hf.py +0 -0
  167. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/local.py +0 -0
  168. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/client/s3.py +0 -0
  169. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/config.py +0 -0
  170. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/__init__.py +0 -0
  171. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/db_engine.py +0 -0
  172. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/job.py +0 -0
  173. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/schema.py +0 -0
  174. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/data_storage/serializer.py +0 -0
  175. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/dataset.py +0 -0
  176. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/delta.py +0 -0
  177. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/diff/__init__.py +0 -0
  178. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/error.py +0 -0
  179. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/fs/__init__.py +0 -0
  180. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/fs/reference.py +0 -0
  181. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/fs/utils.py +0 -0
  182. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/__init__.py +0 -0
  183. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/aggregate.py +0 -0
  184. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/array.py +0 -0
  185. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/base.py +0 -0
  186. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/conditional.py +0 -0
  187. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/func.py +0 -0
  188. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/numeric.py +0 -0
  189. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/path.py +0 -0
  190. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/random.py +0 -0
  191. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/string.py +0 -0
  192. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/func/window.py +0 -0
  193. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/job.py +0 -0
  194. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/__init__.py +0 -0
  195. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/audio.py +0 -0
  196. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/clip.py +0 -0
  197. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/convert/__init__.py +0 -0
  198. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/convert/flatten.py +0 -0
  199. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
  200. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
  201. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/convert/unflatten.py +0 -0
  202. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  203. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/data_model.py +0 -0
  204. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dataset_info.py +0 -0
  205. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/csv.py +0 -0
  206. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/database.py +0 -0
  207. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/hf.py +0 -0
  208. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/json.py +0 -0
  209. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/listings.py +0 -0
  210. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/pandas.py +0 -0
  211. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/parquet.py +0 -0
  212. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/records.py +0 -0
  213. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/storage.py +0 -0
  214. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/dc/values.py +0 -0
  215. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/file.py +0 -0
  216. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/hf.py +0 -0
  217. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/image.py +0 -0
  218. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/listing.py +0 -0
  219. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/listing_info.py +0 -0
  220. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/meta_formats.py +0 -0
  221. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/pytorch.py +0 -0
  222. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/settings.py +0 -0
  223. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/tar.py +0 -0
  224. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/text.py +0 -0
  225. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/udf.py +0 -0
  226. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/udf_signature.py +0 -0
  227. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/utils.py +0 -0
  228. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/video.py +0 -0
  229. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/webdataset.py +0 -0
  230. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/lib/webdataset_laion.py +0 -0
  231. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/listing.py +0 -0
  232. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/__init__.py +0 -0
  233. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/bbox.py +0 -0
  234. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/pose.py +0 -0
  235. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/segment.py +0 -0
  236. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/ultralytics/__init__.py +0 -0
  237. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/ultralytics/bbox.py +0 -0
  238. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/ultralytics/pose.py +0 -0
  239. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/ultralytics/segment.py +0 -0
  240. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/model/utils.py +0 -0
  241. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/namespace.py +0 -0
  242. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/node.py +0 -0
  243. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/nodes_fetcher.py +0 -0
  244. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/nodes_thread_pool.py +0 -0
  245. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/progress.py +0 -0
  246. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/project.py +0 -0
  247. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/py.typed +0 -0
  248. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/__init__.py +0 -0
  249. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/batch.py +0 -0
  250. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/metrics.py +0 -0
  251. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/params.py +0 -0
  252. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/queue.py +0 -0
  253. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/session.py +0 -0
  254. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/udf.py +0 -0
  255. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/query/utils.py +0 -0
  256. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/remote/__init__.py +0 -0
  257. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/remote/studio.py +0 -0
  258. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/script_meta.py +0 -0
  259. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/semver.py +0 -0
  260. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/__init__.py +0 -0
  261. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/default/__init__.py +0 -0
  262. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/default/base.py +0 -0
  263. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/__init__.py +0 -0
  264. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/aggregate.py +0 -0
  265. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/array.py +0 -0
  266. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/conditional.py +0 -0
  267. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/numeric.py +0 -0
  268. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/path.py +0 -0
  269. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/random.py +0 -0
  270. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/functions/string.py +0 -0
  271. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/postgresql_dialect.py +0 -0
  272. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/postgresql_types.py +0 -0
  273. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/selectable.py +0 -0
  274. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/sqlite/__init__.py +0 -0
  275. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/sqlite/vector.py +0 -0
  276. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/sql/utils.py +0 -0
  277. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/studio.py +0 -0
  278. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/telemetry.py +0 -0
  279. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/toolkit/__init__.py +0 -0
  280. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/toolkit/split.py +0 -0
  281. {datachain-0.30.3 → datachain-0.30.5}/src/datachain/torch/__init__.py +0 -0
  282. {datachain-0.30.3 → datachain-0.30.5}/src/datachain.egg-info/dependency_links.txt +0 -0
  283. {datachain-0.30.3 → datachain-0.30.5}/src/datachain.egg-info/entry_points.txt +0 -0
  284. {datachain-0.30.3 → datachain-0.30.5}/src/datachain.egg-info/top_level.txt +0 -0
  285. {datachain-0.30.3 → datachain-0.30.5}/tests/__init__.py +0 -0
  286. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/__init__.py +0 -0
  287. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/conftest.py +0 -0
  288. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  289. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/datasets/.dvc/config +0 -0
  290. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/datasets/.gitignore +0 -0
  291. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  292. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/test_datachain.py +0 -0
  293. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/test_ls.py +0 -0
  294. {datachain-0.30.3 → datachain-0.30.5}/tests/benchmarks/test_version.py +0 -0
  295. {datachain-0.30.3 → datachain-0.30.5}/tests/data.py +0 -0
  296. {datachain-0.30.3 → datachain-0.30.5}/tests/examples/__init__.py +0 -0
  297. {datachain-0.30.3 → datachain-0.30.5}/tests/examples/test_examples.py +0 -0
  298. {datachain-0.30.3 → datachain-0.30.5}/tests/examples/test_wds_e2e.py +0 -0
  299. {datachain-0.30.3 → datachain-0.30.5}/tests/examples/wds_data.py +0 -0
  300. {datachain-0.30.3 → datachain-0.30.5}/tests/func/__init__.py +0 -0
  301. {datachain-0.30.3 → datachain-0.30.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  302. {datachain-0.30.3 → datachain-0.30.5}/tests/func/data/lena.jpg +0 -0
  303. {datachain-0.30.3 → datachain-0.30.5}/tests/func/fake-service-account-credentials.json +0 -0
  304. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/__init__.py +0 -0
  305. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/test_aggregate.py +0 -0
  306. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/test_conditional.py +0 -0
  307. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/test_numeric.py +0 -0
  308. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/test_path.py +0 -0
  309. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/test_random.py +0 -0
  310. {datachain-0.30.3 → datachain-0.30.5}/tests/func/functions/test_string.py +0 -0
  311. {datachain-0.30.3 → datachain-0.30.5}/tests/func/model/__init__.py +0 -0
  312. {datachain-0.30.3 → datachain-0.30.5}/tests/func/model/data/running-mask0.png +0 -0
  313. {datachain-0.30.3 → datachain-0.30.5}/tests/func/model/data/running-mask1.png +0 -0
  314. {datachain-0.30.3 → datachain-0.30.5}/tests/func/model/data/running.jpg +0 -0
  315. {datachain-0.30.3 → datachain-0.30.5}/tests/func/model/data/ships.jpg +0 -0
  316. {datachain-0.30.3 → datachain-0.30.5}/tests/func/model/test_yolo.py +0 -0
  317. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_audio.py +0 -0
  318. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_batching.py +0 -0
  319. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_client.py +0 -0
  320. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_cloud_transfer.py +0 -0
  321. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_datachain_merge.py +0 -0
  322. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_delta.py +0 -0
  323. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_feature_pickling.py +0 -0
  324. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_file.py +0 -0
  325. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_hf.py +0 -0
  326. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_hidden_field.py +0 -0
  327. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_image.py +0 -0
  328. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_listing.py +0 -0
  329. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_ls.py +0 -0
  330. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_meta_formats.py +0 -0
  331. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_metastore.py +0 -0
  332. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_metrics.py +0 -0
  333. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_pytorch.py +0 -0
  334. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_query.py +0 -0
  335. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_read_database.py +0 -0
  336. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  337. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_retry.py +0 -0
  338. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_session.py +0 -0
  339. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_studio_datetime_parsing.py +0 -0
  340. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_to_database.py +0 -0
  341. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_toolkit.py +0 -0
  342. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_video.py +0 -0
  343. {datachain-0.30.3 → datachain-0.30.5}/tests/func/test_warehouse.py +0 -0
  344. {datachain-0.30.3 → datachain-0.30.5}/tests/scripts/feature_class.py +0 -0
  345. {datachain-0.30.3 → datachain-0.30.5}/tests/scripts/feature_class_exception.py +0 -0
  346. {datachain-0.30.3 → datachain-0.30.5}/tests/scripts/feature_class_parallel.py +0 -0
  347. {datachain-0.30.3 → datachain-0.30.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  348. {datachain-0.30.3 → datachain-0.30.5}/tests/scripts/name_len_slow.py +0 -0
  349. {datachain-0.30.3 → datachain-0.30.5}/tests/test_atomicity.py +0 -0
  350. {datachain-0.30.3 → datachain-0.30.5}/tests/test_cli_e2e.py +0 -0
  351. {datachain-0.30.3 → datachain-0.30.5}/tests/test_import_time.py +0 -0
  352. {datachain-0.30.3 → datachain-0.30.5}/tests/test_query_e2e.py +0 -0
  353. {datachain-0.30.3 → datachain-0.30.5}/tests/test_telemetry.py +0 -0
  354. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/__init__.py +0 -0
  355. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/__init__.py +0 -0
  356. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/conftest.py +0 -0
  357. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_arrow.py +0 -0
  358. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_audio.py +0 -0
  359. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_clip.py +0 -0
  360. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  361. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_datachain_merge.py +0 -0
  362. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_diff.py +0 -0
  363. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_feature.py +0 -0
  364. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_feature_utils.py +0 -0
  365. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_file.py +0 -0
  366. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_hf.py +0 -0
  367. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_image.py +0 -0
  368. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_listing_info.py +0 -0
  369. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_partition_by.py +0 -0
  370. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_python_to_sql.py +0 -0
  371. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_schema.py +0 -0
  372. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_settings.py +0 -0
  373. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_sql_to_python.py +0 -0
  374. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_text.py +0 -0
  375. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_udf.py +0 -0
  376. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_udf_signature.py +0 -0
  377. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_utils.py +0 -0
  378. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/lib/test_webdataset.py +0 -0
  379. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/model/__init__.py +0 -0
  380. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/model/test_bbox.py +0 -0
  381. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/model/test_pose.py +0 -0
  382. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/model/test_segment.py +0 -0
  383. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/model/test_utils.py +0 -0
  384. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/__init__.py +0 -0
  385. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/sqlite/__init__.py +0 -0
  386. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
  387. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/test_array.py +0 -0
  388. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/test_conditional.py +0 -0
  389. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/test_path.py +0 -0
  390. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/test_random.py +0 -0
  391. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/test_selectable.py +0 -0
  392. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/sql/test_string.py +0 -0
  393. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_asyn.py +0 -0
  394. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_cache.py +0 -0
  395. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_catalog.py +0 -0
  396. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_catalog_loader.py +0 -0
  397. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_cli_parsing.py +0 -0
  398. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_client.py +0 -0
  399. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_client_gcs.py +0 -0
  400. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_client_s3.py +0 -0
  401. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_config.py +0 -0
  402. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_data_storage.py +0 -0
  403. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_database_engine.py +0 -0
  404. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_dataset.py +0 -0
  405. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_dispatch.py +0 -0
  406. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_fileslice.py +0 -0
  407. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_func.py +0 -0
  408. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_listing.py +0 -0
  409. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_metastore.py +0 -0
  410. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_module_exports.py +0 -0
  411. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_pytorch.py +0 -0
  412. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_query.py +0 -0
  413. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_query_metrics.py +0 -0
  414. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_query_params.py +0 -0
  415. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_script_meta.py +0 -0
  416. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_semver.py +0 -0
  417. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_serializer.py +0 -0
  418. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_session.py +0 -0
  419. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_utils.py +0 -0
  420. {datachain-0.30.3 → datachain-0.30.5}/tests/unit/test_warehouse.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.9'
27
+ rev: 'v0.12.10'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.3
3
+ Version: 0.30.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Requires-Dist: tomlkit
22
22
  Requires-Dist: tqdm
23
23
  Requires-Dist: numpy<3,>=1
24
24
  Requires-Dist: pandas>=2.0.0
25
+ Requires-Dist: ujson>=5.10.0
25
26
  Requires-Dist: packaging
26
27
  Requires-Dist: pyarrow
27
28
  Requires-Dist: typing-extensions
@@ -38,7 +39,6 @@ Requires-Dist: shtab<2,>=1.3.4
38
39
  Requires-Dist: sqlalchemy>=2
39
40
  Requires-Dist: multiprocess==0.70.16
40
41
  Requires-Dist: cloudpickle
41
- Requires-Dist: orjson>=3.10.5
42
42
  Requires-Dist: pydantic
43
43
  Requires-Dist: jmespath>=1.0
44
44
  Requires-Dist: datamodel-code-generator>=0.25
@@ -92,7 +92,7 @@ Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
92
92
  Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
93
93
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
94
94
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
95
- Requires-Dist: pytest-env>=1.1.0; extra == "tests"
95
+ Requires-Dist: pytest-dotenv; extra == "tests"
96
96
  Requires-Dist: virtualenv; extra == "tests"
97
97
  Requires-Dist: dulwich; extra == "tests"
98
98
  Requires-Dist: hypothesis; extra == "tests"
@@ -0,0 +1,70 @@
1
+ """Example: Nested DataModels with parallel execution.
2
+
3
+ Demonstrates mapping a function that returns a nested DataModel (a DataModel
4
+ containing other DataModels).
5
+
6
+ The example keeps things minimal: we persist a tiny dataset, run a parallel map
7
+ that returns a nested DataModel, and display the result.
8
+ """
9
+
10
+ from typing import Optional
11
+
12
+ from pydantic import Field
13
+
14
+ import datachain as dc
15
+
16
+
17
+ class Metric(dc.DataModel):
18
+ """Represents a single computed metric with quality metadata."""
19
+
20
+ value: Optional[float] = Field(default=None, description="Computed metric value")
21
+ confidence: Optional[float] = Field(
22
+ default=None, description="Confidence / quality score"
23
+ )
24
+ status: Optional[str] = Field(default=None, description="Processing status label")
25
+ metric_error: Optional[str] = Field(
26
+ default=None, description="Error message if metric computation failed"
27
+ )
28
+
29
+
30
+ class SampleMetrics(dc.DataModel):
31
+ """Container for two illustrative nested metrics.
32
+
33
+ Each sub-field is its own DataModel instance to demonstrate nested schemas
34
+ """
35
+
36
+ metric_primary: Metric = Field(
37
+ default_factory=lambda: Metric(), description="Primary metric"
38
+ )
39
+ metric_secondary: Metric = Field(
40
+ default_factory=lambda: Metric(), description="Secondary metric"
41
+ )
42
+
43
+
44
+ def generate_sample_metrics() -> SampleMetrics:
45
+ """Synthesize a pair of metrics.
46
+
47
+ In real scenarios you'd compute these values; here we just return constants
48
+ to keep the example deterministic.
49
+ """
50
+
51
+ return SampleMetrics(
52
+ metric_primary=Metric(value=50.0, confidence=0.95, status="ok"),
53
+ )
54
+
55
+
56
+ def main():
57
+ (
58
+ dc.read_values(record_id=[1, 2])
59
+ .settings(parallel=2) # Keep it parallel to test serialization
60
+ .map(metrics=generate_sample_metrics)
61
+ .save("nested_datamodel")
62
+ )
63
+
64
+ dc.read_dataset("nested_datamodel").show()
65
+
66
+ print(dc.read_dataset("nested_datamodel").to_values("metrics"))
67
+
68
+
69
+ if __name__ == "__main__":
70
+ main()
@@ -26,6 +26,7 @@ dependencies = [
26
26
  "tqdm",
27
27
  "numpy>=1,<3",
28
28
  "pandas>=2.0.0",
29
+ "ujson>=5.10.0",
29
30
  "packaging",
30
31
  "pyarrow",
31
32
  "typing-extensions",
@@ -42,7 +43,6 @@ dependencies = [
42
43
  "sqlalchemy>=2",
43
44
  "multiprocess==0.70.16",
44
45
  "cloudpickle",
45
- "orjson>=3.10.5",
46
46
  "pydantic",
47
47
  "jmespath>=1.0",
48
48
  "datamodel-code-generator>=0.25",
@@ -108,7 +108,7 @@ tests = [
108
108
  "pytest-servers[all]>=0.5.9",
109
109
  "pytest-benchmark[histogram]",
110
110
  "pytest-xdist>=3.3.1",
111
- "pytest-env>=1.1.0",
111
+ "pytest-dotenv",
112
112
  "virtualenv",
113
113
  "dulwich",
114
114
  "hypothesis",
@@ -6,6 +6,7 @@ from datachain.lib.dc import (
6
6
  Sys,
7
7
  datasets,
8
8
  delete_dataset,
9
+ is_studio,
9
10
  listings,
10
11
  move_dataset,
11
12
  read_csv,
@@ -74,6 +75,7 @@ __all__ = [
74
75
  "datasets",
75
76
  "delete_dataset",
76
77
  "is_chain_type",
78
+ "is_studio",
77
79
  "listings",
78
80
  "metrics",
79
81
  "move_dataset",
@@ -3,6 +3,7 @@ from .catalog import (
3
3
  QUERY_SCRIPT_CANCELED_EXIT_CODE,
4
4
  QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE,
5
5
  Catalog,
6
+ is_namespace_local,
6
7
  )
7
8
  from .loader import get_catalog
8
9
 
@@ -12,4 +13,5 @@ __all__ = [
12
13
  "QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE",
13
14
  "Catalog",
14
15
  "get_catalog",
16
+ "is_namespace_local",
15
17
  ]
@@ -113,6 +113,11 @@ else:
113
113
  SIGINT = signal.SIGINT
114
114
 
115
115
 
116
+ def is_namespace_local(namespace_name) -> bool:
117
+ """Checks if namespace is from local environment, i.e. is `local`"""
118
+ return namespace_name == "local"
119
+
120
+
116
121
  def shutdown_process(
117
122
  proc: subprocess.Popen,
118
123
  interrupt_timeout: Optional[int] = None,
@@ -1121,6 +1126,8 @@ class Catalog:
1121
1126
  pull_dataset: bool = False,
1122
1127
  update: bool = False,
1123
1128
  ) -> DatasetRecord:
1129
+ from datachain.lib.dc.utils import is_studio
1130
+
1124
1131
  # Intentionally ignore update flag is version is provided. Here only exact
1125
1132
  # version can be provided and update then doesn't make sense.
1126
1133
  # It corresponds to a query like this for example:
@@ -1129,7 +1136,12 @@ class Catalog:
1129
1136
  if version:
1130
1137
  update = False
1131
1138
 
1132
- if self.metastore.is_local_dataset(namespace_name) or not update:
1139
+ # we don't do Studio fallback is script is already ran in Studio, or if we try
1140
+ # to fetch dataset with local namespace as that one cannot
1141
+ # exist in Studio in the first place
1142
+ no_fallback = is_studio() or is_namespace_local(namespace_name)
1143
+
1144
+ if no_fallback or not update:
1133
1145
  try:
1134
1146
  ds = self.get_dataset(
1135
1147
  name,
@@ -1141,7 +1153,7 @@ class Catalog:
1141
1153
  except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
1142
1154
  pass
1143
1155
 
1144
- if self.metastore.is_local_dataset(namespace_name):
1156
+ if no_fallback:
1145
1157
  raise DatasetNotFoundError(
1146
1158
  f"Dataset {name}"
1147
1159
  + (f" version {version} " if version else " ")
@@ -127,7 +127,8 @@ def get_udf_distributor_class() -> Optional[type["AbstractUDFDistributor"]]:
127
127
 
128
128
 
129
129
  def get_catalog(
130
- client_config: Optional[dict[str, Any]] = None, in_memory: bool = False
130
+ client_config: Optional[dict[str, Any]] = None,
131
+ in_memory: bool = False,
131
132
  ) -> "Catalog":
132
133
  """
133
134
  Function that creates Catalog instance with appropriate metastore
@@ -142,8 +143,9 @@ def get_catalog(
142
143
  """
143
144
  from datachain.catalog import Catalog
144
145
 
146
+ metastore = get_metastore(in_memory=in_memory)
145
147
  return Catalog(
146
- metastore=get_metastore(in_memory=in_memory),
148
+ metastore=metastore,
147
149
  warehouse=get_warehouse(in_memory=in_memory),
148
150
  client_config=client_config,
149
151
  in_memory=in_memory,
@@ -6,6 +6,7 @@ from multiprocessing import freeze_support
6
6
  from typing import Optional
7
7
 
8
8
  from datachain.cli.utils import get_logging_level
9
+ from datachain.error import DataChainError as DataChainError
9
10
 
10
11
  from .commands import (
11
12
  clear_cache,
@@ -6,6 +6,7 @@ from tabulate import tabulate
6
6
  if TYPE_CHECKING:
7
7
  from datachain.catalog import Catalog
8
8
 
9
+ from datachain.catalog import is_namespace_local
9
10
  from datachain.cli.utils import determine_flavors
10
11
  from datachain.config import Config
11
12
  from datachain.error import DataChainError, DatasetNotFoundError
@@ -138,15 +139,18 @@ def rm_dataset(
138
139
  ):
139
140
  namespace_name, project_name, name = catalog.get_full_dataset_name(name)
140
141
 
141
- if not catalog.metastore.is_local_dataset(namespace_name) and studio:
142
+ if studio:
143
+ # removing Studio dataset from CLI
142
144
  from datachain.studio import remove_studio_dataset
143
145
 
144
- token = Config().read().get("studio", {}).get("token")
145
- if not token:
146
+ if Config().read().get("studio", {}).get("token"):
147
+ remove_studio_dataset(
148
+ team, name, namespace_name, project_name, version, force
149
+ )
150
+ else:
146
151
  raise DataChainError(
147
152
  "Not logged in to Studio. Log in with 'datachain auth login'."
148
153
  )
149
- remove_studio_dataset(team, name, namespace_name, project_name, version, force)
150
154
  else:
151
155
  try:
152
156
  project = catalog.metastore.get_project(project_name, namespace_name)
@@ -163,9 +167,11 @@ def edit_dataset(
163
167
  attrs: Optional[list[str]] = None,
164
168
  team: Optional[str] = None,
165
169
  ):
170
+ from datachain.lib.dc.utils import is_studio
171
+
166
172
  namespace_name, project_name, name = catalog.get_full_dataset_name(name)
167
173
 
168
- if catalog.metastore.is_local_dataset(namespace_name):
174
+ if is_studio() or is_namespace_local(namespace_name):
169
175
  try:
170
176
  catalog.edit_dataset(
171
177
  name, catalog.metastore.default_project, new_name, description, attrs
@@ -175,11 +181,11 @@ def edit_dataset(
175
181
  else:
176
182
  from datachain.studio import edit_studio_dataset
177
183
 
178
- token = Config().read().get("studio", {}).get("token")
179
- if not token:
184
+ if Config().read().get("studio", {}).get("token"):
185
+ edit_studio_dataset(
186
+ team, name, namespace_name, project_name, new_name, description, attrs
187
+ )
188
+ else:
180
189
  raise DataChainError(
181
190
  "Not logged in to Studio. Log in with 'datachain auth login'."
182
191
  )
183
- edit_studio_dataset(
184
- team, name, namespace_name, project_name, new_name, description, attrs
185
- )
@@ -145,23 +145,6 @@ class AbstractMetastore(ABC, Serializable):
145
145
  def list_namespaces(self, conn=None) -> list[Namespace]:
146
146
  """Gets a list of all namespaces"""
147
147
 
148
- @property
149
- @abstractmethod
150
- def is_studio(self) -> bool:
151
- """Returns True if this code is ran in Studio"""
152
-
153
- def is_local_dataset(self, dataset_namespace: str) -> bool:
154
- """
155
- Returns True if this is local dataset i.e. not pulled from Studio but
156
- created locally. This is False if we ran code in CLI mode but using dataset
157
- names that are present in Studio.
158
- """
159
- return self.is_studio or dataset_namespace == Namespace.default()
160
-
161
- @property
162
- def namespace_allowed_to_create(self):
163
- return self.is_studio
164
-
165
148
  #
166
149
  # Projects
167
150
  #
@@ -215,10 +198,6 @@ class AbstractMetastore(ABC, Serializable):
215
198
  def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
216
199
  """Gets list of projects in some namespace or in general (in all namespaces)"""
217
200
 
218
- @property
219
- def project_allowed_to_create(self):
220
- return self.is_studio
221
-
222
201
  #
223
202
  # Datasets
224
203
  #
@@ -542,10 +542,6 @@ class SQLiteMetastore(AbstractDBMetastore):
542
542
  def _jobs_insert(self) -> "Insert":
543
543
  return sqlite.insert(self._jobs)
544
544
 
545
- @property
546
- def is_studio(self) -> bool:
547
- return False
548
-
549
545
  #
550
546
  # Namespaces
551
547
  #
@@ -1,5 +1,4 @@
1
1
  import glob
2
- import json
3
2
  import logging
4
3
  import posixpath
5
4
  import random
@@ -11,6 +10,7 @@ from urllib.parse import urlparse
11
10
 
12
11
  import attrs
13
12
  import sqlalchemy as sa
13
+ import ujson as json
14
14
  from sqlalchemy.sql.expression import true
15
15
 
16
16
  from datachain.client import Client
@@ -122,7 +122,7 @@ class AbstractWarehouse(ABC, Serializable):
122
122
  if value_type is str:
123
123
  return val
124
124
  if value_type in (dict, list):
125
- return json.dumps(val)
125
+ return json.dumps(val, ensure_ascii=False)
126
126
  raise ValueError(
127
127
  f"Cannot convert value {val!r} with type {value_type} to JSON"
128
128
  )
@@ -2,8 +2,8 @@ from collections.abc import Sequence
2
2
  from itertools import islice
3
3
  from typing import TYPE_CHECKING, Any, Optional
4
4
 
5
- import orjson
6
5
  import pyarrow as pa
6
+ import ujson as json
7
7
  from pyarrow._csv import ParseOptions
8
8
  from pyarrow.dataset import CsvFileFormat, dataset
9
9
  from tqdm.auto import tqdm
@@ -269,7 +269,7 @@ def _get_hf_schema(
269
269
  def _get_datachain_schema(schema: "pa.Schema") -> Optional[SignalSchema]:
270
270
  """Return a restored SignalSchema from parquet metadata, if any is found."""
271
271
  if schema.metadata and DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY in schema.metadata:
272
- serialized_signal_schema = orjson.loads(
272
+ serialized_signal_schema = json.loads(
273
273
  schema.metadata[DATACHAIN_SIGNAL_SCHEMA_PARQUET_KEY]
274
274
  )
275
275
  return SignalSchema.deserialize(serialized_signal_schema)
@@ -9,7 +9,7 @@ from .pandas import read_pandas
9
9
  from .parquet import read_parquet
10
10
  from .records import read_records
11
11
  from .storage import read_storage
12
- from .utils import DatasetMergeError, DatasetPrepareError, Sys
12
+ from .utils import DatasetMergeError, DatasetPrepareError, Sys, is_studio
13
13
  from .values import read_values
14
14
 
15
15
  __all__ = [
@@ -21,6 +21,7 @@ __all__ = [
21
21
  "Sys",
22
22
  "datasets",
23
23
  "delete_dataset",
24
+ "is_studio",
24
25
  "listings",
25
26
  "move_dataset",
26
27
  "read_csv",
@@ -19,8 +19,8 @@ from typing import (
19
19
  overload,
20
20
  )
21
21
 
22
- import orjson
23
22
  import sqlalchemy
23
+ import ujson as json
24
24
  from pydantic import BaseModel
25
25
  from sqlalchemy.sql.elements import ColumnElement
26
26
  from tqdm import tqdm
@@ -67,6 +67,7 @@ from .utils import (
67
67
  Sys,
68
68
  _get_merge_error_str,
69
69
  _validate_merge_on,
70
+ is_studio,
70
71
  resolve_columns,
71
72
  )
72
73
 
@@ -461,8 +462,6 @@ class DataChain:
461
462
  Returns:
462
463
  DataChain: A new DataChain instance with the new set of columns.
463
464
  """
464
- import json
465
-
466
465
  import pyarrow as pa
467
466
 
468
467
  from datachain.lib.arrow import schema_to_output
@@ -609,7 +608,7 @@ class DataChain:
609
608
  project = self.session.catalog.metastore.get_project(
610
609
  project_name,
611
610
  namespace_name,
612
- create=self.session.catalog.metastore.project_allowed_to_create,
611
+ create=is_studio(),
613
612
  )
614
613
  except ProjectNotFoundError as e:
615
614
  # not being able to create it as creation is not allowed
@@ -1184,17 +1183,13 @@ class DataChain:
1184
1183
  )
1185
1184
 
1186
1185
  def mutate(self, **kwargs) -> "Self":
1187
- """Create new signals based on existing signals.
1188
-
1189
- This method cannot modify existing columns. If you need to modify an
1190
- existing column, use a different name for the new column and then use
1191
- `select()` to choose which columns to keep.
1186
+ """Create or modify signals based on existing signals.
1192
1187
 
1193
1188
  This method is vectorized and more efficient compared to map(), and it does not
1194
1189
  extract or download any data from the internal database. However, it can only
1195
1190
  utilize predefined built-in functions and their combinations.
1196
1191
 
1197
- The supported functions:
1192
+ Supported functions:
1198
1193
  Numerical: +, -, *, /, rand(), avg(), count(), func(),
1199
1194
  greatest(), least(), max(), min(), sum()
1200
1195
  String: length(), split(), replace(), regexp_replace()
@@ -1221,13 +1216,20 @@ class DataChain:
1221
1216
  ```
1222
1217
 
1223
1218
  This method can be also used to rename signals. If the Column("name") provided
1224
- as value for the new signal - the old column will be dropped. Otherwise a new
1225
- column is created.
1219
+ as value for the new signal - the old signal will be dropped. Otherwise a new
1220
+ signal is created. Exception, if the old signal is nested one (e.g.
1221
+ `C("file.path")`), it will be kept to keep the object intact.
1226
1222
 
1227
1223
  Example:
1228
1224
  ```py
1229
1225
  dc.mutate(
1230
- newkey=Column("oldkey")
1226
+ newkey=Column("oldkey") # drops oldkey
1227
+ )
1228
+ ```
1229
+
1230
+ ```py
1231
+ dc.mutate(
1232
+ size=Column("file.size") # keeps `file.size`
1231
1233
  )
1232
1234
  ```
1233
1235
  """
@@ -1262,8 +1264,10 @@ class DataChain:
1262
1264
  # adding new signal
1263
1265
  mutated[name] = value
1264
1266
 
1267
+ new_schema = schema.mutate(kwargs)
1265
1268
  return self._evolve(
1266
- query=self._query.mutate(**mutated), signal_schema=schema.mutate(kwargs)
1269
+ query=self._query.mutate(new_schema=new_schema, **mutated),
1270
+ signal_schema=new_schema,
1267
1271
  )
1268
1272
 
1269
1273
  @property
@@ -2123,9 +2127,9 @@ class DataChain:
2123
2127
  fsspec_fs = client.create_fs(**fs_kwargs)
2124
2128
 
2125
2129
  _partition_cols = list(partition_cols) if partition_cols else None
2126
- signal_schema_metadata = orjson.dumps(
2127
- self._effective_signals_schema.serialize()
2128
- )
2130
+ signal_schema_metadata = json.dumps(
2131
+ self._effective_signals_schema.serialize(), ensure_ascii=False
2132
+ ).encode("utf-8")
2129
2133
 
2130
2134
  column_names, column_chunks = self.to_columnar_data_with_names(chunk_size)
2131
2135
 
@@ -2272,7 +2276,11 @@ class DataChain:
2272
2276
  f.write(b"\n")
2273
2277
  else:
2274
2278
  is_first = False
2275
- f.write(orjson.dumps(row_to_nested_dict(headers, row)))
2279
+ f.write(
2280
+ json.dumps(
2281
+ row_to_nested_dict(headers, row), ensure_ascii=False
2282
+ ).encode("utf-8")
2283
+ )
2276
2284
  if include_outer_list:
2277
2285
  # This makes the file JSON instead of JSON lines.
2278
2286
  f.write(b"\n]\n")
@@ -13,7 +13,7 @@ from datachain.lib.signal_schema import SignalSchema
13
13
  from datachain.query import Session
14
14
  from datachain.query.dataset import DatasetQuery
15
15
 
16
- from .utils import Sys
16
+ from .utils import Sys, is_studio
17
17
  from .values import read_values
18
18
 
19
19
  if TYPE_CHECKING:
@@ -343,7 +343,7 @@ def delete_dataset(
343
343
  namespace_name=namespace,
344
344
  )
345
345
 
346
- if not catalog.metastore.is_local_dataset(namespace_name) and studio:
346
+ if not is_studio() and studio:
347
347
  return remove_studio_dataset(
348
348
  None, name, namespace_name, project_name, version=version, force=force
349
349
  )
@@ -418,6 +418,6 @@ def move_dataset(
418
418
  project_id=catalog.metastore.get_project(
419
419
  dest_project,
420
420
  dest_namespace,
421
- create=catalog.metastore.project_allowed_to_create,
421
+ create=is_studio(),
422
422
  ).id,
423
423
  )
@@ -15,6 +15,7 @@ from datachain.func.base import Function
15
15
  from datachain.lib.data_model import DataModel, DataType
16
16
  from datachain.lib.utils import DataChainParamsError
17
17
  from datachain.query.schema import DEFAULT_DELIMITER
18
+ from datachain.utils import getenv_bool
18
19
 
19
20
  if TYPE_CHECKING:
20
21
  from typing_extensions import Concatenate, ParamSpec
@@ -26,6 +27,10 @@ if TYPE_CHECKING:
26
27
  D = TypeVar("D", bound="DataChain")
27
28
 
28
29
 
30
+ def is_studio() -> bool:
31
+ return getenv_bool("DATACHAIN_IS_STUDIO", default=False)
32
+
33
+
29
34
  def resolve_columns(
30
35
  method: "Callable[Concatenate[D, P], D]",
31
36
  ) -> "Callable[Concatenate[D, P], D]":
@@ -89,3 +89,15 @@ class ModelStore:
89
89
  and ModelStore.is_pydantic(parent_type)
90
90
  and "@" in ModelStore.get_name(parent_type)
91
91
  )
92
+
93
+ @classmethod
94
+ def rebuild_all(cls) -> None:
95
+ """Ensure pydantic schemas are (re)built for all registered models.
96
+
97
+ Uses ``force=True`` to avoid subtle cases where a deserialized class
98
+ (e.g. from by-value cloudpickle in workers) reports built state but
99
+ nested model field schemas aren't fully resolved yet.
100
+ """
101
+ for versions in cls.store.values():
102
+ for model in versions.values():
103
+ model.model_rebuild(force=True)
@@ -28,7 +28,9 @@ def create(
28
28
  """
29
29
  session = Session.get(session)
30
30
 
31
- if not session.catalog.metastore.namespace_allowed_to_create:
31
+ from datachain.lib.dc.utils import is_studio
32
+
33
+ if not is_studio():
32
34
  raise NamespaceCreateNotAllowedError("Creating namespace is not allowed")
33
35
 
34
36
  Namespace.validate_name(name)
@@ -32,7 +32,9 @@ def create(
32
32
  """
33
33
  session = Session.get(session)
34
34
 
35
- if not session.catalog.metastore.project_allowed_to_create:
35
+ from datachain.lib.dc.utils import is_studio
36
+
37
+ if not is_studio():
36
38
  raise ProjectCreateNotAllowedError("Creating project is not allowed")
37
39
 
38
40
  Project.validate_name(name)