datachain 0.30.3__tar.gz → 0.30.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (418)
  1. {datachain-0.30.3 → datachain-0.30.4}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.30.3 → datachain-0.30.4}/PKG-INFO +1 -1
  3. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/__init__.py +2 -0
  4. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/catalog/__init__.py +2 -0
  5. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/catalog/catalog.py +14 -2
  6. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/catalog/loader.py +4 -2
  7. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/__init__.py +1 -0
  8. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/datasets.py +16 -10
  9. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/metastore.py +0 -21
  10. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/sqlite.py +0 -4
  11. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/__init__.py +2 -1
  12. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/datachain.py +17 -11
  13. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/datasets.py +3 -3
  14. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/utils.py +5 -0
  15. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/namespaces.py +3 -1
  16. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/projects.py +3 -1
  17. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/signal_schema.py +28 -17
  18. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/dataset.py +22 -18
  19. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/schema.py +4 -0
  20. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/utils.py +7 -0
  21. {datachain-0.30.3 → datachain-0.30.4}/src/datachain.egg-info/PKG-INFO +1 -1
  22. {datachain-0.30.3 → datachain-0.30.4}/src/datachain.egg-info/SOURCES.txt +1 -0
  23. {datachain-0.30.3 → datachain-0.30.4}/tests/conftest.py +4 -32
  24. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_catalog.py +2 -0
  25. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_data_storage.py +2 -2
  26. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_datachain.py +0 -70
  27. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_dataset_query.py +19 -6
  28. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_datasets.py +0 -1
  29. datachain-0.30.4/tests/func/test_mutate.py +284 -0
  30. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_pull.py +1 -0
  31. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_read_dataset_remote.py +10 -0
  32. {datachain-0.30.3 → datachain-0.30.4}/tests/test_cli_studio.py +1 -0
  33. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_datachain.py +12 -15
  34. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_namespace.py +2 -2
  35. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_project.py +1 -1
  36. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_signal_schema.py +4 -2
  37. {datachain-0.30.3 → datachain-0.30.4}/tests/utils.py +2 -14
  38. {datachain-0.30.3 → datachain-0.30.4}/.cruft.json +0 -0
  39. {datachain-0.30.3 → datachain-0.30.4}/.gitattributes +0 -0
  40. {datachain-0.30.3 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  41. {datachain-0.30.3 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  42. {datachain-0.30.3 → datachain-0.30.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  43. {datachain-0.30.3 → datachain-0.30.4}/.github/codecov.yaml +0 -0
  44. {datachain-0.30.3 → datachain-0.30.4}/.github/dependabot.yml +0 -0
  45. {datachain-0.30.3 → datachain-0.30.4}/.github/workflows/benchmarks.yml +0 -0
  46. {datachain-0.30.3 → datachain-0.30.4}/.github/workflows/release.yml +0 -0
  47. {datachain-0.30.3 → datachain-0.30.4}/.github/workflows/tests-studio.yml +0 -0
  48. {datachain-0.30.3 → datachain-0.30.4}/.github/workflows/tests.yml +0 -0
  49. {datachain-0.30.3 → datachain-0.30.4}/.github/workflows/update-template.yaml +0 -0
  50. {datachain-0.30.3 → datachain-0.30.4}/.gitignore +0 -0
  51. {datachain-0.30.3 → datachain-0.30.4}/CODE_OF_CONDUCT.rst +0 -0
  52. {datachain-0.30.3 → datachain-0.30.4}/LICENSE +0 -0
  53. {datachain-0.30.3 → datachain-0.30.4}/README.rst +0 -0
  54. {datachain-0.30.3 → datachain-0.30.4}/docs/assets/captioned_cartoons.png +0 -0
  55. {datachain-0.30.3 → datachain-0.30.4}/docs/assets/datachain-white.svg +0 -0
  56. {datachain-0.30.3 → datachain-0.30.4}/docs/assets/datachain.svg +0 -0
  57. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/auth/login.md +0 -0
  58. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/auth/logout.md +0 -0
  59. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/auth/team.md +0 -0
  60. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/auth/token.md +0 -0
  61. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/index.md +0 -0
  62. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/job/cancel.md +0 -0
  63. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/job/clusters.md +0 -0
  64. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/job/logs.md +0 -0
  65. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/job/ls.md +0 -0
  66. {datachain-0.30.3 → datachain-0.30.4}/docs/commands/job/run.md +0 -0
  67. {datachain-0.30.3 → datachain-0.30.4}/docs/contributing.md +0 -0
  68. {datachain-0.30.3 → datachain-0.30.4}/docs/css/github-permalink-style.css +0 -0
  69. {datachain-0.30.3 → datachain-0.30.4}/docs/examples.md +0 -0
  70. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/db_migrations.md +0 -0
  71. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/delta.md +0 -0
  72. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/env.md +0 -0
  73. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/index.md +0 -0
  74. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/namespaces.md +0 -0
  75. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/processing.md +0 -0
  76. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/remotes.md +0 -0
  77. {datachain-0.30.3 → datachain-0.30.4}/docs/guide/retry.md +0 -0
  78. {datachain-0.30.3 → datachain-0.30.4}/docs/index.md +0 -0
  79. {datachain-0.30.3 → datachain-0.30.4}/docs/overrides/main.html +0 -0
  80. {datachain-0.30.3 → datachain-0.30.4}/docs/quick-start.md +0 -0
  81. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/arrowrow.md +0 -0
  82. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/bbox.md +0 -0
  83. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/file.md +0 -0
  84. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/imagefile.md +0 -0
  85. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/index.md +0 -0
  86. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/pose.md +0 -0
  87. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/segment.md +0 -0
  88. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/tarvfile.md +0 -0
  89. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/textfile.md +0 -0
  90. {datachain-0.30.3 → datachain-0.30.4}/docs/references/data-types/videofile.md +0 -0
  91. {datachain-0.30.3 → datachain-0.30.4}/docs/references/datachain.md +0 -0
  92. {datachain-0.30.3 → datachain-0.30.4}/docs/references/func.md +0 -0
  93. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/aggregate.md +0 -0
  94. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/array.md +0 -0
  95. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/conditional.md +0 -0
  96. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/numeric.md +0 -0
  97. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/path.md +0 -0
  98. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/random.md +0 -0
  99. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/string.md +0 -0
  100. {datachain-0.30.3 → datachain-0.30.4}/docs/references/functions/window.md +0 -0
  101. {datachain-0.30.3 → datachain-0.30.4}/docs/references/index.md +0 -0
  102. {datachain-0.30.3 → datachain-0.30.4}/docs/references/toolkit.md +0 -0
  103. {datachain-0.30.3 → datachain-0.30.4}/docs/references/torch.md +0 -0
  104. {datachain-0.30.3 → datachain-0.30.4}/docs/references/udf.md +0 -0
  105. {datachain-0.30.3 → datachain-0.30.4}/docs/tutorials.md +0 -0
  106. {datachain-0.30.3 → datachain-0.30.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  107. {datachain-0.30.3 → datachain-0.30.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  108. {datachain-0.30.3 → datachain-0.30.4}/examples/computer_vision/openimage-detect.py +0 -0
  109. {datachain-0.30.3 → datachain-0.30.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
  110. {datachain-0.30.3 → datachain-0.30.4}/examples/computer_vision/ultralytics-pose.py +0 -0
  111. {datachain-0.30.3 → datachain-0.30.4}/examples/computer_vision/ultralytics-segment.py +0 -0
  112. {datachain-0.30.3 → datachain-0.30.4}/examples/get_started/common_sql_functions.py +0 -0
  113. {datachain-0.30.3 → datachain-0.30.4}/examples/get_started/json-csv-reader.py +0 -0
  114. {datachain-0.30.3 → datachain-0.30.4}/examples/get_started/torch-loader.py +0 -0
  115. {datachain-0.30.3 → datachain-0.30.4}/examples/get_started/udfs/parallel.py +0 -0
  116. {datachain-0.30.3 → datachain-0.30.4}/examples/get_started/udfs/simple.py +0 -0
  117. {datachain-0.30.3 → datachain-0.30.4}/examples/get_started/udfs/stateful.py +0 -0
  118. {datachain-0.30.3 → datachain-0.30.4}/examples/incremental_processing/delta.py +0 -0
  119. {datachain-0.30.3 → datachain-0.30.4}/examples/incremental_processing/retry.py +0 -0
  120. {datachain-0.30.3 → datachain-0.30.4}/examples/incremental_processing/utils.py +0 -0
  121. {datachain-0.30.3 → datachain-0.30.4}/examples/llm_and_nlp/claude-query.py +0 -0
  122. {datachain-0.30.3 → datachain-0.30.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  123. {datachain-0.30.3 → datachain-0.30.4}/examples/multimodal/audio-to-text.py +0 -0
  124. {datachain-0.30.3 → datachain-0.30.4}/examples/multimodal/clip_inference.py +0 -0
  125. {datachain-0.30.3 → datachain-0.30.4}/examples/multimodal/hf_pipeline.py +0 -0
  126. {datachain-0.30.3 → datachain-0.30.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
  127. {datachain-0.30.3 → datachain-0.30.4}/examples/multimodal/wds.py +0 -0
  128. {datachain-0.30.3 → datachain-0.30.4}/examples/multimodal/wds_filtered.py +0 -0
  129. {datachain-0.30.3 → datachain-0.30.4}/mkdocs.yml +0 -0
  130. {datachain-0.30.3 → datachain-0.30.4}/noxfile.py +0 -0
  131. {datachain-0.30.3 → datachain-0.30.4}/pyproject.toml +0 -0
  132. {datachain-0.30.3 → datachain-0.30.4}/setup.cfg +0 -0
  133. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/__main__.py +0 -0
  134. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/asyn.py +0 -0
  135. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cache.py +0 -0
  136. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/catalog/datasource.py +0 -0
  137. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/__init__.py +0 -0
  138. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/du.py +0 -0
  139. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/index.py +0 -0
  140. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/ls.py +0 -0
  141. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/misc.py +0 -0
  142. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/query.py +0 -0
  143. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/commands/show.py +0 -0
  144. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/parser/__init__.py +0 -0
  145. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/parser/job.py +0 -0
  146. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/parser/studio.py +0 -0
  147. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/parser/utils.py +0 -0
  148. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/cli/utils.py +0 -0
  149. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/__init__.py +0 -0
  150. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/azure.py +0 -0
  151. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/fileslice.py +0 -0
  152. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/fsspec.py +0 -0
  153. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/gcs.py +0 -0
  154. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/hf.py +0 -0
  155. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/local.py +0 -0
  156. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/client/s3.py +0 -0
  157. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/config.py +0 -0
  158. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/__init__.py +0 -0
  159. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/db_engine.py +0 -0
  160. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/job.py +0 -0
  161. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/schema.py +0 -0
  162. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/serializer.py +0 -0
  163. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/data_storage/warehouse.py +0 -0
  164. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/dataset.py +0 -0
  165. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/delta.py +0 -0
  166. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/diff/__init__.py +0 -0
  167. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/error.py +0 -0
  168. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/fs/__init__.py +0 -0
  169. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/fs/reference.py +0 -0
  170. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/fs/utils.py +0 -0
  171. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/__init__.py +0 -0
  172. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/aggregate.py +0 -0
  173. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/array.py +0 -0
  174. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/base.py +0 -0
  175. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/conditional.py +0 -0
  176. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/func.py +0 -0
  177. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/numeric.py +0 -0
  178. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/path.py +0 -0
  179. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/random.py +0 -0
  180. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/string.py +0 -0
  181. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/func/window.py +0 -0
  182. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/job.py +0 -0
  183. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/__init__.py +0 -0
  184. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/arrow.py +0 -0
  185. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/audio.py +0 -0
  186. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/clip.py +0 -0
  187. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/convert/__init__.py +0 -0
  188. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/convert/flatten.py +0 -0
  189. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
  190. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
  191. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/convert/unflatten.py +0 -0
  192. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  193. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/data_model.py +0 -0
  194. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dataset_info.py +0 -0
  195. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/csv.py +0 -0
  196. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/database.py +0 -0
  197. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/hf.py +0 -0
  198. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/json.py +0 -0
  199. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/listings.py +0 -0
  200. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/pandas.py +0 -0
  201. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/parquet.py +0 -0
  202. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/records.py +0 -0
  203. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/storage.py +0 -0
  204. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/dc/values.py +0 -0
  205. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/file.py +0 -0
  206. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/hf.py +0 -0
  207. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/image.py +0 -0
  208. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/listing.py +0 -0
  209. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/listing_info.py +0 -0
  210. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/meta_formats.py +0 -0
  211. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/model_store.py +0 -0
  212. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/pytorch.py +0 -0
  213. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/settings.py +0 -0
  214. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/tar.py +0 -0
  215. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/text.py +0 -0
  216. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/udf.py +0 -0
  217. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/udf_signature.py +0 -0
  218. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/utils.py +0 -0
  219. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/video.py +0 -0
  220. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/webdataset.py +0 -0
  221. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/lib/webdataset_laion.py +0 -0
  222. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/listing.py +0 -0
  223. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/__init__.py +0 -0
  224. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/bbox.py +0 -0
  225. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/pose.py +0 -0
  226. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/segment.py +0 -0
  227. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/ultralytics/__init__.py +0 -0
  228. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/ultralytics/bbox.py +0 -0
  229. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/ultralytics/pose.py +0 -0
  230. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/ultralytics/segment.py +0 -0
  231. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/model/utils.py +0 -0
  232. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/namespace.py +0 -0
  233. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/node.py +0 -0
  234. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/nodes_fetcher.py +0 -0
  235. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/nodes_thread_pool.py +0 -0
  236. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/progress.py +0 -0
  237. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/project.py +0 -0
  238. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/py.typed +0 -0
  239. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/__init__.py +0 -0
  240. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/batch.py +0 -0
  241. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/dispatch.py +0 -0
  242. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/metrics.py +0 -0
  243. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/params.py +0 -0
  244. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/queue.py +0 -0
  245. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/session.py +0 -0
  246. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/udf.py +0 -0
  247. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/query/utils.py +0 -0
  248. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/remote/__init__.py +0 -0
  249. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/remote/studio.py +0 -0
  250. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/script_meta.py +0 -0
  251. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/semver.py +0 -0
  252. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/__init__.py +0 -0
  253. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/default/__init__.py +0 -0
  254. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/default/base.py +0 -0
  255. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/__init__.py +0 -0
  256. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/aggregate.py +0 -0
  257. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/array.py +0 -0
  258. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/conditional.py +0 -0
  259. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/numeric.py +0 -0
  260. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/path.py +0 -0
  261. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/random.py +0 -0
  262. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/functions/string.py +0 -0
  263. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/postgresql_dialect.py +0 -0
  264. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/postgresql_types.py +0 -0
  265. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/selectable.py +0 -0
  266. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/sqlite/__init__.py +0 -0
  267. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/sqlite/base.py +0 -0
  268. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/sqlite/types.py +0 -0
  269. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/sqlite/vector.py +0 -0
  270. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/types.py +0 -0
  271. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/sql/utils.py +0 -0
  272. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/studio.py +0 -0
  273. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/telemetry.py +0 -0
  274. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/toolkit/__init__.py +0 -0
  275. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/toolkit/split.py +0 -0
  276. {datachain-0.30.3 → datachain-0.30.4}/src/datachain/torch/__init__.py +0 -0
  277. {datachain-0.30.3 → datachain-0.30.4}/src/datachain.egg-info/dependency_links.txt +0 -0
  278. {datachain-0.30.3 → datachain-0.30.4}/src/datachain.egg-info/entry_points.txt +0 -0
  279. {datachain-0.30.3 → datachain-0.30.4}/src/datachain.egg-info/requires.txt +0 -0
  280. {datachain-0.30.3 → datachain-0.30.4}/src/datachain.egg-info/top_level.txt +0 -0
  281. {datachain-0.30.3 → datachain-0.30.4}/tests/__init__.py +0 -0
  282. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/__init__.py +0 -0
  283. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/conftest.py +0 -0
  284. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  285. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/datasets/.dvc/config +0 -0
  286. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/datasets/.gitignore +0 -0
  287. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  288. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/test_datachain.py +0 -0
  289. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/test_ls.py +0 -0
  290. {datachain-0.30.3 → datachain-0.30.4}/tests/benchmarks/test_version.py +0 -0
  291. {datachain-0.30.3 → datachain-0.30.4}/tests/data.py +0 -0
  292. {datachain-0.30.3 → datachain-0.30.4}/tests/examples/__init__.py +0 -0
  293. {datachain-0.30.3 → datachain-0.30.4}/tests/examples/test_examples.py +0 -0
  294. {datachain-0.30.3 → datachain-0.30.4}/tests/examples/test_wds_e2e.py +0 -0
  295. {datachain-0.30.3 → datachain-0.30.4}/tests/examples/wds_data.py +0 -0
  296. {datachain-0.30.3 → datachain-0.30.4}/tests/func/__init__.py +0 -0
  297. {datachain-0.30.3 → datachain-0.30.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  298. {datachain-0.30.3 → datachain-0.30.4}/tests/func/data/lena.jpg +0 -0
  299. {datachain-0.30.3 → datachain-0.30.4}/tests/func/fake-service-account-credentials.json +0 -0
  300. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/__init__.py +0 -0
  301. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/test_aggregate.py +0 -0
  302. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/test_array.py +0 -0
  303. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/test_conditional.py +0 -0
  304. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/test_numeric.py +0 -0
  305. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/test_path.py +0 -0
  306. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/test_random.py +0 -0
  307. {datachain-0.30.3 → datachain-0.30.4}/tests/func/functions/test_string.py +0 -0
  308. {datachain-0.30.3 → datachain-0.30.4}/tests/func/model/__init__.py +0 -0
  309. {datachain-0.30.3 → datachain-0.30.4}/tests/func/model/data/running-mask0.png +0 -0
  310. {datachain-0.30.3 → datachain-0.30.4}/tests/func/model/data/running-mask1.png +0 -0
  311. {datachain-0.30.3 → datachain-0.30.4}/tests/func/model/data/running.jpg +0 -0
  312. {datachain-0.30.3 → datachain-0.30.4}/tests/func/model/data/ships.jpg +0 -0
  313. {datachain-0.30.3 → datachain-0.30.4}/tests/func/model/test_yolo.py +0 -0
  314. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_audio.py +0 -0
  315. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_batching.py +0 -0
  316. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_client.py +0 -0
  317. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_cloud_transfer.py +0 -0
  318. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_datachain_merge.py +0 -0
  319. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_delta.py +0 -0
  320. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_feature_pickling.py +0 -0
  321. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_file.py +0 -0
  322. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_hf.py +0 -0
  323. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_hidden_field.py +0 -0
  324. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_image.py +0 -0
  325. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_listing.py +0 -0
  326. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_ls.py +0 -0
  327. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_meta_formats.py +0 -0
  328. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_metastore.py +0 -0
  329. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_metrics.py +0 -0
  330. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_pytorch.py +0 -0
  331. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_query.py +0 -0
  332. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_read_database.py +0 -0
  333. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  334. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_retry.py +0 -0
  335. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_session.py +0 -0
  336. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_studio_datetime_parsing.py +0 -0
  337. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_to_database.py +0 -0
  338. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_toolkit.py +0 -0
  339. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_video.py +0 -0
  340. {datachain-0.30.3 → datachain-0.30.4}/tests/func/test_warehouse.py +0 -0
  341. {datachain-0.30.3 → datachain-0.30.4}/tests/scripts/feature_class.py +0 -0
  342. {datachain-0.30.3 → datachain-0.30.4}/tests/scripts/feature_class_exception.py +0 -0
  343. {datachain-0.30.3 → datachain-0.30.4}/tests/scripts/feature_class_parallel.py +0 -0
  344. {datachain-0.30.3 → datachain-0.30.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  345. {datachain-0.30.3 → datachain-0.30.4}/tests/scripts/name_len_slow.py +0 -0
  346. {datachain-0.30.3 → datachain-0.30.4}/tests/test_atomicity.py +0 -0
  347. {datachain-0.30.3 → datachain-0.30.4}/tests/test_cli_e2e.py +0 -0
  348. {datachain-0.30.3 → datachain-0.30.4}/tests/test_import_time.py +0 -0
  349. {datachain-0.30.3 → datachain-0.30.4}/tests/test_query_e2e.py +0 -0
  350. {datachain-0.30.3 → datachain-0.30.4}/tests/test_telemetry.py +0 -0
  351. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/__init__.py +0 -0
  352. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/__init__.py +0 -0
  353. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/conftest.py +0 -0
  354. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_arrow.py +0 -0
  355. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_audio.py +0 -0
  356. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_clip.py +0 -0
  357. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  358. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_datachain_merge.py +0 -0
  359. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_diff.py +0 -0
  360. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_feature.py +0 -0
  361. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_feature_utils.py +0 -0
  362. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_file.py +0 -0
  363. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_hf.py +0 -0
  364. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_image.py +0 -0
  365. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_listing_info.py +0 -0
  366. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_partition_by.py +0 -0
  367. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_python_to_sql.py +0 -0
  368. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_schema.py +0 -0
  369. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_settings.py +0 -0
  370. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_sql_to_python.py +0 -0
  371. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_text.py +0 -0
  372. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_udf.py +0 -0
  373. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_udf_signature.py +0 -0
  374. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_utils.py +0 -0
  375. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/lib/test_webdataset.py +0 -0
  376. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/model/__init__.py +0 -0
  377. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/model/test_bbox.py +0 -0
  378. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/model/test_pose.py +0 -0
  379. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/model/test_segment.py +0 -0
  380. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/model/test_utils.py +0 -0
  381. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/__init__.py +0 -0
  382. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/sqlite/__init__.py +0 -0
  383. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/sqlite/test_types.py +0 -0
  384. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
  385. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/test_array.py +0 -0
  386. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/test_conditional.py +0 -0
  387. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/test_path.py +0 -0
  388. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/test_random.py +0 -0
  389. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/test_selectable.py +0 -0
  390. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/sql/test_string.py +0 -0
  391. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_asyn.py +0 -0
  392. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_cache.py +0 -0
  393. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_catalog.py +0 -0
  394. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_catalog_loader.py +0 -0
  395. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_cli_parsing.py +0 -0
  396. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_client.py +0 -0
  397. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_client_gcs.py +0 -0
  398. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_client_s3.py +0 -0
  399. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_config.py +0 -0
  400. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_data_storage.py +0 -0
  401. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_database_engine.py +0 -0
  402. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_dataset.py +0 -0
  403. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_dispatch.py +0 -0
  404. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_fileslice.py +0 -0
  405. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_func.py +0 -0
  406. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_listing.py +0 -0
  407. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_metastore.py +0 -0
  408. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_module_exports.py +0 -0
  409. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_pytorch.py +0 -0
  410. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_query.py +0 -0
  411. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_query_metrics.py +0 -0
  412. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_query_params.py +0 -0
  413. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_script_meta.py +0 -0
  414. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_semver.py +0 -0
  415. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_serializer.py +0 -0
  416. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_session.py +0 -0
  417. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_utils.py +0 -0
  418. {datachain-0.30.3 → datachain-0.30.4}/tests/unit/test_warehouse.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.9'
27
+ rev: 'v0.12.10'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.3
3
+ Version: 0.30.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -6,6 +6,7 @@ from datachain.lib.dc import (
6
6
  Sys,
7
7
  datasets,
8
8
  delete_dataset,
9
+ is_studio,
9
10
  listings,
10
11
  move_dataset,
11
12
  read_csv,
@@ -74,6 +75,7 @@ __all__ = [
74
75
  "datasets",
75
76
  "delete_dataset",
76
77
  "is_chain_type",
78
+ "is_studio",
77
79
  "listings",
78
80
  "metrics",
79
81
  "move_dataset",
@@ -3,6 +3,7 @@ from .catalog import (
3
3
  QUERY_SCRIPT_CANCELED_EXIT_CODE,
4
4
  QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE,
5
5
  Catalog,
6
+ is_namespace_local,
6
7
  )
7
8
  from .loader import get_catalog
8
9
 
@@ -12,4 +13,5 @@ __all__ = [
12
13
  "QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE",
13
14
  "Catalog",
14
15
  "get_catalog",
16
+ "is_namespace_local",
15
17
  ]
@@ -113,6 +113,11 @@ else:
113
113
  SIGINT = signal.SIGINT
114
114
 
115
115
 
116
+ def is_namespace_local(namespace_name) -> bool:
117
+ """Checks if namespace is from local environment, i.e. is `local`"""
118
+ return namespace_name == "local"
119
+
120
+
116
121
  def shutdown_process(
117
122
  proc: subprocess.Popen,
118
123
  interrupt_timeout: Optional[int] = None,
@@ -1121,6 +1126,8 @@ class Catalog:
1121
1126
  pull_dataset: bool = False,
1122
1127
  update: bool = False,
1123
1128
  ) -> DatasetRecord:
1129
+ from datachain.lib.dc.utils import is_studio
1130
+
1124
1131
  # Intentionally ignore update flag if version is provided. Here only exact
1125
1132
  # version can be provided and update then doesn't make sense.
1126
1133
  # It corresponds to a query like this for example:
@@ -1129,7 +1136,12 @@ class Catalog:
1129
1136
  if version:
1130
1137
  update = False
1131
1138
 
1132
- if self.metastore.is_local_dataset(namespace_name) or not update:
1139
+ # we don't do Studio fallback if script is already run in Studio, or if we try
1140
+ # to fetch dataset with local namespace as that one cannot
1141
+ # exist in Studio in the first place
1142
+ no_fallback = is_studio() or is_namespace_local(namespace_name)
1143
+
1144
+ if no_fallback or not update:
1133
1145
  try:
1134
1146
  ds = self.get_dataset(
1135
1147
  name,
@@ -1141,7 +1153,7 @@ class Catalog:
1141
1153
  except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
1142
1154
  pass
1143
1155
 
1144
- if self.metastore.is_local_dataset(namespace_name):
1156
+ if no_fallback:
1145
1157
  raise DatasetNotFoundError(
1146
1158
  f"Dataset {name}"
1147
1159
  + (f" version {version} " if version else " ")
@@ -127,7 +127,8 @@ def get_udf_distributor_class() -> Optional[type["AbstractUDFDistributor"]]:
127
127
 
128
128
 
129
129
  def get_catalog(
130
- client_config: Optional[dict[str, Any]] = None, in_memory: bool = False
130
+ client_config: Optional[dict[str, Any]] = None,
131
+ in_memory: bool = False,
131
132
  ) -> "Catalog":
132
133
  """
133
134
  Function that creates Catalog instance with appropriate metastore
@@ -142,8 +143,9 @@ def get_catalog(
142
143
  """
143
144
  from datachain.catalog import Catalog
144
145
 
146
+ metastore = get_metastore(in_memory=in_memory)
145
147
  return Catalog(
146
- metastore=get_metastore(in_memory=in_memory),
148
+ metastore=metastore,
147
149
  warehouse=get_warehouse(in_memory=in_memory),
148
150
  client_config=client_config,
149
151
  in_memory=in_memory,
@@ -6,6 +6,7 @@ from multiprocessing import freeze_support
6
6
  from typing import Optional
7
7
 
8
8
  from datachain.cli.utils import get_logging_level
9
+ from datachain.error import DataChainError as DataChainError
9
10
 
10
11
  from .commands import (
11
12
  clear_cache,
@@ -6,6 +6,7 @@ from tabulate import tabulate
6
6
  if TYPE_CHECKING:
7
7
  from datachain.catalog import Catalog
8
8
 
9
+ from datachain.catalog import is_namespace_local
9
10
  from datachain.cli.utils import determine_flavors
10
11
  from datachain.config import Config
11
12
  from datachain.error import DataChainError, DatasetNotFoundError
@@ -138,15 +139,18 @@ def rm_dataset(
138
139
  ):
139
140
  namespace_name, project_name, name = catalog.get_full_dataset_name(name)
140
141
 
141
- if not catalog.metastore.is_local_dataset(namespace_name) and studio:
142
+ if studio:
143
+ # removing Studio dataset from CLI
142
144
  from datachain.studio import remove_studio_dataset
143
145
 
144
- token = Config().read().get("studio", {}).get("token")
145
- if not token:
146
+ if Config().read().get("studio", {}).get("token"):
147
+ remove_studio_dataset(
148
+ team, name, namespace_name, project_name, version, force
149
+ )
150
+ else:
146
151
  raise DataChainError(
147
152
  "Not logged in to Studio. Log in with 'datachain auth login'."
148
153
  )
149
- remove_studio_dataset(team, name, namespace_name, project_name, version, force)
150
154
  else:
151
155
  try:
152
156
  project = catalog.metastore.get_project(project_name, namespace_name)
@@ -163,9 +167,11 @@ def edit_dataset(
163
167
  attrs: Optional[list[str]] = None,
164
168
  team: Optional[str] = None,
165
169
  ):
170
+ from datachain.lib.dc.utils import is_studio
171
+
166
172
  namespace_name, project_name, name = catalog.get_full_dataset_name(name)
167
173
 
168
- if catalog.metastore.is_local_dataset(namespace_name):
174
+ if is_studio() or is_namespace_local(namespace_name):
169
175
  try:
170
176
  catalog.edit_dataset(
171
177
  name, catalog.metastore.default_project, new_name, description, attrs
@@ -175,11 +181,11 @@ def edit_dataset(
175
181
  else:
176
182
  from datachain.studio import edit_studio_dataset
177
183
 
178
- token = Config().read().get("studio", {}).get("token")
179
- if not token:
184
+ if Config().read().get("studio", {}).get("token"):
185
+ edit_studio_dataset(
186
+ team, name, namespace_name, project_name, new_name, description, attrs
187
+ )
188
+ else:
180
189
  raise DataChainError(
181
190
  "Not logged in to Studio. Log in with 'datachain auth login'."
182
191
  )
183
- edit_studio_dataset(
184
- team, name, namespace_name, project_name, new_name, description, attrs
185
- )
@@ -145,23 +145,6 @@ class AbstractMetastore(ABC, Serializable):
145
145
  def list_namespaces(self, conn=None) -> list[Namespace]:
146
146
  """Gets a list of all namespaces"""
147
147
 
148
- @property
149
- @abstractmethod
150
- def is_studio(self) -> bool:
151
- """Returns True if this code is ran in Studio"""
152
-
153
- def is_local_dataset(self, dataset_namespace: str) -> bool:
154
- """
155
- Returns True if this is local dataset i.e. not pulled from Studio but
156
- created locally. This is False if we ran code in CLI mode but using dataset
157
- names that are present in Studio.
158
- """
159
- return self.is_studio or dataset_namespace == Namespace.default()
160
-
161
- @property
162
- def namespace_allowed_to_create(self):
163
- return self.is_studio
164
-
165
148
  #
166
149
  # Projects
167
150
  #
@@ -215,10 +198,6 @@ class AbstractMetastore(ABC, Serializable):
215
198
  def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
216
199
  """Gets list of projects in some namespace or in general (in all namespaces)"""
217
200
 
218
- @property
219
- def project_allowed_to_create(self):
220
- return self.is_studio
221
-
222
201
  #
223
202
  # Datasets
224
203
  #
@@ -542,10 +542,6 @@ class SQLiteMetastore(AbstractDBMetastore):
542
542
  def _jobs_insert(self) -> "Insert":
543
543
  return sqlite.insert(self._jobs)
544
544
 
545
- @property
546
- def is_studio(self) -> bool:
547
- return False
548
-
549
545
  #
550
546
  # Namespaces
551
547
  #
@@ -9,7 +9,7 @@ from .pandas import read_pandas
9
9
  from .parquet import read_parquet
10
10
  from .records import read_records
11
11
  from .storage import read_storage
12
- from .utils import DatasetMergeError, DatasetPrepareError, Sys
12
+ from .utils import DatasetMergeError, DatasetPrepareError, Sys, is_studio
13
13
  from .values import read_values
14
14
 
15
15
  __all__ = [
@@ -21,6 +21,7 @@ __all__ = [
21
21
  "Sys",
22
22
  "datasets",
23
23
  "delete_dataset",
24
+ "is_studio",
24
25
  "listings",
25
26
  "move_dataset",
26
27
  "read_csv",
@@ -67,6 +67,7 @@ from .utils import (
67
67
  Sys,
68
68
  _get_merge_error_str,
69
69
  _validate_merge_on,
70
+ is_studio,
70
71
  resolve_columns,
71
72
  )
72
73
 
@@ -609,7 +610,7 @@ class DataChain:
609
610
  project = self.session.catalog.metastore.get_project(
610
611
  project_name,
611
612
  namespace_name,
612
- create=self.session.catalog.metastore.project_allowed_to_create,
613
+ create=is_studio(),
613
614
  )
614
615
  except ProjectNotFoundError as e:
615
616
  # not being able to create it as creation is not allowed
@@ -1184,17 +1185,13 @@ class DataChain:
1184
1185
  )
1185
1186
 
1186
1187
  def mutate(self, **kwargs) -> "Self":
1187
- """Create new signals based on existing signals.
1188
-
1189
- This method cannot modify existing columns. If you need to modify an
1190
- existing column, use a different name for the new column and then use
1191
- `select()` to choose which columns to keep.
1188
+ """Create or modify signals based on existing signals.
1192
1189
 
1193
1190
  This method is vectorized and more efficient compared to map(), and it does not
1194
1191
  extract or download any data from the internal database. However, it can only
1195
1192
  utilize predefined built-in functions and their combinations.
1196
1193
 
1197
- The supported functions:
1194
+ Supported functions:
1198
1195
  Numerical: +, -, *, /, rand(), avg(), count(), func(),
1199
1196
  greatest(), least(), max(), min(), sum()
1200
1197
  String: length(), split(), replace(), regexp_replace()
@@ -1221,13 +1218,20 @@ class DataChain:
1221
1218
  ```
1222
1219
 
1223
1220
  This method can be also used to rename signals. If the Column("name") provided
1224
- as value for the new signal - the old column will be dropped. Otherwise a new
1225
- column is created.
1221
+ as value for the new signal - the old signal will be dropped. Otherwise a new
1222
+ signal is created. Exception: if the old signal is a nested one (e.g.
1223
+ `C("file.path")`), it will be kept to keep the object intact.
1226
1224
 
1227
1225
  Example:
1228
1226
  ```py
1229
1227
  dc.mutate(
1230
- newkey=Column("oldkey")
1228
+ newkey=Column("oldkey") # drops oldkey
1229
+ )
1230
+ ```
1231
+
1232
+ ```py
1233
+ dc.mutate(
1234
+ size=Column("file.size") # keeps `file.size`
1231
1235
  )
1232
1236
  ```
1233
1237
  """
@@ -1262,8 +1266,10 @@ class DataChain:
1262
1266
  # adding new signal
1263
1267
  mutated[name] = value
1264
1268
 
1269
+ new_schema = schema.mutate(kwargs)
1265
1270
  return self._evolve(
1266
- query=self._query.mutate(**mutated), signal_schema=schema.mutate(kwargs)
1271
+ query=self._query.mutate(new_schema=new_schema, **mutated),
1272
+ signal_schema=new_schema,
1267
1273
  )
1268
1274
 
1269
1275
  @property
@@ -13,7 +13,7 @@ from datachain.lib.signal_schema import SignalSchema
13
13
  from datachain.query import Session
14
14
  from datachain.query.dataset import DatasetQuery
15
15
 
16
- from .utils import Sys
16
+ from .utils import Sys, is_studio
17
17
  from .values import read_values
18
18
 
19
19
  if TYPE_CHECKING:
@@ -343,7 +343,7 @@ def delete_dataset(
343
343
  namespace_name=namespace,
344
344
  )
345
345
 
346
- if not catalog.metastore.is_local_dataset(namespace_name) and studio:
346
+ if not is_studio() and studio:
347
347
  return remove_studio_dataset(
348
348
  None, name, namespace_name, project_name, version=version, force=force
349
349
  )
@@ -418,6 +418,6 @@ def move_dataset(
418
418
  project_id=catalog.metastore.get_project(
419
419
  dest_project,
420
420
  dest_namespace,
421
- create=catalog.metastore.project_allowed_to_create,
421
+ create=is_studio(),
422
422
  ).id,
423
423
  )
@@ -15,6 +15,7 @@ from datachain.func.base import Function
15
15
  from datachain.lib.data_model import DataModel, DataType
16
16
  from datachain.lib.utils import DataChainParamsError
17
17
  from datachain.query.schema import DEFAULT_DELIMITER
18
+ from datachain.utils import getenv_bool
18
19
 
19
20
  if TYPE_CHECKING:
20
21
  from typing_extensions import Concatenate, ParamSpec
@@ -26,6 +27,10 @@ if TYPE_CHECKING:
26
27
  D = TypeVar("D", bound="DataChain")
27
28
 
28
29
 
30
+ def is_studio() -> bool:
31
+ return getenv_bool("DATACHAIN_IS_STUDIO", default=False)
32
+
33
+
29
34
  def resolve_columns(
30
35
  method: "Callable[Concatenate[D, P], D]",
31
36
  ) -> "Callable[Concatenate[D, P], D]":
@@ -28,7 +28,9 @@ def create(
28
28
  """
29
29
  session = Session.get(session)
30
30
 
31
- if not session.catalog.metastore.namespace_allowed_to_create:
31
+ from datachain.lib.dc.utils import is_studio
32
+
33
+ if not is_studio():
32
34
  raise NamespaceCreateNotAllowedError("Creating namespace is not allowed")
33
35
 
34
36
  Namespace.validate_name(name)
@@ -32,7 +32,9 @@ def create(
32
32
  """
33
33
  session = Session.get(session)
34
34
 
35
- if not session.catalog.metastore.project_allowed_to_create:
35
+ from datachain.lib.dc.utils import is_studio
36
+
37
+ if not is_studio():
36
38
  raise ProjectCreateNotAllowedError("Creating project is not allowed")
37
39
 
38
40
  Project.validate_name(name)
@@ -34,7 +34,7 @@ from datachain.lib.data_model import DataModel, DataType, DataValue
34
34
  from datachain.lib.file import File
35
35
  from datachain.lib.model_store import ModelStore
36
36
  from datachain.lib.utils import DataChainParamsError
37
- from datachain.query.schema import DEFAULT_DELIMITER, Column, ColumnMeta
37
+ from datachain.query.schema import DEFAULT_DELIMITER, C, Column, ColumnMeta
38
38
  from datachain.sql.types import SQLType
39
39
 
40
40
  if TYPE_CHECKING:
@@ -680,35 +680,46 @@ class SignalSchema:
680
680
  primitives = (bool, str, int, float)
681
681
 
682
682
  for name, value in args_map.items():
683
+ current_type = None
684
+
685
+ if C.is_nested(name):
686
+ try:
687
+ current_type = self.get_column_type(name)
688
+ except SignalResolvingError as err:
689
+ msg = f"Creating new nested columns directly is not allowed: {name}"
690
+ raise ValueError(msg) from err
691
+
683
692
  if isinstance(value, Column) and value.name in self.values:
684
693
  # renaming existing signal
694
+ # Note: it won't touch nested signals here (e.g. file__path)
695
+ # we don't allow removing nested columns to keep objects consistent
685
696
  del new_values[value.name]
686
697
  new_values[name] = self.values[value.name]
687
- continue
688
- if isinstance(value, Column):
698
+ elif isinstance(value, Column):
689
699
  # adding new signal from existing signal field
690
- try:
691
- new_values[name] = self.get_column_type(
692
- value.name, with_subtree=True
693
- )
694
- continue
695
- except SignalResolvingError:
696
- pass
697
- if isinstance(value, Func):
700
+ new_values[name] = self.get_column_type(value.name, with_subtree=True)
701
+ elif isinstance(value, Func):
698
702
  # adding new signal with function
699
703
  new_values[name] = value.get_result_type(self)
700
- continue
701
- if isinstance(value, primitives):
704
+ elif isinstance(value, primitives):
702
705
  # For primitives, store the type, not the value
703
706
  val = literal(value)
704
707
  val.type = python_to_sql(type(value))()
705
708
  new_values[name] = sql_to_python(val)
706
- continue
707
- if isinstance(value, ColumnElement):
709
+ elif isinstance(value, ColumnElement):
708
710
  # adding new signal
709
711
  new_values[name] = sql_to_python(value)
710
- continue
711
- new_values[name] = value
712
+ else:
713
+ new_values[name] = value
714
+
715
+ if C.is_nested(name):
716
+ if current_type != new_values[name]:
717
+ msg = (
718
+ f"Altering nested column type is not allowed: {name}, "
719
+ f"current type: {current_type}, new type: {new_values[name]}"
720
+ )
721
+ raise ValueError(msg)
722
+ del new_values[name]
712
723
 
713
724
  return SignalSchema(new_values)
714
725
 
@@ -10,7 +10,6 @@ from abc import ABC, abstractmethod
10
10
  from collections.abc import Generator, Iterable, Iterator, Sequence
11
11
  from copy import copy
12
12
  from functools import wraps
13
- from secrets import token_hex
14
13
  from types import GeneratorType
15
14
  from typing import (
16
15
  TYPE_CHECKING,
@@ -29,7 +28,7 @@ from attrs import frozen
29
28
  from fsspec.callbacks import DEFAULT_CALLBACK, Callback, TqdmCallback
30
29
  from sqlalchemy import Column
31
30
  from sqlalchemy.sql import func as f
32
- from sqlalchemy.sql.elements import ColumnClause, ColumnElement
31
+ from sqlalchemy.sql.elements import ColumnClause, ColumnElement, Label
33
32
  from sqlalchemy.sql.expression import label
34
33
  from sqlalchemy.sql.schema import TableClause
35
34
  from sqlalchemy.sql.selectable import Select
@@ -46,6 +45,7 @@ from datachain.dataset import DatasetDependency, DatasetStatus, RowDict
46
45
  from datachain.error import DatasetNotFoundError, QueryScriptCancelError
47
46
  from datachain.func.base import Function
48
47
  from datachain.lib.listing import is_listing_dataset, listing_dataset_expired
48
+ from datachain.lib.signal_schema import SignalSchema
49
49
  from datachain.lib.udf import UDFAdapter, _get_cache
50
50
  from datachain.progress import CombinedDownloadCallback, TqdmCombinedDownloadCallback
51
51
  from datachain.project import Project
@@ -795,28 +795,32 @@ class SQLSelectExcept(SQLClause):
795
795
 
796
796
  @frozen
797
797
  class SQLMutate(SQLClause):
798
- args: tuple[Union[Function, ColumnElement], ...]
798
+ args: tuple[Label, ...]
799
+ new_schema: SignalSchema
799
800
 
800
801
  def apply_sql_clause(self, query: Select) -> Select:
801
802
  original_subquery = query.subquery()
802
- args = [
803
- original_subquery.c[str(c)] if isinstance(c, (str, C)) else c
804
- for c in self.parse_cols(self.args)
805
- ]
806
- to_mutate = {c.name for c in args}
803
+ to_mutate = {c.name for c in self.args}
807
804
 
808
- prefix = f"mutate{token_hex(8)}_"
809
- cols = [
810
- c.label(prefix + c.name) if c.name in to_mutate else c
805
+ # Drop the original versions to avoid name collisions, exclude renamed
806
+ # columns. Always keep system columns (sys__*) if they exist in original query
807
+ new_schema_columns = set(self.new_schema.db_signals())
808
+ base_cols = [
809
+ c
811
810
  for c in original_subquery.c
811
+ if c.name not in to_mutate
812
+ and (c.name in new_schema_columns or c.name.startswith("sys__"))
812
813
  ]
813
- # this is needed for new column to be used in clauses
814
- # like ORDER BY, otherwise new column is not recognized
815
- subquery = (
816
- sqlalchemy.select(*cols, *args).select_from(original_subquery).subquery()
814
+
815
+ # Create intermediate subquery to properly handle window functions
816
+ intermediate_query = sqlalchemy.select(*base_cols, *self.args).select_from(
817
+ original_subquery
817
818
  )
819
+ intermediate_subquery = intermediate_query.subquery()
818
820
 
819
- return sqlalchemy.select(*subquery.c).select_from(subquery)
821
+ return sqlalchemy.select(*intermediate_subquery.c).select_from(
822
+ intermediate_subquery
823
+ )
820
824
 
821
825
 
822
826
  @frozen
@@ -1470,7 +1474,7 @@ class DatasetQuery:
1470
1474
  return query
1471
1475
 
1472
1476
  @detach
1473
- def mutate(self, *args, **kwargs) -> "Self":
1477
+ def mutate(self, *args, new_schema, **kwargs) -> "Self":
1474
1478
  """
1475
1479
  Add new columns to this query.
1476
1480
 
@@ -1482,7 +1486,7 @@ class DatasetQuery:
1482
1486
  """
1483
1487
  query_args = [v.label(k) for k, v in dict(args, **kwargs).items()]
1484
1488
  query = self.clone()
1485
- query.steps.append(SQLMutate((*query_args,)))
1489
+ query.steps.append(SQLMutate((*query_args,), new_schema))
1486
1490
  return query
1487
1491
 
1488
1492
  @detach
@@ -36,6 +36,10 @@ class ColumnMeta(type):
36
36
  def __getattr__(cls, name: str):
37
37
  return cls(ColumnMeta.to_db_name(name))
38
38
 
39
+ @staticmethod
40
+ def is_nested(name: str) -> bool:
41
+ return DEFAULT_DELIMITER in name
42
+
39
43
 
40
44
  class Column(sa.ColumnClause, metaclass=ColumnMeta):
41
45
  inherit_cache: Optional[bool] = True
@@ -531,3 +531,10 @@ def safe_closing(thing: T) -> Iterator[T]:
531
531
  finally:
532
532
  if hasattr(thing, "close"):
533
533
  thing.close()
534
+
535
+
536
+ def getenv_bool(name: str, default: bool = False) -> bool:
537
+ val = os.getenv(name)
538
+ if val is None:
539
+ return default
540
+ return val.lower() in ("1", "true", "yes", "on")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.3
3
+ Version: 0.30.4
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -307,6 +307,7 @@ tests/func/test_ls.py
307
307
  tests/func/test_meta_formats.py
308
308
  tests/func/test_metastore.py
309
309
  tests/func/test_metrics.py
310
+ tests/func/test_mutate.py
310
311
  tests/func/test_pull.py
311
312
  tests/func/test_pytorch.py
312
313
  tests/func/test_query.py
@@ -8,7 +8,6 @@ from datetime import datetime
8
8
  from pathlib import PosixPath
9
9
  from time import sleep
10
10
  from typing import NamedTuple
11
- from unittest.mock import PropertyMock, patch
12
11
 
13
12
  import attrs
14
13
  import pytest
@@ -21,7 +20,6 @@ from datachain.catalog import Catalog
21
20
  from datachain.catalog.loader import get_metastore, get_warehouse
22
21
  from datachain.cli.utils import CommaSeparatedArgs
23
22
  from datachain.config import Config, ConfigLevel
24
- from datachain.data_storage.metastore import AbstractMetastore
25
23
  from datachain.data_storage.sqlite import (
26
24
  SQLiteDatabaseEngine,
27
25
  SQLiteMetastore,
@@ -543,42 +541,16 @@ def cloud_test_catalog_tmpfile(
543
541
 
544
542
 
545
543
  @pytest.fixture
546
- def allow_create_project():
544
+ def is_studio():
547
545
  return True
548
546
 
549
547
 
550
- @pytest.fixture
551
- def allow_create_namespace():
552
- return True
553
-
554
-
555
- @pytest.fixture(autouse=True)
556
- def mock_allowed_to_create_project(allow_create_project):
557
- if not allow_create_project:
558
- yield
559
- else:
560
- with patch.object(
561
- AbstractMetastore, "project_allowed_to_create", new_callable=PropertyMock
562
- ) as mock_metastore:
563
- mock_metastore.return_value = True
564
- yield
565
-
566
-
567
548
  @pytest.fixture(autouse=True)
568
- def mock_allowed_to_create_namespace(allow_create_namespace):
569
- if not allow_create_namespace:
549
+ def mock_is_studio(monkeypatch, is_studio):
550
+ if not is_studio:
570
551
  yield
571
552
  else:
572
- with patch.object(
573
- AbstractMetastore, "namespace_allowed_to_create", new_callable=PropertyMock
574
- ) as mock_metastore:
575
- mock_metastore.return_value = True
576
- yield
577
-
578
-
579
- @pytest.fixture
580
- def mock_is_local_dataset():
581
- with patch.object(AbstractMetastore, "is_local_dataset", return_value=True):
553
+ monkeypatch.setenv("DATACHAIN_IS_STUDIO", True)
582
554
  yield
583
555
 
584
556