datachain 0.22.0__tar.gz → 0.23.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (397) hide show
  1. {datachain-0.22.0 → datachain-0.23.0}/PKG-INFO +1 -1
  2. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/env.md +4 -0
  3. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/namespaces.md +43 -1
  4. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/catalog.py +33 -0
  5. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/datasets.py +4 -10
  6. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/metastore.py +13 -2
  7. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/sqlite.py +6 -2
  8. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/dataset.py +3 -1
  9. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/datachain.py +6 -12
  10. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/datasets.py +8 -9
  11. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/records.py +1 -1
  12. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/signal_schema.py +8 -0
  13. {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/PKG-INFO +1 -1
  14. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_datachain.py +46 -0
  15. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_datachain.py +111 -1
  16. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_dataset.py +1 -1
  17. {datachain-0.22.0 → datachain-0.23.0}/.cruft.json +0 -0
  18. {datachain-0.22.0 → datachain-0.23.0}/.gitattributes +0 -0
  19. {datachain-0.22.0 → datachain-0.23.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  20. {datachain-0.22.0 → datachain-0.23.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  21. {datachain-0.22.0 → datachain-0.23.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  22. {datachain-0.22.0 → datachain-0.23.0}/.github/codecov.yaml +0 -0
  23. {datachain-0.22.0 → datachain-0.23.0}/.github/dependabot.yml +0 -0
  24. {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/benchmarks.yml +0 -0
  25. {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/release.yml +0 -0
  26. {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/tests-studio.yml +0 -0
  27. {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/tests.yml +0 -0
  28. {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/update-template.yaml +0 -0
  29. {datachain-0.22.0 → datachain-0.23.0}/.gitignore +0 -0
  30. {datachain-0.22.0 → datachain-0.23.0}/.pre-commit-config.yaml +0 -0
  31. {datachain-0.22.0 → datachain-0.23.0}/CODE_OF_CONDUCT.rst +0 -0
  32. {datachain-0.22.0 → datachain-0.23.0}/LICENSE +0 -0
  33. {datachain-0.22.0 → datachain-0.23.0}/README.rst +0 -0
  34. {datachain-0.22.0 → datachain-0.23.0}/docs/assets/captioned_cartoons.png +0 -0
  35. {datachain-0.22.0 → datachain-0.23.0}/docs/assets/datachain-white.svg +0 -0
  36. {datachain-0.22.0 → datachain-0.23.0}/docs/assets/datachain.svg +0 -0
  37. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/login.md +0 -0
  38. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/logout.md +0 -0
  39. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/team.md +0 -0
  40. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/token.md +0 -0
  41. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/index.md +0 -0
  42. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/cancel.md +0 -0
  43. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/clusters.md +0 -0
  44. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/logs.md +0 -0
  45. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/ls.md +0 -0
  46. {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/run.md +0 -0
  47. {datachain-0.22.0 → datachain-0.23.0}/docs/contributing.md +0 -0
  48. {datachain-0.22.0 → datachain-0.23.0}/docs/css/github-permalink-style.css +0 -0
  49. {datachain-0.22.0 → datachain-0.23.0}/docs/examples.md +0 -0
  50. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/db_migrations.md +0 -0
  51. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/delta.md +0 -0
  52. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/index.md +0 -0
  53. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/processing.md +0 -0
  54. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/remotes.md +0 -0
  55. {datachain-0.22.0 → datachain-0.23.0}/docs/guide/retry.md +0 -0
  56. {datachain-0.22.0 → datachain-0.23.0}/docs/index.md +0 -0
  57. {datachain-0.22.0 → datachain-0.23.0}/docs/overrides/main.html +0 -0
  58. {datachain-0.22.0 → datachain-0.23.0}/docs/quick-start.md +0 -0
  59. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/arrowrow.md +0 -0
  60. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/bbox.md +0 -0
  61. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/file.md +0 -0
  62. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/imagefile.md +0 -0
  63. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/index.md +0 -0
  64. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/pose.md +0 -0
  65. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/segment.md +0 -0
  66. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/tarvfile.md +0 -0
  67. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/textfile.md +0 -0
  68. {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/videofile.md +0 -0
  69. {datachain-0.22.0 → datachain-0.23.0}/docs/references/datachain.md +0 -0
  70. {datachain-0.22.0 → datachain-0.23.0}/docs/references/func.md +0 -0
  71. {datachain-0.22.0 → datachain-0.23.0}/docs/references/index.md +0 -0
  72. {datachain-0.22.0 → datachain-0.23.0}/docs/references/toolkit.md +0 -0
  73. {datachain-0.22.0 → datachain-0.23.0}/docs/references/torch.md +0 -0
  74. {datachain-0.22.0 → datachain-0.23.0}/docs/references/udf.md +0 -0
  75. {datachain-0.22.0 → datachain-0.23.0}/docs/tutorials.md +0 -0
  76. {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  77. {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  78. {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/openimage-detect.py +0 -0
  79. {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  80. {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  81. {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  82. {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/common_sql_functions.py +0 -0
  83. {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/json-csv-reader.py +0 -0
  84. {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/torch-loader.py +0 -0
  85. {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/udfs/parallel.py +0 -0
  86. {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/udfs/simple.py +0 -0
  87. {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/udfs/stateful.py +0 -0
  88. {datachain-0.22.0 → datachain-0.23.0}/examples/incremental_processing/delta.py +0 -0
  89. {datachain-0.22.0 → datachain-0.23.0}/examples/incremental_processing/retry.py +0 -0
  90. {datachain-0.22.0 → datachain-0.23.0}/examples/incremental_processing/utils.py +0 -0
  91. {datachain-0.22.0 → datachain-0.23.0}/examples/llm_and_nlp/claude-query.py +0 -0
  92. {datachain-0.22.0 → datachain-0.23.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  93. {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/clip_inference.py +0 -0
  94. {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/hf_pipeline.py +0 -0
  95. {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  96. {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/wds.py +0 -0
  97. {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/wds_filtered.py +0 -0
  98. {datachain-0.22.0 → datachain-0.23.0}/mkdocs.yml +0 -0
  99. {datachain-0.22.0 → datachain-0.23.0}/noxfile.py +0 -0
  100. {datachain-0.22.0 → datachain-0.23.0}/pyproject.toml +0 -0
  101. {datachain-0.22.0 → datachain-0.23.0}/setup.cfg +0 -0
  102. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/__init__.py +0 -0
  103. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/__main__.py +0 -0
  104. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/asyn.py +0 -0
  105. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cache.py +0 -0
  106. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/__init__.py +0 -0
  107. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/datasource.py +0 -0
  108. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/loader.py +0 -0
  109. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/__init__.py +0 -0
  110. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/__init__.py +0 -0
  111. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/du.py +0 -0
  112. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/index.py +0 -0
  113. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/ls.py +0 -0
  114. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/misc.py +0 -0
  115. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/query.py +0 -0
  116. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/show.py +0 -0
  117. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/__init__.py +0 -0
  118. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/job.py +0 -0
  119. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/studio.py +0 -0
  120. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/utils.py +0 -0
  121. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/utils.py +0 -0
  122. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/__init__.py +0 -0
  123. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/azure.py +0 -0
  124. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/fileslice.py +0 -0
  125. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/fsspec.py +0 -0
  126. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/gcs.py +0 -0
  127. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/hf.py +0 -0
  128. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/local.py +0 -0
  129. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/s3.py +0 -0
  130. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/config.py +0 -0
  131. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/__init__.py +0 -0
  132. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/db_engine.py +0 -0
  133. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/job.py +0 -0
  134. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/schema.py +0 -0
  135. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/serializer.py +0 -0
  136. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/warehouse.py +0 -0
  137. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/delta.py +0 -0
  138. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/diff/__init__.py +0 -0
  139. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/error.py +0 -0
  140. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/fs/__init__.py +0 -0
  141. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/fs/reference.py +0 -0
  142. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/fs/utils.py +0 -0
  143. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/__init__.py +0 -0
  144. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/aggregate.py +0 -0
  145. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/array.py +0 -0
  146. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/base.py +0 -0
  147. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/conditional.py +0 -0
  148. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/func.py +0 -0
  149. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/numeric.py +0 -0
  150. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/path.py +0 -0
  151. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/random.py +0 -0
  152. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/string.py +0 -0
  153. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/window.py +0 -0
  154. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/job.py +0 -0
  155. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/__init__.py +0 -0
  156. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/arrow.py +0 -0
  157. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/clip.py +0 -0
  158. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/__init__.py +0 -0
  159. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/flatten.py +0 -0
  160. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  161. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  162. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/unflatten.py +0 -0
  163. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  164. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/data_model.py +0 -0
  165. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dataset_info.py +0 -0
  166. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/__init__.py +0 -0
  167. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/csv.py +0 -0
  168. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/database.py +0 -0
  169. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/hf.py +0 -0
  170. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/json.py +0 -0
  171. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/listings.py +0 -0
  172. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/pandas.py +0 -0
  173. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/parquet.py +0 -0
  174. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/storage.py +0 -0
  175. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/utils.py +0 -0
  176. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/values.py +0 -0
  177. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/file.py +0 -0
  178. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/hf.py +0 -0
  179. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/image.py +0 -0
  180. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/listing.py +0 -0
  181. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/listing_info.py +0 -0
  182. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/meta_formats.py +0 -0
  183. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/model_store.py +0 -0
  184. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/namespaces.py +0 -0
  185. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/projects.py +0 -0
  186. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/pytorch.py +0 -0
  187. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/settings.py +0 -0
  188. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/tar.py +0 -0
  189. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/text.py +0 -0
  190. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/udf.py +0 -0
  191. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/udf_signature.py +0 -0
  192. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/utils.py +0 -0
  193. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/video.py +0 -0
  194. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/webdataset.py +0 -0
  195. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/webdataset_laion.py +0 -0
  196. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/listing.py +0 -0
  197. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/__init__.py +0 -0
  198. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/bbox.py +0 -0
  199. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/pose.py +0 -0
  200. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/segment.py +0 -0
  201. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  202. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  203. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/pose.py +0 -0
  204. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/segment.py +0 -0
  205. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/utils.py +0 -0
  206. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/namespace.py +0 -0
  207. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/node.py +0 -0
  208. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/nodes_fetcher.py +0 -0
  209. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/nodes_thread_pool.py +0 -0
  210. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/progress.py +0 -0
  211. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/project.py +0 -0
  212. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/py.typed +0 -0
  213. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/__init__.py +0 -0
  214. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/batch.py +0 -0
  215. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/dataset.py +0 -0
  216. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/dispatch.py +0 -0
  217. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/metrics.py +0 -0
  218. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/params.py +0 -0
  219. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/queue.py +0 -0
  220. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/schema.py +0 -0
  221. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/session.py +0 -0
  222. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/udf.py +0 -0
  223. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/utils.py +0 -0
  224. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/remote/__init__.py +0 -0
  225. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/remote/studio.py +0 -0
  226. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/script_meta.py +0 -0
  227. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/semver.py +0 -0
  228. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/__init__.py +0 -0
  229. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/default/__init__.py +0 -0
  230. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/default/base.py +0 -0
  231. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/__init__.py +0 -0
  232. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/aggregate.py +0 -0
  233. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/array.py +0 -0
  234. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/conditional.py +0 -0
  235. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/numeric.py +0 -0
  236. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/path.py +0 -0
  237. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/random.py +0 -0
  238. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/string.py +0 -0
  239. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/selectable.py +0 -0
  240. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  241. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/base.py +0 -0
  242. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/types.py +0 -0
  243. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/vector.py +0 -0
  244. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/types.py +0 -0
  245. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/utils.py +0 -0
  246. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/studio.py +0 -0
  247. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/telemetry.py +0 -0
  248. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/toolkit/__init__.py +0 -0
  249. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/toolkit/split.py +0 -0
  250. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/torch/__init__.py +0 -0
  251. {datachain-0.22.0 → datachain-0.23.0}/src/datachain/utils.py +0 -0
  252. {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/SOURCES.txt +0 -0
  253. {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  254. {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/entry_points.txt +0 -0
  255. {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/requires.txt +0 -0
  256. {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/top_level.txt +0 -0
  257. {datachain-0.22.0 → datachain-0.23.0}/tests/__init__.py +0 -0
  258. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/__init__.py +0 -0
  259. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/conftest.py +0 -0
  260. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  261. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  262. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/.gitignore +0 -0
  263. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  264. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/test_datachain.py +0 -0
  265. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/test_ls.py +0 -0
  266. {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/test_version.py +0 -0
  267. {datachain-0.22.0 → datachain-0.23.0}/tests/conftest.py +0 -0
  268. {datachain-0.22.0 → datachain-0.23.0}/tests/data.py +0 -0
  269. {datachain-0.22.0 → datachain-0.23.0}/tests/examples/__init__.py +0 -0
  270. {datachain-0.22.0 → datachain-0.23.0}/tests/examples/test_examples.py +0 -0
  271. {datachain-0.22.0 → datachain-0.23.0}/tests/examples/test_wds_e2e.py +0 -0
  272. {datachain-0.22.0 → datachain-0.23.0}/tests/examples/wds_data.py +0 -0
  273. {datachain-0.22.0 → datachain-0.23.0}/tests/func/__init__.py +0 -0
  274. {datachain-0.22.0 → datachain-0.23.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  275. {datachain-0.22.0 → datachain-0.23.0}/tests/func/data/lena.jpg +0 -0
  276. {datachain-0.22.0 → datachain-0.23.0}/tests/func/fake-service-account-credentials.json +0 -0
  277. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/__init__.py +0 -0
  278. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_aggregate.py +0 -0
  279. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_array.py +0 -0
  280. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_conditional.py +0 -0
  281. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_numeric.py +0 -0
  282. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_path.py +0 -0
  283. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_random.py +0 -0
  284. {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_string.py +0 -0
  285. {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/__init__.py +0 -0
  286. {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/running-mask0.png +0 -0
  287. {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/running-mask1.png +0 -0
  288. {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/running.jpg +0 -0
  289. {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/ships.jpg +0 -0
  290. {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/test_yolo.py +0 -0
  291. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_batching.py +0 -0
  292. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_catalog.py +0 -0
  293. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_client.py +0 -0
  294. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_cloud_transfer.py +0 -0
  295. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_data_storage.py +0 -0
  296. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_datachain_merge.py +0 -0
  297. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_dataset_query.py +0 -0
  298. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_datasets.py +0 -0
  299. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_delta.py +0 -0
  300. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_feature_pickling.py +0 -0
  301. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_file.py +0 -0
  302. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_hf.py +0 -0
  303. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_hidden_field.py +0 -0
  304. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_image.py +0 -0
  305. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_listing.py +0 -0
  306. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_ls.py +0 -0
  307. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_meta_formats.py +0 -0
  308. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_metastore.py +0 -0
  309. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_metrics.py +0 -0
  310. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_pull.py +0 -0
  311. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_pytorch.py +0 -0
  312. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_query.py +0 -0
  313. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_read_database.py +0 -0
  314. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_retry.py +0 -0
  315. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_session.py +0 -0
  316. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_toolkit.py +0 -0
  317. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_video.py +0 -0
  318. {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_warehouse.py +0 -0
  319. {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class.py +0 -0
  320. {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class_exception.py +0 -0
  321. {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class_parallel.py +0 -0
  322. {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  323. {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/name_len_slow.py +0 -0
  324. {datachain-0.22.0 → datachain-0.23.0}/tests/test_atomicity.py +0 -0
  325. {datachain-0.22.0 → datachain-0.23.0}/tests/test_cli_e2e.py +0 -0
  326. {datachain-0.22.0 → datachain-0.23.0}/tests/test_cli_studio.py +0 -0
  327. {datachain-0.22.0 → datachain-0.23.0}/tests/test_import_time.py +0 -0
  328. {datachain-0.22.0 → datachain-0.23.0}/tests/test_query_e2e.py +0 -0
  329. {datachain-0.22.0 → datachain-0.23.0}/tests/test_telemetry.py +0 -0
  330. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/__init__.py +0 -0
  331. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/__init__.py +0 -0
  332. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/conftest.py +0 -0
  333. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_arrow.py +0 -0
  334. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_clip.py +0 -0
  335. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  336. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  337. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_diff.py +0 -0
  338. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_feature.py +0 -0
  339. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_feature_utils.py +0 -0
  340. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_file.py +0 -0
  341. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_hf.py +0 -0
  342. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_image.py +0 -0
  343. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_listing_info.py +0 -0
  344. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_namespace.py +0 -0
  345. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_project.py +0 -0
  346. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  347. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_schema.py +0 -0
  348. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_signal_schema.py +0 -0
  349. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  350. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_text.py +0 -0
  351. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_udf.py +0 -0
  352. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_udf_signature.py +0 -0
  353. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_utils.py +0 -0
  354. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_webdataset.py +0 -0
  355. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/__init__.py +0 -0
  356. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_bbox.py +0 -0
  357. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_pose.py +0 -0
  358. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_segment.py +0 -0
  359. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_utils.py +0 -0
  360. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/__init__.py +0 -0
  361. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  362. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  363. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  364. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_array.py +0 -0
  365. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_conditional.py +0 -0
  366. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_path.py +0 -0
  367. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_random.py +0 -0
  368. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_selectable.py +0 -0
  369. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_string.py +0 -0
  370. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_asyn.py +0 -0
  371. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_cache.py +0 -0
  372. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_catalog.py +0 -0
  373. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_catalog_loader.py +0 -0
  374. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_cli_parsing.py +0 -0
  375. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_client.py +0 -0
  376. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_client_gcs.py +0 -0
  377. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_client_s3.py +0 -0
  378. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_config.py +0 -0
  379. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_data_storage.py +0 -0
  380. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_database_engine.py +0 -0
  381. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_dispatch.py +0 -0
  382. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_fileslice.py +0 -0
  383. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_func.py +0 -0
  384. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_listing.py +0 -0
  385. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_metastore.py +0 -0
  386. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_module_exports.py +0 -0
  387. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_pytorch.py +0 -0
  388. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_query.py +0 -0
  389. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_query_metrics.py +0 -0
  390. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_query_params.py +0 -0
  391. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_script_meta.py +0 -0
  392. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_semver.py +0 -0
  393. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_serializer.py +0 -0
  394. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_session.py +0 -0
  395. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_utils.py +0 -0
  396. {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_warehouse.py +0 -0
  397. {datachain-0.22.0 → datachain-0.23.0}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.22.0
3
+ Version: 0.23.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -15,4 +15,8 @@ List of environment variables used to configure DataChain behavior.
15
15
  - `DATACHAIN_STUDIO_TOKEN` – Authentication token for Studio.
16
16
  - `DATACHAIN_STUDIO_TEAM` – Studio team name.
17
17
 
18
+ ### Namespaces and projects
19
+ - `DATACHAIN_NAMESPACE` – Namespace name to use as default.
20
+ - `DATACHAIN_PROJECT` – Project name to use as default, or a namespace name and project name separated by `.` (for example, `DATACHAIN_PROJECT=dev.analytics`).
21
+
18
22
  Note: Some environment variables are used internally and may not be documented here. For the most up-to-date list, refer to the source code.
@@ -82,6 +82,49 @@ This is equivalent to saving to `dev.analytics.metrics`.
82
82
 
83
83
  In CLI, `.settings()` is only supported when both `namespace` and `project` are set to `"local"`.
84
84
 
85
+ ## Setting Namespace and Project via Environment Variables
86
+
87
+ In addition to using `.settings()`, you can configure the namespace and project using environment variables:
88
+
89
+ - `DATACHAIN_NAMESPACE` sets the namespace.
90
+ - `DATACHAIN_PROJECT` sets the project name, or both the namespace and project using the format `namespace.project`.
91
+
92
+ ### Examples
93
+
94
+ ```
95
+ # Set namespace only
96
+ export DATACHAIN_NAMESPACE=dev
97
+
98
+ # Set project only
99
+ export DATACHAIN_PROJECT=analytics
100
+
101
+ # Set both namespace and project
102
+ export DATACHAIN_PROJECT=dev.analytics
103
+ ```
104
+
105
+ ## How Namespace and Project Are Resolved
106
+
107
+ When determining which namespace and project to use, DataChain applies the following precedence, from highest to lowest:
108
+
109
+ 1. **Fully qualified dataset name**
110
+ If the dataset name includes both the namespace and project, these values take highest precedence.
111
+ ```python
112
+ dc.read_dataset("dev.analytics.metrics")
113
+ ```
114
+ 2. **Explicit settings in code**
115
+ Values provided via `.settings()` or passed directly to `read_dataset()` or similar methods.
116
+ ```python
117
+ dc.settings(namespace="dev", project="analytics")
118
+ dc.read_dataset("metrics", namespace="dev", project="analytics")
119
+ ```
120
+ 3. **Environment variables**
121
+ Namespace and project set using environment variables:
122
+ ```console
123
+ export DATACHAIN_PROJECT=dev.analytics
124
+ ```
125
+ 4. **Defaults**
126
+ If none of the above are provided, DataChain falls back to the default namespace and project.
127
+
85
128
  ## Reading a Dataset from a Project
86
129
 
87
130
  To read a dataset from a specific namespace and project:
@@ -116,4 +159,3 @@ dc.read_values(scores=[0.8, 1.5, 2.1]).save("metrics")
116
159
 
117
160
  ds = dc.read_dataset("local.local.metrics")
118
161
  ds.show()
119
- ```
@@ -1059,6 +1059,39 @@ class Catalog:
1059
1059
 
1060
1060
  return self.get_dataset(name, project)
1061
1061
 
1062
+ def get_full_dataset_name(
1063
+ self,
1064
+ name: str,
1065
+ project_name: Optional[str] = None,
1066
+ namespace_name: Optional[str] = None,
1067
+ ) -> tuple[str, str, str]:
1068
+ """
1069
+ Returns dataset name together with separated namespace and project name.
1070
+ It takes into account all the ways namespace and project can be added.
1071
+ """
1072
+ parsed_namespace_name, parsed_project_name, name = parse_dataset_name(name)
1073
+
1074
+ namespace_env = os.environ.get("DATACHAIN_NAMESPACE")
1075
+ project_env = os.environ.get("DATACHAIN_PROJECT")
1076
+ if project_env and len(project_env.split(".")) == 2:
1077
+ # we allow setting both namespace and project in DATACHAIN_PROJECT
1078
+ namespace_env, project_env = project_env.split(".")
1079
+
1080
+ namespace_name = (
1081
+ parsed_namespace_name
1082
+ or namespace_name
1083
+ or namespace_env
1084
+ or self.metastore.default_namespace_name
1085
+ )
1086
+ project_name = (
1087
+ parsed_project_name
1088
+ or project_name
1089
+ or project_env
1090
+ or self.metastore.default_project_name
1091
+ )
1092
+
1093
+ return namespace_name, project_name, name
1094
+
1062
1095
  def get_dataset(
1063
1096
  self, name: str, project: Optional[Project] = None
1064
1097
  ) -> DatasetRecord:
@@ -8,7 +8,6 @@ if TYPE_CHECKING:
8
8
 
9
9
  from datachain.cli.utils import determine_flavors
10
10
  from datachain.config import Config
11
- from datachain.dataset import parse_dataset_name
12
11
  from datachain.error import DataChainError, DatasetNotFoundError
13
12
  from datachain.studio import list_datasets as list_datasets_studio
14
13
 
@@ -106,9 +105,8 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
106
105
 
107
106
 
108
107
  def list_datasets_local_versions(catalog: "Catalog", name: str):
109
- namespace_name, project_name, name = parse_dataset_name(name)
110
- namespace_name = namespace_name or catalog.metastore.default_namespace_name
111
- project_name = project_name or catalog.metastore.default_project_name
108
+ namespace_name, project_name, name = catalog.get_full_dataset_name(name)
109
+
112
110
  project = catalog.metastore.get_project(project_name, namespace_name)
113
111
  ds = catalog.get_dataset(name, project)
114
112
  for v in ds.versions:
@@ -137,9 +135,7 @@ def rm_dataset(
137
135
  studio: Optional[bool] = False,
138
136
  team: Optional[str] = None,
139
137
  ):
140
- namespace_name, project_name, name = parse_dataset_name(name)
141
- namespace_name = namespace_name or catalog.metastore.default_namespace_name
142
- project_name = project_name or catalog.metastore.default_project_name
138
+ namespace_name, project_name, name = catalog.get_full_dataset_name(name)
143
139
 
144
140
  if not catalog.metastore.is_local_dataset(namespace_name) and studio:
145
141
  from datachain.studio import remove_studio_dataset
@@ -166,9 +162,7 @@ def edit_dataset(
166
162
  attrs: Optional[list[str]] = None,
167
163
  team: Optional[str] = None,
168
164
  ):
169
- namespace_name, project_name, name = parse_dataset_name(name)
170
- namespace_name = namespace_name or catalog.metastore.default_namespace_name
171
- project_name = project_name or catalog.metastore.default_project_name
165
+ namespace_name, project_name, name = catalog.get_full_dataset_name(name)
172
166
 
173
167
  if catalog.metastore.is_local_dataset(namespace_name):
174
168
  try:
@@ -132,6 +132,7 @@ class AbstractMetastore(ABC, Serializable):
132
132
  description: Optional[str] = None,
133
133
  uuid: Optional[str] = None,
134
134
  ignore_if_exists: bool = True,
135
+ validate: bool = True,
135
136
  **kwargs,
136
137
  ) -> Namespace:
137
138
  """Creates new namespace"""
@@ -192,6 +193,7 @@ class AbstractMetastore(ABC, Serializable):
192
193
  description: Optional[str] = None,
193
194
  uuid: Optional[str] = None,
194
195
  ignore_if_exists: bool = True,
196
+ validate: bool = True,
195
197
  **kwargs,
196
198
  ) -> Project:
197
199
  """Creates new project in specific namespace"""
@@ -725,8 +727,11 @@ class AbstractDBMetastore(AbstractMetastore):
725
727
  description: Optional[str] = None,
726
728
  uuid: Optional[str] = None,
727
729
  ignore_if_exists: bool = True,
730
+ validate: bool = True,
728
731
  **kwargs,
729
732
  ) -> Namespace:
733
+ if validate:
734
+ Namespace.validate_name(name)
730
735
  query = self._namespaces_insert().values(
731
736
  name=name,
732
737
  uuid=uuid or str(uuid4()),
@@ -775,12 +780,15 @@ class AbstractDBMetastore(AbstractMetastore):
775
780
  description: Optional[str] = None,
776
781
  uuid: Optional[str] = None,
777
782
  ignore_if_exists: bool = True,
783
+ validate: bool = True,
778
784
  **kwargs,
779
785
  ) -> Project:
786
+ if validate:
787
+ Project.validate_name(name)
780
788
  try:
781
789
  namespace = self.get_namespace(namespace_name)
782
790
  except NamespaceNotFoundError:
783
- namespace = self.create_namespace(namespace_name)
791
+ namespace = self.create_namespace(namespace_name, validate=validate)
784
792
 
785
793
  query = self._projects_insert().values(
786
794
  namespace_id=namespace.id,
@@ -817,11 +825,14 @@ class AbstractDBMetastore(AbstractMetastore):
817
825
  """Gets a single project inside some namespace by name"""
818
826
  n = self._namespaces
819
827
  p = self._projects
828
+ validate = True
829
+
820
830
  if self._is_listing_project(name, namespace_name) or self._is_default_project(
821
831
  name, namespace_name
822
832
  ):
823
833
  # we are always creating default and listing projects if they don't exist
824
834
  create = True
835
+ validate = False
825
836
 
826
837
  query = self._projects_select(
827
838
  *(getattr(n.c, f) for f in self._namespaces_fields),
@@ -834,7 +845,7 @@ class AbstractDBMetastore(AbstractMetastore):
834
845
  rows = list(self.db.execute(query, conn=conn))
835
846
  if not rows:
836
847
  if create:
837
- return self.create_project(namespace_name, name)
848
+ return self.create_project(namespace_name, name, validate=validate)
838
849
  raise ProjectNotFoundError(
839
850
  f"Project {name} in namespace {namespace_name} not found."
840
851
  )
@@ -468,8 +468,12 @@ class SQLiteMetastore(AbstractDBMetastore):
468
468
  be created implicitly though, to keep the same fully qualified name with
469
469
  Studio dataset.
470
470
  """
471
- system_namespace = self.create_namespace(Namespace.system(), "System namespace")
472
- self.create_project(system_namespace.name, Project.listing(), "Listing project")
471
+ system_namespace = self.create_namespace(
472
+ Namespace.system(), "System namespace", validate=False
473
+ )
474
+ self.create_project(
475
+ system_namespace.name, Project.listing(), "Listing project", validate=False
476
+ )
473
477
 
474
478
  def _check_schema_version(self) -> None:
475
479
  """
@@ -81,8 +81,10 @@ def create_dataset_uri(
81
81
  def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
82
82
  """Parses dataset name and returns namespace, project and name"""
83
83
  if not name:
84
- raise ValueError("Name must be defined to parse it")
84
+ raise InvalidDatasetNameError("Name must be defined to parse it")
85
85
  split = name.split(".")
86
+ if len(split) > 3:
87
+ raise InvalidDatasetNameError(f"Invalid dataset name {name}")
86
88
  name = split[-1]
87
89
  project_name = split[-2] if len(split) > 1 else None
88
90
  namespace_name = split[-3] if len(split) > 2 else None
@@ -24,7 +24,7 @@ from pydantic import BaseModel
24
24
  from tqdm import tqdm
25
25
 
26
26
  from datachain import semver
27
- from datachain.dataset import DatasetRecord, parse_dataset_name
27
+ from datachain.dataset import DatasetRecord
28
28
  from datachain.delta import delta_disabled
29
29
  from datachain.error import ProjectCreateNotAllowedError, ProjectNotFoundError
30
30
  from datachain.func import literal
@@ -557,6 +557,7 @@ class DataChain:
557
557
  update_version: which part of the dataset version to automatically increase.
558
558
  Available values: `major`, `minor` or `patch`. Default is `patch`.
559
559
  """
560
+ catalog = self.session.catalog
560
561
  if version is not None:
561
562
  semver.validate(version)
562
563
 
@@ -570,17 +571,10 @@ class DataChain:
570
571
  " patch"
571
572
  )
572
573
 
573
- namespace_name, project_name, name = parse_dataset_name(name)
574
-
575
- namespace_name = (
576
- namespace_name
577
- or self._settings.namespace
578
- or self.session.catalog.metastore.default_namespace_name
579
- )
580
- project_name = (
581
- project_name
582
- or self._settings.project
583
- or self.session.catalog.metastore.default_project_name
574
+ namespace_name, project_name, name = catalog.get_full_dataset_name(
575
+ name,
576
+ namespace_name=self._settings.namespace,
577
+ project_name=self._settings.project,
584
578
  )
585
579
 
586
580
  try:
@@ -1,7 +1,6 @@
1
1
  from collections.abc import Sequence
2
2
  from typing import TYPE_CHECKING, Optional, Union, get_origin, get_type_hints
3
3
 
4
- from datachain.dataset import parse_dataset_name
5
4
  from datachain.error import (
6
5
  DatasetNotFoundError,
7
6
  DatasetVersionNotFoundError,
@@ -125,11 +124,11 @@ def read_dataset(
125
124
  session = Session.get(session)
126
125
  catalog = session.catalog
127
126
 
128
- namespace_name, project_name, name = parse_dataset_name(name)
129
- namespace_name = (
130
- namespace_name or namespace or catalog.metastore.default_namespace_name
127
+ namespace_name, project_name, name = catalog.get_full_dataset_name(
128
+ name,
129
+ project_name=project,
130
+ namespace_name=namespace,
131
131
  )
132
- project_name = project_name or project or catalog.metastore.default_project_name
133
132
 
134
133
  if version is not None:
135
134
  try:
@@ -320,11 +319,11 @@ def delete_dataset(
320
319
  session = Session.get(session, in_memory=in_memory)
321
320
  catalog = session.catalog
322
321
 
323
- namespace_name, project_name, name = parse_dataset_name(name)
324
- namespace_name = (
325
- namespace_name or namespace or catalog.metastore.default_namespace_name
322
+ namespace_name, project_name, name = catalog.get_full_dataset_name(
323
+ name,
324
+ project_name=project,
325
+ namespace_name=namespace,
326
326
  )
327
- project_name = project_name or project or catalog.metastore.default_project_name
328
327
 
329
328
  if not catalog.metastore.is_local_dataset(namespace_name) and studio:
330
329
  return remove_studio_dataset(
@@ -97,4 +97,4 @@ def read_records(
97
97
  for chunk in batched(records, INSERT_BATCH_SIZE):
98
98
  warehouse.insert_rows(table, chunk)
99
99
  warehouse.insert_rows_done(table)
100
- return read_dataset(name=dsr.name, session=session, settings=settings)
100
+ return read_dataset(name=dsr.full_name, session=session, settings=settings)
@@ -25,6 +25,7 @@ from pydantic import BaseModel, Field, create_model
25
25
  from sqlalchemy import ColumnElement
26
26
  from typing_extensions import Literal as LiteralEx
27
27
 
28
+ from datachain.func import literal
28
29
  from datachain.func.func import Func
29
30
  from datachain.lib.convert.python_to_sql import python_to_sql
30
31
  from datachain.lib.convert.sql_to_python import sql_to_python
@@ -659,6 +660,7 @@ class SignalSchema:
659
660
 
660
661
  def mutate(self, args_map: dict) -> "SignalSchema":
661
662
  new_values = self.values.copy()
663
+ primitives = (bool, str, int, float)
662
664
 
663
665
  for name, value in args_map.items():
664
666
  if isinstance(value, Column) and value.name in self.values:
@@ -679,6 +681,12 @@ class SignalSchema:
679
681
  # adding new signal with function
680
682
  new_values[name] = value.get_result_type(self)
681
683
  continue
684
+ if isinstance(value, primitives):
685
+ # For primitives, store the type, not the value
686
+ val = literal(value)
687
+ val.type = python_to_sql(type(value))()
688
+ new_values[name] = sql_to_python(val)
689
+ continue
682
690
  if isinstance(value, ColumnElement):
683
691
  # adding new signal
684
692
  new_values[name] = sql_to_python(value)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.22.0
3
+ Version: 0.23.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -756,6 +756,52 @@ def test_mutate_existing_column(test_session):
756
756
  assert ds.order_by("ids").to_list() == [(2,), (3,), (4,)]
757
757
 
758
758
 
759
+ def test_mutate_with_primitives_save_load(test_session):
760
+ """Test that mutate with primitive values properly persists schema
761
+ through save/load cycle."""
762
+ original_data = [1, 2, 3]
763
+
764
+ # Create dataset with multiple primitive columns added via mutate
765
+ ds = dc.read_values(data=original_data, session=test_session).mutate(
766
+ str_col="test_string",
767
+ int_col=42,
768
+ float_col=3.14,
769
+ bool_col=True,
770
+ )
771
+
772
+ # Verify schema before saving
773
+ schema = ds.signals_schema.values
774
+ assert schema.get("str_col") is str
775
+ assert schema.get("int_col") is int
776
+ assert schema.get("float_col") is float
777
+ assert schema.get("bool_col") is bool
778
+
779
+ ds.save("test_mutate_primitives")
780
+
781
+ # Load the dataset back
782
+ loaded_ds = dc.read_dataset("test_mutate_primitives", session=test_session)
783
+
784
+ # Verify schema after loading
785
+ loaded_schema = loaded_ds.signals_schema.values
786
+ assert loaded_schema.get("str_col") is str
787
+ assert loaded_schema.get("int_col") is int
788
+ assert loaded_schema.get("float_col") is float
789
+ assert loaded_schema.get("bool_col") is bool
790
+
791
+ # Verify data integrity
792
+ results = set(loaded_ds.to_list())
793
+ assert len(results) == 3
794
+
795
+ # Expected tuples: (data, str_col, int_col, float_col, bool_col)
796
+ expected_results = {
797
+ (1, "test_string", 42, 3.14, True),
798
+ (2, "test_string", 42, 3.14, True),
799
+ (3, "test_string", 42, 3.14, True),
800
+ }
801
+
802
+ assert results == expected_results
803
+
804
+
759
805
  @pytest.mark.parametrize("processes", [False, 2, True])
760
806
  @pytest.mark.xdist_group(name="tmpfile")
761
807
  def test_parallel(processes, test_session_tmpfile):
@@ -20,6 +20,9 @@ from datachain.error import (
20
20
  DatasetInvalidVersionError,
21
21
  DatasetNotFoundError,
22
22
  DatasetVersionNotFoundError,
23
+ InvalidDatasetNameError,
24
+ InvalidNamespaceNameError,
25
+ InvalidProjectNameError,
23
26
  ProjectCreateNotAllowedError,
24
27
  )
25
28
  from datachain.lib.data_model import DataModel
@@ -3425,7 +3428,9 @@ def test_save_specify_only_non_default_project(
3425
3428
  default_namespace_name = catalog.metastore.default_namespace_name
3426
3429
 
3427
3430
  if project_created_upfront:
3428
- catalog.metastore.create_project(default_namespace_name, "numbers")
3431
+ catalog.metastore.create_project(
3432
+ default_namespace_name, "numbers", validate=False
3433
+ )
3429
3434
 
3430
3435
  ds = dc.read_values(fib=[1, 1, 2, 3, 5, 8], session=test_session)
3431
3436
  if use_settings:
@@ -3445,6 +3450,111 @@ def test_save_specify_only_non_default_project(
3445
3450
  dc.read_dataset(name="fibonacci")
3446
3451
 
3447
3452
 
3453
+ @pytest.mark.parametrize(
3454
+ (
3455
+ "ds_name_namespace,ds_name_project,"
3456
+ "settings_namespace,settings_project,"
3457
+ "env_namespace,env_project,"
3458
+ "result_ds_namespace,result_ds_project"
3459
+ ),
3460
+ [
3461
+ ("n3", "p3", "n2", "p2", "n1", "p1", "n3", "p3"),
3462
+ ("", "", "n2", "p2", "n1", "p1", "n2", "p2"),
3463
+ ("", "", "", "", "n1", "p1", "n1", "p1"),
3464
+ ("", "", "", "", "n5", "n1.p1", "n1", "p1"),
3465
+ ("", "", "", "", "", "n1.p1", "n1", "p1"),
3466
+ ("", "", "", "", "", "n5.p5", "n5", "p5"),
3467
+ ("n3", "p3", "n2", "p2", "", "", "n3", "p3"),
3468
+ ("n3", "p3", "", "", "", "", "n3", "p3"),
3469
+ ("n3", "p3", "", "", "n1", "p1", "n3", "p3"),
3470
+ ("", "", "", "", "", "", "", ""),
3471
+ ],
3472
+ )
3473
+ def test_save_all_ways_to_set_project(
3474
+ test_session,
3475
+ monkeypatch,
3476
+ ds_name_namespace,
3477
+ ds_name_project,
3478
+ settings_namespace,
3479
+ settings_project,
3480
+ env_namespace,
3481
+ env_project,
3482
+ result_ds_namespace,
3483
+ result_ds_project,
3484
+ ):
3485
+ def _full_name(namespace, project, name) -> str:
3486
+ if namespace and project:
3487
+ return f"{namespace}.{project}.{name}"
3488
+ return name
3489
+
3490
+ metastore = test_session.catalog.metastore
3491
+ ds_name = "numbers"
3492
+
3493
+ monkeypatch.setenv("DATACHAIN_NAMESPACE", env_namespace)
3494
+ monkeypatch.setenv("DATACHAIN_PROJECT", env_project)
3495
+
3496
+ if not result_ds_namespace and not result_ds_project:
3497
+ # special case when nothing is defined - we set default ones
3498
+ result_ds_namespace = metastore.default_namespace_name
3499
+ result_ds_project = metastore.default_project_name
3500
+
3501
+ ds = (
3502
+ dc.read_values(num=[1, 2, 3, 4], session=test_session)
3503
+ .settings(namespace=settings_namespace, project=settings_project)
3504
+ .save(_full_name(ds_name_namespace, ds_name_project, ds_name))
3505
+ )
3506
+
3507
+ assert ds.dataset.project == metastore.get_project(
3508
+ result_ds_project, result_ds_namespace
3509
+ )
3510
+ dc.read_dataset(_full_name(result_ds_namespace, result_ds_project, ds_name))
3511
+
3512
+
3513
+ @pytest.mark.parametrize(
3514
+ (
3515
+ "ds_name_namespace,ds_name_project,"
3516
+ "settings_namespace,settings_project,"
3517
+ "env_namespace,env_project,"
3518
+ "error"
3519
+ ),
3520
+ [
3521
+ ("n3.n3", "p3", "n2", "p2", "n1", "p1", InvalidDatasetNameError),
3522
+ ("n3", "p3.p3", "n2", "p2", "n1", "p1", InvalidDatasetNameError),
3523
+ ("", "", "n2.n2", "p2", "n1", "p1", InvalidNamespaceNameError),
3524
+ ("", "", "n2", "p2.p2", "n1", "p1", InvalidProjectNameError),
3525
+ ("", "", "", "", "n1.n1", "p1", InvalidNamespaceNameError),
3526
+ ("", "", "", "", "n1", "p1.p1.p1", InvalidProjectNameError),
3527
+ ],
3528
+ )
3529
+ def test_save_all_ways_to_set_project_invalid_name(
3530
+ test_session,
3531
+ monkeypatch,
3532
+ ds_name_namespace,
3533
+ ds_name_project,
3534
+ settings_namespace,
3535
+ settings_project,
3536
+ env_namespace,
3537
+ env_project,
3538
+ error,
3539
+ ):
3540
+ def _full_name(namespace, project, name) -> str:
3541
+ if namespace and project:
3542
+ return f"{namespace}.{project}.{name}"
3543
+ return name
3544
+
3545
+ ds_name = "numbers"
3546
+
3547
+ monkeypatch.setenv("DATACHAIN_NAMESPACE", env_namespace)
3548
+ monkeypatch.setenv("DATACHAIN_PROJECT", env_project)
3549
+
3550
+ with pytest.raises(error):
3551
+ (
3552
+ dc.read_values(num=[1, 2, 3, 4], session=test_session)
3553
+ .settings(namespace=settings_namespace, project=settings_project)
3554
+ .save(_full_name(ds_name_namespace, ds_name_project, ds_name))
3555
+ )
3556
+
3557
+
3448
3558
  @pytest.mark.parametrize("allow_create_project", [False])
3449
3559
  @skip_if_not_sqlite
3450
3560
  def test_save_create_project_not_allowed(test_session, allow_create_project):
@@ -178,5 +178,5 @@ def test_parse_dataset_name(full_name, namespace, project, name):
178
178
 
179
179
 
180
180
  def test_parse_dataset_name_empty_name():
181
- with pytest.raises(ValueError):
181
+ with pytest.raises(InvalidDatasetNameError):
182
182
  assert parse_dataset_name(None)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes