datachain 0.24.5__tar.gz → 0.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (399)
  1. {datachain-0.24.5 → datachain-0.25.0}/PKG-INFO +1 -1
  2. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/env.md +1 -1
  3. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/__init__.py +2 -0
  4. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/catalog.py +5 -20
  5. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/metastore.py +30 -1
  6. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/warehouse.py +16 -17
  7. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/__init__.py +2 -1
  8. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/datasets.py +55 -0
  9. {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/PKG-INFO +1 -1
  10. {datachain-0.24.5 → datachain-0.25.0}/tests/conftest.py +6 -0
  11. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_datasets.py +101 -14
  12. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_datachain.py +7 -1
  13. {datachain-0.24.5 → datachain-0.25.0}/tests/utils.py +8 -0
  14. {datachain-0.24.5 → datachain-0.25.0}/.cruft.json +0 -0
  15. {datachain-0.24.5 → datachain-0.25.0}/.gitattributes +0 -0
  16. {datachain-0.24.5 → datachain-0.25.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  17. {datachain-0.24.5 → datachain-0.25.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  18. {datachain-0.24.5 → datachain-0.25.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  19. {datachain-0.24.5 → datachain-0.25.0}/.github/codecov.yaml +0 -0
  20. {datachain-0.24.5 → datachain-0.25.0}/.github/dependabot.yml +0 -0
  21. {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/benchmarks.yml +0 -0
  22. {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/release.yml +0 -0
  23. {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/tests-studio.yml +0 -0
  24. {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/tests.yml +0 -0
  25. {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/update-template.yaml +0 -0
  26. {datachain-0.24.5 → datachain-0.25.0}/.gitignore +0 -0
  27. {datachain-0.24.5 → datachain-0.25.0}/.pre-commit-config.yaml +0 -0
  28. {datachain-0.24.5 → datachain-0.25.0}/CODE_OF_CONDUCT.rst +0 -0
  29. {datachain-0.24.5 → datachain-0.25.0}/LICENSE +0 -0
  30. {datachain-0.24.5 → datachain-0.25.0}/README.rst +0 -0
  31. {datachain-0.24.5 → datachain-0.25.0}/docs/assets/captioned_cartoons.png +0 -0
  32. {datachain-0.24.5 → datachain-0.25.0}/docs/assets/datachain-white.svg +0 -0
  33. {datachain-0.24.5 → datachain-0.25.0}/docs/assets/datachain.svg +0 -0
  34. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/login.md +0 -0
  35. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/logout.md +0 -0
  36. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/team.md +0 -0
  37. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/token.md +0 -0
  38. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/index.md +0 -0
  39. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/cancel.md +0 -0
  40. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/clusters.md +0 -0
  41. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/logs.md +0 -0
  42. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/ls.md +0 -0
  43. {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/run.md +0 -0
  44. {datachain-0.24.5 → datachain-0.25.0}/docs/contributing.md +0 -0
  45. {datachain-0.24.5 → datachain-0.25.0}/docs/css/github-permalink-style.css +0 -0
  46. {datachain-0.24.5 → datachain-0.25.0}/docs/examples.md +0 -0
  47. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/db_migrations.md +0 -0
  48. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/delta.md +0 -0
  49. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/index.md +0 -0
  50. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/namespaces.md +0 -0
  51. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/processing.md +0 -0
  52. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/remotes.md +0 -0
  53. {datachain-0.24.5 → datachain-0.25.0}/docs/guide/retry.md +0 -0
  54. {datachain-0.24.5 → datachain-0.25.0}/docs/index.md +0 -0
  55. {datachain-0.24.5 → datachain-0.25.0}/docs/overrides/main.html +0 -0
  56. {datachain-0.24.5 → datachain-0.25.0}/docs/quick-start.md +0 -0
  57. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/arrowrow.md +0 -0
  58. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/bbox.md +0 -0
  59. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/file.md +0 -0
  60. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/imagefile.md +0 -0
  61. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/index.md +0 -0
  62. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/pose.md +0 -0
  63. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/segment.md +0 -0
  64. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/tarvfile.md +0 -0
  65. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/textfile.md +0 -0
  66. {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/videofile.md +0 -0
  67. {datachain-0.24.5 → datachain-0.25.0}/docs/references/datachain.md +0 -0
  68. {datachain-0.24.5 → datachain-0.25.0}/docs/references/func.md +0 -0
  69. {datachain-0.24.5 → datachain-0.25.0}/docs/references/index.md +0 -0
  70. {datachain-0.24.5 → datachain-0.25.0}/docs/references/toolkit.md +0 -0
  71. {datachain-0.24.5 → datachain-0.25.0}/docs/references/torch.md +0 -0
  72. {datachain-0.24.5 → datachain-0.25.0}/docs/references/udf.md +0 -0
  73. {datachain-0.24.5 → datachain-0.25.0}/docs/tutorials.md +0 -0
  74. {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  75. {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  76. {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/openimage-detect.py +0 -0
  77. {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
  78. {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/ultralytics-pose.py +0 -0
  79. {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/ultralytics-segment.py +0 -0
  80. {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/common_sql_functions.py +0 -0
  81. {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/json-csv-reader.py +0 -0
  82. {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/torch-loader.py +0 -0
  83. {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/udfs/parallel.py +0 -0
  84. {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/udfs/simple.py +0 -0
  85. {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/udfs/stateful.py +0 -0
  86. {datachain-0.24.5 → datachain-0.25.0}/examples/incremental_processing/delta.py +0 -0
  87. {datachain-0.24.5 → datachain-0.25.0}/examples/incremental_processing/retry.py +0 -0
  88. {datachain-0.24.5 → datachain-0.25.0}/examples/incremental_processing/utils.py +0 -0
  89. {datachain-0.24.5 → datachain-0.25.0}/examples/llm_and_nlp/claude-query.py +0 -0
  90. {datachain-0.24.5 → datachain-0.25.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  91. {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/clip_inference.py +0 -0
  92. {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/hf_pipeline.py +0 -0
  93. {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
  94. {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/wds.py +0 -0
  95. {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/wds_filtered.py +0 -0
  96. {datachain-0.24.5 → datachain-0.25.0}/mkdocs.yml +0 -0
  97. {datachain-0.24.5 → datachain-0.25.0}/noxfile.py +0 -0
  98. {datachain-0.24.5 → datachain-0.25.0}/pyproject.toml +0 -0
  99. {datachain-0.24.5 → datachain-0.25.0}/setup.cfg +0 -0
  100. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/__main__.py +0 -0
  101. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/asyn.py +0 -0
  102. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cache.py +0 -0
  103. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/__init__.py +0 -0
  104. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/datasource.py +0 -0
  105. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/loader.py +0 -0
  106. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/__init__.py +0 -0
  107. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/__init__.py +0 -0
  108. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/datasets.py +0 -0
  109. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/du.py +0 -0
  110. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/index.py +0 -0
  111. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/ls.py +0 -0
  112. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/misc.py +0 -0
  113. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/query.py +0 -0
  114. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/show.py +0 -0
  115. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/__init__.py +0 -0
  116. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/job.py +0 -0
  117. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/studio.py +0 -0
  118. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/utils.py +0 -0
  119. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/utils.py +0 -0
  120. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/__init__.py +0 -0
  121. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/azure.py +0 -0
  122. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/fileslice.py +0 -0
  123. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/fsspec.py +0 -0
  124. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/gcs.py +0 -0
  125. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/hf.py +0 -0
  126. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/local.py +0 -0
  127. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/s3.py +0 -0
  128. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/config.py +0 -0
  129. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/__init__.py +0 -0
  130. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/db_engine.py +0 -0
  131. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/job.py +0 -0
  132. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/schema.py +0 -0
  133. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/serializer.py +0 -0
  134. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/sqlite.py +0 -0
  135. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/dataset.py +0 -0
  136. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/delta.py +0 -0
  137. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/diff/__init__.py +0 -0
  138. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/error.py +0 -0
  139. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/fs/__init__.py +0 -0
  140. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/fs/reference.py +0 -0
  141. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/fs/utils.py +0 -0
  142. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/__init__.py +0 -0
  143. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/aggregate.py +0 -0
  144. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/array.py +0 -0
  145. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/base.py +0 -0
  146. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/conditional.py +0 -0
  147. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/func.py +0 -0
  148. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/numeric.py +0 -0
  149. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/path.py +0 -0
  150. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/random.py +0 -0
  151. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/string.py +0 -0
  152. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/window.py +0 -0
  153. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/job.py +0 -0
  154. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/__init__.py +0 -0
  155. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/arrow.py +0 -0
  156. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/clip.py +0 -0
  157. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/__init__.py +0 -0
  158. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/flatten.py +0 -0
  159. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
  160. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
  161. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/unflatten.py +0 -0
  162. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  163. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/data_model.py +0 -0
  164. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dataset_info.py +0 -0
  165. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/csv.py +0 -0
  166. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/database.py +0 -0
  167. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/datachain.py +0 -0
  168. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/hf.py +0 -0
  169. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/json.py +0 -0
  170. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/listings.py +0 -0
  171. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/pandas.py +0 -0
  172. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/parquet.py +0 -0
  173. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/records.py +0 -0
  174. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/storage.py +0 -0
  175. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/utils.py +0 -0
  176. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/values.py +0 -0
  177. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/file.py +0 -0
  178. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/hf.py +0 -0
  179. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/image.py +0 -0
  180. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/listing.py +0 -0
  181. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/listing_info.py +0 -0
  182. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/meta_formats.py +0 -0
  183. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/model_store.py +0 -0
  184. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/namespaces.py +0 -0
  185. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/projects.py +0 -0
  186. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/pytorch.py +0 -0
  187. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/settings.py +0 -0
  188. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/signal_schema.py +0 -0
  189. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/tar.py +0 -0
  190. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/text.py +0 -0
  191. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/udf.py +0 -0
  192. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/udf_signature.py +0 -0
  193. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/utils.py +0 -0
  194. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/video.py +0 -0
  195. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/webdataset.py +0 -0
  196. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/webdataset_laion.py +0 -0
  197. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/listing.py +0 -0
  198. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/__init__.py +0 -0
  199. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/bbox.py +0 -0
  200. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/pose.py +0 -0
  201. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/segment.py +0 -0
  202. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/__init__.py +0 -0
  203. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/bbox.py +0 -0
  204. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/pose.py +0 -0
  205. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/segment.py +0 -0
  206. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/utils.py +0 -0
  207. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/namespace.py +0 -0
  208. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/node.py +0 -0
  209. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/nodes_fetcher.py +0 -0
  210. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/nodes_thread_pool.py +0 -0
  211. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/progress.py +0 -0
  212. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/project.py +0 -0
  213. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/py.typed +0 -0
  214. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/__init__.py +0 -0
  215. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/batch.py +0 -0
  216. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/dataset.py +0 -0
  217. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/dispatch.py +0 -0
  218. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/metrics.py +0 -0
  219. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/params.py +0 -0
  220. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/queue.py +0 -0
  221. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/schema.py +0 -0
  222. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/session.py +0 -0
  223. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/udf.py +0 -0
  224. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/utils.py +0 -0
  225. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/remote/__init__.py +0 -0
  226. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/remote/studio.py +0 -0
  227. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/script_meta.py +0 -0
  228. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/semver.py +0 -0
  229. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/__init__.py +0 -0
  230. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/default/__init__.py +0 -0
  231. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/default/base.py +0 -0
  232. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/__init__.py +0 -0
  233. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/aggregate.py +0 -0
  234. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/array.py +0 -0
  235. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/conditional.py +0 -0
  236. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/numeric.py +0 -0
  237. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/path.py +0 -0
  238. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/random.py +0 -0
  239. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/string.py +0 -0
  240. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/selectable.py +0 -0
  241. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/__init__.py +0 -0
  242. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/base.py +0 -0
  243. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/types.py +0 -0
  244. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/vector.py +0 -0
  245. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/types.py +0 -0
  246. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/utils.py +0 -0
  247. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/studio.py +0 -0
  248. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/telemetry.py +0 -0
  249. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/toolkit/__init__.py +0 -0
  250. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/toolkit/split.py +0 -0
  251. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/torch/__init__.py +0 -0
  252. {datachain-0.24.5 → datachain-0.25.0}/src/datachain/utils.py +0 -0
  253. {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/SOURCES.txt +0 -0
  254. {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/dependency_links.txt +0 -0
  255. {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/entry_points.txt +0 -0
  256. {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/requires.txt +0 -0
  257. {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/top_level.txt +0 -0
  258. {datachain-0.24.5 → datachain-0.25.0}/tests/__init__.py +0 -0
  259. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/__init__.py +0 -0
  260. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/conftest.py +0 -0
  261. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  262. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/.dvc/config +0 -0
  263. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/.gitignore +0 -0
  264. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  265. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/test_datachain.py +0 -0
  266. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/test_ls.py +0 -0
  267. {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/test_version.py +0 -0
  268. {datachain-0.24.5 → datachain-0.25.0}/tests/data.py +0 -0
  269. {datachain-0.24.5 → datachain-0.25.0}/tests/examples/__init__.py +0 -0
  270. {datachain-0.24.5 → datachain-0.25.0}/tests/examples/test_examples.py +0 -0
  271. {datachain-0.24.5 → datachain-0.25.0}/tests/examples/test_wds_e2e.py +0 -0
  272. {datachain-0.24.5 → datachain-0.25.0}/tests/examples/wds_data.py +0 -0
  273. {datachain-0.24.5 → datachain-0.25.0}/tests/func/__init__.py +0 -0
  274. {datachain-0.24.5 → datachain-0.25.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  275. {datachain-0.24.5 → datachain-0.25.0}/tests/func/data/lena.jpg +0 -0
  276. {datachain-0.24.5 → datachain-0.25.0}/tests/func/fake-service-account-credentials.json +0 -0
  277. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/__init__.py +0 -0
  278. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_aggregate.py +0 -0
  279. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_array.py +0 -0
  280. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_conditional.py +0 -0
  281. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_numeric.py +0 -0
  282. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_path.py +0 -0
  283. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_random.py +0 -0
  284. {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_string.py +0 -0
  285. {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/__init__.py +0 -0
  286. {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/running-mask0.png +0 -0
  287. {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/running-mask1.png +0 -0
  288. {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/running.jpg +0 -0
  289. {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/ships.jpg +0 -0
  290. {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/test_yolo.py +0 -0
  291. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_batching.py +0 -0
  292. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_catalog.py +0 -0
  293. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_client.py +0 -0
  294. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_cloud_transfer.py +0 -0
  295. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_data_storage.py +0 -0
  296. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_datachain.py +0 -0
  297. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_datachain_merge.py +0 -0
  298. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_dataset_query.py +0 -0
  299. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_delta.py +0 -0
  300. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_feature_pickling.py +0 -0
  301. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_file.py +0 -0
  302. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_hf.py +0 -0
  303. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_hidden_field.py +0 -0
  304. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_image.py +0 -0
  305. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_listing.py +0 -0
  306. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_ls.py +0 -0
  307. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_meta_formats.py +0 -0
  308. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_metastore.py +0 -0
  309. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_metrics.py +0 -0
  310. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_pull.py +0 -0
  311. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_pytorch.py +0 -0
  312. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_query.py +0 -0
  313. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_read_database.py +0 -0
  314. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_read_dataset_remote.py +0 -0
  315. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  316. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_retry.py +0 -0
  317. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_session.py +0 -0
  318. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_toolkit.py +0 -0
  319. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_video.py +0 -0
  320. {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_warehouse.py +0 -0
  321. {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class.py +0 -0
  322. {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class_exception.py +0 -0
  323. {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class_parallel.py +0 -0
  324. {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  325. {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/name_len_slow.py +0 -0
  326. {datachain-0.24.5 → datachain-0.25.0}/tests/test_atomicity.py +0 -0
  327. {datachain-0.24.5 → datachain-0.25.0}/tests/test_cli_e2e.py +0 -0
  328. {datachain-0.24.5 → datachain-0.25.0}/tests/test_cli_studio.py +0 -0
  329. {datachain-0.24.5 → datachain-0.25.0}/tests/test_import_time.py +0 -0
  330. {datachain-0.24.5 → datachain-0.25.0}/tests/test_query_e2e.py +0 -0
  331. {datachain-0.24.5 → datachain-0.25.0}/tests/test_telemetry.py +0 -0
  332. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/__init__.py +0 -0
  333. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/__init__.py +0 -0
  334. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/conftest.py +0 -0
  335. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_arrow.py +0 -0
  336. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_clip.py +0 -0
  337. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  338. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_datachain_merge.py +0 -0
  339. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_diff.py +0 -0
  340. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_feature.py +0 -0
  341. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_feature_utils.py +0 -0
  342. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_file.py +0 -0
  343. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_hf.py +0 -0
  344. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_image.py +0 -0
  345. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_listing_info.py +0 -0
  346. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_namespace.py +0 -0
  347. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_project.py +0 -0
  348. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_python_to_sql.py +0 -0
  349. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_schema.py +0 -0
  350. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_signal_schema.py +0 -0
  351. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_sql_to_python.py +0 -0
  352. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_text.py +0 -0
  353. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_udf.py +0 -0
  354. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_udf_signature.py +0 -0
  355. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_utils.py +0 -0
  356. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_webdataset.py +0 -0
  357. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/__init__.py +0 -0
  358. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_bbox.py +0 -0
  359. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_pose.py +0 -0
  360. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_segment.py +0 -0
  361. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_utils.py +0 -0
  362. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/__init__.py +0 -0
  363. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/sqlite/__init__.py +0 -0
  364. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/sqlite/test_types.py +0 -0
  365. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
  366. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_array.py +0 -0
  367. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_conditional.py +0 -0
  368. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_path.py +0 -0
  369. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_random.py +0 -0
  370. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_selectable.py +0 -0
  371. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_string.py +0 -0
  372. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_asyn.py +0 -0
  373. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_cache.py +0 -0
  374. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_catalog.py +0 -0
  375. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_catalog_loader.py +0 -0
  376. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_cli_parsing.py +0 -0
  377. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_client.py +0 -0
  378. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_client_gcs.py +0 -0
  379. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_client_s3.py +0 -0
  380. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_config.py +0 -0
  381. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_data_storage.py +0 -0
  382. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_database_engine.py +0 -0
  383. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_dataset.py +0 -0
  384. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_dispatch.py +0 -0
  385. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_fileslice.py +0 -0
  386. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_func.py +0 -0
  387. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_listing.py +0 -0
  388. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_metastore.py +0 -0
  389. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_module_exports.py +0 -0
  390. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_pytorch.py +0 -0
  391. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_query.py +0 -0
  392. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_query_metrics.py +0 -0
  393. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_query_params.py +0 -0
  394. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_script_meta.py +0 -0
  395. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_semver.py +0 -0
  396. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_serializer.py +0 -0
  397. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_session.py +0 -0
  398. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_utils.py +0 -0
  399. {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.24.5
3
+ Version: 0.25.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ List of environment variables used to configure DataChain behavior.
4
4
 
5
5
  ### Core Configuration
6
6
 
7
- - `DATACHAIN_ROOT_DIR` – Specifies the root directory where DataChain will create the `.datachain` folder to store its internal data. (default: the current working directory).
7
+ - `DATACHAIN_ROOT_DIR` – Specifies the root directory where DataChain will create the `.datachain` folder to store its internal data. (default: user home directory).
8
8
  - `DATACHAIN_SYSTEM_CONFIG_DIR` – Overrides the system-wide configuration directory (default depends on the platform).
9
9
  - `DATACHAIN_GLOBAL_CONFIG_DIR` – Overrides the user's global configuration directory (default depends on the platform).
10
10
  - `DATACHAIN_NO_ANALYTICS` – Disables telemetry.
@@ -7,6 +7,7 @@ from datachain.lib.dc import (
7
7
  datasets,
8
8
  delete_dataset,
9
9
  listings,
10
+ move_dataset,
10
11
  read_csv,
11
12
  read_database,
12
13
  read_dataset,
@@ -69,6 +70,7 @@ __all__ = [
69
70
  "is_chain_type",
70
71
  "listings",
71
72
  "metrics",
73
+ "move_dataset",
72
74
  "param",
73
75
  "read_csv",
74
76
  "read_database",
@@ -956,26 +956,9 @@ class Catalog:
956
956
  self, dataset: DatasetRecord, conn=None, **kwargs
957
957
  ) -> DatasetRecord:
958
958
  """Updates dataset fields."""
959
- old_name = None
960
- new_name = None
961
- if "name" in kwargs and kwargs["name"] != dataset.name:
962
- old_name = dataset.name
963
- new_name = kwargs["name"]
964
-
965
- dataset = self.metastore.update_dataset(dataset, conn=conn, **kwargs)
966
-
967
- if old_name and new_name:
968
- # updating name must result in updating dataset table names as well
969
- for version in [v.version for v in dataset.versions]:
970
- self.warehouse.rename_dataset_table(
971
- dataset,
972
- old_name,
973
- new_name,
974
- old_version=version,
975
- new_version=version,
976
- )
977
-
978
- return dataset
959
+ dataset_updated = self.metastore.update_dataset(dataset, conn=conn, **kwargs)
960
+ self.warehouse.rename_dataset_tables(dataset, dataset_updated)
961
+ return dataset_updated
979
962
 
980
963
  def remove_dataset_version(
981
964
  self, dataset: DatasetRecord, version: str, drop_rows: Optional[bool] = True
@@ -1555,12 +1538,14 @@ class Catalog:
1555
1538
  remote_ds.project.namespace.name,
1556
1539
  description=remote_ds.project.namespace.descr,
1557
1540
  uuid=remote_ds.project.namespace.uuid,
1541
+ validate=False,
1558
1542
  )
1559
1543
  project = self.metastore.create_project(
1560
1544
  namespace.name,
1561
1545
  remote_ds.project.name,
1562
1546
  description=remote_ds.project.descr,
1563
1547
  uuid=remote_ds.project.uuid,
1548
+ validate=False,
1564
1549
  )
1565
1550
 
1566
1551
  try:
@@ -207,6 +207,10 @@ class AbstractMetastore(ABC, Serializable):
207
207
  It also creates project if not found and create flag is set to True.
208
208
  """
209
209
 
210
+ @abstractmethod
211
+ def get_project_by_id(self, project_id: int, conn=None) -> Project:
212
+ """Gets a single project by id"""
213
+
210
214
  @abstractmethod
211
215
  def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
212
216
  """Gets list of projects in some namespace or in general (in all namespaces)"""
@@ -851,6 +855,24 @@ class AbstractDBMetastore(AbstractMetastore):
851
855
  )
852
856
  return self.project_class.parse(*rows[0])
853
857
 
858
+ def get_project_by_id(self, project_id: int, conn=None) -> Project:
859
+ """Gets a single project by id"""
860
+ n = self._namespaces
861
+ p = self._projects
862
+
863
+ query = self._projects_select(
864
+ *(getattr(n.c, f) for f in self._namespaces_fields),
865
+ *(getattr(p.c, f) for f in self._projects_fields),
866
+ )
867
+ query = query.select_from(n.join(p, n.c.id == p.c.namespace_id)).where(
868
+ p.c.id == project_id
869
+ )
870
+
871
+ rows = list(self.db.execute(query, conn=conn))
872
+ if not rows:
873
+ raise ProjectNotFoundError(f"Project with id {project_id} not found.")
874
+ return self.project_class.parse(*rows[0])
875
+
854
876
  def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
855
877
  """
856
878
  Gets a list of projects inside some namespace, or in all namespaces
@@ -1008,6 +1030,11 @@ class AbstractDBMetastore(AbstractMetastore):
1008
1030
  else:
1009
1031
  values[field] = json.dumps(value)
1010
1032
  dataset_values[field] = DatasetRecord.parse_schema(value)
1033
+ elif field == "project_id":
1034
+ if not value:
1035
+ raise ValueError("Cannot set empty project_id for dataset")
1036
+ dataset_values["project"] = self.get_project_by_id(value)
1037
+ values[field] = value
1011
1038
  else:
1012
1039
  values[field] = value
1013
1040
  dataset_values[field] = value
@@ -1017,7 +1044,9 @@ class AbstractDBMetastore(AbstractMetastore):
1017
1044
 
1018
1045
  d = self._datasets
1019
1046
  self.db.execute(
1020
- self._datasets_update().where(d.c.name == dataset.name).values(values),
1047
+ self._datasets_update()
1048
+ .where(d.c.name == dataset.name, d.c.project_id == dataset.project.id)
1049
+ .values(values),
1021
1050
  conn=conn,
1022
1051
  ) # type: ignore [attr-defined]
1023
1052
 
@@ -356,24 +356,23 @@ class AbstractWarehouse(ABC, Serializable):
356
356
  self, dataset: DatasetRecord, version: str
357
357
  ) -> list[StorageURI]: ...
358
358
 
359
- def rename_dataset_table(
360
- self,
361
- dataset: DatasetRecord,
362
- old_name: str,
363
- new_name: str,
364
- old_version: str,
365
- new_version: str,
359
+ def rename_dataset_tables(
360
+ self, dataset: DatasetRecord, dataset_updated: DatasetRecord
366
361
  ) -> None:
367
- namespace = dataset.project.namespace.name
368
- project = dataset.project.name
369
- old_ds_table_name = self._construct_dataset_table_name(
370
- namespace, project, old_name, old_version
371
- )
372
- new_ds_table_name = self._construct_dataset_table_name(
373
- namespace, project, new_name, new_version
374
- )
375
-
376
- self.db.rename_table(old_ds_table_name, new_ds_table_name)
362
+ """
363
+ Renames all dataset version tables when parts of the dataset that
364
+ are used in constructing table name are updated.
365
+ If nothing important is changed, nothing will be renamed (no DB calls
366
+ will be made at all).
367
+ """
368
+ for version in [v.version for v in dataset_updated.versions]:
369
+ if not dataset.has_version(version):
370
+ continue
371
+ src = self.dataset_table_name(dataset, version)
372
+ dest = self.dataset_table_name(dataset_updated, version)
373
+ if src == dest:
374
+ continue
375
+ self.db.rename_table(src, dest)
377
376
 
378
377
  def dataset_rows_count(self, dataset: DatasetRecord, version=None) -> int:
379
378
  """Returns total number of rows in a dataset"""
@@ -1,7 +1,7 @@
1
1
  from .csv import read_csv
2
2
  from .database import read_database
3
3
  from .datachain import C, Column, DataChain
4
- from .datasets import datasets, delete_dataset, read_dataset
4
+ from .datasets import datasets, delete_dataset, move_dataset, read_dataset
5
5
  from .hf import read_hf
6
6
  from .json import read_json
7
7
  from .listings import listings
@@ -22,6 +22,7 @@ __all__ = [
22
22
  "datasets",
23
23
  "delete_dataset",
24
24
  "listings",
25
+ "move_dataset",
25
26
  "read_csv",
26
27
  "read_database",
27
28
  "read_dataset",
@@ -361,3 +361,58 @@ def delete_dataset(
361
361
  else:
362
362
  version = None
363
363
  catalog.remove_dataset(name, ds_project, version=version, force=force)
364
+
365
+
366
+ def move_dataset(
367
+ src: str,
368
+ dest: str,
369
+ session: Optional[Session] = None,
370
+ in_memory: bool = False,
371
+ ) -> None:
372
+ """Moves an entire dataset between namespaces and projects.
373
+
374
+ Args:
375
+ src: The source dataset name. This can be a fully qualified name that includes
376
+ the namespace and project, or a regular name. If a regular name is used,
377
+ default values will be applied. The source dataset will no longer exist
378
+ after the move.
379
+ dest: The destination dataset name. This can also be a fully qualified
380
+ name with a namespace and project, or just a regular name (default values
381
+ will be used in that case). The original dataset will be moved here.
382
+ session: An optional session instance. If not provided, the default session
383
+ will be used.
384
+ in_memory: If True, creates an in-memory session. Defaults to False.
385
+
386
+ Returns:
387
+ None
388
+
389
+ Examples:
390
+ ```python
391
+ import datachain as dc
392
+ dc.move_dataset("cats", "new_cats")
393
+ ```
394
+
395
+ ```python
396
+ import datachain as dc
397
+ dc.move_dataset("dev.animals.cats", "prod.animals.cats")
398
+ ```
399
+ """
400
+ session = Session.get(session, in_memory=in_memory)
401
+ catalog = session.catalog
402
+
403
+ namespace, project, name = catalog.get_full_dataset_name(src)
404
+ dest_namespace, dest_project, dest_name = catalog.get_full_dataset_name(dest)
405
+
406
+ dataset = catalog.get_dataset(
407
+ name, catalog.metastore.get_project(project, namespace)
408
+ )
409
+
410
+ catalog.update_dataset(
411
+ dataset,
412
+ name=dest_name,
413
+ project_id=catalog.metastore.get_project(
414
+ dest_project,
415
+ dest_namespace,
416
+ create=catalog.metastore.project_allowed_to_create,
417
+ ).id,
418
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.24.5
3
+ Version: 0.25.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -576,6 +576,12 @@ def mock_allowed_to_create_namespace(allow_create_namespace):
576
576
  yield
577
577
 
578
578
 
579
+ @pytest.fixture
580
+ def mock_is_local_dataset():
581
+ with patch.object(AbstractMetastore, "is_local_dataset", return_value=True):
582
+ yield
583
+
584
+
579
585
  @pytest.fixture
580
586
  def project(test_session):
581
587
  return dc.create_project("dev", "animals", "Animals project")
@@ -11,12 +11,13 @@ from datachain.dataset import DatasetDependencyType, DatasetStatus
11
11
  from datachain.error import (
12
12
  DatasetInvalidVersionError,
13
13
  DatasetNotFoundError,
14
+ ProjectNotFoundError,
14
15
  )
15
16
  from datachain.lib.file import File
16
17
  from datachain.lib.listing import parse_listing_uri
17
18
  from datachain.query.dataset import DatasetQuery
18
19
  from datachain.sql.types import Float32, Int, Int64
19
- from tests.utils import assert_row_names, dataset_dependency_asdict
20
+ from tests.utils import assert_row_names, dataset_dependency_asdict, table_row_count
20
21
 
21
22
  FILE_SCHEMA = {
22
23
  f"file__{name}": _type if _type != Int else Int64
@@ -169,14 +170,6 @@ def test_get_dataset(cloud_test_catalog, dogs_dataset):
169
170
  catalog.get_dataset("wrong name", dogs_dataset.project)
170
171
 
171
172
 
172
- # Returns None if the table does not exist
173
- def get_table_row_count(db, table_name):
174
- if not db.has_table(table_name):
175
- return None
176
- query = sa.select(sa.func.count()).select_from(sa.table(table_name))
177
- return next(db.execute(query), (None,))[0]
178
-
179
-
180
173
  def test_create_dataset_from_sources(listed_bucket, cloud_test_catalog, project):
181
174
  dataset_name = uuid.uuid4().hex
182
175
  src_uri = cloud_test_catalog.src_uri
@@ -327,7 +320,7 @@ def test_remove_dataset(cloud_test_catalog, dogs_dataset):
327
320
  catalog.get_dataset(dogs_dataset.name, dogs_dataset.project)
328
321
 
329
322
  dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
330
- assert get_table_row_count(catalog.warehouse.db, dataset_table_name) is None
323
+ assert table_row_count(catalog.warehouse.db, dataset_table_name) is None
331
324
 
332
325
  assert (
333
326
  catalog.metastore.get_direct_dataset_dependencies(dogs_dataset, "1.0.0") == []
@@ -391,14 +384,108 @@ def test_edit_dataset(cloud_test_catalog, dogs_dataset):
391
384
  old_dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
392
385
  new_dataset_table_name = catalog.warehouse.dataset_table_name(dataset, "1.0.0")
393
386
 
394
- assert get_table_row_count(catalog.warehouse.db, old_dataset_table_name) is None
395
- expected_table_row_count = get_table_row_count(
387
+ assert table_row_count(catalog.warehouse.db, old_dataset_table_name) is None
388
+ expected_table_row_count = table_row_count(
396
389
  catalog.warehouse.db, new_dataset_table_name
397
390
  )
398
391
  assert expected_table_row_count
399
392
  assert dataset.get_version("1.0.0").num_objects == expected_table_row_count
400
393
 
401
394
 
395
+ @pytest.mark.parametrize(
396
+ "old_name,new_name",
397
+ [
398
+ ("old.old.numbers", "new.new.numbers"),
399
+ ("old.old.numbers", "new.new.numbers_new"),
400
+ ("old.old.numbers", "old.new.numbers"),
401
+ ("old.old.numbers", "old.old.numbers"),
402
+ ("numbers", "numbers2"),
403
+ ("numbers", "numbers"),
404
+ ],
405
+ )
406
+ def test_move_dataset(
407
+ test_session,
408
+ old_name,
409
+ new_name,
410
+ mock_is_local_dataset,
411
+ ):
412
+ catalog = test_session.catalog
413
+
414
+ # create 2 versions of dataset in old project
415
+ for _ in range(2):
416
+ (dc.read_values(num=[1, 2, 3], session=test_session).save(old_name))
417
+
418
+ dataset = dc.read_dataset(old_name).dataset
419
+
420
+ dc.move_dataset(old_name, new_name, session=test_session)
421
+
422
+ if old_name != new_name:
423
+ # check that old dataset doesn't exist any more
424
+ with pytest.raises(DatasetNotFoundError):
425
+ dc.read_dataset(old_name).save("wrong")
426
+
427
+ dataset_updated = dc.read_dataset(new_name).dataset
428
+
429
+ # check if dataset tables are renamed correctly as well
430
+ for version in [v.version for v in dataset.versions]:
431
+ old_table_name = catalog.warehouse.dataset_table_name(dataset, version)
432
+ new_table_name = catalog.warehouse.dataset_table_name(dataset_updated, version)
433
+ if old_name == new_name:
434
+ assert old_table_name == new_table_name
435
+ else:
436
+ assert table_row_count(catalog.warehouse.db, old_table_name) is None
437
+
438
+ assert table_row_count(catalog.warehouse.db, new_table_name) == 3
439
+
440
+
441
+ def test_move_dataset_then_save_into(test_session):
442
+ old_name = "old.old.numbers"
443
+ new_name = "new.new.numbers"
444
+
445
+ # create 2 versions of dataset in old project
446
+ for _ in range(2):
447
+ dc.read_values(num=[1, 2, 3], session=test_session).save(old_name)
448
+
449
+ dc.move_dataset(old_name, new_name, session=test_session)
450
+ dc.read_values(num=[1, 2, 3], session=test_session).save(new_name)
451
+
452
+ ds = dc.datasets(column="dataset", session=test_session)
453
+ datasets = [
454
+ d
455
+ for d in ds.to_values("dataset")
456
+ if d.name == "numbers" and d.project == "new" and d.namespace == "new"
457
+ ]
458
+
459
+ assert len(datasets) == 3
460
+
461
+
462
+ def test_move_dataset_wrong_old_project(test_session, project):
463
+ dc.read_values(num=[1, 2, 3], session=test_session).save("old.old.numbers")
464
+
465
+ with pytest.raises(ProjectNotFoundError):
466
+ dc.move_dataset("wrong.wrong.numbers", "new.new.numbers", session=test_session)
467
+
468
+
469
+ def test_move_dataset_error_in_session_moved_dataset_removed(catalog):
470
+ from datachain.query.session import Session
471
+
472
+ old_name = "old.old.numbers"
473
+ new_name = "new.new.numbers"
474
+
475
+ with pytest.raises(DatasetNotFoundError):
476
+ with Session("new", catalog=catalog) as test_session:
477
+ dc.read_values(num=[1, 2, 3]).save("aa")
478
+ dc.read_values(num=[1, 2, 3], session=test_session).save(old_name)
479
+ dc.move_dataset(old_name, new_name, session=test_session)
480
+
481
+ # throws DatasetNotFoundError
482
+ dc.read_dataset("wrong", session=test_session)
483
+
484
+ ds = dc.datasets(column="dataset")
485
+ datasets = [d for d in ds.to_values("dataset")] # noqa: C416
486
+ assert len(datasets) == 0
487
+
488
+
402
489
  def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
403
490
  dataset_new_name = dogs_dataset.name
404
491
  catalog = cloud_test_catalog.catalog
@@ -414,12 +501,12 @@ def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
414
501
  old_dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
415
502
  new_dataset_table_name = catalog.warehouse.dataset_table_name(dataset, "1.0.0")
416
503
 
417
- expected_table_row_count = get_table_row_count(
504
+ expected_table_row_count = table_row_count(
418
505
  catalog.warehouse.db, old_dataset_table_name
419
506
  )
420
507
  assert expected_table_row_count
421
508
  assert dataset.get_version("1.0.0").num_objects == expected_table_row_count
422
- assert expected_table_row_count == get_table_row_count(
509
+ assert expected_table_row_count == table_row_count(
423
510
  catalog.warehouse.db, new_dataset_table_name
424
511
  )
425
512
 
@@ -43,7 +43,13 @@ from datachain.lib.udf_signature import UdfSignatureError
43
43
  from datachain.lib.utils import DataChainColumnError, DataChainParamsError
44
44
  from datachain.sql.types import Float, Int64, String
45
45
  from datachain.utils import STUDIO_URL
46
- from tests.utils import ANY_VALUE, df_equal, skip_if_not_sqlite, sort_df, sorted_dicts
46
+ from tests.utils import (
47
+ ANY_VALUE,
48
+ df_equal,
49
+ skip_if_not_sqlite,
50
+ sort_df,
51
+ sorted_dicts,
52
+ )
47
53
 
48
54
  DF_DATA = {
49
55
  "first_name": ["Alice", "Bob", "Charlie", "David", "Eva"],
@@ -10,6 +10,7 @@ from time import sleep, time
10
10
  from typing import Any, Callable, Optional
11
11
 
12
12
  import pytest
13
+ import sqlalchemy as sa
13
14
  from PIL import Image
14
15
 
15
16
  import datachain as dc
@@ -231,3 +232,10 @@ def sort_df(df):
231
232
  def df_equal(df1, df2) -> bool:
232
233
  """Helper function to check if two dataframes are equal regardless of ordering"""
233
234
  return sort_df(df1).equals(sort_df(df2))
235
+
236
+
237
+ def table_row_count(db, table_name) -> Optional[int]:
238
+ if not db.has_table(table_name):
239
+ return None
240
+ query = sa.select(sa.func.count()).select_from(sa.table(table_name))
241
+ return next(db.execute(query), (None,))[0]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes