datachain 0.24.3__tar.gz → 0.24.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (399) hide show
  1. {datachain-0.24.3 → datachain-0.24.5}/.github/workflows/tests-studio.yml +0 -1
  2. {datachain-0.24.3 → datachain-0.24.5}/.pre-commit-config.yaml +1 -1
  3. {datachain-0.24.3 → datachain-0.24.5}/PKG-INFO +1 -1
  4. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/namespaces.md +3 -3
  5. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/catalog/catalog.py +8 -0
  6. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/dataset.py +1 -1
  7. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/datachain.py +26 -1
  8. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/datasets.py +1 -0
  9. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/namespace.py +1 -1
  10. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/project.py +1 -1
  11. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/dataset.py +5 -1
  12. {datachain-0.24.3 → datachain-0.24.5}/src/datachain.egg-info/PKG-INFO +1 -1
  13. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_read_dataset_remote.py +49 -4
  14. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_datachain.py +57 -0
  15. {datachain-0.24.3 → datachain-0.24.5}/.cruft.json +0 -0
  16. {datachain-0.24.3 → datachain-0.24.5}/.gitattributes +0 -0
  17. {datachain-0.24.3 → datachain-0.24.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  18. {datachain-0.24.3 → datachain-0.24.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  19. {datachain-0.24.3 → datachain-0.24.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  20. {datachain-0.24.3 → datachain-0.24.5}/.github/codecov.yaml +0 -0
  21. {datachain-0.24.3 → datachain-0.24.5}/.github/dependabot.yml +0 -0
  22. {datachain-0.24.3 → datachain-0.24.5}/.github/workflows/benchmarks.yml +0 -0
  23. {datachain-0.24.3 → datachain-0.24.5}/.github/workflows/release.yml +0 -0
  24. {datachain-0.24.3 → datachain-0.24.5}/.github/workflows/tests.yml +0 -0
  25. {datachain-0.24.3 → datachain-0.24.5}/.github/workflows/update-template.yaml +0 -0
  26. {datachain-0.24.3 → datachain-0.24.5}/.gitignore +0 -0
  27. {datachain-0.24.3 → datachain-0.24.5}/CODE_OF_CONDUCT.rst +0 -0
  28. {datachain-0.24.3 → datachain-0.24.5}/LICENSE +0 -0
  29. {datachain-0.24.3 → datachain-0.24.5}/README.rst +0 -0
  30. {datachain-0.24.3 → datachain-0.24.5}/docs/assets/captioned_cartoons.png +0 -0
  31. {datachain-0.24.3 → datachain-0.24.5}/docs/assets/datachain-white.svg +0 -0
  32. {datachain-0.24.3 → datachain-0.24.5}/docs/assets/datachain.svg +0 -0
  33. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/auth/login.md +0 -0
  34. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/auth/logout.md +0 -0
  35. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/auth/team.md +0 -0
  36. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/auth/token.md +0 -0
  37. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/index.md +0 -0
  38. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/job/cancel.md +0 -0
  39. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/job/clusters.md +0 -0
  40. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/job/logs.md +0 -0
  41. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/job/ls.md +0 -0
  42. {datachain-0.24.3 → datachain-0.24.5}/docs/commands/job/run.md +0 -0
  43. {datachain-0.24.3 → datachain-0.24.5}/docs/contributing.md +0 -0
  44. {datachain-0.24.3 → datachain-0.24.5}/docs/css/github-permalink-style.css +0 -0
  45. {datachain-0.24.3 → datachain-0.24.5}/docs/examples.md +0 -0
  46. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/db_migrations.md +0 -0
  47. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/delta.md +0 -0
  48. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/env.md +0 -0
  49. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/index.md +0 -0
  50. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/processing.md +0 -0
  51. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/remotes.md +0 -0
  52. {datachain-0.24.3 → datachain-0.24.5}/docs/guide/retry.md +0 -0
  53. {datachain-0.24.3 → datachain-0.24.5}/docs/index.md +0 -0
  54. {datachain-0.24.3 → datachain-0.24.5}/docs/overrides/main.html +0 -0
  55. {datachain-0.24.3 → datachain-0.24.5}/docs/quick-start.md +0 -0
  56. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/arrowrow.md +0 -0
  57. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/bbox.md +0 -0
  58. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/file.md +0 -0
  59. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/imagefile.md +0 -0
  60. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/index.md +0 -0
  61. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/pose.md +0 -0
  62. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/segment.md +0 -0
  63. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/tarvfile.md +0 -0
  64. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/textfile.md +0 -0
  65. {datachain-0.24.3 → datachain-0.24.5}/docs/references/data-types/videofile.md +0 -0
  66. {datachain-0.24.3 → datachain-0.24.5}/docs/references/datachain.md +0 -0
  67. {datachain-0.24.3 → datachain-0.24.5}/docs/references/func.md +0 -0
  68. {datachain-0.24.3 → datachain-0.24.5}/docs/references/index.md +0 -0
  69. {datachain-0.24.3 → datachain-0.24.5}/docs/references/toolkit.md +0 -0
  70. {datachain-0.24.3 → datachain-0.24.5}/docs/references/torch.md +0 -0
  71. {datachain-0.24.3 → datachain-0.24.5}/docs/references/udf.md +0 -0
  72. {datachain-0.24.3 → datachain-0.24.5}/docs/tutorials.md +0 -0
  73. {datachain-0.24.3 → datachain-0.24.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  74. {datachain-0.24.3 → datachain-0.24.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  75. {datachain-0.24.3 → datachain-0.24.5}/examples/computer_vision/openimage-detect.py +0 -0
  76. {datachain-0.24.3 → datachain-0.24.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
  77. {datachain-0.24.3 → datachain-0.24.5}/examples/computer_vision/ultralytics-pose.py +0 -0
  78. {datachain-0.24.3 → datachain-0.24.5}/examples/computer_vision/ultralytics-segment.py +0 -0
  79. {datachain-0.24.3 → datachain-0.24.5}/examples/get_started/common_sql_functions.py +0 -0
  80. {datachain-0.24.3 → datachain-0.24.5}/examples/get_started/json-csv-reader.py +0 -0
  81. {datachain-0.24.3 → datachain-0.24.5}/examples/get_started/torch-loader.py +0 -0
  82. {datachain-0.24.3 → datachain-0.24.5}/examples/get_started/udfs/parallel.py +0 -0
  83. {datachain-0.24.3 → datachain-0.24.5}/examples/get_started/udfs/simple.py +0 -0
  84. {datachain-0.24.3 → datachain-0.24.5}/examples/get_started/udfs/stateful.py +0 -0
  85. {datachain-0.24.3 → datachain-0.24.5}/examples/incremental_processing/delta.py +0 -0
  86. {datachain-0.24.3 → datachain-0.24.5}/examples/incremental_processing/retry.py +0 -0
  87. {datachain-0.24.3 → datachain-0.24.5}/examples/incremental_processing/utils.py +0 -0
  88. {datachain-0.24.3 → datachain-0.24.5}/examples/llm_and_nlp/claude-query.py +0 -0
  89. {datachain-0.24.3 → datachain-0.24.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  90. {datachain-0.24.3 → datachain-0.24.5}/examples/multimodal/clip_inference.py +0 -0
  91. {datachain-0.24.3 → datachain-0.24.5}/examples/multimodal/hf_pipeline.py +0 -0
  92. {datachain-0.24.3 → datachain-0.24.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
  93. {datachain-0.24.3 → datachain-0.24.5}/examples/multimodal/wds.py +0 -0
  94. {datachain-0.24.3 → datachain-0.24.5}/examples/multimodal/wds_filtered.py +0 -0
  95. {datachain-0.24.3 → datachain-0.24.5}/mkdocs.yml +0 -0
  96. {datachain-0.24.3 → datachain-0.24.5}/noxfile.py +0 -0
  97. {datachain-0.24.3 → datachain-0.24.5}/pyproject.toml +0 -0
  98. {datachain-0.24.3 → datachain-0.24.5}/setup.cfg +0 -0
  99. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/__init__.py +0 -0
  100. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/__main__.py +0 -0
  101. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/asyn.py +0 -0
  102. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cache.py +0 -0
  103. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/catalog/__init__.py +0 -0
  104. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/catalog/datasource.py +0 -0
  105. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/catalog/loader.py +0 -0
  106. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/__init__.py +0 -0
  107. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/__init__.py +0 -0
  108. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/datasets.py +0 -0
  109. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/du.py +0 -0
  110. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/index.py +0 -0
  111. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/ls.py +0 -0
  112. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/misc.py +0 -0
  113. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/query.py +0 -0
  114. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/commands/show.py +0 -0
  115. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/parser/__init__.py +0 -0
  116. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/parser/job.py +0 -0
  117. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/parser/studio.py +0 -0
  118. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/parser/utils.py +0 -0
  119. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/cli/utils.py +0 -0
  120. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/__init__.py +0 -0
  121. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/azure.py +0 -0
  122. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/fileslice.py +0 -0
  123. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/fsspec.py +0 -0
  124. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/gcs.py +0 -0
  125. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/hf.py +0 -0
  126. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/local.py +0 -0
  127. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/client/s3.py +0 -0
  128. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/config.py +0 -0
  129. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/__init__.py +0 -0
  130. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/db_engine.py +0 -0
  131. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/job.py +0 -0
  132. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/metastore.py +0 -0
  133. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/schema.py +0 -0
  134. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/serializer.py +0 -0
  135. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/sqlite.py +0 -0
  136. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/data_storage/warehouse.py +0 -0
  137. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/delta.py +0 -0
  138. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/diff/__init__.py +0 -0
  139. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/error.py +0 -0
  140. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/fs/__init__.py +0 -0
  141. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/fs/reference.py +0 -0
  142. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/fs/utils.py +0 -0
  143. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/__init__.py +0 -0
  144. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/aggregate.py +0 -0
  145. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/array.py +0 -0
  146. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/base.py +0 -0
  147. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/conditional.py +0 -0
  148. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/func.py +0 -0
  149. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/numeric.py +0 -0
  150. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/path.py +0 -0
  151. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/random.py +0 -0
  152. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/string.py +0 -0
  153. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/func/window.py +0 -0
  154. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/job.py +0 -0
  155. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/__init__.py +0 -0
  156. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/arrow.py +0 -0
  157. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/clip.py +0 -0
  158. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/convert/__init__.py +0 -0
  159. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/convert/flatten.py +0 -0
  160. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
  161. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
  162. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/convert/unflatten.py +0 -0
  163. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  164. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/data_model.py +0 -0
  165. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dataset_info.py +0 -0
  166. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/__init__.py +0 -0
  167. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/csv.py +0 -0
  168. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/database.py +0 -0
  169. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/hf.py +0 -0
  170. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/json.py +0 -0
  171. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/listings.py +0 -0
  172. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/pandas.py +0 -0
  173. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/parquet.py +0 -0
  174. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/records.py +0 -0
  175. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/storage.py +0 -0
  176. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/utils.py +0 -0
  177. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/dc/values.py +0 -0
  178. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/file.py +0 -0
  179. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/hf.py +0 -0
  180. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/image.py +0 -0
  181. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/listing.py +0 -0
  182. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/listing_info.py +0 -0
  183. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/meta_formats.py +0 -0
  184. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/model_store.py +0 -0
  185. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/namespaces.py +0 -0
  186. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/projects.py +0 -0
  187. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/pytorch.py +0 -0
  188. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/settings.py +0 -0
  189. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/signal_schema.py +0 -0
  190. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/tar.py +0 -0
  191. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/text.py +0 -0
  192. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/udf.py +0 -0
  193. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/udf_signature.py +0 -0
  194. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/utils.py +0 -0
  195. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/video.py +0 -0
  196. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/webdataset.py +0 -0
  197. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/lib/webdataset_laion.py +0 -0
  198. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/listing.py +0 -0
  199. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/__init__.py +0 -0
  200. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/bbox.py +0 -0
  201. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/pose.py +0 -0
  202. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/segment.py +0 -0
  203. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/ultralytics/__init__.py +0 -0
  204. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/ultralytics/bbox.py +0 -0
  205. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/ultralytics/pose.py +0 -0
  206. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/ultralytics/segment.py +0 -0
  207. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/model/utils.py +0 -0
  208. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/node.py +0 -0
  209. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/nodes_fetcher.py +0 -0
  210. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/nodes_thread_pool.py +0 -0
  211. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/progress.py +0 -0
  212. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/py.typed +0 -0
  213. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/__init__.py +0 -0
  214. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/batch.py +0 -0
  215. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/dispatch.py +0 -0
  216. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/metrics.py +0 -0
  217. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/params.py +0 -0
  218. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/queue.py +0 -0
  219. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/schema.py +0 -0
  220. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/session.py +0 -0
  221. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/udf.py +0 -0
  222. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/query/utils.py +0 -0
  223. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/remote/__init__.py +0 -0
  224. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/remote/studio.py +0 -0
  225. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/script_meta.py +0 -0
  226. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/semver.py +0 -0
  227. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/__init__.py +0 -0
  228. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/default/__init__.py +0 -0
  229. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/default/base.py +0 -0
  230. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/__init__.py +0 -0
  231. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/aggregate.py +0 -0
  232. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/array.py +0 -0
  233. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/conditional.py +0 -0
  234. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/numeric.py +0 -0
  235. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/path.py +0 -0
  236. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/random.py +0 -0
  237. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/functions/string.py +0 -0
  238. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/selectable.py +0 -0
  239. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/sqlite/__init__.py +0 -0
  240. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/sqlite/base.py +0 -0
  241. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/sqlite/types.py +0 -0
  242. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/sqlite/vector.py +0 -0
  243. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/types.py +0 -0
  244. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/sql/utils.py +0 -0
  245. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/studio.py +0 -0
  246. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/telemetry.py +0 -0
  247. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/toolkit/__init__.py +0 -0
  248. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/toolkit/split.py +0 -0
  249. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/torch/__init__.py +0 -0
  250. {datachain-0.24.3 → datachain-0.24.5}/src/datachain/utils.py +0 -0
  251. {datachain-0.24.3 → datachain-0.24.5}/src/datachain.egg-info/SOURCES.txt +0 -0
  252. {datachain-0.24.3 → datachain-0.24.5}/src/datachain.egg-info/dependency_links.txt +0 -0
  253. {datachain-0.24.3 → datachain-0.24.5}/src/datachain.egg-info/entry_points.txt +0 -0
  254. {datachain-0.24.3 → datachain-0.24.5}/src/datachain.egg-info/requires.txt +0 -0
  255. {datachain-0.24.3 → datachain-0.24.5}/src/datachain.egg-info/top_level.txt +0 -0
  256. {datachain-0.24.3 → datachain-0.24.5}/tests/__init__.py +0 -0
  257. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/__init__.py +0 -0
  258. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/conftest.py +0 -0
  259. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  260. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/datasets/.dvc/config +0 -0
  261. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/datasets/.gitignore +0 -0
  262. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  263. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/test_datachain.py +0 -0
  264. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/test_ls.py +0 -0
  265. {datachain-0.24.3 → datachain-0.24.5}/tests/benchmarks/test_version.py +0 -0
  266. {datachain-0.24.3 → datachain-0.24.5}/tests/conftest.py +0 -0
  267. {datachain-0.24.3 → datachain-0.24.5}/tests/data.py +0 -0
  268. {datachain-0.24.3 → datachain-0.24.5}/tests/examples/__init__.py +0 -0
  269. {datachain-0.24.3 → datachain-0.24.5}/tests/examples/test_examples.py +0 -0
  270. {datachain-0.24.3 → datachain-0.24.5}/tests/examples/test_wds_e2e.py +0 -0
  271. {datachain-0.24.3 → datachain-0.24.5}/tests/examples/wds_data.py +0 -0
  272. {datachain-0.24.3 → datachain-0.24.5}/tests/func/__init__.py +0 -0
  273. {datachain-0.24.3 → datachain-0.24.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  274. {datachain-0.24.3 → datachain-0.24.5}/tests/func/data/lena.jpg +0 -0
  275. {datachain-0.24.3 → datachain-0.24.5}/tests/func/fake-service-account-credentials.json +0 -0
  276. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/__init__.py +0 -0
  277. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/test_aggregate.py +0 -0
  278. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/test_array.py +0 -0
  279. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/test_conditional.py +0 -0
  280. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/test_numeric.py +0 -0
  281. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/test_path.py +0 -0
  282. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/test_random.py +0 -0
  283. {datachain-0.24.3 → datachain-0.24.5}/tests/func/functions/test_string.py +0 -0
  284. {datachain-0.24.3 → datachain-0.24.5}/tests/func/model/__init__.py +0 -0
  285. {datachain-0.24.3 → datachain-0.24.5}/tests/func/model/data/running-mask0.png +0 -0
  286. {datachain-0.24.3 → datachain-0.24.5}/tests/func/model/data/running-mask1.png +0 -0
  287. {datachain-0.24.3 → datachain-0.24.5}/tests/func/model/data/running.jpg +0 -0
  288. {datachain-0.24.3 → datachain-0.24.5}/tests/func/model/data/ships.jpg +0 -0
  289. {datachain-0.24.3 → datachain-0.24.5}/tests/func/model/test_yolo.py +0 -0
  290. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_batching.py +0 -0
  291. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_catalog.py +0 -0
  292. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_client.py +0 -0
  293. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_cloud_transfer.py +0 -0
  294. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_data_storage.py +0 -0
  295. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_datachain.py +0 -0
  296. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_datachain_merge.py +0 -0
  297. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_dataset_query.py +0 -0
  298. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_datasets.py +0 -0
  299. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_delta.py +0 -0
  300. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_feature_pickling.py +0 -0
  301. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_file.py +0 -0
  302. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_hf.py +0 -0
  303. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_hidden_field.py +0 -0
  304. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_image.py +0 -0
  305. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_listing.py +0 -0
  306. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_ls.py +0 -0
  307. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_meta_formats.py +0 -0
  308. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_metastore.py +0 -0
  309. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_metrics.py +0 -0
  310. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_pull.py +0 -0
  311. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_pytorch.py +0 -0
  312. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_query.py +0 -0
  313. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_read_database.py +0 -0
  314. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_read_dataset_version_specifiers.py +0 -0
  315. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_retry.py +0 -0
  316. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_session.py +0 -0
  317. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_toolkit.py +0 -0
  318. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_video.py +0 -0
  319. {datachain-0.24.3 → datachain-0.24.5}/tests/func/test_warehouse.py +0 -0
  320. {datachain-0.24.3 → datachain-0.24.5}/tests/scripts/feature_class.py +0 -0
  321. {datachain-0.24.3 → datachain-0.24.5}/tests/scripts/feature_class_exception.py +0 -0
  322. {datachain-0.24.3 → datachain-0.24.5}/tests/scripts/feature_class_parallel.py +0 -0
  323. {datachain-0.24.3 → datachain-0.24.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  324. {datachain-0.24.3 → datachain-0.24.5}/tests/scripts/name_len_slow.py +0 -0
  325. {datachain-0.24.3 → datachain-0.24.5}/tests/test_atomicity.py +0 -0
  326. {datachain-0.24.3 → datachain-0.24.5}/tests/test_cli_e2e.py +0 -0
  327. {datachain-0.24.3 → datachain-0.24.5}/tests/test_cli_studio.py +0 -0
  328. {datachain-0.24.3 → datachain-0.24.5}/tests/test_import_time.py +0 -0
  329. {datachain-0.24.3 → datachain-0.24.5}/tests/test_query_e2e.py +0 -0
  330. {datachain-0.24.3 → datachain-0.24.5}/tests/test_telemetry.py +0 -0
  331. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/__init__.py +0 -0
  332. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/__init__.py +0 -0
  333. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/conftest.py +0 -0
  334. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_arrow.py +0 -0
  335. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_clip.py +0 -0
  336. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  337. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_datachain_merge.py +0 -0
  338. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_diff.py +0 -0
  339. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_feature.py +0 -0
  340. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_feature_utils.py +0 -0
  341. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_file.py +0 -0
  342. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_hf.py +0 -0
  343. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_image.py +0 -0
  344. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_listing_info.py +0 -0
  345. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_namespace.py +0 -0
  346. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_project.py +0 -0
  347. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_python_to_sql.py +0 -0
  348. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_schema.py +0 -0
  349. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_signal_schema.py +0 -0
  350. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_sql_to_python.py +0 -0
  351. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_text.py +0 -0
  352. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_udf.py +0 -0
  353. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_udf_signature.py +0 -0
  354. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_utils.py +0 -0
  355. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/lib/test_webdataset.py +0 -0
  356. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/model/__init__.py +0 -0
  357. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/model/test_bbox.py +0 -0
  358. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/model/test_pose.py +0 -0
  359. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/model/test_segment.py +0 -0
  360. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/model/test_utils.py +0 -0
  361. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/__init__.py +0 -0
  362. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/sqlite/__init__.py +0 -0
  363. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/sqlite/test_types.py +0 -0
  364. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
  365. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/test_array.py +0 -0
  366. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/test_conditional.py +0 -0
  367. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/test_path.py +0 -0
  368. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/test_random.py +0 -0
  369. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/test_selectable.py +0 -0
  370. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/sql/test_string.py +0 -0
  371. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_asyn.py +0 -0
  372. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_cache.py +0 -0
  373. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_catalog.py +0 -0
  374. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_catalog_loader.py +0 -0
  375. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_cli_parsing.py +0 -0
  376. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_client.py +0 -0
  377. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_client_gcs.py +0 -0
  378. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_client_s3.py +0 -0
  379. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_config.py +0 -0
  380. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_data_storage.py +0 -0
  381. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_database_engine.py +0 -0
  382. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_dataset.py +0 -0
  383. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_dispatch.py +0 -0
  384. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_fileslice.py +0 -0
  385. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_func.py +0 -0
  386. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_listing.py +0 -0
  387. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_metastore.py +0 -0
  388. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_module_exports.py +0 -0
  389. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_pytorch.py +0 -0
  390. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_query.py +0 -0
  391. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_query_metrics.py +0 -0
  392. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_query_params.py +0 -0
  393. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_script_meta.py +0 -0
  394. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_semver.py +0 -0
  395. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_serializer.py +0 -0
  396. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_session.py +0 -0
  397. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_utils.py +0 -0
  398. {datachain-0.24.3 → datachain-0.24.5}/tests/unit/test_warehouse.py +0 -0
  399. {datachain-0.24.3 → datachain-0.24.5}/tests/utils.py +0 -0
@@ -98,7 +98,6 @@ jobs:
98
98
  - name: Run tests
99
99
  # Generate `.test_durations` file with `pytest --store-durations --durations-path ../.github/.test_durations ...`
100
100
  run: >
101
- DATACHAIN_METASTORE_ARG_PROJECT=john
102
101
  PYTHONPATH="$(pwd)/..:${PYTHONPATH}"
103
102
  pytest
104
103
  --config-file=pyproject.toml -rs
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.12.1'
27
+ rev: 'v0.12.2'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.24.3
3
+ Version: 0.24.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -20,13 +20,13 @@ If no namespace or project is specified, DataChain uses defaults depending on wh
20
20
 
21
21
  ### Studio
22
22
 
23
- - **Namespace:** `users`
24
- - **Project:** your username (e.g. `jondoe`)
23
+ - **Namespace:** `@<username>` (e.g. `@jondoe`)
24
+ - **Project:** `default`
25
25
  - Saving without namespace/project:
26
26
 
27
27
  ```python
28
28
  dc.read_values(scores=[1.2, 3.4, 2.5]).save("metrics")
29
- # Saved as users.jondoe.metrics
29
+ # Saved as @jondoe.default.metrics
30
30
  ```
31
31
 
32
32
  ### CLI
@@ -1120,6 +1120,14 @@ class Catalog:
1120
1120
  pull_dataset: bool = False,
1121
1121
  update: bool = False,
1122
1122
  ) -> DatasetRecord:
1123
+ # Intentionally ignore update flag if version is provided. Here only exact
1124
+ # version can be provided and update then doesn't make sense.
1125
+ # It corresponds to a query like this for example:
1126
+ #
1127
+ # dc.read_dataset("some.remote.dataset", version="1.0.0", update=True)
1128
+ if version:
1129
+ update = False
1130
+
1123
1131
  if self.metastore.is_local_dataset(namespace_name) or not update:
1124
1132
  try:
1125
1133
  project = self.metastore.get_project(project_name, namespace_name)
@@ -32,7 +32,7 @@ QUERY_DATASET_PREFIX = "ds_query_"
32
32
  LISTING_PREFIX = "lst__"
33
33
 
34
34
  DEFAULT_DATASET_VERSION = "1.0.0"
35
- DATASET_NAME_RESERVED_CHARS = ["."]
35
+ DATASET_NAME_RESERVED_CHARS = [".", "@"]
36
36
  DATASET_NAME_REPLACEMENT_CHAR = "_"
37
37
 
38
38
 
@@ -21,6 +21,7 @@ from typing import (
21
21
  import orjson
22
22
  import sqlalchemy
23
23
  from pydantic import BaseModel
24
+ from sqlalchemy.sql.elements import ColumnElement
24
25
  from tqdm import tqdm
25
26
 
26
27
  from datachain import semver
@@ -806,11 +807,35 @@ class DataChain:
806
807
  chain.save("new_dataset")
807
808
  ```
808
809
  """
810
+ # Convert string partition_by parameters to Column objects
811
+ processed_partition_by = partition_by
812
+ if partition_by is not None:
813
+ if isinstance(partition_by, (str, Function, ColumnElement)):
814
+ list_partition_by = [partition_by]
815
+ else:
816
+ list_partition_by = list(partition_by)
817
+
818
+ processed_partition_columns: list[ColumnElement] = []
819
+ for col in list_partition_by:
820
+ if isinstance(col, str):
821
+ col_db_name = ColumnMeta.to_db_name(col)
822
+ col_type = self.signals_schema.get_column_type(col_db_name)
823
+ column = Column(col_db_name, python_to_sql(col_type))
824
+ processed_partition_columns.append(column)
825
+ elif isinstance(col, Function):
826
+ column = col.get_column(self.signals_schema)
827
+ processed_partition_columns.append(column)
828
+ else:
829
+ # Assume it's already a ColumnElement
830
+ processed_partition_columns.append(col)
831
+
832
+ processed_partition_by = processed_partition_columns
833
+
809
834
  udf_obj = self._udf_to_obj(Aggregator, func, params, output, signal_map)
810
835
  return self._evolve(
811
836
  query=self._query.generate(
812
837
  udf_obj.to_udf_wrapper(),
813
- partition_by=partition_by,
838
+ partition_by=processed_partition_by,
814
839
  **self._settings.to_dict(),
815
840
  ),
816
841
  signal_schema=udf_obj.output,
@@ -189,6 +189,7 @@ def read_dataset(
189
189
  namespace_name=namespace_name,
190
190
  version=version, # type: ignore[arg-type]
191
191
  session=session,
192
+ update=update,
192
193
  )
193
194
 
194
195
  signals_schema = SignalSchema({"sys": Sys})
@@ -6,7 +6,7 @@ from typing import Any, Optional, TypeVar
6
6
  from datachain.error import InvalidNamespaceNameError
7
7
 
8
8
  N = TypeVar("N", bound="Namespace")
9
- NAMESPACE_NAME_RESERVED_CHARS = ["."]
9
+ NAMESPACE_NAME_RESERVED_CHARS = [".", "@"]
10
10
 
11
11
 
12
12
  @dataclass(frozen=True)
@@ -7,7 +7,7 @@ from datachain.error import InvalidProjectNameError
7
7
  from datachain.namespace import Namespace
8
8
 
9
9
  P = TypeVar("P", bound="Project")
10
- PROJECT_NAME_RESERVED_CHARS = ["."]
10
+ PROJECT_NAME_RESERVED_CHARS = [".", "@"]
11
11
 
12
12
 
13
13
  @dataclass(frozen=True)
@@ -82,7 +82,10 @@ if TYPE_CHECKING:
82
82
  INSERT_BATCH_SIZE = 10000
83
83
 
84
84
  PartitionByType = Union[
85
- Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
85
+ str,
86
+ Function,
87
+ ColumnElement,
88
+ Sequence[Union[str, Function, ColumnElement]],
86
89
  ]
87
90
  JoinPredicateType = Union[str, ColumnClause, ColumnElement]
88
91
  DatasetDependencyType = tuple["DatasetRecord", str]
@@ -1142,6 +1145,7 @@ class DatasetQuery:
1142
1145
  project_name=project_name,
1143
1146
  version=version,
1144
1147
  pull_dataset=True,
1148
+ update=update,
1145
1149
  )
1146
1150
  )
1147
1151
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.24.3
3
+ Version: 0.24.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -362,8 +362,16 @@ def test_read_dataset_remote_update_flag(
362
362
  assert dc.datasets().to_values("version") == ["1.0.0"]
363
363
  assert ds1.to_values("version")[0] == "1.0.0"
364
364
 
365
+ # Read without update and version returns a cached version
366
+ ds1 = dc.read_dataset(
367
+ f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
368
+ session=test_session,
369
+ )
370
+ assert dc.datasets().to_values("version") == ["1.0.0"]
371
+ assert ds1.to_values("version")[0] == "1.0.0"
372
+
365
373
  # Second read with update=True with the exact version
366
- # returns the same
374
+ # returns the same dataset version
367
375
  ds2 = dc.read_dataset(
368
376
  f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
369
377
  version="1.0.0",
@@ -385,9 +393,7 @@ def test_read_dataset_remote_update_flag(
385
393
  assert dc.datasets().to_values("version") == ["1.0.0"]
386
394
  assert ds3.to_values("version")[0] == "1.0.0"
387
395
 
388
- # Finally, read with update=False even with version specifier
389
- # that allows for newer version still bring the same version
390
- # as the one already downloaded
396
+ # Finally, read with update=True brings the latest version
391
397
  ds4 = dc.read_dataset(
392
398
  f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
393
399
  version=">=1.0.0",
@@ -399,6 +405,45 @@ def test_read_dataset_remote_update_flag(
399
405
  assert dc.datasets().to_values("version") == ["1.0.0", "2.0.0"]
400
406
 
401
407
 
408
+ @skip_if_not_sqlite
409
+ def test_read_dataset_remote_update_flag_no_version(
410
+ studio_token,
411
+ test_session,
412
+ remote_dataset_multi_version,
413
+ mock_dataset_info_endpoint,
414
+ mock_export_endpoint_with_urls,
415
+ mock_export_status_completed,
416
+ mock_s3_parquet_download,
417
+ mock_dataset_rows_fetcher_status_check,
418
+ requests_mock,
419
+ ):
420
+ """Test read_dataset with update=True flag to force remote check."""
421
+
422
+ # Mock the Studio API responses
423
+ mock_dataset_info_endpoint(remote_dataset_multi_version)
424
+ mock_s3_parquet_download()
425
+
426
+ # First read - downloads version 1.0.0
427
+ ds1 = dc.read_dataset(
428
+ f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
429
+ version="1.0.0",
430
+ session=test_session,
431
+ )
432
+ assert dc.datasets().to_values("version") == ["1.0.0"]
433
+ assert ds1.to_values("version")[0] == "1.0.0"
434
+
435
+ # Read with update=True w/o version specifier also
436
+ # checks the most recent remote version and brings it
437
+ ds4 = dc.read_dataset(
438
+ f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
439
+ update=True,
440
+ session=test_session,
441
+ )
442
+
443
+ assert ds4.to_values("version")[0] == "2.0.0"
444
+ assert dc.datasets().to_values("version") == ["1.0.0", "2.0.0"]
445
+
446
+
402
447
  @skip_if_not_sqlite
403
448
  def test_read_dataset_remote_version_specifiers(
404
449
  studio_token,
@@ -3595,3 +3595,60 @@ def test_save_create_project_not_allowed(test_session, allow_create_project):
3595
3595
  dc.read_values(fib=[1, 1, 2, 3, 5, 8], session=test_session).save(
3596
3596
  "dev.numbers.fibonacci"
3597
3597
  )
3598
+
3599
+
3600
+ def test_agg_partition_by_string_notation(test_session):
3601
+ """Test that agg method supports string notation for partition_by."""
3602
+
3603
+ class _ImageGroup(BaseModel):
3604
+ name: str
3605
+ size: int
3606
+
3607
+ def func(key, val) -> Iterator[tuple[File, _ImageGroup]]:
3608
+ n = "-".join(key)
3609
+ v = sum(val)
3610
+ yield File(path=n), _ImageGroup(name=n, size=v)
3611
+
3612
+ keys = ["n1", "n2", "n1"]
3613
+ values = [1, 5, 9]
3614
+
3615
+ # Test using string notation (NEW functionality)
3616
+ ds = dc.read_values(key=keys, val=values, session=test_session).agg(
3617
+ x=func,
3618
+ partition_by="key", # String notation instead of C("key")
3619
+ )
3620
+
3621
+ assert ds.order_by("x_1.name").to_values("x_1.name") == ["n1-n1", "n2"]
3622
+ assert ds.order_by("x_1.size").to_values("x_1.size") == [5, 10]
3623
+
3624
+
3625
+ def test_agg_partition_by_string_sequence(test_session):
3626
+ """Test that agg method supports sequence of strings for partition_by."""
3627
+
3628
+ class _ImageGroup(BaseModel):
3629
+ name: str
3630
+ size: int
3631
+
3632
+ def func(key1, key2, val) -> Iterator[tuple[File, _ImageGroup]]:
3633
+ n = f"{key1[0]}-{key2[0]}"
3634
+ v = sum(val)
3635
+ yield File(path=n), _ImageGroup(name=n, size=v)
3636
+
3637
+ key1_values = ["a", "a", "b"]
3638
+ key2_values = ["x", "y", "x"]
3639
+ values = [1, 5, 9]
3640
+
3641
+ # Test using sequence of strings (NEW functionality)
3642
+ ds = dc.read_values(
3643
+ key1=key1_values, key2=key2_values, val=values, session=test_session
3644
+ ).agg(
3645
+ x=func,
3646
+ partition_by=["key1", "key2"], # Sequence of strings
3647
+ )
3648
+
3649
+ result_names = ds.order_by("x_1.name").to_values("x_1.name")
3650
+ result_sizes = ds.order_by("x_1.size").to_values("x_1.size")
3651
+
3652
+ # Should have 3 partitions: (a,x), (a,y), (b,x)
3653
+ assert len(result_names) == 3
3654
+ assert len(result_sizes) == 3
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes