datachain 0.20.1__tar.gz → 0.20.3__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (396)
  1. {datachain-0.20.1 → datachain-0.20.3}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.20.1 → datachain-0.20.3}/PKG-INFO +1 -1
  3. {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/json-csv-reader.py +1 -1
  4. {datachain-0.20.1 → datachain-0.20.3}/examples/incremental_processing/delta.py +1 -1
  5. {datachain-0.20.1 → datachain-0.20.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +15 -5
  6. {datachain-0.20.1 → datachain-0.20.3}/pyproject.toml +2 -1
  7. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/__init__.py +2 -3
  8. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cache.py +2 -2
  9. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/catalog.py +3 -3
  10. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/ls.py +2 -2
  11. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/fsspec.py +5 -3
  12. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/hf.py +10 -0
  13. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/local.py +4 -4
  14. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/metastore.py +19 -6
  15. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/sqlite.py +2 -2
  16. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/dataset.py +4 -3
  17. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/delta.py +2 -2
  18. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/func.py +1 -1
  19. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/arrow.py +3 -3
  20. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dataset_info.py +4 -4
  21. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/datachain.py +174 -86
  22. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/datasets.py +25 -37
  23. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/storage.py +24 -38
  24. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/file.py +77 -23
  25. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/meta_formats.py +1 -1
  26. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/namespaces.py +16 -18
  27. datachain-0.20.3/src/datachain/lib/projects.py +86 -0
  28. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/pytorch.py +1 -1
  29. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/tar.py +1 -2
  30. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/udf_signature.py +1 -1
  31. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/webdataset.py +30 -20
  32. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/namespace.py +3 -3
  33. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/project.py +5 -5
  34. {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/PKG-INFO +1 -1
  35. {datachain-0.20.1 → datachain-0.20.3}/tests/conftest.py +6 -11
  36. {datachain-0.20.1 → datachain-0.20.3}/tests/examples/test_wds_e2e.py +5 -5
  37. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_aggregate.py +7 -9
  38. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_array.py +20 -21
  39. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_conditional.py +6 -7
  40. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_numeric.py +4 -5
  41. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_path.py +6 -8
  42. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_random.py +3 -6
  43. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/test_string.py +6 -7
  44. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_datachain.py +31 -36
  45. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_delta.py +15 -29
  46. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_file.py +33 -7
  47. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_listing.py +1 -1
  48. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_pull.py +3 -4
  49. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_retry.py +6 -8
  50. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_toolkit.py +2 -2
  51. {datachain-0.20.1 → datachain-0.20.3}/tests/test_atomicity.py +1 -2
  52. {datachain-0.20.1 → datachain-0.20.3}/tests/test_import_time.py +2 -2
  53. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_datachain.py +170 -119
  54. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_datachain_bootstrap.py +3 -3
  55. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_datachain_merge.py +11 -11
  56. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_diff.py +43 -45
  57. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_feature_utils.py +2 -2
  58. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_file.py +50 -8
  59. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_listing_info.py +3 -5
  60. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_namespace.py +18 -16
  61. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_project.py +42 -57
  62. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_schema.py +1 -4
  63. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_dataset.py +18 -0
  64. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_func.py +149 -125
  65. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_session.py +1 -2
  66. {datachain-0.20.1 → datachain-0.20.3}/tests/utils.py +1 -1
  67. datachain-0.20.1/src/datachain/lib/projects.py +0 -86
  68. {datachain-0.20.1 → datachain-0.20.3}/.cruft.json +0 -0
  69. {datachain-0.20.1 → datachain-0.20.3}/.gitattributes +0 -0
  70. {datachain-0.20.1 → datachain-0.20.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  71. {datachain-0.20.1 → datachain-0.20.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  72. {datachain-0.20.1 → datachain-0.20.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  73. {datachain-0.20.1 → datachain-0.20.3}/.github/codecov.yaml +0 -0
  74. {datachain-0.20.1 → datachain-0.20.3}/.github/dependabot.yml +0 -0
  75. {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/benchmarks.yml +0 -0
  76. {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/release.yml +0 -0
  77. {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/tests-studio.yml +0 -0
  78. {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/tests.yml +0 -0
  79. {datachain-0.20.1 → datachain-0.20.3}/.github/workflows/update-template.yaml +0 -0
  80. {datachain-0.20.1 → datachain-0.20.3}/.gitignore +0 -0
  81. {datachain-0.20.1 → datachain-0.20.3}/CODE_OF_CONDUCT.rst +0 -0
  82. {datachain-0.20.1 → datachain-0.20.3}/LICENSE +0 -0
  83. {datachain-0.20.1 → datachain-0.20.3}/README.rst +0 -0
  84. {datachain-0.20.1 → datachain-0.20.3}/docs/assets/captioned_cartoons.png +0 -0
  85. {datachain-0.20.1 → datachain-0.20.3}/docs/assets/datachain-white.svg +0 -0
  86. {datachain-0.20.1 → datachain-0.20.3}/docs/assets/datachain.svg +0 -0
  87. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/login.md +0 -0
  88. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/logout.md +0 -0
  89. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/team.md +0 -0
  90. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/auth/token.md +0 -0
  91. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/index.md +0 -0
  92. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/cancel.md +0 -0
  93. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/clusters.md +0 -0
  94. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/logs.md +0 -0
  95. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/ls.md +0 -0
  96. {datachain-0.20.1 → datachain-0.20.3}/docs/commands/job/run.md +0 -0
  97. {datachain-0.20.1 → datachain-0.20.3}/docs/contributing.md +0 -0
  98. {datachain-0.20.1 → datachain-0.20.3}/docs/css/github-permalink-style.css +0 -0
  99. {datachain-0.20.1 → datachain-0.20.3}/docs/examples.md +0 -0
  100. {datachain-0.20.1 → datachain-0.20.3}/docs/guide/delta.md +0 -0
  101. {datachain-0.20.1 → datachain-0.20.3}/docs/guide/env.md +0 -0
  102. {datachain-0.20.1 → datachain-0.20.3}/docs/guide/index.md +0 -0
  103. {datachain-0.20.1 → datachain-0.20.3}/docs/guide/processing.md +0 -0
  104. {datachain-0.20.1 → datachain-0.20.3}/docs/guide/remotes.md +0 -0
  105. {datachain-0.20.1 → datachain-0.20.3}/docs/guide/retry.md +0 -0
  106. {datachain-0.20.1 → datachain-0.20.3}/docs/index.md +0 -0
  107. {datachain-0.20.1 → datachain-0.20.3}/docs/overrides/main.html +0 -0
  108. {datachain-0.20.1 → datachain-0.20.3}/docs/quick-start.md +0 -0
  109. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/arrowrow.md +0 -0
  110. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/bbox.md +0 -0
  111. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/file.md +0 -0
  112. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/imagefile.md +0 -0
  113. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/index.md +0 -0
  114. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/pose.md +0 -0
  115. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/segment.md +0 -0
  116. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/tarvfile.md +0 -0
  117. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/textfile.md +0 -0
  118. {datachain-0.20.1 → datachain-0.20.3}/docs/references/data-types/videofile.md +0 -0
  119. {datachain-0.20.1 → datachain-0.20.3}/docs/references/datachain.md +0 -0
  120. {datachain-0.20.1 → datachain-0.20.3}/docs/references/func.md +0 -0
  121. {datachain-0.20.1 → datachain-0.20.3}/docs/references/index.md +0 -0
  122. {datachain-0.20.1 → datachain-0.20.3}/docs/references/toolkit.md +0 -0
  123. {datachain-0.20.1 → datachain-0.20.3}/docs/references/torch.md +0 -0
  124. {datachain-0.20.1 → datachain-0.20.3}/docs/references/udf.md +0 -0
  125. {datachain-0.20.1 → datachain-0.20.3}/docs/tutorials.md +0 -0
  126. {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  127. {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  128. {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/openimage-detect.py +0 -0
  129. {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
  130. {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/ultralytics-pose.py +0 -0
  131. {datachain-0.20.1 → datachain-0.20.3}/examples/computer_vision/ultralytics-segment.py +0 -0
  132. {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/common_sql_functions.py +0 -0
  133. {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/torch-loader.py +0 -0
  134. {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/udfs/parallel.py +0 -0
  135. {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/udfs/simple.py +0 -0
  136. {datachain-0.20.1 → datachain-0.20.3}/examples/get_started/udfs/stateful.py +0 -0
  137. {datachain-0.20.1 → datachain-0.20.3}/examples/incremental_processing/retry.py +0 -0
  138. {datachain-0.20.1 → datachain-0.20.3}/examples/incremental_processing/utils.py +0 -0
  139. {datachain-0.20.1 → datachain-0.20.3}/examples/llm_and_nlp/claude-query.py +0 -0
  140. {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/clip_inference.py +0 -0
  141. {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/hf_pipeline.py +0 -0
  142. {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
  143. {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/wds.py +0 -0
  144. {datachain-0.20.1 → datachain-0.20.3}/examples/multimodal/wds_filtered.py +0 -0
  145. {datachain-0.20.1 → datachain-0.20.3}/mkdocs.yml +0 -0
  146. {datachain-0.20.1 → datachain-0.20.3}/noxfile.py +0 -0
  147. {datachain-0.20.1 → datachain-0.20.3}/setup.cfg +0 -0
  148. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/__main__.py +0 -0
  149. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/asyn.py +0 -0
  150. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/__init__.py +0 -0
  151. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/datasource.py +0 -0
  152. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/catalog/loader.py +0 -0
  153. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/__init__.py +0 -0
  154. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/__init__.py +0 -0
  155. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/datasets.py +0 -0
  156. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/du.py +0 -0
  157. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/index.py +0 -0
  158. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/misc.py +0 -0
  159. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/query.py +0 -0
  160. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/commands/show.py +0 -0
  161. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/__init__.py +0 -0
  162. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/job.py +0 -0
  163. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/studio.py +0 -0
  164. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/parser/utils.py +0 -0
  165. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/cli/utils.py +0 -0
  166. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/__init__.py +0 -0
  167. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/azure.py +0 -0
  168. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/fileslice.py +0 -0
  169. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/gcs.py +0 -0
  170. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/client/s3.py +0 -0
  171. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/config.py +0 -0
  172. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/__init__.py +0 -0
  173. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/db_engine.py +0 -0
  174. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/job.py +0 -0
  175. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/schema.py +0 -0
  176. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/serializer.py +0 -0
  177. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/data_storage/warehouse.py +0 -0
  178. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/diff/__init__.py +0 -0
  179. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/error.py +0 -0
  180. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/fs/__init__.py +0 -0
  181. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/fs/reference.py +0 -0
  182. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/fs/utils.py +0 -0
  183. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/__init__.py +0 -0
  184. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/aggregate.py +0 -0
  185. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/array.py +0 -0
  186. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/base.py +0 -0
  187. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/conditional.py +0 -0
  188. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/numeric.py +0 -0
  189. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/path.py +0 -0
  190. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/random.py +0 -0
  191. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/string.py +0 -0
  192. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/func/window.py +0 -0
  193. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/job.py +0 -0
  194. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/__init__.py +0 -0
  195. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/clip.py +0 -0
  196. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/__init__.py +0 -0
  197. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/flatten.py +0 -0
  198. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
  199. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
  200. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/unflatten.py +0 -0
  201. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  202. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/data_model.py +0 -0
  203. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/__init__.py +0 -0
  204. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/csv.py +0 -0
  205. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/database.py +0 -0
  206. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/hf.py +0 -0
  207. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/json.py +0 -0
  208. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/listings.py +0 -0
  209. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/pandas.py +0 -0
  210. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/parquet.py +0 -0
  211. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/records.py +0 -0
  212. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/utils.py +0 -0
  213. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/dc/values.py +0 -0
  214. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/hf.py +0 -0
  215. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/image.py +0 -0
  216. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/listing.py +0 -0
  217. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/listing_info.py +0 -0
  218. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/model_store.py +0 -0
  219. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/settings.py +0 -0
  220. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/signal_schema.py +0 -0
  221. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/text.py +0 -0
  222. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/udf.py +0 -0
  223. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/utils.py +0 -0
  224. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/video.py +0 -0
  225. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/lib/webdataset_laion.py +0 -0
  226. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/listing.py +0 -0
  227. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/__init__.py +0 -0
  228. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/bbox.py +0 -0
  229. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/pose.py +0 -0
  230. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/segment.py +0 -0
  231. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/__init__.py +0 -0
  232. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/bbox.py +0 -0
  233. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/pose.py +0 -0
  234. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/ultralytics/segment.py +0 -0
  235. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/model/utils.py +0 -0
  236. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/node.py +0 -0
  237. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/nodes_fetcher.py +0 -0
  238. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/nodes_thread_pool.py +0 -0
  239. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/progress.py +0 -0
  240. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/py.typed +0 -0
  241. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/__init__.py +0 -0
  242. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/batch.py +0 -0
  243. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/dataset.py +0 -0
  244. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/dispatch.py +0 -0
  245. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/metrics.py +0 -0
  246. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/params.py +0 -0
  247. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/queue.py +0 -0
  248. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/schema.py +0 -0
  249. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/session.py +0 -0
  250. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/udf.py +0 -0
  251. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/query/utils.py +0 -0
  252. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/remote/__init__.py +0 -0
  253. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/remote/studio.py +0 -0
  254. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/script_meta.py +0 -0
  255. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/semver.py +0 -0
  256. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/__init__.py +0 -0
  257. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/default/__init__.py +0 -0
  258. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/default/base.py +0 -0
  259. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/__init__.py +0 -0
  260. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/aggregate.py +0 -0
  261. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/array.py +0 -0
  262. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/conditional.py +0 -0
  263. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/numeric.py +0 -0
  264. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/path.py +0 -0
  265. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/random.py +0 -0
  266. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/functions/string.py +0 -0
  267. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/selectable.py +0 -0
  268. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/__init__.py +0 -0
  269. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/base.py +0 -0
  270. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/types.py +0 -0
  271. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/sqlite/vector.py +0 -0
  272. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/types.py +0 -0
  273. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/sql/utils.py +0 -0
  274. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/studio.py +0 -0
  275. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/telemetry.py +0 -0
  276. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/toolkit/__init__.py +0 -0
  277. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/toolkit/split.py +0 -0
  278. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/torch/__init__.py +0 -0
  279. {datachain-0.20.1 → datachain-0.20.3}/src/datachain/utils.py +0 -0
  280. {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/SOURCES.txt +0 -0
  281. {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/dependency_links.txt +0 -0
  282. {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/entry_points.txt +0 -0
  283. {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/requires.txt +0 -0
  284. {datachain-0.20.1 → datachain-0.20.3}/src/datachain.egg-info/top_level.txt +0 -0
  285. {datachain-0.20.1 → datachain-0.20.3}/tests/__init__.py +0 -0
  286. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/__init__.py +0 -0
  287. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/conftest.py +0 -0
  288. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  289. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/.dvc/config +0 -0
  290. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/.gitignore +0 -0
  291. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  292. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/test_datachain.py +0 -0
  293. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/test_ls.py +0 -0
  294. {datachain-0.20.1 → datachain-0.20.3}/tests/benchmarks/test_version.py +0 -0
  295. {datachain-0.20.1 → datachain-0.20.3}/tests/data.py +0 -0
  296. {datachain-0.20.1 → datachain-0.20.3}/tests/examples/__init__.py +0 -0
  297. {datachain-0.20.1 → datachain-0.20.3}/tests/examples/test_examples.py +0 -0
  298. {datachain-0.20.1 → datachain-0.20.3}/tests/examples/wds_data.py +0 -0
  299. {datachain-0.20.1 → datachain-0.20.3}/tests/func/__init__.py +0 -0
  300. {datachain-0.20.1 → datachain-0.20.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  301. {datachain-0.20.1 → datachain-0.20.3}/tests/func/data/lena.jpg +0 -0
  302. {datachain-0.20.1 → datachain-0.20.3}/tests/func/fake-service-account-credentials.json +0 -0
  303. {datachain-0.20.1 → datachain-0.20.3}/tests/func/functions/__init__.py +0 -0
  304. {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/__init__.py +0 -0
  305. {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/running-mask0.png +0 -0
  306. {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/running-mask1.png +0 -0
  307. {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/running.jpg +0 -0
  308. {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/data/ships.jpg +0 -0
  309. {datachain-0.20.1 → datachain-0.20.3}/tests/func/model/test_yolo.py +0 -0
  310. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_batching.py +0 -0
  311. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_catalog.py +0 -0
  312. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_client.py +0 -0
  313. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_cloud_transfer.py +0 -0
  314. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_data_storage.py +0 -0
  315. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_datachain_merge.py +0 -0
  316. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_dataset_query.py +0 -0
  317. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_datasets.py +0 -0
  318. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_feature_pickling.py +0 -0
  319. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_hf.py +0 -0
  320. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_hidden_field.py +0 -0
  321. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_image.py +0 -0
  322. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_ls.py +0 -0
  323. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_meta_formats.py +0 -0
  324. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_metastore.py +0 -0
  325. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_metrics.py +0 -0
  326. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_pytorch.py +0 -0
  327. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_query.py +0 -0
  328. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_read_database.py +0 -0
  329. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_session.py +0 -0
  330. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_video.py +0 -0
  331. {datachain-0.20.1 → datachain-0.20.3}/tests/func/test_warehouse.py +0 -0
  332. {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class.py +0 -0
  333. {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class_exception.py +0 -0
  334. {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class_parallel.py +0 -0
  335. {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  336. {datachain-0.20.1 → datachain-0.20.3}/tests/scripts/name_len_slow.py +0 -0
  337. {datachain-0.20.1 → datachain-0.20.3}/tests/test_cli_e2e.py +0 -0
  338. {datachain-0.20.1 → datachain-0.20.3}/tests/test_cli_studio.py +0 -0
  339. {datachain-0.20.1 → datachain-0.20.3}/tests/test_query_e2e.py +0 -0
  340. {datachain-0.20.1 → datachain-0.20.3}/tests/test_telemetry.py +0 -0
  341. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/__init__.py +0 -0
  342. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/__init__.py +0 -0
  343. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/conftest.py +0 -0
  344. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_arrow.py +0 -0
  345. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_clip.py +0 -0
  346. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_feature.py +0 -0
  347. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_hf.py +0 -0
  348. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_image.py +0 -0
  349. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_python_to_sql.py +0 -0
  350. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_signal_schema.py +0 -0
  351. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_sql_to_python.py +0 -0
  352. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_text.py +0 -0
  353. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_udf.py +0 -0
  354. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_udf_signature.py +0 -0
  355. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_utils.py +0 -0
  356. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/lib/test_webdataset.py +0 -0
  357. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/__init__.py +0 -0
  358. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_bbox.py +0 -0
  359. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_pose.py +0 -0
  360. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_segment.py +0 -0
  361. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/model/test_utils.py +0 -0
  362. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/__init__.py +0 -0
  363. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/sqlite/__init__.py +0 -0
  364. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/sqlite/test_types.py +0 -0
  365. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
  366. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_array.py +0 -0
  367. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_conditional.py +0 -0
  368. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_path.py +0 -0
  369. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_random.py +0 -0
  370. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_selectable.py +0 -0
  371. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/sql/test_string.py +0 -0
  372. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_asyn.py +0 -0
  373. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_cache.py +0 -0
  374. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_catalog.py +0 -0
  375. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_catalog_loader.py +0 -0
  376. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_cli_parsing.py +0 -0
  377. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_client.py +0 -0
  378. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_client_gcs.py +0 -0
  379. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_client_s3.py +0 -0
  380. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_config.py +0 -0
  381. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_data_storage.py +0 -0
  382. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_database_engine.py +0 -0
  383. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_dispatch.py +0 -0
  384. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_fileslice.py +0 -0
  385. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_listing.py +0 -0
  386. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_metastore.py +0 -0
  387. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_module_exports.py +0 -0
  388. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_pytorch.py +0 -0
  389. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_query.py +0 -0
  390. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_query_metrics.py +0 -0
  391. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_query_params.py +0 -0
  392. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_script_meta.py +0 -0
  393. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_semver.py +0 -0
  394. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_serializer.py +0 -0
  395. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_utils.py +0 -0
  396. {datachain-0.20.1 → datachain-0.20.3}/tests/unit/test_warehouse.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.11.13'
27
+ rev: 'v0.12.0'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.20.1
3
+ Version: 0.20.3
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -48,7 +48,7 @@ def main():
48
48
 
49
49
  # Print JSON schema in Pydantic format from main COCO annotation
50
50
  chain = dc.read_storage(uri, anon="True").filter(dc.C("file.path").glob("*.json"))
51
- file = next(chain.limit(1).collect("file"))
51
+ file = chain.limit(1).to_values("file")[0]
52
52
  print(gen_datamodel_code(file, jmespath="@", model_name="Coco"))
53
53
 
54
54
  # Static JSON schema test parsing 3/7 objects
@@ -47,7 +47,7 @@ def process_files_with_delta():
47
47
  print("\nDataset versions:")
48
48
  test_dataset = dc.datasets().filter(C("name") == "test_files")
49
49
 
50
- for version in test_dataset.collect("version"):
50
+ for version in test_dataset.to_iter("version"):
51
51
  print(f"- Version: {version}")
52
52
 
53
53
  # Show the last 3 records to demonstrate the incremental processing
@@ -1,3 +1,5 @@
1
+ import os
2
+
1
3
  from huggingface_hub import InferenceClient
2
4
  from requests import HTTPError
3
5
 
@@ -23,6 +25,7 @@ def eval_dialog(
23
25
  ) -> DialogEval:
24
26
  try:
25
27
  completion = client.chat_completion(
28
+ model="meta-llama/Llama-3.3-70B-Instruct",
26
29
  messages=[
27
30
  {
28
31
  "role": "user",
@@ -31,9 +34,10 @@ def eval_dialog(
31
34
  ],
32
35
  response_format={"type": "json", "value": DialogEval.model_json_schema()},
33
36
  )
34
- except HTTPError:
37
+ except HTTPError as e:
35
38
  return DialogEval(
36
- result="Error", reason="Error while interacting with the Hugging Face API."
39
+ result="Error",
40
+ reason=f"Error while interacting with the Hugging Face API. {e}",
37
41
  )
38
42
 
39
43
  message = completion.choices[0].message
@@ -48,9 +52,15 @@ def eval_dialog(
48
52
  # Save to HF as Parquet. Dataset can be previewed here:
49
53
  # https://huggingface.co/datasets/dvcorg/test-datachain-llm-eval/viewer
50
54
  (
51
- dc.read_csv("hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv")
52
- .settings(parallel=10)
53
- .setup(client=lambda: InferenceClient("meta-llama/Llama-3.1-70B-Instruct"))
55
+ dc.read_csv(
56
+ "hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv", source=False
57
+ )
58
+ .settings(parallel=True)
59
+ .setup(
60
+ client=lambda: InferenceClient(
61
+ provider="hf-inference", api_key=os.environ["HF_TOKEN"]
62
+ )
63
+ )
54
64
  .map(response=eval_dialog)
55
65
  .to_parquet("hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet")
56
66
  )
@@ -221,7 +221,8 @@ ignore = [
221
221
  "PERF203", # perflint - try-except-in-loop, irrelevant for Python>=3.11
222
222
  "PERF401",
223
223
  "D100", # undocumented-public-module
224
- "D205" # one-blank-line-after-class
224
+ "D205", # one-blank-line-after-class
225
+ "PLC0415" # import-outside-top-level
225
226
  ]
226
227
  select = [
227
228
  "B", # flake8-bugbear
@@ -1,4 +1,3 @@
1
- from datachain.lib import namespaces, projects
2
1
  from datachain.lib.data_model import DataModel, DataType, is_chain_type
3
2
  from datachain.lib.dc import (
4
3
  C,
@@ -33,6 +32,7 @@ from datachain.lib.file import (
33
32
  VideoFrame,
34
33
  )
35
34
  from datachain.lib.model_store import ModelStore
35
+ from datachain.lib.projects import create as create_project
36
36
  from datachain.lib.udf import Aggregator, Generator, Mapper
37
37
  from datachain.lib.utils import AbstractUDF, DataChainError
38
38
  from datachain.query import metrics, param
@@ -63,14 +63,13 @@ __all__ = [
63
63
  "VideoFile",
64
64
  "VideoFragment",
65
65
  "VideoFrame",
66
+ "create_project",
66
67
  "datasets",
67
68
  "delete_dataset",
68
69
  "is_chain_type",
69
70
  "listings",
70
71
  "metrics",
71
- "namespaces",
72
72
  "param",
73
- "projects",
74
73
  "read_csv",
75
74
  "read_database",
76
75
  "read_dataset",
@@ -39,7 +39,7 @@ def temporary_cache(
39
39
  cache.destroy()
40
40
 
41
41
 
42
- class Cache:
42
+ class Cache: # noqa: PLW1641
43
43
  def __init__(self, cache_dir: str, tmp_dir: str):
44
44
  self.odb = LocalHashFileDB(
45
45
  LocalFileSystem(),
@@ -76,9 +76,9 @@ class Cache:
76
76
  async def download(
77
77
  self, file: "File", client: "Client", callback: Optional[Callback] = None
78
78
  ) -> None:
79
- from_path = f"{file.source}/{file.path}"
80
79
  from dvc_objects.fs.utils import tmp_fname
81
80
 
81
+ from_path = file.get_uri()
82
82
  odb_fs = self.odb.fs
83
83
  tmp_info = odb_fs.join(self.odb.tmp_dir, tmp_fname()) # type: ignore[arg-type]
84
84
  size = file.size
@@ -1491,13 +1491,13 @@ class Catalog:
1491
1491
 
1492
1492
  namespace = self.metastore.create_namespace(
1493
1493
  remote_ds.project.namespace.name,
1494
- description=remote_ds.project.namespace.description,
1494
+ description=remote_ds.project.namespace.descr,
1495
1495
  uuid=remote_ds.project.namespace.uuid,
1496
1496
  )
1497
1497
  project = self.metastore.create_project(
1498
- remote_ds.project.name,
1499
1498
  namespace.name,
1500
- description=remote_ds.project.description,
1499
+ remote_ds.project.name,
1500
+ description=remote_ds.project.descr,
1501
1501
  uuid=remote_ds.project.uuid,
1502
1502
  )
1503
1503
 
@@ -63,8 +63,8 @@ def ls_local(
63
63
  print(format_ls_entry(entry))
64
64
  else:
65
65
  # Collect results in a list here to prevent interference from `tqdm` and `print`
66
- listing = list(listings().collect("listing"))
67
- for ls in listing:
66
+ listing = listings().to_list("listing")
67
+ for (ls,) in listing:
68
68
  print(format_ls_entry(f"{ls.uri}@v{ls.version}")) # type: ignore[union-attr]
69
69
 
70
70
 
@@ -207,13 +207,14 @@ class Client(ABC):
207
207
  )
208
208
 
209
209
  async def get_current_etag(self, file: "File") -> str:
210
+ file_path = file.get_path_normalized()
210
211
  kwargs = {}
211
212
  if self._is_version_aware():
212
213
  kwargs["version_id"] = file.version
213
214
  info = await self.fs._info(
214
- self.get_full_path(file.path, file.version), **kwargs
215
+ self.get_full_path(file_path, file.version), **kwargs
215
216
  )
216
- return self.info_to_file(info, file.path).etag
217
+ return self.info_to_file(info, file_path).etag
217
218
 
218
219
  def get_file_info(self, path: str, version_id: Optional[str] = None) -> "File":
219
220
  info = self.fs.info(self.get_full_path(path, version_id), version_id=version_id)
@@ -386,7 +387,8 @@ class Client(ABC):
386
387
  return open(cache_path, mode="rb")
387
388
  assert not file.location
388
389
  return FileWrapper(
389
- self.fs.open(self.get_full_path(file.path, file.version)), cb
390
+ self.fs.open(self.get_full_path(file.get_path_normalized(), file.version)),
391
+ cb,
390
392
  ) # type: ignore[return-value]
391
393
 
392
394
  def upload(self, data: bytes, path: str) -> "File":
@@ -21,6 +21,9 @@ def _wrap_class(sync_fs_class):
21
21
  asynchronous to False by default. This is similar to other Async FS
22
22
  we initialize. E.g. it means we don't break things in Jupyter where code
23
23
  run in async.
24
+
25
+ This also fixes write operations by ensuring they are properly forwarded
26
+ to the underlying filesystem without async buffering issues.
24
27
  """
25
28
  from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
26
29
 
@@ -29,6 +32,13 @@ def _wrap_class(sync_fs_class):
29
32
  sync_fs = sync_fs_class(*args, **kwargs)
30
33
  super().__init__(sync_fs, asynchronous=False)
31
34
 
35
+ def open(self, path, mode="rb", **kwargs):
36
+ # Override open to ensure write operations work correctly.
37
+ # It seems to be a bug in the fsspec wrapper. It avoids
38
+ # wrapping open() explicitly but also doesn't redirect it to
39
+ # sync filesystem.
40
+ return self.sync_fs.open(path, mode, **kwargs)
41
+
32
42
  GeneratedAsyncFileSystemWrapper.__name__ = f"Async{sync_fs_class.__name__}Wrapper"
33
43
  return GeneratedAsyncFileSystemWrapper
34
44
 
@@ -99,7 +99,7 @@ class FileClient(Client):
99
99
  )
100
100
 
101
101
  async def get_current_etag(self, file: "File") -> str:
102
- info = self.fs.info(self.get_full_path(file.path))
102
+ info = self.fs.info(self.get_full_path(file.get_path_normalized()))
103
103
  return self.info_to_file(info, "").etag
104
104
 
105
105
  async def get_size(self, path: str, version_id: Optional[str] = None) -> int:
@@ -138,8 +138,8 @@ class FileClient(Client):
138
138
  if not self.use_symlinks:
139
139
  super().fetch_nodes(nodes, shared_progress_bar)
140
140
 
141
- def do_instantiate_object(self, uid, dst):
141
+ def do_instantiate_object(self, file: File, dst: str) -> None:
142
142
  if self.use_symlinks:
143
- os.symlink(Path(self.name, uid.path), dst)
143
+ os.symlink(Path(self.name, file.path), dst)
144
144
  else:
145
- super().do_instantiate_object(uid, dst)
145
+ super().do_instantiate_object(file, dst)
@@ -185,8 +185,8 @@ class AbstractMetastore(ABC, Serializable):
185
185
  @abstractmethod
186
186
  def create_project(
187
187
  self,
188
- name: str,
189
188
  namespace_name: str,
189
+ name: str,
190
190
  description: Optional[str] = None,
191
191
  uuid: Optional[str] = None,
192
192
  ignore_if_exists: bool = True,
@@ -195,8 +195,13 @@ class AbstractMetastore(ABC, Serializable):
195
195
  """Creates new project in specific namespace"""
196
196
 
197
197
  @abstractmethod
198
- def get_project(self, name: str, namespace_name: str, conn=None) -> Project:
199
- """Gets a single project inside some namespace by name"""
198
+ def get_project(
199
+ self, name: str, namespace_name: str, create: bool = False, conn=None
200
+ ) -> Project:
201
+ """
202
+ Gets a single project inside some namespace by name.
203
+ It also creates project if not found and create flag is set to True.
204
+ """
200
205
 
201
206
  @abstractmethod
202
207
  def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
@@ -763,14 +768,18 @@ class AbstractDBMetastore(AbstractMetastore):
763
768
 
764
769
  def create_project(
765
770
  self,
766
- name: str,
767
771
  namespace_name: str,
772
+ name: str,
768
773
  description: Optional[str] = None,
769
774
  uuid: Optional[str] = None,
770
775
  ignore_if_exists: bool = True,
771
776
  **kwargs,
772
777
  ) -> Project:
773
- namespace = self.get_namespace(namespace_name)
778
+ try:
779
+ namespace = self.get_namespace(namespace_name)
780
+ except NamespaceNotFoundError:
781
+ namespace = self.create_namespace(namespace_name)
782
+
774
783
  query = self._projects_insert().values(
775
784
  namespace_id=namespace.id,
776
785
  uuid=uuid or str(uuid4()),
@@ -788,7 +797,9 @@ class AbstractDBMetastore(AbstractMetastore):
788
797
 
789
798
  return self.get_project(name, namespace.name)
790
799
 
791
- def get_project(self, name: str, namespace_name: str, conn=None) -> Project:
800
+ def get_project(
801
+ self, name: str, namespace_name: str, create: bool = False, conn=None
802
+ ) -> Project:
792
803
  """Gets a single project inside some namespace by name"""
793
804
  n = self._namespaces
794
805
  p = self._projects
@@ -803,6 +814,8 @@ class AbstractDBMetastore(AbstractMetastore):
803
814
 
804
815
  rows = list(self.db.execute(query, conn=conn))
805
816
  if not rows:
817
+ if create:
818
+ return self.create_project(namespace_name, name)
806
819
  raise ProjectNotFoundError(
807
820
  f"Project {name} in namespace {namespace_name} not found."
808
821
  )
@@ -469,10 +469,10 @@ class SQLiteMetastore(AbstractDBMetastore):
469
469
  Studio dataset.
470
470
  """
471
471
  system_namespace = self.create_namespace(Namespace.system(), "System namespace")
472
- self.create_project(Project.listing(), system_namespace.name, "Listing project")
472
+ self.create_project(system_namespace.name, Project.listing(), "Listing project")
473
473
 
474
474
  local_namespace = self.create_namespace(Namespace.default(), "Local namespace")
475
- self.create_project(Project.default(), local_namespace.name, "Local project")
475
+ self.create_project(local_namespace.name, Project.default(), "Local project")
476
476
 
477
477
  def _check_schema_version(self) -> None:
478
478
  """
@@ -83,10 +83,11 @@ def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
83
83
  if not name:
84
84
  raise ValueError("Name must be defined to parse it")
85
85
  split = name.split(".")
86
- if len(split) == 3:
87
- return tuple(split) # type: ignore[return-value]
86
+ name = split[-1]
87
+ project_name = split[-2] if len(split) > 1 else None
88
+ namespace_name = split[-3] if len(split) > 2 else None
88
89
 
89
- return None, None, name
90
+ return namespace_name, project_name, name
90
91
 
91
92
 
92
93
  class DatasetDependencyType:
@@ -62,7 +62,7 @@ def _get_delta_chain(
62
62
  )
63
63
 
64
64
  # Calculate diff between source versions
65
- return source_dc_latest.compare(source_dc, on=on, compare=compare, deleted=False)
65
+ return source_dc_latest.diff(source_dc, on=on, compare=compare, deleted=False)
66
66
 
67
67
 
68
68
  def _get_retry_chain(
@@ -237,7 +237,7 @@ def delta_retry_update(
237
237
  return None, None, False
238
238
 
239
239
  latest_dataset = datachain.read_dataset(name, version=latest_version)
240
- compared_chain = latest_dataset.compare(
240
+ compared_chain = latest_dataset.diff(
241
241
  processing_chain,
242
242
  on=right_on or on,
243
243
  added=True,
@@ -25,7 +25,7 @@ if TYPE_CHECKING:
25
25
  ColT = Union[str, Column, ColumnElement, "Func", tuple]
26
26
 
27
27
 
28
- class Func(Function):
28
+ class Func(Function): # noqa: PLW1641
29
29
  """Represents a function to be applied to a column in a SQL query."""
30
30
 
31
31
  def __init__(
@@ -76,7 +76,7 @@ class ArrowGenerator(Generator):
76
76
  fs_path = file.path
77
77
  fs = ReferenceFileSystem({fs_path: [cache_path]})
78
78
  else:
79
- fs, fs_path = file.get_fs(), file.get_path()
79
+ fs, fs_path = file.get_fs(), file.get_fs_path()
80
80
 
81
81
  kwargs = self.kwargs
82
82
  if format := kwargs.get("format"):
@@ -160,8 +160,8 @@ def infer_schema(chain: "DataChain", **kwargs) -> pa.Schema:
160
160
  kwargs["format"] = fix_pyarrow_format(format, parse_options)
161
161
 
162
162
  schemas = []
163
- for file in chain.collect("file"):
164
- ds = dataset(file.get_path(), filesystem=file.get_fs(), **kwargs) # type: ignore[union-attr]
163
+ for (file,) in chain.to_iter("file"):
164
+ ds = dataset(file.get_fs_path(), filesystem=file.get_fs(), **kwargs) # type: ignore[union-attr]
165
165
  schemas.append(ds.schema)
166
166
  if not schemas:
167
167
  raise ValueError(
@@ -22,8 +22,8 @@ if TYPE_CHECKING:
22
22
 
23
23
  class DatasetInfo(DataModel):
24
24
  name: str
25
- namespace_name: str
26
- project_name: str
25
+ namespace: str
26
+ project: str
27
27
  uuid: str = Field(default=str(uuid4()))
28
28
  version: str = Field(default=DEFAULT_DATASET_VERSION)
29
29
  status: int = Field(default=DatasetStatus.CREATED)
@@ -93,8 +93,8 @@ class DatasetInfo(DataModel):
93
93
  return cls(
94
94
  uuid=version.uuid,
95
95
  name=dataset.name,
96
- namespace_name=dataset.project.namespace.name,
97
- project_name=dataset.project.name,
96
+ namespace=dataset.project.namespace.name,
97
+ project=dataset.project.name,
98
98
  version=version.version,
99
99
  status=version.status,
100
100
  created_at=version.created_at,