datachain 0.8.11__tar.gz → 0.8.12__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (309)
  1. {datachain-0.8.11 → datachain-0.8.12}/PKG-INFO +1 -1
  2. datachain-0.8.12/docs/references/func.md +5 -0
  3. {datachain-0.8.11 → datachain-0.8.12}/docs/references/index.md +1 -1
  4. {datachain-0.8.11 → datachain-0.8.12}/mkdocs.yml +1 -1
  5. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/catalog/catalog.py +1 -20
  6. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/__init__.py +0 -8
  7. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/__init__.py +0 -2
  8. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/datasets.py +0 -19
  9. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/parser/__init__.py +0 -25
  10. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/dataset.py +0 -6
  11. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/conditional.py +16 -9
  12. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/func.py +4 -5
  13. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/remote/studio.py +1 -13
  14. {datachain-0.8.11 → datachain-0.8.12}/src/datachain.egg-info/PKG-INFO +1 -1
  15. {datachain-0.8.11 → datachain-0.8.12}/src/datachain.egg-info/SOURCES.txt +1 -1
  16. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_catalog.py +23 -22
  17. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_datachain.py +4 -3
  18. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_datasets.py +3 -3
  19. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_pull.py +0 -32
  20. {datachain-0.8.11 → datachain-0.8.12}/tests/test_cli_studio.py +1 -1
  21. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_datachain.py +23 -42
  22. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_diff.py +20 -20
  23. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_func.py +30 -0
  24. datachain-0.8.11/docs/references/sql.md +0 -18
  25. {datachain-0.8.11 → datachain-0.8.12}/.cruft.json +0 -0
  26. {datachain-0.8.11 → datachain-0.8.12}/.gitattributes +0 -0
  27. {datachain-0.8.11 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  28. {datachain-0.8.11 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  29. {datachain-0.8.11 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  30. {datachain-0.8.11 → datachain-0.8.12}/.github/codecov.yaml +0 -0
  31. {datachain-0.8.11 → datachain-0.8.12}/.github/dependabot.yml +0 -0
  32. {datachain-0.8.11 → datachain-0.8.12}/.github/workflows/benchmarks.yml +0 -0
  33. {datachain-0.8.11 → datachain-0.8.12}/.github/workflows/release.yml +0 -0
  34. {datachain-0.8.11 → datachain-0.8.12}/.github/workflows/tests-studio.yml +0 -0
  35. {datachain-0.8.11 → datachain-0.8.12}/.github/workflows/tests.yml +0 -0
  36. {datachain-0.8.11 → datachain-0.8.12}/.github/workflows/update-template.yaml +0 -0
  37. {datachain-0.8.11 → datachain-0.8.12}/.gitignore +0 -0
  38. {datachain-0.8.11 → datachain-0.8.12}/.pre-commit-config.yaml +0 -0
  39. {datachain-0.8.11 → datachain-0.8.12}/CODE_OF_CONDUCT.rst +0 -0
  40. {datachain-0.8.11 → datachain-0.8.12}/LICENSE +0 -0
  41. {datachain-0.8.11 → datachain-0.8.12}/README.rst +0 -0
  42. {datachain-0.8.11 → datachain-0.8.12}/docs/assets/captioned_cartoons.png +0 -0
  43. {datachain-0.8.11 → datachain-0.8.12}/docs/assets/datachain-white.svg +0 -0
  44. {datachain-0.8.11 → datachain-0.8.12}/docs/assets/datachain.svg +0 -0
  45. {datachain-0.8.11 → datachain-0.8.12}/docs/contributing.md +0 -0
  46. {datachain-0.8.11 → datachain-0.8.12}/docs/css/github-permalink-style.css +0 -0
  47. {datachain-0.8.11 → datachain-0.8.12}/docs/examples.md +0 -0
  48. {datachain-0.8.11 → datachain-0.8.12}/docs/index.md +0 -0
  49. {datachain-0.8.11 → datachain-0.8.12}/docs/overrides/main.html +0 -0
  50. {datachain-0.8.11 → datachain-0.8.12}/docs/quick-start.md +0 -0
  51. {datachain-0.8.11 → datachain-0.8.12}/docs/references/datachain.md +0 -0
  52. {datachain-0.8.11 → datachain-0.8.12}/docs/references/datatype.md +0 -0
  53. {datachain-0.8.11 → datachain-0.8.12}/docs/references/file.md +0 -0
  54. {datachain-0.8.11 → datachain-0.8.12}/docs/references/torch.md +0 -0
  55. {datachain-0.8.11 → datachain-0.8.12}/docs/references/udf.md +0 -0
  56. {datachain-0.8.11 → datachain-0.8.12}/docs/tutorials.md +0 -0
  57. {datachain-0.8.11 → datachain-0.8.12}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  58. {datachain-0.8.11 → datachain-0.8.12}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  59. {datachain-0.8.11 → datachain-0.8.12}/examples/computer_vision/openimage-detect.py +0 -0
  60. {datachain-0.8.11 → datachain-0.8.12}/examples/computer_vision/ultralytics-bbox.py +0 -0
  61. {datachain-0.8.11 → datachain-0.8.12}/examples/computer_vision/ultralytics-pose.py +0 -0
  62. {datachain-0.8.11 → datachain-0.8.12}/examples/computer_vision/ultralytics-segment.py +0 -0
  63. {datachain-0.8.11 → datachain-0.8.12}/examples/get_started/common_sql_functions.py +0 -0
  64. {datachain-0.8.11 → datachain-0.8.12}/examples/get_started/json-csv-reader.py +0 -0
  65. {datachain-0.8.11 → datachain-0.8.12}/examples/get_started/torch-loader.py +0 -0
  66. {datachain-0.8.11 → datachain-0.8.12}/examples/get_started/udfs/parallel.py +0 -0
  67. {datachain-0.8.11 → datachain-0.8.12}/examples/get_started/udfs/simple.py +0 -0
  68. {datachain-0.8.11 → datachain-0.8.12}/examples/get_started/udfs/stateful.py +0 -0
  69. {datachain-0.8.11 → datachain-0.8.12}/examples/llm_and_nlp/claude-query.py +0 -0
  70. {datachain-0.8.11 → datachain-0.8.12}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  71. {datachain-0.8.11 → datachain-0.8.12}/examples/multimodal/clip_inference.py +0 -0
  72. {datachain-0.8.11 → datachain-0.8.12}/examples/multimodal/hf_pipeline.py +0 -0
  73. {datachain-0.8.11 → datachain-0.8.12}/examples/multimodal/openai_image_desc_lib.py +0 -0
  74. {datachain-0.8.11 → datachain-0.8.12}/examples/multimodal/wds.py +0 -0
  75. {datachain-0.8.11 → datachain-0.8.12}/examples/multimodal/wds_filtered.py +0 -0
  76. {datachain-0.8.11 → datachain-0.8.12}/noxfile.py +0 -0
  77. {datachain-0.8.11 → datachain-0.8.12}/pyproject.toml +0 -0
  78. {datachain-0.8.11 → datachain-0.8.12}/setup.cfg +0 -0
  79. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/__init__.py +0 -0
  80. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/__main__.py +0 -0
  81. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/asyn.py +0 -0
  82. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cache.py +0 -0
  83. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/catalog/__init__.py +0 -0
  84. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/catalog/datasource.py +0 -0
  85. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/catalog/loader.py +0 -0
  86. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/du.py +0 -0
  87. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/index.py +0 -0
  88. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/ls.py +0 -0
  89. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/misc.py +0 -0
  90. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/query.py +0 -0
  91. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/commands/show.py +0 -0
  92. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/parser/job.py +0 -0
  93. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/parser/studio.py +0 -0
  94. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/parser/utils.py +0 -0
  95. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/cli/utils.py +0 -0
  96. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/__init__.py +0 -0
  97. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/azure.py +0 -0
  98. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/fileslice.py +0 -0
  99. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/fsspec.py +0 -0
  100. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/gcs.py +0 -0
  101. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/hf.py +0 -0
  102. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/local.py +0 -0
  103. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/client/s3.py +0 -0
  104. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/config.py +0 -0
  105. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/__init__.py +0 -0
  106. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/db_engine.py +0 -0
  107. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/job.py +0 -0
  108. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/metastore.py +0 -0
  109. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/schema.py +0 -0
  110. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/serializer.py +0 -0
  111. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/sqlite.py +0 -0
  112. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/data_storage/warehouse.py +0 -0
  113. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/diff/__init__.py +0 -0
  114. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/error.py +0 -0
  115. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/__init__.py +0 -0
  116. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/aggregate.py +0 -0
  117. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/array.py +0 -0
  118. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/base.py +0 -0
  119. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/numeric.py +0 -0
  120. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/path.py +0 -0
  121. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/random.py +0 -0
  122. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/string.py +0 -0
  123. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/func/window.py +0 -0
  124. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/job.py +0 -0
  125. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/__init__.py +0 -0
  126. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/arrow.py +0 -0
  127. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/clip.py +0 -0
  128. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/convert/__init__.py +0 -0
  129. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/convert/flatten.py +0 -0
  130. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/convert/python_to_sql.py +0 -0
  131. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/convert/sql_to_python.py +0 -0
  132. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/convert/unflatten.py +0 -0
  133. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  134. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/data_model.py +0 -0
  135. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/dataset_info.py +0 -0
  136. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/dc.py +0 -0
  137. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/file.py +0 -0
  138. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/hf.py +0 -0
  139. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/image.py +0 -0
  140. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/listing.py +0 -0
  141. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/listing_info.py +0 -0
  142. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/meta_formats.py +0 -0
  143. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/model_store.py +0 -0
  144. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/pytorch.py +0 -0
  145. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/settings.py +0 -0
  146. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/signal_schema.py +0 -0
  147. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/tar.py +0 -0
  148. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/text.py +0 -0
  149. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/udf.py +0 -0
  150. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/udf_signature.py +0 -0
  151. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/utils.py +0 -0
  152. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/vfile.py +0 -0
  153. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/webdataset.py +0 -0
  154. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/lib/webdataset_laion.py +0 -0
  155. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/listing.py +0 -0
  156. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/__init__.py +0 -0
  157. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/bbox.py +0 -0
  158. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/pose.py +0 -0
  159. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/segment.py +0 -0
  160. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/ultralytics/__init__.py +0 -0
  161. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/ultralytics/bbox.py +0 -0
  162. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/ultralytics/pose.py +0 -0
  163. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/model/ultralytics/segment.py +0 -0
  164. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/node.py +0 -0
  165. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/nodes_fetcher.py +0 -0
  166. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/nodes_thread_pool.py +0 -0
  167. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/progress.py +0 -0
  168. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/py.typed +0 -0
  169. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/__init__.py +0 -0
  170. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/batch.py +0 -0
  171. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/dataset.py +0 -0
  172. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/dispatch.py +0 -0
  173. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/metrics.py +0 -0
  174. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/params.py +0 -0
  175. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/queue.py +0 -0
  176. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/schema.py +0 -0
  177. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/session.py +0 -0
  178. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/udf.py +0 -0
  179. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/query/utils.py +0 -0
  180. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/remote/__init__.py +0 -0
  181. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/__init__.py +0 -0
  182. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/default/__init__.py +0 -0
  183. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/default/base.py +0 -0
  184. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/__init__.py +0 -0
  185. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/aggregate.py +0 -0
  186. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/array.py +0 -0
  187. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/conditional.py +0 -0
  188. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/numeric.py +0 -0
  189. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/path.py +0 -0
  190. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/random.py +0 -0
  191. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/functions/string.py +0 -0
  192. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/selectable.py +0 -0
  193. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/sqlite/__init__.py +0 -0
  194. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/sqlite/base.py +0 -0
  195. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/sqlite/types.py +0 -0
  196. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/sqlite/vector.py +0 -0
  197. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/types.py +0 -0
  198. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/sql/utils.py +0 -0
  199. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/studio.py +0 -0
  200. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/telemetry.py +0 -0
  201. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/toolkit/__init__.py +0 -0
  202. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/toolkit/split.py +0 -0
  203. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/torch/__init__.py +0 -0
  204. {datachain-0.8.11 → datachain-0.8.12}/src/datachain/utils.py +0 -0
  205. {datachain-0.8.11 → datachain-0.8.12}/src/datachain.egg-info/dependency_links.txt +0 -0
  206. {datachain-0.8.11 → datachain-0.8.12}/src/datachain.egg-info/entry_points.txt +0 -0
  207. {datachain-0.8.11 → datachain-0.8.12}/src/datachain.egg-info/requires.txt +0 -0
  208. {datachain-0.8.11 → datachain-0.8.12}/src/datachain.egg-info/top_level.txt +0 -0
  209. {datachain-0.8.11 → datachain-0.8.12}/tests/__init__.py +0 -0
  210. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/__init__.py +0 -0
  211. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/conftest.py +0 -0
  212. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  213. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/datasets/.dvc/config +0 -0
  214. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/datasets/.gitignore +0 -0
  215. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  216. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/test_datachain.py +0 -0
  217. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/test_ls.py +0 -0
  218. {datachain-0.8.11 → datachain-0.8.12}/tests/benchmarks/test_version.py +0 -0
  219. {datachain-0.8.11 → datachain-0.8.12}/tests/conftest.py +0 -0
  220. {datachain-0.8.11 → datachain-0.8.12}/tests/data.py +0 -0
  221. {datachain-0.8.11 → datachain-0.8.12}/tests/examples/__init__.py +0 -0
  222. {datachain-0.8.11 → datachain-0.8.12}/tests/examples/test_examples.py +0 -0
  223. {datachain-0.8.11 → datachain-0.8.12}/tests/examples/test_wds_e2e.py +0 -0
  224. {datachain-0.8.11 → datachain-0.8.12}/tests/examples/wds_data.py +0 -0
  225. {datachain-0.8.11 → datachain-0.8.12}/tests/func/__init__.py +0 -0
  226. {datachain-0.8.11 → datachain-0.8.12}/tests/func/fake-service-account-credentials.json +0 -0
  227. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_client.py +0 -0
  228. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_data_storage.py +0 -0
  229. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_datachain_merge.py +0 -0
  230. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_dataset_query.py +0 -0
  231. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_feature_pickling.py +0 -0
  232. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_file.py +0 -0
  233. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_hf.py +0 -0
  234. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_listing.py +0 -0
  235. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_ls.py +0 -0
  236. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_meta_formats.py +0 -0
  237. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_metrics.py +0 -0
  238. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_pytorch.py +0 -0
  239. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_query.py +0 -0
  240. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_session.py +0 -0
  241. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_toolkit.py +0 -0
  242. {datachain-0.8.11 → datachain-0.8.12}/tests/func/test_warehouse.py +0 -0
  243. {datachain-0.8.11 → datachain-0.8.12}/tests/scripts/feature_class.py +0 -0
  244. {datachain-0.8.11 → datachain-0.8.12}/tests/scripts/feature_class_exception.py +0 -0
  245. {datachain-0.8.11 → datachain-0.8.12}/tests/scripts/feature_class_parallel.py +0 -0
  246. {datachain-0.8.11 → datachain-0.8.12}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  247. {datachain-0.8.11 → datachain-0.8.12}/tests/scripts/name_len_slow.py +0 -0
  248. {datachain-0.8.11 → datachain-0.8.12}/tests/test_atomicity.py +0 -0
  249. {datachain-0.8.11 → datachain-0.8.12}/tests/test_cli_e2e.py +0 -0
  250. {datachain-0.8.11 → datachain-0.8.12}/tests/test_query_e2e.py +0 -0
  251. {datachain-0.8.11 → datachain-0.8.12}/tests/test_telemetry.py +0 -0
  252. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/__init__.py +0 -0
  253. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/__init__.py +0 -0
  254. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/conftest.py +0 -0
  255. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_arrow.py +0 -0
  256. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_clip.py +0 -0
  257. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  258. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_datachain_merge.py +0 -0
  259. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_feature.py +0 -0
  260. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_feature_utils.py +0 -0
  261. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_file.py +0 -0
  262. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_hf.py +0 -0
  263. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_image.py +0 -0
  264. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_listing_info.py +0 -0
  265. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_models.py +0 -0
  266. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_python_to_sql.py +0 -0
  267. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_schema.py +0 -0
  268. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_signal_schema.py +0 -0
  269. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_sql_to_python.py +0 -0
  270. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_text.py +0 -0
  271. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_udf_signature.py +0 -0
  272. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_utils.py +0 -0
  273. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/lib/test_webdataset.py +0 -0
  274. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/__init__.py +0 -0
  275. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/sqlite/__init__.py +0 -0
  276. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/sqlite/test_types.py +0 -0
  277. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/sqlite/test_utils.py +0 -0
  278. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/test_array.py +0 -0
  279. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/test_conditional.py +0 -0
  280. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/test_path.py +0 -0
  281. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/test_random.py +0 -0
  282. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/test_selectable.py +0 -0
  283. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/sql/test_string.py +0 -0
  284. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_asyn.py +0 -0
  285. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_cache.py +0 -0
  286. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_catalog.py +0 -0
  287. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_catalog_loader.py +0 -0
  288. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_cli_parsing.py +0 -0
  289. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_client.py +0 -0
  290. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_client_gcs.py +0 -0
  291. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_client_s3.py +0 -0
  292. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_config.py +0 -0
  293. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_data_storage.py +0 -0
  294. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_database_engine.py +0 -0
  295. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_dataset.py +0 -0
  296. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_dispatch.py +0 -0
  297. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_fileslice.py +0 -0
  298. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_listing.py +0 -0
  299. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_metastore.py +0 -0
  300. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_module_exports.py +0 -0
  301. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_pytorch.py +0 -0
  302. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_query.py +0 -0
  303. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_query_metrics.py +0 -0
  304. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_query_params.py +0 -0
  305. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_serializer.py +0 -0
  306. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_session.py +0 -0
  307. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_utils.py +0 -0
  308. {datachain-0.8.11 → datachain-0.8.12}/tests/unit/test_warehouse.py +0 -0
  309. {datachain-0.8.11 → datachain-0.8.12}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.11
3
+ Version: 0.8.12
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -0,0 +1,5 @@
1
+ # Functions
2
+
3
+ Use built-in functions for data manipulation and analysis to operate on the underlying database storing the chain data. These functions are useful for operations like [`DataChain.filter`](datachain.md#datachain.lib.dc.DataChain.filter) and [`DataChain.mutate`](datachain.md#datachain.lib.dc.DataChain.mutate). Import these functions from `datachain.func`.
4
+
5
+ ::: datachain.func
@@ -10,5 +10,5 @@ DataChain's API is organized into several modules:
10
10
  - [DataType](./datatype.md) - Type system and schema definitions
11
11
  - [File](./file.md) - File handling and storage operations
12
12
  - [UDF](./udf.md) - User-defined functions and transformations
13
- - [SQL](./sql.md) - SQL query integration
13
+ - [Functions](./func.md) - Built-in functions for data manipulation and analysis
14
14
  - [Torch](./torch.md) - PyTorch data loading utilities
@@ -73,7 +73,7 @@ nav:
73
73
  - File: references/file.md
74
74
  - UDF: references/udf.md
75
75
  - Torch: references/torch.md
76
- - SQL: references/sql.md
76
+ - Functions: references/func.md
77
77
  - 🤝 Contributing: contributing.md
78
78
 
79
79
  - DataChain Website ↗: https://datachain.ai" target="_blank"
@@ -38,7 +38,6 @@ from datachain.dataset import (
38
38
  DatasetDependency,
39
39
  DatasetListRecord,
40
40
  DatasetRecord,
41
- DatasetStats,
42
41
  DatasetStatus,
43
42
  StorageURI,
44
43
  create_dataset_uri,
@@ -1235,17 +1234,6 @@ class Catalog:
1235
1234
  dataset = self.get_dataset(name)
1236
1235
  return self.warehouse.dataset_table_export_file_names(dataset, version)
1237
1236
 
1238
- def dataset_stats(self, name: str, version: Optional[int]) -> DatasetStats:
1239
- """
1240
- Returns tuple with dataset stats: total number of rows and total dataset size.
1241
- """
1242
- dataset = self.get_dataset(name)
1243
- dataset_version = dataset.get_version(version or dataset.latest_version)
1244
- return DatasetStats(
1245
- num_objects=dataset_version.num_objects,
1246
- size=dataset_version.size,
1247
- )
1248
-
1249
1237
  def remove_dataset(
1250
1238
  self,
1251
1239
  name: str,
@@ -1391,19 +1379,12 @@ class Catalog:
1391
1379
  except DatasetNotFoundError:
1392
1380
  pass
1393
1381
 
1394
- stats_response = studio_client.dataset_stats(
1395
- remote_ds_name, remote_ds_version.version
1396
- )
1397
- if not stats_response.ok:
1398
- raise_remote_error(stats_response.message)
1399
- ds_stats = stats_response.data
1400
-
1401
1382
  dataset_save_progress_bar = tqdm(
1402
1383
  desc=f"Saving dataset {remote_ds_uri} locally: ",
1403
1384
  unit=" rows",
1404
1385
  unit_scale=True,
1405
1386
  unit_divisor=1000,
1406
- total=ds_stats.num_objects, # type: ignore [union-attr]
1387
+ total=remote_ds_version.num_objects, # type: ignore [union-attr]
1407
1388
  leave=False,
1408
1389
  )
1409
1390
 
@@ -11,7 +11,6 @@ from datachain.telemetry import telemetry
11
11
  from .commands import (
12
12
  clear_cache,
13
13
  completion,
14
- dataset_stats,
15
14
  du,
16
15
  edit_dataset,
17
16
  garbage_collect,
@@ -182,13 +181,6 @@ def handle_dataset_command(args, catalog):
182
181
  all=args.all,
183
182
  team=args.team,
184
183
  ),
185
- "stats": lambda: dataset_stats(
186
- catalog,
187
- args.name,
188
- args.version,
189
- show_bytes=args.bytes,
190
- si=args.si,
191
- ),
192
184
  }
193
185
 
194
186
  handler = dataset_commands.get(args.datasets_cmd)
@@ -1,5 +1,4 @@
1
1
  from .datasets import (
2
- dataset_stats,
3
2
  edit_dataset,
4
3
  list_datasets,
5
4
  list_datasets_local,
@@ -15,7 +14,6 @@ from .show import show
15
14
  __all__ = [
16
15
  "clear_cache",
17
16
  "completion",
18
- "dataset_stats",
19
17
  "du",
20
18
  "edit_dataset",
21
19
  "garbage_collect",
@@ -3,8 +3,6 @@ from typing import TYPE_CHECKING, Optional
3
3
 
4
4
  from tabulate import tabulate
5
5
 
6
- from datachain import utils
7
-
8
6
  if TYPE_CHECKING:
9
7
  from datachain.catalog import Catalog
10
8
 
@@ -109,20 +107,3 @@ def edit_dataset(
109
107
 
110
108
  if (all or studio) and token:
111
109
  edit_studio_dataset(team, name, new_name, description, labels)
112
-
113
-
114
- def dataset_stats(
115
- catalog: "Catalog",
116
- name: str,
117
- version: int,
118
- show_bytes=False,
119
- si=False,
120
- ):
121
- stats = catalog.dataset_stats(name, version)
122
-
123
- if stats:
124
- print(f"Number of objects: {stats.num_objects}")
125
- if show_bytes:
126
- print(f"Total objects size: {stats.size}")
127
- else:
128
- print(f"Total objects size: {utils.sizeof_fmt(stats.size, si=si): >7}")
@@ -307,31 +307,6 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
307
307
  help="The team to delete a dataset. By default, it will use team from config",
308
308
  )
309
309
 
310
- dataset_stats_parser = datasets_subparser.add_parser(
311
- "stats", parents=[parent_parser], description="Show basic dataset statistics."
312
- )
313
- dataset_stats_parser.add_argument("name", type=str, help="Dataset name")
314
- dataset_stats_parser.add_argument(
315
- "--version",
316
- action="store",
317
- default=None,
318
- type=int,
319
- help="Dataset version",
320
- )
321
- dataset_stats_parser.add_argument(
322
- "-b",
323
- "--bytes",
324
- default=False,
325
- action="store_true",
326
- help="Display size in bytes instead of human-readable size",
327
- )
328
- dataset_stats_parser.add_argument(
329
- "--si",
330
- default=False,
331
- action="store_true",
332
- help="Display size using powers of 1000 not 1024",
333
- )
334
-
335
310
  parse_ls = subp.add_parser(
336
311
  "ls", parents=[parent_parser], description="List storage contents."
337
312
  )
@@ -150,12 +150,6 @@ class DatasetDependency:
150
150
  return hash(f"{self.type}_{self.name}_{self.version}")
151
151
 
152
152
 
153
- @dataclass
154
- class DatasetStats:
155
- num_objects: Optional[int] # None if table is missing
156
- size: Optional[int] # in bytes None if table is missing or empty
157
-
158
-
159
153
  class DatasetStatus:
160
154
  CREATED = 1
161
155
  PENDING = 2
@@ -9,7 +9,7 @@ from datachain.sql.functions import conditional
9
9
 
10
10
  from .func import ColT, Func
11
11
 
12
- CaseT = Union[int, float, complex, bool, str, Func]
12
+ CaseT = Union[int, float, complex, bool, str, Func, ColumnElement]
13
13
 
14
14
 
15
15
  def greatest(*args: Union[ColT, float]) -> Func:
@@ -94,11 +94,12 @@ def case(
94
94
  """
95
95
  Returns the case function that produces case expression which has a list of
96
96
  conditions and corresponding results. Results can be python primitives like string,
97
- numbers or booleans but can also be other nested function (including case function).
97
+ numbers or booleans but can also be other nested functions (including case function)
98
+ or columns.
98
99
  Result type is inferred from condition results.
99
100
 
100
101
  Args:
101
- args (tuple((ColumnElement, Func), (str | int | float | complex | bool, Func))):
102
+ args tuple((ColumnElement | Func),(str | int | float | complex | bool, Func, ColumnElement)):
102
103
  Tuple of condition and values pair.
103
104
  else_ (str | int | float | complex | bool, Func): optional else value in case
104
105
  expression. If omitted, and no case conditions are satisfied, the result
@@ -113,13 +114,16 @@ def case(
113
114
  res=func.case((C("num") > 0, "P"), (C("num") < 0, "N"), else_="Z"),
114
115
  )
115
116
  ```
116
- """
117
+ """ # noqa: E501
117
118
  supported_types = [int, float, complex, str, bool]
118
119
 
119
120
  def _get_type(val):
120
121
  if isinstance(val, Func):
121
122
  # nested functions
122
123
  return val.result_type
124
+ if isinstance(val, Column):
125
+ # at this point we cannot know what is the type of a column
126
+ return None
123
127
  return type(val)
124
128
 
125
129
  if not args:
@@ -129,13 +133,16 @@ def case(
129
133
 
130
134
  for arg in args:
131
135
  arg_type = _get_type(arg[1])
136
+ if arg_type is None:
137
+ # we couldn't figure out the type of case value
138
+ continue
132
139
  if type_ and arg_type != type_:
133
140
  raise DataChainParamsError(
134
141
  f"Statement values must be of the same type, got {type_} and {arg_type}"
135
142
  )
136
143
  type_ = arg_type
137
144
 
138
- if type_ not in supported_types:
145
+ if type_ is not None and type_ not in supported_types:
139
146
  raise DataChainParamsError(
140
147
  f"Only python literals ({supported_types}) are supported for values"
141
148
  )
@@ -151,15 +158,15 @@ def ifelse(
151
158
  """
152
159
  Returns the ifelse function that produces if expression which has a condition
153
160
  and values for true and false outcome. Results can be one of python primitives
154
- like string, numbers or booleans, but can also be nested functions.
161
+ like string, numbers or booleans, but can also be nested functions or columns.
155
162
  Result type is inferred from the values.
156
163
 
157
164
  Args:
158
165
  condition (ColumnElement, Func): Condition which is evaluated.
159
- if_val (str | int | float | complex | bool, Func): Value for true
166
+ if_val (str | int | float | complex | bool, Func, ColumnElement): Value for true
160
167
  condition outcome.
161
- else_val (str | int | float | complex | bool, Func): Value for false condition
162
- outcome.
168
+ else_val (str | int | float | complex | bool, Func, ColumnElement): Value for
169
+ false condition outcome.
163
170
 
164
171
  Returns:
165
172
  Func: A Func object that represents the ifelse function.
@@ -424,10 +424,9 @@ class Func(Function):
424
424
 
425
425
  def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
426
426
  if isinstance(col, tuple):
427
- raise DataChainParamsError(
428
- "Cannot get type from tuple, please provide type hint to the function"
429
- )
430
-
427
+ # we can only get tuple from case statement where the first tuple item
428
+ # is condition, and second one is value which type is important
429
+ col = col[1]
431
430
  if isinstance(col, Func):
432
431
  return col.get_result_type(signals_schema)
433
432
 
@@ -435,7 +434,7 @@ def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
435
434
  return sql_to_python(col)
436
435
 
437
436
  return signals_schema.get_column_type(
438
- col.name if isinstance(col, ColumnElement) else col
437
+ col.name if isinstance(col, ColumnElement) else col # type: ignore[arg-type]
439
438
  )
440
439
 
441
440
 
@@ -16,14 +16,12 @@ from urllib.parse import urlparse, urlunparse
16
16
  import websockets
17
17
 
18
18
  from datachain.config import Config
19
- from datachain.dataset import DatasetStats
20
19
  from datachain.error import DataChainError
21
20
  from datachain.utils import STUDIO_URL, retry_with_backoff
22
21
 
23
22
  T = TypeVar("T")
24
23
  LsData = Optional[list[dict[str, Any]]]
25
24
  DatasetInfoData = Optional[dict[str, Any]]
26
- DatasetStatsData = Optional[DatasetStats]
27
25
  DatasetRowsData = Optional[Iterable[dict[str, Any]]]
28
26
  DatasetJobVersionsData = Optional[dict[str, Any]]
29
27
  DatasetExportStatus = Optional[dict[str, Any]]
@@ -309,7 +307,7 @@ class StudioClient:
309
307
  "datachain/datasets",
310
308
  {
311
309
  "dataset_name": name,
312
- "version": version,
310
+ "dataset_version": version,
313
311
  "force": force,
314
312
  },
315
313
  method="DELETE",
@@ -347,16 +345,6 @@ class StudioClient:
347
345
  method="GET",
348
346
  )
349
347
 
350
- def dataset_stats(self, name: str, version: int) -> Response[DatasetStatsData]:
351
- response = self._send_request(
352
- "datachain/datasets/stats",
353
- {"dataset_name": name, "dataset_version": version},
354
- method="GET",
355
- )
356
- if response.ok:
357
- response.data = DatasetStats(**response.data)
358
- return response
359
-
360
348
  def export_dataset_table(
361
349
  self, name: str, version: int
362
350
  ) -> Response[DatasetExportSignedUrls]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.11
3
+ Version: 0.8.12
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -31,8 +31,8 @@ docs/overrides/main.html
31
31
  docs/references/datachain.md
32
32
  docs/references/datatype.md
33
33
  docs/references/file.md
34
+ docs/references/func.md
34
35
  docs/references/index.md
35
- docs/references/sql.md
36
36
  docs/references/torch.md
37
37
  docs/references/udf.md
38
38
  examples/computer_vision/iptc_exif_xmp_lib.py
@@ -17,7 +17,8 @@ from tests.utils import DEFAULT_TREE, skip_if_not_sqlite, tree_from_path
17
17
  def listing_stats(uri, catalog):
18
18
  list_dataset_name, _, _ = parse_listing_uri(uri, catalog.client_config)
19
19
  dataset = catalog.get_dataset(list_dataset_name)
20
- return catalog.dataset_stats(dataset.name, dataset.latest_version)
20
+ dataset_version = dataset.get_version(dataset.latest_version)
21
+ return dataset_version.num_objects, dataset_version.size
21
22
 
22
23
 
23
24
  @pytest.fixture
@@ -582,23 +583,23 @@ def test_listing_stats(cloud_test_catalog):
582
583
  listing_stats(src_uri, catalog)
583
584
 
584
585
  catalog.enlist_source(src_uri)
585
- stats = listing_stats(src_uri, catalog)
586
- assert stats.num_objects == 7
587
- assert stats.size == 36
586
+ num_objects, size = listing_stats(src_uri, catalog)
587
+ assert num_objects == 7
588
+ assert size == 36
588
589
 
589
590
  catalog.enlist_source(f"{src_uri}/dogs/", update=True)
590
- stats = listing_stats(src_uri, catalog)
591
- assert stats.num_objects == 7
592
- assert stats.size == 36
591
+ num_objects, size = listing_stats(src_uri, catalog)
592
+ assert num_objects == 7
593
+ assert size == 36
593
594
 
594
- stats = listing_stats(f"{src_uri}/dogs/", catalog)
595
- assert stats.num_objects == 4
596
- assert stats.size == 15
595
+ num_objects, size = listing_stats(f"{src_uri}/dogs/", catalog)
596
+ assert num_objects == 4
597
+ assert size == 15
597
598
 
598
599
  catalog.enlist_source(f"{src_uri}/dogs/")
599
- stats = listing_stats(src_uri, catalog)
600
- assert stats.num_objects == 7
601
- assert stats.size == 36
600
+ num_objects, size = listing_stats(src_uri, catalog)
601
+ assert num_objects == 7
602
+ assert size == 36
602
603
 
603
604
 
604
605
  @pytest.mark.parametrize("cloud_type", ["s3", "azure", "gs"], indirect=True)
@@ -608,15 +609,15 @@ def test_enlist_source_handles_slash(cloud_test_catalog):
608
609
  src_path = f"{src_uri}/dogs"
609
610
 
610
611
  catalog.enlist_source(src_path)
611
- stats = listing_stats(src_path, catalog)
612
- assert stats.num_objects == len(DEFAULT_TREE["dogs"])
613
- assert stats.size == 15
612
+ num_objects, size = listing_stats(src_path, catalog)
613
+ assert num_objects == len(DEFAULT_TREE["dogs"])
614
+ assert size == 15
614
615
 
615
616
  src_path = f"{src_uri}/dogs"
616
617
  catalog.enlist_source(src_path, update=True)
617
- stats = listing_stats(src_path, catalog)
618
- assert stats.num_objects == len(DEFAULT_TREE["dogs"])
619
- assert stats.size == 15
618
+ num_objects, size = listing_stats(src_path, catalog)
619
+ assert num_objects == len(DEFAULT_TREE["dogs"])
620
+ assert size == 15
620
621
 
621
622
 
622
623
  @pytest.mark.parametrize("cloud_type", ["s3", "azure", "gs"], indirect=True)
@@ -626,10 +627,10 @@ def test_enlist_source_handles_glob(cloud_test_catalog):
626
627
  src_path = f"{src_uri}/dogs/*.jpg"
627
628
 
628
629
  catalog.enlist_source(src_path)
629
- stats = listing_stats(src_path, catalog)
630
+ num_objects, size = listing_stats(src_path, catalog)
630
631
 
631
- assert stats.num_objects == len(DEFAULT_TREE["dogs"])
632
- assert stats.size == 15
632
+ assert num_objects == len(DEFAULT_TREE["dogs"])
633
+ assert size == 15
633
634
 
634
635
 
635
636
  @pytest.mark.parametrize("cloud_type", ["s3", "azure", "gs"], indirect=True)
@@ -20,7 +20,7 @@ from sqlalchemy import Column
20
20
  from datachain import DataModel, func
21
21
  from datachain.catalog.catalog import QUERY_SCRIPT_CANCELED_EXIT_CODE
22
22
  from datachain.data_storage.sqlite import SQLiteWarehouse
23
- from datachain.dataset import DatasetDependencyType, DatasetStats
23
+ from datachain.dataset import DatasetDependencyType
24
24
  from datachain.func import path as pathfunc
25
25
  from datachain.lib.dc import C, DataChain
26
26
  from datachain.lib.file import File, ImageFile
@@ -515,8 +515,9 @@ def test_from_storage_dataset_stats(tmp_dir, test_session):
515
515
  dc = DataChain.from_storage(tmp_dir.as_uri(), session=test_session).save(
516
516
  "test-data"
517
517
  )
518
- stats = test_session.catalog.dataset_stats(dc.name, dc.version)
519
- assert stats == DatasetStats(num_objects=4, size=20)
518
+ version = test_session.catalog.get_dataset(dc.name).get_version(dc.version)
519
+ assert version.num_objects == 4
520
+ assert version.size == 20
520
521
 
521
522
 
522
523
  def test_from_storage_check_rows(tmp_dir, test_session):
@@ -845,9 +845,9 @@ def test_row_random(cloud_test_catalog):
845
845
 
846
846
  def test_dataset_stats_registered_ds(cloud_test_catalog, dogs_dataset):
847
847
  catalog = cloud_test_catalog.catalog
848
- stats = catalog.dataset_stats(dogs_dataset.name, 1)
849
- assert stats.num_objects == 4
850
- assert stats.size == 15
848
+ dataset = catalog.get_dataset(dogs_dataset.name).get_version(1)
849
+ assert dataset.num_objects == 4
850
+ assert dataset.size == 15
851
851
  rows_count = catalog.warehouse.dataset_rows_count(dogs_dataset, 1)
852
852
  assert rows_count == 4
853
853
 
@@ -154,14 +154,6 @@ def remote_dataset_info(requests_mock, remote_dataset):
154
154
  requests_mock.get(f"{STUDIO_URL}/api/datachain/datasets/info", json=remote_dataset)
155
155
 
156
156
 
157
- @pytest.fixture
158
- def remote_dataset_stats(requests_mock):
159
- requests_mock.get(
160
- f"{STUDIO_URL}/api/datachain/datasets/stats",
161
- json={"num_objects": 5, "size": 1000},
162
- )
163
-
164
-
165
157
  @pytest.fixture
166
158
  def dataset_export(requests_mock, remote_dataset_chunk_url):
167
159
  requests_mock.get(
@@ -194,7 +186,6 @@ def test_pull_dataset_success(
194
186
  mocker,
195
187
  cloud_test_catalog,
196
188
  remote_dataset_info,
197
- remote_dataset_stats,
198
189
  dataset_export,
199
190
  dataset_export_status,
200
191
  dataset_export_data_chunk,
@@ -322,25 +313,6 @@ def test_pull_dataset_not_found_in_remote(
322
313
  assert str(exc_info.value) == "Error from server: Dataset not found"
323
314
 
324
315
 
325
- @pytest.mark.parametrize("cloud_type, version_aware", [("s3", False)], indirect=True)
326
- @skip_if_not_sqlite
327
- def test_pull_dataset_error_on_fetching_stats(
328
- requests_mock,
329
- cloud_test_catalog,
330
- remote_dataset_info,
331
- ):
332
- requests_mock.get(
333
- f"{STUDIO_URL}/api/datachain/datasets/stats",
334
- status_code=400,
335
- json={"message": "Internal error"},
336
- )
337
- catalog = cloud_test_catalog.catalog
338
-
339
- with pytest.raises(DataChainError) as exc_info:
340
- catalog.pull_dataset("ds://dogs@v1")
341
- assert str(exc_info.value) == "Error from server: Internal error"
342
-
343
-
344
316
  @pytest.mark.parametrize("cloud_type, version_aware", [("s3", False)], indirect=True)
345
317
  @pytest.mark.parametrize("export_status", ["failed", "removed"])
346
318
  @skip_if_not_sqlite
@@ -348,7 +320,6 @@ def test_pull_dataset_exporting_dataset_failed_in_remote(
348
320
  requests_mock,
349
321
  cloud_test_catalog,
350
322
  remote_dataset_info,
351
- remote_dataset_stats,
352
323
  dataset_export,
353
324
  export_status,
354
325
  ):
@@ -372,7 +343,6 @@ def test_pull_dataset_empty_parquet(
372
343
  requests_mock,
373
344
  cloud_test_catalog,
374
345
  remote_dataset_info,
375
- remote_dataset_stats,
376
346
  dataset_export,
377
347
  dataset_export_status,
378
348
  remote_dataset_chunk_url,
@@ -389,7 +359,6 @@ def test_pull_dataset_empty_parquet(
389
359
  def test_pull_dataset_already_exists_locally(
390
360
  cloud_test_catalog,
391
361
  remote_dataset_info,
392
- remote_dataset_stats,
393
362
  dataset_export,
394
363
  dataset_export_status,
395
364
  dataset_export_data_chunk,
@@ -416,7 +385,6 @@ def test_pull_dataset_already_exists_locally(
416
385
  def test_pull_dataset_local_name_already_exists(
417
386
  cloud_test_catalog,
418
387
  remote_dataset_info,
419
- remote_dataset_stats,
420
388
  dataset_export,
421
389
  dataset_export_status,
422
390
  dataset_export_data_chunk,
@@ -294,7 +294,7 @@ def test_studio_rm_dataset(capsys, mocker):
294
294
  assert last_request.json() == {
295
295
  "dataset_name": "name",
296
296
  "team_name": "team_name",
297
- "version": 1,
297
+ "dataset_version": 1,
298
298
  "force": True,
299
299
  }
300
300