datachain 0.14.1__tar.gz → 0.14.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (354) hide show
  1. {datachain-0.14.1/src/datachain.egg-info → datachain-0.14.2}/PKG-INFO +5 -5
  2. {datachain-0.14.1 → datachain-0.14.2}/README.rst +3 -3
  3. {datachain-0.14.1 → datachain-0.14.2}/docs/examples.md +20 -20
  4. {datachain-0.14.1 → datachain-0.14.2}/docs/quick-start.md +9 -9
  5. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/file.md +2 -2
  6. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/imagefile.md +2 -2
  7. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/textfile.md +2 -2
  8. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/videofile.md +2 -2
  9. {datachain-0.14.1 → datachain-0.14.2}/docs/references/datachain.md +9 -9
  10. {datachain-0.14.1 → datachain-0.14.2}/docs/references/remotes.md +5 -4
  11. {datachain-0.14.1 → datachain-0.14.2}/examples/computer_vision/iptc_exif_xmp_lib.py +1 -1
  12. {datachain-0.14.1 → datachain-0.14.2}/examples/computer_vision/llava2_image_desc_lib.py +1 -1
  13. {datachain-0.14.1 → datachain-0.14.2}/examples/computer_vision/openimage-detect.py +1 -1
  14. {datachain-0.14.1 → datachain-0.14.2}/examples/computer_vision/ultralytics-bbox.py +1 -1
  15. {datachain-0.14.1 → datachain-0.14.2}/examples/computer_vision/ultralytics-pose.py +1 -1
  16. {datachain-0.14.1 → datachain-0.14.2}/examples/computer_vision/ultralytics-segment.py +1 -1
  17. {datachain-0.14.1 → datachain-0.14.2}/examples/get_started/common_sql_functions.py +1 -1
  18. {datachain-0.14.1 → datachain-0.14.2}/examples/get_started/json-csv-reader.py +7 -7
  19. {datachain-0.14.1 → datachain-0.14.2}/examples/get_started/torch-loader.py +1 -1
  20. {datachain-0.14.1 → datachain-0.14.2}/examples/get_started/udfs/parallel.py +1 -1
  21. {datachain-0.14.1 → datachain-0.14.2}/examples/get_started/udfs/simple.py +1 -1
  22. {datachain-0.14.1 → datachain-0.14.2}/examples/get_started/udfs/stateful.py +1 -1
  23. {datachain-0.14.1 → datachain-0.14.2}/examples/llm_and_nlp/claude-query.py +1 -1
  24. {datachain-0.14.1 → datachain-0.14.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +2 -2
  25. {datachain-0.14.1 → datachain-0.14.2}/examples/multimodal/clip_inference.py +2 -2
  26. {datachain-0.14.1 → datachain-0.14.2}/examples/multimodal/hf_pipeline.py +4 -4
  27. {datachain-0.14.1 → datachain-0.14.2}/examples/multimodal/openai_image_desc_lib.py +1 -1
  28. {datachain-0.14.1 → datachain-0.14.2}/examples/multimodal/wds.py +3 -3
  29. {datachain-0.14.1 → datachain-0.14.2}/examples/multimodal/wds_filtered.py +2 -2
  30. {datachain-0.14.1 → datachain-0.14.2}/pyproject.toml +1 -1
  31. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/__init__.py +18 -18
  32. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/catalog/catalog.py +5 -5
  33. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/show.py +2 -2
  34. datachain-0.14.2/src/datachain/lib/dc/__init__.py +32 -0
  35. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/csv.py +5 -5
  36. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/datachain.py +42 -42
  37. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/datasets.py +7 -7
  38. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/hf.py +5 -5
  39. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/json.py +5 -5
  40. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/listings.py +2 -2
  41. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/pandas.py +4 -4
  42. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/parquet.py +5 -5
  43. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/records.py +4 -4
  44. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/storage.py +13 -12
  45. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/values.py +4 -4
  46. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/listing.py +11 -0
  47. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/meta_formats.py +2 -2
  48. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/pytorch.py +2 -2
  49. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/udf.py +1 -1
  50. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/dataset.py +23 -10
  51. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/toolkit/split.py +1 -1
  52. {datachain-0.14.1 → datachain-0.14.2/src/datachain.egg-info}/PKG-INFO +5 -5
  53. {datachain-0.14.1 → datachain-0.14.2}/src/datachain.egg-info/requires.txt +1 -1
  54. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/test_datachain.py +1 -1
  55. {datachain-0.14.1 → datachain-0.14.2}/tests/conftest.py +3 -3
  56. {datachain-0.14.1 → datachain-0.14.2}/tests/examples/test_wds_e2e.py +3 -3
  57. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_catalog.py +4 -4
  58. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_cloud_transfer.py +1 -1
  59. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_datachain.py +130 -116
  60. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_datachain_merge.py +4 -4
  61. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_datasets.py +6 -6
  62. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_feature_pickling.py +5 -5
  63. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_file.py +1 -1
  64. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_hidden_field.py +3 -3
  65. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_listing.py +1 -1
  66. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_ls.py +1 -1
  67. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_pull.py +2 -2
  68. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_pytorch.py +2 -2
  69. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_query.py +1 -1
  70. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_session.py +2 -2
  71. {datachain-0.14.1 → datachain-0.14.2}/tests/scripts/feature_class.py +1 -1
  72. {datachain-0.14.1 → datachain-0.14.2}/tests/scripts/feature_class_exception.py +5 -5
  73. {datachain-0.14.1 → datachain-0.14.2}/tests/scripts/feature_class_parallel.py +1 -1
  74. {datachain-0.14.1 → datachain-0.14.2}/tests/scripts/feature_class_parallel_data_model.py +1 -1
  75. {datachain-0.14.1 → datachain-0.14.2}/tests/scripts/name_len_slow.py +1 -1
  76. {datachain-0.14.1 → datachain-0.14.2}/tests/test_import_time.py +1 -1
  77. {datachain-0.14.1 → datachain-0.14.2}/tests/test_telemetry.py +1 -1
  78. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_arrow.py +1 -1
  79. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_datachain.py +225 -225
  80. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_datachain_bootstrap.py +4 -4
  81. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_datachain_merge.py +17 -17
  82. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_diff.py +36 -36
  83. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_feature_utils.py +9 -9
  84. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_schema.py +1 -1
  85. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/test_selectable.py +1 -1
  86. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_func.py +4 -4
  87. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_listing.py +3 -3
  88. {datachain-0.14.1 → datachain-0.14.2}/tests/utils.py +1 -1
  89. datachain-0.14.1/src/datachain/lib/dc/__init__.py +0 -32
  90. {datachain-0.14.1 → datachain-0.14.2}/.cruft.json +0 -0
  91. {datachain-0.14.1 → datachain-0.14.2}/.gitattributes +0 -0
  92. {datachain-0.14.1 → datachain-0.14.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  93. {datachain-0.14.1 → datachain-0.14.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  94. {datachain-0.14.1 → datachain-0.14.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  95. {datachain-0.14.1 → datachain-0.14.2}/.github/codecov.yaml +0 -0
  96. {datachain-0.14.1 → datachain-0.14.2}/.github/dependabot.yml +0 -0
  97. {datachain-0.14.1 → datachain-0.14.2}/.github/workflows/benchmarks.yml +0 -0
  98. {datachain-0.14.1 → datachain-0.14.2}/.github/workflows/release.yml +0 -0
  99. {datachain-0.14.1 → datachain-0.14.2}/.github/workflows/tests-studio.yml +0 -0
  100. {datachain-0.14.1 → datachain-0.14.2}/.github/workflows/tests.yml +0 -0
  101. {datachain-0.14.1 → datachain-0.14.2}/.github/workflows/update-template.yaml +0 -0
  102. {datachain-0.14.1 → datachain-0.14.2}/.gitignore +0 -0
  103. {datachain-0.14.1 → datachain-0.14.2}/.pre-commit-config.yaml +0 -0
  104. {datachain-0.14.1 → datachain-0.14.2}/CODE_OF_CONDUCT.rst +0 -0
  105. {datachain-0.14.1 → datachain-0.14.2}/LICENSE +0 -0
  106. {datachain-0.14.1 → datachain-0.14.2}/docs/assets/captioned_cartoons.png +0 -0
  107. {datachain-0.14.1 → datachain-0.14.2}/docs/assets/datachain-white.svg +0 -0
  108. {datachain-0.14.1 → datachain-0.14.2}/docs/assets/datachain.svg +0 -0
  109. {datachain-0.14.1 → datachain-0.14.2}/docs/contributing.md +0 -0
  110. {datachain-0.14.1 → datachain-0.14.2}/docs/css/github-permalink-style.css +0 -0
  111. {datachain-0.14.1 → datachain-0.14.2}/docs/index.md +0 -0
  112. {datachain-0.14.1 → datachain-0.14.2}/docs/overrides/main.html +0 -0
  113. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/arrowrow.md +0 -0
  114. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/bbox.md +0 -0
  115. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/index.md +0 -0
  116. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/pose.md +0 -0
  117. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/segment.md +0 -0
  118. {datachain-0.14.1 → datachain-0.14.2}/docs/references/data-types/tarvfile.md +0 -0
  119. {datachain-0.14.1 → datachain-0.14.2}/docs/references/func.md +0 -0
  120. {datachain-0.14.1 → datachain-0.14.2}/docs/references/index.md +0 -0
  121. {datachain-0.14.1 → datachain-0.14.2}/docs/references/toolkit.md +0 -0
  122. {datachain-0.14.1 → datachain-0.14.2}/docs/references/torch.md +0 -0
  123. {datachain-0.14.1 → datachain-0.14.2}/docs/references/udf.md +0 -0
  124. {datachain-0.14.1 → datachain-0.14.2}/docs/tutorials.md +0 -0
  125. {datachain-0.14.1 → datachain-0.14.2}/mkdocs.yml +0 -0
  126. {datachain-0.14.1 → datachain-0.14.2}/noxfile.py +0 -0
  127. {datachain-0.14.1 → datachain-0.14.2}/setup.cfg +0 -0
  128. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/__main__.py +0 -0
  129. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/asyn.py +0 -0
  130. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cache.py +0 -0
  131. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/catalog/__init__.py +0 -0
  132. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/catalog/datasource.py +0 -0
  133. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/catalog/loader.py +0 -0
  134. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/__init__.py +0 -0
  135. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/__init__.py +0 -0
  136. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/datasets.py +0 -0
  137. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/du.py +0 -0
  138. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/index.py +0 -0
  139. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/ls.py +0 -0
  140. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/misc.py +0 -0
  141. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/commands/query.py +0 -0
  142. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/parser/__init__.py +0 -0
  143. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/parser/job.py +0 -0
  144. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/parser/studio.py +0 -0
  145. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/parser/utils.py +0 -0
  146. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/cli/utils.py +0 -0
  147. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/__init__.py +0 -0
  148. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/azure.py +0 -0
  149. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/fileslice.py +0 -0
  150. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/fsspec.py +0 -0
  151. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/gcs.py +0 -0
  152. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/hf.py +0 -0
  153. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/local.py +0 -0
  154. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/client/s3.py +0 -0
  155. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/config.py +0 -0
  156. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/__init__.py +0 -0
  157. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/db_engine.py +0 -0
  158. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/job.py +0 -0
  159. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/metastore.py +0 -0
  160. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/schema.py +0 -0
  161. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/serializer.py +0 -0
  162. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/sqlite.py +0 -0
  163. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/data_storage/warehouse.py +0 -0
  164. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/dataset.py +0 -0
  165. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/diff/__init__.py +0 -0
  166. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/error.py +0 -0
  167. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/fs/__init__.py +0 -0
  168. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/fs/reference.py +0 -0
  169. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/fs/utils.py +0 -0
  170. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/__init__.py +0 -0
  171. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/aggregate.py +0 -0
  172. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/array.py +0 -0
  173. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/base.py +0 -0
  174. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/conditional.py +0 -0
  175. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/func.py +0 -0
  176. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/numeric.py +0 -0
  177. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/path.py +0 -0
  178. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/random.py +0 -0
  179. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/string.py +0 -0
  180. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/func/window.py +0 -0
  181. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/job.py +0 -0
  182. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/__init__.py +0 -0
  183. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/arrow.py +0 -0
  184. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/clip.py +0 -0
  185. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/convert/__init__.py +0 -0
  186. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/convert/flatten.py +0 -0
  187. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
  188. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
  189. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/convert/unflatten.py +0 -0
  190. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  191. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/data_model.py +0 -0
  192. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dataset_info.py +0 -0
  193. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/dc/utils.py +0 -0
  194. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/file.py +0 -0
  195. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/hf.py +0 -0
  196. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/image.py +0 -0
  197. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/listing_info.py +0 -0
  198. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/model_store.py +0 -0
  199. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/settings.py +0 -0
  200. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/signal_schema.py +0 -0
  201. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/tar.py +0 -0
  202. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/text.py +0 -0
  203. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/udf_signature.py +0 -0
  204. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/utils.py +0 -0
  205. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/video.py +0 -0
  206. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/webdataset.py +0 -0
  207. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/lib/webdataset_laion.py +0 -0
  208. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/listing.py +0 -0
  209. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/__init__.py +0 -0
  210. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/bbox.py +0 -0
  211. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/pose.py +0 -0
  212. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/segment.py +0 -0
  213. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/ultralytics/__init__.py +0 -0
  214. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/ultralytics/bbox.py +0 -0
  215. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/ultralytics/pose.py +0 -0
  216. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/ultralytics/segment.py +0 -0
  217. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/model/utils.py +0 -0
  218. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/node.py +0 -0
  219. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/nodes_fetcher.py +0 -0
  220. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/nodes_thread_pool.py +0 -0
  221. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/progress.py +0 -0
  222. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/py.typed +0 -0
  223. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/__init__.py +0 -0
  224. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/batch.py +0 -0
  225. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/dispatch.py +0 -0
  226. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/metrics.py +0 -0
  227. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/params.py +0 -0
  228. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/queue.py +0 -0
  229. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/schema.py +0 -0
  230. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/session.py +0 -0
  231. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/udf.py +0 -0
  232. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/query/utils.py +0 -0
  233. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/remote/__init__.py +0 -0
  234. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/remote/studio.py +0 -0
  235. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/script_meta.py +0 -0
  236. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/__init__.py +0 -0
  237. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/default/__init__.py +0 -0
  238. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/default/base.py +0 -0
  239. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/__init__.py +0 -0
  240. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/aggregate.py +0 -0
  241. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/array.py +0 -0
  242. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/conditional.py +0 -0
  243. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/numeric.py +0 -0
  244. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/path.py +0 -0
  245. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/random.py +0 -0
  246. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/functions/string.py +0 -0
  247. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/selectable.py +0 -0
  248. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/sqlite/__init__.py +0 -0
  249. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/sqlite/base.py +0 -0
  250. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/sqlite/types.py +0 -0
  251. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/sqlite/vector.py +0 -0
  252. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/types.py +0 -0
  253. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/sql/utils.py +0 -0
  254. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/studio.py +0 -0
  255. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/telemetry.py +0 -0
  256. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/toolkit/__init__.py +0 -0
  257. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/torch/__init__.py +0 -0
  258. {datachain-0.14.1 → datachain-0.14.2}/src/datachain/utils.py +0 -0
  259. {datachain-0.14.1 → datachain-0.14.2}/src/datachain.egg-info/SOURCES.txt +0 -0
  260. {datachain-0.14.1 → datachain-0.14.2}/src/datachain.egg-info/dependency_links.txt +0 -0
  261. {datachain-0.14.1 → datachain-0.14.2}/src/datachain.egg-info/entry_points.txt +0 -0
  262. {datachain-0.14.1 → datachain-0.14.2}/src/datachain.egg-info/top_level.txt +0 -0
  263. {datachain-0.14.1 → datachain-0.14.2}/tests/__init__.py +0 -0
  264. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/__init__.py +0 -0
  265. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/conftest.py +0 -0
  266. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  267. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/datasets/.dvc/config +0 -0
  268. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/datasets/.gitignore +0 -0
  269. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  270. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/test_ls.py +0 -0
  271. {datachain-0.14.1 → datachain-0.14.2}/tests/benchmarks/test_version.py +0 -0
  272. {datachain-0.14.1 → datachain-0.14.2}/tests/data.py +0 -0
  273. {datachain-0.14.1 → datachain-0.14.2}/tests/examples/__init__.py +0 -0
  274. {datachain-0.14.1 → datachain-0.14.2}/tests/examples/test_examples.py +0 -0
  275. {datachain-0.14.1 → datachain-0.14.2}/tests/examples/wds_data.py +0 -0
  276. {datachain-0.14.1 → datachain-0.14.2}/tests/func/__init__.py +0 -0
  277. {datachain-0.14.1 → datachain-0.14.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
  278. {datachain-0.14.1 → datachain-0.14.2}/tests/func/data/lena.jpg +0 -0
  279. {datachain-0.14.1 → datachain-0.14.2}/tests/func/fake-service-account-credentials.json +0 -0
  280. {datachain-0.14.1 → datachain-0.14.2}/tests/func/model/__init__.py +0 -0
  281. {datachain-0.14.1 → datachain-0.14.2}/tests/func/model/data/running-mask0.png +0 -0
  282. {datachain-0.14.1 → datachain-0.14.2}/tests/func/model/data/running-mask1.png +0 -0
  283. {datachain-0.14.1 → datachain-0.14.2}/tests/func/model/data/running.jpg +0 -0
  284. {datachain-0.14.1 → datachain-0.14.2}/tests/func/model/data/ships.jpg +0 -0
  285. {datachain-0.14.1 → datachain-0.14.2}/tests/func/model/test_yolo.py +0 -0
  286. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_client.py +0 -0
  287. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_data_storage.py +0 -0
  288. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_dataset_query.py +0 -0
  289. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_hf.py +0 -0
  290. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_image.py +0 -0
  291. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_meta_formats.py +0 -0
  292. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_metrics.py +0 -0
  293. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_toolkit.py +0 -0
  294. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_video.py +0 -0
  295. {datachain-0.14.1 → datachain-0.14.2}/tests/func/test_warehouse.py +0 -0
  296. {datachain-0.14.1 → datachain-0.14.2}/tests/test_atomicity.py +0 -0
  297. {datachain-0.14.1 → datachain-0.14.2}/tests/test_cli_e2e.py +0 -0
  298. {datachain-0.14.1 → datachain-0.14.2}/tests/test_cli_studio.py +0 -0
  299. {datachain-0.14.1 → datachain-0.14.2}/tests/test_query_e2e.py +0 -0
  300. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/__init__.py +0 -0
  301. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/__init__.py +0 -0
  302. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/conftest.py +0 -0
  303. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_clip.py +0 -0
  304. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_feature.py +0 -0
  305. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_file.py +0 -0
  306. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_hf.py +0 -0
  307. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_image.py +0 -0
  308. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_listing_info.py +0 -0
  309. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_python_to_sql.py +0 -0
  310. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_signal_schema.py +0 -0
  311. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_sql_to_python.py +0 -0
  312. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_text.py +0 -0
  313. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_udf_signature.py +0 -0
  314. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_utils.py +0 -0
  315. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/lib/test_webdataset.py +0 -0
  316. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/model/__init__.py +0 -0
  317. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/model/test_bbox.py +0 -0
  318. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/model/test_pose.py +0 -0
  319. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/model/test_segment.py +0 -0
  320. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/model/test_utils.py +0 -0
  321. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/__init__.py +0 -0
  322. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/sqlite/__init__.py +0 -0
  323. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/sqlite/test_types.py +0 -0
  324. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
  325. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/test_array.py +0 -0
  326. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/test_conditional.py +0 -0
  327. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/test_path.py +0 -0
  328. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/test_random.py +0 -0
  329. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/sql/test_string.py +0 -0
  330. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_asyn.py +0 -0
  331. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_cache.py +0 -0
  332. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_catalog.py +0 -0
  333. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_catalog_loader.py +0 -0
  334. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_cli_parsing.py +0 -0
  335. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_client.py +0 -0
  336. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_client_gcs.py +0 -0
  337. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_client_s3.py +0 -0
  338. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_config.py +0 -0
  339. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_data_storage.py +0 -0
  340. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_database_engine.py +0 -0
  341. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_dataset.py +0 -0
  342. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_dispatch.py +0 -0
  343. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_fileslice.py +0 -0
  344. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_metastore.py +0 -0
  345. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_module_exports.py +0 -0
  346. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_pytorch.py +0 -0
  347. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_query.py +0 -0
  348. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_query_metrics.py +0 -0
  349. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_query_params.py +0 -0
  350. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_script_meta.py +0 -0
  351. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_serializer.py +0 -0
  352. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_session.py +0 -0
  353. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_utils.py +0 -0
  354. {datachain-0.14.1 → datachain-0.14.2}/tests/unit/test_warehouse.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.14.1
3
+ Version: 0.14.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -38,7 +38,7 @@ Requires-Dist: sqlalchemy>=2
38
38
  Requires-Dist: multiprocess==0.70.16
39
39
  Requires-Dist: cloudpickle
40
40
  Requires-Dist: orjson>=3.10.5
41
- Requires-Dist: pydantic<3,>=2
41
+ Requires-Dist: pydantic<2.11,>=2
42
42
  Requires-Dist: jmespath>=1.0
43
43
  Requires-Dist: datamodel-code-generator>=0.25
44
44
  Requires-Dist: Pillow<12,>=10.0.0
@@ -171,8 +171,8 @@ high confidence scores.
171
171
 
172
172
  import datachain as dc
173
173
 
174
- meta = dc.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
175
- images = dc.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
174
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
175
+ images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
176
176
 
177
177
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
178
178
  annotated = images_id.merge(meta, on="id", right_on="meta.id")
@@ -213,7 +213,7 @@ Python code:
213
213
  return result.lower().startswith("success")
214
214
 
215
215
  chain = (
216
- dc.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
216
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
217
217
  .settings(parallel=4, cache=True)
218
218
  .map(is_success=eval_dialogue)
219
219
  .save("mistral_files")
@@ -60,8 +60,8 @@ high confidence scores.
60
60
 
61
61
  import datachain as dc
62
62
 
63
- meta = dc.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
64
- images = dc.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
63
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
64
+ images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
65
65
 
66
66
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
67
67
  annotated = images_id.merge(meta, on="id", right_on="meta.id")
@@ -102,7 +102,7 @@ Python code:
102
102
  return result.lower().startswith("success")
103
103
 
104
104
  chain = (
105
- dc.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
105
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
106
106
  .settings(parallel=4, cache=True)
107
107
  .map(is_success=eval_dialogue)
108
108
  .save("mistral_files")
@@ -16,7 +16,7 @@ title: Examples
16
16
  import datachain as dc # (1)!
17
17
  from transformers import AutoProcessor, PaliGemmaForConditionalGeneration # (2)!
18
18
 
19
- images = dc.from_storage("gs://datachain-demo/newyorker_caption_contest/images", type="image")
19
+ images = dc.read_storage("gs://datachain-demo/newyorker_caption_contest/images", type="image")
20
20
 
21
21
  model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma-3b-mix-224")
22
22
  processor = AutoProcessor.from_pretrained("google/paligemma-3b-mix-224")
@@ -93,20 +93,20 @@ dc.DataModel.register(MistralModel)
93
93
 
94
94
  chain = (
95
95
  dc
96
- .from_storage("gs://datachain-demo/chatbot-KiT/", type="text")
96
+ .read_storage("gs://datachain-demo/chatbot-KiT/", type="text")
97
97
  .filter(dc.Column("file.name").glob("*.txt"))
98
98
  .limit(5)
99
99
  .settings(parallel=4, cache=True)
100
100
  .map(
101
- mistral=lambda file: MistralClient(api_key=api_key).chat(
102
- model="open-mixtral-8x22b",
103
- response_format={"type": "json_object"},
104
- messages= [
105
- ChatMessage(role="system", content=f"{prompt}"),
106
- ChatMessage(role="user", content=f"{file.read()}")
107
- ]
108
- ),
109
- output=MistralModel
101
+ mistral=lambda file: MistralClient(api_key=api_key).chat(
102
+ model="open-mixtral-8x22b",
103
+ response_format={"type": "json_object"},
104
+ messages=[
105
+ ChatMessage(role="system", content=f"{prompt}"),
106
+ ChatMessage(role="user", content=f"{file.read()}")
107
+ ]
108
+ ),
109
+ output=MistralModel
110
110
  )
111
111
  .save("dialog-rating")
112
112
  )
@@ -145,7 +145,7 @@ The cost of 5 calls to Mixtral 8x22b : $0.0142
145
145
  The “save” operation makes the chain's dataset persistent in the current (working) directory of the query. A hidden folder `.datachain/` holds the records. A persistent dataset can be accessed later to start a derivative chain:
146
146
 
147
147
  ```python
148
- dc.from_dataset("rating").limit(2).save("dialog-rating")
148
+ dc.read_dataset("rating").limit(2).save("dialog-rating")
149
149
  ```
150
150
 
151
151
  Persistent datasets are immutable and automatically versioned. Here is how to access the dataset registry:
@@ -167,7 +167,7 @@ dialog-rating@v2
167
167
  By default, when a saved dataset is loaded, the latest version is fetched but another version can be requested:
168
168
 
169
169
  ```python
170
- ds = dc.from_dataset("dialog-rating", version = 1)
170
+ ds = dc.read_dataset("dialog-rating", version=1)
171
171
  ```
172
172
 
173
173
  ### Chain execution, optimization and parallelism
@@ -189,8 +189,8 @@ Here is an example of reading a simple CSV file where schema is heuristically de
189
189
  ```python
190
190
  from datachain import DataChain
191
191
 
192
- uri="gs://datachain-demo/chatbot-csv/"
193
- csv_dataset = dc.from_csv(uri)
192
+ uri = "gs://datachain-demo/chatbot-csv/"
193
+ csv_dataset = dc.read_csv(uri)
194
194
 
195
195
  print(csv_dataset.to_pandas())
196
196
  ```
@@ -233,12 +233,12 @@ However, Datachain can easily parse the entire COCO structure via several readin
233
233
  ```python
234
234
  import datachain as dc
235
235
 
236
- images_uri="gs://datachain-demo/coco2017/images/val/"
237
- captions_uri="gs://datachain-demo/coco2017/annotations/captions_val2017.json"
236
+ images_uri = "gs://datachain-demo/coco2017/images/val/"
237
+ captions_uri = "gs://datachain-demo/coco2017/annotations/captions_val2017.json"
238
238
 
239
- images = dc.from_storage(images_uri)
240
- meta = dc.from_json(captions_uri, jmespath = "images")
241
- captions = dc.from_json(captions_uri, jmespath = "annotations")
239
+ images = dc.read_storage(images_uri)
240
+ meta = dc.read_json(captions_uri, jmespath="images")
241
+ captions = dc.read_json(captions_uri, jmespath="annotations")
242
242
 
243
243
  images_meta = images.merge(meta, on="file.name", right_on="images.file_name")
244
244
  captioned_images = images_meta.merge(captions, on="images.id", right_on="annotations.image_id")
@@ -39,8 +39,8 @@ using JSON metadata:
39
39
  ``` py
40
40
  import datachain as dc
41
41
 
42
- meta = dc.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
43
- images = dc.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
42
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
43
+ images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
44
44
 
45
45
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
46
46
  annotated = images_id.merge(meta, on="id", right_on="meta.id")
@@ -77,7 +77,7 @@ def is_positive_dialogue_ending(file) -> bool:
77
77
  return classifier(dialogue_ending)[0]["label"] == "POSITIVE"
78
78
 
79
79
  chain = (
80
- dc.from_storage("gs://datachain-demo/chatbot-KiT/",
80
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/",
81
81
  object_name="file", type="text", anon=True)
82
82
  .settings(parallel=8, cache=True)
83
83
  .map(is_positive=is_positive_dialogue_ending)
@@ -132,7 +132,7 @@ def eval_dialogue(file: dc.File) -> bool:
132
132
  return result.lower().startswith("success")
133
133
 
134
134
  chain = (
135
- dc.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
135
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
136
136
  .map(is_success=eval_dialogue)
137
137
  .save("mistral_files")
138
138
  )
@@ -177,7 +177,7 @@ def eval_dialog(file: dc.File) -> ChatCompletionResponse:
177
177
  {"role": "user", "content": file.read()}])
178
178
 
179
179
  chain = (
180
- dc.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
180
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
181
181
  .settings(parallel=4, cache=True)
182
182
  .map(response=eval_dialog)
183
183
  .map(status=lambda response: response.choices[0].message.content.lower()[:7])
@@ -210,14 +210,14 @@ name usage usage usage
210
210
 
211
211
  In the previous examples, datasets were saved in the embedded database
212
212
  (`SQLite` in folder `.datachain` of the working directory). These datasets were automatically versioned, and
213
- can be accessed using `dc.from_dataset("dataset_name")`.
213
+ can be accessed using `dc.read_dataset("dataset_name")`.
214
214
 
215
215
  Here is how to retrieve a saved dataset and iterate over the objects:
216
216
 
217
217
  ``` py
218
218
  import datachain as dc
219
219
 
220
- chain = dc.from_dataset("response")
220
+ chain = dc.read_dataset("response")
221
221
 
222
222
  # Iterating one-by-one: support out-of-memory workflow
223
223
  for file, response in chain.limit(5).collect("file", "response"):
@@ -248,7 +248,7 @@ output tokens:
248
248
 
249
249
  ``` py
250
250
  import datachain as dc
251
- chain = dc.from_dataset("mistral_dataset")
251
+ chain = dc.read_dataset("mistral_dataset")
252
252
 
253
253
  cost = chain.sum("response.usage.prompt_tokens")*0.000002 \
254
254
  + chain.sum("response.usage.completion_tokens")*0.000006
@@ -276,7 +276,7 @@ import datachain as dc
276
276
  processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
277
277
 
278
278
  chain = (
279
- dc.from_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
279
+ dc.read_storage("gs://datachain-demo/dogs-and-cats/", type="image", anon=True)
280
280
  .map(label=lambda name: name.split(".")[0], params=["file.name"])
281
281
  .select("file", "label").to_pytorch(
282
282
  transform=processor.image_processor,
@@ -2,12 +2,12 @@
2
2
 
3
3
  `File` is a special [`DataModel`](index.md#datachain.lib.data_model.DataModel),
4
4
  which is automatically generated when a `DataChain` is created from files,
5
- such as in [`dc.from_storage`](../datachain.md#datachain.lib.dc.storage.from_storage):
5
+ such as in [`dc.read_storage`](../datachain.md#datachain.lib.dc.storage.read_storage):
6
6
 
7
7
  ```python
8
8
  import datachain as dc
9
9
 
10
- chain = dc.from_storage("gs://datachain-demo/dogs-and-cats")
10
+ chain = dc.read_storage("gs://datachain-demo/dogs-and-cats")
11
11
  chain.print_schema()
12
12
  ```
13
13
 
@@ -2,12 +2,12 @@
2
2
 
3
3
  `ImageFile` is inherited from [`File`](file.md) with additional methods for working with image files.
4
4
 
5
- `ImageFile` is generated when a `DataChain` is created [from storage](../datachain.md#datachain.lib.dc.storage.from_storage), using `type="image"` param:
5
+ `ImageFile` is generated when a `DataChain` is created [from storage](../datachain.md#datachain.lib.dc.storage.read_storage), using `type="image"` param:
6
6
 
7
7
  ```python
8
8
  import datachain as dc
9
9
 
10
- chain = dc.from_storage("s3://bucket-name/", type="image")
10
+ chain = dc.read_storage("s3://bucket-name/", type="image")
11
11
  ```
12
12
 
13
13
  ::: datachain.lib.file.ImageFile
@@ -2,12 +2,12 @@
2
2
 
3
3
  `TextFile` is inherited from [`File`](file.md) with additional methods for working with text files.
4
4
 
5
- `TextFile` is generated when a `DataChain` is created [from storage](../datachain.md#datachain.lib.dc.storage.from_storage), using `type="text"` param:
5
+ `TextFile` is generated when a `DataChain` is created [from storage](../datachain.md#datachain.lib.dc.storage.read_storage), using `type="text"` param:
6
6
 
7
7
  ```python
8
8
  import datachain as dc
9
9
 
10
- chain = dc.from_storage("s3://bucket-name/", type="text")
10
+ chain = dc.read_storage("s3://bucket-name/", type="text")
11
11
  ```
12
12
 
13
13
  ::: datachain.lib.file.TextFile
@@ -2,12 +2,12 @@
2
2
 
3
3
  `VideoFile` extends [`File`](file.md) and provides additional methods for working with video files.
4
4
 
5
- `VideoFile` instances are created when a `DataChain` is initialized [from storage](../datachain.md#datachain.lib.dc.storage.from_storage) with the `type="video"` parameter:
5
+ `VideoFile` instances are created when a `DataChain` is initialized [from storage](../datachain.md#datachain.lib.dc.storage.read_storage) with the `type="video"` parameter:
6
6
 
7
7
  ```python
8
8
  import datachain as dc
9
9
 
10
- chain = dc.from_storage("s3://bucket-name/", type="video")
10
+ chain = dc.read_storage("s3://bucket-name/", type="video")
11
11
  ```
12
12
 
13
13
  There are additional models for working with video files:
@@ -9,27 +9,27 @@ for examples of how to create a chain.
9
9
 
10
10
  ::: datachain.query.schema.Column
11
11
 
12
- ::: datachain.lib.dc.csv.from_csv
12
+ ::: datachain.lib.dc.csv.read_csv
13
13
 
14
- ::: datachain.lib.dc.datasets.from_dataset
14
+ ::: datachain.lib.dc.datasets.read_dataset
15
15
 
16
16
  ::: datachain.lib.dc.datasets.datasets
17
17
 
18
- ::: datachain.lib.dc.hf.from_hf
18
+ ::: datachain.lib.dc.hf.read_hf
19
19
 
20
- ::: datachain.lib.dc.json.from_json
20
+ ::: datachain.lib.dc.json.read_json
21
21
 
22
22
  ::: datachain.lib.dc.listings.listings
23
23
 
24
- ::: datachain.lib.dc.pandas.from_pandas
24
+ ::: datachain.lib.dc.pandas.read_pandas
25
25
 
26
- ::: datachain.lib.dc.parquet.from_parquet
26
+ ::: datachain.lib.dc.parquet.read_parquet
27
27
 
28
- ::: datachain.lib.dc.records.from_records
28
+ ::: datachain.lib.dc.records.read_records
29
29
 
30
- ::: datachain.lib.dc.storage.from_storage
30
+ ::: datachain.lib.dc.storage.read_storage
31
31
 
32
- ::: datachain.lib.dc.values.from_values
32
+ ::: datachain.lib.dc.values.read_values
33
33
 
34
34
  ::: datachain.lib.dc.DataChain
35
35
 
@@ -1,17 +1,17 @@
1
1
  # Interacting with remote storage
2
2
 
3
- DataChain supports reading and writing data from different remote storages using methods like `dc.from_storage` and `dc.to_storage`. The supported storages include: local file system, AWS S3 storage, Google Cloud Storage, Azure Blob Storage, Hugging Face and more.
3
+ DataChain supports reading and writing data from different remote storages using methods like `dc.read_storage` and `dc.to_storage`. The supported storages include: local file system, AWS S3 storage, Google Cloud Storage, Azure Blob Storage, Hugging Face and more.
4
4
 
5
5
  Example implementation for reading and writing data from/to different remote storages:
6
6
 
7
7
  ```python
8
8
  import datachain as dc
9
9
 
10
- dc = dc.from_storage("s3://bucket-name/path/to/data")
10
+ dc = dc.read_storage("s3://bucket-name/path/to/data")
11
11
  dc.to_storage("gs://bucket-name/path/to/data")
12
12
  ```
13
13
 
14
- DataChain uses [fsspec](https://filesystem-spec.readthedocs.io/en/latest/) to interact with different remote storages. You can pass the following fsspec-supported URIs to the `from_storage` and `to_storage` methods.
14
+ DataChain uses [fsspec](https://filesystem-spec.readthedocs.io/en/latest/) to interact with different remote storages. You can pass the following fsspec-supported URIs to the `read_storage` and `to_storage` methods.
15
15
 
16
16
  - Local file system: `file://path/to/data`
17
17
  - AWS S3 storage: `s3://bucket-name/path/to/data`
@@ -134,8 +134,9 @@ DataChain uses [s3fs](https://s3fs.readthedocs.io/en/latest/) to interact with A
134
134
 
135
135
 
136
136
  Example:
137
+
137
138
  ```python
138
- chain = dc.from_storage(
139
+ chain = dc.read_storage(
139
140
  "s3://my-bucket/my-dir",
140
141
  client_config = {
141
142
  "endpoint_url": "<minio-endpoint-url>",
@@ -67,7 +67,7 @@ def image_description(file):
67
67
 
68
68
  if __name__ == "__main__":
69
69
  (
70
- dc.from_storage(source, type="image")
70
+ dc.read_storage(source, type="image")
71
71
  .settings(parallel=-1)
72
72
  .filter(dc.C("file.path").glob("*.jpg"))
73
73
  .limit(5000)
@@ -71,7 +71,7 @@ class LLaVADescribe(dc.Mapper):
71
71
 
72
72
  if __name__ == "__main__":
73
73
  (
74
- dc.from_storage(source, type="image")
74
+ dc.read_storage(source, type="image")
75
75
  .filter(dc.C("file.path").glob("*/cat*.jpg"))
76
76
  .map(
77
77
  desc=LLaVADescribe(
@@ -41,7 +41,7 @@ def openimage_detect(args):
41
41
  source = "gs://datachain-demo/openimages-v6-test-jsonpairs/"
42
42
 
43
43
  (
44
- dc.from_storage(source)
44
+ dc.read_storage(source)
45
45
  .filter(dc.C("file.path").glob("*.jpg") | dc.C("file.path").glob("*.json"))
46
46
  .agg(
47
47
  openimage_detect,
@@ -10,7 +10,7 @@ def process_bboxes(yolo: YOLO, file: dc.File) -> YoloBBoxes:
10
10
 
11
11
 
12
12
  (
13
- dc.from_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
13
+ dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
14
14
  .filter(dc.C("file.path").glob("*.jpg"))
15
15
  .limit(20)
16
16
  .setup(yolo=lambda: YOLO("yolo11n.pt"))
@@ -10,7 +10,7 @@ def process_poses(yolo: YOLO, file: dc.File) -> YoloPoses:
10
10
 
11
11
 
12
12
  (
13
- dc.from_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
13
+ dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
14
14
  .filter(dc.C("file.path").glob("*.jpg"))
15
15
  .limit(20)
16
16
  .setup(yolo=lambda: YOLO("yolo11n-pose.pt"))
@@ -10,7 +10,7 @@ def process_segments(yolo: YOLO, file: dc.File) -> YoloSegments:
10
10
 
11
11
 
12
12
  (
13
- dc.from_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
13
+ dc.read_storage("gs://datachain-demo/openimages-v6-test-jsonpairs/")
14
14
  .filter(dc.C("file.path").glob("*.jpg"))
15
15
  .limit(20)
16
16
  .setup(yolo=lambda: YOLO("yolo11n-seg.pt"))
@@ -9,7 +9,7 @@ def num_chars_udf(file):
9
9
  return ([],)
10
10
 
11
11
 
12
- chain = dc.from_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
12
+ chain = dc.read_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
13
13
  chain.map(num_chars_udf, params=["file"], output={"num_chars": list[str]}).select(
14
14
  "file.path", "num_chars"
15
15
  ).show(5)
@@ -31,7 +31,7 @@ ChatFeature = ModelStore.register(ChatDialog)
31
31
  def main():
32
32
  # Dynamic JSONl schema from 2 objects
33
33
  uri = "gs://datachain-demo/jsonl/object.jsonl"
34
- jsonl_ds = dc.from_json(uri, format="jsonl", anon="True")
34
+ jsonl_ds = dc.read_json(uri, format="jsonl", anon="True")
35
35
  jsonl_ds.show()
36
36
 
37
37
  # Dynamic JSON schema from 200 OpenImage json-pairs with validation errors
@@ -39,7 +39,7 @@ def main():
39
39
  schema_uri = (
40
40
  "gs://datachain-demo/openimages-v6-test-jsonpairs/08392c290ecc9d2a.json"
41
41
  )
42
- json_pairs_ds = dc.from_json(
42
+ json_pairs_ds = dc.read_json(
43
43
  uri, schema_from=schema_uri, jmespath="@", model_name="OpenImage", anon="True"
44
44
  )
45
45
  json_pairs_ds.show()
@@ -47,29 +47,29 @@ def main():
47
47
  uri = "gs://datachain-demo/coco2017/annotations_captions/"
48
48
 
49
49
  # Print JSON schema in Pydantic format from main COCO annotation
50
- chain = dc.from_storage(uri, anon="True").filter(dc.C("file.path").glob("*.json"))
50
+ chain = dc.read_storage(uri, anon="True").filter(dc.C("file.path").glob("*.json"))
51
51
  file = next(chain.limit(1).collect("file"))
52
52
  print(gen_datamodel_code(file, jmespath="@", model_name="Coco"))
53
53
 
54
54
  # Static JSON schema test parsing 3/7 objects
55
- static_json_ds = dc.from_json(
55
+ static_json_ds = dc.read_json(
56
56
  uri, jmespath="licenses", spec=LicenseFeature, nrows=3, anon="True"
57
57
  )
58
58
  static_json_ds.show()
59
59
 
60
60
  # Dynamic JSON schema test parsing 5K objects
61
- dynamic_json_ds = dc.from_json(uri, jmespath="images", anon="True")
61
+ dynamic_json_ds = dc.read_json(uri, jmespath="images", anon="True")
62
62
  print(dynamic_json_ds.to_pandas())
63
63
 
64
64
  # Static CSV with header schema test parsing 3.5K objects
65
65
  uri = "gs://datachain-demo/chatbot-csv/"
66
- static_csv_ds = dc.from_csv(uri, output=ChatDialog, object_name="chat", anon="True")
66
+ static_csv_ds = dc.read_csv(uri, output=ChatDialog, object_name="chat", anon="True")
67
67
  static_csv_ds.print_schema()
68
68
  static_csv_ds.show()
69
69
 
70
70
  # Dynamic CSV with header schema test parsing 3/3M objects
71
71
  uri = "gs://datachain-demo/laion-aesthetics-csv/laion_aesthetics_1024_33M_1.csv"
72
- dynamic_csv_ds = dc.from_csv(uri, object_name="laion", nrows=3, anon="True")
72
+ dynamic_csv_ds = dc.read_csv(uri, object_name="laion", nrows=3, anon="True")
73
73
  dynamic_csv_ds.print_schema()
74
74
  dynamic_csv_ds.show()
75
75
 
@@ -55,7 +55,7 @@ class CNN(nn.Module):
55
55
 
56
56
  if __name__ == "__main__":
57
57
  ds = (
58
- dc.from_storage(STORAGE, type="image")
58
+ dc.read_storage(STORAGE, type="image")
59
59
  .settings(prefetch=25)
60
60
  .filter(dc.C("file.path").glob("*.jpg"))
61
61
  .map(
@@ -30,7 +30,7 @@ def path_len_benchmark(path):
30
30
 
31
31
 
32
32
  # Run in chain
33
- dc.from_storage(
33
+ dc.read_storage(
34
34
  "gs://datachain-demo/dogs-and-cats/",
35
35
  ).settings(parallel=-1).map(
36
36
  path_len_benchmark,
@@ -10,7 +10,7 @@ def path_len(path):
10
10
 
11
11
  if __name__ == "__main__":
12
12
  # Run in chain
13
- dc.from_storage(
13
+ dc.read_storage(
14
14
  uri="gs://datachain-demo/dogs-and-cats/",
15
15
  ).map(
16
16
  path_len,
@@ -34,7 +34,7 @@ class ImageEncoder(dc.Mapper):
34
34
  if __name__ == "__main__":
35
35
  # Run in chain
36
36
  (
37
- dc.from_storage("gs://datachain-demo/dogs-and-cats/", type="image")
37
+ dc.read_storage("gs://datachain-demo/dogs-and-cats/", type="image")
38
38
  .filter(dc.C("file.path").glob("*cat*.jpg"))
39
39
  .settings(parallel=2)
40
40
  .limit(5)
@@ -39,7 +39,7 @@ class Rating(BaseModel):
39
39
 
40
40
 
41
41
  chain = (
42
- dc.from_storage(DATA, type="text")
42
+ dc.read_storage(DATA, type="text")
43
43
  .filter(dc.Column("file.path").glob("*.txt"))
44
44
  .limit(5)
45
45
  .settings(parallel=4, cache=True)
@@ -48,7 +48,7 @@ def eval_dialog(
48
48
  # Save to HF as Parquet. Dataset can be previewed here:
49
49
  # https://huggingface.co/datasets/dvcorg/test-datachain-llm-eval/viewer
50
50
  (
51
- dc.from_csv("hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv")
51
+ dc.read_csv("hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv")
52
52
  .settings(parallel=10)
53
53
  .setup(client=lambda: InferenceClient("meta-llama/Llama-3.1-70B-Instruct"))
54
54
  .map(response=eval_dialog)
@@ -58,7 +58,7 @@ def eval_dialog(
58
58
  # Read it back to filter and show.
59
59
  # It restores the Pydantic model from Parquet under the hood.
60
60
  (
61
- dc.from_parquet(
61
+ dc.read_parquet(
62
62
  "hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet", source=False
63
63
  )
64
64
  .filter(dc.C("response.result") == "Failure")
@@ -9,8 +9,8 @@ source = "gs://datachain-demo/50k-laion-files/000000/00000000*"
9
9
 
10
10
 
11
11
  def create_dataset():
12
- imgs = dc.from_storage(source, type="image").filter(dc.C("file.path").glob("*.jpg"))
13
- captions = dc.from_storage(source, type="text").filter(
12
+ imgs = dc.read_storage(source, type="image").filter(dc.C("file.path").glob("*.jpg"))
13
+ captions = dc.read_storage(source, type="text").filter(
14
14
  dc.C("file.path").glob("*.txt")
15
15
  )
16
16
  return imgs.merge(
@@ -50,7 +50,7 @@ if __name__ == "__main__":
50
50
  print("** HuggingFace pipeline helper model zoo demo **")
51
51
  print("\nZero-shot object detection and classification:")
52
52
  (
53
- dc.from_storage(
53
+ dc.read_storage(
54
54
  image_source,
55
55
  anon=True,
56
56
  type="image",
@@ -72,7 +72,7 @@ if __name__ == "__main__":
72
72
 
73
73
  print("\nNot-safe-for-work image detection:")
74
74
  (
75
- dc.from_storage(
75
+ dc.read_storage(
76
76
  image_source,
77
77
  anon=True,
78
78
  type="image",
@@ -95,7 +95,7 @@ if __name__ == "__main__":
95
95
  try:
96
96
  subprocess.run(["ffmpeg", "-L"], check=True) # noqa: S603, S607
97
97
  (
98
- dc.from_storage(
98
+ dc.read_storage(
99
99
  audio_source,
100
100
  anon=True,
101
101
  type="binary",
@@ -118,7 +118,7 @@ if __name__ == "__main__":
118
118
 
119
119
  print("\nLong text summarization:")
120
120
  (
121
- dc.from_storage(
121
+ dc.read_storage(
122
122
  text_source,
123
123
  anon=True,
124
124
  type="text",
@@ -72,7 +72,7 @@ def describe_image(
72
72
 
73
73
  if __name__ == "__main__":
74
74
  (
75
- dc.from_storage(
75
+ dc.read_storage(
76
76
  SOURCE,
77
77
  anon=True,
78
78
  )
@@ -16,18 +16,18 @@ NPZ_METADATA = os.getenv(
16
16
  )
17
17
 
18
18
  wds_images = (
19
- dc.from_storage(IMAGE_TARS, type="image")
19
+ dc.read_storage(IMAGE_TARS, type="image")
20
20
  .settings(cache=True)
21
21
  .gen(laion=process_webdataset(spec=WDSLaion), params="file")
22
22
  )
23
23
 
24
24
  wds_with_pq = (
25
- dc.from_parquet(PARQUET_METADATA)
25
+ dc.read_parquet(PARQUET_METADATA)
26
26
  .settings(cache=True)
27
27
  .merge(wds_images, on="uid", right_on="laion.json.uid", inner=True)
28
28
  )
29
29
 
30
- wds_npz = dc.from_storage(NPZ_METADATA).settings(cache=True).gen(emd=process_laion_meta)
30
+ wds_npz = dc.read_storage(NPZ_METADATA).settings(cache=True).gen(emd=process_laion_meta)
31
31
 
32
32
 
33
33
  res = wds_npz.merge(
@@ -6,10 +6,10 @@ from datachain.lib.webdataset_laion import WDSLaion
6
6
 
7
7
  name = "wds"
8
8
  try:
9
- wds = dc.from_dataset(name=name)
9
+ wds = dc.read_dataset(name=name)
10
10
  except datachain.error.DatasetNotFoundError:
11
11
  wds = (
12
- dc.from_storage("gs://datachain-demo/datacomp-small/shards")
12
+ dc.read_storage("gs://datachain-demo/datacomp-small/shards")
13
13
  .filter(dc.C("file.path").glob("*/00000000.tar"))
14
14
  .settings(cache=True)
15
15
  .gen(laion=process_webdataset(spec=WDSLaion), params="file")