datachain 0.6.3__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (259) hide show
  1. {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/tests.yml +2 -0
  2. {datachain-0.6.3/src/datachain.egg-info → datachain-0.6.5}/PKG-INFO +1 -1
  3. datachain-0.6.5/examples/llm_and_nlp/hf-dataset-llm-eval.py +59 -0
  4. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/catalog.py +3 -25
  5. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/cli.py +0 -8
  6. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/fsspec.py +10 -5
  7. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/hf.py +1 -0
  8. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/local.py +7 -3
  9. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/metastore.py +11 -478
  10. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/sqlite.py +9 -41
  11. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/warehouse.py +1 -2
  12. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/dataset.py +12 -10
  13. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/error.py +0 -4
  14. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/arrow.py +1 -1
  15. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/node.py +1 -1
  16. {datachain-0.6.3 → datachain-0.6.5/src/datachain.egg-info}/PKG-INFO +1 -1
  17. {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/SOURCES.txt +1 -2
  18. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_catalog.py +0 -5
  19. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_datachain.py +2 -3
  20. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_dataset_query.py +20 -35
  21. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_datasets.py +0 -1
  22. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_catalog_loader.py +3 -8
  23. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_metastore.py +2 -6
  24. datachain-0.6.3/src/datachain/storage.py +0 -136
  25. datachain-0.6.3/tests/unit/test_storage.py +0 -188
  26. {datachain-0.6.3 → datachain-0.6.5}/.cruft.json +0 -0
  27. {datachain-0.6.3 → datachain-0.6.5}/.gitattributes +0 -0
  28. {datachain-0.6.3 → datachain-0.6.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  29. {datachain-0.6.3 → datachain-0.6.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  30. {datachain-0.6.3 → datachain-0.6.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  31. {datachain-0.6.3 → datachain-0.6.5}/.github/codecov.yaml +0 -0
  32. {datachain-0.6.3 → datachain-0.6.5}/.github/dependabot.yml +0 -0
  33. {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/benchmarks.yml +0 -0
  34. {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/release.yml +0 -0
  35. {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/tests-studio.yml +0 -0
  36. {datachain-0.6.3 → datachain-0.6.5}/.github/workflows/update-template.yaml +0 -0
  37. {datachain-0.6.3 → datachain-0.6.5}/.gitignore +0 -0
  38. {datachain-0.6.3 → datachain-0.6.5}/.pre-commit-config.yaml +0 -0
  39. {datachain-0.6.3 → datachain-0.6.5}/CODE_OF_CONDUCT.rst +0 -0
  40. {datachain-0.6.3 → datachain-0.6.5}/CONTRIBUTING.rst +0 -0
  41. {datachain-0.6.3 → datachain-0.6.5}/LICENSE +0 -0
  42. {datachain-0.6.3 → datachain-0.6.5}/README.rst +0 -0
  43. {datachain-0.6.3 → datachain-0.6.5}/docs/assets/captioned_cartoons.png +0 -0
  44. {datachain-0.6.3 → datachain-0.6.5}/docs/assets/datachain-white.svg +0 -0
  45. {datachain-0.6.3 → datachain-0.6.5}/docs/assets/datachain.svg +0 -0
  46. {datachain-0.6.3 → datachain-0.6.5}/docs/assets/flowchart.png +0 -0
  47. {datachain-0.6.3 → datachain-0.6.5}/docs/index.md +0 -0
  48. {datachain-0.6.3 → datachain-0.6.5}/docs/references/datachain.md +0 -0
  49. {datachain-0.6.3 → datachain-0.6.5}/docs/references/datatype.md +0 -0
  50. {datachain-0.6.3 → datachain-0.6.5}/docs/references/file.md +0 -0
  51. {datachain-0.6.3 → datachain-0.6.5}/docs/references/index.md +0 -0
  52. {datachain-0.6.3 → datachain-0.6.5}/docs/references/sql.md +0 -0
  53. {datachain-0.6.3 → datachain-0.6.5}/docs/references/torch.md +0 -0
  54. {datachain-0.6.3 → datachain-0.6.5}/docs/references/udf.md +0 -0
  55. {datachain-0.6.3 → datachain-0.6.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  56. {datachain-0.6.3 → datachain-0.6.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  57. {datachain-0.6.3 → datachain-0.6.5}/examples/computer_vision/openimage-detect.py +0 -0
  58. {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/common_sql_functions.py +0 -0
  59. {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/json-csv-reader.py +0 -0
  60. {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/torch-loader.py +0 -0
  61. {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/udfs/parallel.py +0 -0
  62. {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/udfs/simple.py +0 -0
  63. {datachain-0.6.3 → datachain-0.6.5}/examples/get_started/udfs/stateful.py +0 -0
  64. {datachain-0.6.3 → datachain-0.6.5}/examples/llm_and_nlp/claude-query.py +0 -0
  65. {datachain-0.6.3 → datachain-0.6.5}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  66. {datachain-0.6.3 → datachain-0.6.5}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  67. {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/clip_inference.py +0 -0
  68. {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/hf_pipeline.py +0 -0
  69. {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
  70. {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/wds.py +0 -0
  71. {datachain-0.6.3 → datachain-0.6.5}/examples/multimodal/wds_filtered.py +0 -0
  72. {datachain-0.6.3 → datachain-0.6.5}/mkdocs.yml +0 -0
  73. {datachain-0.6.3 → datachain-0.6.5}/noxfile.py +0 -0
  74. {datachain-0.6.3 → datachain-0.6.5}/overrides/main.html +0 -0
  75. {datachain-0.6.3 → datachain-0.6.5}/pyproject.toml +0 -0
  76. {datachain-0.6.3 → datachain-0.6.5}/setup.cfg +0 -0
  77. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/__init__.py +0 -0
  78. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/__main__.py +0 -0
  79. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/asyn.py +0 -0
  80. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/cache.py +0 -0
  81. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/__init__.py +0 -0
  82. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/datasource.py +0 -0
  83. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/catalog/loader.py +0 -0
  84. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/cli_utils.py +0 -0
  85. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/__init__.py +0 -0
  86. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/azure.py +0 -0
  87. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/fileslice.py +0 -0
  88. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/gcs.py +0 -0
  89. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/client/s3.py +0 -0
  90. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/config.py +0 -0
  91. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/__init__.py +0 -0
  92. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/db_engine.py +0 -0
  93. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/id_generator.py +0 -0
  94. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/job.py +0 -0
  95. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/schema.py +0 -0
  96. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/data_storage/serializer.py +0 -0
  97. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/job.py +0 -0
  98. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/__init__.py +0 -0
  99. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/clip.py +0 -0
  100. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/__init__.py +0 -0
  101. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/flatten.py +0 -0
  102. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
  103. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
  104. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/unflatten.py +0 -0
  105. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  106. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/data_model.py +0 -0
  107. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/dataset_info.py +0 -0
  108. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/dc.py +0 -0
  109. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/file.py +0 -0
  110. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/func/__init__.py +0 -0
  111. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/func/aggregate.py +0 -0
  112. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/func/func.py +0 -0
  113. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/hf.py +0 -0
  114. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/image.py +0 -0
  115. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/listing.py +0 -0
  116. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/listing_info.py +0 -0
  117. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/meta_formats.py +0 -0
  118. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/model_store.py +0 -0
  119. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/pytorch.py +0 -0
  120. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/settings.py +0 -0
  121. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/signal_schema.py +0 -0
  122. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/tar.py +0 -0
  123. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/text.py +0 -0
  124. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/udf.py +0 -0
  125. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/udf_signature.py +0 -0
  126. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/utils.py +0 -0
  127. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/vfile.py +0 -0
  128. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/webdataset.py +0 -0
  129. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/lib/webdataset_laion.py +0 -0
  130. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/listing.py +0 -0
  131. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/nodes_fetcher.py +0 -0
  132. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/nodes_thread_pool.py +0 -0
  133. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/progress.py +0 -0
  134. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/py.typed +0 -0
  135. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/__init__.py +0 -0
  136. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/batch.py +0 -0
  137. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/dataset.py +0 -0
  138. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/dispatch.py +0 -0
  139. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/metrics.py +0 -0
  140. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/params.py +0 -0
  141. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/queue.py +0 -0
  142. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/schema.py +0 -0
  143. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/query/session.py +0 -0
  144. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/remote/__init__.py +0 -0
  145. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/remote/studio.py +0 -0
  146. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/__init__.py +0 -0
  147. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/default/__init__.py +0 -0
  148. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/default/base.py +0 -0
  149. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/__init__.py +0 -0
  150. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/aggregate.py +0 -0
  151. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/array.py +0 -0
  152. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/conditional.py +0 -0
  153. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/path.py +0 -0
  154. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/random.py +0 -0
  155. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/functions/string.py +0 -0
  156. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/selectable.py +0 -0
  157. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/__init__.py +0 -0
  158. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/base.py +0 -0
  159. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/types.py +0 -0
  160. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/sqlite/vector.py +0 -0
  161. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/types.py +0 -0
  162. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/sql/utils.py +0 -0
  163. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/studio.py +0 -0
  164. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/telemetry.py +0 -0
  165. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/torch/__init__.py +0 -0
  166. {datachain-0.6.3 → datachain-0.6.5}/src/datachain/utils.py +0 -0
  167. {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/dependency_links.txt +0 -0
  168. {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/entry_points.txt +0 -0
  169. {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/requires.txt +0 -0
  170. {datachain-0.6.3 → datachain-0.6.5}/src/datachain.egg-info/top_level.txt +0 -0
  171. {datachain-0.6.3 → datachain-0.6.5}/tests/__init__.py +0 -0
  172. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/__init__.py +0 -0
  173. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/conftest.py +0 -0
  174. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  175. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/.dvc/config +0 -0
  176. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/.gitignore +0 -0
  177. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  178. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/test_datachain.py +0 -0
  179. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/test_ls.py +0 -0
  180. {datachain-0.6.3 → datachain-0.6.5}/tests/benchmarks/test_version.py +0 -0
  181. {datachain-0.6.3 → datachain-0.6.5}/tests/conftest.py +0 -0
  182. {datachain-0.6.3 → datachain-0.6.5}/tests/data.py +0 -0
  183. {datachain-0.6.3 → datachain-0.6.5}/tests/examples/__init__.py +0 -0
  184. {datachain-0.6.3 → datachain-0.6.5}/tests/examples/test_examples.py +0 -0
  185. {datachain-0.6.3 → datachain-0.6.5}/tests/examples/test_wds_e2e.py +0 -0
  186. {datachain-0.6.3 → datachain-0.6.5}/tests/examples/wds_data.py +0 -0
  187. {datachain-0.6.3 → datachain-0.6.5}/tests/func/__init__.py +0 -0
  188. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_client.py +0 -0
  189. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_feature_pickling.py +0 -0
  190. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_listing.py +0 -0
  191. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_ls.py +0 -0
  192. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_meta_formats.py +0 -0
  193. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_metrics.py +0 -0
  194. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_pull.py +0 -0
  195. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_pytorch.py +0 -0
  196. {datachain-0.6.3 → datachain-0.6.5}/tests/func/test_query.py +0 -0
  197. {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class.py +0 -0
  198. {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class_exception.py +0 -0
  199. {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class_parallel.py +0 -0
  200. {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  201. {datachain-0.6.3 → datachain-0.6.5}/tests/scripts/name_len_slow.py +0 -0
  202. {datachain-0.6.3 → datachain-0.6.5}/tests/test_atomicity.py +0 -0
  203. {datachain-0.6.3 → datachain-0.6.5}/tests/test_cli_e2e.py +0 -0
  204. {datachain-0.6.3 → datachain-0.6.5}/tests/test_cli_studio.py +0 -0
  205. {datachain-0.6.3 → datachain-0.6.5}/tests/test_query_e2e.py +0 -0
  206. {datachain-0.6.3 → datachain-0.6.5}/tests/test_telemetry.py +0 -0
  207. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/__init__.py +0 -0
  208. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/__init__.py +0 -0
  209. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/conftest.py +0 -0
  210. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_arrow.py +0 -0
  211. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_clip.py +0 -0
  212. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_datachain.py +0 -0
  213. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  214. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_datachain_merge.py +0 -0
  215. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_feature.py +0 -0
  216. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_feature_utils.py +0 -0
  217. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_file.py +0 -0
  218. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_hf.py +0 -0
  219. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_image.py +0 -0
  220. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_listing_info.py +0 -0
  221. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_schema.py +0 -0
  222. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_signal_schema.py +0 -0
  223. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_sql_to_python.py +0 -0
  224. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_text.py +0 -0
  225. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_udf_signature.py +0 -0
  226. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_utils.py +0 -0
  227. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/lib/test_webdataset.py +0 -0
  228. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/__init__.py +0 -0
  229. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/sqlite/__init__.py +0 -0
  230. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
  231. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_array.py +0 -0
  232. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_conditional.py +0 -0
  233. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_path.py +0 -0
  234. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_random.py +0 -0
  235. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_selectable.py +0 -0
  236. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/sql/test_string.py +0 -0
  237. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_asyn.py +0 -0
  238. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_cache.py +0 -0
  239. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_catalog.py +0 -0
  240. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_cli_parsing.py +0 -0
  241. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_client.py +0 -0
  242. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_client_s3.py +0 -0
  243. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_config.py +0 -0
  244. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_data_storage.py +0 -0
  245. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_database_engine.py +0 -0
  246. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_dataset.py +0 -0
  247. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_dispatch.py +0 -0
  248. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_fileslice.py +0 -0
  249. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_id_generator.py +0 -0
  250. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_listing.py +0 -0
  251. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_module_exports.py +0 -0
  252. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_query.py +0 -0
  253. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_query_metrics.py +0 -0
  254. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_query_params.py +0 -0
  255. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_serializer.py +0 -0
  256. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_session.py +0 -0
  257. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_utils.py +0 -0
  258. {datachain-0.6.3 → datachain-0.6.5}/tests/unit/test_warehouse.py +0 -0
  259. {datachain-0.6.3 → datachain-0.6.5}/tests/utils.py +0 -0
@@ -152,4 +152,6 @@ jobs:
152
152
  run: uv pip install nox --system
153
153
 
154
154
  - name: Run examples
155
+ env:
156
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
155
157
  run: nox -s examples -p ${{ matrix.pyv }} -- -m "${{ matrix.group }}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.6.3
3
+ Version: 0.6.5
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -0,0 +1,59 @@
1
+ from huggingface_hub import InferenceClient
2
+
3
+ from datachain import C, DataChain, DataModel
4
+
5
+ PROMPT = """
6
+ Was this dialog successful? Put result as a single word: Success or Failure.
7
+ Explain the reason in a few words.
8
+ """
9
+
10
+
11
+ class DialogEval(DataModel):
12
+ result: str
13
+ reason: str
14
+
15
+
16
+ # DataChain function to evaluate dialog.
17
+ # DataChain is using types for inputs, results to automatically infer schema.
18
+ def eval_dialog(user_input: str, bot_response: str) -> DialogEval:
19
+ client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
20
+
21
+ completion = client.chat_completion(
22
+ messages=[
23
+ {
24
+ "role": "user",
25
+ "content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
26
+ },
27
+ ],
28
+ response_format={"type": "json", "value": DialogEval.model_json_schema()},
29
+ )
30
+
31
+ message = completion.choices[0].message
32
+ try:
33
+ return DialogEval.model_validate_json(message.content)
34
+ except ValueError:
35
+ return DialogEval(result="Error", reason="Failed to parse response.")
36
+
37
+
38
+ # Run HF inference in parallel for each example.
39
+ # Get result as Pydantic model that DataChain can understand and serialize it.
40
+ # Save to HF as Parquet. Dataset can be previewed here:
41
+ # https://huggingface.co/datasets/dvcorg/test-datachain-llm-eval/viewer
42
+ (
43
+ DataChain.from_csv(
44
+ "hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv"
45
+ )
46
+ .settings(parallel=10)
47
+ .map(response=eval_dialog)
48
+ .to_parquet("hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet")
49
+ )
50
+
51
+ # Read it back to filter and show.
52
+ # It restores the Pydantic model from Parquet under the hood.
53
+ (
54
+ DataChain.from_parquet(
55
+ "hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet", source=False
56
+ )
57
+ .filter(C("response.result") == "Failure")
58
+ .show(3)
59
+ )
@@ -42,6 +42,7 @@ from datachain.dataset import (
42
42
  DatasetStats,
43
43
  DatasetStatus,
44
44
  RowDict,
45
+ StorageURI,
45
46
  create_dataset_uri,
46
47
  parse_dataset_uri,
47
48
  )
@@ -58,7 +59,6 @@ from datachain.node import DirType, Node, NodeWithPath
58
59
  from datachain.nodes_thread_pool import NodesThreadPool
59
60
  from datachain.remote.studio import StudioClient
60
61
  from datachain.sql.types import DateTime, SQLType, String
61
- from datachain.storage import StorageURI
62
62
  from datachain.utils import (
63
63
  DataChainDir,
64
64
  batched,
@@ -1702,31 +1702,9 @@ class Catalog:
1702
1702
  *,
1703
1703
  client_config=None,
1704
1704
  ) -> None:
1705
- root_sources = [
1706
- src for src in sources if Client.get_implementation(src).is_root_url(src)
1707
- ]
1708
- non_root_sources = [
1709
- src
1710
- for src in sources
1711
- if not Client.get_implementation(src).is_root_url(src)
1712
- ]
1713
-
1714
- client_config = client_config or self.client_config
1715
-
1716
- # for root sources (e.g s3://) we are just getting all buckets and
1717
- # saving them as storages, without further indexing in each bucket
1718
- for source in root_sources:
1719
- for bucket in Client.get_implementation(source).ls_buckets(**client_config):
1720
- client = self.get_client(bucket.uri, **client_config)
1721
- print(f"Registering storage {client.uri}")
1722
- self.metastore.create_storage_if_not_registered(client.uri)
1723
-
1724
1705
  self.enlist_sources(
1725
- non_root_sources,
1706
+ sources,
1726
1707
  update,
1727
- client_config=client_config,
1708
+ client_config=client_config or self.client_config,
1728
1709
  only_index=True,
1729
1710
  )
1730
-
1731
- def find_stale_storages(self) -> None:
1732
- self.metastore.find_stale_storages()
@@ -568,12 +568,6 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
568
568
  )
569
569
  add_sources_arg(parse_index)
570
570
 
571
- subp.add_parser(
572
- "find-stale-storages",
573
- parents=[parent_parser],
574
- description="Finds and marks stale storages",
575
- )
576
-
577
571
  show_parser = subp.add_parser(
578
572
  "show",
579
573
  parents=[parent_parser],
@@ -1100,8 +1094,6 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1100
1094
  )
1101
1095
  elif args.command == "completion":
1102
1096
  print(completion(args.shell))
1103
- elif args.command == "find-stale-storages":
1104
- catalog.find_stale_storages()
1105
1097
  elif args.command == "query":
1106
1098
  query(
1107
1099
  catalog,
@@ -31,11 +31,12 @@ from datachain.error import ClientError as DataChainClientError
31
31
  from datachain.lib.file import File
32
32
  from datachain.nodes_fetcher import NodesFetcher
33
33
  from datachain.nodes_thread_pool import NodeChunk
34
- from datachain.storage import StorageURI
35
34
 
36
35
  if TYPE_CHECKING:
37
36
  from fsspec.spec import AbstractFileSystem
38
37
 
38
+ from datachain.dataset import StorageURI
39
+
39
40
 
40
41
  logger = logging.getLogger("datachain")
41
42
 
@@ -63,7 +64,7 @@ def _is_win_local_path(uri: str) -> bool:
63
64
 
64
65
  class Bucket(NamedTuple):
65
66
  name: str
66
- uri: StorageURI
67
+ uri: "StorageURI"
67
68
  created: Optional[datetime]
68
69
 
69
70
 
@@ -115,7 +116,7 @@ class Client(ABC):
115
116
  return DATA_SOURCE_URI_PATTERN.match(name) is not None
116
117
 
117
118
  @staticmethod
118
- def parse_url(source: str) -> tuple[StorageURI, str]:
119
+ def parse_url(source: str) -> tuple["StorageURI", str]:
119
120
  cls = Client.get_implementation(source)
120
121
  storage_name, rel_path = cls.split_url(source)
121
122
  return cls.get_uri(storage_name), rel_path
@@ -148,7 +149,7 @@ class Client(ABC):
148
149
  @classmethod
149
150
  def from_source(
150
151
  cls,
151
- uri: StorageURI,
152
+ uri: "StorageURI",
152
153
  cache: DataChainCache,
153
154
  **kwargs,
154
155
  ) -> "Client":
@@ -156,6 +157,8 @@ class Client(ABC):
156
157
 
157
158
  @classmethod
158
159
  def ls_buckets(cls, **kwargs) -> Iterator[Bucket]:
160
+ from datachain.dataset import StorageURI
161
+
159
162
  for entry in cls.create_fs(**kwargs).ls(cls.PREFIX, detail=True):
160
163
  name = entry["name"].rstrip("/")
161
164
  yield Bucket(
@@ -169,7 +172,9 @@ class Client(ABC):
169
172
  return url == cls.PREFIX
170
173
 
171
174
  @classmethod
172
- def get_uri(cls, name) -> StorageURI:
175
+ def get_uri(cls, name) -> "StorageURI":
176
+ from datachain.dataset import StorageURI
177
+
173
178
  return StorageURI(f"{cls.PREFIX}{name}")
174
179
 
175
180
  @classmethod
@@ -23,6 +23,7 @@ class HfClient(Client):
23
23
 
24
24
  def info_to_file(self, v: dict[str, Any], path: str) -> File:
25
25
  return File(
26
+ source=self.uri,
26
27
  path=path,
27
28
  size=v["size"],
28
29
  version=v["last_commit"].oid,
@@ -2,16 +2,18 @@ import os
2
2
  import posixpath
3
3
  from datetime import datetime, timezone
4
4
  from pathlib import Path
5
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
6
6
  from urllib.parse import urlparse
7
7
 
8
8
  from fsspec.implementations.local import LocalFileSystem
9
9
 
10
10
  from datachain.lib.file import File
11
- from datachain.storage import StorageURI
12
11
 
13
12
  from .fsspec import Client
14
13
 
14
+ if TYPE_CHECKING:
15
+ from datachain.dataset import StorageURI
16
+
15
17
 
16
18
  class FileClient(Client):
17
19
  FS_CLASS = LocalFileSystem
@@ -28,7 +30,9 @@ class FileClient(Client):
28
30
  raise TypeError("Signed urls are not implemented for local file system")
29
31
 
30
32
  @classmethod
31
- def get_uri(cls, name) -> StorageURI:
33
+ def get_uri(cls, name) -> "StorageURI":
34
+ from datachain.dataset import StorageURI
35
+
32
36
  return StorageURI(f'{cls.PREFIX}/{name.removeprefix("/")}')
33
37
 
34
38
  @classmethod