datachain 0.3.9__tar.gz → 0.3.11__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (249)
  1. {datachain-0.3.9 → datachain-0.3.11}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.3.9/src/datachain.egg-info → datachain-0.3.11}/PKG-INFO +7 -3
  3. {datachain-0.3.9 → datachain-0.3.11}/README.rst +4 -1
  4. datachain-0.3.11/docs/assets/datachain.svg +24 -0
  5. datachain-0.3.11/docs/assets/datachain_logotype.svg +33 -0
  6. {datachain-0.3.9 → datachain-0.3.11}/docs/index.md +1 -1
  7. {datachain-0.3.9 → datachain-0.3.11}/examples/get_started/udfs/parallel.py +1 -1
  8. {datachain-0.3.9 → datachain-0.3.11}/examples/get_started/udfs/simple.py +1 -1
  9. {datachain-0.3.9 → datachain-0.3.11}/examples/multimodal/clip_inference.py +10 -9
  10. {datachain-0.3.9 → datachain-0.3.11}/examples/multimodal/wds.py +10 -11
  11. {datachain-0.3.9 → datachain-0.3.11}/mkdocs.yml +3 -3
  12. {datachain-0.3.9 → datachain-0.3.11}/pyproject.toml +3 -2
  13. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/catalog/catalog.py +57 -212
  14. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/cli.py +6 -38
  15. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/client/fsspec.py +3 -0
  16. datachain-0.3.11/src/datachain/client/hf.py +47 -0
  17. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/metastore.py +2 -29
  18. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/sqlite.py +3 -12
  19. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/warehouse.py +20 -29
  20. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/dataset.py +44 -32
  21. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/job.py +4 -3
  22. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/arrow.py +21 -5
  23. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/dataset_info.py +4 -0
  24. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/dc.py +183 -59
  25. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/file.py +10 -33
  26. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/hf.py +2 -1
  27. datachain-0.3.11/src/datachain/lib/listing.py +119 -0
  28. datachain-0.3.11/src/datachain/lib/listing_info.py +32 -0
  29. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/meta_formats.py +39 -56
  30. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/signal_schema.py +5 -2
  31. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/node.py +13 -0
  32. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/dataset.py +12 -105
  33. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/metrics.py +8 -0
  34. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/utils.py +5 -0
  35. {datachain-0.3.9 → datachain-0.3.11/src/datachain.egg-info}/PKG-INFO +7 -3
  36. {datachain-0.3.9 → datachain-0.3.11}/src/datachain.egg-info/SOURCES.txt +6 -2
  37. {datachain-0.3.9 → datachain-0.3.11}/src/datachain.egg-info/requires.txt +2 -1
  38. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/test_datachain.py +4 -6
  39. {datachain-0.3.9 → datachain-0.3.11}/tests/conftest.py +4 -0
  40. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_catalog.py +24 -82
  41. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_datachain.py +147 -11
  42. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_dataset_query.py +20 -4
  43. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_datasets.py +18 -13
  44. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_feature_pickling.py +21 -16
  45. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_ls.py +7 -4
  46. datachain-0.3.11/tests/func/test_meta_formats.py +88 -0
  47. datachain-0.3.11/tests/func/test_metrics.py +14 -0
  48. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_query.py +27 -43
  49. {datachain-0.3.9 → datachain-0.3.11}/tests/scripts/feature_class.py +2 -2
  50. {datachain-0.3.9 → datachain-0.3.11}/tests/scripts/feature_class_parallel.py +1 -1
  51. {datachain-0.3.9 → datachain-0.3.11}/tests/scripts/feature_class_parallel_data_model.py +1 -1
  52. {datachain-0.3.9 → datachain-0.3.11}/tests/test_query_e2e.py +5 -4
  53. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_arrow.py +38 -1
  54. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_datachain.py +60 -4
  55. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_datachain_merge.py +57 -4
  56. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_signal_schema.py +20 -3
  57. datachain-0.3.11/tests/unit/test_catalog.py +28 -0
  58. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_dataset.py +28 -0
  59. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_listing.py +86 -0
  60. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_storage.py +0 -34
  61. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_utils.py +17 -0
  62. datachain-0.3.9/docs/assets/datachain.png +0 -0
  63. datachain-0.3.9/src/datachain/catalog/subclass.py +0 -60
  64. datachain-0.3.9/src/datachain/lib/listing.py +0 -111
  65. datachain-0.3.9/tests/unit/test_catalog.py +0 -170
  66. {datachain-0.3.9 → datachain-0.3.11}/.cruft.json +0 -0
  67. {datachain-0.3.9 → datachain-0.3.11}/.gitattributes +0 -0
  68. {datachain-0.3.9 → datachain-0.3.11}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  69. {datachain-0.3.9 → datachain-0.3.11}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  70. {datachain-0.3.9 → datachain-0.3.11}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  71. {datachain-0.3.9 → datachain-0.3.11}/.github/codecov.yaml +0 -0
  72. {datachain-0.3.9 → datachain-0.3.11}/.github/dependabot.yml +0 -0
  73. {datachain-0.3.9 → datachain-0.3.11}/.github/workflows/benchmarks.yml +0 -0
  74. {datachain-0.3.9 → datachain-0.3.11}/.github/workflows/release.yml +0 -0
  75. {datachain-0.3.9 → datachain-0.3.11}/.github/workflows/tests-studio.yml +0 -0
  76. {datachain-0.3.9 → datachain-0.3.11}/.github/workflows/tests.yml +0 -0
  77. {datachain-0.3.9 → datachain-0.3.11}/.github/workflows/update-template.yaml +0 -0
  78. {datachain-0.3.9 → datachain-0.3.11}/.gitignore +0 -0
  79. {datachain-0.3.9 → datachain-0.3.11}/CODE_OF_CONDUCT.rst +0 -0
  80. {datachain-0.3.9 → datachain-0.3.11}/CONTRIBUTING.rst +0 -0
  81. {datachain-0.3.9 → datachain-0.3.11}/LICENSE +0 -0
  82. {datachain-0.3.9 → datachain-0.3.11}/docs/assets/captioned_cartoons.png +0 -0
  83. {datachain-0.3.9 → datachain-0.3.11}/docs/assets/flowchart.png +0 -0
  84. {datachain-0.3.9 → datachain-0.3.11}/docs/references/datachain.md +0 -0
  85. {datachain-0.3.9 → datachain-0.3.11}/docs/references/datatype.md +0 -0
  86. {datachain-0.3.9 → datachain-0.3.11}/docs/references/file.md +0 -0
  87. {datachain-0.3.9 → datachain-0.3.11}/docs/references/index.md +0 -0
  88. {datachain-0.3.9 → datachain-0.3.11}/docs/references/sql.md +0 -0
  89. {datachain-0.3.9 → datachain-0.3.11}/docs/references/torch.md +0 -0
  90. {datachain-0.3.9 → datachain-0.3.11}/docs/references/udf.md +0 -0
  91. {datachain-0.3.9 → datachain-0.3.11}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  92. {datachain-0.3.9 → datachain-0.3.11}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  93. {datachain-0.3.9 → datachain-0.3.11}/examples/computer_vision/openimage-detect.py +0 -0
  94. {datachain-0.3.9 → datachain-0.3.11}/examples/get_started/common_sql_functions.py +0 -0
  95. {datachain-0.3.9 → datachain-0.3.11}/examples/get_started/json-csv-reader.py +0 -0
  96. {datachain-0.3.9 → datachain-0.3.11}/examples/get_started/torch-loader.py +0 -0
  97. {datachain-0.3.9 → datachain-0.3.11}/examples/get_started/udfs/stateful.py +0 -0
  98. {datachain-0.3.9 → datachain-0.3.11}/examples/llm_and_nlp/claude-query.py +0 -0
  99. {datachain-0.3.9 → datachain-0.3.11}/examples/llm_and_nlp/unstructured-text.py +0 -0
  100. {datachain-0.3.9 → datachain-0.3.11}/examples/multimodal/hf_pipeline.py +0 -0
  101. {datachain-0.3.9 → datachain-0.3.11}/examples/multimodal/openai_image_desc_lib.py +0 -0
  102. {datachain-0.3.9 → datachain-0.3.11}/examples/multimodal/wds_filtered.py +0 -0
  103. {datachain-0.3.9 → datachain-0.3.11}/noxfile.py +0 -0
  104. {datachain-0.3.9 → datachain-0.3.11}/setup.cfg +0 -0
  105. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/__init__.py +0 -0
  106. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/__main__.py +0 -0
  107. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/asyn.py +0 -0
  108. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/cache.py +0 -0
  109. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/catalog/__init__.py +0 -0
  110. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/catalog/datasource.py +0 -0
  111. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/catalog/loader.py +0 -0
  112. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/cli_utils.py +0 -0
  113. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/client/__init__.py +0 -0
  114. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/client/azure.py +0 -0
  115. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/client/fileslice.py +0 -0
  116. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/client/gcs.py +0 -0
  117. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/client/local.py +0 -0
  118. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/client/s3.py +0 -0
  119. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/config.py +0 -0
  120. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/__init__.py +0 -0
  121. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/db_engine.py +0 -0
  122. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/id_generator.py +0 -0
  123. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/job.py +0 -0
  124. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/schema.py +0 -0
  125. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/data_storage/serializer.py +0 -0
  126. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/error.py +0 -0
  127. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/__init__.py +0 -0
  128. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/clip.py +0 -0
  129. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/convert/__init__.py +0 -0
  130. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/convert/flatten.py +0 -0
  131. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/convert/python_to_sql.py +0 -0
  132. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/convert/sql_to_python.py +0 -0
  133. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/convert/unflatten.py +0 -0
  134. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  135. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/data_model.py +0 -0
  136. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/image.py +0 -0
  137. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/model_store.py +0 -0
  138. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/pytorch.py +0 -0
  139. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/settings.py +0 -0
  140. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/text.py +0 -0
  141. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/udf.py +0 -0
  142. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/udf_signature.py +0 -0
  143. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/utils.py +0 -0
  144. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/vfile.py +0 -0
  145. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/webdataset.py +0 -0
  146. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/lib/webdataset_laion.py +0 -0
  147. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/listing.py +0 -0
  148. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/nodes_fetcher.py +0 -0
  149. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/nodes_thread_pool.py +0 -0
  150. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/progress.py +0 -0
  151. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/py.typed +0 -0
  152. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/__init__.py +0 -0
  153. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/batch.py +0 -0
  154. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/builtins.py +0 -0
  155. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/dispatch.py +0 -0
  156. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/params.py +0 -0
  157. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/queue.py +0 -0
  158. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/schema.py +0 -0
  159. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/session.py +0 -0
  160. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/query/udf.py +0 -0
  161. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/remote/__init__.py +0 -0
  162. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/remote/studio.py +0 -0
  163. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/__init__.py +0 -0
  164. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/default/__init__.py +0 -0
  165. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/default/base.py +0 -0
  166. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/functions/__init__.py +0 -0
  167. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/functions/array.py +0 -0
  168. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/functions/conditional.py +0 -0
  169. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/functions/path.py +0 -0
  170. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/functions/random.py +0 -0
  171. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/functions/string.py +0 -0
  172. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/selectable.py +0 -0
  173. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/sqlite/__init__.py +0 -0
  174. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/sqlite/base.py +0 -0
  175. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/sqlite/types.py +0 -0
  176. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/sqlite/vector.py +0 -0
  177. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/types.py +0 -0
  178. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/sql/utils.py +0 -0
  179. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/storage.py +0 -0
  180. {datachain-0.3.9 → datachain-0.3.11}/src/datachain/torch/__init__.py +0 -0
  181. {datachain-0.3.9 → datachain-0.3.11}/src/datachain.egg-info/dependency_links.txt +0 -0
  182. {datachain-0.3.9 → datachain-0.3.11}/src/datachain.egg-info/entry_points.txt +0 -0
  183. {datachain-0.3.9 → datachain-0.3.11}/src/datachain.egg-info/top_level.txt +0 -0
  184. {datachain-0.3.9 → datachain-0.3.11}/tests/__init__.py +0 -0
  185. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/__init__.py +0 -0
  186. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/conftest.py +0 -0
  187. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  188. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/datasets/.dvc/config +0 -0
  189. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/datasets/.gitignore +0 -0
  190. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  191. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/test_ls.py +0 -0
  192. {datachain-0.3.9 → datachain-0.3.11}/tests/benchmarks/test_version.py +0 -0
  193. {datachain-0.3.9 → datachain-0.3.11}/tests/data.py +0 -0
  194. {datachain-0.3.9 → datachain-0.3.11}/tests/examples/__init__.py +0 -0
  195. {datachain-0.3.9 → datachain-0.3.11}/tests/examples/test_examples.py +0 -0
  196. {datachain-0.3.9 → datachain-0.3.11}/tests/examples/test_wds_e2e.py +0 -0
  197. {datachain-0.3.9 → datachain-0.3.11}/tests/examples/wds_data.py +0 -0
  198. {datachain-0.3.9 → datachain-0.3.11}/tests/func/__init__.py +0 -0
  199. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_client.py +0 -0
  200. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_listing.py +0 -0
  201. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_pull.py +0 -0
  202. {datachain-0.3.9 → datachain-0.3.11}/tests/func/test_pytorch.py +0 -0
  203. {datachain-0.3.9 → datachain-0.3.11}/tests/scripts/name_len_slow.py +0 -0
  204. {datachain-0.3.9 → datachain-0.3.11}/tests/test_cli_e2e.py +0 -0
  205. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/__init__.py +0 -0
  206. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/__init__.py +0 -0
  207. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/conftest.py +0 -0
  208. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_clip.py +0 -0
  209. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  210. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_feature.py +0 -0
  211. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_feature_utils.py +0 -0
  212. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_file.py +0 -0
  213. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_hf.py +0 -0
  214. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_image.py +0 -0
  215. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_schema.py +0 -0
  216. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_sql_to_python.py +0 -0
  217. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_text.py +0 -0
  218. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_udf_signature.py +0 -0
  219. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_utils.py +0 -0
  220. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/lib/test_webdataset.py +0 -0
  221. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/__init__.py +0 -0
  222. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/sqlite/__init__.py +0 -0
  223. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/sqlite/test_utils.py +0 -0
  224. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/test_array.py +0 -0
  225. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/test_conditional.py +0 -0
  226. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/test_path.py +0 -0
  227. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/test_random.py +0 -0
  228. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/test_selectable.py +0 -0
  229. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/sql/test_string.py +0 -0
  230. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_asyn.py +0 -0
  231. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_cache.py +0 -0
  232. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_catalog_loader.py +0 -0
  233. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_cli_parsing.py +0 -0
  234. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_client.py +0 -0
  235. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_client_s3.py +0 -0
  236. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_data_storage.py +0 -0
  237. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_database_engine.py +0 -0
  238. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_dispatch.py +0 -0
  239. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_fileslice.py +0 -0
  240. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_id_generator.py +0 -0
  241. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_metastore.py +0 -0
  242. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_module_exports.py +0 -0
  243. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_query_metrics.py +0 -0
  244. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_query_params.py +0 -0
  245. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_serializer.py +0 -0
  246. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_session.py +0 -0
  247. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_udf.py +0 -0
  248. {datachain-0.3.9 → datachain-0.3.11}/tests/unit/test_warehouse.py +0 -0
  249. {datachain-0.3.9 → datachain-0.3.11}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.6.2'
27
+ rev: 'v0.6.3'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.9
3
+ Version: 0.3.11
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -43,6 +43,7 @@ Requires-Dist: datamodel-code-generator>=0.25
43
43
  Requires-Dist: Pillow<11,>=10.0.0
44
44
  Requires-Dist: msgpack<2,>=1.0.4
45
45
  Requires-Dist: psutil
46
+ Requires-Dist: huggingface_hub
46
47
  Provides-Extra: docs
47
48
  Requires-Dist: mkdocs>=1.5.2; extra == "docs"
48
49
  Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
@@ -61,7 +62,7 @@ Provides-Extra: vector
61
62
  Requires-Dist: usearch; extra == "vector"
62
63
  Provides-Extra: hf
63
64
  Requires-Dist: numba>=0.60.0; extra == "hf"
64
- Requires-Dist: datasets[audio,vision]; extra == "hf"
65
+ Requires-Dist: datasets[audio,vision]>=2.21.0; extra == "hf"
65
66
  Provides-Extra: tests
66
67
  Requires-Dist: datachain[hf,remote,torch,vector]; extra == "tests"
67
68
  Requires-Dist: pytest<9,>=8; extra == "tests"
@@ -95,6 +96,10 @@ Requires-Dist: unstructured[pdf]; extra == "examples"
95
96
  Requires-Dist: pdfplumber==0.11.4; extra == "examples"
96
97
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
97
98
 
99
+ .. image:: docs/assets/datachain_logotype.svg
100
+ :height: 48
101
+ :alt: DataChain logo
102
+
98
103
  |PyPI| |Python Version| |Codecov| |Tests|
99
104
 
100
105
  .. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
@@ -110,7 +115,6 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
110
115
  :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
111
116
  :alt: Tests
112
117
 
113
- AI 🔗 DataChain
114
118
  ----------------
115
119
 
116
120
  DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
@@ -1,3 +1,7 @@
1
+ .. image:: docs/assets/datachain_logotype.svg
2
+ :height: 48
3
+ :alt: DataChain logo
4
+
1
5
  |PyPI| |Python Version| |Codecov| |Tests|
2
6
 
3
7
  .. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
@@ -13,7 +17,6 @@
13
17
  :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
14
18
  :alt: Tests
15
19
 
16
- AI 🔗 DataChain
17
20
  ----------------
18
21
 
19
22
  DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
@@ -0,0 +1,24 @@
1
+ <svg width="33" height="33" viewBox="0 0 33 33" fill="none" xmlns="http://www.w3.org/2000/svg">
2
+ <path fill-rule="evenodd" clip-rule="evenodd" d="M18.7492 22.785L20.5786 20.9554L23.0316 23.4046L21.1898 25.2343C20.379 26.0444 19.2798 26.4994 18.1338 26.4994C16.9878 26.4994 15.8887 26.0444 15.0779 25.2343L11.4025 21.5625C10.5926 20.7516 10.1377 19.6523 10.1377 18.5061C10.1377 17.3599 10.5926 16.2606 11.4025 15.4497L15.1222 11.7285L17.5628 14.182L13.8556 17.8906C13.7748 17.971 13.7106 18.0666 13.6668 18.1719C13.6231 18.2771 13.6005 18.39 13.6005 18.504C13.6005 18.618 13.6231 18.7309 13.6668 18.8362C13.7106 18.9414 13.7748 19.037 13.8556 19.1174L17.5268 22.785C17.689 22.9471 17.9088 23.0381 18.138 23.0381C18.3672 23.0381 18.587 22.9471 18.7492 22.785Z" fill="url(#paint0_linear_426_297)"/>
3
+ <path fill-rule="evenodd" clip-rule="evenodd" d="M29.4817 11.941C29.436 12.0491 29.3736 12.1406 29.2988 12.2196L20.5124 21.0074L22.9571 23.4567L31.7352 14.673C32.5451 13.8621 33 12.7628 33 11.6166C33 10.4704 32.5451 9.3711 31.7352 8.5602L28.064 4.88419C27.2532 4.07415 26.1541 3.61914 25.0081 3.61914C23.8621 3.61914 22.7629 4.07415 21.9521 4.88419L19.3906 7.43354L21.8395 9.88282L24.3927 7.32932C24.5549 7.16731 24.7747 7.07631 25.0039 7.07631C25.2331 7.07631 25.4529 7.16731 25.6151 7.32932L29.2822 10.997C29.404 11.1177 29.4873 11.2718 29.5213 11.4399C29.5554 11.608 29.5387 11.7824 29.4734 11.941H29.4776H29.4817Z" fill="url(#paint1_linear_426_297)"/>
4
+ <path d="M19.1743 15.1218L17.3446 16.9511L17.3446 16.9636L15.4656 18.8289L17.919 21.2778L21.6235 17.5665C22.4336 16.7557 22.8886 15.6566 22.8886 14.5106C22.8886 13.3646 22.4336 12.2654 21.6235 11.4547L17.9475 7.77926C17.1366 6.96935 16.0373 6.51442 14.8911 6.51442C13.7449 6.51442 12.6456 6.96935 11.8347 7.77926L7.18188 12.4318L9.63532 14.8765L14.2799 10.2323C14.442 10.0703 14.6619 9.97933 14.8911 9.97933C15.1204 9.97933 15.3402 10.0703 15.5024 10.2323L19.1743 13.8994C19.3363 14.0615 19.4273 14.2814 19.4273 14.5106C19.4273 14.7398 19.3363 14.9596 19.1743 15.1218Z" fill="url(#paint2_linear_426_297)"/>
5
+ <path d="M3.52721 21.0699C3.56879 20.966 3.63532 20.8703 3.71433 20.7913L5.54818 18.9578L5.54402 18.9495L7.99746 16.5048L9.6749 14.8364L7.22562 12.3834L1.26505 18.3508C0.455006 19.1615 -3.99616e-07 20.2607 -3.49523e-07 21.4067C-2.9943e-07 22.5527 0.455006 23.6518 1.26505 24.4626L4.94105 28.138C5.75196 28.9479 6.85127 29.4028 7.99746 29.4028C9.14364 29.4028 10.243 28.9479 11.0539 28.138L13.5353 25.6527L11.086 23.208L8.6129 25.6808C8.53251 25.7616 8.43695 25.8258 8.33168 25.8695C8.22642 25.9133 8.11354 25.9358 7.99954 25.9358C7.88553 25.9358 7.77265 25.9133 7.66739 25.8695C7.56213 25.8258 7.46656 25.7616 7.38618 25.6808L3.71433 22.0137C3.59316 21.8926 3.51069 21.7383 3.47737 21.5702C3.44406 21.4022 3.4614 21.2281 3.52721 21.0699Z" fill="url(#paint3_linear_426_297)"/>
6
+ <defs>
7
+ <linearGradient id="paint0_linear_426_297" x1="28.1527" y1="7.84149" x2="14.1164" y2="21.6319" gradientUnits="userSpaceOnUse">
8
+ <stop stop-color="#F46837"/>
9
+ <stop offset="1" stop-color="#945DD6"/>
10
+ </linearGradient>
11
+ <linearGradient id="paint1_linear_426_297" x1="28.1626" y1="8.00042" x2="14.1164" y2="21.8731" gradientUnits="userSpaceOnUse">
12
+ <stop stop-color="#F46837"/>
13
+ <stop offset="1" stop-color="#945DD6"/>
14
+ </linearGradient>
15
+ <linearGradient id="paint2_linear_426_297" x1="4.62869" y1="25.0522" x2="18.6605" y2="11.619" gradientUnits="userSpaceOnUse">
16
+ <stop stop-color="#13ADC7"/>
17
+ <stop offset="1" stop-color="#945DD6"/>
18
+ </linearGradient>
19
+ <linearGradient id="paint3_linear_426_297" x1="4.50795" y1="25.1728" x2="18.4191" y2="11.2572" gradientUnits="userSpaceOnUse">
20
+ <stop stop-color="#13ADC7"/>
21
+ <stop offset="1" stop-color="#945DD6"/>
22
+ </linearGradient>
23
+ </defs>
24
+ </svg>
@@ -0,0 +1,33 @@
1
+ <svg width="180" height="33" viewBox="0 0 180 33" fill="none" xmlns="http://www.w3.org/2000/svg">
2
+ <path fill-rule="evenodd" clip-rule="evenodd" d="M23.9965 24.5299L26.3379 22.1881L29.4775 25.3229L27.1201 27.6647C26.0825 28.7015 24.6757 29.2838 23.209 29.2838C21.7422 29.2838 20.3355 28.7015 19.2978 27.6647L14.5937 22.9652C13.5571 21.9273 12.9749 20.5203 12.9749 19.0534C12.9749 17.5864 13.5571 16.1794 14.5937 15.1415L19.3544 10.3789L22.4781 13.519L17.7333 18.2657C17.6298 18.3686 17.5477 18.4909 17.4917 18.6256C17.4357 18.7603 17.4069 18.9048 17.4069 19.0507C17.4069 19.1966 17.4357 19.3411 17.4917 19.4758C17.5477 19.6105 17.6298 19.7329 17.7333 19.8357L22.432 24.5299C22.6396 24.7373 22.9209 24.8537 23.2143 24.8537C23.5076 24.8537 23.789 24.7373 23.9965 24.5299Z" fill="url(#paint0_linear_449_28)"/>
3
+ <path fill-rule="evenodd" clip-rule="evenodd" d="M37.733 10.6509C37.6745 10.7893 37.5947 10.9064 37.4989 11.0075L26.2534 22.2549L29.3823 25.3897L40.6172 14.1476C41.6538 13.1097 42.236 11.7027 42.236 10.2358C42.236 8.76879 41.6538 7.36181 40.6172 6.32395L35.9184 1.61911C34.8807 0.582353 33.474 0 32.0072 0C30.5405 0 29.1338 0.582353 28.0961 1.61911L24.8176 4.88196L27.9519 8.01674L31.2197 4.74857C31.4272 4.54122 31.7086 4.42475 32.0019 4.42475C32.2953 4.42475 32.5766 4.54122 32.7842 4.74857L37.4776 9.44276C37.6335 9.59718 37.7401 9.79449 37.7837 10.0096C37.8273 10.2247 37.8059 10.4479 37.7224 10.6509H37.7277H37.733Z" fill="url(#paint1_linear_449_28)"/>
4
+ <path d="M24.5407 14.722L22.1989 17.0634L22.1989 17.0794L19.794 19.4668L22.9341 22.6011L27.6755 17.851C28.7122 16.8133 29.2946 15.4065 29.2946 13.9398C29.2946 12.4731 28.7122 11.0663 27.6754 10.0286L22.9706 5.32455C21.9328 4.28796 20.5258 3.7057 19.0588 3.7057C17.5918 3.7057 16.1848 4.28796 15.147 5.32455L9.19189 11.2792L12.332 14.4082L18.2764 8.46415C18.484 8.25683 18.7654 8.14037 19.0588 8.14037C19.3522 8.14037 19.6336 8.25683 19.8412 8.46415L24.5407 13.1576C24.748 13.3651 24.8645 13.6465 24.8645 13.9398C24.8645 14.2332 24.748 14.5145 24.5407 14.722Z" fill="url(#paint2_linear_449_28)"/>
5
+ <path d="M4.51439 22.3349C4.56762 22.2018 4.65277 22.0794 4.75389 21.9783L7.10099 19.6316L7.09567 19.621L10.2358 16.492L12.3827 14.3568L9.24791 11.2172L1.61911 18.8547C0.582352 19.8924 -5.1146e-07 21.2992 -4.47347e-07 22.7659C-3.83234e-07 24.2326 0.582353 25.6394 1.61911 26.6771L6.32395 31.3812C7.36181 32.4177 8.76879 33 10.2358 33C11.7027 33 13.1097 32.4177 14.1476 31.3812L17.3235 28.2003L14.1887 25.0714L11.0235 28.2362C10.9206 28.3397 10.7983 28.4218 10.6635 28.4778C10.5288 28.5338 10.3843 28.5627 10.2384 28.5627C10.0925 28.5627 9.94805 28.5338 9.81333 28.4778C9.6786 28.4218 9.55628 28.3397 9.45341 28.2362L4.75389 23.5428C4.59881 23.3878 4.49325 23.1903 4.45062 22.9752C4.40798 22.7602 4.43017 22.5373 4.51439 22.3349Z" fill="url(#paint3_linear_449_28)"/>
6
+ <path d="M55.6446 26.6131C54.6515 26.6131 53.7368 26.4306 52.9005 26.0657C52.0643 25.7008 51.3413 25.1882 50.7315 24.528C50.1217 23.8677 49.6513 23.1031 49.3203 22.2343C48.9892 21.3481 48.8237 20.3925 48.8237 19.3673V18.8199C48.8237 17.8121 48.9805 16.8651 49.2941 15.979C49.6252 15.0928 50.0781 14.3195 50.6531 13.6593C51.2454 12.999 51.951 12.4864 52.7699 12.1215C53.6061 11.7392 54.5295 11.5481 55.54 11.5481C56.655 11.5481 57.6307 11.7913 58.467 12.2779C59.3207 12.747 60.0001 13.4594 60.5054 14.4151C61.0106 15.3708 61.2894 16.5697 61.3416 18.0119L60.2702 16.7609V7.11719H63.9027V26.1439H61.028V20.1231H61.6552C61.603 21.5653 61.3068 22.773 60.7667 23.746C60.2266 24.7017 59.5123 25.4228 58.6238 25.9094C57.7527 26.3785 56.7596 26.6131 55.6446 26.6131ZM56.4547 23.5636C57.169 23.5636 57.8223 23.4072 58.4147 23.0944C59.0071 22.7643 59.4775 22.2951 59.8259 21.687C60.1918 21.0614 60.3747 20.3316 60.3747 19.4976V18.455C60.3747 17.621 60.1918 16.9259 59.8259 16.3699C59.46 15.7965 58.9809 15.3621 58.3886 15.0667C57.7962 14.7539 57.1516 14.5976 56.4547 14.5976C55.6707 14.5976 54.9738 14.7887 54.364 15.171C53.7717 15.5359 53.3013 16.0571 52.9528 16.7348C52.6218 17.4125 52.4563 18.2031 52.4563 19.1066C52.4563 20.0102 52.6305 20.8008 52.9789 21.4785C53.3274 22.1388 53.7978 22.6513 54.3902 23.0162C54.9999 23.3811 55.6881 23.5636 56.4547 23.5636Z" fill="white"/>
7
+ <path d="M76.6354 26.1439V21.9476H76.0343V17.2822C76.0343 16.4655 75.834 15.8573 75.4333 15.4577C75.0326 15.058 74.4141 14.8582 73.5778 14.8582C73.1422 14.8582 72.6196 14.8669 72.0098 14.8843C71.4 14.9016 70.7815 14.9277 70.1543 14.9625C69.5445 14.9798 68.9957 15.0059 68.5079 15.0406V11.9651C68.9086 11.9303 69.3616 11.8956 69.8669 11.8608C70.3721 11.8261 70.8861 11.8087 71.4087 11.8087C71.9488 11.7913 72.4541 11.7826 72.9245 11.7826C74.3879 11.7826 75.5988 11.9738 76.557 12.3561C77.5327 12.7383 78.2644 13.3378 78.7522 14.1545C79.2575 14.9711 79.5101 16.0398 79.5101 17.3603V26.1439H76.6354ZM72.0621 26.5088C71.0342 26.5088 70.1282 26.3264 69.3442 25.9615C68.5776 25.5966 67.9765 25.0753 67.541 24.3976C67.1228 23.72 66.9138 22.9033 66.9138 21.9476C66.9138 20.9051 67.1664 20.0536 67.6716 19.3933C68.1943 18.7331 68.9173 18.2378 69.8407 17.9077C70.7815 17.5775 71.8791 17.4125 73.1335 17.4125H76.4263V19.5758H73.0813C72.245 19.5758 71.6004 19.7843 71.1474 20.2013C70.7118 20.601 70.4941 21.1223 70.4941 21.7652C70.4941 22.4081 70.7118 22.9294 71.1474 23.329C71.6004 23.7287 72.245 23.9285 73.0813 23.9285C73.5865 23.9285 74.0482 23.8416 74.4663 23.6678C74.9019 23.4767 75.2591 23.1639 75.5378 22.7295C75.834 22.2778 75.9995 21.6696 76.0343 20.9051L76.9229 21.9216C76.8358 22.912 76.5919 23.746 76.1911 24.4237C75.8079 25.1014 75.2678 25.6226 74.5709 25.9875C73.8914 26.3351 73.0551 26.5088 72.0621 26.5088Z" fill="white"/>
8
+ <path d="M89.5686 26.3264C88.2619 26.3264 87.1818 26.1613 86.3281 25.8312C85.4918 25.4836 84.8646 24.9102 84.4465 24.1109C84.0283 23.2943 83.8193 22.1996 83.8193 20.8269L83.8454 8.00336H87.2427L87.2166 21.0614C87.2166 21.7565 87.3995 22.2951 87.7654 22.6774C88.1487 23.0423 88.6888 23.2248 89.3857 23.2248H91.607V26.3264H89.5686ZM81.5718 14.6497V11.9912H91.607V14.6497H81.5718Z" fill="white"/>
9
+ <path d="M103.203 26.1439V21.9476H102.602V17.2822C102.602 16.4655 102.401 15.8573 102.001 15.4577C101.6 15.058 100.981 14.8582 100.145 14.8582C99.7095 14.8582 99.1869 14.8669 98.5771 14.8843C97.9673 14.9016 97.3488 14.9277 96.7216 14.9625C96.1118 14.9798 95.563 15.0059 95.0752 15.0406V11.9651C95.4759 11.9303 95.9289 11.8956 96.4341 11.8608C96.9394 11.8261 97.4533 11.8087 97.976 11.8087C98.5161 11.7913 99.0214 11.7826 99.4918 11.7826C100.955 11.7826 102.166 11.9738 103.124 12.3561C104.1 12.7383 104.832 13.3378 105.32 14.1545C105.825 14.9711 106.077 16.0398 106.077 17.3603V26.1439H103.203ZM98.6293 26.5088C97.6014 26.5088 96.6955 26.3264 95.9115 25.9615C95.1449 25.5966 94.5438 25.0753 94.1083 24.3976C93.6901 23.72 93.4811 22.9033 93.4811 21.9476C93.4811 20.9051 93.7337 20.0536 94.2389 19.3933C94.7616 18.7331 95.4846 18.2378 96.408 17.9077C97.3488 17.5775 98.4464 17.4125 99.7008 17.4125H102.994V19.5758H99.6486C98.8123 19.5758 98.1677 19.7843 97.7147 20.2013C97.2791 20.601 97.0613 21.1223 97.0613 21.7652C97.0613 22.4081 97.2791 22.9294 97.7147 23.329C98.1677 23.7287 98.8123 23.9285 99.6486 23.9285C100.154 23.9285 100.615 23.8416 101.034 23.6678C101.469 23.4767 101.826 23.1639 102.105 22.7295C102.401 22.2778 102.567 21.6696 102.602 20.9051L103.49 21.9216C103.403 22.912 103.159 23.746 102.758 24.4237C102.375 25.1014 101.835 25.6226 101.138 25.9875C100.459 26.3351 99.6224 26.5088 98.6293 26.5088Z" fill="white"/>
10
+ <path d="M116.267 26.6391C115.03 26.6391 113.958 26.4306 113.052 26.0136C112.164 25.5966 111.414 25.0405 110.805 24.3455C110.212 23.6505 109.768 22.8685 109.472 21.9997C109.176 21.1309 109.028 20.2448 109.028 19.3412V18.846C109.028 17.8903 109.176 16.9781 109.472 16.1093C109.785 15.2231 110.247 14.4412 110.857 13.7635C111.467 13.0685 112.216 12.5211 113.104 12.1215C114.01 11.7045 115.056 11.4959 116.24 11.4959C117.477 11.4959 118.584 11.7392 119.559 12.2257C120.535 12.6949 121.31 13.3552 121.885 14.2066C122.478 15.058 122.809 16.0485 122.878 17.1779H119.35C119.263 16.4481 118.95 15.8399 118.409 15.3534C117.887 14.8669 117.164 14.6236 116.24 14.6236C115.439 14.6236 114.768 14.8148 114.228 15.197C113.705 15.5793 113.313 16.1093 113.052 16.7869C112.791 17.4472 112.66 18.2118 112.66 19.0806C112.66 19.9146 112.782 20.6705 113.026 21.3481C113.287 22.0258 113.679 22.5558 114.202 22.9381C114.742 23.3203 115.43 23.5115 116.267 23.5115C116.894 23.5115 117.434 23.3985 117.887 23.1726C118.34 22.9467 118.697 22.634 118.958 22.2343C119.237 21.8347 119.411 21.3829 119.481 20.879H123.009C122.939 22.0258 122.6 23.0336 121.99 23.9024C121.397 24.7538 120.605 25.4228 119.612 25.9094C118.636 26.3959 117.521 26.6391 116.267 26.6391Z" fill="white"/>
11
+ <path d="M125.919 26.1439V7.11719H129.552V18.2205H128.924C128.924 16.7956 129.107 15.588 129.473 14.5976C129.839 13.6071 130.379 12.8513 131.093 12.33C131.825 11.8087 132.749 11.5481 133.864 11.5481H134.02C135.641 11.5481 136.869 12.1041 137.705 13.2162C138.541 14.3282 138.96 15.9442 138.96 18.0641V26.1439H135.327V17.7252C135.327 16.8217 135.066 16.1093 134.543 15.588C134.038 15.0667 133.367 14.8061 132.531 14.8061C131.642 14.8061 130.919 15.1015 130.362 15.6923C129.822 16.2657 129.552 17.0215 129.552 17.9598V26.1439H125.919Z" fill="white"/>
12
+ <path d="M151.463 26.1439V21.9476H150.862V17.2822C150.862 16.4655 150.661 15.8573 150.261 15.4577C149.86 15.058 149.241 14.8582 148.405 14.8582C147.97 14.8582 147.447 14.8669 146.837 14.8843C146.227 14.9016 145.609 14.9277 144.982 14.9625C144.372 14.9798 143.823 15.0059 143.335 15.0406V11.9651C143.736 11.9303 144.189 11.8956 144.694 11.8608C145.199 11.8261 145.713 11.8087 146.236 11.8087C146.776 11.7913 147.281 11.7826 147.752 11.7826C149.215 11.7826 150.426 11.9738 151.384 12.3561C152.36 12.7383 153.092 13.3378 153.58 14.1545C154.085 14.9711 154.337 16.0398 154.337 17.3603V26.1439H151.463ZM146.889 26.5088C145.862 26.5088 144.956 26.3264 144.172 25.9615C143.405 25.5966 142.804 25.0753 142.368 24.3976C141.95 23.72 141.741 22.9033 141.741 21.9476C141.741 20.9051 141.994 20.0536 142.499 19.3933C143.022 18.7331 143.745 18.2378 144.668 17.9077C145.609 17.5775 146.706 17.4125 147.961 17.4125H151.254V19.5758H147.909C147.072 19.5758 146.428 19.7843 145.975 20.2013C145.539 20.601 145.321 21.1223 145.321 21.7652C145.321 22.4081 145.539 22.9294 145.975 23.329C146.428 23.7287 147.072 23.9285 147.909 23.9285C148.414 23.9285 148.876 23.8416 149.294 23.6678C149.729 23.4767 150.086 23.1639 150.365 22.7295C150.661 22.2778 150.827 21.6696 150.862 20.9051L151.75 21.9216C151.663 22.912 151.419 23.746 151.019 24.4237C150.635 25.1014 150.095 25.6226 149.398 25.9875C148.719 26.3351 147.882 26.5088 146.889 26.5088Z" fill="white"/>
13
+ <path d="M158.908 26.1439V11.9912H162.54V26.1439H158.908ZM156.922 14.7018V11.9912H162.54V14.7018H156.922ZM160.241 10.297C159.526 10.297 158.995 10.1145 158.647 9.74965C158.316 9.36738 158.15 8.88954 158.15 8.31613C158.15 7.74272 158.316 7.27357 158.647 6.90867C158.995 6.54378 159.526 6.36133 160.241 6.36133C160.955 6.36133 161.478 6.54378 161.809 6.90867C162.14 7.27357 162.305 7.74272 162.305 8.31613C162.305 8.88954 162.14 9.36738 161.809 9.74965C161.478 10.1145 160.955 10.297 160.241 10.297Z" fill="white"/>
14
+ <path d="M166.727 26.1439V11.9912H169.602V18.0641H169.34C169.34 16.6219 169.532 15.4229 169.915 14.4672C170.299 13.4942 170.865 12.7644 171.614 12.2779C172.38 11.7913 173.33 11.5481 174.462 11.5481H174.619C176.309 11.5481 177.59 12.0954 178.461 13.1901C179.332 14.2674 179.768 15.8834 179.768 18.038V26.1439H176.135V17.7252C176.135 16.8564 175.882 16.1527 175.377 15.6141C174.889 15.0754 174.21 14.8061 173.339 14.8061C172.45 14.8061 171.727 15.0841 171.17 15.6401C170.63 16.1788 170.359 16.9086 170.359 17.8295V26.1439H166.727Z" fill="white"/>
15
+ <defs>
16
+ <linearGradient id="paint0_linear_449_28" x1="36.0319" y1="5.40399" x2="18.0672" y2="23.0541" gradientUnits="userSpaceOnUse">
17
+ <stop stop-color="#F46837"/>
18
+ <stop offset="1" stop-color="#945DD6"/>
19
+ </linearGradient>
20
+ <linearGradient id="paint1_linear_449_28" x1="36.0447" y1="5.6075" x2="18.0673" y2="23.3629" gradientUnits="userSpaceOnUse">
21
+ <stop stop-color="#F46837"/>
22
+ <stop offset="1" stop-color="#945DD6"/>
23
+ </linearGradient>
24
+ <linearGradient id="paint2_linear_449_28" x1="5.92412" y1="27.4317" x2="23.8831" y2="10.239" gradientUnits="userSpaceOnUse">
25
+ <stop stop-color="#13ADC7"/>
26
+ <stop offset="1" stop-color="#945DD6"/>
27
+ </linearGradient>
28
+ <linearGradient id="paint3_linear_449_28" x1="5.76963" y1="27.5861" x2="23.5741" y2="9.77574" gradientUnits="userSpaceOnUse">
29
+ <stop stop-color="#13ADC7"/>
30
+ <stop offset="1" stop-color="#945DD6"/>
31
+ </linearGradient>
32
+ </defs>
33
+ </svg>
@@ -1,4 +1,4 @@
1
- # 🔗 DataChain Getting Started
1
+ # Get Started with DataChain
2
2
 
3
3
  🔨Wrangle unstructured AI data at scale
4
4
 
@@ -31,7 +31,7 @@ def path_len_benchmark(path):
31
31
 
32
32
  # Run in chain
33
33
  DataChain.from_storage(
34
- path="gs://datachain-demo/dogs-and-cats/",
34
+ "gs://datachain-demo/dogs-and-cats/",
35
35
  ).settings(parallel=-1).map(
36
36
  path_len_benchmark,
37
37
  params=["file.path"],
@@ -11,7 +11,7 @@ def path_len(path):
11
11
  if __name__ == "__main__":
12
12
  # Run in chain
13
13
  DataChain.from_storage(
14
- path="gs://datachain-demo/dogs-and-cats/",
14
+ uri="gs://datachain-demo/dogs-and-cats/",
15
15
  ).map(
16
16
  path_len,
17
17
  params=["file.path"],
@@ -4,22 +4,23 @@ from torch.nn.functional import cosine_similarity
4
4
  from torch.utils.data import DataLoader
5
5
 
6
6
  from datachain import C, DataChain
7
+ from datachain.sql.functions import path
7
8
 
8
9
  source = "gs://datachain-demo/50k-laion-files/000000/00000000*"
9
10
 
10
11
 
11
12
  def create_dataset():
12
- imgs = (
13
- DataChain.from_storage(source, type="image")
14
- .filter(C("file.path").glob("*.jpg"))
15
- .map(stem=lambda file: file.get_file_stem(), params=["file"], output=str)
13
+ imgs = DataChain.from_storage(source, type="image").filter(
14
+ C("file.path").glob("*.jpg")
16
15
  )
17
- captions = (
18
- DataChain.from_storage(source, type="text")
19
- .filter(C("file.path").glob("*.txt"))
20
- .map(stem=lambda file: file.get_file_stem(), params=["file"], output=str)
16
+ captions = DataChain.from_storage(source, type="text").filter(
17
+ C("file.path").glob("*.txt")
18
+ )
19
+ return imgs.merge(
20
+ captions,
21
+ on=path.file_stem(imgs.c("file.path")),
22
+ right_on=path.file_stem(captions.c("file.path")),
21
23
  )
22
- return imgs.merge(captions, on="stem")
23
24
 
24
25
 
25
26
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  import os
2
2
 
3
- from datachain import C, DataChain
3
+ from datachain import DataChain
4
4
  from datachain.lib.webdataset import process_webdataset
5
5
  from datachain.lib.webdataset_laion import WDSLaion, process_laion_meta
6
6
  from datachain.sql.functions import path
@@ -25,21 +25,20 @@ wds_with_pq = (
25
25
  DataChain.from_parquet(PARQUET_METADATA)
26
26
  .settings(cache=True)
27
27
  .merge(wds_images, on="uid", right_on="laion.json.uid", inner=True)
28
- .mutate(stem=path.file_stem(C("source.file.path")))
29
28
  )
30
29
 
31
- res = (
30
+ wds_npz = (
32
31
  DataChain.from_storage(NPZ_METADATA)
33
32
  .settings(cache=True)
34
33
  .gen(emd=process_laion_meta)
35
- .mutate(stem=path.file_stem(C("emd.file.path")))
36
- .merge(
37
- wds_with_pq,
38
- on=["stem", "emd.index"],
39
- right_on=["stem", "source.index"],
40
- inner=True,
41
- )
42
- .save("wds")
43
34
  )
44
35
 
36
+
37
+ res = wds_npz.merge(
38
+ wds_with_pq,
39
+ on=[path.file_stem(wds_npz.c("emd.file.path")), "emd.index"],
40
+ right_on=[path.file_stem(wds_with_pq.c("source.file.path")), "source.index"],
41
+ inner=True,
42
+ ).save("wds")
43
+
45
44
  res.show(5)
@@ -1,4 +1,4 @@
1
- site_name: DataChain
1
+ site_name: ''
2
2
  site_url: https://datachain.dvc.ai
3
3
  site_description: Wrangle unstructured AI data at scale
4
4
 
@@ -15,8 +15,8 @@ validation:
15
15
 
16
16
  theme:
17
17
  name: material
18
- logo: assets/datachain.png
19
- favicon: assets/datachain.png
18
+ logo: assets/datachain_logotype.svg
19
+ favicon: assets/datachain.svg
20
20
  icon:
21
21
  repo: fontawesome/brands/github
22
22
  features:
@@ -45,7 +45,8 @@ dependencies = [
45
45
  "datamodel-code-generator>=0.25",
46
46
  "Pillow>=10.0.0,<11",
47
47
  "msgpack>=1.0.4,<2",
48
- "psutil"
48
+ "psutil",
49
+ "huggingface_hub"
49
50
  ]
50
51
 
51
52
  [project.optional-dependencies]
@@ -71,7 +72,7 @@ vector = [
71
72
  ]
72
73
  hf = [
73
74
  "numba>=0.60.0",
74
- "datasets[audio,vision]"
75
+ "datasets[audio,vision]>=2.21.0"
75
76
  ]
76
77
  tests = [
77
78
  "datachain[torch,remote,vector,hf]",