datachain 0.8.10__tar.gz → 0.8.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (314) hide show
  1. {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/tests.yml +6 -14
  2. {datachain-0.8.10 → datachain-0.8.12}/.pre-commit-config.yaml +2 -2
  3. {datachain-0.8.10 → datachain-0.8.12}/PKG-INFO +3 -7
  4. {datachain-0.8.10 → datachain-0.8.12}/docs/overrides/main.html +10 -0
  5. datachain-0.8.12/docs/references/func.md +5 -0
  6. {datachain-0.8.10 → datachain-0.8.12}/docs/references/index.md +1 -1
  7. {datachain-0.8.10 → datachain-0.8.12}/examples/llm_and_nlp/hf-dataset-llm-eval.py +15 -9
  8. {datachain-0.8.10 → datachain-0.8.12}/mkdocs.yml +1 -1
  9. {datachain-0.8.10 → datachain-0.8.12}/pyproject.toml +2 -6
  10. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cache.py +4 -4
  11. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/__init__.py +0 -2
  12. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/catalog.py +103 -158
  13. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/__init__.py +7 -14
  14. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/__init__.py +0 -2
  15. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/datasets.py +0 -19
  16. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/__init__.py +27 -41
  17. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/studio.py +7 -6
  18. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/utils.py +18 -0
  19. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/fsspec.py +11 -8
  20. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/local.py +4 -4
  21. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/schema.py +1 -1
  22. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/dataset.py +1 -7
  23. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/error.py +12 -0
  24. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/__init__.py +2 -1
  25. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/conditional.py +77 -26
  26. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/func.py +17 -6
  27. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/dc.py +24 -4
  28. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/file.py +16 -0
  29. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/listing.py +30 -12
  30. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/pytorch.py +1 -1
  31. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/udf.py +1 -1
  32. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/listing.py +1 -13
  33. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/node.py +0 -15
  34. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/nodes_fetcher.py +2 -2
  35. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/remote/studio.py +2 -14
  36. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/studio.py +1 -1
  37. {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/PKG-INFO +3 -7
  38. {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/SOURCES.txt +6 -4
  39. {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/requires.txt +2 -6
  40. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_catalog.py +59 -391
  41. {datachain-0.8.10/tests/unit → datachain-0.8.12/tests/func}/test_client.py +87 -24
  42. {datachain-0.8.10/tests/unit → datachain-0.8.12/tests/func}/test_data_storage.py +1 -52
  43. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_datachain.py +96 -6
  44. datachain-0.8.12/tests/func/test_datachain_merge.py +101 -0
  45. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_datasets.py +4 -4
  46. datachain-0.8.12/tests/func/test_file.py +65 -0
  47. datachain-0.8.12/tests/func/test_hf.py +50 -0
  48. datachain-0.8.12/tests/func/test_listing.py +64 -0
  49. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_pull.py +0 -32
  50. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_pytorch.py +14 -11
  51. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_query.py +79 -0
  52. datachain-0.8.12/tests/func/test_warehouse.py +6 -0
  53. {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/name_len_slow.py +1 -1
  54. {datachain-0.8.10 → datachain-0.8.12}/tests/test_cli_studio.py +1 -1
  55. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_datachain.py +51 -100
  56. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_datachain_bootstrap.py +0 -30
  57. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_datachain_merge.py +0 -97
  58. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_diff.py +94 -107
  59. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_file.py +0 -42
  60. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_hf.py +1 -44
  61. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_conditional.py +31 -1
  62. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_cache.py +4 -4
  63. datachain-0.8.12/tests/unit/test_client.py +33 -0
  64. datachain-0.8.12/tests/unit/test_data_storage.py +77 -0
  65. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_func.py +119 -0
  66. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_listing.py +0 -36
  67. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_pytorch.py +3 -3
  68. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_query.py +16 -1
  69. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_warehouse.py +0 -8
  70. datachain-0.8.10/docs/references/sql.md +0 -18
  71. datachain-0.8.10/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -78
  72. datachain-0.8.10/examples/llm_and_nlp/unstructured-summary-map.py +0 -67
  73. datachain-0.8.10/tests/func/test_client.py +0 -93
  74. datachain-0.8.10/tests/func/test_listing.py +0 -27
  75. datachain-0.8.10/tests/unit/test_diff.py +0 -70
  76. {datachain-0.8.10 → datachain-0.8.12}/.cruft.json +0 -0
  77. {datachain-0.8.10 → datachain-0.8.12}/.gitattributes +0 -0
  78. {datachain-0.8.10 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  79. {datachain-0.8.10 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  80. {datachain-0.8.10 → datachain-0.8.12}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  81. {datachain-0.8.10 → datachain-0.8.12}/.github/codecov.yaml +0 -0
  82. {datachain-0.8.10 → datachain-0.8.12}/.github/dependabot.yml +0 -0
  83. {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/benchmarks.yml +0 -0
  84. {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/release.yml +0 -0
  85. {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/tests-studio.yml +0 -0
  86. {datachain-0.8.10 → datachain-0.8.12}/.github/workflows/update-template.yaml +0 -0
  87. {datachain-0.8.10 → datachain-0.8.12}/.gitignore +0 -0
  88. {datachain-0.8.10 → datachain-0.8.12}/CODE_OF_CONDUCT.rst +0 -0
  89. {datachain-0.8.10 → datachain-0.8.12}/LICENSE +0 -0
  90. {datachain-0.8.10 → datachain-0.8.12}/README.rst +0 -0
  91. {datachain-0.8.10 → datachain-0.8.12}/docs/assets/captioned_cartoons.png +0 -0
  92. {datachain-0.8.10 → datachain-0.8.12}/docs/assets/datachain-white.svg +0 -0
  93. {datachain-0.8.10 → datachain-0.8.12}/docs/assets/datachain.svg +0 -0
  94. {datachain-0.8.10 → datachain-0.8.12}/docs/contributing.md +0 -0
  95. {datachain-0.8.10 → datachain-0.8.12}/docs/css/github-permalink-style.css +0 -0
  96. {datachain-0.8.10 → datachain-0.8.12}/docs/examples.md +0 -0
  97. {datachain-0.8.10 → datachain-0.8.12}/docs/index.md +0 -0
  98. {datachain-0.8.10 → datachain-0.8.12}/docs/quick-start.md +0 -0
  99. {datachain-0.8.10 → datachain-0.8.12}/docs/references/datachain.md +0 -0
  100. {datachain-0.8.10 → datachain-0.8.12}/docs/references/datatype.md +0 -0
  101. {datachain-0.8.10 → datachain-0.8.12}/docs/references/file.md +0 -0
  102. {datachain-0.8.10 → datachain-0.8.12}/docs/references/torch.md +0 -0
  103. {datachain-0.8.10 → datachain-0.8.12}/docs/references/udf.md +0 -0
  104. {datachain-0.8.10 → datachain-0.8.12}/docs/tutorials.md +0 -0
  105. {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  106. {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  107. {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/openimage-detect.py +0 -0
  108. {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/ultralytics-bbox.py +0 -0
  109. {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/ultralytics-pose.py +0 -0
  110. {datachain-0.8.10 → datachain-0.8.12}/examples/computer_vision/ultralytics-segment.py +0 -0
  111. {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/common_sql_functions.py +0 -0
  112. {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/json-csv-reader.py +0 -0
  113. {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/torch-loader.py +0 -0
  114. {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/udfs/parallel.py +0 -0
  115. {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/udfs/simple.py +0 -0
  116. {datachain-0.8.10 → datachain-0.8.12}/examples/get_started/udfs/stateful.py +0 -0
  117. {datachain-0.8.10 → datachain-0.8.12}/examples/llm_and_nlp/claude-query.py +0 -0
  118. {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/clip_inference.py +0 -0
  119. {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/hf_pipeline.py +0 -0
  120. {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/openai_image_desc_lib.py +0 -0
  121. {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/wds.py +0 -0
  122. {datachain-0.8.10 → datachain-0.8.12}/examples/multimodal/wds_filtered.py +0 -0
  123. {datachain-0.8.10 → datachain-0.8.12}/noxfile.py +0 -0
  124. {datachain-0.8.10 → datachain-0.8.12}/setup.cfg +0 -0
  125. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/__init__.py +0 -0
  126. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/__main__.py +0 -0
  127. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/asyn.py +0 -0
  128. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/datasource.py +0 -0
  129. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/catalog/loader.py +0 -0
  130. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/du.py +0 -0
  131. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/index.py +0 -0
  132. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/ls.py +0 -0
  133. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/misc.py +0 -0
  134. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/query.py +0 -0
  135. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/commands/show.py +0 -0
  136. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/parser/job.py +0 -0
  137. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/cli/utils.py +0 -0
  138. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/__init__.py +0 -0
  139. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/azure.py +0 -0
  140. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/fileslice.py +0 -0
  141. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/gcs.py +0 -0
  142. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/hf.py +0 -0
  143. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/client/s3.py +0 -0
  144. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/config.py +0 -0
  145. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/__init__.py +0 -0
  146. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/db_engine.py +0 -0
  147. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/job.py +0 -0
  148. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/metastore.py +0 -0
  149. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/serializer.py +0 -0
  150. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/sqlite.py +0 -0
  151. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/data_storage/warehouse.py +0 -0
  152. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/diff/__init__.py +0 -0
  153. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/aggregate.py +0 -0
  154. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/array.py +0 -0
  155. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/base.py +0 -0
  156. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/numeric.py +0 -0
  157. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/path.py +0 -0
  158. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/random.py +0 -0
  159. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/string.py +0 -0
  160. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/func/window.py +0 -0
  161. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/job.py +0 -0
  162. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/__init__.py +0 -0
  163. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/arrow.py +0 -0
  164. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/clip.py +0 -0
  165. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/__init__.py +0 -0
  166. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/flatten.py +0 -0
  167. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/python_to_sql.py +0 -0
  168. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/sql_to_python.py +0 -0
  169. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/unflatten.py +0 -0
  170. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  171. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/data_model.py +0 -0
  172. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/dataset_info.py +0 -0
  173. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/hf.py +0 -0
  174. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/image.py +0 -0
  175. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/listing_info.py +0 -0
  176. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/meta_formats.py +0 -0
  177. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/model_store.py +0 -0
  178. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/settings.py +0 -0
  179. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/signal_schema.py +0 -0
  180. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/tar.py +0 -0
  181. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/text.py +0 -0
  182. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/udf_signature.py +0 -0
  183. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/utils.py +0 -0
  184. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/vfile.py +0 -0
  185. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/webdataset.py +0 -0
  186. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/lib/webdataset_laion.py +0 -0
  187. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/__init__.py +0 -0
  188. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/bbox.py +0 -0
  189. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/pose.py +0 -0
  190. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/segment.py +0 -0
  191. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/__init__.py +0 -0
  192. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/bbox.py +0 -0
  193. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/pose.py +0 -0
  194. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/model/ultralytics/segment.py +0 -0
  195. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/nodes_thread_pool.py +0 -0
  196. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/progress.py +0 -0
  197. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/py.typed +0 -0
  198. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/__init__.py +0 -0
  199. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/batch.py +0 -0
  200. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/dataset.py +0 -0
  201. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/dispatch.py +0 -0
  202. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/metrics.py +0 -0
  203. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/params.py +0 -0
  204. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/queue.py +0 -0
  205. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/schema.py +0 -0
  206. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/session.py +0 -0
  207. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/udf.py +0 -0
  208. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/query/utils.py +0 -0
  209. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/remote/__init__.py +0 -0
  210. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/__init__.py +0 -0
  211. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/default/__init__.py +0 -0
  212. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/default/base.py +0 -0
  213. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/__init__.py +0 -0
  214. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/aggregate.py +0 -0
  215. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/array.py +0 -0
  216. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/conditional.py +0 -0
  217. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/numeric.py +0 -0
  218. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/path.py +0 -0
  219. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/random.py +0 -0
  220. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/functions/string.py +0 -0
  221. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/selectable.py +0 -0
  222. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/__init__.py +0 -0
  223. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/base.py +0 -0
  224. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/types.py +0 -0
  225. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/sqlite/vector.py +0 -0
  226. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/types.py +0 -0
  227. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/sql/utils.py +0 -0
  228. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/telemetry.py +0 -0
  229. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/toolkit/__init__.py +0 -0
  230. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/toolkit/split.py +0 -0
  231. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/torch/__init__.py +0 -0
  232. {datachain-0.8.10 → datachain-0.8.12}/src/datachain/utils.py +0 -0
  233. {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/dependency_links.txt +0 -0
  234. {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/entry_points.txt +0 -0
  235. {datachain-0.8.10 → datachain-0.8.12}/src/datachain.egg-info/top_level.txt +0 -0
  236. {datachain-0.8.10 → datachain-0.8.12}/tests/__init__.py +0 -0
  237. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/__init__.py +0 -0
  238. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/conftest.py +0 -0
  239. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  240. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/.dvc/config +0 -0
  241. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/.gitignore +0 -0
  242. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  243. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/test_datachain.py +0 -0
  244. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/test_ls.py +0 -0
  245. {datachain-0.8.10 → datachain-0.8.12}/tests/benchmarks/test_version.py +0 -0
  246. {datachain-0.8.10 → datachain-0.8.12}/tests/conftest.py +0 -0
  247. {datachain-0.8.10 → datachain-0.8.12}/tests/data.py +0 -0
  248. {datachain-0.8.10 → datachain-0.8.12}/tests/examples/__init__.py +0 -0
  249. {datachain-0.8.10 → datachain-0.8.12}/tests/examples/test_examples.py +0 -0
  250. {datachain-0.8.10 → datachain-0.8.12}/tests/examples/test_wds_e2e.py +0 -0
  251. {datachain-0.8.10 → datachain-0.8.12}/tests/examples/wds_data.py +0 -0
  252. {datachain-0.8.10 → datachain-0.8.12}/tests/func/__init__.py +0 -0
  253. {datachain-0.8.10 → datachain-0.8.12}/tests/func/fake-service-account-credentials.json +0 -0
  254. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_dataset_query.py +0 -0
  255. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_feature_pickling.py +0 -0
  256. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_ls.py +0 -0
  257. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_meta_formats.py +0 -0
  258. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_metrics.py +0 -0
  259. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_session.py +0 -0
  260. {datachain-0.8.10 → datachain-0.8.12}/tests/func/test_toolkit.py +0 -0
  261. {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class.py +0 -0
  262. {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class_exception.py +0 -0
  263. {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class_parallel.py +0 -0
  264. {datachain-0.8.10 → datachain-0.8.12}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  265. {datachain-0.8.10 → datachain-0.8.12}/tests/test_atomicity.py +0 -0
  266. {datachain-0.8.10 → datachain-0.8.12}/tests/test_cli_e2e.py +0 -0
  267. {datachain-0.8.10 → datachain-0.8.12}/tests/test_query_e2e.py +0 -0
  268. {datachain-0.8.10 → datachain-0.8.12}/tests/test_telemetry.py +0 -0
  269. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/__init__.py +0 -0
  270. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/__init__.py +0 -0
  271. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/conftest.py +0 -0
  272. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_arrow.py +0 -0
  273. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_clip.py +0 -0
  274. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_feature.py +0 -0
  275. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_feature_utils.py +0 -0
  276. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_image.py +0 -0
  277. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_listing_info.py +0 -0
  278. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_models.py +0 -0
  279. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_python_to_sql.py +0 -0
  280. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_schema.py +0 -0
  281. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_signal_schema.py +0 -0
  282. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_sql_to_python.py +0 -0
  283. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_text.py +0 -0
  284. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_udf_signature.py +0 -0
  285. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_utils.py +0 -0
  286. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/lib/test_webdataset.py +0 -0
  287. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/__init__.py +0 -0
  288. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/sqlite/__init__.py +0 -0
  289. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/sqlite/test_types.py +0 -0
  290. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/sqlite/test_utils.py +0 -0
  291. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_array.py +0 -0
  292. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_path.py +0 -0
  293. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_random.py +0 -0
  294. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_selectable.py +0 -0
  295. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/sql/test_string.py +0 -0
  296. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_asyn.py +0 -0
  297. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_catalog.py +0 -0
  298. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_catalog_loader.py +0 -0
  299. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_cli_parsing.py +0 -0
  300. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_client_gcs.py +0 -0
  301. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_client_s3.py +0 -0
  302. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_config.py +0 -0
  303. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_database_engine.py +0 -0
  304. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_dataset.py +0 -0
  305. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_dispatch.py +0 -0
  306. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_fileslice.py +0 -0
  307. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_metastore.py +0 -0
  308. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_module_exports.py +0 -0
  309. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_query_metrics.py +0 -0
  310. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_query_params.py +0 -0
  311. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_serializer.py +0 -0
  312. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_session.py +0 -0
  313. {datachain-0.8.10 → datachain-0.8.12}/tests/unit/test_utils.py +0 -0
  314. {datachain-0.8.10 → datachain-0.8.12}/tests/utils.py +0 -0
@@ -3,7 +3,7 @@ name: Tests
3
3
  on:
4
4
  push:
5
5
  branches: [main]
6
- pull_request_target:
6
+ pull_request:
7
7
  workflow_dispatch:
8
8
 
9
9
  env:
@@ -14,15 +14,7 @@ concurrency:
14
14
  cancel-in-progress: true
15
15
 
16
16
  jobs:
17
- authorize:
18
- environment: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository && 'external' || 'internal' }}
19
- runs-on: ubuntu-latest
20
- steps:
21
- - run: true
22
-
23
17
  lint:
24
- needs: authorize
25
-
26
18
  runs-on: ubuntu-latest
27
19
  steps:
28
20
  - name: Check out the repository
@@ -62,8 +54,6 @@ jobs:
62
54
  run: nox -s lint
63
55
 
64
56
  datachain:
65
- needs: authorize
66
-
67
57
  timeout-minutes: 40
68
58
  runs-on: ${{ matrix.os }}
69
59
  strategy:
@@ -112,7 +102,11 @@ jobs:
112
102
  run: echo 'DISABLE_REMOTES_ARG=--disable-remotes=azure,gs' >> $env:GITHUB_ENV
113
103
 
114
104
  - name: Run tests
115
- run: nox -s tests-${{ matrix.pyv }} -- $DISABLE_REMOTES_ARG
105
+ run: nox -s tests-${{ matrix.pyv }} -- -m "not e2e and not examples" $DISABLE_REMOTES_ARG
106
+ shell: bash
107
+
108
+ - name: Run E2E tests
109
+ run: nox -s tests-${{ matrix.pyv }} -- -m "e2e" --cov-append $DISABLE_REMOTES_ARG
116
110
  shell: bash
117
111
 
118
112
  - name: Upload coverage report
@@ -129,8 +123,6 @@ jobs:
129
123
  run: nox -s docs
130
124
 
131
125
  examples:
132
- needs: authorize
133
-
134
126
  runs-on: ${{ matrix.os }}
135
127
  timeout-minutes: 60
136
128
  strategy:
@@ -24,13 +24,13 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.9.1'
27
+ rev: 'v0.9.3'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
31
31
  - id: ruff-format
32
32
  - repo: https://github.com/codespell-project/codespell
33
- rev: v2.3.0
33
+ rev: v2.4.0
34
34
  hooks:
35
35
  - id: codespell
36
36
  additional_dependencies: ["tomli"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.10
3
+ Version: 0.8.12
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -78,7 +78,6 @@ Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
78
78
  Requires-Dist: virtualenv; extra == "tests"
79
79
  Requires-Dist: dulwich; extra == "tests"
80
80
  Requires-Dist: hypothesis; extra == "tests"
81
- Requires-Dist: open_clip_torch; extra == "tests"
82
81
  Requires-Dist: aiotools>=1.7.0; extra == "tests"
83
82
  Requires-Dist: requests-mock; extra == "tests"
84
83
  Requires-Dist: scipy; extra == "tests"
@@ -94,12 +93,9 @@ Provides-Extra: examples
94
93
  Requires-Dist: datachain[tests]; extra == "examples"
95
94
  Requires-Dist: defusedxml; extra == "examples"
96
95
  Requires-Dist: accelerate; extra == "examples"
97
- Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
98
- Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
99
- Requires-Dist: pdfplumber==0.11.5; extra == "examples"
100
96
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
101
- Requires-Dist: onnx==1.16.1; extra == "examples"
102
- Requires-Dist: ultralytics==8.3.61; extra == "examples"
97
+ Requires-Dist: ultralytics==8.3.68; extra == "examples"
98
+ Requires-Dist: open_clip_torch; extra == "examples"
103
99
 
104
100
  ================
105
101
  |logo| DataChain
@@ -8,6 +8,16 @@
8
8
  <script type="text/javascript">
9
9
  !function () { var e, t, n; e = "14ffd92a6cbf5f2", t = function () { Reo.init({ clientID: "14ffd92a6cbf5f2" }) }, (n = document.createElement("script")).src = "https://static.reo.dev/" + e + "/reo.js", n.async = !0, n.onload = t, document.head.appendChild(n) }();
10
10
  </script>
11
+ <script>
12
+ function initApollo() {
13
+ var n = Math.random().toString(36).substring(7), o = document.createElement("script");
14
+ o.src = "https://assets.apollo.io/micro/website-tracker/tracker.iife.js?nocache=" + n, o.async = !0, o.defer = !0,
15
+ o.onload = function () { window.trackingFunctions.onLoad({ appId: "66315101e9aa7501c79140d9" }) },
16
+ document.head.appendChild(o)
17
+ };
18
+ initApollo();
19
+ </script>
20
+
11
21
 
12
22
 
13
23
  {% endblock %}
@@ -0,0 +1,5 @@
1
+ # Functions
2
+
3
+ Use built-in functions for data manipulation and analysis to operate on the underlying database storing the chain data. These functions are useful for operations like [`DataChain.filter`](datachain.md#datachain.lib.dc.DataChain.filter) and [`DataChain.mutate`](datachain.md#datachain.lib.dc.DataChain.mutate). Import these functions from `datachain.func`.
4
+
5
+ ::: datachain.func
@@ -10,5 +10,5 @@ DataChain's API is organized into several modules:
10
10
  - [DataType](./datatype.md) - Type system and schema definitions
11
11
  - [File](./file.md) - File handling and storage operations
12
12
  - [UDF](./udf.md) - User-defined functions and transformations
13
- - [SQL](./sql.md) - SQL query integration
13
+ - [Functions](./func.md) - Built-in functions for data manipulation and analysis
14
14
  - [Torch](./torch.md) - PyTorch data loading utilities
@@ -1,4 +1,5 @@
1
1
  from huggingface_hub import InferenceClient
2
+ from requests import HTTPError
2
3
 
3
4
  from datachain import C, DataChain, DataModel
4
5
 
@@ -20,15 +21,20 @@ def eval_dialog(
20
21
  user_input: str,
21
22
  bot_response: str,
22
23
  ) -> DialogEval:
23
- completion = client.chat_completion(
24
- messages=[
25
- {
26
- "role": "user",
27
- "content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
28
- },
29
- ],
30
- response_format={"type": "json", "value": DialogEval.model_json_schema()},
31
- )
24
+ try:
25
+ completion = client.chat_completion(
26
+ messages=[
27
+ {
28
+ "role": "user",
29
+ "content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
30
+ },
31
+ ],
32
+ response_format={"type": "json", "value": DialogEval.model_json_schema()},
33
+ )
34
+ except HTTPError:
35
+ return DialogEval(
36
+ result="Error", reason="Error while interacting with the Hugging Face API."
37
+ )
32
38
 
33
39
  message = completion.choices[0].message
34
40
  try:
@@ -73,7 +73,7 @@ nav:
73
73
  - File: references/file.md
74
74
  - UDF: references/udf.md
75
75
  - Torch: references/torch.md
76
- - SQL: references/sql.md
76
+ - Functions: references/func.md
77
77
  - 🤝 Contributing: contributing.md
78
78
 
79
79
  - DataChain Website ↗: https://datachain.ai" target="_blank"
@@ -89,7 +89,6 @@ tests = [
89
89
  "virtualenv",
90
90
  "dulwich",
91
91
  "hypothesis",
92
- "open_clip_torch",
93
92
  "aiotools>=1.7.0",
94
93
  "requests-mock",
95
94
  "scipy"
@@ -107,12 +106,9 @@ examples = [
107
106
  "datachain[tests]",
108
107
  "defusedxml",
109
108
  "accelerate",
110
- "unstructured_ingest[embed-huggingface]",
111
- "unstructured[pdf]<0.16.12",
112
- "pdfplumber==0.11.5",
113
109
  "huggingface_hub[hf_transfer]",
114
- "onnx==1.16.1",
115
- "ultralytics==8.3.61"
110
+ "ultralytics==8.3.68",
111
+ "open_clip_torch"
116
112
  ]
117
113
 
118
114
  [project.urls]
@@ -22,15 +22,15 @@ def try_scandir(path):
22
22
  pass
23
23
 
24
24
 
25
- def get_temp_cache(tmp_dir: str, prefix: Optional[str] = None) -> "DataChainCache":
25
+ def get_temp_cache(tmp_dir: str, prefix: Optional[str] = None) -> "Cache":
26
26
  cache_dir = mkdtemp(prefix=prefix, dir=tmp_dir)
27
- return DataChainCache(cache_dir, tmp_dir=tmp_dir)
27
+ return Cache(cache_dir, tmp_dir=tmp_dir)
28
28
 
29
29
 
30
30
  @contextmanager
31
31
  def temporary_cache(
32
32
  tmp_dir: str, prefix: Optional[str] = None, delete: bool = True
33
- ) -> Iterator["DataChainCache"]:
33
+ ) -> Iterator["Cache"]:
34
34
  cache = get_temp_cache(tmp_dir, prefix=prefix)
35
35
  try:
36
36
  yield cache
@@ -39,7 +39,7 @@ def temporary_cache(
39
39
  cache.destroy()
40
40
 
41
41
 
42
- class DataChainCache:
42
+ class Cache:
43
43
  def __init__(self, cache_dir: str, tmp_dir: str):
44
44
  self.odb = LocalHashFileDB(
45
45
  LocalFileSystem(),
@@ -3,7 +3,6 @@ from .catalog import (
3
3
  QUERY_SCRIPT_CANCELED_EXIT_CODE,
4
4
  QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE,
5
5
  Catalog,
6
- parse_edatachain_file,
7
6
  )
8
7
  from .loader import get_catalog
9
8
 
@@ -13,5 +12,4 @@ __all__ = [
13
12
  "QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE",
14
13
  "Catalog",
15
14
  "get_catalog",
16
- "parse_edatachain_file",
17
15
  ]