datachain 0.3.16__tar.gz → 0.3.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (251) hide show
  1. {datachain-0.3.16 → datachain-0.3.18}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.3.16/src/datachain.egg-info → datachain-0.3.18}/PKG-INFO +5 -3
  3. datachain-0.3.18/examples/llm_and_nlp/unstructured-embeddings-gen.py +76 -0
  4. datachain-0.3.16/examples/llm_and_nlp/unstructured-text.py → datachain-0.3.18/examples/llm_and_nlp/unstructured-summary-map.py +7 -3
  5. {datachain-0.3.16 → datachain-0.3.18}/examples/multimodal/hf_pipeline.py +7 -1
  6. {datachain-0.3.16 → datachain-0.3.18}/examples/multimodal/openai_image_desc_lib.py +0 -2
  7. {datachain-0.3.16 → datachain-0.3.18}/pyproject.toml +6 -4
  8. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/cache.py +14 -55
  9. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/catalog/catalog.py +21 -55
  10. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/cli.py +7 -26
  11. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/fsspec.py +29 -63
  12. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/local.py +2 -3
  13. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/metastore.py +7 -66
  14. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/sqlite.py +5 -2
  15. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/warehouse.py +0 -22
  16. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/arrow.py +2 -1
  17. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/dc.py +5 -2
  18. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/file.py +41 -23
  19. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/listing.py +3 -0
  20. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/tar.py +2 -1
  21. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/listing.py +4 -4
  22. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/node.py +23 -9
  23. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/nodes_fetcher.py +12 -5
  24. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/nodes_thread_pool.py +1 -1
  25. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/progress.py +2 -12
  26. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/__init__.py +0 -2
  27. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/dataset.py +26 -144
  28. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/dispatch.py +2 -15
  29. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/schema.py +36 -24
  30. datachain-0.3.18/src/datachain/query/udf.py +126 -0
  31. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/types.py +4 -2
  32. datachain-0.3.18/src/datachain/telemetry.py +37 -0
  33. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/utils.py +11 -40
  34. {datachain-0.3.16 → datachain-0.3.18/src/datachain.egg-info}/PKG-INFO +5 -3
  35. {datachain-0.3.16 → datachain-0.3.18}/src/datachain.egg-info/SOURCES.txt +4 -3
  36. {datachain-0.3.16 → datachain-0.3.18}/src/datachain.egg-info/requires.txt +4 -2
  37. {datachain-0.3.16 → datachain-0.3.18}/tests/conftest.py +20 -9
  38. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_catalog.py +0 -116
  39. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_datachain.py +627 -12
  40. datachain-0.3.18/tests/func/test_dataset_query.py +1195 -0
  41. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_datasets.py +102 -91
  42. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_feature_pickling.py +0 -8
  43. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_pull.py +23 -11
  44. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_query.py +16 -10
  45. {datachain-0.3.16 → datachain-0.3.18}/tests/scripts/name_len_slow.py +9 -15
  46. {datachain-0.3.16 → datachain-0.3.18}/tests/test_cli_e2e.py +1 -0
  47. datachain-0.3.18/tests/test_telemetry.py +20 -0
  48. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_datachain.py +15 -0
  49. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_datachain_merge.py +98 -1
  50. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_file.py +3 -26
  51. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_cache.py +9 -4
  52. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_data_storage.py +18 -11
  53. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_utils.py +0 -25
  54. {datachain-0.3.16 → datachain-0.3.18}/tests/utils.py +22 -63
  55. datachain-0.3.16/src/datachain/query/builtins.py +0 -96
  56. datachain-0.3.16/src/datachain/query/udf.py +0 -272
  57. datachain-0.3.16/tests/func/test_dataset_query.py +0 -3580
  58. datachain-0.3.16/tests/unit/test_udf.py +0 -98
  59. {datachain-0.3.16 → datachain-0.3.18}/.cruft.json +0 -0
  60. {datachain-0.3.16 → datachain-0.3.18}/.gitattributes +0 -0
  61. {datachain-0.3.16 → datachain-0.3.18}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  62. {datachain-0.3.16 → datachain-0.3.18}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  63. {datachain-0.3.16 → datachain-0.3.18}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  64. {datachain-0.3.16 → datachain-0.3.18}/.github/codecov.yaml +0 -0
  65. {datachain-0.3.16 → datachain-0.3.18}/.github/dependabot.yml +0 -0
  66. {datachain-0.3.16 → datachain-0.3.18}/.github/workflows/benchmarks.yml +0 -0
  67. {datachain-0.3.16 → datachain-0.3.18}/.github/workflows/release.yml +0 -0
  68. {datachain-0.3.16 → datachain-0.3.18}/.github/workflows/tests-studio.yml +0 -0
  69. {datachain-0.3.16 → datachain-0.3.18}/.github/workflows/tests.yml +0 -0
  70. {datachain-0.3.16 → datachain-0.3.18}/.github/workflows/update-template.yaml +0 -0
  71. {datachain-0.3.16 → datachain-0.3.18}/.gitignore +0 -0
  72. {datachain-0.3.16 → datachain-0.3.18}/CODE_OF_CONDUCT.rst +0 -0
  73. {datachain-0.3.16 → datachain-0.3.18}/CONTRIBUTING.rst +0 -0
  74. {datachain-0.3.16 → datachain-0.3.18}/LICENSE +0 -0
  75. {datachain-0.3.16 → datachain-0.3.18}/README.rst +0 -0
  76. {datachain-0.3.16 → datachain-0.3.18}/docs/assets/captioned_cartoons.png +0 -0
  77. {datachain-0.3.16 → datachain-0.3.18}/docs/assets/datachain-white.svg +0 -0
  78. {datachain-0.3.16 → datachain-0.3.18}/docs/assets/datachain.svg +0 -0
  79. {datachain-0.3.16 → datachain-0.3.18}/docs/assets/flowchart.png +0 -0
  80. {datachain-0.3.16 → datachain-0.3.18}/docs/index.md +0 -0
  81. {datachain-0.3.16 → datachain-0.3.18}/docs/references/datachain.md +0 -0
  82. {datachain-0.3.16 → datachain-0.3.18}/docs/references/datatype.md +0 -0
  83. {datachain-0.3.16 → datachain-0.3.18}/docs/references/file.md +0 -0
  84. {datachain-0.3.16 → datachain-0.3.18}/docs/references/index.md +0 -0
  85. {datachain-0.3.16 → datachain-0.3.18}/docs/references/sql.md +0 -0
  86. {datachain-0.3.16 → datachain-0.3.18}/docs/references/torch.md +0 -0
  87. {datachain-0.3.16 → datachain-0.3.18}/docs/references/udf.md +0 -0
  88. {datachain-0.3.16 → datachain-0.3.18}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  89. {datachain-0.3.16 → datachain-0.3.18}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  90. {datachain-0.3.16 → datachain-0.3.18}/examples/computer_vision/openimage-detect.py +0 -0
  91. {datachain-0.3.16 → datachain-0.3.18}/examples/get_started/common_sql_functions.py +0 -0
  92. {datachain-0.3.16 → datachain-0.3.18}/examples/get_started/json-csv-reader.py +0 -0
  93. {datachain-0.3.16 → datachain-0.3.18}/examples/get_started/torch-loader.py +0 -0
  94. {datachain-0.3.16 → datachain-0.3.18}/examples/get_started/udfs/parallel.py +0 -0
  95. {datachain-0.3.16 → datachain-0.3.18}/examples/get_started/udfs/simple.py +0 -0
  96. {datachain-0.3.16 → datachain-0.3.18}/examples/get_started/udfs/stateful.py +0 -0
  97. {datachain-0.3.16 → datachain-0.3.18}/examples/llm_and_nlp/claude-query.py +0 -0
  98. {datachain-0.3.16 → datachain-0.3.18}/examples/multimodal/clip_inference.py +0 -0
  99. {datachain-0.3.16 → datachain-0.3.18}/examples/multimodal/wds.py +0 -0
  100. {datachain-0.3.16 → datachain-0.3.18}/examples/multimodal/wds_filtered.py +0 -0
  101. {datachain-0.3.16 → datachain-0.3.18}/mkdocs.yml +0 -0
  102. {datachain-0.3.16 → datachain-0.3.18}/noxfile.py +0 -0
  103. {datachain-0.3.16 → datachain-0.3.18}/setup.cfg +0 -0
  104. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/__init__.py +0 -0
  105. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/__main__.py +0 -0
  106. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/asyn.py +0 -0
  107. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/catalog/__init__.py +0 -0
  108. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/catalog/datasource.py +0 -0
  109. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/catalog/loader.py +0 -0
  110. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/cli_utils.py +0 -0
  111. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/__init__.py +0 -0
  112. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/azure.py +0 -0
  113. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/fileslice.py +0 -0
  114. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/gcs.py +0 -0
  115. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/hf.py +0 -0
  116. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/client/s3.py +0 -0
  117. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/config.py +0 -0
  118. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/__init__.py +0 -0
  119. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/db_engine.py +0 -0
  120. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/id_generator.py +0 -0
  121. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/job.py +0 -0
  122. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/schema.py +0 -0
  123. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/data_storage/serializer.py +0 -0
  124. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/dataset.py +0 -0
  125. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/error.py +0 -0
  126. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/job.py +0 -0
  127. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/__init__.py +0 -0
  128. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/clip.py +0 -0
  129. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/convert/__init__.py +0 -0
  130. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/convert/flatten.py +0 -0
  131. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/convert/python_to_sql.py +0 -0
  132. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/convert/sql_to_python.py +0 -0
  133. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/convert/unflatten.py +0 -0
  134. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  135. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/data_model.py +0 -0
  136. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/dataset_info.py +0 -0
  137. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/hf.py +0 -0
  138. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/image.py +0 -0
  139. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/listing_info.py +0 -0
  140. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/meta_formats.py +0 -0
  141. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/model_store.py +0 -0
  142. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/pytorch.py +0 -0
  143. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/settings.py +0 -0
  144. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/signal_schema.py +0 -0
  145. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/text.py +0 -0
  146. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/udf.py +0 -0
  147. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/udf_signature.py +0 -0
  148. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/utils.py +0 -0
  149. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/vfile.py +0 -0
  150. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/webdataset.py +0 -0
  151. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/lib/webdataset_laion.py +0 -0
  152. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/py.typed +0 -0
  153. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/batch.py +0 -0
  154. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/metrics.py +0 -0
  155. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/params.py +0 -0
  156. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/queue.py +0 -0
  157. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/query/session.py +0 -0
  158. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/remote/__init__.py +0 -0
  159. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/remote/studio.py +0 -0
  160. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/__init__.py +0 -0
  161. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/default/__init__.py +0 -0
  162. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/default/base.py +0 -0
  163. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/functions/__init__.py +0 -0
  164. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/functions/array.py +0 -0
  165. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/functions/conditional.py +0 -0
  166. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/functions/path.py +0 -0
  167. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/functions/random.py +0 -0
  168. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/functions/string.py +0 -0
  169. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/selectable.py +0 -0
  170. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/sqlite/__init__.py +0 -0
  171. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/sqlite/base.py +0 -0
  172. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/sqlite/types.py +0 -0
  173. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/sqlite/vector.py +0 -0
  174. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/sql/utils.py +0 -0
  175. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/storage.py +0 -0
  176. {datachain-0.3.16 → datachain-0.3.18}/src/datachain/torch/__init__.py +0 -0
  177. {datachain-0.3.16 → datachain-0.3.18}/src/datachain.egg-info/dependency_links.txt +0 -0
  178. {datachain-0.3.16 → datachain-0.3.18}/src/datachain.egg-info/entry_points.txt +0 -0
  179. {datachain-0.3.16 → datachain-0.3.18}/src/datachain.egg-info/top_level.txt +0 -0
  180. {datachain-0.3.16 → datachain-0.3.18}/tests/__init__.py +0 -0
  181. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/__init__.py +0 -0
  182. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/conftest.py +0 -0
  183. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  184. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/datasets/.dvc/config +0 -0
  185. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/datasets/.gitignore +0 -0
  186. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  187. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/test_datachain.py +0 -0
  188. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/test_ls.py +0 -0
  189. {datachain-0.3.16 → datachain-0.3.18}/tests/benchmarks/test_version.py +0 -0
  190. {datachain-0.3.16 → datachain-0.3.18}/tests/data.py +0 -0
  191. {datachain-0.3.16 → datachain-0.3.18}/tests/examples/__init__.py +0 -0
  192. {datachain-0.3.16 → datachain-0.3.18}/tests/examples/test_examples.py +0 -0
  193. {datachain-0.3.16 → datachain-0.3.18}/tests/examples/test_wds_e2e.py +0 -0
  194. {datachain-0.3.16 → datachain-0.3.18}/tests/examples/wds_data.py +0 -0
  195. {datachain-0.3.16 → datachain-0.3.18}/tests/func/__init__.py +0 -0
  196. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_client.py +0 -0
  197. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_listing.py +0 -0
  198. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_ls.py +0 -0
  199. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_meta_formats.py +0 -0
  200. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_metrics.py +0 -0
  201. {datachain-0.3.16 → datachain-0.3.18}/tests/func/test_pytorch.py +0 -0
  202. {datachain-0.3.16 → datachain-0.3.18}/tests/scripts/feature_class.py +0 -0
  203. {datachain-0.3.16 → datachain-0.3.18}/tests/scripts/feature_class_parallel.py +0 -0
  204. {datachain-0.3.16 → datachain-0.3.18}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  205. {datachain-0.3.16 → datachain-0.3.18}/tests/test_query_e2e.py +0 -0
  206. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/__init__.py +0 -0
  207. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/__init__.py +0 -0
  208. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/conftest.py +0 -0
  209. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_arrow.py +0 -0
  210. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_clip.py +0 -0
  211. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  212. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_feature.py +0 -0
  213. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_feature_utils.py +0 -0
  214. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_hf.py +0 -0
  215. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_image.py +0 -0
  216. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_schema.py +0 -0
  217. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_signal_schema.py +0 -0
  218. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_sql_to_python.py +0 -0
  219. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_text.py +0 -0
  220. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_udf_signature.py +0 -0
  221. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_utils.py +0 -0
  222. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/lib/test_webdataset.py +0 -0
  223. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/__init__.py +0 -0
  224. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/sqlite/__init__.py +0 -0
  225. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/sqlite/test_utils.py +0 -0
  226. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/test_array.py +0 -0
  227. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/test_conditional.py +0 -0
  228. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/test_path.py +0 -0
  229. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/test_random.py +0 -0
  230. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/test_selectable.py +0 -0
  231. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/sql/test_string.py +0 -0
  232. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_asyn.py +0 -0
  233. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_catalog.py +0 -0
  234. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_catalog_loader.py +0 -0
  235. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_cli_parsing.py +0 -0
  236. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_client.py +0 -0
  237. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_client_s3.py +0 -0
  238. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_database_engine.py +0 -0
  239. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_dataset.py +0 -0
  240. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_dispatch.py +0 -0
  241. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_fileslice.py +0 -0
  242. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_id_generator.py +0 -0
  243. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_listing.py +0 -0
  244. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_metastore.py +0 -0
  245. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_module_exports.py +0 -0
  246. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_query_metrics.py +0 -0
  247. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_query_params.py +0 -0
  248. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_serializer.py +0 -0
  249. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_session.py +0 -0
  250. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_storage.py +0 -0
  251. {datachain-0.3.16 → datachain-0.3.18}/tests/unit/test_warehouse.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.6.4'
27
+ rev: 'v0.6.5'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.3.16
3
+ Version: 0.3.18
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -44,6 +44,7 @@ Requires-Dist: Pillow<11,>=10.0.0
44
44
  Requires-Dist: msgpack<2,>=1.0.4
45
45
  Requires-Dist: psutil
46
46
  Requires-Dist: huggingface_hub
47
+ Requires-Dist: iterative-telemetry>=0.0.9
47
48
  Provides-Extra: docs
48
49
  Requires-Dist: mkdocs>=1.5.2; extra == "docs"
49
50
  Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
@@ -69,7 +70,7 @@ Requires-Dist: pytest<9,>=8; extra == "tests"
69
70
  Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
70
71
  Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
71
72
  Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
72
- Requires-Dist: pytest-servers[all]>=0.5.5; extra == "tests"
73
+ Requires-Dist: pytest-servers[all]>=0.5.7; extra == "tests"
73
74
  Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
74
75
  Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
75
76
  Requires-Dist: virtualenv; extra == "tests"
@@ -91,9 +92,10 @@ Requires-Dist: datachain[tests]; extra == "examples"
91
92
  Requires-Dist: numpy<2,>=1; extra == "examples"
92
93
  Requires-Dist: defusedxml; extra == "examples"
93
94
  Requires-Dist: accelerate; extra == "examples"
94
- Requires-Dist: unstructured[pdf]; extra == "examples"
95
+ Requires-Dist: unstructured[embed-huggingface,pdf]; extra == "examples"
95
96
  Requires-Dist: pdfplumber==0.11.4; extra == "examples"
96
97
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
98
+ Requires-Dist: onnx==1.16.1; extra == "examples"
97
99
 
98
100
  ================
99
101
  |logo| DataChain
@@ -0,0 +1,76 @@
1
+ """
2
+ To install the required dependencies:
3
+
4
+ pip install datachain[examples]
5
+
6
+ """
7
+
8
+ from collections.abc import Iterator
9
+
10
+ from unstructured.cleaners.core import (
11
+ clean,
12
+ group_broken_paragraphs,
13
+ replace_unicode_quotes,
14
+ )
15
+ from unstructured.embed.huggingface import (
16
+ HuggingFaceEmbeddingConfig,
17
+ HuggingFaceEmbeddingEncoder,
18
+ )
19
+ from unstructured.partition.pdf import partition_pdf
20
+
21
+ from datachain import C, DataChain, DataModel, File
22
+
23
+ source = "gs://datachain-demo/neurips/1987/"
24
+
25
+
26
+ # Define the output as a DataModel class
27
+ class Chunk(DataModel):
28
+ key: str
29
+ text: str
30
+ embeddings: list[float]
31
+
32
+
33
+ # Define embedding encoder
34
+
35
+ embedding_encoder = HuggingFaceEmbeddingEncoder(config=HuggingFaceEmbeddingConfig())
36
+
37
+
38
+ # Use signatures to define UDF input/output
39
+ # these can be pydantic model or regular Python types
40
+ def process_pdf(file: File) -> Iterator[Chunk]:
41
+ # Ingest the file
42
+ with file.open() as f:
43
+ chunks = partition_pdf(file=f, chunking_strategy="by_title", strategy="fast")
44
+
45
+ # Clean the chunks and add new columns
46
+ for chunk in chunks:
47
+ chunk.apply(
48
+ lambda text: clean(
49
+ text, bullets=True, extra_whitespace=True, trailing_punctuation=True
50
+ )
51
+ )
52
+ chunk.apply(replace_unicode_quotes)
53
+ chunk.apply(group_broken_paragraphs)
54
+
55
+ # create embeddings
56
+ chunks_embedded = embedding_encoder.embed_documents(chunks)
57
+
58
+ # Add new rows to DataChain
59
+ for chunk in chunks_embedded:
60
+ yield Chunk(
61
+ key=file.path,
62
+ text=chunk.text,
63
+ embeddings=chunk.embeddings,
64
+ )
65
+
66
+
67
+ dc = (
68
+ DataChain.from_storage(source)
69
+ .settings(parallel=-1)
70
+ .filter(C.file.path.glob("*.pdf"))
71
+ .gen(document=process_pdf)
72
+ )
73
+
74
+ dc.save("embedded-documents")
75
+
76
+ DataChain.from_dataset("embedded-documents").show()
@@ -1,6 +1,10 @@
1
- #
2
- # pip install unstructured[pdf] huggingface_hub[hf_transfer]
3
- #
1
+ """
2
+ To install the required dependencies:
3
+
4
+ pip install datachain[examples]
5
+
6
+ """
7
+
4
8
  import os
5
9
 
6
10
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -1,4 +1,10 @@
1
- # pip install scipy torch transformers huggingface_hub[hf_transfer]
1
+ """
2
+ To install the required dependencies:
3
+
4
+ pip install datachain[examples]
5
+
6
+ """
7
+
2
8
  # NOTE: also need to install ffmpeg binary
3
9
  import json
4
10
  import os
@@ -1,5 +1,3 @@
1
- # pip install Pillow
2
-
3
1
  import base64
4
2
  import os
5
3
 
@@ -46,7 +46,8 @@ dependencies = [
46
46
  "Pillow>=10.0.0,<11",
47
47
  "msgpack>=1.0.4,<2",
48
48
  "psutil",
49
- "huggingface_hub"
49
+ "huggingface_hub",
50
+ "iterative-telemetry>=0.0.9"
50
51
  ]
51
52
 
52
53
  [project.optional-dependencies]
@@ -80,7 +81,7 @@ tests = [
80
81
  "pytest-sugar>=0.9.6",
81
82
  "pytest-cov>=4.1.0",
82
83
  "pytest-mock>=3.12.0",
83
- "pytest-servers[all]>=0.5.5",
84
+ "pytest-servers[all]>=0.5.7",
84
85
  "pytest-benchmark[histogram]",
85
86
  "pytest-xdist>=3.3.1",
86
87
  "virtualenv",
@@ -104,9 +105,10 @@ examples = [
104
105
  "numpy>=1,<2",
105
106
  "defusedxml",
106
107
  "accelerate",
107
- "unstructured[pdf]",
108
+ "unstructured[pdf, embed-huggingface]",
108
109
  "pdfplumber==0.11.4",
109
- "huggingface_hub[hf_transfer]"
110
+ "huggingface_hub[hf_transfer]",
111
+ "onnx==1.16.1"
110
112
  ]
111
113
 
112
114
  [project.urls]
@@ -1,56 +1,15 @@
1
- import hashlib
2
- import json
3
1
  import os
4
- from datetime import datetime
5
- from functools import partial
6
2
  from typing import TYPE_CHECKING, Optional
7
3
 
8
- import attrs
9
4
  from dvc_data.hashfile.db.local import LocalHashFileDB
10
5
  from dvc_objects.fs.local import LocalFileSystem
11
6
  from fsspec.callbacks import Callback, TqdmCallback
12
7
 
13
- from datachain.utils import TIME_ZERO
14
-
15
8
  from .progress import Tqdm
16
9
 
17
10
  if TYPE_CHECKING:
18
11
  from datachain.client import Client
19
- from datachain.storage import StorageURI
20
-
21
- sha256 = partial(hashlib.sha256, usedforsecurity=False)
22
-
23
-
24
- @attrs.frozen
25
- class UniqueId:
26
- storage: "StorageURI"
27
- path: str
28
- size: int
29
- etag: str
30
- version: str = ""
31
- is_latest: bool = True
32
- location: Optional[str] = None
33
- last_modified: datetime = TIME_ZERO
34
-
35
- def get_parsed_location(self) -> Optional[dict]:
36
- if not self.location:
37
- return None
38
-
39
- loc_stack = (
40
- json.loads(self.location)
41
- if isinstance(self.location, str)
42
- else self.location
43
- )
44
- if len(loc_stack) > 1:
45
- raise NotImplementedError("Nested v-objects are not supported yet.")
46
-
47
- return loc_stack[0]
48
-
49
- def get_hash(self) -> str:
50
- fingerprint = f"{self.storage}/{self.path}/{self.version}/{self.etag}"
51
- if self.location:
52
- fingerprint += f"/{self.location}"
53
- return sha256(fingerprint.encode()).hexdigest()
12
+ from datachain.lib.file import File
54
13
 
55
14
 
56
15
  def try_scandir(path):
@@ -77,30 +36,30 @@ class DataChainCache:
77
36
  def tmp_dir(self):
78
37
  return self.odb.tmp_dir
79
38
 
80
- def get_path(self, uid: UniqueId) -> Optional[str]:
81
- if self.contains(uid):
82
- return self.path_from_checksum(uid.get_hash())
39
+ def get_path(self, file: "File") -> Optional[str]:
40
+ if self.contains(file):
41
+ return self.path_from_checksum(file.get_hash())
83
42
  return None
84
43
 
85
- def contains(self, uid: UniqueId) -> bool:
86
- return self.odb.exists(uid.get_hash())
44
+ def contains(self, file: "File") -> bool:
45
+ return self.odb.exists(file.get_hash())
87
46
 
88
47
  def path_from_checksum(self, checksum: str) -> str:
89
48
  assert checksum
90
49
  return self.odb.oid_to_path(checksum)
91
50
 
92
- def remove(self, uid: UniqueId) -> None:
93
- self.odb.delete(uid.get_hash())
51
+ def remove(self, file: "File") -> None:
52
+ self.odb.delete(file.get_hash())
94
53
 
95
54
  async def download(
96
- self, uid: UniqueId, client: "Client", callback: Optional[Callback] = None
55
+ self, file: "File", client: "Client", callback: Optional[Callback] = None
97
56
  ) -> None:
98
- from_path = f"{uid.storage}/{uid.path}"
57
+ from_path = f"{file.source}/{file.path}"
99
58
  from dvc_objects.fs.utils import tmp_fname
100
59
 
101
60
  odb_fs = self.odb.fs
102
61
  tmp_info = odb_fs.join(self.odb.tmp_dir, tmp_fname()) # type: ignore[arg-type]
103
- size = uid.size
62
+ size = file.size
104
63
  if size < 0:
105
64
  size = await client.get_size(from_path)
106
65
  cb = callback or TqdmCallback(
@@ -115,13 +74,13 @@ class DataChainCache:
115
74
  cb.close()
116
75
 
117
76
  try:
118
- oid = uid.get_hash()
77
+ oid = file.get_hash()
119
78
  self.odb.add(tmp_info, self.odb.fs, oid)
120
79
  finally:
121
80
  os.unlink(tmp_info)
122
81
 
123
- def store_data(self, uid: UniqueId, contents: bytes) -> None:
124
- checksum = uid.get_hash()
82
+ def store_data(self, file: "File", contents: bytes) -> None:
83
+ checksum = file.get_hash()
125
84
  dst = self.path_from_checksum(checksum)
126
85
  if not os.path.exists(dst):
127
86
  # Create the file only if it's not already in cache
@@ -34,7 +34,7 @@ import yaml
34
34
  from sqlalchemy import Column
35
35
  from tqdm import tqdm
36
36
 
37
- from datachain.cache import DataChainCache, UniqueId
37
+ from datachain.cache import DataChainCache
38
38
  from datachain.client import Client
39
39
  from datachain.config import get_remote_config, read_config
40
40
  from datachain.dataset import (
@@ -68,8 +68,6 @@ from datachain.utils import (
68
68
  DataChainDir,
69
69
  batched,
70
70
  datachain_paths_join,
71
- import_object,
72
- parse_params_string,
73
71
  )
74
72
 
75
73
  from .datasource import DataSource
@@ -621,13 +619,13 @@ class Catalog:
621
619
  code_ast.body[-1:] = new_expressions
622
620
  return code_ast
623
621
 
624
- def get_client(self, uri: StorageURI, **config: Any) -> Client:
622
+ def get_client(self, uri: str, **config: Any) -> Client:
625
623
  """
626
624
  Return the client corresponding to the given source `uri`.
627
625
  """
628
626
  config = config or self.client_config
629
627
  cls = Client.get_implementation(uri)
630
- return cls.from_source(uri, self.cache, **config)
628
+ return cls.from_source(StorageURI(uri), self.cache, **config)
631
629
 
632
630
  def enlist_source(
633
631
  self,
@@ -843,7 +841,7 @@ class Catalog:
843
841
  from datachain.query import DatasetQuery
844
842
 
845
843
  def _row_to_node(d: dict[str, Any]) -> Node:
846
- del d["source"]
844
+ del d["file__source"]
847
845
  return Node.from_dict(d)
848
846
 
849
847
  enlisted_sources: list[tuple[bool, bool, Any]] = []
@@ -1148,30 +1146,28 @@ class Catalog:
1148
1146
  if not sources:
1149
1147
  raise ValueError("Sources needs to be non empty list")
1150
1148
 
1151
- from datachain.query import DatasetQuery
1149
+ from datachain.lib.dc import DataChain
1150
+ from datachain.query.session import Session
1151
+
1152
+ session = Session.get(catalog=self, client_config=client_config)
1152
1153
 
1153
- dataset_queries = []
1154
+ chains = []
1154
1155
  for source in sources:
1155
1156
  if source.startswith(DATASET_PREFIX):
1156
- dq = DatasetQuery(
1157
- name=source[len(DATASET_PREFIX) :],
1158
- catalog=self,
1159
- client_config=client_config,
1157
+ dc = DataChain.from_dataset(
1158
+ source[len(DATASET_PREFIX) :], session=session
1160
1159
  )
1161
1160
  else:
1162
- dq = DatasetQuery(
1163
- path=source,
1164
- catalog=self,
1165
- client_config=client_config,
1166
- recursive=recursive,
1161
+ dc = DataChain.from_storage(
1162
+ source, session=session, recursive=recursive
1167
1163
  )
1168
1164
 
1169
- dataset_queries.append(dq)
1165
+ chains.append(dc)
1170
1166
 
1171
1167
  # create union of all dataset queries created from sources
1172
- dq = reduce(lambda ds1, ds2: ds1.union(ds2), dataset_queries)
1168
+ dc = reduce(lambda dc1, dc2: dc1.union(dc2), chains)
1173
1169
  try:
1174
- dq.save(name)
1170
+ dc.save(name)
1175
1171
  except Exception as e: # noqa: BLE001
1176
1172
  try:
1177
1173
  ds = self.get_dataset(name)
@@ -1435,7 +1431,7 @@ class Catalog:
1435
1431
 
1436
1432
  def get_file_signals(
1437
1433
  self, dataset_name: str, dataset_version: int, row: RowDict
1438
- ) -> Optional[dict]:
1434
+ ) -> Optional[RowDict]:
1439
1435
  """
1440
1436
  Function that returns file signals from dataset row.
1441
1437
  Note that signal names are without prefix, so if there was 'laion__file__source'
@@ -1452,7 +1448,7 @@ class Catalog:
1452
1448
 
1453
1449
  version = self.get_dataset(dataset_name).get_version(dataset_version)
1454
1450
 
1455
- file_signals_values = {}
1451
+ file_signals_values = RowDict()
1456
1452
 
1457
1453
  schema = SignalSchema.deserialize(version.feature_schema)
1458
1454
  for file_signals in schema.get_signals(File):
@@ -1480,6 +1476,8 @@ class Catalog:
1480
1476
  use_cache: bool = True,
1481
1477
  **config: Any,
1482
1478
  ):
1479
+ from datachain.lib.file import File
1480
+
1483
1481
  file_signals = self.get_file_signals(dataset_name, dataset_version, row)
1484
1482
  if not file_signals:
1485
1483
  raise RuntimeError("Cannot open object without file signals")
@@ -1487,22 +1485,10 @@ class Catalog:
1487
1485
  config = config or self.client_config
1488
1486
  client = self.get_client(file_signals["source"], **config)
1489
1487
  return client.open_object(
1490
- self._get_row_uid(file_signals), # type: ignore [arg-type]
1488
+ File._from_row(file_signals),
1491
1489
  use_cache=use_cache,
1492
1490
  )
1493
1491
 
1494
- def _get_row_uid(self, row: RowDict) -> UniqueId:
1495
- return UniqueId(
1496
- row["source"],
1497
- row["path"],
1498
- row["size"],
1499
- row["etag"],
1500
- row["version"],
1501
- row["is_latest"],
1502
- row["location"],
1503
- row["last_modified"],
1504
- )
1505
-
1506
1492
  def ls(
1507
1493
  self,
1508
1494
  sources: list[str],
@@ -1731,26 +1717,6 @@ class Catalog:
1731
1717
  output, sources, client_config=client_config, recursive=recursive
1732
1718
  )
1733
1719
 
1734
- def apply_udf(
1735
- self,
1736
- udf_location: str,
1737
- source: str,
1738
- target_name: str,
1739
- parallel: Optional[int] = None,
1740
- params: Optional[str] = None,
1741
- ):
1742
- from datachain.query import DatasetQuery
1743
-
1744
- if source.startswith(DATASET_PREFIX):
1745
- ds = DatasetQuery(name=source[len(DATASET_PREFIX) :], catalog=self)
1746
- else:
1747
- ds = DatasetQuery(path=source, catalog=self)
1748
- udf = import_object(udf_location)
1749
- if params:
1750
- args, kwargs = parse_params_string(params)
1751
- udf = udf(*args, **kwargs)
1752
- ds.add_signals(udf, parallel=parallel).save(target_name)
1753
-
1754
1720
  def query(
1755
1721
  self,
1756
1722
  query_script: str,
@@ -15,6 +15,7 @@ import shtab
15
15
  from datachain import utils
16
16
  from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyValueArgs
17
17
  from datachain.lib.dc import DataChain
18
+ from datachain.telemetry import telemetry
18
19
  from datachain.utils import DataChainDir
19
20
 
20
21
  if TYPE_CHECKING:
@@ -494,27 +495,6 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
494
495
  help="Query parameters",
495
496
  )
496
497
 
497
- apply_udf_parser = subp.add_parser(
498
- "apply-udf", parents=[parent_parser], description="Apply UDF"
499
- )
500
- apply_udf_parser.add_argument("udf", type=str, help="UDF location")
501
- apply_udf_parser.add_argument("source", type=str, help="Source storage or dataset")
502
- apply_udf_parser.add_argument("target", type=str, help="Target dataset name")
503
- apply_udf_parser.add_argument(
504
- "--parallel",
505
- nargs="?",
506
- type=int,
507
- const=-1,
508
- default=None,
509
- metavar="N",
510
- help=(
511
- "Use multiprocessing to run the UDF with N worker processes. "
512
- "N defaults to the CPU count."
513
- ),
514
- )
515
- apply_udf_parser.add_argument(
516
- "--udf-params", type=str, default=None, help="UDF class parameters"
517
- )
518
498
  subp.add_parser(
519
499
  "clear-cache", parents=[parent_parser], description="Clear the local file cache"
520
500
  )
@@ -893,6 +873,7 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
893
873
  # This also sets this environment variable for any subprocesses
894
874
  os.environ["DEBUG_SHOW_SQL_QUERIES"] = "True"
895
875
 
876
+ error = None
896
877
  try:
897
878
  catalog = get_catalog(client_config=client_config)
898
879
  if args.command == "cp":
@@ -1016,10 +997,6 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1016
997
  parallel=args.parallel,
1017
998
  params=args.param,
1018
999
  )
1019
- elif args.command == "apply-udf":
1020
- catalog.apply_udf(
1021
- args.udf, args.source, args.target, args.parallel, args.udf_params
1022
- )
1023
1000
  elif args.command == "clear-cache":
1024
1001
  clear_cache(catalog)
1025
1002
  elif args.command == "gc":
@@ -1028,14 +1005,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1028
1005
  print(f"invalid command: {args.command}", file=sys.stderr)
1029
1006
  return 1
1030
1007
  return 0
1031
- except BrokenPipeError:
1008
+ except BrokenPipeError as exc:
1032
1009
  # Python flushes standard streams on exit; redirect remaining output
1033
1010
  # to devnull to avoid another BrokenPipeError at shutdown
1034
1011
  # See: https://docs.python.org/3/library/signal.html#note-on-sigpipe
1012
+ error = str(exc)
1035
1013
  devnull = os.open(os.devnull, os.O_WRONLY)
1036
1014
  os.dup2(devnull, sys.stdout.fileno())
1037
1015
  return 141 # 128 + 13 (SIGPIPE)
1038
1016
  except (KeyboardInterrupt, Exception) as exc:
1017
+ error = str(exc)
1039
1018
  if isinstance(exc, KeyboardInterrupt):
1040
1019
  msg = "Operation cancelled by the user"
1041
1020
  else:
@@ -1053,3 +1032,5 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
1053
1032
 
1054
1033
  pdb.post_mortem()
1055
1034
  return 1
1035
+ finally:
1036
+ telemetry.send_cli_call(args.command, error=error)