datachain 0.8.12__tar.gz → 0.8.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (308)
  1. {datachain-0.8.12 → datachain-0.8.13}/PKG-INFO +1 -1
  2. {datachain-0.8.12 → datachain-0.8.13}/examples/get_started/common_sql_functions.py +16 -1
  3. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/__init__.py +2 -1
  4. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/array.py +39 -1
  5. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/array.py +13 -1
  6. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/sqlite/base.py +17 -1
  7. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/sqlite/types.py +5 -0
  8. {datachain-0.8.12 → datachain-0.8.13}/src/datachain.egg-info/PKG-INFO +1 -1
  9. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/test_array.py +18 -0
  10. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_func.py +25 -0
  11. {datachain-0.8.12 → datachain-0.8.13}/.cruft.json +0 -0
  12. {datachain-0.8.12 → datachain-0.8.13}/.gitattributes +0 -0
  13. {datachain-0.8.12 → datachain-0.8.13}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  14. {datachain-0.8.12 → datachain-0.8.13}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  15. {datachain-0.8.12 → datachain-0.8.13}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  16. {datachain-0.8.12 → datachain-0.8.13}/.github/codecov.yaml +0 -0
  17. {datachain-0.8.12 → datachain-0.8.13}/.github/dependabot.yml +0 -0
  18. {datachain-0.8.12 → datachain-0.8.13}/.github/workflows/benchmarks.yml +0 -0
  19. {datachain-0.8.12 → datachain-0.8.13}/.github/workflows/release.yml +0 -0
  20. {datachain-0.8.12 → datachain-0.8.13}/.github/workflows/tests-studio.yml +0 -0
  21. {datachain-0.8.12 → datachain-0.8.13}/.github/workflows/tests.yml +0 -0
  22. {datachain-0.8.12 → datachain-0.8.13}/.github/workflows/update-template.yaml +0 -0
  23. {datachain-0.8.12 → datachain-0.8.13}/.gitignore +0 -0
  24. {datachain-0.8.12 → datachain-0.8.13}/.pre-commit-config.yaml +0 -0
  25. {datachain-0.8.12 → datachain-0.8.13}/CODE_OF_CONDUCT.rst +0 -0
  26. {datachain-0.8.12 → datachain-0.8.13}/LICENSE +0 -0
  27. {datachain-0.8.12 → datachain-0.8.13}/README.rst +0 -0
  28. {datachain-0.8.12 → datachain-0.8.13}/docs/assets/captioned_cartoons.png +0 -0
  29. {datachain-0.8.12 → datachain-0.8.13}/docs/assets/datachain-white.svg +0 -0
  30. {datachain-0.8.12 → datachain-0.8.13}/docs/assets/datachain.svg +0 -0
  31. {datachain-0.8.12 → datachain-0.8.13}/docs/contributing.md +0 -0
  32. {datachain-0.8.12 → datachain-0.8.13}/docs/css/github-permalink-style.css +0 -0
  33. {datachain-0.8.12 → datachain-0.8.13}/docs/examples.md +0 -0
  34. {datachain-0.8.12 → datachain-0.8.13}/docs/index.md +0 -0
  35. {datachain-0.8.12 → datachain-0.8.13}/docs/overrides/main.html +0 -0
  36. {datachain-0.8.12 → datachain-0.8.13}/docs/quick-start.md +0 -0
  37. {datachain-0.8.12 → datachain-0.8.13}/docs/references/datachain.md +0 -0
  38. {datachain-0.8.12 → datachain-0.8.13}/docs/references/datatype.md +0 -0
  39. {datachain-0.8.12 → datachain-0.8.13}/docs/references/file.md +0 -0
  40. {datachain-0.8.12 → datachain-0.8.13}/docs/references/func.md +0 -0
  41. {datachain-0.8.12 → datachain-0.8.13}/docs/references/index.md +0 -0
  42. {datachain-0.8.12 → datachain-0.8.13}/docs/references/torch.md +0 -0
  43. {datachain-0.8.12 → datachain-0.8.13}/docs/references/udf.md +0 -0
  44. {datachain-0.8.12 → datachain-0.8.13}/docs/tutorials.md +0 -0
  45. {datachain-0.8.12 → datachain-0.8.13}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  46. {datachain-0.8.12 → datachain-0.8.13}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  47. {datachain-0.8.12 → datachain-0.8.13}/examples/computer_vision/openimage-detect.py +0 -0
  48. {datachain-0.8.12 → datachain-0.8.13}/examples/computer_vision/ultralytics-bbox.py +0 -0
  49. {datachain-0.8.12 → datachain-0.8.13}/examples/computer_vision/ultralytics-pose.py +0 -0
  50. {datachain-0.8.12 → datachain-0.8.13}/examples/computer_vision/ultralytics-segment.py +0 -0
  51. {datachain-0.8.12 → datachain-0.8.13}/examples/get_started/json-csv-reader.py +0 -0
  52. {datachain-0.8.12 → datachain-0.8.13}/examples/get_started/torch-loader.py +0 -0
  53. {datachain-0.8.12 → datachain-0.8.13}/examples/get_started/udfs/parallel.py +0 -0
  54. {datachain-0.8.12 → datachain-0.8.13}/examples/get_started/udfs/simple.py +0 -0
  55. {datachain-0.8.12 → datachain-0.8.13}/examples/get_started/udfs/stateful.py +0 -0
  56. {datachain-0.8.12 → datachain-0.8.13}/examples/llm_and_nlp/claude-query.py +0 -0
  57. {datachain-0.8.12 → datachain-0.8.13}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  58. {datachain-0.8.12 → datachain-0.8.13}/examples/multimodal/clip_inference.py +0 -0
  59. {datachain-0.8.12 → datachain-0.8.13}/examples/multimodal/hf_pipeline.py +0 -0
  60. {datachain-0.8.12 → datachain-0.8.13}/examples/multimodal/openai_image_desc_lib.py +0 -0
  61. {datachain-0.8.12 → datachain-0.8.13}/examples/multimodal/wds.py +0 -0
  62. {datachain-0.8.12 → datachain-0.8.13}/examples/multimodal/wds_filtered.py +0 -0
  63. {datachain-0.8.12 → datachain-0.8.13}/mkdocs.yml +0 -0
  64. {datachain-0.8.12 → datachain-0.8.13}/noxfile.py +0 -0
  65. {datachain-0.8.12 → datachain-0.8.13}/pyproject.toml +0 -0
  66. {datachain-0.8.12 → datachain-0.8.13}/setup.cfg +0 -0
  67. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/__init__.py +0 -0
  68. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/__main__.py +0 -0
  69. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/asyn.py +0 -0
  70. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cache.py +0 -0
  71. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/catalog/__init__.py +0 -0
  72. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/catalog/catalog.py +0 -0
  73. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/catalog/datasource.py +0 -0
  74. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/catalog/loader.py +0 -0
  75. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/__init__.py +0 -0
  76. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/__init__.py +0 -0
  77. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/datasets.py +0 -0
  78. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/du.py +0 -0
  79. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/index.py +0 -0
  80. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/ls.py +0 -0
  81. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/misc.py +0 -0
  82. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/query.py +0 -0
  83. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/commands/show.py +0 -0
  84. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/parser/__init__.py +0 -0
  85. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/parser/job.py +0 -0
  86. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/parser/studio.py +0 -0
  87. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/parser/utils.py +0 -0
  88. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/cli/utils.py +0 -0
  89. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/__init__.py +0 -0
  90. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/azure.py +0 -0
  91. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/fileslice.py +0 -0
  92. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/fsspec.py +0 -0
  93. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/gcs.py +0 -0
  94. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/hf.py +0 -0
  95. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/local.py +0 -0
  96. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/client/s3.py +0 -0
  97. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/config.py +0 -0
  98. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/__init__.py +0 -0
  99. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/db_engine.py +0 -0
  100. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/job.py +0 -0
  101. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/metastore.py +0 -0
  102. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/schema.py +0 -0
  103. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/serializer.py +0 -0
  104. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/sqlite.py +0 -0
  105. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/data_storage/warehouse.py +0 -0
  106. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/dataset.py +0 -0
  107. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/diff/__init__.py +0 -0
  108. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/error.py +0 -0
  109. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/aggregate.py +0 -0
  110. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/base.py +0 -0
  111. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/conditional.py +0 -0
  112. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/func.py +0 -0
  113. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/numeric.py +0 -0
  114. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/path.py +0 -0
  115. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/random.py +0 -0
  116. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/string.py +0 -0
  117. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/func/window.py +0 -0
  118. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/job.py +0 -0
  119. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/__init__.py +0 -0
  120. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/arrow.py +0 -0
  121. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/clip.py +0 -0
  122. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/convert/__init__.py +0 -0
  123. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/convert/flatten.py +0 -0
  124. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/convert/python_to_sql.py +0 -0
  125. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/convert/sql_to_python.py +0 -0
  126. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/convert/unflatten.py +0 -0
  127. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  128. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/data_model.py +0 -0
  129. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/dataset_info.py +0 -0
  130. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/dc.py +0 -0
  131. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/file.py +0 -0
  132. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/hf.py +0 -0
  133. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/image.py +0 -0
  134. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/listing.py +0 -0
  135. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/listing_info.py +0 -0
  136. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/meta_formats.py +0 -0
  137. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/model_store.py +0 -0
  138. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/pytorch.py +0 -0
  139. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/settings.py +0 -0
  140. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/signal_schema.py +0 -0
  141. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/tar.py +0 -0
  142. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/text.py +0 -0
  143. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/udf.py +0 -0
  144. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/udf_signature.py +0 -0
  145. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/utils.py +0 -0
  146. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/vfile.py +0 -0
  147. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/webdataset.py +0 -0
  148. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/lib/webdataset_laion.py +0 -0
  149. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/listing.py +0 -0
  150. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/__init__.py +0 -0
  151. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/bbox.py +0 -0
  152. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/pose.py +0 -0
  153. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/segment.py +0 -0
  154. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/ultralytics/__init__.py +0 -0
  155. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/ultralytics/bbox.py +0 -0
  156. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/ultralytics/pose.py +0 -0
  157. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/model/ultralytics/segment.py +0 -0
  158. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/node.py +0 -0
  159. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/nodes_fetcher.py +0 -0
  160. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/nodes_thread_pool.py +0 -0
  161. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/progress.py +0 -0
  162. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/py.typed +0 -0
  163. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/__init__.py +0 -0
  164. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/batch.py +0 -0
  165. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/dataset.py +0 -0
  166. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/dispatch.py +0 -0
  167. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/metrics.py +0 -0
  168. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/params.py +0 -0
  169. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/queue.py +0 -0
  170. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/schema.py +0 -0
  171. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/session.py +0 -0
  172. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/udf.py +0 -0
  173. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/query/utils.py +0 -0
  174. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/remote/__init__.py +0 -0
  175. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/remote/studio.py +0 -0
  176. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/__init__.py +0 -0
  177. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/default/__init__.py +0 -0
  178. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/default/base.py +0 -0
  179. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/__init__.py +0 -0
  180. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/aggregate.py +0 -0
  181. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/conditional.py +0 -0
  182. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/numeric.py +0 -0
  183. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/path.py +0 -0
  184. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/random.py +0 -0
  185. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/functions/string.py +0 -0
  186. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/selectable.py +0 -0
  187. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/sqlite/__init__.py +0 -0
  188. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/sqlite/vector.py +0 -0
  189. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/types.py +0 -0
  190. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/sql/utils.py +0 -0
  191. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/studio.py +0 -0
  192. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/telemetry.py +0 -0
  193. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/toolkit/__init__.py +0 -0
  194. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/toolkit/split.py +0 -0
  195. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/torch/__init__.py +0 -0
  196. {datachain-0.8.12 → datachain-0.8.13}/src/datachain/utils.py +0 -0
  197. {datachain-0.8.12 → datachain-0.8.13}/src/datachain.egg-info/SOURCES.txt +0 -0
  198. {datachain-0.8.12 → datachain-0.8.13}/src/datachain.egg-info/dependency_links.txt +0 -0
  199. {datachain-0.8.12 → datachain-0.8.13}/src/datachain.egg-info/entry_points.txt +0 -0
  200. {datachain-0.8.12 → datachain-0.8.13}/src/datachain.egg-info/requires.txt +0 -0
  201. {datachain-0.8.12 → datachain-0.8.13}/src/datachain.egg-info/top_level.txt +0 -0
  202. {datachain-0.8.12 → datachain-0.8.13}/tests/__init__.py +0 -0
  203. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/__init__.py +0 -0
  204. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/conftest.py +0 -0
  205. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  206. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/datasets/.dvc/config +0 -0
  207. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/datasets/.gitignore +0 -0
  208. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  209. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/test_datachain.py +0 -0
  210. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/test_ls.py +0 -0
  211. {datachain-0.8.12 → datachain-0.8.13}/tests/benchmarks/test_version.py +0 -0
  212. {datachain-0.8.12 → datachain-0.8.13}/tests/conftest.py +0 -0
  213. {datachain-0.8.12 → datachain-0.8.13}/tests/data.py +0 -0
  214. {datachain-0.8.12 → datachain-0.8.13}/tests/examples/__init__.py +0 -0
  215. {datachain-0.8.12 → datachain-0.8.13}/tests/examples/test_examples.py +0 -0
  216. {datachain-0.8.12 → datachain-0.8.13}/tests/examples/test_wds_e2e.py +0 -0
  217. {datachain-0.8.12 → datachain-0.8.13}/tests/examples/wds_data.py +0 -0
  218. {datachain-0.8.12 → datachain-0.8.13}/tests/func/__init__.py +0 -0
  219. {datachain-0.8.12 → datachain-0.8.13}/tests/func/fake-service-account-credentials.json +0 -0
  220. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_catalog.py +0 -0
  221. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_client.py +0 -0
  222. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_data_storage.py +0 -0
  223. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_datachain.py +0 -0
  224. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_datachain_merge.py +0 -0
  225. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_dataset_query.py +0 -0
  226. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_datasets.py +0 -0
  227. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_feature_pickling.py +0 -0
  228. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_file.py +0 -0
  229. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_hf.py +0 -0
  230. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_listing.py +0 -0
  231. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_ls.py +0 -0
  232. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_meta_formats.py +0 -0
  233. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_metrics.py +0 -0
  234. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_pull.py +0 -0
  235. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_pytorch.py +0 -0
  236. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_query.py +0 -0
  237. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_session.py +0 -0
  238. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_toolkit.py +0 -0
  239. {datachain-0.8.12 → datachain-0.8.13}/tests/func/test_warehouse.py +0 -0
  240. {datachain-0.8.12 → datachain-0.8.13}/tests/scripts/feature_class.py +0 -0
  241. {datachain-0.8.12 → datachain-0.8.13}/tests/scripts/feature_class_exception.py +0 -0
  242. {datachain-0.8.12 → datachain-0.8.13}/tests/scripts/feature_class_parallel.py +0 -0
  243. {datachain-0.8.12 → datachain-0.8.13}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  244. {datachain-0.8.12 → datachain-0.8.13}/tests/scripts/name_len_slow.py +0 -0
  245. {datachain-0.8.12 → datachain-0.8.13}/tests/test_atomicity.py +0 -0
  246. {datachain-0.8.12 → datachain-0.8.13}/tests/test_cli_e2e.py +0 -0
  247. {datachain-0.8.12 → datachain-0.8.13}/tests/test_cli_studio.py +0 -0
  248. {datachain-0.8.12 → datachain-0.8.13}/tests/test_query_e2e.py +0 -0
  249. {datachain-0.8.12 → datachain-0.8.13}/tests/test_telemetry.py +0 -0
  250. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/__init__.py +0 -0
  251. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/__init__.py +0 -0
  252. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/conftest.py +0 -0
  253. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_arrow.py +0 -0
  254. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_clip.py +0 -0
  255. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_datachain.py +0 -0
  256. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  257. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_datachain_merge.py +0 -0
  258. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_diff.py +0 -0
  259. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_feature.py +0 -0
  260. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_feature_utils.py +0 -0
  261. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_file.py +0 -0
  262. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_hf.py +0 -0
  263. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_image.py +0 -0
  264. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_listing_info.py +0 -0
  265. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_models.py +0 -0
  266. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_python_to_sql.py +0 -0
  267. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_schema.py +0 -0
  268. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_signal_schema.py +0 -0
  269. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_sql_to_python.py +0 -0
  270. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_text.py +0 -0
  271. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_udf_signature.py +0 -0
  272. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_utils.py +0 -0
  273. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/lib/test_webdataset.py +0 -0
  274. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/__init__.py +0 -0
  275. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/sqlite/__init__.py +0 -0
  276. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/sqlite/test_types.py +0 -0
  277. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/sqlite/test_utils.py +0 -0
  278. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/test_conditional.py +0 -0
  279. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/test_path.py +0 -0
  280. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/test_random.py +0 -0
  281. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/test_selectable.py +0 -0
  282. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/sql/test_string.py +0 -0
  283. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_asyn.py +0 -0
  284. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_cache.py +0 -0
  285. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_catalog.py +0 -0
  286. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_catalog_loader.py +0 -0
  287. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_cli_parsing.py +0 -0
  288. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_client.py +0 -0
  289. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_client_gcs.py +0 -0
  290. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_client_s3.py +0 -0
  291. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_config.py +0 -0
  292. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_data_storage.py +0 -0
  293. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_database_engine.py +0 -0
  294. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_dataset.py +0 -0
  295. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_dispatch.py +0 -0
  296. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_fileslice.py +0 -0
  297. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_listing.py +0 -0
  298. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_metastore.py +0 -0
  299. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_module_exports.py +0 -0
  300. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_pytorch.py +0 -0
  301. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_query.py +0 -0
  302. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_query_metrics.py +0 -0
  303. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_query_params.py +0 -0
  304. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_serializer.py +0 -0
  305. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_session.py +0 -0
  306. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_utils.py +0 -0
  307. {datachain-0.8.12 → datachain-0.8.13}/tests/unit/test_warehouse.py +0 -0
  308. {datachain-0.8.12 → datachain-0.8.13}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.12
3
+ Version: 0.8.13
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -9,7 +9,7 @@ def num_chars_udf(file):
9
9
  return ([],)
10
10
 
11
11
 
12
- dc = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/")
12
+ dc = DataChain.from_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
13
13
  dc.map(num_chars_udf, params=["file"], output={"num_chars": list[str]}).select(
14
14
  "file.path", "num_chars"
15
15
  ).show(5)
@@ -32,6 +32,12 @@ dc.map(num_chars_udf, params=["file"], output={"num_chars": list[str]}).select(
32
32
  .show(5)
33
33
  )
34
34
 
35
+ parts = string.split(path.name(C("file.path")), ".")
36
+ chain = dc.mutate(
37
+ isdog=array.contains(parts, "dog"),
38
+ iscat=array.contains(parts, "cat"),
39
+ )
40
+ chain.select("file.path", "isdog", "iscat").show(5)
35
41
 
36
42
  chain = dc.mutate(
37
43
  a=array.length(string.split("file.path", "/")),
@@ -79,6 +85,15 @@ Processed: 400 rows [00:00, 16364.66 rows/s]
79
85
  3 dogs-and-cats/cat.10.json cat.10 json
80
86
  4 dogs-and-cats/cat.100.jpg cat.100 jpg
81
87
 
88
+ [Limited by 5 rows]
89
+ file isdog iscat
90
+ path
91
+ 0 dogs-and-cats/cat.1.jpg 0 1
92
+ 1 dogs-and-cats/cat.1.json 0 1
93
+ 2 dogs-and-cats/cat.10.jpg 0 1
94
+ 3 dogs-and-cats/cat.10.json 0 1
95
+ 4 dogs-and-cats/cat.100.jpg 0 1
96
+
82
97
  [Limited by 5 rows]
83
98
  Processed: 400 rows [00:00, 16496.93 rows/s]
84
99
  a b greatest least
@@ -15,7 +15,7 @@ from .aggregate import (
15
15
  row_number,
16
16
  sum,
17
17
  )
18
- from .array import cosine_distance, euclidean_distance, length, sip_hash_64
18
+ from .array import contains, cosine_distance, euclidean_distance, length, sip_hash_64
19
19
  from .conditional import case, greatest, ifelse, isnone, least
20
20
  from .numeric import bit_and, bit_hamming_distance, bit_or, bit_xor, int_hash_64
21
21
  from .random import rand
@@ -34,6 +34,7 @@ __all__ = [
34
34
  "case",
35
35
  "collect",
36
36
  "concat",
37
+ "contains",
37
38
  "cosine_distance",
38
39
  "count",
39
40
  "dense_rank",
@@ -1,5 +1,5 @@
1
1
  from collections.abc import Sequence
2
- from typing import Union
2
+ from typing import Any, Union
3
3
 
4
4
  from datachain.sql.functions import array
5
5
 
@@ -140,6 +140,44 @@ def length(arg: Union[str, Sequence, Func]) -> Func:
140
140
  return Func("length", inner=array.length, cols=cols, args=args, result_type=int)
141
141
 
142
142
 
143
+ def contains(arr: Union[str, Sequence, Func], elem: Any) -> Func:
144
+ """
145
+ Checks whether the `arr` array has the `elem` element.
146
+
147
+ Args:
148
+ arr (str | Sequence | Func): Array to check for the element.
149
+ If a string is provided, it is assumed to be the name of the array column.
150
+ If a sequence is provided, it is assumed to be an array of values.
151
+ If a Func is provided, it is assumed to be a function returning an array.
152
+ elem (Any): Element to check for in the array.
153
+
154
+ Returns:
155
+ Func: A Func object that represents the contains function. Result of the
156
+ function will be 1 if the element is present in the array, and 0 otherwise.
157
+
158
+ Example:
159
+ ```py
160
+ dc.mutate(
161
+ contains1=func.array.contains("signal.values", 3),
162
+ contains2=func.array.contains([1, 2, 3, 4, 5], 7),
163
+ )
164
+ ```
165
+ """
166
+
167
+ def inner(arg):
168
+ is_json = type(elem) in [list, dict]
169
+ return array.contains(arg, elem, is_json)
170
+
171
+ if isinstance(arr, (str, Func)):
172
+ cols = [arr]
173
+ args = None
174
+ else:
175
+ cols = None
176
+ args = [arr]
177
+
178
+ return Func("contains", inner=inner, cols=cols, args=args, result_type=int)
179
+
180
+
143
181
  def sip_hash_64(arg: Union[str, Sequence]) -> Func:
144
182
  """
145
183
  Computes the SipHash-64 hash of the array.
@@ -1,6 +1,6 @@
1
1
  from sqlalchemy.sql.functions import GenericFunction
2
2
 
3
- from datachain.sql.types import Float, Int64
3
+ from datachain.sql.types import Boolean, Float, Int64
4
4
  from datachain.sql.utils import compiler_not_implemented
5
5
 
6
6
 
@@ -37,6 +37,17 @@ class length(GenericFunction): # noqa: N801
37
37
  inherit_cache = True
38
38
 
39
39
 
40
+ class contains(GenericFunction): # noqa: N801
41
+ """
42
+ Checks if element is in the array.
43
+ """
44
+
45
+ type = Boolean()
46
+ package = "array"
47
+ name = "contains"
48
+ inherit_cache = True
49
+
50
+
40
51
  class sip_hash_64(GenericFunction): # noqa: N801
41
52
  """
42
53
  Computes the SipHash-64 hash of the array.
@@ -51,4 +62,5 @@ class sip_hash_64(GenericFunction): # noqa: N801
51
62
  compiler_not_implemented(cosine_distance)
52
63
  compiler_not_implemented(euclidean_distance)
53
64
  compiler_not_implemented(length)
65
+ compiler_not_implemented(contains)
54
66
  compiler_not_implemented(sip_hash_64)
@@ -87,6 +87,7 @@ def setup():
87
87
  compiles(sql_path.file_stem, "sqlite")(compile_path_file_stem)
88
88
  compiles(sql_path.file_ext, "sqlite")(compile_path_file_ext)
89
89
  compiles(array.length, "sqlite")(compile_array_length)
90
+ compiles(array.contains, "sqlite")(compile_array_contains)
90
91
  compiles(string.length, "sqlite")(compile_string_length)
91
92
  compiles(string.split, "sqlite")(compile_string_split)
92
93
  compiles(string.regexp_replace, "sqlite")(compile_string_regexp_replace)
@@ -269,13 +270,16 @@ def register_user_defined_sql_functions() -> None:
269
270
 
270
271
  _registered_function_creators["string_functions"] = create_string_functions
271
272
 
272
- has_json_extension = functions_exist(["json_array_length"])
273
+ has_json_extension = functions_exist(["json_array_length", "json_array_contains"])
273
274
  if not has_json_extension:
274
275
 
275
276
  def create_json_functions(conn):
276
277
  conn.create_function(
277
278
  "json_array_length", 1, py_json_array_length, deterministic=True
278
279
  )
280
+ conn.create_function(
281
+ "json_array_contains", 3, py_json_array_contains, deterministic=True
282
+ )
279
283
 
280
284
  _registered_function_creators["json_functions"] = create_json_functions
281
285
 
@@ -428,10 +432,22 @@ def py_json_array_length(arr):
428
432
  return len(orjson.loads(arr))
429
433
 
430
434
 
435
+ def py_json_array_contains(arr, value, is_json):
436
+ if is_json:
437
+ value = orjson.loads(value)
438
+ return value in orjson.loads(arr)
439
+
440
+
431
441
  def compile_array_length(element, compiler, **kwargs):
432
442
  return compiler.process(func.json_array_length(*element.clauses.clauses), **kwargs)
433
443
 
434
444
 
445
+ def compile_array_contains(element, compiler, **kwargs):
446
+ return compiler.process(
447
+ func.json_array_contains(*element.clauses.clauses), **kwargs
448
+ )
449
+
450
+
435
451
  def compile_string_length(element, compiler, **kwargs):
436
452
  return compiler.process(func.length(*element.clauses.clauses), **kwargs)
437
453
 
@@ -31,6 +31,10 @@ def adapt_array(arr):
31
31
  return orjson.dumps(arr).decode("utf-8")
32
32
 
33
33
 
34
+ def adapt_dict(dct):
35
+ return orjson.dumps(dct).decode("utf-8")
36
+
37
+
34
38
  def convert_array(arr):
35
39
  return orjson.loads(arr)
36
40
 
@@ -52,6 +56,7 @@ def adapt_np_generic(val):
52
56
 
53
57
  def register_type_converters():
54
58
  sqlite3.register_adapter(list, adapt_array)
59
+ sqlite3.register_adapter(dict, adapt_dict)
55
60
  sqlite3.register_converter("ARRAY", convert_array)
56
61
  if numpy_imported:
57
62
  sqlite3.register_adapter(np.ndarray, adapt_np_array)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.12
3
+ Version: 0.8.13
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -65,6 +65,24 @@ def test_length(warehouse):
65
65
  assert result == ((4, 5, 2),)
66
66
 
67
67
 
68
+ def test_contains(warehouse):
69
+ query = select(
70
+ func.contains(["abc", "def", "g", "hi"], "abc").label("contains1"),
71
+ func.contains(["abc", "def", "g", "hi"], "cdf").label("contains2"),
72
+ func.contains([3.0, 5.0, 1.0, 6.0, 1.0], 1.0).label("contains3"),
73
+ func.contains([[1, None, 3], [4, 5, 6]], [1, None, 3]).label("contains4"),
74
+ # Not supported yet by CH, need to add it later + some Pydantic model as
75
+ # an input:
76
+ # func.contains(
77
+ # [{"c": 1, "a": True}, {"b": False}], {"a": True, "c": 1}
78
+ # ).label("contains5"),
79
+ func.contains([1, None, 3], None).label("contains6"),
80
+ func.contains([1, True, 3], True).label("contains7"),
81
+ )
82
+ result = tuple(warehouse.db.execute(query))
83
+ assert result == ((1, 0, 1, 1, 1, 1),)
84
+
85
+
68
86
  def test_length_on_split(warehouse):
69
87
  query = select(
70
88
  func.array.length(func.string.split(func.literal("abc/def/g/hi"), "/")),
@@ -11,6 +11,7 @@ from datachain.func import (
11
11
  isnone,
12
12
  literal,
13
13
  )
14
+ from datachain.func.array import contains
14
15
  from datachain.func.random import rand
15
16
  from datachain.func.string import length as strlen
16
17
  from datachain.lib.signal_schema import SignalSchema
@@ -797,3 +798,27 @@ def test_isnone_with_ifelse_mutate(col):
797
798
  res = dc.mutate(test=ifelse(isnone(col), "NONE", "NOT_NONE"))
798
799
  assert list(res.order_by("num").collect("test")) == ["NOT_NONE"] * 3 + ["NONE"] * 2
799
800
  assert res.schema["test"] is str
801
+
802
+
803
+ def test_array_contains():
804
+ dc = DataChain.from_values(
805
+ arr=[list(range(1, i)) * i for i in range(2, 7)],
806
+ val=list(range(2, 7)),
807
+ )
808
+
809
+ assert list(dc.mutate(res=contains("arr", 3)).order_by("val").collect("res")) == [
810
+ 0,
811
+ 0,
812
+ 1,
813
+ 1,
814
+ 1,
815
+ ]
816
+ assert list(
817
+ dc.mutate(res=contains(C("arr"), 3)).order_by("val").collect("res")
818
+ ) == [0, 0, 1, 1, 1]
819
+ assert list(
820
+ dc.mutate(res=contains(C("arr"), 10)).order_by("val").collect("res")
821
+ ) == [0, 0, 0, 0, 0]
822
+ assert list(
823
+ dc.mutate(res=contains(C("arr"), None)).order_by("val").collect("res")
824
+ ) == [0, 0, 0, 0, 0]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes