datachain 0.7.7__tar.gz → 0.7.8__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (281)
  1. {datachain-0.7.7 → datachain-0.7.8}/.pre-commit-config.yaml +1 -1
  2. {datachain-0.7.7/src/datachain.egg-info → datachain-0.7.8}/PKG-INFO +1 -1
  3. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/__init__.py +5 -0
  4. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/func.py +190 -145
  5. datachain-0.7.8/src/datachain/func/numeric.py +162 -0
  6. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/functions/array.py +4 -0
  7. datachain-0.7.8/src/datachain/sql/functions/numeric.py +43 -0
  8. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/sqlite/base.py +68 -1
  9. {datachain-0.7.7 → datachain-0.7.8/src/datachain.egg-info}/PKG-INFO +1 -1
  10. {datachain-0.7.7 → datachain-0.7.8}/src/datachain.egg-info/SOURCES.txt +2 -0
  11. datachain-0.7.8/tests/unit/test_func.py +586 -0
  12. datachain-0.7.7/tests/unit/test_func.py +0 -256
  13. {datachain-0.7.7 → datachain-0.7.8}/.cruft.json +0 -0
  14. {datachain-0.7.7 → datachain-0.7.8}/.gitattributes +0 -0
  15. {datachain-0.7.7 → datachain-0.7.8}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  16. {datachain-0.7.7 → datachain-0.7.8}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
  17. {datachain-0.7.7 → datachain-0.7.8}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  18. {datachain-0.7.7 → datachain-0.7.8}/.github/codecov.yaml +0 -0
  19. {datachain-0.7.7 → datachain-0.7.8}/.github/dependabot.yml +0 -0
  20. {datachain-0.7.7 → datachain-0.7.8}/.github/workflows/benchmarks.yml +0 -0
  21. {datachain-0.7.7 → datachain-0.7.8}/.github/workflows/release.yml +0 -0
  22. {datachain-0.7.7 → datachain-0.7.8}/.github/workflows/tests-studio.yml +0 -0
  23. {datachain-0.7.7 → datachain-0.7.8}/.github/workflows/tests.yml +0 -0
  24. {datachain-0.7.7 → datachain-0.7.8}/.github/workflows/update-template.yaml +0 -0
  25. {datachain-0.7.7 → datachain-0.7.8}/.gitignore +0 -0
  26. {datachain-0.7.7 → datachain-0.7.8}/CODE_OF_CONDUCT.rst +0 -0
  27. {datachain-0.7.7 → datachain-0.7.8}/CONTRIBUTING.rst +0 -0
  28. {datachain-0.7.7 → datachain-0.7.8}/LICENSE +0 -0
  29. {datachain-0.7.7 → datachain-0.7.8}/README.rst +0 -0
  30. {datachain-0.7.7 → datachain-0.7.8}/docs/assets/captioned_cartoons.png +0 -0
  31. {datachain-0.7.7 → datachain-0.7.8}/docs/assets/datachain-white.svg +0 -0
  32. {datachain-0.7.7 → datachain-0.7.8}/docs/assets/datachain.svg +0 -0
  33. {datachain-0.7.7 → datachain-0.7.8}/docs/index.md +0 -0
  34. {datachain-0.7.7 → datachain-0.7.8}/docs/overrides/main.html +0 -0
  35. {datachain-0.7.7 → datachain-0.7.8}/docs/references/datachain.md +0 -0
  36. {datachain-0.7.7 → datachain-0.7.8}/docs/references/datatype.md +0 -0
  37. {datachain-0.7.7 → datachain-0.7.8}/docs/references/file.md +0 -0
  38. {datachain-0.7.7 → datachain-0.7.8}/docs/references/index.md +0 -0
  39. {datachain-0.7.7 → datachain-0.7.8}/docs/references/sql.md +0 -0
  40. {datachain-0.7.7 → datachain-0.7.8}/docs/references/torch.md +0 -0
  41. {datachain-0.7.7 → datachain-0.7.8}/docs/references/udf.md +0 -0
  42. {datachain-0.7.7 → datachain-0.7.8}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
  43. {datachain-0.7.7 → datachain-0.7.8}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
  44. {datachain-0.7.7 → datachain-0.7.8}/examples/computer_vision/openimage-detect.py +0 -0
  45. {datachain-0.7.7 → datachain-0.7.8}/examples/computer_vision/ultralytics-bbox.py +0 -0
  46. {datachain-0.7.7 → datachain-0.7.8}/examples/computer_vision/ultralytics-pose.py +0 -0
  47. {datachain-0.7.7 → datachain-0.7.8}/examples/computer_vision/ultralytics-segment.py +0 -0
  48. {datachain-0.7.7 → datachain-0.7.8}/examples/get_started/common_sql_functions.py +0 -0
  49. {datachain-0.7.7 → datachain-0.7.8}/examples/get_started/json-csv-reader.py +0 -0
  50. {datachain-0.7.7 → datachain-0.7.8}/examples/get_started/torch-loader.py +0 -0
  51. {datachain-0.7.7 → datachain-0.7.8}/examples/get_started/udfs/parallel.py +0 -0
  52. {datachain-0.7.7 → datachain-0.7.8}/examples/get_started/udfs/simple.py +0 -0
  53. {datachain-0.7.7 → datachain-0.7.8}/examples/get_started/udfs/stateful.py +0 -0
  54. {datachain-0.7.7 → datachain-0.7.8}/examples/llm_and_nlp/claude-query.py +0 -0
  55. {datachain-0.7.7 → datachain-0.7.8}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
  56. {datachain-0.7.7 → datachain-0.7.8}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
  57. {datachain-0.7.7 → datachain-0.7.8}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
  58. {datachain-0.7.7 → datachain-0.7.8}/examples/multimodal/clip_inference.py +0 -0
  59. {datachain-0.7.7 → datachain-0.7.8}/examples/multimodal/hf_pipeline.py +0 -0
  60. {datachain-0.7.7 → datachain-0.7.8}/examples/multimodal/openai_image_desc_lib.py +0 -0
  61. {datachain-0.7.7 → datachain-0.7.8}/examples/multimodal/wds.py +0 -0
  62. {datachain-0.7.7 → datachain-0.7.8}/examples/multimodal/wds_filtered.py +0 -0
  63. {datachain-0.7.7 → datachain-0.7.8}/mkdocs.yml +0 -0
  64. {datachain-0.7.7 → datachain-0.7.8}/noxfile.py +0 -0
  65. {datachain-0.7.7 → datachain-0.7.8}/pyproject.toml +0 -0
  66. {datachain-0.7.7 → datachain-0.7.8}/setup.cfg +0 -0
  67. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/__init__.py +0 -0
  68. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/__main__.py +0 -0
  69. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/asyn.py +0 -0
  70. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/cache.py +0 -0
  71. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/catalog/__init__.py +0 -0
  72. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/catalog/catalog.py +0 -0
  73. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/catalog/datasource.py +0 -0
  74. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/catalog/loader.py +0 -0
  75. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/cli.py +0 -0
  76. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/cli_utils.py +0 -0
  77. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/__init__.py +0 -0
  78. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/azure.py +0 -0
  79. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/fileslice.py +0 -0
  80. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/fsspec.py +0 -0
  81. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/gcs.py +0 -0
  82. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/hf.py +0 -0
  83. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/local.py +0 -0
  84. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/client/s3.py +0 -0
  85. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/config.py +0 -0
  86. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/__init__.py +0 -0
  87. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/db_engine.py +0 -0
  88. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/job.py +0 -0
  89. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/metastore.py +0 -0
  90. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/schema.py +0 -0
  91. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/serializer.py +0 -0
  92. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/sqlite.py +0 -0
  93. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/data_storage/warehouse.py +0 -0
  94. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/dataset.py +0 -0
  95. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/error.py +0 -0
  96. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/aggregate.py +0 -0
  97. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/array.py +0 -0
  98. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/base.py +0 -0
  99. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/conditional.py +0 -0
  100. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/path.py +0 -0
  101. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/random.py +0 -0
  102. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/string.py +0 -0
  103. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/func/window.py +0 -0
  104. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/job.py +0 -0
  105. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/__init__.py +0 -0
  106. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/arrow.py +0 -0
  107. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/clip.py +0 -0
  108. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/convert/__init__.py +0 -0
  109. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/convert/flatten.py +0 -0
  110. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/convert/python_to_sql.py +0 -0
  111. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/convert/sql_to_python.py +0 -0
  112. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/convert/unflatten.py +0 -0
  113. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/convert/values_to_tuples.py +0 -0
  114. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/data_model.py +0 -0
  115. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/dataset_info.py +0 -0
  116. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/dc.py +0 -0
  117. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/file.py +0 -0
  118. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/hf.py +0 -0
  119. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/image.py +0 -0
  120. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/listing.py +0 -0
  121. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/listing_info.py +0 -0
  122. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/meta_formats.py +0 -0
  123. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/model_store.py +0 -0
  124. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/pytorch.py +0 -0
  125. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/settings.py +0 -0
  126. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/signal_schema.py +0 -0
  127. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/tar.py +0 -0
  128. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/text.py +0 -0
  129. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/udf.py +0 -0
  130. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/udf_signature.py +0 -0
  131. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/utils.py +0 -0
  132. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/vfile.py +0 -0
  133. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/webdataset.py +0 -0
  134. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/lib/webdataset_laion.py +0 -0
  135. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/listing.py +0 -0
  136. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/__init__.py +0 -0
  137. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/bbox.py +0 -0
  138. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/pose.py +0 -0
  139. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/segment.py +0 -0
  140. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/ultralytics/__init__.py +0 -0
  141. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/ultralytics/bbox.py +0 -0
  142. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/ultralytics/pose.py +0 -0
  143. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/model/ultralytics/segment.py +0 -0
  144. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/node.py +0 -0
  145. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/nodes_fetcher.py +0 -0
  146. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/nodes_thread_pool.py +0 -0
  147. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/progress.py +0 -0
  148. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/py.typed +0 -0
  149. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/__init__.py +0 -0
  150. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/batch.py +0 -0
  151. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/dataset.py +0 -0
  152. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/dispatch.py +0 -0
  153. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/metrics.py +0 -0
  154. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/params.py +0 -0
  155. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/queue.py +0 -0
  156. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/schema.py +0 -0
  157. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/query/session.py +0 -0
  158. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/remote/__init__.py +0 -0
  159. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/remote/studio.py +0 -0
  160. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/__init__.py +0 -0
  161. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/default/__init__.py +0 -0
  162. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/default/base.py +0 -0
  163. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/functions/__init__.py +0 -0
  164. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/functions/aggregate.py +0 -0
  165. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/functions/conditional.py +0 -0
  166. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/functions/path.py +0 -0
  167. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/functions/random.py +0 -0
  168. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/functions/string.py +0 -0
  169. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/selectable.py +0 -0
  170. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/sqlite/__init__.py +0 -0
  171. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/sqlite/types.py +0 -0
  172. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/sqlite/vector.py +0 -0
  173. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/types.py +0 -0
  174. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/sql/utils.py +0 -0
  175. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/studio.py +0 -0
  176. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/telemetry.py +0 -0
  177. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/toolkit/__init__.py +0 -0
  178. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/toolkit/split.py +0 -0
  179. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/torch/__init__.py +0 -0
  180. {datachain-0.7.7 → datachain-0.7.8}/src/datachain/utils.py +0 -0
  181. {datachain-0.7.7 → datachain-0.7.8}/src/datachain.egg-info/dependency_links.txt +0 -0
  182. {datachain-0.7.7 → datachain-0.7.8}/src/datachain.egg-info/entry_points.txt +0 -0
  183. {datachain-0.7.7 → datachain-0.7.8}/src/datachain.egg-info/requires.txt +0 -0
  184. {datachain-0.7.7 → datachain-0.7.8}/src/datachain.egg-info/top_level.txt +0 -0
  185. {datachain-0.7.7 → datachain-0.7.8}/tests/__init__.py +0 -0
  186. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/__init__.py +0 -0
  187. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/conftest.py +0 -0
  188. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
  189. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/datasets/.dvc/config +0 -0
  190. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/datasets/.gitignore +0 -0
  191. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
  192. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/test_datachain.py +0 -0
  193. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/test_ls.py +0 -0
  194. {datachain-0.7.7 → datachain-0.7.8}/tests/benchmarks/test_version.py +0 -0
  195. {datachain-0.7.7 → datachain-0.7.8}/tests/conftest.py +0 -0
  196. {datachain-0.7.7 → datachain-0.7.8}/tests/data.py +0 -0
  197. {datachain-0.7.7 → datachain-0.7.8}/tests/examples/__init__.py +0 -0
  198. {datachain-0.7.7 → datachain-0.7.8}/tests/examples/test_examples.py +0 -0
  199. {datachain-0.7.7 → datachain-0.7.8}/tests/examples/test_wds_e2e.py +0 -0
  200. {datachain-0.7.7 → datachain-0.7.8}/tests/examples/wds_data.py +0 -0
  201. {datachain-0.7.7 → datachain-0.7.8}/tests/func/__init__.py +0 -0
  202. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_catalog.py +0 -0
  203. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_client.py +0 -0
  204. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_datachain.py +0 -0
  205. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_dataset_query.py +0 -0
  206. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_datasets.py +0 -0
  207. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_feature_pickling.py +0 -0
  208. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_listing.py +0 -0
  209. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_ls.py +0 -0
  210. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_meta_formats.py +0 -0
  211. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_metrics.py +0 -0
  212. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_pull.py +0 -0
  213. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_pytorch.py +0 -0
  214. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_query.py +0 -0
  215. {datachain-0.7.7 → datachain-0.7.8}/tests/func/test_toolkit.py +0 -0
  216. {datachain-0.7.7 → datachain-0.7.8}/tests/scripts/feature_class.py +0 -0
  217. {datachain-0.7.7 → datachain-0.7.8}/tests/scripts/feature_class_exception.py +0 -0
  218. {datachain-0.7.7 → datachain-0.7.8}/tests/scripts/feature_class_parallel.py +0 -0
  219. {datachain-0.7.7 → datachain-0.7.8}/tests/scripts/feature_class_parallel_data_model.py +0 -0
  220. {datachain-0.7.7 → datachain-0.7.8}/tests/scripts/name_len_slow.py +0 -0
  221. {datachain-0.7.7 → datachain-0.7.8}/tests/test_atomicity.py +0 -0
  222. {datachain-0.7.7 → datachain-0.7.8}/tests/test_cli_e2e.py +0 -0
  223. {datachain-0.7.7 → datachain-0.7.8}/tests/test_cli_studio.py +0 -0
  224. {datachain-0.7.7 → datachain-0.7.8}/tests/test_query_e2e.py +0 -0
  225. {datachain-0.7.7 → datachain-0.7.8}/tests/test_telemetry.py +0 -0
  226. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/__init__.py +0 -0
  227. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/__init__.py +0 -0
  228. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/conftest.py +0 -0
  229. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_arrow.py +0 -0
  230. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_clip.py +0 -0
  231. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_datachain.py +0 -0
  232. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
  233. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_datachain_merge.py +0 -0
  234. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_feature.py +0 -0
  235. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_feature_utils.py +0 -0
  236. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_file.py +0 -0
  237. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_hf.py +0 -0
  238. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_image.py +0 -0
  239. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_listing_info.py +0 -0
  240. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_models.py +0 -0
  241. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_schema.py +0 -0
  242. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_signal_schema.py +0 -0
  243. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_sql_to_python.py +0 -0
  244. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_text.py +0 -0
  245. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_udf_signature.py +0 -0
  246. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_utils.py +0 -0
  247. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/lib/test_webdataset.py +0 -0
  248. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/__init__.py +0 -0
  249. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/sqlite/__init__.py +0 -0
  250. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/sqlite/test_types.py +0 -0
  251. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/sqlite/test_utils.py +0 -0
  252. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/test_array.py +0 -0
  253. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/test_conditional.py +0 -0
  254. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/test_path.py +0 -0
  255. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/test_random.py +0 -0
  256. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/test_selectable.py +0 -0
  257. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/sql/test_string.py +0 -0
  258. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_asyn.py +0 -0
  259. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_cache.py +0 -0
  260. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_catalog.py +0 -0
  261. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_catalog_loader.py +0 -0
  262. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_cli_parsing.py +0 -0
  263. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_client.py +0 -0
  264. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_client_s3.py +0 -0
  265. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_config.py +0 -0
  266. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_data_storage.py +0 -0
  267. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_database_engine.py +0 -0
  268. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_dataset.py +0 -0
  269. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_dispatch.py +0 -0
  270. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_fileslice.py +0 -0
  271. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_listing.py +0 -0
  272. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_metastore.py +0 -0
  273. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_module_exports.py +0 -0
  274. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_query.py +0 -0
  275. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_query_metrics.py +0 -0
  276. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_query_params.py +0 -0
  277. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_serializer.py +0 -0
  278. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_session.py +0 -0
  279. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_utils.py +0 -0
  280. {datachain-0.7.7 → datachain-0.7.8}/tests/unit/test_warehouse.py +0 -0
  281. {datachain-0.7.7 → datachain-0.7.8}/tests/utils.py +0 -0
@@ -24,7 +24,7 @@ repos:
24
24
  - id: trailing-whitespace
25
25
  exclude: '^LICENSES/'
26
26
  - repo: https://github.com/astral-sh/ruff-pre-commit
27
- rev: 'v0.8.0'
27
+ rev: 'v0.8.1'
28
28
  hooks:
29
29
  - id: ruff
30
30
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datachain
3
- Version: 0.7.7
3
+ Version: 0.7.8
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -17,6 +17,7 @@ from .aggregate import (
17
17
  )
18
18
  from .array import cosine_distance, euclidean_distance, length, sip_hash_64
19
19
  from .conditional import greatest, least
20
+ from .numeric import bit_and, bit_or, bit_xor, int_hash_64
20
21
  from .random import rand
21
22
  from .window import window
22
23
 
@@ -24,6 +25,9 @@ __all__ = [
24
25
  "any_value",
25
26
  "array",
26
27
  "avg",
28
+ "bit_and",
29
+ "bit_or",
30
+ "bit_xor",
27
31
  "case",
28
32
  "collect",
29
33
  "concat",
@@ -33,6 +37,7 @@ __all__ = [
33
37
  "euclidean_distance",
34
38
  "first",
35
39
  "greatest",
40
+ "int_hash_64",
36
41
  "least",
37
42
  "length",
38
43
  "literal",
@@ -2,13 +2,15 @@ import inspect
2
2
  from collections.abc import Sequence
3
3
  from typing import TYPE_CHECKING, Any, Callable, Optional, Union
4
4
 
5
- from sqlalchemy import BindParameter, Case, ColumnElement, desc
5
+ from sqlalchemy import BindParameter, Case, ColumnElement, Integer, cast, desc
6
6
  from sqlalchemy.ext.hybrid import Comparator
7
+ from sqlalchemy.sql import func as sa_func
7
8
 
8
9
  from datachain.lib.convert.python_to_sql import python_to_sql
9
10
  from datachain.lib.convert.sql_to_python import sql_to_python
10
11
  from datachain.lib.utils import DataChainColumnError, DataChainParamsError
11
12
  from datachain.query.schema import Column, ColumnMeta
13
+ from datachain.sql.functions import numeric
12
14
 
13
15
  from .base import Function
14
16
 
@@ -98,94 +100,232 @@ class Func(Function):
98
100
  return list[col_type] if self.is_array else col_type # type: ignore[valid-type]
99
101
 
100
102
  def __add__(self, other: Union[ColT, float]) -> "Func":
101
- return math_add(self, other)
103
+ if isinstance(other, (int, float)):
104
+ return Func("add", lambda a: a + other, [self])
105
+ return Func("add", lambda a1, a2: a1 + a2, [self, other])
102
106
 
103
107
  def __radd__(self, other: Union[ColT, float]) -> "Func":
104
- return math_add(other, self)
108
+ if isinstance(other, (int, float)):
109
+ return Func("add", lambda a: other + a, [self])
110
+ return Func("add", lambda a1, a2: a1 + a2, [other, self])
105
111
 
106
112
  def __sub__(self, other: Union[ColT, float]) -> "Func":
107
- return math_sub(self, other)
113
+ if isinstance(other, (int, float)):
114
+ return Func("sub", lambda a: a - other, [self])
115
+ return Func("sub", lambda a1, a2: a1 - a2, [self, other])
108
116
 
109
117
  def __rsub__(self, other: Union[ColT, float]) -> "Func":
110
- return math_sub(other, self)
118
+ if isinstance(other, (int, float)):
119
+ return Func("sub", lambda a: other - a, [self])
120
+ return Func("sub", lambda a1, a2: a1 - a2, [other, self])
111
121
 
112
122
  def __mul__(self, other: Union[ColT, float]) -> "Func":
113
- return math_mul(self, other)
123
+ if isinstance(other, (int, float)):
124
+ return Func("mul", lambda a: a * other, [self])
125
+ return Func("mul", lambda a1, a2: a1 * a2, [self, other])
114
126
 
115
127
  def __rmul__(self, other: Union[ColT, float]) -> "Func":
116
- return math_mul(other, self)
128
+ if isinstance(other, (int, float)):
129
+ return Func("mul", lambda a: other * a, [self])
130
+ return Func("mul", lambda a1, a2: a1 * a2, [other, self])
117
131
 
118
132
  def __truediv__(self, other: Union[ColT, float]) -> "Func":
119
- return math_truediv(self, other)
133
+ if isinstance(other, (int, float)):
134
+ return Func("div", lambda a: _truediv(a, other), [self], result_type=float)
135
+ return Func(
136
+ "div", lambda a1, a2: _truediv(a1, a2), [self, other], result_type=float
137
+ )
120
138
 
121
139
  def __rtruediv__(self, other: Union[ColT, float]) -> "Func":
122
- return math_truediv(other, self)
140
+ if isinstance(other, (int, float)):
141
+ return Func("div", lambda a: _truediv(other, a), [self], result_type=float)
142
+ return Func(
143
+ "div", lambda a1, a2: _truediv(a1, a2), [other, self], result_type=float
144
+ )
123
145
 
124
146
  def __floordiv__(self, other: Union[ColT, float]) -> "Func":
125
- return math_floordiv(self, other)
147
+ if isinstance(other, (int, float)):
148
+ return Func(
149
+ "floordiv", lambda a: _floordiv(a, other), [self], result_type=int
150
+ )
151
+ return Func(
152
+ "floordiv", lambda a1, a2: _floordiv(a1, a2), [self, other], result_type=int
153
+ )
126
154
 
127
155
  def __rfloordiv__(self, other: Union[ColT, float]) -> "Func":
128
- return math_floordiv(other, self)
156
+ if isinstance(other, (int, float)):
157
+ return Func(
158
+ "floordiv", lambda a: _floordiv(other, a), [self], result_type=int
159
+ )
160
+ return Func(
161
+ "floordiv", lambda a1, a2: _floordiv(a1, a2), [other, self], result_type=int
162
+ )
129
163
 
130
164
  def __mod__(self, other: Union[ColT, float]) -> "Func":
131
- return math_mod(self, other)
165
+ if isinstance(other, (int, float)):
166
+ return Func("mod", lambda a: a % other, [self], result_type=int)
167
+ return Func("mod", lambda a1, a2: a1 % a2, [self, other], result_type=int)
132
168
 
133
169
  def __rmod__(self, other: Union[ColT, float]) -> "Func":
134
- return math_mod(other, self)
135
-
136
- def __pow__(self, other: Union[ColT, float]) -> "Func":
137
- return math_pow(self, other)
138
-
139
- def __rpow__(self, other: Union[ColT, float]) -> "Func":
140
- return math_pow(other, self)
141
-
142
- def __lshift__(self, other: Union[ColT, float]) -> "Func":
143
- return math_lshift(self, other)
144
-
145
- def __rlshift__(self, other: Union[ColT, float]) -> "Func":
146
- return math_lshift(other, self)
147
-
148
- def __rshift__(self, other: Union[ColT, float]) -> "Func":
149
- return math_rshift(self, other)
150
-
151
- def __rrshift__(self, other: Union[ColT, float]) -> "Func":
152
- return math_rshift(other, self)
170
+ if isinstance(other, (int, float)):
171
+ return Func("mod", lambda a: other % a, [self], result_type=int)
172
+ return Func("mod", lambda a1, a2: a1 % a2, [other, self], result_type=int)
153
173
 
154
174
  def __and__(self, other: Union[ColT, float]) -> "Func":
155
- return math_and(self, other)
175
+ if isinstance(other, (int, float)):
176
+ return Func(
177
+ "and", lambda a: numeric.bit_and(a, other), [self], result_type=int
178
+ )
179
+ return Func(
180
+ "and",
181
+ lambda a1, a2: numeric.bit_and(a1, a2),
182
+ [self, other],
183
+ result_type=int,
184
+ )
156
185
 
157
186
  def __rand__(self, other: Union[ColT, float]) -> "Func":
158
- return math_and(other, self)
187
+ if isinstance(other, (int, float)):
188
+ return Func(
189
+ "and", lambda a: numeric.bit_and(other, a), [self], result_type=int
190
+ )
191
+ return Func(
192
+ "and",
193
+ lambda a1, a2: numeric.bit_and(a1, a2),
194
+ [other, self],
195
+ result_type=int,
196
+ )
159
197
 
160
198
  def __or__(self, other: Union[ColT, float]) -> "Func":
161
- return math_or(self, other)
199
+ if isinstance(other, (int, float)):
200
+ return Func(
201
+ "or", lambda a: numeric.bit_or(a, other), [self], result_type=int
202
+ )
203
+ return Func(
204
+ "or", lambda a1, a2: numeric.bit_or(a1, a2), [self, other], result_type=int
205
+ )
162
206
 
163
207
  def __ror__(self, other: Union[ColT, float]) -> "Func":
164
- return math_or(other, self)
208
+ if isinstance(other, (int, float)):
209
+ return Func(
210
+ "or", lambda a: numeric.bit_or(other, a), [self], result_type=int
211
+ )
212
+ return Func(
213
+ "or", lambda a1, a2: numeric.bit_or(a1, a2), [other, self], result_type=int
214
+ )
165
215
 
166
216
  def __xor__(self, other: Union[ColT, float]) -> "Func":
167
- return math_xor(self, other)
217
+ if isinstance(other, (int, float)):
218
+ return Func(
219
+ "xor", lambda a: numeric.bit_xor(a, other), [self], result_type=int
220
+ )
221
+ return Func(
222
+ "xor",
223
+ lambda a1, a2: numeric.bit_xor(a1, a2),
224
+ [self, other],
225
+ result_type=int,
226
+ )
168
227
 
169
228
  def __rxor__(self, other: Union[ColT, float]) -> "Func":
170
- return math_xor(other, self)
229
+ if isinstance(other, (int, float)):
230
+ return Func(
231
+ "xor", lambda a: numeric.bit_xor(other, a), [self], result_type=int
232
+ )
233
+ return Func(
234
+ "xor",
235
+ lambda a1, a2: numeric.bit_xor(a1, a2),
236
+ [other, self],
237
+ result_type=int,
238
+ )
239
+
240
+ def __rshift__(self, other: Union[ColT, float]) -> "Func":
241
+ if isinstance(other, (int, float)):
242
+ return Func(
243
+ "rshift",
244
+ lambda a: numeric.bit_rshift(a, other),
245
+ [self],
246
+ result_type=int,
247
+ )
248
+ return Func(
249
+ "rshift",
250
+ lambda a1, a2: numeric.bit_rshift(a1, a2),
251
+ [self, other],
252
+ result_type=int,
253
+ )
254
+
255
+ def __rrshift__(self, other: Union[ColT, float]) -> "Func":
256
+ if isinstance(other, (int, float)):
257
+ return Func(
258
+ "rshift",
259
+ lambda a: numeric.bit_rshift(other, a),
260
+ [self],
261
+ result_type=int,
262
+ )
263
+ return Func(
264
+ "rshift",
265
+ lambda a1, a2: numeric.bit_rshift(a1, a2),
266
+ [other, self],
267
+ result_type=int,
268
+ )
269
+
270
+ def __lshift__(self, other: Union[ColT, float]) -> "Func":
271
+ if isinstance(other, (int, float)):
272
+ return Func(
273
+ "lshift",
274
+ lambda a: numeric.bit_lshift(a, other),
275
+ [self],
276
+ result_type=int,
277
+ )
278
+ return Func(
279
+ "lshift",
280
+ lambda a1, a2: numeric.bit_lshift(a1, a2),
281
+ [self, other],
282
+ result_type=int,
283
+ )
284
+
285
+ def __rlshift__(self, other: Union[ColT, float]) -> "Func":
286
+ if isinstance(other, (int, float)):
287
+ return Func(
288
+ "lshift",
289
+ lambda a: numeric.bit_lshift(other, a),
290
+ [self],
291
+ result_type=int,
292
+ )
293
+ return Func(
294
+ "lshift",
295
+ lambda a1, a2: numeric.bit_lshift(a1, a2),
296
+ [other, self],
297
+ result_type=int,
298
+ )
171
299
 
172
300
  def __lt__(self, other: Union[ColT, float]) -> "Func":
173
- return math_lt(self, other)
301
+ if isinstance(other, (int, float)):
302
+ return Func("lt", lambda a: a < other, [self], result_type=bool)
303
+ return Func("lt", lambda a1, a2: a1 < a2, [self, other], result_type=bool)
174
304
 
175
305
  def __le__(self, other: Union[ColT, float]) -> "Func":
176
- return math_le(self, other)
306
+ if isinstance(other, (int, float)):
307
+ return Func("le", lambda a: a <= other, [self], result_type=bool)
308
+ return Func("le", lambda a1, a2: a1 <= a2, [self, other], result_type=bool)
177
309
 
178
310
  def __eq__(self, other):
179
- return math_eq(self, other)
311
+ if isinstance(other, (int, float)):
312
+ return Func("eq", lambda a: a == other, [self], result_type=bool)
313
+ return Func("eq", lambda a1, a2: a1 == a2, [self, other], result_type=bool)
180
314
 
181
315
  def __ne__(self, other):
182
- return math_ne(self, other)
316
+ if isinstance(other, (int, float)):
317
+ return Func("ne", lambda a: a != other, [self], result_type=bool)
318
+ return Func("ne", lambda a1, a2: a1 != a2, [self, other], result_type=bool)
183
319
 
184
320
  def __gt__(self, other: Union[ColT, float]) -> "Func":
185
- return math_gt(self, other)
321
+ if isinstance(other, (int, float)):
322
+ return Func("gt", lambda a: a > other, [self], result_type=bool)
323
+ return Func("gt", lambda a1, a2: a1 > a2, [self, other], result_type=bool)
186
324
 
187
325
  def __ge__(self, other: Union[ColT, float]) -> "Func":
188
- return math_ge(self, other)
326
+ if isinstance(other, (int, float)):
327
+ return Func("ge", lambda a: a >= other, [self], result_type=bool)
328
+ return Func("ge", lambda a1, a2: a1 >= a2, [self, other], result_type=bool)
189
329
 
190
330
  def label(self, label: str) -> "Func":
191
331
  return Func(
@@ -283,107 +423,12 @@ def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
283
423
  )
284
424
 
285
425
 
286
- def math_func(
287
- name: str,
288
- inner: Callable,
289
- params: Sequence[Union[ColT, float]],
290
- result_type: Optional["DataType"] = None,
291
- ) -> Func:
292
- """Returns math function from the columns."""
293
- cols, args = [], []
294
- for arg in params:
295
- if isinstance(arg, (int, float)):
296
- args.append(arg)
297
- else:
298
- cols.append(arg)
299
- return Func(name, inner, cols=cols, args=args, result_type=result_type)
300
-
301
-
302
- def math_add(*args: Union[ColT, float]) -> Func:
303
- """Computes the sum of the column."""
304
- return math_func("add", lambda a1, a2: a1 + a2, args)
305
-
306
-
307
- def math_sub(*args: Union[ColT, float]) -> Func:
308
- """Computes the diff of the column."""
309
- return math_func("sub", lambda a1, a2: a1 - a2, args)
310
-
311
-
312
- def math_mul(*args: Union[ColT, float]) -> Func:
313
- """Computes the product of the column."""
314
- return math_func("mul", lambda a1, a2: a1 * a2, args)
315
-
316
-
317
- def math_truediv(*args: Union[ColT, float]) -> Func:
318
- """Computes the division of the column."""
319
- return math_func("div", lambda a1, a2: a1 / a2, args, result_type=float)
320
-
321
-
322
- def math_floordiv(*args: Union[ColT, float]) -> Func:
323
- """Computes the floor division of the column."""
324
- return math_func("floordiv", lambda a1, a2: a1 // a2, args, result_type=float)
325
-
326
-
327
- def math_mod(*args: Union[ColT, float]) -> Func:
328
- """Computes the modulo of the column."""
329
- return math_func("mod", lambda a1, a2: a1 % a2, args, result_type=float)
330
-
331
-
332
- def math_pow(*args: Union[ColT, float]) -> Func:
333
- """Computes the power of the column."""
334
- return math_func("pow", lambda a1, a2: a1**a2, args, result_type=float)
335
-
336
-
337
- def math_lshift(*args: Union[ColT, float]) -> Func:
338
- """Computes the left shift of the column."""
339
- return math_func("lshift", lambda a1, a2: a1 << a2, args, result_type=int)
340
-
341
-
342
- def math_rshift(*args: Union[ColT, float]) -> Func:
343
- """Computes the right shift of the column."""
344
- return math_func("rshift", lambda a1, a2: a1 >> a2, args, result_type=int)
345
-
346
-
347
- def math_and(*args: Union[ColT, float]) -> Func:
348
- """Computes the logical AND of the column."""
349
- return math_func("and", lambda a1, a2: a1 & a2, args, result_type=bool)
350
-
351
-
352
- def math_or(*args: Union[ColT, float]) -> Func:
353
- """Computes the logical OR of the column."""
354
- return math_func("or", lambda a1, a2: a1 | a2, args, result_type=bool)
355
-
356
-
357
- def math_xor(*args: Union[ColT, float]) -> Func:
358
- """Computes the logical XOR of the column."""
359
- return math_func("xor", lambda a1, a2: a1 ^ a2, args, result_type=bool)
360
-
361
-
362
- def math_lt(*args: Union[ColT, float]) -> Func:
363
- """Computes the less than comparison of the column."""
364
- return math_func("lt", lambda a1, a2: a1 < a2, args, result_type=bool)
365
-
366
-
367
- def math_le(*args: Union[ColT, float]) -> Func:
368
- """Computes the less than or equal comparison of the column."""
369
- return math_func("le", lambda a1, a2: a1 <= a2, args, result_type=bool)
370
-
371
-
372
- def math_eq(*args: Union[ColT, float]) -> Func:
373
- """Computes the equality comparison of the column."""
374
- return math_func("eq", lambda a1, a2: a1 == a2, args, result_type=bool)
375
-
376
-
377
- def math_ne(*args: Union[ColT, float]) -> Func:
378
- """Computes the inequality comparison of the column."""
379
- return math_func("ne", lambda a1, a2: a1 != a2, args, result_type=bool)
380
-
381
-
382
- def math_gt(*args: Union[ColT, float]) -> Func:
383
- """Computes the greater than comparison of the column."""
384
- return math_func("gt", lambda a1, a2: a1 > a2, args, result_type=bool)
426
+ def _truediv(a, b):
427
+ # Using sqlalchemy.sql.func.divide here instead of / operator
428
+ # because of a bug in ClickHouse SQLAlchemy dialect
429
+ # See https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/335
430
+ return sa_func.divide(a, b)
385
431
 
386
432
 
387
- def math_ge(*args: Union[ColT, float]) -> Func:
388
- """Computes the greater than or equal comparison of the column."""
389
- return math_func("ge", lambda a1, a2: a1 >= a2, args, result_type=bool)
433
+ def _floordiv(a, b):
434
+ return cast(_truediv(a, b), Integer)
@@ -0,0 +1,162 @@
1
+ from typing import Union
2
+
3
+ from datachain.sql.functions import numeric
4
+
5
+ from .func import ColT, Func
6
+
7
+
8
+ def bit_and(*args: Union[ColT, int]) -> Func:
9
+ """
10
+ Computes the bitwise AND operation between two values.
11
+
12
+ Args:
13
+ args (str | int): Two values to compute the bitwise AND operation between.
14
+ If a string is provided, it is assumed to be the name of the column vector.
15
+ If an integer is provided, it is assumed to be a constant value.
16
+
17
+ Returns:
18
+ Func: A Func object that represents the bitwise AND function.
19
+
20
+ Example:
21
+ ```py
22
+ dc.mutate(
23
+ and1=func.bit_and("signal.values", 0x0F),
24
+ )
25
+ ```
26
+
27
+ Notes:
28
+ - Result column will always be of type int.
29
+ """
30
+ cols, func_args = [], []
31
+ for arg in args:
32
+ if isinstance(arg, int):
33
+ func_args.append(arg)
34
+ else:
35
+ cols.append(arg)
36
+
37
+ if len(cols) + len(func_args) != 2:
38
+ raise ValueError("bit_and() requires exactly two arguments")
39
+
40
+ return Func(
41
+ "bit_and",
42
+ inner=numeric.bit_and,
43
+ cols=cols,
44
+ args=func_args,
45
+ result_type=int,
46
+ )
47
+
48
+
49
+ def bit_or(*args: Union[ColT, int]) -> Func:
50
+ """
51
+ Computes the bitwise OR operation between two values.
52
+
53
+ Args:
54
+ args (str | int): Two values to compute the bitwise OR operation between.
55
+ If a string is provided, it is assumed to be the name of the column vector.
56
+ If an integer is provided, it is assumed to be a constant value.
57
+
58
+ Returns:
59
+ Func: A Func object that represents the bitwise OR function.
60
+
61
+ Example:
62
+ ```py
63
+ dc.mutate(
64
+ or1=func.bit_or("signal.values", 0x0F),
65
+ )
66
+ ```
67
+
68
+ Notes:
69
+ - Result column will always be of type int.
70
+ """
71
+ cols, func_args = [], []
72
+ for arg in args:
73
+ if isinstance(arg, int):
74
+ func_args.append(arg)
75
+ else:
76
+ cols.append(arg)
77
+
78
+ if len(cols) + len(func_args) != 2:
79
+ raise ValueError("bit_or() requires exactly two arguments")
80
+
81
+ return Func(
82
+ "bit_or",
83
+ inner=numeric.bit_or,
84
+ cols=cols,
85
+ args=func_args,
86
+ result_type=int,
87
+ )
88
+
89
+
90
+ def bit_xor(*args: Union[ColT, int]) -> Func:
91
+ """
92
+ Computes the bitwise XOR operation between two values.
93
+
94
+ Args:
95
+ args (str | int): Two values to compute the bitwise XOR operation between.
96
+ If a string is provided, it is assumed to be the name of the column vector.
97
+ If an integer is provided, it is assumed to be a constant value.
98
+
99
+ Returns:
100
+ Func: A Func object that represents the bitwise XOR function.
101
+
102
+ Example:
103
+ ```py
104
+ dc.mutate(
105
+ xor1=func.bit_xor("signal.values", 0x0F),
106
+ )
107
+ ```
108
+
109
+ Notes:
110
+ - Result column will always be of type int.
111
+ """
112
+ cols, func_args = [], []
113
+ for arg in args:
114
+ if isinstance(arg, int):
115
+ func_args.append(arg)
116
+ else:
117
+ cols.append(arg)
118
+
119
+ if len(cols) + len(func_args) != 2:
120
+ raise ValueError("bit_xor() requires exactly two arguments")
121
+
122
+ return Func(
123
+ "bit_xor",
124
+ inner=numeric.bit_xor,
125
+ cols=cols,
126
+ args=func_args,
127
+ result_type=int,
128
+ )
129
+
130
+
131
+ def int_hash_64(col: Union[ColT, int]) -> Func:
132
+ """
133
+ Returns the 64-bit hash of an integer.
134
+
135
+ Args:
136
+ col (str | int): Integer to compute the hash of.
137
+ If a string is provided, it is assumed to be the name of the column.
138
+ If an int is provided, it is assumed to be an int literal.
139
+ If a Func is provided, it is assumed to be a function returning an int.
140
+
141
+ Returns:
142
+ Func: A Func object that represents the 64-bit hash function.
143
+
144
+ Example:
145
+ ```py
146
+ dc.mutate(
147
+ val_hash=func.int_hash_64("val"),
148
+ )
149
+ ```
150
+
151
+ Note:
152
+ - Result column will always be of type int.
153
+ """
154
+ cols, args = [], []
155
+ if isinstance(col, int):
156
+ args.append(col)
157
+ else:
158
+ cols.append(col)
159
+
160
+ return Func(
161
+ "int_hash_64", inner=numeric.int_hash_64, cols=cols, args=args, result_type=int
162
+ )
@@ -38,6 +38,10 @@ class length(GenericFunction): # noqa: N801
38
38
 
39
39
 
40
40
  class sip_hash_64(GenericFunction): # noqa: N801
41
+ """
42
+ Computes the SipHash-64 hash of the array.
43
+ """
44
+
41
45
  type = Int64()
42
46
  package = "hash"
43
47
  name = "sip_hash_64"
@@ -0,0 +1,43 @@
1
+ from sqlalchemy.sql.functions import GenericFunction, ReturnTypeFromArgs
2
+
3
+ from datachain.sql.types import Int64
4
+ from datachain.sql.utils import compiler_not_implemented
5
+
6
+
7
+ class bit_and(ReturnTypeFromArgs): # noqa: N801
8
+ inherit_cache = True
9
+
10
+
11
+ class bit_or(ReturnTypeFromArgs): # noqa: N801
12
+ inherit_cache = True
13
+
14
+
15
+ class bit_xor(ReturnTypeFromArgs): # noqa: N801
16
+ inherit_cache = True
17
+
18
+
19
+ class bit_rshift(ReturnTypeFromArgs): # noqa: N801
20
+ inherit_cache = True
21
+
22
+
23
+ class bit_lshift(ReturnTypeFromArgs): # noqa: N801
24
+ inherit_cache = True
25
+
26
+
27
+ class int_hash_64(GenericFunction): # noqa: N801
28
+ """
29
+ Computes the 64-bit hash of an integer.
30
+ """
31
+
32
+ type = Int64()
33
+ package = "hash"
34
+ name = "int_hash_64"
35
+ inherit_cache = True
36
+
37
+
38
+ compiler_not_implemented(bit_and)
39
+ compiler_not_implemented(bit_or)
40
+ compiler_not_implemented(bit_xor)
41
+ compiler_not_implemented(bit_rshift)
42
+ compiler_not_implemented(bit_lshift)
43
+ compiler_not_implemented(int_hash_64)