datachain 0.7.6__tar.gz → 0.7.8__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.7.6 → datachain-0.7.8}/.pre-commit-config.yaml +1 -1
- {datachain-0.7.6/src/datachain.egg-info → datachain-0.7.8}/PKG-INFO +1 -1
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/__init__.py +7 -1
- datachain-0.7.8/src/datachain/func/func.py +434 -0
- datachain-0.7.8/src/datachain/func/numeric.py +162 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/dc.py +4 -4
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/dataset.py +0 -2
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/functions/array.py +4 -0
- datachain-0.7.8/src/datachain/sql/functions/numeric.py +43 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/sqlite/base.py +68 -1
- {datachain-0.7.6 → datachain-0.7.8/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain.egg-info/SOURCES.txt +2 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_datachain.py +71 -8
- datachain-0.7.8/tests/unit/test_func.py +586 -0
- datachain-0.7.6/src/datachain/func/func.py +0 -384
- datachain-0.7.6/tests/unit/test_func.py +0 -256
- {datachain-0.7.6 → datachain-0.7.8}/.cruft.json +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.gitattributes +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/codecov.yaml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/dependabot.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/workflows/release.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/workflows/tests.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/.gitignore +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/CONTRIBUTING.rst +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/LICENSE +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/README.rst +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/assets/datachain.svg +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/index.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/overrides/main.html +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/references/datachain.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/references/datatype.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/references/file.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/references/index.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/references/sql.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/references/torch.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/docs/references/udf.md +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/llm_and_nlp/unstructured-embeddings-gen.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/llm_and_nlp/unstructured-summary-map.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/multimodal/wds.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/mkdocs.yml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/noxfile.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/pyproject.toml +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/setup.cfg +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/__main__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/asyn.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/cache.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/cli.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/cli_utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/azure.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/gcs.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/hf.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/local.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/client/s3.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/config.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/dataset.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/error.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/array.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/base.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/conditional.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/path.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/random.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/string.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/func/window.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/job.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/clip.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/file.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/hf.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/image.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/listing.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/settings.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/tar.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/text.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/udf.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/listing.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/bbox.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/pose.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/segment.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/node.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/progress.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/py.typed +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/batch.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/metrics.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/params.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/queue.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/schema.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/query/session.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/remote/studio.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/types.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/sql/utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/studio.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/telemetry.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain/utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/conftest.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/data.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/examples/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/examples/test_examples.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/examples/wds_data.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_catalog.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_client.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_datachain.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_datasets.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_listing.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_ls.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_metrics.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_pull.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_pytorch.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_query.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/func/test_toolkit.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/scripts/feature_class.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/test_atomicity.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/test_cli_e2e.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/test_cli_studio.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/test_query_e2e.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/test_telemetry.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_models.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_asyn.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_cache.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_catalog.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_client.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_config.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_dataset.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_listing.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_metastore.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_query.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_query_params.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_serializer.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_session.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_utils.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.7.6 → datachain-0.7.8}/tests/utils.py +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from sqlalchemy import literal
|
|
1
|
+
from sqlalchemy import case, literal
|
|
2
2
|
|
|
3
3
|
from . import array, path, random, string
|
|
4
4
|
from .aggregate import (
|
|
@@ -17,6 +17,7 @@ from .aggregate import (
|
|
|
17
17
|
)
|
|
18
18
|
from .array import cosine_distance, euclidean_distance, length, sip_hash_64
|
|
19
19
|
from .conditional import greatest, least
|
|
20
|
+
from .numeric import bit_and, bit_or, bit_xor, int_hash_64
|
|
20
21
|
from .random import rand
|
|
21
22
|
from .window import window
|
|
22
23
|
|
|
@@ -24,6 +25,10 @@ __all__ = [
|
|
|
24
25
|
"any_value",
|
|
25
26
|
"array",
|
|
26
27
|
"avg",
|
|
28
|
+
"bit_and",
|
|
29
|
+
"bit_or",
|
|
30
|
+
"bit_xor",
|
|
31
|
+
"case",
|
|
27
32
|
"collect",
|
|
28
33
|
"concat",
|
|
29
34
|
"cosine_distance",
|
|
@@ -32,6 +37,7 @@ __all__ = [
|
|
|
32
37
|
"euclidean_distance",
|
|
33
38
|
"first",
|
|
34
39
|
"greatest",
|
|
40
|
+
"int_hash_64",
|
|
35
41
|
"least",
|
|
36
42
|
"length",
|
|
37
43
|
"literal",
|
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from collections.abc import Sequence
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import BindParameter, Case, ColumnElement, Integer, cast, desc
|
|
6
|
+
from sqlalchemy.ext.hybrid import Comparator
|
|
7
|
+
from sqlalchemy.sql import func as sa_func
|
|
8
|
+
|
|
9
|
+
from datachain.lib.convert.python_to_sql import python_to_sql
|
|
10
|
+
from datachain.lib.convert.sql_to_python import sql_to_python
|
|
11
|
+
from datachain.lib.utils import DataChainColumnError, DataChainParamsError
|
|
12
|
+
from datachain.query.schema import Column, ColumnMeta
|
|
13
|
+
from datachain.sql.functions import numeric
|
|
14
|
+
|
|
15
|
+
from .base import Function
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from sqlalchemy import TableClause
|
|
19
|
+
|
|
20
|
+
from datachain import DataType
|
|
21
|
+
from datachain.lib.signal_schema import SignalSchema
|
|
22
|
+
|
|
23
|
+
from .window import Window
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
ColT = Union[str, ColumnElement, "Func"]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Func(Function):
|
|
30
|
+
"""Represents a function to be applied to a column in a SQL query."""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
name: str,
|
|
35
|
+
inner: Callable,
|
|
36
|
+
cols: Optional[Sequence[ColT]] = None,
|
|
37
|
+
args: Optional[Sequence[Any]] = None,
|
|
38
|
+
result_type: Optional["DataType"] = None,
|
|
39
|
+
is_array: bool = False,
|
|
40
|
+
is_window: bool = False,
|
|
41
|
+
window: Optional["Window"] = None,
|
|
42
|
+
label: Optional[str] = None,
|
|
43
|
+
) -> None:
|
|
44
|
+
self.name = name
|
|
45
|
+
self.inner = inner
|
|
46
|
+
self.cols = cols or []
|
|
47
|
+
self.args = args or []
|
|
48
|
+
self.result_type = result_type
|
|
49
|
+
self.is_array = is_array
|
|
50
|
+
self.is_window = is_window
|
|
51
|
+
self.window = window
|
|
52
|
+
self.col_label = label
|
|
53
|
+
|
|
54
|
+
def __str__(self) -> str:
|
|
55
|
+
return self.name + "()"
|
|
56
|
+
|
|
57
|
+
def over(self, window: "Window") -> "Func":
|
|
58
|
+
if not self.is_window:
|
|
59
|
+
raise DataChainParamsError(f"{self} doesn't support window (over())")
|
|
60
|
+
|
|
61
|
+
return Func(
|
|
62
|
+
"over",
|
|
63
|
+
self.inner,
|
|
64
|
+
self.cols,
|
|
65
|
+
self.args,
|
|
66
|
+
self.result_type,
|
|
67
|
+
self.is_array,
|
|
68
|
+
self.is_window,
|
|
69
|
+
window,
|
|
70
|
+
self.col_label,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
@property
|
|
74
|
+
def _db_cols(self) -> Sequence[ColT]:
|
|
75
|
+
return (
|
|
76
|
+
[
|
|
77
|
+
col
|
|
78
|
+
if isinstance(col, (Func, BindParameter, Case, Comparator))
|
|
79
|
+
else ColumnMeta.to_db_name(
|
|
80
|
+
col.name if isinstance(col, ColumnElement) else col
|
|
81
|
+
)
|
|
82
|
+
for col in self.cols
|
|
83
|
+
]
|
|
84
|
+
if self.cols
|
|
85
|
+
else []
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def _db_col_type(self, signals_schema: "SignalSchema") -> Optional["DataType"]:
|
|
89
|
+
if not self._db_cols:
|
|
90
|
+
return None
|
|
91
|
+
|
|
92
|
+
col_type: type = get_db_col_type(signals_schema, self._db_cols[0])
|
|
93
|
+
for col in self._db_cols[1:]:
|
|
94
|
+
if get_db_col_type(signals_schema, col) != col_type:
|
|
95
|
+
raise DataChainColumnError(
|
|
96
|
+
str(self),
|
|
97
|
+
"Columns must have the same type to infer result type",
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
return list[col_type] if self.is_array else col_type # type: ignore[valid-type]
|
|
101
|
+
|
|
102
|
+
def __add__(self, other: Union[ColT, float]) -> "Func":
|
|
103
|
+
if isinstance(other, (int, float)):
|
|
104
|
+
return Func("add", lambda a: a + other, [self])
|
|
105
|
+
return Func("add", lambda a1, a2: a1 + a2, [self, other])
|
|
106
|
+
|
|
107
|
+
def __radd__(self, other: Union[ColT, float]) -> "Func":
|
|
108
|
+
if isinstance(other, (int, float)):
|
|
109
|
+
return Func("add", lambda a: other + a, [self])
|
|
110
|
+
return Func("add", lambda a1, a2: a1 + a2, [other, self])
|
|
111
|
+
|
|
112
|
+
def __sub__(self, other: Union[ColT, float]) -> "Func":
|
|
113
|
+
if isinstance(other, (int, float)):
|
|
114
|
+
return Func("sub", lambda a: a - other, [self])
|
|
115
|
+
return Func("sub", lambda a1, a2: a1 - a2, [self, other])
|
|
116
|
+
|
|
117
|
+
def __rsub__(self, other: Union[ColT, float]) -> "Func":
|
|
118
|
+
if isinstance(other, (int, float)):
|
|
119
|
+
return Func("sub", lambda a: other - a, [self])
|
|
120
|
+
return Func("sub", lambda a1, a2: a1 - a2, [other, self])
|
|
121
|
+
|
|
122
|
+
def __mul__(self, other: Union[ColT, float]) -> "Func":
|
|
123
|
+
if isinstance(other, (int, float)):
|
|
124
|
+
return Func("mul", lambda a: a * other, [self])
|
|
125
|
+
return Func("mul", lambda a1, a2: a1 * a2, [self, other])
|
|
126
|
+
|
|
127
|
+
def __rmul__(self, other: Union[ColT, float]) -> "Func":
|
|
128
|
+
if isinstance(other, (int, float)):
|
|
129
|
+
return Func("mul", lambda a: other * a, [self])
|
|
130
|
+
return Func("mul", lambda a1, a2: a1 * a2, [other, self])
|
|
131
|
+
|
|
132
|
+
def __truediv__(self, other: Union[ColT, float]) -> "Func":
|
|
133
|
+
if isinstance(other, (int, float)):
|
|
134
|
+
return Func("div", lambda a: _truediv(a, other), [self], result_type=float)
|
|
135
|
+
return Func(
|
|
136
|
+
"div", lambda a1, a2: _truediv(a1, a2), [self, other], result_type=float
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
def __rtruediv__(self, other: Union[ColT, float]) -> "Func":
|
|
140
|
+
if isinstance(other, (int, float)):
|
|
141
|
+
return Func("div", lambda a: _truediv(other, a), [self], result_type=float)
|
|
142
|
+
return Func(
|
|
143
|
+
"div", lambda a1, a2: _truediv(a1, a2), [other, self], result_type=float
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
def __floordiv__(self, other: Union[ColT, float]) -> "Func":
|
|
147
|
+
if isinstance(other, (int, float)):
|
|
148
|
+
return Func(
|
|
149
|
+
"floordiv", lambda a: _floordiv(a, other), [self], result_type=int
|
|
150
|
+
)
|
|
151
|
+
return Func(
|
|
152
|
+
"floordiv", lambda a1, a2: _floordiv(a1, a2), [self, other], result_type=int
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
def __rfloordiv__(self, other: Union[ColT, float]) -> "Func":
|
|
156
|
+
if isinstance(other, (int, float)):
|
|
157
|
+
return Func(
|
|
158
|
+
"floordiv", lambda a: _floordiv(other, a), [self], result_type=int
|
|
159
|
+
)
|
|
160
|
+
return Func(
|
|
161
|
+
"floordiv", lambda a1, a2: _floordiv(a1, a2), [other, self], result_type=int
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
def __mod__(self, other: Union[ColT, float]) -> "Func":
|
|
165
|
+
if isinstance(other, (int, float)):
|
|
166
|
+
return Func("mod", lambda a: a % other, [self], result_type=int)
|
|
167
|
+
return Func("mod", lambda a1, a2: a1 % a2, [self, other], result_type=int)
|
|
168
|
+
|
|
169
|
+
def __rmod__(self, other: Union[ColT, float]) -> "Func":
|
|
170
|
+
if isinstance(other, (int, float)):
|
|
171
|
+
return Func("mod", lambda a: other % a, [self], result_type=int)
|
|
172
|
+
return Func("mod", lambda a1, a2: a1 % a2, [other, self], result_type=int)
|
|
173
|
+
|
|
174
|
+
def __and__(self, other: Union[ColT, float]) -> "Func":
|
|
175
|
+
if isinstance(other, (int, float)):
|
|
176
|
+
return Func(
|
|
177
|
+
"and", lambda a: numeric.bit_and(a, other), [self], result_type=int
|
|
178
|
+
)
|
|
179
|
+
return Func(
|
|
180
|
+
"and",
|
|
181
|
+
lambda a1, a2: numeric.bit_and(a1, a2),
|
|
182
|
+
[self, other],
|
|
183
|
+
result_type=int,
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
def __rand__(self, other: Union[ColT, float]) -> "Func":
|
|
187
|
+
if isinstance(other, (int, float)):
|
|
188
|
+
return Func(
|
|
189
|
+
"and", lambda a: numeric.bit_and(other, a), [self], result_type=int
|
|
190
|
+
)
|
|
191
|
+
return Func(
|
|
192
|
+
"and",
|
|
193
|
+
lambda a1, a2: numeric.bit_and(a1, a2),
|
|
194
|
+
[other, self],
|
|
195
|
+
result_type=int,
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
def __or__(self, other: Union[ColT, float]) -> "Func":
|
|
199
|
+
if isinstance(other, (int, float)):
|
|
200
|
+
return Func(
|
|
201
|
+
"or", lambda a: numeric.bit_or(a, other), [self], result_type=int
|
|
202
|
+
)
|
|
203
|
+
return Func(
|
|
204
|
+
"or", lambda a1, a2: numeric.bit_or(a1, a2), [self, other], result_type=int
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
def __ror__(self, other: Union[ColT, float]) -> "Func":
|
|
208
|
+
if isinstance(other, (int, float)):
|
|
209
|
+
return Func(
|
|
210
|
+
"or", lambda a: numeric.bit_or(other, a), [self], result_type=int
|
|
211
|
+
)
|
|
212
|
+
return Func(
|
|
213
|
+
"or", lambda a1, a2: numeric.bit_or(a1, a2), [other, self], result_type=int
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
def __xor__(self, other: Union[ColT, float]) -> "Func":
|
|
217
|
+
if isinstance(other, (int, float)):
|
|
218
|
+
return Func(
|
|
219
|
+
"xor", lambda a: numeric.bit_xor(a, other), [self], result_type=int
|
|
220
|
+
)
|
|
221
|
+
return Func(
|
|
222
|
+
"xor",
|
|
223
|
+
lambda a1, a2: numeric.bit_xor(a1, a2),
|
|
224
|
+
[self, other],
|
|
225
|
+
result_type=int,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
def __rxor__(self, other: Union[ColT, float]) -> "Func":
|
|
229
|
+
if isinstance(other, (int, float)):
|
|
230
|
+
return Func(
|
|
231
|
+
"xor", lambda a: numeric.bit_xor(other, a), [self], result_type=int
|
|
232
|
+
)
|
|
233
|
+
return Func(
|
|
234
|
+
"xor",
|
|
235
|
+
lambda a1, a2: numeric.bit_xor(a1, a2),
|
|
236
|
+
[other, self],
|
|
237
|
+
result_type=int,
|
|
238
|
+
)
|
|
239
|
+
|
|
240
|
+
def __rshift__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the right shift, ``self >> other``.

    Args:
        other: Shift amount. An ``int``/``float`` literal is captured by
            the closure; any other value is passed to ``Func`` as an input
            column placed after ``self``.

    Returns:
        A new ``Func`` named ``"rshift"`` with ``result_type=int``.
    """
    if not isinstance(other, (int, float)):
        # Non-literal operand: both operands are inputs, ``self`` first.
        return Func(
            "rshift",
            lambda lhs, rhs: numeric.bit_rshift(lhs, rhs),
            [self, other],
            result_type=int,
        )
    # Literal operand: ``self`` is the only input and is the shifted value.
    return Func(
        "rshift",
        lambda col: numeric.bit_rshift(col, other),
        [self],
        result_type=int,
    )
|
|
254
|
+
|
|
255
|
+
def __rrshift__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the reflected right shift, ``other >> self``.

    Args:
        other: Value being shifted. An ``int``/``float`` literal is
            captured by the closure; any other value is passed to ``Func``
            as an input column placed before ``self``.

    Returns:
        A new ``Func`` named ``"rshift"`` with ``result_type=int``.
    """
    if not isinstance(other, (int, float)):
        # Non-literal operand: both operands are inputs, ``other`` first.
        return Func(
            "rshift",
            lambda lhs, rhs: numeric.bit_rshift(lhs, rhs),
            [other, self],
            result_type=int,
        )
    # Literal operand: ``self`` is the only input and is the shift amount.
    return Func(
        "rshift",
        lambda col: numeric.bit_rshift(other, col),
        [self],
        result_type=int,
    )
|
|
269
|
+
|
|
270
|
+
def __lshift__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the left shift, ``self << other``.

    Args:
        other: Shift amount. An ``int``/``float`` literal is captured by
            the closure; any other value is passed to ``Func`` as an input
            column placed after ``self``.

    Returns:
        A new ``Func`` named ``"lshift"`` with ``result_type=int``.
    """
    if not isinstance(other, (int, float)):
        # Non-literal operand: both operands are inputs, ``self`` first.
        return Func(
            "lshift",
            lambda lhs, rhs: numeric.bit_lshift(lhs, rhs),
            [self, other],
            result_type=int,
        )
    # Literal operand: ``self`` is the only input and is the shifted value.
    return Func(
        "lshift",
        lambda col: numeric.bit_lshift(col, other),
        [self],
        result_type=int,
    )
|
|
284
|
+
|
|
285
|
+
def __rlshift__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the reflected left shift, ``other << self``.

    Args:
        other: Value being shifted. An ``int``/``float`` literal is
            captured by the closure; any other value is passed to ``Func``
            as an input column placed before ``self``.

    Returns:
        A new ``Func`` named ``"lshift"`` with ``result_type=int``.
    """
    if not isinstance(other, (int, float)):
        # Non-literal operand: both operands are inputs, ``other`` first.
        return Func(
            "lshift",
            lambda lhs, rhs: numeric.bit_lshift(lhs, rhs),
            [other, self],
            result_type=int,
        )
    # Literal operand: ``self`` is the only input and is the shift amount.
    return Func(
        "lshift",
        lambda col: numeric.bit_lshift(other, col),
        [self],
        result_type=int,
    )
|
|
299
|
+
|
|
300
|
+
def __lt__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the comparison ``self < other``.

    Args:
        other: Right-hand operand. An ``int``/``float`` literal is captured
            by the closure; any other value becomes a second input column.

    Returns:
        A new ``Func`` named ``"lt"`` with ``result_type=bool``.
    """
    if not isinstance(other, (int, float)):
        return Func(
            "lt", lambda lhs, rhs: lhs < rhs, [self, other], result_type=bool
        )
    return Func("lt", lambda col: col < other, [self], result_type=bool)
|
|
304
|
+
|
|
305
|
+
def __le__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the comparison ``self <= other``.

    Args:
        other: Right-hand operand. An ``int``/``float`` literal is captured
            by the closure; any other value becomes a second input column.

    Returns:
        A new ``Func`` named ``"le"`` with ``result_type=bool``.
    """
    if not isinstance(other, (int, float)):
        return Func(
            "le", lambda lhs, rhs: lhs <= rhs, [self, other], result_type=bool
        )
    return Func("le", lambda col: col <= other, [self], result_type=bool)
|
|
309
|
+
|
|
310
|
+
def __eq__(self, other):
    """Build the ``Func`` for the comparison ``self == other``.

    This returns a ``Func`` expression, not a ``bool``.

    Args:
        other: Right-hand operand. An ``int``/``float`` literal is captured
            by the closure; any other value becomes a second input column.

    Returns:
        A new ``Func`` named ``"eq"`` with ``result_type=bool``.
    """
    if not isinstance(other, (int, float)):
        return Func(
            "eq", lambda lhs, rhs: lhs == rhs, [self, other], result_type=bool
        )
    return Func("eq", lambda col: col == other, [self], result_type=bool)
|
|
314
|
+
|
|
315
|
+
def __ne__(self, other):
    """Build the ``Func`` for the comparison ``self != other``.

    This returns a ``Func`` expression, not a ``bool``.

    Args:
        other: Right-hand operand. An ``int``/``float`` literal is captured
            by the closure; any other value becomes a second input column.

    Returns:
        A new ``Func`` named ``"ne"`` with ``result_type=bool``.
    """
    if not isinstance(other, (int, float)):
        return Func(
            "ne", lambda lhs, rhs: lhs != rhs, [self, other], result_type=bool
        )
    return Func("ne", lambda col: col != other, [self], result_type=bool)
|
|
319
|
+
|
|
320
|
+
def __gt__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the comparison ``self > other``.

    Args:
        other: Right-hand operand. An ``int``/``float`` literal is captured
            by the closure; any other value becomes a second input column.

    Returns:
        A new ``Func`` named ``"gt"`` with ``result_type=bool``.
    """
    if not isinstance(other, (int, float)):
        return Func(
            "gt", lambda lhs, rhs: lhs > rhs, [self, other], result_type=bool
        )
    return Func("gt", lambda col: col > other, [self], result_type=bool)
|
|
324
|
+
|
|
325
|
+
def __ge__(self, other: Union[ColT, float]) -> "Func":
    """Build the ``Func`` for the comparison ``self >= other``.

    Args:
        other: Right-hand operand. An ``int``/``float`` literal is captured
            by the closure; any other value becomes a second input column.

    Returns:
        A new ``Func`` named ``"ge"`` with ``result_type=bool``.
    """
    if not isinstance(other, (int, float)):
        return Func(
            "ge", lambda lhs, rhs: lhs >= rhs, [self, other], result_type=bool
        )
    return Func("ge", lambda col: col >= other, [self], result_type=bool)
|
|
329
|
+
|
|
330
|
+
def label(self, label: str) -> "Func":
    """Return a new ``Func`` with the same settings and the given label.

    Args:
        label: Passed as the last positional constructor argument
            (presumably stored as ``col_label`` -- confirm against
            ``Func.__init__``).

    Returns:
        A new ``Func``; ``self`` is left untouched.
    """
    # Everything except the label is carried over unchanged, in constructor order.
    inherited = (
        self.name,
        self.inner,
        self.cols,
        self.args,
        self.result_type,
        self.is_array,
        self.is_window,
        self.window,
    )
    return Func(*inherited, label)
|
|
342
|
+
|
|
343
|
+
def get_col_name(self, label: Optional[str] = None) -> str:
    """Pick the name for the column this function produces.

    Candidates are tried in this order:
      1. the ``label`` argument, if truthy;
      2. ``self.col_label``, if truthy;
      3. when there is exactly one input column: the string itself, the
         ``name`` of a ``Column``, or the resolved name of a nested ``Func``;
      4. ``self.name``.

    Args:
        label: Optional explicit name that overrides everything else.

    Returns:
        The chosen column name.
    """
    explicit = label or self.col_label
    if explicit:
        return explicit

    db_cols = self._db_cols
    if db_cols and len(db_cols) == 1:
        only = db_cols[0]
        if isinstance(only, str):
            return only
        if isinstance(only, Column):
            return only.name
        if isinstance(only, Func):
            return only.get_col_name()

    # No usable single input column: fall back to the function's own name.
    return self.name
|
|
356
|
+
|
|
357
|
+
def get_result_type(
    self, signals_schema: Optional["SignalSchema"] = None
) -> "DataType":
    """Return the result type of this function.

    Args:
        signals_schema: Optional schema used to infer the type from the
            input column when no explicit ``result_type`` is set.

    Returns:
        ``self.result_type`` when it is truthy, otherwise the truthy value
        returned by ``self._db_col_type(signals_schema)``.

    Raises:
        DataChainColumnError: If neither source yields a type.
    """
    declared = self.result_type
    if declared:
        return declared

    # Inference is only attempted when a schema was actually supplied.
    inferred = self._db_col_type(signals_schema) if signals_schema else None
    if inferred:
        return inferred

    raise DataChainColumnError(
        str(self),
        "Column name is required to infer result type",
    )
|
|
370
|
+
|
|
371
|
+
def get_column(
    self,
    signals_schema: Optional["SignalSchema"] = None,
    label: Optional[str] = None,
    table: Optional["TableClause"] = None,
) -> Column:
    """Build the SQL column expression for this function.

    Args:
        signals_schema: Optional schema forwarded to ``get_result_type``
            and to nested ``Func`` inputs.
        label: Optional label forwarded to ``get_col_name``.
        table: Optional table clause assigned to columns created from
            string names and forwarded to nested ``Func`` inputs.

    Returns:
        The result of calling ``self.inner`` on the resolved input columns
        followed by ``self.args``. Window functions are wrapped in
        ``over()``. The expression is typed with the SQL result type and
        labelled when ``get_col_name`` yields a name.

    Raises:
        DataChainParamsError: If ``is_window`` is set but no window spec
            is present.
    """
    sql_type = python_to_sql(self.get_result_type(signals_schema))

    def resolve(operand: ColT) -> ColT:
        # Nested functions are resolved recursively against the same schema/table.
        if isinstance(operand, Func):
            return operand.get_column(signals_schema, table=table)
        # Anything that is not a plain name is used as-is.
        if not isinstance(operand, str):
            return operand
        # A plain name becomes a Column typed with this function's SQL type.
        bound = Column(operand, sql_type)
        bound.table = table
        return bound

    operands = [resolve(operand) for operand in self._db_cols]
    expr = self.inner(*operands, *self.args)

    if self.is_window:
        spec = self.window
        if not spec:
            raise DataChainParamsError(
                f"Window function {self} requires over() clause with a window spec",
            )
        partitioning = spec.partition_by
        ordering = desc(spec.order_by) if spec.desc else spec.order_by
        expr = expr.over(partition_by=partitioning, order_by=ordering)

    # ``sql_type`` may be a type class or an instance; instantiate only a class.
    expr.type = sql_type() if inspect.isclass(sql_type) else sql_type

    col_name = self.get_col_name(label)
    if col_name:
        expr = expr.label(col_name)

    return expr
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
    """Resolve the type of a single function input.

    Args:
        signals_schema: Schema used to look up named columns.
        col: A ``Func``, a SQL column element, or another value that is
            handed to the schema lookup unchanged (presumably a column
            name -- confirm against ``ColT``).

    Returns:
        For a ``Func``, its own result type. For a column element without
        a ``name`` attribute, ``sql_to_python(col)``. Otherwise the type
        the schema reports for the column name.
    """
    if isinstance(col, Func):
        return col.get_result_type(signals_schema)

    if not isinstance(col, ColumnElement):
        return signals_schema.get_column_type(col)

    if hasattr(col, "name"):
        return signals_schema.get_column_type(col.name)

    # Unnamed SQL expression: there is nothing to look up in the schema.
    return sql_to_python(col)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _truediv(a, b):
    """Divide ``a`` by ``b`` through the SQL ``divide`` function."""
    # The ``/`` operator is avoided on purpose: the ClickHouse SQLAlchemy
    # dialect has a bug affecting it.
    # See https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/335
    quotient = sa_func.divide(a, b)
    return quotient
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _floordiv(a, b):
    """Divide ``a`` by ``b`` via ``_truediv`` and cast the result to ``Integer``."""
    # NOTE(review): whether the cast truncates or floors a negative quotient
    # depends on the backend -- confirm it matches the intended semantics.
    quotient = _truediv(a, b)
    return cast(quotient, Integer)
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from datachain.sql.functions import numeric
|
|
4
|
+
|
|
5
|
+
from .func import ColT, Func
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def bit_and(*args: Union[ColT, int]) -> Func:
    """
    Computes the bitwise AND of two values.

    Args:
        args (str | int): Exactly two operands.
            An integer is passed to the function as a constant value.
            Any other value (for example a column name) is passed as a column.

    Returns:
        Func: A Func object that represents the bitwise AND function.

    Raises:
        ValueError: If the number of operands is not exactly two.

    Example:
        ```py
        dc.mutate(
            and1=func.bit_and("signal.values", 0x0F),
        )
        ```

    Notes:
        - Result column will always be of type int.
    """
    if len(args) != 2:
        raise ValueError("bit_and() requires exactly two arguments")

    # Split the operands into constants and columns, preserving their order.
    func_args = [arg for arg in args if isinstance(arg, int)]
    cols = [arg for arg in args if not isinstance(arg, int)]

    return Func(
        "bit_and",
        inner=numeric.bit_and,
        cols=cols,
        args=func_args,
        result_type=int,
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def bit_or(*args: Union[ColT, int]) -> Func:
    """
    Computes the bitwise OR operation between two values.

    Args:
        args (str | int): Two values to compute the bitwise OR operation between.
            If an integer is provided, it is passed as a constant value.
            Any other value (for example a column name) is passed as a column.

    Returns:
        Func: A Func object that represents the bitwise OR function.

    Raises:
        ValueError: If the number of operands is not exactly two.

    Example:
        ```py
        dc.mutate(
            or1=func.bit_or("signal.values", 0x0F),
        )
        ```

    Notes:
        - Result column will always be of type int.
    """
    # Check the arity first so a wrong call fails before any partitioning.
    if len(args) != 2:
        raise ValueError("bit_or() requires exactly two arguments")

    cols, func_args = [], []
    for arg in args:
        # Integers are constants; everything else is treated as a column.
        if isinstance(arg, int):
            func_args.append(arg)
        else:
            cols.append(arg)

    return Func(
        "bit_or",
        inner=numeric.bit_or,
        cols=cols,
        args=func_args,
        result_type=int,
    )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def bit_xor(*args: Union[ColT, int]) -> Func:
    """
    Computes the bitwise XOR of two values.

    Args:
        args (str | int): Exactly two operands.
            An integer is passed to the function as a constant value.
            Any other value (for example a column name) is passed as a column.

    Returns:
        Func: A Func object that represents the bitwise XOR function.

    Raises:
        ValueError: If the number of operands is not exactly two.

    Example:
        ```py
        dc.mutate(
            xor1=func.bit_xor("signal.values", 0x0F),
        )
        ```

    Notes:
        - Result column will always be of type int.
    """
    if len(args) != 2:
        raise ValueError("bit_xor() requires exactly two arguments")

    # Split the operands into constants and columns, preserving their order.
    func_args = [arg for arg in args if isinstance(arg, int)]
    cols = [arg for arg in args if not isinstance(arg, int)]

    return Func(
        "bit_xor",
        inner=numeric.bit_xor,
        cols=cols,
        args=func_args,
        result_type=int,
    )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def int_hash_64(col: Union[ColT, int]) -> Func:
    """
    Returns the 64-bit hash of an integer.

    Args:
        col (str | int): Integer to compute the hash of.
            If a string is provided, it is assumed to be the name of the column.
            If an int is provided, it is assumed to be an int literal.
            If a Func is provided, it is assumed to be a function returning an int.

    Returns:
        Func: A Func object that represents the 64-bit hash function.

    Example:
        ```py
        dc.mutate(
            val_hash=func.int_hash_64("val"),
        )
        ```

    Note:
        - Result column will always be of type int.
    """
    # An int literal is a constant argument; anything else is a column input.
    if isinstance(col, int):
        cols, args = [], [col]
    else:
        cols, args = [col], []

    return Func(
        "int_hash_64", inner=numeric.int_hash_64, cols=cols, args=args, result_type=int
    )
|
|
@@ -1150,7 +1150,7 @@ class DataChain:
|
|
|
1150
1150
|
def group_by(
|
|
1151
1151
|
self,
|
|
1152
1152
|
*,
|
|
1153
|
-
partition_by: Union[str, Func, Sequence[Union[str, Func]]],
|
|
1153
|
+
partition_by: Optional[Union[str, Func, Sequence[Union[str, Func]]]] = None,
|
|
1154
1154
|
**kwargs: Func,
|
|
1155
1155
|
) -> "Self":
|
|
1156
1156
|
"""Group rows by specified set of signals and return new signals
|
|
@@ -1167,10 +1167,10 @@ class DataChain:
|
|
|
1167
1167
|
)
|
|
1168
1168
|
```
|
|
1169
1169
|
"""
|
|
1170
|
-
if
|
|
1170
|
+
if partition_by is None:
|
|
1171
|
+
partition_by = []
|
|
1172
|
+
elif isinstance(partition_by, (str, Func)):
|
|
1171
1173
|
partition_by = [partition_by]
|
|
1172
|
-
if not partition_by:
|
|
1173
|
-
raise ValueError("At least one column should be provided for partition_by")
|
|
1174
1174
|
|
|
1175
1175
|
partition_by_columns: list[Column] = []
|
|
1176
1176
|
signal_columns: list[Column] = []
|
|
@@ -966,8 +966,6 @@ class SQLGroupBy(SQLClause):
|
|
|
966
966
|
def apply_sql_clause(self, query) -> Select:
|
|
967
967
|
if not self.cols:
|
|
968
968
|
raise ValueError("No columns to select")
|
|
969
|
-
if not self.group_by:
|
|
970
|
-
raise ValueError("No columns to group by")
|
|
971
969
|
|
|
972
970
|
subquery = query.subquery()
|
|
973
971
|
|