datachain 0.16.2__tar.gz → 0.16.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.16.2/src/datachain.egg-info → datachain-0.16.3}/PKG-INFO +2 -2
- {datachain-0.16.2 → datachain-0.16.3}/pyproject.toml +1 -1
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/array.py +56 -1
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/func.py +32 -1
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/database.py +5 -3
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/records.py +3 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/array.py +11 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/sqlite/base.py +22 -0
- {datachain-0.16.2 → datachain-0.16.3/src/datachain.egg-info}/PKG-INFO +2 -2
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain.egg-info/requires.txt +1 -1
- datachain-0.16.3/tests/func/test_func.py +124 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_datachain.py +0 -4
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/test_array.py +35 -0
- {datachain-0.16.2 → datachain-0.16.3}/.cruft.json +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.gitattributes +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/codecov.yaml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/dependabot.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/workflows/release.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/workflows/tests.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.gitignore +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/.pre-commit-config.yaml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/LICENSE +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/README.rst +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/assets/datachain.svg +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/auth/login.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/auth/logout.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/auth/team.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/auth/token.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/index.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/job/cancel.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/job/logs.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/commands/job/run.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/contributing.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/examples.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/index.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/overrides/main.html +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/quick-start.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/file.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/index.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/pose.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/segment.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/datachain.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/func.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/index.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/remotes.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/toolkit.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/torch.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/references/udf.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/docs/tutorials.md +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/multimodal/wds.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/mkdocs.yml +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/noxfile.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/setup.cfg +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/__main__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/asyn.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cache.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/cli/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/azure.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/gcs.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/hf.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/local.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/client/s3.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/config.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/dataset.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/error.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/fs/reference.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/fs/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/base.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/conditional.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/numeric.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/path.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/random.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/string.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/func/window.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/job.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/clip.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/file.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/hf.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/image.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/listing.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/settings.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/tar.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/text.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/udf.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/video.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/listing.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/bbox.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/pose.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/segment.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/model/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/node.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/progress.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/py.typed +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/batch.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/dataset.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/metrics.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/params.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/queue.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/schema.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/session.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/udf.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/query/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/remote/studio.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/script_meta.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/types.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/sql/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/studio.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/telemetry.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain/utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/conftest.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/data.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/examples/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/examples/test_examples.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/examples/wds_data.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/data/lena.jpg +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/model/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_catalog.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_client.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_data_storage.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_datachain.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_datasets.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_file.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_hf.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_image.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_listing.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_ls.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_metrics.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_pull.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_pytorch.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_query.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_read_database.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_session.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_toolkit.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_video.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/func/test_warehouse.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/scripts/feature_class.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/test_atomicity.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/test_cli_e2e.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/test_cli_studio.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/test_import_time.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/test_query_e2e.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/test_telemetry.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/model/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_asyn.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_cache.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_catalog.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_client.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_config.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_dataset.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_func.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_listing.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_metastore.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_query.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_query_params.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_serializer.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_session.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_utils.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.16.2 → datachain-0.16.3}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.16.2
|
|
3
|
+
Version: 0.16.3
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -23,7 +23,7 @@ Requires-Dist: tqdm
|
|
|
23
23
|
Requires-Dist: numpy<3,>=1
|
|
24
24
|
Requires-Dist: pandas>=2.0.0
|
|
25
25
|
Requires-Dist: packaging
|
|
26
|
-
Requires-Dist: pyarrow
|
|
26
|
+
Requires-Dist: pyarrow<20
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: python-dateutil>=2
|
|
29
29
|
Requires-Dist: attrs>=21.3.0
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from collections.abc import Sequence
|
|
2
|
-
from typing import Any, Union
|
|
2
|
+
from typing import Any, Optional, Union
|
|
3
3
|
|
|
4
4
|
from datachain.sql.functions import array
|
|
5
5
|
|
|
@@ -178,6 +178,61 @@ def contains(arr: Union[str, Sequence, Func], elem: Any) -> Func:
|
|
|
178
178
|
return Func("contains", inner=inner, cols=cols, args=args, result_type=int)
|
|
179
179
|
|
|
180
180
|
|
|
181
|
+
def get_element(arg: Union[str, Sequence, Func], index: int) -> Func:
|
|
182
|
+
"""
|
|
183
|
+
Returns the element at the given index from the array.
|
|
184
|
+
If the index is out of bounds, it returns None or columns default value.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
arg (str | Sequence | Func): Array to get the element from.
|
|
188
|
+
If a string is provided, it is assumed to be the name of the array column.
|
|
189
|
+
If a sequence is provided, it is assumed to be an array of values.
|
|
190
|
+
If a Func is provided, it is assumed to be a function returning an array.
|
|
191
|
+
index (int): Index of the element to get from the array.
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
Func: A Func object that represents the array get_element function.
|
|
195
|
+
|
|
196
|
+
Example:
|
|
197
|
+
```py
|
|
198
|
+
dc.mutate(
|
|
199
|
+
first_el=func.array.get_element("signal.values", 0),
|
|
200
|
+
second_el=func.array.get_element([1, 2, 3, 4, 5], 1),
|
|
201
|
+
)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Note:
|
|
205
|
+
- Result column will always be the same type as the elements of the array.
|
|
206
|
+
"""
|
|
207
|
+
|
|
208
|
+
def type_from_args(arr, _):
|
|
209
|
+
if isinstance(arr, list):
|
|
210
|
+
try:
|
|
211
|
+
return type(arr[0])
|
|
212
|
+
except IndexError:
|
|
213
|
+
return str # if the array is empty, return str as default type
|
|
214
|
+
return None
|
|
215
|
+
|
|
216
|
+
cols: Optional[Union[str, Sequence, Func]]
|
|
217
|
+
args: Union[str, Sequence, Func, int]
|
|
218
|
+
|
|
219
|
+
if isinstance(arg, (str, Func)):
|
|
220
|
+
cols = [arg]
|
|
221
|
+
args = [index]
|
|
222
|
+
else:
|
|
223
|
+
cols = None
|
|
224
|
+
args = [arg, index]
|
|
225
|
+
|
|
226
|
+
return Func(
|
|
227
|
+
"get_element",
|
|
228
|
+
inner=array.get_element,
|
|
229
|
+
cols=cols,
|
|
230
|
+
args=args,
|
|
231
|
+
from_array=True,
|
|
232
|
+
type_from_args=type_from_args,
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
|
|
181
236
|
def sip_hash_64(arg: Union[str, Sequence]) -> Func:
|
|
182
237
|
"""
|
|
183
238
|
Computes the SipHash-64 hash of the array.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import inspect
|
|
2
2
|
from collections.abc import Sequence
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union, get_args, get_origin
|
|
4
4
|
|
|
5
5
|
from sqlalchemy import BindParameter, Case, ColumnElement, Integer, cast, desc
|
|
6
6
|
from sqlalchemy.sql import func as sa_func
|
|
@@ -36,7 +36,9 @@ class Func(Function):
|
|
|
36
36
|
args: Optional[Sequence[Any]] = None,
|
|
37
37
|
kwargs: Optional[dict[str, Any]] = None,
|
|
38
38
|
result_type: Optional["DataType"] = None,
|
|
39
|
+
type_from_args: Optional[Callable[..., "DataType"]] = None,
|
|
39
40
|
is_array: bool = False,
|
|
41
|
+
from_array: bool = False,
|
|
40
42
|
is_window: bool = False,
|
|
41
43
|
window: Optional["Window"] = None,
|
|
42
44
|
label: Optional[str] = None,
|
|
@@ -47,7 +49,9 @@ class Func(Function):
|
|
|
47
49
|
self.args = args or []
|
|
48
50
|
self.kwargs = kwargs or {}
|
|
49
51
|
self.result_type = result_type
|
|
52
|
+
self.type_from_args = type_from_args
|
|
50
53
|
self.is_array = is_array
|
|
54
|
+
self.from_array = from_array
|
|
51
55
|
self.is_window = is_window
|
|
52
56
|
self.window = window
|
|
53
57
|
self.col_label = label
|
|
@@ -66,7 +70,9 @@ class Func(Function):
|
|
|
66
70
|
self.args,
|
|
67
71
|
self.kwargs,
|
|
68
72
|
self.result_type,
|
|
73
|
+
self.type_from_args,
|
|
69
74
|
self.is_array,
|
|
75
|
+
self.from_array,
|
|
70
76
|
self.is_window,
|
|
71
77
|
window,
|
|
72
78
|
self.col_label,
|
|
@@ -101,6 +107,20 @@ class Func(Function):
|
|
|
101
107
|
"Columns must have the same type to infer result type",
|
|
102
108
|
)
|
|
103
109
|
|
|
110
|
+
if self.from_array:
|
|
111
|
+
if get_origin(col_type) is list:
|
|
112
|
+
col_args = get_args(col_type)
|
|
113
|
+
if len(col_args) != 1:
|
|
114
|
+
raise DataChainColumnError(
|
|
115
|
+
str(self),
|
|
116
|
+
"Array column must have a single type argument",
|
|
117
|
+
)
|
|
118
|
+
return col_args[0]
|
|
119
|
+
raise DataChainColumnError(
|
|
120
|
+
str(self),
|
|
121
|
+
"Array column must be of type list",
|
|
122
|
+
)
|
|
123
|
+
|
|
104
124
|
return list[col_type] if self.is_array else col_type # type: ignore[valid-type]
|
|
105
125
|
|
|
106
126
|
def __add__(self, other: Union[ColT, float]) -> "Func":
|
|
@@ -339,7 +359,9 @@ class Func(Function):
|
|
|
339
359
|
self.args,
|
|
340
360
|
self.kwargs,
|
|
341
361
|
self.result_type,
|
|
362
|
+
self.type_from_args,
|
|
342
363
|
self.is_array,
|
|
364
|
+
self.from_array,
|
|
343
365
|
self.is_window,
|
|
344
366
|
self.window,
|
|
345
367
|
label,
|
|
@@ -368,6 +390,15 @@ class Func(Function):
|
|
|
368
390
|
if signals_schema and (col_type := self._db_col_type(signals_schema)):
|
|
369
391
|
return col_type
|
|
370
392
|
|
|
393
|
+
if (
|
|
394
|
+
self.type_from_args
|
|
395
|
+
and (self.cols is None or self.cols == [])
|
|
396
|
+
and self.args is not None
|
|
397
|
+
and len(self.args) > 0
|
|
398
|
+
and (result_type := self.type_from_args(*self.args)) is not None
|
|
399
|
+
):
|
|
400
|
+
return result_type
|
|
401
|
+
|
|
371
402
|
raise DataChainColumnError(
|
|
372
403
|
str(self),
|
|
373
404
|
"Column name is required to infer result type",
|
|
@@ -127,9 +127,11 @@ def read_database(
|
|
|
127
127
|
```
|
|
128
128
|
|
|
129
129
|
Notes:
|
|
130
|
-
This function works with a variety of databases — including, but not limited to,
|
|
131
|
-
SQLite, DuckDB, PostgreSQL, and Snowflake, provided the appropriate driver is
|
|
132
|
-
installed.
|
|
130
|
+
- This function works with a variety of databases — including,
|
|
131
|
+
but not limited to, SQLite, DuckDB, PostgreSQL, and Snowflake,
|
|
132
|
+
provided the appropriate driver is installed.
|
|
133
|
+
- This call is blocking, and will execute the query and return once the
|
|
134
|
+
results are saved.
|
|
133
135
|
"""
|
|
134
136
|
from datachain.lib.dc.records import read_records
|
|
135
137
|
|
|
@@ -37,6 +37,9 @@ def read_records(
|
|
|
37
37
|
import datachain as dc
|
|
38
38
|
single_record = dc.read_records(dc.DEFAULT_FILE_RECORD)
|
|
39
39
|
```
|
|
40
|
+
|
|
41
|
+
Notes:
|
|
42
|
+
This call blocks until all records are inserted.
|
|
40
43
|
"""
|
|
41
44
|
from datachain.query.dataset import INSERT_BATCH_SIZE, adjust_outputs, get_col_types
|
|
42
45
|
from datachain.sql.types import SQLType
|
|
@@ -48,6 +48,16 @@ class contains(GenericFunction): # noqa: N801
|
|
|
48
48
|
inherit_cache = True
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
class get_element(GenericFunction):  # noqa: N801
    """
    SQL function that picks a single element out of an array by its index.
    """

    # Function name and the package (namespace) it is declared under.
    name = "get_element"
    package = "array"
    # Adds no state of its own, so the parent's cache-key scheme is reused.
    inherit_cache = True
|
|
59
|
+
|
|
60
|
+
|
|
51
61
|
class sip_hash_64(GenericFunction): # noqa: N801
|
|
52
62
|
"""
|
|
53
63
|
Computes the SipHash-64 hash of the array.
|
|
@@ -63,4 +73,5 @@ compiler_not_implemented(cosine_distance)
|
|
|
63
73
|
compiler_not_implemented(euclidean_distance)
|
|
64
74
|
compiler_not_implemented(length)
|
|
65
75
|
compiler_not_implemented(contains)
|
|
76
|
+
compiler_not_implemented(get_element)
|
|
66
77
|
compiler_not_implemented(sip_hash_64)
|
|
@@ -88,6 +88,7 @@ def setup():
|
|
|
88
88
|
compiles(sql_path.file_ext, "sqlite")(compile_path_file_ext)
|
|
89
89
|
compiles(array.length, "sqlite")(compile_array_length)
|
|
90
90
|
compiles(array.contains, "sqlite")(compile_array_contains)
|
|
91
|
+
compiles(array.get_element, "sqlite")(compile_array_get_element)
|
|
91
92
|
compiles(string.length, "sqlite")(compile_string_length)
|
|
92
93
|
compiles(string.split, "sqlite")(compile_string_split)
|
|
93
94
|
compiles(string.regexp_replace, "sqlite")(compile_string_regexp_replace)
|
|
@@ -270,6 +271,13 @@ def register_user_defined_sql_functions() -> None:
|
|
|
270
271
|
|
|
271
272
|
_registered_function_creators["string_functions"] = create_string_functions
|
|
272
273
|
|
|
274
|
+
def create_array_functions(conn):
    """Register the Python-backed array helpers on the connection ``conn``.

    Installs ``json_array_get_element`` as a two-argument SQL function
    implemented by ``py_json_array_get_element`` and flags it as
    deterministic.
    """
    conn.create_function(
        "json_array_get_element",
        2,
        py_json_array_get_element,
        deterministic=True,
    )
|
|
278
|
+
|
|
279
|
+
_registered_function_creators["array_functions"] = create_array_functions
|
|
280
|
+
|
|
273
281
|
has_json_extension = functions_exist(["json_array_length", "json_array_contains"])
|
|
274
282
|
if not has_json_extension:
|
|
275
283
|
|
|
@@ -438,6 +446,20 @@ def py_json_array_contains(arr, value, is_json):
|
|
|
438
446
|
return value in orjson.loads(arr)
|
|
439
447
|
|
|
440
448
|
|
|
449
|
+
def py_json_array_get_element(val, idx):
    """
    Return the element at position ``idx`` of the JSON-encoded array ``val``.

    Args:
        val: JSON text of the array, or None when the SQL value is NULL.
        idx: Index of the element to fetch, or None when the SQL value is NULL.

    Returns:
        The decoded element, or None when either argument is None or when
        ``idx`` falls outside the array.
    """
    # SQL NULL arrives here as None. Propagate it as NULL rather than letting
    # the JSON parser or the subscript raise, which would abort the query.
    if val is None or idx is None:
        return None

    arr = orjson.loads(val)
    try:
        return arr[idx]
    except IndexError:
        # Out-of-range lookups are reported as NULL, not as an error.
        return None
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def compile_array_get_element(element, compiler, **kwargs):
    """Compile ``element`` as a call to ``json_array_get_element``.

    The operands of ``element`` are forwarded unchanged to
    ``func.json_array_get_element`` (``func`` is presumably SQLAlchemy's
    function namespace - confirm against the module imports), and the
    resulting expression is handed to ``compiler.process`` together with
    ``kwargs``.
    """
    operands = element.clauses.clauses
    sqlite_call = func.json_array_get_element(*operands)
    return compiler.process(sqlite_call, **kwargs)
|
|
461
|
+
|
|
462
|
+
|
|
441
463
|
def compile_array_length(element, compiler, **kwargs):
|
|
442
464
|
return compiler.process(func.json_array_length(*element.clauses.clauses), **kwargs)
|
|
443
465
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.16.2
|
|
3
|
+
Version: 0.16.3
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -23,7 +23,7 @@ Requires-Dist: tqdm
|
|
|
23
23
|
Requires-Dist: numpy<3,>=1
|
|
24
24
|
Requires-Dist: pandas>=2.0.0
|
|
25
25
|
Requires-Dist: packaging
|
|
26
|
-
Requires-Dist: pyarrow
|
|
26
|
+
Requires-Dist: pyarrow<20
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: python-dateutil>=2
|
|
29
29
|
Requires-Dist: attrs>=21.3.0
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import math
|
|
2
|
+
|
|
3
|
+
import datachain as dc
|
|
4
|
+
from datachain import func
|
|
5
|
+
from datachain.sql.types import Float, Int, String
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def values_almost_equal(a, b):
    """Return whether ``a`` and ``b`` match, counting two float NaNs as a match.

    NaN never compares equal to itself with ``==``, so the NaN/NaN pair is
    special-cased; every other pair falls through to ordinary equality.
    """
    both_floats = isinstance(a, float) and isinstance(b, float)
    if both_floats and math.isnan(a) and math.isnan(b):
        return True
    return a == b
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def tuples_almost_equal(t1, t2):
    """Return whether two tuples match position by position, NaN matching NaN."""
    if len(t1) == len(t2):
        for left, right in zip(t1, t2):
            if not values_almost_equal(left, right):
                return False
        return True
    # Tuples of different length can never match.
    return False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def sets_of_tuples_almost_equal(s1, s2):
    """Return whether two collections of tuples match, NaN matching NaN.

    Every tuple of ``s1`` has to be paired with a distinct tuple of ``s2``.
    Plain set equality cannot be used because tuples holding different NaN
    objects do not compare equal.
    """
    if len(s1) != len(s2):
        return False

    missing = object()  # sentinel: no partner found for the current tuple
    remaining = list(s2)
    for candidate in s1:
        partner = next(
            (other for other in remaining if tuples_almost_equal(candidate, other)),
            missing,
        )
        if partner is missing:
            return False
        # Each tuple of s2 may be used for one pairing only.
        remaining.remove(partner)
    return True
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_array_get_element(test_session):
    """Check ``func.array.get_element`` on array columns and on literal arrays.

    Lookups whose index falls outside the array are expected to come back as
    the dialect's default value for the element type.
    """
    db_dialect = test_session.catalog.warehouse.db.dialect

    class Arr(dc.DataModel):
        i: list[int]
        f: list[float]

    records = (
        Arr(i=[10, 20, 30], f=[1.0, 2.0, 3.0]),
        Arr(i=[40, 50, 60], f=[4.0, 5.0, 6.0]),
        Arr(i=[50], f=[5.0]),
    )
    chain = dc.read_values(arr=records, session=test_session)

    # Insertion order doubles as the column order handed to ``collect`` below.
    derived = {
        "first_i": func.array.get_element("arr.i", 0),
        "second_i": func.array.get_element("arr.i", 1),
        "unknown_i": func.array.get_element("arr.i", 100),
        "first_f": func.array.get_element("arr.f", 0),
        "second_f": func.array.get_element("arr.f", 1),
        "first_f2": func.array.get_element([9.0], 0),
        "first_s": func.array.get_element(["a", "b", "c", "d"], 0),
        "second_s": func.array.get_element(["a", "b", "c", "d"], 1),
        "unknown_s": func.array.get_element(["a", "b", "c", "d"], 100),
        "unknown": func.array.get_element([], 0),
    }
    ds = chain.mutate(**derived).collect(*derived)
    actual_rows = set(ds)

    # One expected tuple per input record; the literal-array columns are the
    # same on every row.
    expected_rows = {
        (
            10,
            20,
            Int.default_value(db_dialect),
            1.0,
            2.0,
            9.0,
            "a",
            "b",
            String.default_value(db_dialect),
            String.default_value(db_dialect),
        ),
        (
            40,
            50,
            Int.default_value(db_dialect),
            4.0,
            5.0,
            9.0,
            "a",
            "b",
            String.default_value(db_dialect),
            String.default_value(db_dialect),
        ),
        (
            # Single-element arrays: index 1 is already out of range.
            50,
            Int.default_value(db_dialect),
            Int.default_value(db_dialect),
            5.0,
            Float.default_value(db_dialect),
            9.0,
            "a",
            "b",
            String.default_value(db_dialect),
            String.default_value(db_dialect),
        ),
    }

    assert sets_of_tuples_almost_equal(actual_rows, expected_rows)
|
|
@@ -372,10 +372,6 @@ def test_datasets_in_memory():
|
|
|
372
372
|
],
|
|
373
373
|
)
|
|
374
374
|
def test_datasets_filtering(test_session, attrs, result):
|
|
375
|
-
ds = dc.datasets(column="dataset", session=test_session)
|
|
376
|
-
datasets = [d for d in ds.collect("dataset") if d.name == "fibonacci"]
|
|
377
|
-
assert len(datasets) == 0
|
|
378
|
-
|
|
379
375
|
dc.read_values(num=[1, 2, 3], session=test_session).save(
|
|
380
376
|
"primes", attrs=["number", "num=prime", "small"]
|
|
381
377
|
)
|
|
@@ -5,6 +5,7 @@ from numpy.testing import assert_array_almost_equal
|
|
|
5
5
|
|
|
6
6
|
from datachain import func
|
|
7
7
|
from datachain.sql import select
|
|
8
|
+
from datachain.sql.types import Int, String
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
def test_cosine_distance(warehouse):
|
|
@@ -70,6 +71,40 @@ def test_length(warehouse):
|
|
|
70
71
|
assert result == ((4, 5, 2),)
|
|
71
72
|
|
|
72
73
|
|
|
74
|
+
def test_get_element(warehouse):
    """Select ``func.array.get_element`` over literal arrays in a single row.

    Covers string, float and int arrays, plus lookups that find nothing
    (empty array, negative index on an empty array, index past the end),
    which are expected to yield the dialect's default value.
    """
    db_dialect = warehouse.db.dialect

    columns = [
        func.array.get_element(["abc", "def", "g", "hi"], 0).label("first1"),
        func.array.get_element(["abc", "def", "g", "hi"], 1).label("second1"),
        func.array.get_element([3.0, 5.0, 1.0, 6.0, 1.0], 0).label("first2"),
        func.array.get_element([3.0, 5.0, 1.0, 6.0, 1.0], 1).label("second2"),
        func.array.get_element([1, 2, 3, 4, 5, 6], 0).label("first3"),
        func.array.get_element([1, 2, 3, 4, 5, 6], 1).label("second3"),
        func.array.get_element([1], 0).label("first4"),
        func.array.get_element([2.0], 0).label("first5"),
        func.array.get_element([], 0).label("not_found1"),
        func.array.get_element([], -1).label("not_found2"),
        func.array.get_element([1], 2).label("not_found3"),
    ]
    query = select(*columns)
    result = tuple(warehouse.dataset_rows_select(query))

    # Values are listed in the same order as the selected columns.
    expected_row = (
        "abc",
        "def",
        3.0,
        5.0,
        1,
        2,
        1,
        2.0,
        String.default_value(db_dialect),
        String.default_value(db_dialect),
        Int.default_value(db_dialect),
    )
    assert result == (expected_row,)
|
|
106
|
+
|
|
107
|
+
|
|
73
108
|
def test_contains(warehouse):
|
|
74
109
|
query = select(
|
|
75
110
|
func.contains(["abc", "def", "g", "hi"], "abc").label("contains1"),
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|