datachain 0.37.5__tar.gz → 0.37.6__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.37.5 → datachain-0.37.6}/PKG-INFO +1 -1
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/dataset.py +3 -8
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_udf.py +143 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_union.py +0 -27
- {datachain-0.37.5 → datachain-0.37.6}/.cruft.json +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.gitattributes +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/codecov.yaml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/dependabot.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/workflows/release.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/workflows/tests.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.gitignore +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/.pre-commit-config.yaml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/LICENSE +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/README.rst +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/api_hooks.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/assets/datachain.svg +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/assets/webhook_list.png +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/auth/login.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/auth/logout.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/auth/team.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/auth/token.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/index.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/job/cancel.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/job/clusters.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/job/logs.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/job/ls.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/commands/job/run.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/contributing.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/examples.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/checkpoints.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/db_migrations.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/delta.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/env.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/index.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/namespaces.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/processing.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/remotes.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/guide/retry.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/index.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/overrides/main.html +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/quick-start.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/file.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/index.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/pose.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/segment.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/datachain.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/func.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/array.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/conditional.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/numeric.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/path.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/random.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/string.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/functions/window.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/index.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/toolkit.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/torch.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/references/udf.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/studio/webhooks.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/templates/main.dot +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/templates/operation.dot +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/templates/responses.def +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/docs/tutorials.md +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/multimodal/wds.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/mkdocs.yml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/noxfile.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/pyproject.toml +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/setup.cfg +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/__main__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/asyn.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cache.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/catalog/dependency.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/checkpoint.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/cli/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/azure.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/gcs.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/hf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/http.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/local.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/client/s3.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/config.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/dataset.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/delta.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/error.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/fs/reference.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/fs/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/array.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/base.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/conditional.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/func.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/numeric.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/path.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/random.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/string.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/func/window.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/hash_utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/job.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/audio.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/clip.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/file.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/hf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/image.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/listing.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/projects.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/settings.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/tar.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/text.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/udf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/video.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/listing.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/bbox.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/pose.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/segment.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/model/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/namespace.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/node.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/plugins.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/progress.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/project.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/py.typed +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/batch.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/metrics.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/params.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/queue.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/schema.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/session.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/query/udf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/remote/studio.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/script_meta.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/semver.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/types.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/sql/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/studio.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/telemetry.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain/utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/conftest.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/data.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/examples/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/examples/test_examples.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/examples/wds_data.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/data/lena.jpg +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/test_array.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/test_path.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/test_random.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/functions/test_string.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/model/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_audio.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_catalog.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_checkpoints.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_client.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_data_storage.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_datachain.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_datasets.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_delta.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_file.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_hf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_image.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_listing.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_ls.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_metastore.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_metrics.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_mutate.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_pull.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_pytorch.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_query.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_read_database.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_retry.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_session.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_temp_table_tracking.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_to_database.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_toolkit.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_video.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/func/test_warehouse.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/scripts/feature_class.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/test_atomicity.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/test_cli_e2e.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/test_cli_studio.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/test_import_time.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/test_job_management_e2e.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/test_query_e2e.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/test_telemetry.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/model/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_asyn.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_batching.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_cache.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_catalog.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_client.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_client_http.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_config.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_datachain_hash.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_dataset.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_func.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_job_management.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_listing.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_metastore.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_query.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_query_params.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_semver.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_serializer.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_session.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_utils.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.37.5 → datachain-0.37.6}/tests/utils.py +0 -0
|
@@ -630,23 +630,18 @@ class UDFStep(Step, ABC):
|
|
|
630
630
|
def apply(
|
|
631
631
|
self, query_generator: QueryGenerator, temp_tables: list[str]
|
|
632
632
|
) -> "StepResult":
|
|
633
|
-
|
|
633
|
+
query, tables = self.process_input_query(query_generator.select())
|
|
634
|
+
_query = query
|
|
634
635
|
|
|
635
636
|
# Apply partitioning if needed.
|
|
636
637
|
if self.partition_by is not None:
|
|
637
|
-
_query = query = self.catalog.warehouse._regenerate_system_columns(
|
|
638
|
-
query_generator.select(),
|
|
639
|
-
keep_existing_columns=True,
|
|
640
|
-
regenerate_columns=["sys__id"],
|
|
641
|
-
)
|
|
642
638
|
partition_tbl = self.create_partitions_table(query)
|
|
643
|
-
temp_tables.append(partition_tbl.name)
|
|
644
639
|
query = query.outerjoin(
|
|
645
640
|
partition_tbl,
|
|
646
641
|
partition_tbl.c.sys__id == query.selected_columns.sys__id,
|
|
647
642
|
).add_columns(*partition_columns())
|
|
643
|
+
tables = [*tables, partition_tbl]
|
|
648
644
|
|
|
649
|
-
query, tables = self.process_input_query(query)
|
|
650
645
|
temp_tables.extend(t.name for t in tables)
|
|
651
646
|
udf_table = self.create_udf_table(_query)
|
|
652
647
|
temp_tables.append(udf_table.name)
|
|
@@ -878,3 +878,146 @@ def test_udf_distributed_interrupt(
|
|
|
878
878
|
chain.show()
|
|
879
879
|
captured = capfd.readouterr()
|
|
880
880
|
assert "semaphore" not in captured.err
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def test_gen_works_after_union(test_session_tmpfile, monkeypatch):
|
|
884
|
+
"""
|
|
885
|
+
Union drops sys columns, we test that UDF generates them correctly after that.
|
|
886
|
+
"""
|
|
887
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
888
|
+
n = 30
|
|
889
|
+
|
|
890
|
+
x_ids = list(range(n))
|
|
891
|
+
y_ids = list(range(n, 2 * n))
|
|
892
|
+
|
|
893
|
+
x = dc.read_values(idx=x_ids, session=test_session_tmpfile)
|
|
894
|
+
y = dc.read_values(idx=y_ids, session=test_session_tmpfile)
|
|
895
|
+
|
|
896
|
+
xy = x.union(y)
|
|
897
|
+
|
|
898
|
+
def expand(idx):
|
|
899
|
+
yield f"val-{idx}"
|
|
900
|
+
|
|
901
|
+
generated = xy.settings(parallel=2).gen(
|
|
902
|
+
gen=expand,
|
|
903
|
+
params=("idx",),
|
|
904
|
+
output={"val": str},
|
|
905
|
+
)
|
|
906
|
+
|
|
907
|
+
values = generated.to_values("val")
|
|
908
|
+
|
|
909
|
+
assert len(values) == 2 * n
|
|
910
|
+
assert set(values) == {f"val-{i}" for i in range(2 * n)}
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
@pytest.mark.parametrize("full", [False, True])
|
|
914
|
+
def test_gen_works_after_merge(test_session_tmpfile, monkeypatch, full):
|
|
915
|
+
"""Merge drops sys columns as well; ensure UDF generation still works."""
|
|
916
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
917
|
+
n = 30
|
|
918
|
+
|
|
919
|
+
idxs = list(range(n))
|
|
920
|
+
|
|
921
|
+
left = dc.read_values(
|
|
922
|
+
idx=idxs,
|
|
923
|
+
left_value=[f"left-{i}" for i in idxs],
|
|
924
|
+
session=test_session_tmpfile,
|
|
925
|
+
)
|
|
926
|
+
right = dc.read_values(
|
|
927
|
+
idx=idxs,
|
|
928
|
+
right_value=[f"right-{i}" for i in idxs],
|
|
929
|
+
session=test_session_tmpfile,
|
|
930
|
+
)
|
|
931
|
+
|
|
932
|
+
merged = left.merge(right, on="idx", full=full)
|
|
933
|
+
|
|
934
|
+
def expand(idx, left_value, right_value):
|
|
935
|
+
yield f"val-{idx}-{left_value}-{right_value}"
|
|
936
|
+
|
|
937
|
+
generated = merged.settings(parallel=2).gen(
|
|
938
|
+
gen=expand,
|
|
939
|
+
params=("idx", "left_value", "right_value"),
|
|
940
|
+
output={"val": str},
|
|
941
|
+
)
|
|
942
|
+
|
|
943
|
+
values = generated.to_values("val")
|
|
944
|
+
|
|
945
|
+
assert len(values) == n
|
|
946
|
+
expected = {f"val-{i}-left-{i}-right-{i}" for i in idxs}
|
|
947
|
+
assert set(values) == expected
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
def test_agg_works_after_union(test_session_tmpfile, monkeypatch):
|
|
951
|
+
"""Union must preserve sys columns for aggregations with functional partitions."""
|
|
952
|
+
from datachain import func
|
|
953
|
+
|
|
954
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
955
|
+
|
|
956
|
+
groups = 5
|
|
957
|
+
n = 30
|
|
958
|
+
|
|
959
|
+
x_paths = [f"group-{i % groups}/item-{i}" for i in range(n)]
|
|
960
|
+
y_paths = [f"group-{i % groups}/item-{n + i}" for i in range(n)]
|
|
961
|
+
|
|
962
|
+
x = dc.read_values(path=x_paths, session=test_session_tmpfile)
|
|
963
|
+
y = dc.read_values(path=y_paths, session=test_session_tmpfile)
|
|
964
|
+
|
|
965
|
+
xy = x.union(y)
|
|
966
|
+
|
|
967
|
+
def summarize(paths):
|
|
968
|
+
group = paths[0].split("/")[0]
|
|
969
|
+
yield group, len(paths)
|
|
970
|
+
|
|
971
|
+
aggregated = xy.settings(parallel=2).agg(
|
|
972
|
+
summarize,
|
|
973
|
+
params=("path",),
|
|
974
|
+
output={"partition": str, "count": int},
|
|
975
|
+
partition_by=func.parent("path"),
|
|
976
|
+
)
|
|
977
|
+
|
|
978
|
+
records = aggregated.to_records()
|
|
979
|
+
expected_counts = {f"group-{g}": 2 * n // groups for g in range(groups)}
|
|
980
|
+
assert {row["partition"]: row["count"] for row in records} == expected_counts
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
@pytest.mark.parametrize("full", [False, True])
|
|
984
|
+
def test_agg_works_after_merge(test_session_tmpfile, monkeypatch, full):
|
|
985
|
+
"""Ensure merge keeps sys columns for aggregations with functional partitions."""
|
|
986
|
+
from datachain import func
|
|
987
|
+
|
|
988
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
989
|
+
|
|
990
|
+
groups = 5
|
|
991
|
+
n = 30
|
|
992
|
+
idxs = list(range(n))
|
|
993
|
+
|
|
994
|
+
left = dc.read_values(
|
|
995
|
+
idx=idxs,
|
|
996
|
+
left_path=[f"group-{i % groups}/left-{i}" for i in idxs],
|
|
997
|
+
session=test_session_tmpfile,
|
|
998
|
+
)
|
|
999
|
+
right = dc.read_values(
|
|
1000
|
+
idx=idxs,
|
|
1001
|
+
right_value=idxs,
|
|
1002
|
+
session=test_session_tmpfile,
|
|
1003
|
+
)
|
|
1004
|
+
|
|
1005
|
+
merged = left.merge(right, on="idx", full=full)
|
|
1006
|
+
|
|
1007
|
+
def summarize(left_path, right_value):
|
|
1008
|
+
group = left_path[0].split("/")[0]
|
|
1009
|
+
yield group, sum(right_value)
|
|
1010
|
+
|
|
1011
|
+
aggregated = merged.settings(parallel=2).agg(
|
|
1012
|
+
summarize,
|
|
1013
|
+
params=("left_path", "right_value"),
|
|
1014
|
+
output={"partition": str, "total": int},
|
|
1015
|
+
partition_by=func.parent("left_path"),
|
|
1016
|
+
)
|
|
1017
|
+
|
|
1018
|
+
records = aggregated.to_records()
|
|
1019
|
+
expected_totals = {
|
|
1020
|
+
f"group-{g}": sum(val for val in idxs if val % groups == g)
|
|
1021
|
+
for g in range(groups)
|
|
1022
|
+
}
|
|
1023
|
+
assert {row["partition"]: row["total"] for row in records} == expected_totals
|
|
@@ -57,30 +57,3 @@ def test_union_parallel_udf_ids_only_no_dup(test_session_tmpfile, monkeypatch):
|
|
|
57
57
|
assert total == 2 * n
|
|
58
58
|
assert len(distinct_idx) == 2 * n
|
|
59
59
|
assert total == len(distinct_idx)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def test_union_parallel_gen_ids_only_no_dup(test_session_tmpfile, monkeypatch):
|
|
63
|
-
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
64
|
-
n = 30
|
|
65
|
-
|
|
66
|
-
x_ids = list(range(n))
|
|
67
|
-
y_ids = list(range(n, 2 * n))
|
|
68
|
-
|
|
69
|
-
x = dc.read_values(idx=x_ids, session=test_session_tmpfile)
|
|
70
|
-
y = dc.read_values(idx=y_ids, session=test_session_tmpfile)
|
|
71
|
-
|
|
72
|
-
xy = x.union(y)
|
|
73
|
-
|
|
74
|
-
def expand(idx):
|
|
75
|
-
yield f"val-{idx}"
|
|
76
|
-
|
|
77
|
-
generated = xy.settings(parallel=2).gen(
|
|
78
|
-
gen=expand,
|
|
79
|
-
params=("idx",),
|
|
80
|
-
output={"val": str},
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
values = generated.to_values("val")
|
|
84
|
-
|
|
85
|
-
assert len(values) == 2 * n
|
|
86
|
-
assert set(values) == {f"val-{i}" for i in range(2 * n)}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|