datachain 0.37.4__tar.gz → 0.37.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.37.4 → datachain-0.37.6}/.pre-commit-config.yaml +1 -1
- {datachain-0.37.4 → datachain-0.37.6}/PKG-INFO +1 -1
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/dataset.py +7 -17
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_datachain.py +6 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_udf.py +143 -0
- {datachain-0.37.4 → datachain-0.37.6}/.cruft.json +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.gitattributes +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/codecov.yaml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/dependabot.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/workflows/release.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/workflows/tests.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/.gitignore +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/LICENSE +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/README.rst +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/api_hooks.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/assets/datachain.svg +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/assets/webhook_list.png +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/auth/login.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/auth/logout.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/auth/team.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/auth/token.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/index.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/job/cancel.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/job/clusters.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/job/logs.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/job/ls.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/commands/job/run.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/contributing.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/examples.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/checkpoints.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/db_migrations.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/delta.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/env.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/index.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/namespaces.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/processing.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/remotes.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/guide/retry.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/index.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/overrides/main.html +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/quick-start.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/file.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/index.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/pose.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/segment.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/datachain.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/func.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/array.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/conditional.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/numeric.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/path.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/random.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/string.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/functions/window.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/index.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/toolkit.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/torch.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/references/udf.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/studio/webhooks.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/templates/main.dot +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/templates/operation.dot +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/templates/responses.def +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/docs/tutorials.md +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/multimodal/wds.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/mkdocs.yml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/noxfile.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/pyproject.toml +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/setup.cfg +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/__main__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/asyn.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cache.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/catalog/dependency.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/checkpoint.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/cli/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/azure.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/gcs.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/hf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/http.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/local.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/client/s3.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/config.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/dataset.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/delta.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/error.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/fs/reference.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/fs/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/array.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/base.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/conditional.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/func.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/numeric.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/path.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/random.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/string.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/func/window.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/hash_utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/job.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/audio.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/clip.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/file.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/hf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/image.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/listing.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/projects.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/settings.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/tar.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/text.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/udf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/video.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/listing.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/bbox.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/pose.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/segment.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/model/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/namespace.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/node.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/plugins.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/progress.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/project.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/py.typed +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/batch.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/metrics.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/params.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/queue.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/schema.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/session.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/udf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/remote/studio.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/script_meta.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/semver.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/types.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/sql/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/studio.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/telemetry.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain/utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/conftest.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/data.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/examples/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/examples/test_examples.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/examples/wds_data.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/data/lena.jpg +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/test_array.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/test_path.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/test_random.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/functions/test_string.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/model/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_audio.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_catalog.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_checkpoints.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_client.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_data_storage.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_datasets.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_delta.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_file.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_hf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_image.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_listing.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_ls.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_metastore.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_metrics.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_mutate.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_pull.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_pytorch.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_query.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_read_database.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_retry.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_session.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_temp_table_tracking.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_to_database.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_toolkit.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_union.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_video.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/func/test_warehouse.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/scripts/feature_class.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/test_atomicity.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/test_cli_e2e.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/test_cli_studio.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/test_import_time.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/test_job_management_e2e.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/test_query_e2e.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/test_telemetry.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/model/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_asyn.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_batching.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_cache.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_catalog.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_client.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_client_http.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_config.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_datachain_hash.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_dataset.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_func.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_job_management.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_listing.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_metastore.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_query.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_query_params.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_semver.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_serializer.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_session.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_utils.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.37.4 → datachain-0.37.6}/tests/utils.py +0 -0
|
{datachain-0.37.4 → datachain-0.37.6}/src/datachain/query/dataset.py
@@ -426,8 +426,10 @@ class UDFStep(Step, ABC):
|
|
|
426
426
|
"""Method that creates a table where temp udf results will be saved"""
|
|
427
427
|
|
|
428
428
|
def process_input_query(self, query: Select) -> tuple[Select, list["Table"]]:
|
|
429
|
-
"""
|
|
430
|
-
|
|
429
|
+
"""Materialize inputs, ensure sys columns are available, needed for checkpoints,
|
|
430
|
+
needed for map to work (merge results)"""
|
|
431
|
+
table = self.catalog.warehouse.create_pre_udf_table(query)
|
|
432
|
+
return sqlalchemy.select(*table.c), [table]
|
|
431
433
|
|
|
432
434
|
@abstractmethod
|
|
433
435
|
def create_result_query(
|
|
@@ -628,23 +630,18 @@ class UDFStep(Step, ABC):
|
|
|
628
630
|
def apply(
|
|
629
631
|
self, query_generator: QueryGenerator, temp_tables: list[str]
|
|
630
632
|
) -> "StepResult":
|
|
631
|
-
|
|
633
|
+
query, tables = self.process_input_query(query_generator.select())
|
|
634
|
+
_query = query
|
|
632
635
|
|
|
633
636
|
# Apply partitioning if needed.
|
|
634
637
|
if self.partition_by is not None:
|
|
635
|
-
_query = query = self.catalog.warehouse._regenerate_system_columns(
|
|
636
|
-
query_generator.select(),
|
|
637
|
-
keep_existing_columns=True,
|
|
638
|
-
regenerate_columns=["sys__id"],
|
|
639
|
-
)
|
|
640
638
|
partition_tbl = self.create_partitions_table(query)
|
|
641
|
-
temp_tables.append(partition_tbl.name)
|
|
642
639
|
query = query.outerjoin(
|
|
643
640
|
partition_tbl,
|
|
644
641
|
partition_tbl.c.sys__id == query.selected_columns.sys__id,
|
|
645
642
|
).add_columns(*partition_columns())
|
|
643
|
+
tables = [*tables, partition_tbl]
|
|
646
644
|
|
|
647
|
-
query, tables = self.process_input_query(query)
|
|
648
645
|
temp_tables.extend(t.name for t in tables)
|
|
649
646
|
udf_table = self.create_udf_table(_query)
|
|
650
647
|
temp_tables.append(udf_table.name)
|
|
@@ -675,13 +672,6 @@ class UDFSignal(UDFStep):
|
|
|
675
672
|
|
|
676
673
|
return self.catalog.warehouse.create_udf_table(udf_output_columns)
|
|
677
674
|
|
|
678
|
-
def process_input_query(self, query: Select) -> tuple[Select, list["Table"]]:
|
|
679
|
-
if os.getenv("DATACHAIN_DISABLE_QUERY_CACHE", "") not in ("", "0"):
|
|
680
|
-
return query, []
|
|
681
|
-
table = self.catalog.warehouse.create_pre_udf_table(query)
|
|
682
|
-
q: Select = sqlalchemy.select(*table.c)
|
|
683
|
-
return q, [table]
|
|
684
|
-
|
|
685
675
|
def create_result_query(
|
|
686
676
|
self, udf_table, query
|
|
687
677
|
) -> tuple[QueryGeneratorFunc, list["sqlalchemy.Column"]]:
|
|
@@ -1699,10 +1699,16 @@ def test_agg_offset_limit(catalog_tmpfile, parallel, offset, limit, files):
|
|
|
1699
1699
|
value=list(range(100)),
|
|
1700
1700
|
session=catalog_tmpfile.session,
|
|
1701
1701
|
)
|
|
1702
|
+
# Read values in general doesn't guarantee order, so we need to order first
|
|
1703
|
+
ds = ds.order_by("filename")
|
|
1702
1704
|
if offset is not None:
|
|
1703
1705
|
ds = ds.offset(offset)
|
|
1704
1706
|
if limit is not None:
|
|
1705
1707
|
ds = ds.limit(limit)
|
|
1708
|
+
|
|
1709
|
+
limited_filenames = ds.to_values("filename")
|
|
1710
|
+
assert set(limited_filenames) == set(files)
|
|
1711
|
+
|
|
1706
1712
|
ds = (
|
|
1707
1713
|
ds.settings(parallel=parallel)
|
|
1708
1714
|
.agg(
|
|
@@ -878,3 +878,146 @@ def test_udf_distributed_interrupt(
|
|
|
878
878
|
chain.show()
|
|
879
879
|
captured = capfd.readouterr()
|
|
880
880
|
assert "semaphore" not in captured.err
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def test_gen_works_after_union(test_session_tmpfile, monkeypatch):
|
|
884
|
+
"""
|
|
885
|
+
Union drops sys columns, we test that UDF generates them correctly after that.
|
|
886
|
+
"""
|
|
887
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
888
|
+
n = 30
|
|
889
|
+
|
|
890
|
+
x_ids = list(range(n))
|
|
891
|
+
y_ids = list(range(n, 2 * n))
|
|
892
|
+
|
|
893
|
+
x = dc.read_values(idx=x_ids, session=test_session_tmpfile)
|
|
894
|
+
y = dc.read_values(idx=y_ids, session=test_session_tmpfile)
|
|
895
|
+
|
|
896
|
+
xy = x.union(y)
|
|
897
|
+
|
|
898
|
+
def expand(idx):
|
|
899
|
+
yield f"val-{idx}"
|
|
900
|
+
|
|
901
|
+
generated = xy.settings(parallel=2).gen(
|
|
902
|
+
gen=expand,
|
|
903
|
+
params=("idx",),
|
|
904
|
+
output={"val": str},
|
|
905
|
+
)
|
|
906
|
+
|
|
907
|
+
values = generated.to_values("val")
|
|
908
|
+
|
|
909
|
+
assert len(values) == 2 * n
|
|
910
|
+
assert set(values) == {f"val-{i}" for i in range(2 * n)}
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
@pytest.mark.parametrize("full", [False, True])
|
|
914
|
+
def test_gen_works_after_merge(test_session_tmpfile, monkeypatch, full):
|
|
915
|
+
"""Merge drops sys columns as well; ensure UDF generation still works."""
|
|
916
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
917
|
+
n = 30
|
|
918
|
+
|
|
919
|
+
idxs = list(range(n))
|
|
920
|
+
|
|
921
|
+
left = dc.read_values(
|
|
922
|
+
idx=idxs,
|
|
923
|
+
left_value=[f"left-{i}" for i in idxs],
|
|
924
|
+
session=test_session_tmpfile,
|
|
925
|
+
)
|
|
926
|
+
right = dc.read_values(
|
|
927
|
+
idx=idxs,
|
|
928
|
+
right_value=[f"right-{i}" for i in idxs],
|
|
929
|
+
session=test_session_tmpfile,
|
|
930
|
+
)
|
|
931
|
+
|
|
932
|
+
merged = left.merge(right, on="idx", full=full)
|
|
933
|
+
|
|
934
|
+
def expand(idx, left_value, right_value):
|
|
935
|
+
yield f"val-{idx}-{left_value}-{right_value}"
|
|
936
|
+
|
|
937
|
+
generated = merged.settings(parallel=2).gen(
|
|
938
|
+
gen=expand,
|
|
939
|
+
params=("idx", "left_value", "right_value"),
|
|
940
|
+
output={"val": str},
|
|
941
|
+
)
|
|
942
|
+
|
|
943
|
+
values = generated.to_values("val")
|
|
944
|
+
|
|
945
|
+
assert len(values) == n
|
|
946
|
+
expected = {f"val-{i}-left-{i}-right-{i}" for i in idxs}
|
|
947
|
+
assert set(values) == expected
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
def test_agg_works_after_union(test_session_tmpfile, monkeypatch):
|
|
951
|
+
"""Union must preserve sys columns for aggregations with functional partitions."""
|
|
952
|
+
from datachain import func
|
|
953
|
+
|
|
954
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
955
|
+
|
|
956
|
+
groups = 5
|
|
957
|
+
n = 30
|
|
958
|
+
|
|
959
|
+
x_paths = [f"group-{i % groups}/item-{i}" for i in range(n)]
|
|
960
|
+
y_paths = [f"group-{i % groups}/item-{n + i}" for i in range(n)]
|
|
961
|
+
|
|
962
|
+
x = dc.read_values(path=x_paths, session=test_session_tmpfile)
|
|
963
|
+
y = dc.read_values(path=y_paths, session=test_session_tmpfile)
|
|
964
|
+
|
|
965
|
+
xy = x.union(y)
|
|
966
|
+
|
|
967
|
+
def summarize(paths):
|
|
968
|
+
group = paths[0].split("/")[0]
|
|
969
|
+
yield group, len(paths)
|
|
970
|
+
|
|
971
|
+
aggregated = xy.settings(parallel=2).agg(
|
|
972
|
+
summarize,
|
|
973
|
+
params=("path",),
|
|
974
|
+
output={"partition": str, "count": int},
|
|
975
|
+
partition_by=func.parent("path"),
|
|
976
|
+
)
|
|
977
|
+
|
|
978
|
+
records = aggregated.to_records()
|
|
979
|
+
expected_counts = {f"group-{g}": 2 * n // groups for g in range(groups)}
|
|
980
|
+
assert {row["partition"]: row["count"] for row in records} == expected_counts
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
@pytest.mark.parametrize("full", [False, True])
|
|
984
|
+
def test_agg_works_after_merge(test_session_tmpfile, monkeypatch, full):
|
|
985
|
+
"""Ensure merge keeps sys columns for aggregations with functional partitions."""
|
|
986
|
+
from datachain import func
|
|
987
|
+
|
|
988
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
989
|
+
|
|
990
|
+
groups = 5
|
|
991
|
+
n = 30
|
|
992
|
+
idxs = list(range(n))
|
|
993
|
+
|
|
994
|
+
left = dc.read_values(
|
|
995
|
+
idx=idxs,
|
|
996
|
+
left_path=[f"group-{i % groups}/left-{i}" for i in idxs],
|
|
997
|
+
session=test_session_tmpfile,
|
|
998
|
+
)
|
|
999
|
+
right = dc.read_values(
|
|
1000
|
+
idx=idxs,
|
|
1001
|
+
right_value=idxs,
|
|
1002
|
+
session=test_session_tmpfile,
|
|
1003
|
+
)
|
|
1004
|
+
|
|
1005
|
+
merged = left.merge(right, on="idx", full=full)
|
|
1006
|
+
|
|
1007
|
+
def summarize(left_path, right_value):
|
|
1008
|
+
group = left_path[0].split("/")[0]
|
|
1009
|
+
yield group, sum(right_value)
|
|
1010
|
+
|
|
1011
|
+
aggregated = merged.settings(parallel=2).agg(
|
|
1012
|
+
summarize,
|
|
1013
|
+
params=("left_path", "right_value"),
|
|
1014
|
+
output={"partition": str, "total": int},
|
|
1015
|
+
partition_by=func.parent("left_path"),
|
|
1016
|
+
)
|
|
1017
|
+
|
|
1018
|
+
records = aggregated.to_records()
|
|
1019
|
+
expected_totals = {
|
|
1020
|
+
f"group-{g}": sum(val for val in idxs if val % groups == g)
|
|
1021
|
+
for g in range(groups)
|
|
1022
|
+
}
|
|
1023
|
+
assert {row["partition"]: row["total"] for row in records} == expected_totals
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|