datachain 0.34.4__tar.gz → 0.34.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.34.4 → datachain-0.34.5}/.github/workflows/tests-studio.yml +1 -1
- {datachain-0.34.4 → datachain-0.34.5}/PKG-INFO +1 -1
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/udf.py +7 -1
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/dataset.py +15 -7
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain.egg-info/SOURCES.txt +1 -0
- datachain-0.34.5/tests/func/test_union.py +59 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_datachain.py +4 -1
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_datachain_hash.py +54 -6
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_query_steps_hash.py +28 -0
- {datachain-0.34.4 → datachain-0.34.5}/.cruft.json +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.gitattributes +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/codecov.yaml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/dependabot.yml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/workflows/release.yml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/workflows/tests.yml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.gitignore +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/.pre-commit-config.yaml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/LICENSE +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/README.rst +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/api_hooks.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/assets/datachain.svg +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/assets/webhook_list.png +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/auth/login.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/auth/logout.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/auth/team.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/auth/token.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/index.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/job/cancel.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/job/clusters.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/job/logs.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/job/ls.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/commands/job/run.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/contributing.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/examples.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/db_migrations.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/delta.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/env.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/index.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/namespaces.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/processing.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/remotes.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/guide/retry.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/index.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/overrides/main.html +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/quick-start.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/file.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/index.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/pose.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/segment.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/datachain.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/func.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/array.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/conditional.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/numeric.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/path.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/random.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/string.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/functions/window.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/index.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/toolkit.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/torch.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/references/udf.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/studio/webhooks.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/templates/main.dot +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/templates/operation.dot +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/templates/responses.def +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/docs/tutorials.md +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/multimodal/wds.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/mkdocs.yml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/noxfile.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/pyproject.toml +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/setup.cfg +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/__main__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/asyn.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cache.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/checkpoint.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/cli/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/azure.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/gcs.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/hf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/http.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/local.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/client/s3.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/config.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/dataset.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/delta.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/error.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/fs/reference.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/fs/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/array.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/base.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/conditional.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/func.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/numeric.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/path.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/random.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/string.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/func/window.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/hash_utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/job.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/audio.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/clip.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/file.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/hf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/image.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/listing.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/projects.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/settings.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/tar.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/text.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/video.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/listing.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/bbox.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/pose.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/segment.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/model/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/namespace.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/node.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/plugins.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/progress.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/project.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/py.typed +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/batch.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/metrics.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/params.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/queue.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/schema.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/session.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/udf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/query/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/remote/studio.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/script_meta.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/semver.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/types.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/sql/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/studio.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/telemetry.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain/utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/conftest.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/data.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/examples/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/examples/test_examples.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/examples/wds_data.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/data/lena.jpg +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/test_array.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/test_path.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/test_random.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/functions/test_string.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/model/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_audio.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_batching.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_catalog.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_client.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_data_storage.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_datachain.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_datasets.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_delta.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_file.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_hf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_image.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_listing.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_ls.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_metastore.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_metrics.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_mutate.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_pull.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_pytorch.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_query.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_read_database.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_retry.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_session.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_to_database.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_toolkit.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_udf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_video.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/func/test_warehouse.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/scripts/feature_class.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/test_atomicity.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/test_cli_e2e.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/test_cli_studio.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/test_import_time.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/test_query_e2e.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/test_telemetry.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/model/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_asyn.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_cache.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_catalog.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_client.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_client_http.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_config.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_dataset.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_func.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_listing.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_metastore.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_query.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_query_params.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_semver.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_serializer.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_session.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_utils.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.34.4 → datachain-0.34.5}/tests/utils.py +0 -0
|
@@ -160,9 +160,15 @@ class UDFBase(AbstractUDF):
|
|
|
160
160
|
"""
|
|
161
161
|
Creates SHA hash of this UDF function. It takes into account function,
|
|
162
162
|
inputs and outputs.
|
|
163
|
+
|
|
164
|
+
For function-based UDFs, hashes self._func.
|
|
165
|
+
For class-based UDFs, hashes the process method.
|
|
163
166
|
"""
|
|
167
|
+
# Hash user code: either _func (function-based) or process method (class-based)
|
|
168
|
+
func_to_hash = self._func if self._func else self.process
|
|
169
|
+
|
|
164
170
|
parts = [
|
|
165
|
-
hash_callable(
|
|
171
|
+
hash_callable(func_to_hash),
|
|
166
172
|
self.params.hash() if self.params else "",
|
|
167
173
|
self.output.hash(),
|
|
168
174
|
]
|
|
@@ -982,18 +982,26 @@ class SQLUnion(Step):
|
|
|
982
982
|
|
|
983
983
|
columns1, columns2 = _order_columns(q1.columns, q2.columns)
|
|
984
984
|
|
|
985
|
+
union_select = sqlalchemy.select(*columns1).union_all(
|
|
986
|
+
sqlalchemy.select(*columns2)
|
|
987
|
+
)
|
|
988
|
+
union_cte = union_select.cte()
|
|
989
|
+
regenerated = self.query1.catalog.warehouse._regenerate_system_columns(
|
|
990
|
+
union_cte
|
|
991
|
+
)
|
|
992
|
+
result_columns = tuple(regenerated.selected_columns)
|
|
993
|
+
|
|
985
994
|
def q(*columns):
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
col2 = [c for c in columns2 if c.name in names]
|
|
989
|
-
res = sqlalchemy.select(*col1).union_all(sqlalchemy.select(*col2))
|
|
995
|
+
if not columns:
|
|
996
|
+
return regenerated
|
|
990
997
|
|
|
991
|
-
|
|
992
|
-
|
|
998
|
+
names = {c.name for c in columns}
|
|
999
|
+
selected = [c for c in result_columns if c.name in names]
|
|
1000
|
+
return regenerated.with_only_columns(*selected)
|
|
993
1001
|
|
|
994
1002
|
return step_result(
|
|
995
1003
|
q,
|
|
996
|
-
|
|
1004
|
+
result_columns,
|
|
997
1005
|
dependencies=self.query1.dependencies | self.query2.dependencies,
|
|
998
1006
|
)
|
|
999
1007
|
|
|
@@ -335,6 +335,7 @@ tests/func/test_studio_datetime_parsing.py
|
|
|
335
335
|
tests/func/test_to_database.py
|
|
336
336
|
tests/func/test_toolkit.py
|
|
337
337
|
tests/func/test_udf.py
|
|
338
|
+
tests/func/test_union.py
|
|
338
339
|
tests/func/test_video.py
|
|
339
340
|
tests/func/test_warehouse.py
|
|
340
341
|
tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import datachain as dc
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_union_persist_no_duplication_large_session(test_session, monkeypatch):
|
|
5
|
+
# See https://github.com/iterative/datachain/issues/1356
|
|
6
|
+
# Lower insert batch size to keep the test fast and still cross the boundary.
|
|
7
|
+
monkeypatch.setattr(
|
|
8
|
+
"datachain.data_storage.warehouse.INSERT_BATCH_SIZE", 20, raising=False
|
|
9
|
+
)
|
|
10
|
+
monkeypatch.setattr("datachain.query.dataset.INSERT_BATCH_SIZE", 20, raising=False)
|
|
11
|
+
n = 20 + 7
|
|
12
|
+
|
|
13
|
+
x_ids = list(range(n))
|
|
14
|
+
y_ids = list(range(n, 2 * n))
|
|
15
|
+
|
|
16
|
+
x = dc.read_values(idx=x_ids, session=test_session)
|
|
17
|
+
y = dc.read_values(idx=y_ids, session=test_session)
|
|
18
|
+
|
|
19
|
+
xy = x.union(y)
|
|
20
|
+
assert xy.count() == 2 * n
|
|
21
|
+
|
|
22
|
+
xy_p = xy.persist()
|
|
23
|
+
assert xy_p.count() == 2 * n
|
|
24
|
+
|
|
25
|
+
distinct_idx = {v for (v,) in xy_p.select("idx").results()}
|
|
26
|
+
assert len(distinct_idx) == 2 * n
|
|
27
|
+
|
|
28
|
+
j = xy_p.merge(x, on="idx", inner=True)
|
|
29
|
+
assert j.count() == n
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_union_parallel_udf_ids_only_no_dup(test_session_tmpfile, monkeypatch):
|
|
33
|
+
# Validate that after union, running a parallel UDF that uses the ids-only
|
|
34
|
+
# path does not duplicate rows due to sys__id collisions across branches.
|
|
35
|
+
# This specifically exercises the parallel dispatch path where input rows
|
|
36
|
+
# are split by sys__id and fetched per worker using IN (...) filters.
|
|
37
|
+
# See https://github.com/iterative/datachain/issues/1356
|
|
38
|
+
|
|
39
|
+
# Make worker/ids fetch batches small to exercise splitting on tiny inputs.
|
|
40
|
+
monkeypatch.setattr("datachain.query.dispatch.DEFAULT_BATCH_SIZE", 5, raising=False)
|
|
41
|
+
n = 30
|
|
42
|
+
|
|
43
|
+
x_ids = list(range(n))
|
|
44
|
+
y_ids = list(range(n, 2 * n))
|
|
45
|
+
|
|
46
|
+
x = dc.read_values(idx=x_ids, session=test_session_tmpfile)
|
|
47
|
+
y = dc.read_values(idx=y_ids, session=test_session_tmpfile)
|
|
48
|
+
|
|
49
|
+
xy = x.union(y)
|
|
50
|
+
mapped = xy.settings(parallel=2).map(
|
|
51
|
+
out=lambda idx: idx, output=int, params=("idx",)
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
total = mapped.count()
|
|
55
|
+
distinct_idx = {v for (v,) in mapped.select("idx").results()}
|
|
56
|
+
|
|
57
|
+
assert total == 2 * n
|
|
58
|
+
assert len(distinct_idx) == 2 * n
|
|
59
|
+
assert total == len(distinct_idx)
|
|
@@ -3686,7 +3686,10 @@ def test_group_by_schema(test_session):
|
|
|
3686
3686
|
"cnt": "int",
|
|
3687
3687
|
"sum": "float",
|
|
3688
3688
|
}
|
|
3689
|
-
assert
|
|
3689
|
+
assert sorted(
|
|
3690
|
+
chain.to_records(),
|
|
3691
|
+
key=lambda row: (row["signal__name"], row["parent__signal__name"]),
|
|
3692
|
+
) == [
|
|
3690
3693
|
{
|
|
3691
3694
|
"signal__name": "a",
|
|
3692
3695
|
"parent__signal__name": "a",
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from unittest.mock import patch
|
|
2
2
|
|
|
3
|
+
import pandas as pd
|
|
3
4
|
import pytest
|
|
4
5
|
from pydantic import BaseModel
|
|
5
6
|
|
|
@@ -7,6 +8,11 @@ import datachain as dc
|
|
|
7
8
|
from datachain import func
|
|
8
9
|
from datachain.lib.dc import C
|
|
9
10
|
|
|
11
|
+
DF_DATA = {
|
|
12
|
+
"first_name": ["Alice", "Bob", "Charlie", "David", "Eva"],
|
|
13
|
+
"age": [25, 30, 35, 40, 45],
|
|
14
|
+
}
|
|
15
|
+
|
|
10
16
|
|
|
11
17
|
class Person(BaseModel):
|
|
12
18
|
name: str
|
|
@@ -55,13 +61,55 @@ def mock_get_listing():
|
|
|
55
61
|
|
|
56
62
|
|
|
57
63
|
def test_read_values():
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
64
|
+
"""
|
|
65
|
+
Hash of the chain started with read_values is currently inconsistent.
|
|
66
|
+
Goal of this test is just to check it doesn't break.
|
|
67
|
+
"""
|
|
68
|
+
assert dc.read_values(num=[1, 2, 3]).hash() is not None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_read_csv(test_session, tmp_dir):
|
|
72
|
+
"""
|
|
73
|
+
Hash of the chain started with read_csv is currently inconsistent.
|
|
74
|
+
Goal of this test is just to check it doesn't break.
|
|
75
|
+
"""
|
|
76
|
+
path = tmp_dir / "test.csv"
|
|
77
|
+
pd.DataFrame(DF_DATA).to_csv(path, index=False)
|
|
78
|
+
assert dc.read_csv(path.as_uri(), session=test_session).hash() is not None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@pytest.mark.filterwarnings("ignore::pydantic.warnings.PydanticDeprecatedSince20")
|
|
82
|
+
def test_read_json(test_session, tmp_dir):
|
|
83
|
+
"""
|
|
84
|
+
Hash of the chain started with read_json is currently inconsistent.
|
|
85
|
+
Goal of this test is just to check it doesn't break.
|
|
86
|
+
"""
|
|
87
|
+
path = tmp_dir / "test.jsonl"
|
|
88
|
+
dc.read_pandas(pd.DataFrame(DF_DATA), session=test_session).to_jsonl(path)
|
|
89
|
+
assert (
|
|
90
|
+
dc.read_json(path.as_uri(), format="jsonl", session=test_session).hash()
|
|
91
|
+
is not None
|
|
63
92
|
)
|
|
64
|
-
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_read_pandas(test_session, tmp_dir):
|
|
96
|
+
"""
|
|
97
|
+
Hash of the chain started with read_pandas is currently inconsistent.
|
|
98
|
+
Goal of this test is just to check it doesn't break.
|
|
99
|
+
"""
|
|
100
|
+
df = pd.DataFrame(DF_DATA)
|
|
101
|
+
assert dc.read_pandas(df, session=test_session).hash() is not None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def test_read_parquet(test_session, tmp_dir):
|
|
105
|
+
"""
|
|
106
|
+
Hash of the chain started with read_parquet is currently inconsistent.
|
|
107
|
+
Goal of this test is just to check it doesn't break.
|
|
108
|
+
"""
|
|
109
|
+
df = pd.DataFrame(DF_DATA)
|
|
110
|
+
path = tmp_dir / "test.parquet"
|
|
111
|
+
dc.read_pandas(df, session=test_session).to_parquet(path)
|
|
112
|
+
assert dc.read_parquet(path.as_uri(), session=test_session).hash() is not None
|
|
65
113
|
|
|
66
114
|
|
|
67
115
|
def test_read_storage(mock_get_listing):
|
|
@@ -75,6 +75,22 @@ def custom_feature_gen(m_fr):
|
|
|
75
75
|
)
|
|
76
76
|
|
|
77
77
|
|
|
78
|
+
# Class-based UDFs for testing hash calculation
|
|
79
|
+
class DoubleMapper(Mapper):
|
|
80
|
+
"""Class-based Mapper that overrides process()."""
|
|
81
|
+
|
|
82
|
+
def process(self, x):
|
|
83
|
+
return x * 2
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class TripleGenerator(Generator):
|
|
87
|
+
"""Class-based Generator that overrides process()."""
|
|
88
|
+
|
|
89
|
+
def process(self, x):
|
|
90
|
+
yield x * 3
|
|
91
|
+
yield x * 3 + 1
|
|
92
|
+
|
|
93
|
+
|
|
78
94
|
@pytest.fixture
|
|
79
95
|
def numbers_dataset(test_session):
|
|
80
96
|
"""
|
|
@@ -394,6 +410,12 @@ def test_subtract_hash(test_session, numbers_dataset, on, _hash):
|
|
|
394
410
|
{"x": CustomFeature},
|
|
395
411
|
"b4edceaa18ed731085e1c433a6d21deabec8d92dfc338fb1d709ed7951977fc5",
|
|
396
412
|
),
|
|
413
|
+
(
|
|
414
|
+
DoubleMapper(),
|
|
415
|
+
["x"],
|
|
416
|
+
{"double": int},
|
|
417
|
+
"7994436106fef0486b04078b02ee437be3aa73ade2d139fb8c020e2199515e26",
|
|
418
|
+
),
|
|
397
419
|
],
|
|
398
420
|
)
|
|
399
421
|
def test_udf_mapper_hash(
|
|
@@ -428,6 +450,12 @@ def test_udf_mapper_hash(
|
|
|
428
450
|
{"x": CustomFeature},
|
|
429
451
|
"7ff702d242612cbb83cbd1777aa79d2792fb2a341db5ea406cd9fd3f42543b9c",
|
|
430
452
|
),
|
|
453
|
+
(
|
|
454
|
+
TripleGenerator(),
|
|
455
|
+
["x"],
|
|
456
|
+
{"triple": int},
|
|
457
|
+
"02b4c6bf98ffa011b7c62f3374f219f21796ece5b001d99e4c2f69edf0a94f4a",
|
|
458
|
+
),
|
|
431
459
|
],
|
|
432
460
|
)
|
|
433
461
|
def test_udf_generator_hash(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|