datachain 0.34.2__tar.gz → 0.34.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. (The original page linked to further details here.)
- {datachain-0.34.2 → datachain-0.34.3}/PKG-INFO +1 -1
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/sqlite.py +11 -2
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/warehouse.py +40 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/datachain.py +7 -2
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_datachain_merge.py +44 -0
- {datachain-0.34.2 → datachain-0.34.3}/.cruft.json +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.gitattributes +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/codecov.yaml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/dependabot.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/workflows/release.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/workflows/tests.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.gitignore +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/.pre-commit-config.yaml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/LICENSE +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/README.rst +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/api_hooks.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/assets/datachain.svg +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/assets/webhook_list.png +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/auth/login.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/auth/logout.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/auth/team.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/auth/token.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/index.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/job/cancel.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/job/clusters.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/job/logs.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/job/ls.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/commands/job/run.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/contributing.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/examples.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/db_migrations.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/delta.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/env.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/index.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/namespaces.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/processing.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/remotes.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/guide/retry.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/index.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/overrides/main.html +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/quick-start.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/file.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/index.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/pose.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/segment.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/datachain.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/func.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/array.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/conditional.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/numeric.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/path.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/random.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/string.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/functions/window.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/index.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/toolkit.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/torch.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/references/udf.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/studio/webhooks.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/templates/main.dot +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/templates/operation.dot +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/templates/responses.def +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/docs/tutorials.md +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/multimodal/wds.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/mkdocs.yml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/noxfile.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/pyproject.toml +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/setup.cfg +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/__main__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/asyn.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cache.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/checkpoint.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/cli/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/azure.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/gcs.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/hf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/http.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/local.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/client/s3.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/config.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/dataset.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/delta.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/error.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/fs/reference.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/fs/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/array.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/base.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/conditional.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/func.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/numeric.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/path.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/random.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/string.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/func/window.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/hash_utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/job.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/audio.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/clip.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/file.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/hf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/image.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/listing.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/projects.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/settings.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/tar.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/text.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/udf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/video.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/listing.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/bbox.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/pose.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/segment.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/model/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/namespace.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/node.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/plugins.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/progress.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/project.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/py.typed +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/batch.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/dataset.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/metrics.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/params.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/queue.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/schema.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/session.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/udf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/query/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/remote/studio.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/script_meta.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/semver.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/types.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/sql/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/studio.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/telemetry.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain/utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/conftest.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/data.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/examples/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/examples/test_examples.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/examples/wds_data.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/data/lena.jpg +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/test_array.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/test_path.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/test_random.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/functions/test_string.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/model/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_audio.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_batching.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_catalog.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_client.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_data_storage.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_datachain.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_datasets.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_delta.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_file.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_hf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_image.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_listing.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_ls.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_metastore.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_metrics.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_mutate.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_pull.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_pytorch.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_query.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_read_database.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_retry.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_session.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_to_database.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_toolkit.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_video.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/func/test_warehouse.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/scripts/feature_class.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/test_atomicity.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/test_cli_e2e.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/test_cli_studio.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/test_import_time.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/test_query_e2e.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/test_telemetry.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/model/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_asyn.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_cache.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_catalog.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_client.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_client_http.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_config.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_datachain_hash.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_dataset.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_func.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_listing.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_metastore.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_query.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_query_params.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_semver.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_serializer.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_session.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_utils.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.34.2 → datachain-0.34.3}/tests/utils.py +0 -0
|
@@ -875,8 +875,17 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
875
875
|
if isinstance(c, BinaryExpression):
|
|
876
876
|
right_left_join = add_left_rows_filter(c)
|
|
877
877
|
|
|
878
|
-
|
|
879
|
-
|
|
878
|
+
# Use CTE instead of subquery to force SQLite to materialize the result
|
|
879
|
+
# This breaks deep nesting and prevents parser stack overflow.
|
|
880
|
+
union_cte = sqlalchemy.union(left_right_join, right_left_join).cte()
|
|
881
|
+
|
|
882
|
+
return self._regenerate_system_columns(union_cte)
|
|
883
|
+
|
|
884
|
+
def _system_row_number_expr(self):
|
|
885
|
+
return func.row_number().over()
|
|
886
|
+
|
|
887
|
+
def _system_random_expr(self):
|
|
888
|
+
return self._system_row_number_expr() * 1103515245 + 12345
|
|
880
889
|
|
|
881
890
|
def create_pre_udf_table(self, query: "Select") -> "Table":
|
|
882
891
|
"""
|
|
@@ -246,6 +246,44 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
246
246
|
break # no more results
|
|
247
247
|
offset += page_size
|
|
248
248
|
|
|
249
|
+
def _regenerate_system_columns(self, selectable):
|
|
250
|
+
"""Return a SELECT that regenerates sys__id and sys__rand deterministically."""
|
|
251
|
+
|
|
252
|
+
base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
|
|
253
|
+
|
|
254
|
+
system_types: dict[str, sa.types.TypeEngine] = {
|
|
255
|
+
sys_col.name: sys_col.type
|
|
256
|
+
for sys_col in self.schema.dataset_row_cls.sys_columns()
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
result_columns = []
|
|
260
|
+
for col in base.c:
|
|
261
|
+
if col.name == "sys__id":
|
|
262
|
+
expr = self._system_row_number_expr()
|
|
263
|
+
expr = sa.cast(expr, system_types["sys__id"])
|
|
264
|
+
result_columns.append(expr.label("sys__id"))
|
|
265
|
+
elif col.name == "sys__rand":
|
|
266
|
+
expr = self._system_random_expr()
|
|
267
|
+
expr = sa.cast(expr, system_types["sys__rand"])
|
|
268
|
+
result_columns.append(expr.label("sys__rand"))
|
|
269
|
+
else:
|
|
270
|
+
result_columns.append(col)
|
|
271
|
+
|
|
272
|
+
# Wrap in subquery to materialize window functions, then wrap again in SELECT
|
|
273
|
+
# This ensures window functions are computed before INSERT...FROM SELECT
|
|
274
|
+
inner = sa.select(*result_columns).select_from(base).subquery()
|
|
275
|
+
return sa.select(*inner.c).select_from(inner)
|
|
276
|
+
|
|
277
|
+
def _system_row_number_expr(self):
|
|
278
|
+
"""Return an expression that produces deterministic row numbers."""
|
|
279
|
+
|
|
280
|
+
raise NotImplementedError
|
|
281
|
+
|
|
282
|
+
def _system_random_expr(self):
|
|
283
|
+
"""Return an expression that produces deterministic random values."""
|
|
284
|
+
|
|
285
|
+
raise NotImplementedError
|
|
286
|
+
|
|
249
287
|
#
|
|
250
288
|
# Table Name Internal Functions
|
|
251
289
|
#
|
|
@@ -923,6 +961,8 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
923
961
|
right: "_FromClauseArgument",
|
|
924
962
|
onclause: "_OnClauseArgument",
|
|
925
963
|
inner: bool = True,
|
|
964
|
+
full: bool = False,
|
|
965
|
+
columns=None,
|
|
926
966
|
) -> sa.Select:
|
|
927
967
|
"""
|
|
928
968
|
Join two tables together.
|
|
@@ -1701,7 +1701,11 @@ class DataChain:
|
|
|
1701
1701
|
)
|
|
1702
1702
|
|
|
1703
1703
|
query = self._query.join(
|
|
1704
|
-
right_ds._query,
|
|
1704
|
+
right_ds._query,
|
|
1705
|
+
sqlalchemy.and_(*ops),
|
|
1706
|
+
inner,
|
|
1707
|
+
full,
|
|
1708
|
+
rname + "{name}",
|
|
1705
1709
|
)
|
|
1706
1710
|
query.feature_schema = None
|
|
1707
1711
|
ds = self._evolve(query=query)
|
|
@@ -1989,7 +1993,8 @@ class DataChain:
|
|
|
1989
1993
|
results = self.results(include_hidden=include_hidden)
|
|
1990
1994
|
if as_object:
|
|
1991
1995
|
df = pd.DataFrame(results, columns=columns, dtype=object)
|
|
1992
|
-
|
|
1996
|
+
df.where(pd.notna(df), None, inplace=True)
|
|
1997
|
+
return df
|
|
1993
1998
|
return pd.DataFrame.from_records(results, columns=columns)
|
|
1994
1999
|
|
|
1995
2000
|
def show(
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import pytest
|
|
2
2
|
|
|
3
3
|
import datachain as dc
|
|
4
|
+
from datachain import File
|
|
4
5
|
from datachain.sql.types import Int
|
|
5
6
|
|
|
6
7
|
|
|
@@ -99,3 +100,46 @@ def test_merge_multiple(cloud_test_catalog, inner1, inner2, inner3):
|
|
|
99
100
|
("dogs/dog3", 1, signal_default_value),
|
|
100
101
|
("dogs/others/dog4", 1, signal_default_value),
|
|
101
102
|
]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_full_outer_join_preserves_all_rows(test_session):
|
|
106
|
+
"""Test that full outer join correctly saves all rows including right-only rows.
|
|
107
|
+
This test verifies the fix for the NULL sys__id bug where right-only rows
|
|
108
|
+
from a full outer join had NULL sys__id values
|
|
109
|
+
"""
|
|
110
|
+
# Create two datasets with no overlapping file paths
|
|
111
|
+
ds1 = dc.read_values(
|
|
112
|
+
id=[1, 2, 3],
|
|
113
|
+
file=[File(path=str(i)) for i in [1, 2, 3]],
|
|
114
|
+
session=test_session,
|
|
115
|
+
).persist()
|
|
116
|
+
|
|
117
|
+
ds2 = dc.read_values(
|
|
118
|
+
id=[5, 6, 7],
|
|
119
|
+
file=[File(path=str(i)) for i in [5, 6, 7]],
|
|
120
|
+
session=test_session,
|
|
121
|
+
).persist()
|
|
122
|
+
|
|
123
|
+
merged = ds1.merge(ds2, on="file.path", full=True)
|
|
124
|
+
|
|
125
|
+
# Use internal method to get all records including sys columns
|
|
126
|
+
records = merged._query.to_db_records()
|
|
127
|
+
|
|
128
|
+
assert len(records) == 6
|
|
129
|
+
|
|
130
|
+
# Extract sys__id and sys__rand from records
|
|
131
|
+
sys_ids = [r["sys__id"] for r in records]
|
|
132
|
+
sys_rands = [r["sys__rand"] for r in records]
|
|
133
|
+
|
|
134
|
+
# All sys__id values should be non-NULL and unique
|
|
135
|
+
assert all(sid is not None for sid in sys_ids)
|
|
136
|
+
assert len(set(sys_ids)) == 6
|
|
137
|
+
assert all(rand is not None for rand in sys_rands)
|
|
138
|
+
|
|
139
|
+
count_before = merged.count()
|
|
140
|
+
|
|
141
|
+
# Save and verify all rows are persisted
|
|
142
|
+
merged.save("test_merge")
|
|
143
|
+
count_after = dc.read_dataset("test_merge", session=test_session).count()
|
|
144
|
+
|
|
145
|
+
assert count_before == count_after == 6
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|