datachain 0.36.0__tar.gz → 0.36.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.36.0 → datachain-0.36.1}/PKG-INFO +3 -2
- {datachain-0.36.0 → datachain-0.36.1}/pyproject.toml +8 -3
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/schema.py +1 -2
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/sqlite.py +2 -9
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/warehouse.py +50 -33
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/diff/__init__.py +2 -6
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/audio.py +54 -53
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/datachain.py +13 -14
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/dataset.py +21 -26
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/dispatch.py +64 -42
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/queue.py +2 -1
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain.egg-info/PKG-INFO +3 -2
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain.egg-info/requires.txt +2 -1
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_datachain.py +1 -1
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_datachain_merge.py +7 -18
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_retry.py +0 -1
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_udf.py +116 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_audio.py +31 -37
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_datachain.py +15 -13
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_datachain_hash.py +1 -1
- {datachain-0.36.0 → datachain-0.36.1}/.cruft.json +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.gitattributes +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/codecov.yaml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/dependabot.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/workflows/release.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/workflows/tests.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.gitignore +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/.pre-commit-config.yaml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/LICENSE +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/README.rst +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/api_hooks.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/assets/datachain.svg +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/assets/webhook_list.png +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/auth/login.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/auth/logout.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/auth/team.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/auth/token.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/job/cancel.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/job/clusters.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/job/logs.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/job/ls.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/commands/job/run.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/contributing.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/examples.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/db_migrations.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/delta.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/env.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/namespaces.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/processing.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/remotes.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/guide/retry.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/overrides/main.html +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/quick-start.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/file.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/pose.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/segment.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/datachain.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/func.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/array.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/conditional.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/numeric.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/path.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/random.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/string.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/functions/window.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/toolkit.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/torch.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/references/udf.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/studio/webhooks.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/templates/main.dot +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/templates/operation.dot +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/templates/responses.def +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/docs/tutorials.md +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/multimodal/wds.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/mkdocs.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/noxfile.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/setup.cfg +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/__main__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/asyn.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cache.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/catalog/dependency.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/checkpoint.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/cli/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/azure.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/gcs.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/http.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/local.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/client/s3.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/config.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/dataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/delta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/error.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/fs/reference.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/fs/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/base.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/func.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/numeric.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/func/window.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/hash_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/job.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/clip.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/file.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/image.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/projects.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/settings.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/tar.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/text.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/udf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/video.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/model/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/namespace.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/node.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/plugins.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/progress.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/project.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/py.typed +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/batch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/metrics.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/params.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/session.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/query/udf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/remote/studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/script_meta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/semver.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/sql/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/telemetry.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/conftest.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/data.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/examples/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/examples/test_examples.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/examples/wds_data.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/data/lena.jpg +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/test_array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/test_path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/test_random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/functions/test_string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/model/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_audio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_catalog.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_client.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_data_storage.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_delta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_file.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_image.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_ls.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_metastore.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_metrics.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_mutate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_pull.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_pytorch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_read_database.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_session.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_to_database.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_toolkit.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_union.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_video.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/func/test_warehouse.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/scripts/feature_class.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/test_atomicity.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/test_cli_e2e.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/test_cli_studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/test_import_time.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/test_query_e2e.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/test_telemetry.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/model/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_asyn.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_batching.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_cache.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_catalog.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_client.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_client_http.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_config.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_dataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_func.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_metastore.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_query_params.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_semver.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_serializer.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_session.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.36.0 → datachain-0.36.1}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.36.0
|
|
3
|
+
Version: 0.36.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -64,7 +64,6 @@ Requires-Dist: torch>=2.1.0; extra == "torch"
|
|
|
64
64
|
Requires-Dist: torchvision; extra == "torch"
|
|
65
65
|
Requires-Dist: transformers>=4.36.0; extra == "torch"
|
|
66
66
|
Provides-Extra: audio
|
|
67
|
-
Requires-Dist: torchaudio; extra == "audio"
|
|
68
67
|
Requires-Dist: soundfile; extra == "audio"
|
|
69
68
|
Provides-Extra: remote
|
|
70
69
|
Requires-Dist: lz4; extra == "remote"
|
|
@@ -76,6 +75,7 @@ Requires-Dist: numba>=0.60.0; extra == "hf"
|
|
|
76
75
|
Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
|
|
77
76
|
Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
|
|
78
77
|
Requires-Dist: fsspec>=2024.12.0; extra == "hf"
|
|
78
|
+
Requires-Dist: torch<2.9.0; extra == "hf"
|
|
79
79
|
Provides-Extra: video
|
|
80
80
|
Requires-Dist: ffmpeg-python; extra == "video"
|
|
81
81
|
Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
|
|
@@ -117,6 +117,7 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
|
117
117
|
Requires-Dist: ultralytics; extra == "examples"
|
|
118
118
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
119
119
|
Requires-Dist: openai; extra == "examples"
|
|
120
|
+
Requires-Dist: torchaudio<2.9.0; extra == "examples"
|
|
120
121
|
Dynamic: license-file
|
|
121
122
|
|
|
122
123
|
================
|
|
@@ -73,7 +73,6 @@ torch = [
|
|
|
73
73
|
"transformers>=4.36.0"
|
|
74
74
|
]
|
|
75
75
|
audio = [
|
|
76
|
-
"torchaudio",
|
|
77
76
|
"soundfile"
|
|
78
77
|
]
|
|
79
78
|
remote = [
|
|
@@ -88,7 +87,11 @@ hf = [
|
|
|
88
87
|
"datasets[vision]>=4.0.0",
|
|
89
88
|
# https://github.com/pytorch/torchcodec/issues/640
|
|
90
89
|
"datasets[audio]>=4.0.0 ; (sys_platform == 'linux' or sys_platform == 'darwin')",
|
|
91
|
-
"fsspec>=2024.12.0"
|
|
90
|
+
"fsspec>=2024.12.0",
|
|
91
|
+
# Until datasets solve the issue, run test_hf_audio test to see if this can be removed
|
|
92
|
+
# https://github.com/meta-pytorch/torchcodec/issues/912
|
|
93
|
+
# https://github.com/huggingface/transformers/pull/41610
|
|
94
|
+
"torch<2.9.0"
|
|
92
95
|
]
|
|
93
96
|
video = [
|
|
94
97
|
"ffmpeg-python",
|
|
@@ -134,7 +137,9 @@ examples = [
|
|
|
134
137
|
"huggingface_hub[hf_transfer]",
|
|
135
138
|
"ultralytics",
|
|
136
139
|
"open_clip_torch",
|
|
137
|
-
"openai"
|
|
140
|
+
"openai",
|
|
141
|
+
# Transformers still require it
|
|
142
|
+
"torchaudio<2.9.0"
|
|
138
143
|
]
|
|
139
144
|
|
|
140
145
|
[project.urls]
|
|
@@ -11,7 +11,6 @@ from datachain.sql.types import (
|
|
|
11
11
|
JSON,
|
|
12
12
|
Boolean,
|
|
13
13
|
DateTime,
|
|
14
|
-
Int,
|
|
15
14
|
Int64,
|
|
16
15
|
SQLType,
|
|
17
16
|
String,
|
|
@@ -269,7 +268,7 @@ class DataTable:
|
|
|
269
268
|
@classmethod
|
|
270
269
|
def sys_columns(cls):
|
|
271
270
|
return [
|
|
272
|
-
sa.Column("sys__id", Int, primary_key=True),
|
|
271
|
+
sa.Column("sys__id", UInt64, primary_key=True),
|
|
273
272
|
sa.Column(
|
|
274
273
|
"sys__rand", UInt64, nullable=False, server_default=f.abs(f.random())
|
|
275
274
|
),
|
|
@@ -868,11 +868,8 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
868
868
|
if isinstance(c, BinaryExpression):
|
|
869
869
|
right_left_join = add_left_rows_filter(c)
|
|
870
870
|
|
|
871
|
-
# Use CTE instead of subquery to force SQLite to materialize the result
|
|
872
|
-
# This breaks deep nesting and prevents parser stack overflow.
|
|
873
871
|
union_cte = sqlalchemy.union(left_right_join, right_left_join).cte()
|
|
874
|
-
|
|
875
|
-
return self._regenerate_system_columns(union_cte)
|
|
872
|
+
return sqlalchemy.select(*union_cte.c).select_from(union_cte)
|
|
876
873
|
|
|
877
874
|
def _system_row_number_expr(self):
|
|
878
875
|
return func.row_number().over()
|
|
@@ -884,11 +881,7 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
884
881
|
"""
|
|
885
882
|
Create a temporary table from a query for use in a UDF.
|
|
886
883
|
"""
|
|
887
|
-
columns = [
|
|
888
|
-
sqlalchemy.Column(c.name, c.type)
|
|
889
|
-
for c in query.selected_columns
|
|
890
|
-
if c.name != "sys__id"
|
|
891
|
-
]
|
|
884
|
+
columns = [sqlalchemy.Column(c.name, c.type) for c in query.selected_columns]
|
|
892
885
|
table = self.create_udf_table(columns)
|
|
893
886
|
|
|
894
887
|
with tqdm(desc="Preparing", unit=" rows", leave=False) as pbar:
|
|
@@ -5,7 +5,7 @@ import random
|
|
|
5
5
|
import string
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
7
|
from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Union
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Union, cast
|
|
9
9
|
from urllib.parse import urlparse
|
|
10
10
|
|
|
11
11
|
import attrs
|
|
@@ -23,7 +23,7 @@ from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
|
|
|
23
23
|
from datachain.query.batch import RowsOutput
|
|
24
24
|
from datachain.query.schema import ColumnMeta
|
|
25
25
|
from datachain.sql.functions import path as pathfunc
|
|
26
|
-
from datachain.sql.types import
|
|
26
|
+
from datachain.sql.types import SQLType
|
|
27
27
|
from datachain.utils import sql_escape_like
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
|
|
|
32
32
|
_FromClauseArgument,
|
|
33
33
|
_OnClauseArgument,
|
|
34
34
|
)
|
|
35
|
+
from sqlalchemy.sql.selectable import FromClause
|
|
35
36
|
from sqlalchemy.types import TypeEngine
|
|
36
37
|
|
|
37
38
|
from datachain.data_storage import schema
|
|
@@ -248,45 +249,56 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
248
249
|
|
|
249
250
|
def _regenerate_system_columns(
|
|
250
251
|
self,
|
|
251
|
-
selectable: sa.Select
|
|
252
|
+
selectable: sa.Select,
|
|
252
253
|
keep_existing_columns: bool = False,
|
|
254
|
+
regenerate_columns: Iterable[str] | None = None,
|
|
253
255
|
) -> sa.Select:
|
|
254
256
|
"""
|
|
255
|
-
Return a SELECT that regenerates
|
|
257
|
+
Return a SELECT that regenerates system columns deterministically.
|
|
256
258
|
|
|
257
|
-
If keep_existing_columns is True, existing
|
|
258
|
-
|
|
259
|
-
"""
|
|
260
|
-
base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
|
|
261
|
-
|
|
262
|
-
result_columns: dict[str, sa.ColumnElement] = {}
|
|
263
|
-
for col in base.c:
|
|
264
|
-
if col.name in result_columns:
|
|
265
|
-
raise ValueError(f"Duplicate column name {col.name} in SELECT")
|
|
266
|
-
if col.name in ("sys__id", "sys__rand"):
|
|
267
|
-
if keep_existing_columns:
|
|
268
|
-
result_columns[col.name] = col
|
|
269
|
-
else:
|
|
270
|
-
result_columns[col.name] = col
|
|
259
|
+
If keep_existing_columns is True, existing system columns will be kept as-is
|
|
260
|
+
even when they are listed in ``regenerate_columns``.
|
|
271
261
|
|
|
272
|
-
|
|
262
|
+
Args:
|
|
263
|
+
selectable: Base SELECT
|
|
264
|
+
keep_existing_columns: When True, reuse existing system columns even if
|
|
265
|
+
they are part of the regeneration set.
|
|
266
|
+
regenerate_columns: Names of system columns to regenerate. Defaults to
|
|
267
|
+
{"sys__id", "sys__rand"}. Columns not listed are left untouched.
|
|
268
|
+
"""
|
|
269
|
+
system_columns = {
|
|
273
270
|
sys_col.name: sys_col.type
|
|
274
271
|
for sys_col in self.schema.dataset_row_cls.sys_columns()
|
|
275
272
|
}
|
|
273
|
+
regenerate = set(regenerate_columns or system_columns)
|
|
274
|
+
generators = {
|
|
275
|
+
"sys__id": self._system_row_number_expr,
|
|
276
|
+
"sys__rand": self._system_random_expr,
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
base = cast("FromClause", selectable.subquery())
|
|
280
|
+
|
|
281
|
+
def build(name: str) -> sa.ColumnElement:
|
|
282
|
+
expr = generators[name]()
|
|
283
|
+
return sa.cast(expr, system_columns[name]).label(name)
|
|
284
|
+
|
|
285
|
+
columns: list[sa.ColumnElement] = []
|
|
286
|
+
present: set[str] = set()
|
|
287
|
+
changed = False
|
|
288
|
+
|
|
289
|
+
for col in base.c:
|
|
290
|
+
present.add(col.name)
|
|
291
|
+
regen = col.name in regenerate and not keep_existing_columns
|
|
292
|
+
columns.append(build(col.name) if regen else col)
|
|
293
|
+
changed |= regen
|
|
294
|
+
|
|
295
|
+
for name in regenerate - present:
|
|
296
|
+
columns.append(build(name))
|
|
297
|
+
changed = True
|
|
298
|
+
|
|
299
|
+
if not changed:
|
|
300
|
+
return selectable
|
|
276
301
|
|
|
277
|
-
# Add missing system columns if needed
|
|
278
|
-
if "sys__id" not in result_columns:
|
|
279
|
-
expr = self._system_row_number_expr()
|
|
280
|
-
expr = sa.cast(expr, system_types["sys__id"])
|
|
281
|
-
result_columns["sys__id"] = expr.label("sys__id")
|
|
282
|
-
if "sys__rand" not in result_columns:
|
|
283
|
-
expr = self._system_random_expr()
|
|
284
|
-
expr = sa.cast(expr, system_types["sys__rand"])
|
|
285
|
-
result_columns["sys__rand"] = expr.label("sys__rand")
|
|
286
|
-
|
|
287
|
-
# Wrap in subquery to materialize window functions, then wrap again in SELECT
|
|
288
|
-
# This ensures window functions are computed before INSERT...FROM SELECT
|
|
289
|
-
columns = list(result_columns.values())
|
|
290
302
|
inner = sa.select(*columns).select_from(base).subquery()
|
|
291
303
|
return sa.select(*inner.c).select_from(inner)
|
|
292
304
|
|
|
@@ -950,10 +962,15 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
950
962
|
SQLite TEMPORARY tables cannot be directly used as they are process-specific,
|
|
951
963
|
and UDFs are run in other processes when run in parallel.
|
|
952
964
|
"""
|
|
965
|
+
columns = [
|
|
966
|
+
c
|
|
967
|
+
for c in columns
|
|
968
|
+
if c.name not in [col.name for col in self.dataset_row_cls.sys_columns()]
|
|
969
|
+
]
|
|
953
970
|
tbl = sa.Table(
|
|
954
971
|
name or self.udf_table_name(),
|
|
955
972
|
sa.MetaData(),
|
|
956
|
-
|
|
973
|
+
*self.dataset_row_cls.sys_columns(),
|
|
957
974
|
*columns,
|
|
958
975
|
)
|
|
959
976
|
self.db.create_table(tbl, if_not_exists=True)
|
|
@@ -24,7 +24,7 @@ class CompareStatus(str, Enum):
|
|
|
24
24
|
SAME = "S"
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def _compare( # noqa: C901, PLR0912
|
|
27
|
+
def _compare( # noqa: C901
|
|
28
28
|
left: "DataChain",
|
|
29
29
|
right: "DataChain",
|
|
30
30
|
on: str | Sequence[str],
|
|
@@ -151,11 +151,7 @@ def _compare( # noqa: C901, PLR0912
|
|
|
151
151
|
if status_col:
|
|
152
152
|
cols_select.append(diff_col)
|
|
153
153
|
|
|
154
|
-
|
|
155
|
-
# TODO workaround when sys signal is not available in diff
|
|
156
|
-
dc_diff = dc_diff.settings(sys=True).select(*cols_select).settings(sys=False)
|
|
157
|
-
else:
|
|
158
|
-
dc_diff = dc_diff.select(*cols_select)
|
|
154
|
+
dc_diff = dc_diff.select(*cols_select)
|
|
159
155
|
|
|
160
156
|
# final schema is schema from the left chain with status column added if needed
|
|
161
157
|
dc_diff.signals_schema = (
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import posixpath
|
|
2
|
+
import re
|
|
2
3
|
from typing import TYPE_CHECKING
|
|
3
4
|
|
|
4
5
|
from datachain.lib.file import FileError
|
|
@@ -9,7 +10,7 @@ if TYPE_CHECKING:
|
|
|
9
10
|
from datachain.lib.file import Audio, AudioFile, File
|
|
10
11
|
|
|
11
12
|
try:
|
|
12
|
-
import
|
|
13
|
+
import soundfile as sf
|
|
13
14
|
except ImportError as exc:
|
|
14
15
|
raise ImportError(
|
|
15
16
|
"Missing dependencies for processing audio.\n"
|
|
@@ -26,18 +27,25 @@ def audio_info(file: "File | AudioFile") -> "Audio":
|
|
|
26
27
|
|
|
27
28
|
try:
|
|
28
29
|
with file.open() as f:
|
|
29
|
-
info =
|
|
30
|
+
info = sf.info(f)
|
|
31
|
+
|
|
32
|
+
sample_rate = int(info.samplerate)
|
|
33
|
+
channels = int(info.channels)
|
|
34
|
+
frames = int(info.frames)
|
|
35
|
+
duration = float(info.duration)
|
|
30
36
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
37
|
+
# soundfile provides format and subtype
|
|
38
|
+
if info.format:
|
|
39
|
+
format_name = info.format.lower()
|
|
40
|
+
else:
|
|
41
|
+
format_name = file.get_file_ext().lower()
|
|
35
42
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
43
|
+
if not format_name:
|
|
44
|
+
format_name = "unknown"
|
|
45
|
+
codec_name = info.subtype if info.subtype else ""
|
|
39
46
|
|
|
40
|
-
|
|
47
|
+
# Calculate bit rate from subtype
|
|
48
|
+
bits_per_sample = _get_bits_per_sample(info.subtype)
|
|
41
49
|
bit_rate = (
|
|
42
50
|
bits_per_sample * sample_rate * channels if bits_per_sample > 0 else -1
|
|
43
51
|
)
|
|
@@ -58,44 +66,39 @@ def audio_info(file: "File | AudioFile") -> "Audio":
|
|
|
58
66
|
)
|
|
59
67
|
|
|
60
68
|
|
|
61
|
-
def
|
|
69
|
+
def _get_bits_per_sample(subtype: str) -> int:
|
|
62
70
|
"""
|
|
63
|
-
Map
|
|
71
|
+
Map soundfile subtype to bits per sample.
|
|
64
72
|
|
|
65
73
|
Args:
|
|
66
|
-
|
|
67
|
-
file_ext: The file extension as a fallback
|
|
74
|
+
subtype: The subtype string from soundfile
|
|
68
75
|
|
|
69
76
|
Returns:
|
|
70
|
-
|
|
77
|
+
Bits per sample, or 0 if unknown
|
|
71
78
|
"""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"
|
|
79
|
+
if not subtype:
|
|
80
|
+
return 0
|
|
81
|
+
|
|
82
|
+
# Common PCM and floating-point subtypes
|
|
83
|
+
pcm_bits = {
|
|
84
|
+
"PCM_16": 16,
|
|
85
|
+
"PCM_24": 24,
|
|
86
|
+
"PCM_32": 32,
|
|
87
|
+
"PCM_S8": 8,
|
|
88
|
+
"PCM_U8": 8,
|
|
89
|
+
"FLOAT": 32,
|
|
90
|
+
"DOUBLE": 64,
|
|
81
91
|
}
|
|
82
92
|
|
|
83
|
-
if
|
|
84
|
-
return
|
|
93
|
+
if subtype in pcm_bits:
|
|
94
|
+
return pcm_bits[subtype]
|
|
85
95
|
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"wav": "wav",
|
|
91
|
-
"aiff": "aiff",
|
|
92
|
-
"au": "au",
|
|
93
|
-
"raw": "raw",
|
|
94
|
-
}
|
|
95
|
-
return pcm_formats.get(file_ext, "wav") # Default to wav for PCM
|
|
96
|
+
# Handle variants such as PCM_S16LE, PCM_F32LE, etc.
|
|
97
|
+
match = re.search(r"PCM_(?:[A-Z]*?)(\d+)", subtype)
|
|
98
|
+
if match:
|
|
99
|
+
return int(match.group(1))
|
|
96
100
|
|
|
97
|
-
|
|
98
|
-
return file_ext if file_ext else "unknown"
|
|
101
|
+
return 0
|
|
99
102
|
|
|
100
103
|
|
|
101
104
|
def audio_to_np(
|
|
@@ -114,27 +117,27 @@ def audio_to_np(
|
|
|
114
117
|
|
|
115
118
|
try:
|
|
116
119
|
with audio.open() as f:
|
|
117
|
-
info =
|
|
118
|
-
sample_rate = info.
|
|
120
|
+
info = sf.info(f)
|
|
121
|
+
sample_rate = info.samplerate
|
|
119
122
|
|
|
120
123
|
frame_offset = int(start * sample_rate)
|
|
121
124
|
num_frames = int(duration * sample_rate) if duration is not None else -1
|
|
122
125
|
|
|
123
126
|
# Reset file pointer to the beginning
|
|
124
|
-
# This is important to ensure we read from the correct position later
|
|
125
127
|
f.seek(0)
|
|
126
128
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
+
# Read audio data with offset and frame count
|
|
130
|
+
audio_np, sr = sf.read(
|
|
131
|
+
f,
|
|
132
|
+
start=frame_offset,
|
|
133
|
+
frames=num_frames,
|
|
134
|
+
always_2d=False,
|
|
135
|
+
dtype="float32",
|
|
129
136
|
)
|
|
130
137
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
audio_np = audio_np.T
|
|
135
|
-
else:
|
|
136
|
-
audio_np = audio_np.squeeze()
|
|
137
|
-
|
|
138
|
+
# soundfile returns shape (frames,) for mono or
|
|
139
|
+
# (frames, channels) for multi-channel
|
|
140
|
+
# We keep this format as it matches expected output
|
|
138
141
|
return audio_np, int(sr)
|
|
139
142
|
except Exception as exc:
|
|
140
143
|
raise FileError(
|
|
@@ -152,11 +155,9 @@ def audio_to_bytes(
|
|
|
152
155
|
|
|
153
156
|
If duration is None, converts from start to end of file.
|
|
154
157
|
If start is 0 and duration is None, converts entire file."""
|
|
155
|
-
y, sr = audio_to_np(audio, start, duration)
|
|
156
|
-
|
|
157
158
|
import io
|
|
158
159
|
|
|
159
|
-
|
|
160
|
+
y, sr = audio_to_np(audio, start, duration)
|
|
160
161
|
|
|
161
162
|
buffer = io.BytesIO()
|
|
162
163
|
sf.write(buffer, y, sr, format=format)
|
|
@@ -856,7 +856,9 @@ class DataChain:
|
|
|
856
856
|
udf_obj.to_udf_wrapper(self._settings.batch_size),
|
|
857
857
|
**self._settings.to_dict(),
|
|
858
858
|
),
|
|
859
|
-
signal_schema=
|
|
859
|
+
signal_schema=SignalSchema({"sys": Sys})
|
|
860
|
+
| self.signals_schema
|
|
861
|
+
| udf_obj.output,
|
|
860
862
|
)
|
|
861
863
|
|
|
862
864
|
def gen(
|
|
@@ -894,7 +896,7 @@ class DataChain:
|
|
|
894
896
|
udf_obj.to_udf_wrapper(self._settings.batch_size),
|
|
895
897
|
**self._settings.to_dict(),
|
|
896
898
|
),
|
|
897
|
-
signal_schema=udf_obj.output,
|
|
899
|
+
signal_schema=SignalSchema({"sys": Sys}) | udf_obj.output,
|
|
898
900
|
)
|
|
899
901
|
|
|
900
902
|
@delta_disabled
|
|
@@ -1031,7 +1033,7 @@ class DataChain:
|
|
|
1031
1033
|
partition_by=processed_partition_by,
|
|
1032
1034
|
**self._settings.to_dict(),
|
|
1033
1035
|
),
|
|
1034
|
-
signal_schema=udf_obj.output,
|
|
1036
|
+
signal_schema=SignalSchema({"sys": Sys}) | udf_obj.output,
|
|
1035
1037
|
)
|
|
1036
1038
|
|
|
1037
1039
|
def batch_map(
|
|
@@ -1097,11 +1099,7 @@ class DataChain:
|
|
|
1097
1099
|
sign = UdfSignature.parse(name, signal_map, func, params, output, is_generator)
|
|
1098
1100
|
DataModel.register(list(sign.output_schema.values.values()))
|
|
1099
1101
|
|
|
1100
|
-
|
|
1101
|
-
if self._sys:
|
|
1102
|
-
signals_schema = SignalSchema({"sys": Sys}) | signals_schema
|
|
1103
|
-
|
|
1104
|
-
params_schema = signals_schema.slice(
|
|
1102
|
+
params_schema = self.signals_schema.slice(
|
|
1105
1103
|
sign.params, self._setup, is_batch=is_batch
|
|
1106
1104
|
)
|
|
1107
1105
|
|
|
@@ -1156,11 +1154,9 @@ class DataChain:
|
|
|
1156
1154
|
)
|
|
1157
1155
|
)
|
|
1158
1156
|
|
|
1159
|
-
def select(self, *args: str
|
|
1157
|
+
def select(self, *args: str) -> "Self":
|
|
1160
1158
|
"""Select only a specified set of signals."""
|
|
1161
1159
|
new_schema = self.signals_schema.resolve(*args)
|
|
1162
|
-
if self._sys and _sys:
|
|
1163
|
-
new_schema = SignalSchema({"sys": Sys}) | new_schema
|
|
1164
1160
|
columns = new_schema.db_signals()
|
|
1165
1161
|
return self._evolve(
|
|
1166
1162
|
query=self._query.select(*columns), signal_schema=new_schema
|
|
@@ -1710,9 +1706,11 @@ class DataChain:
|
|
|
1710
1706
|
|
|
1711
1707
|
signals_schema = self.signals_schema.clone_without_sys_signals()
|
|
1712
1708
|
right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1709
|
+
|
|
1710
|
+
ds.signals_schema = signals_schema.merge(right_signals_schema, rname)
|
|
1711
|
+
|
|
1712
|
+
if not full:
|
|
1713
|
+
ds.signals_schema = SignalSchema({"sys": Sys}) | ds.signals_schema
|
|
1716
1714
|
|
|
1717
1715
|
return ds
|
|
1718
1716
|
|
|
@@ -1723,6 +1721,7 @@ class DataChain:
|
|
|
1723
1721
|
Parameters:
|
|
1724
1722
|
other: chain whose rows will be added to `self`.
|
|
1725
1723
|
"""
|
|
1724
|
+
self.signals_schema = self.signals_schema.clone_without_sys_signals()
|
|
1726
1725
|
return self._evolve(query=self._query.union(other._query))
|
|
1727
1726
|
|
|
1728
1727
|
def subtract( # type: ignore[override]
|
|
@@ -438,9 +438,6 @@ class UDFStep(Step, ABC):
|
|
|
438
438
|
"""
|
|
439
439
|
|
|
440
440
|
def populate_udf_table(self, udf_table: "Table", query: Select) -> None:
|
|
441
|
-
if "sys__id" not in query.selected_columns:
|
|
442
|
-
raise RuntimeError("Query must have sys__id column to run UDF")
|
|
443
|
-
|
|
444
441
|
if (rows_total := self.catalog.warehouse.query_count(query)) == 0:
|
|
445
442
|
return
|
|
446
443
|
|
|
@@ -634,12 +631,11 @@ class UDFStep(Step, ABC):
|
|
|
634
631
|
|
|
635
632
|
# Apply partitioning if needed.
|
|
636
633
|
if self.partition_by is not None:
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
634
|
+
_query = query = self.catalog.warehouse._regenerate_system_columns(
|
|
635
|
+
query_generator.select(),
|
|
636
|
+
keep_existing_columns=True,
|
|
637
|
+
regenerate_columns=["sys__id"],
|
|
638
|
+
)
|
|
643
639
|
partition_tbl = self.create_partitions_table(query)
|
|
644
640
|
temp_tables.append(partition_tbl.name)
|
|
645
641
|
query = query.outerjoin(
|
|
@@ -960,28 +956,23 @@ class SQLUnion(Step):
|
|
|
960
956
|
q2 = self.query2.apply_steps().select().subquery()
|
|
961
957
|
temp_tables.extend(self.query2.temp_table_names)
|
|
962
958
|
|
|
963
|
-
columns1
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
sqlalchemy.select(*columns2)
|
|
967
|
-
)
|
|
968
|
-
union_cte = union_select.cte()
|
|
969
|
-
regenerated = self.query1.catalog.warehouse._regenerate_system_columns(
|
|
970
|
-
union_cte
|
|
971
|
-
)
|
|
972
|
-
result_columns = tuple(regenerated.selected_columns)
|
|
959
|
+
columns1 = _drop_system_columns(q1.columns)
|
|
960
|
+
columns2 = _drop_system_columns(q2.columns)
|
|
961
|
+
columns1, columns2 = _order_columns(columns1, columns2)
|
|
973
962
|
|
|
974
963
|
def q(*columns):
|
|
975
|
-
|
|
976
|
-
|
|
964
|
+
selected_names = [c.name for c in columns]
|
|
965
|
+
col1 = [c for c in columns1 if c.name in selected_names]
|
|
966
|
+
col2 = [c for c in columns2 if c.name in selected_names]
|
|
967
|
+
union_query = sqlalchemy.select(*col1).union_all(sqlalchemy.select(*col2))
|
|
977
968
|
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
return
|
|
969
|
+
union_cte = union_query.cte()
|
|
970
|
+
select_cols = [union_cte.c[name] for name in selected_names]
|
|
971
|
+
return sqlalchemy.select(*select_cols)
|
|
981
972
|
|
|
982
973
|
return step_result(
|
|
983
974
|
q,
|
|
984
|
-
|
|
975
|
+
columns1,
|
|
985
976
|
dependencies=self.query1.dependencies | self.query2.dependencies,
|
|
986
977
|
)
|
|
987
978
|
|
|
@@ -1070,7 +1061,7 @@ class SQLJoin(Step):
|
|
|
1070
1061
|
q1 = self.get_query(self.query1, temp_tables)
|
|
1071
1062
|
q2 = self.get_query(self.query2, temp_tables)
|
|
1072
1063
|
|
|
1073
|
-
q1_columns = list(q1.c)
|
|
1064
|
+
q1_columns = _drop_system_columns(q1.c) if self.full else list(q1.c)
|
|
1074
1065
|
q1_column_names = {c.name for c in q1_columns}
|
|
1075
1066
|
|
|
1076
1067
|
q2_columns = []
|
|
@@ -1211,6 +1202,10 @@ def _order_columns(
|
|
|
1211
1202
|
return [[d[n] for n in column_order] for d in column_dicts]
|
|
1212
1203
|
|
|
1213
1204
|
|
|
1205
|
+
def _drop_system_columns(columns: Iterable[ColumnElement]) -> list[ColumnElement]:
|
|
1206
|
+
return [c for c in columns if not c.name.startswith("sys__")]
|
|
1207
|
+
|
|
1208
|
+
|
|
1214
1209
|
@attrs.define
|
|
1215
1210
|
class ResultIter:
|
|
1216
1211
|
_row_iter: Iterable[Any]
|