datachain 0.36.0__tar.gz → 0.36.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.36.0 → datachain-0.36.2}/PKG-INFO +3 -2
- {datachain-0.36.0 → datachain-0.36.2}/pyproject.toml +8 -3
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/metastore.py +35 -23
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/schema.py +1 -2
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/sqlite.py +27 -10
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/warehouse.py +50 -33
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/diff/__init__.py +2 -6
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/audio.py +54 -53
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/datachain.py +13 -14
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/dataset.py +21 -26
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/dispatch.py +64 -42
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/queue.py +2 -1
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/PKG-INFO +3 -2
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/requires.txt +2 -1
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_datachain.py +1 -1
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_datachain_merge.py +7 -18
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_retry.py +0 -1
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_udf.py +116 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_audio.py +31 -37
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_datachain.py +15 -13
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_datachain_hash.py +1 -1
- {datachain-0.36.0 → datachain-0.36.2}/.cruft.json +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.gitattributes +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/codecov.yaml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/dependabot.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/release.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/tests.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.gitignore +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/.pre-commit-config.yaml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/LICENSE +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/README.rst +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/api_hooks.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/assets/webhook_list.png +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/login.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/logout.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/team.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/auth/token.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/cancel.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/clusters.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/logs.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/ls.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/commands/job/run.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/contributing.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/examples.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/db_migrations.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/delta.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/env.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/namespaces.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/processing.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/remotes.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/guide/retry.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/overrides/main.html +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/quick-start.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/file.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/pose.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/segment.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/datachain.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/func.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/array.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/conditional.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/numeric.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/path.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/random.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/string.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/functions/window.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/index.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/toolkit.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/torch.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/references/udf.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/studio/webhooks.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/templates/main.dot +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/templates/operation.dot +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/templates/responses.def +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/docs/tutorials.md +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/mkdocs.yml +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/noxfile.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/setup.cfg +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/__main__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/asyn.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cache.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/dependency.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/checkpoint.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/cli/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/gcs.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/http.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/local.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/config.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/dataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/delta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/error.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/fs/reference.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/fs/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/base.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/func.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/func/window.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/hash_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/job.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/projects.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/video.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/model/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/namespace.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/node.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/plugins.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/progress.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/project.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/py.typed +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/batch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/params.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/session.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/script_meta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/semver.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain/utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/conftest.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/data.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/examples/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/examples/test_examples.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/data/lena.jpg +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/functions/test_string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_audio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_client.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_data_storage.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_delta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_file.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_image.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_ls.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_metastore.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_mutate.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_pull.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_read_database.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_session.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_to_database.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_union.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_video.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/func/test_warehouse.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/test_atomicity.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/test_import_time.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/test_telemetry.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_batching.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client_http.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_config.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_func.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_metastore.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_semver.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_serializer.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_session.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_utils.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.36.0 → datachain-0.36.2}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.36.0
|
|
3
|
+
Version: 0.36.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -64,7 +64,6 @@ Requires-Dist: torch>=2.1.0; extra == "torch"
|
|
|
64
64
|
Requires-Dist: torchvision; extra == "torch"
|
|
65
65
|
Requires-Dist: transformers>=4.36.0; extra == "torch"
|
|
66
66
|
Provides-Extra: audio
|
|
67
|
-
Requires-Dist: torchaudio; extra == "audio"
|
|
68
67
|
Requires-Dist: soundfile; extra == "audio"
|
|
69
68
|
Provides-Extra: remote
|
|
70
69
|
Requires-Dist: lz4; extra == "remote"
|
|
@@ -76,6 +75,7 @@ Requires-Dist: numba>=0.60.0; extra == "hf"
|
|
|
76
75
|
Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
|
|
77
76
|
Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
|
|
78
77
|
Requires-Dist: fsspec>=2024.12.0; extra == "hf"
|
|
78
|
+
Requires-Dist: torch<2.9.0; extra == "hf"
|
|
79
79
|
Provides-Extra: video
|
|
80
80
|
Requires-Dist: ffmpeg-python; extra == "video"
|
|
81
81
|
Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
|
|
@@ -117,6 +117,7 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
|
117
117
|
Requires-Dist: ultralytics; extra == "examples"
|
|
118
118
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
119
119
|
Requires-Dist: openai; extra == "examples"
|
|
120
|
+
Requires-Dist: torchaudio<2.9.0; extra == "examples"
|
|
120
121
|
Dynamic: license-file
|
|
121
122
|
|
|
122
123
|
================
|
|
@@ -73,7 +73,6 @@ torch = [
|
|
|
73
73
|
"transformers>=4.36.0"
|
|
74
74
|
]
|
|
75
75
|
audio = [
|
|
76
|
-
"torchaudio",
|
|
77
76
|
"soundfile"
|
|
78
77
|
]
|
|
79
78
|
remote = [
|
|
@@ -88,7 +87,11 @@ hf = [
|
|
|
88
87
|
"datasets[vision]>=4.0.0",
|
|
89
88
|
# https://github.com/pytorch/torchcodec/issues/640
|
|
90
89
|
"datasets[audio]>=4.0.0 ; (sys_platform == 'linux' or sys_platform == 'darwin')",
|
|
91
|
-
"fsspec>=2024.12.0"
|
|
90
|
+
"fsspec>=2024.12.0",
|
|
91
|
+
# Until datasets solve the issue, run test_hf_audio test to see if this can be removed
|
|
92
|
+
# https://github.com/meta-pytorch/torchcodec/issues/912
|
|
93
|
+
# https://github.com/huggingface/transformers/pull/41610
|
|
94
|
+
"torch<2.9.0"
|
|
92
95
|
]
|
|
93
96
|
video = [
|
|
94
97
|
"ffmpeg-python",
|
|
@@ -134,7 +137,9 @@ examples = [
|
|
|
134
137
|
"huggingface_hub[hf_transfer]",
|
|
135
138
|
"ultralytics",
|
|
136
139
|
"open_clip_torch",
|
|
137
|
-
"openai"
|
|
140
|
+
"openai",
|
|
141
|
+
# Transformers still require it
|
|
142
|
+
"torchaudio<2.9.0"
|
|
138
143
|
]
|
|
139
144
|
|
|
140
145
|
[project.urls]
|
|
@@ -56,13 +56,15 @@ from datachain.project import Project
|
|
|
56
56
|
from datachain.utils import JSONSerialize
|
|
57
57
|
|
|
58
58
|
if TYPE_CHECKING:
|
|
59
|
-
from sqlalchemy import Delete, Insert, Select, Update
|
|
59
|
+
from sqlalchemy import CTE, Delete, Insert, Select, Subquery, Update
|
|
60
60
|
from sqlalchemy.schema import SchemaItem
|
|
61
|
+
from sqlalchemy.sql.elements import ColumnElement
|
|
61
62
|
|
|
62
63
|
from datachain.data_storage import schema
|
|
63
64
|
from datachain.data_storage.db_engine import DatabaseEngine
|
|
64
65
|
|
|
65
66
|
logger = logging.getLogger("datachain")
|
|
67
|
+
DEPTH_LIMIT_DEFAULT = 100
|
|
66
68
|
|
|
67
69
|
|
|
68
70
|
class AbstractMetastore(ABC, Serializable):
|
|
@@ -1463,6 +1465,18 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1463
1465
|
Returns a list of columns to select in a query for fetching dataset dependencies
|
|
1464
1466
|
"""
|
|
1465
1467
|
|
|
1468
|
+
@abstractmethod
|
|
1469
|
+
def _dataset_dependency_nodes_select_columns(
|
|
1470
|
+
self,
|
|
1471
|
+
namespaces_subquery: "Subquery",
|
|
1472
|
+
dependency_tree_cte: "CTE",
|
|
1473
|
+
datasets_subquery: "Subquery",
|
|
1474
|
+
) -> list["ColumnElement"]:
|
|
1475
|
+
"""
|
|
1476
|
+
Returns a list of columns to select in a query for fetching
|
|
1477
|
+
dataset dependency nodes.
|
|
1478
|
+
"""
|
|
1479
|
+
|
|
1466
1480
|
def get_direct_dataset_dependencies(
|
|
1467
1481
|
self, dataset: DatasetRecord, version: str
|
|
1468
1482
|
) -> list[DatasetDependency | None]:
|
|
@@ -1493,7 +1507,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1493
1507
|
return [self.dependency_class.parse(*r) for r in self.db.execute(query)]
|
|
1494
1508
|
|
|
1495
1509
|
def get_dataset_dependency_nodes(
|
|
1496
|
-
self, dataset_id: int, version_id: int
|
|
1510
|
+
self, dataset_id: int, version_id: int, depth_limit: int = DEPTH_LIMIT_DEFAULT
|
|
1497
1511
|
) -> list[DatasetDependencyNode | None]:
|
|
1498
1512
|
n = self._namespaces_select().subquery()
|
|
1499
1513
|
p = self._projects
|
|
@@ -1522,33 +1536,31 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1522
1536
|
cte = base_query.cte(name="dependency_tree", recursive=True)
|
|
1523
1537
|
|
|
1524
1538
|
# Recursive case: dependencies of dependencies
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
(
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
dd,
|
|
1531
|
-
(cte.c.dataset_id == dd.c.source_dataset_id)
|
|
1532
|
-
& (cte.c.dataset_version_id == dd.c.source_dataset_version_id),
|
|
1539
|
+
# Limit recursion to depth_limit levels (DEPTH_LIMIT_DEFAULT = 100 by default) to prevent infinite loops in case of circular dependencies
|
|
1540
|
+
recursive_query = (
|
|
1541
|
+
select(
|
|
1542
|
+
*dep_fields,
|
|
1543
|
+
(cte.c.depth + 1).label("depth"),
|
|
1533
1544
|
)
|
|
1545
|
+
.select_from(
|
|
1546
|
+
cte.join(
|
|
1547
|
+
dd,
|
|
1548
|
+
(cte.c.dataset_id == dd.c.source_dataset_id)
|
|
1549
|
+
& (cte.c.dataset_version_id == dd.c.source_dataset_version_id),
|
|
1550
|
+
)
|
|
1551
|
+
)
|
|
1552
|
+
.where(cte.c.depth < depth_limit)
|
|
1534
1553
|
)
|
|
1535
1554
|
|
|
1536
1555
|
cte = cte.union(recursive_query)
|
|
1537
1556
|
|
|
1538
1557
|
# Fetch all with full details
|
|
1539
|
-
|
|
1540
|
-
n
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
d.c.name,
|
|
1546
|
-
dv.c.version,
|
|
1547
|
-
dv.c.created_at,
|
|
1548
|
-
cte.c.source_dataset_id,
|
|
1549
|
-
cte.c.source_dataset_version_id,
|
|
1550
|
-
cte.c.depth,
|
|
1551
|
-
).select_from(
|
|
1558
|
+
select_cols = self._dataset_dependency_nodes_select_columns(
|
|
1559
|
+
namespaces_subquery=n,
|
|
1560
|
+
dependency_tree_cte=cte,
|
|
1561
|
+
datasets_subquery=d,
|
|
1562
|
+
)
|
|
1563
|
+
final_query = self._datasets_dependencies_select(*select_cols).select_from(
|
|
1552
1564
|
# Use outer joins to handle cases where dependent datasets have been
|
|
1553
1565
|
# physically deleted. This allows us to return dependency records with
|
|
1554
1566
|
# None values instead of silently omitting them, making broken
|
|
@@ -11,7 +11,6 @@ from datachain.sql.types import (
|
|
|
11
11
|
JSON,
|
|
12
12
|
Boolean,
|
|
13
13
|
DateTime,
|
|
14
|
-
Int,
|
|
15
14
|
Int64,
|
|
16
15
|
SQLType,
|
|
17
16
|
String,
|
|
@@ -269,7 +268,7 @@ class DataTable:
|
|
|
269
268
|
@classmethod
|
|
270
269
|
def sys_columns(cls):
|
|
271
270
|
return [
|
|
272
|
-
sa.Column("sys__id", Int, primary_key=True),
|
|
271
|
+
sa.Column("sys__id", UInt64, primary_key=True),
|
|
273
272
|
sa.Column(
|
|
274
273
|
"sys__rand", UInt64, nullable=False, server_default=f.abs(f.random())
|
|
275
274
|
),
|
|
@@ -20,7 +20,10 @@ from sqlalchemy import (
|
|
|
20
20
|
from sqlalchemy.dialects import sqlite
|
|
21
21
|
from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
|
|
22
22
|
from sqlalchemy.sql import func
|
|
23
|
-
from sqlalchemy.sql.elements import
|
|
23
|
+
from sqlalchemy.sql.elements import (
|
|
24
|
+
BinaryExpression,
|
|
25
|
+
BooleanClauseList,
|
|
26
|
+
)
|
|
24
27
|
from sqlalchemy.sql.expression import bindparam, cast
|
|
25
28
|
from sqlalchemy.sql.selectable import Select
|
|
26
29
|
from tqdm.auto import tqdm
|
|
@@ -41,6 +44,7 @@ from datachain.sql.types import SQLType
|
|
|
41
44
|
from datachain.utils import DataChainDir, batched, batched_it
|
|
42
45
|
|
|
43
46
|
if TYPE_CHECKING:
|
|
47
|
+
from sqlalchemy import CTE, Subquery
|
|
44
48
|
from sqlalchemy.dialects.sqlite import Insert
|
|
45
49
|
from sqlalchemy.engine.base import Engine
|
|
46
50
|
from sqlalchemy.schema import SchemaItem
|
|
@@ -539,6 +543,26 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
539
543
|
self._datasets_versions.c.created_at,
|
|
540
544
|
]
|
|
541
545
|
|
|
546
|
+
def _dataset_dependency_nodes_select_columns(
|
|
547
|
+
self,
|
|
548
|
+
namespaces_subquery: "Subquery",
|
|
549
|
+
dependency_tree_cte: "CTE",
|
|
550
|
+
datasets_subquery: "Subquery",
|
|
551
|
+
) -> list["ColumnElement"]:
|
|
552
|
+
return [
|
|
553
|
+
namespaces_subquery.c.name,
|
|
554
|
+
self._projects.c.name,
|
|
555
|
+
dependency_tree_cte.c.id,
|
|
556
|
+
dependency_tree_cte.c.dataset_id,
|
|
557
|
+
dependency_tree_cte.c.dataset_version_id,
|
|
558
|
+
datasets_subquery.c.name,
|
|
559
|
+
self._datasets_versions.c.version,
|
|
560
|
+
self._datasets_versions.c.created_at,
|
|
561
|
+
dependency_tree_cte.c.source_dataset_id,
|
|
562
|
+
dependency_tree_cte.c.source_dataset_version_id,
|
|
563
|
+
dependency_tree_cte.c.depth,
|
|
564
|
+
]
|
|
565
|
+
|
|
542
566
|
#
|
|
543
567
|
# Jobs
|
|
544
568
|
#
|
|
@@ -868,11 +892,8 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
868
892
|
if isinstance(c, BinaryExpression):
|
|
869
893
|
right_left_join = add_left_rows_filter(c)
|
|
870
894
|
|
|
871
|
-
# Use CTE instead of subquery to force SQLite to materialize the result
|
|
872
|
-
# This breaks deep nesting and prevents parser stack overflow.
|
|
873
895
|
union_cte = sqlalchemy.union(left_right_join, right_left_join).cte()
|
|
874
|
-
|
|
875
|
-
return self._regenerate_system_columns(union_cte)
|
|
896
|
+
return sqlalchemy.select(*union_cte.c).select_from(union_cte)
|
|
876
897
|
|
|
877
898
|
def _system_row_number_expr(self):
|
|
878
899
|
return func.row_number().over()
|
|
@@ -884,11 +905,7 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
884
905
|
"""
|
|
885
906
|
Create a temporary table from a query for use in a UDF.
|
|
886
907
|
"""
|
|
887
|
-
columns = [
|
|
888
|
-
sqlalchemy.Column(c.name, c.type)
|
|
889
|
-
for c in query.selected_columns
|
|
890
|
-
if c.name != "sys__id"
|
|
891
|
-
]
|
|
908
|
+
columns = [sqlalchemy.Column(c.name, c.type) for c in query.selected_columns]
|
|
892
909
|
table = self.create_udf_table(columns)
|
|
893
910
|
|
|
894
911
|
with tqdm(desc="Preparing", unit=" rows", leave=False) as pbar:
|
|
@@ -5,7 +5,7 @@ import random
|
|
|
5
5
|
import string
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
7
|
from collections.abc import Callable, Generator, Iterable, Iterator, Sequence
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Union
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Union, cast
|
|
9
9
|
from urllib.parse import urlparse
|
|
10
10
|
|
|
11
11
|
import attrs
|
|
@@ -23,7 +23,7 @@ from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
|
|
|
23
23
|
from datachain.query.batch import RowsOutput
|
|
24
24
|
from datachain.query.schema import ColumnMeta
|
|
25
25
|
from datachain.sql.functions import path as pathfunc
|
|
26
|
-
from datachain.sql.types import
|
|
26
|
+
from datachain.sql.types import SQLType
|
|
27
27
|
from datachain.utils import sql_escape_like
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
|
|
|
32
32
|
_FromClauseArgument,
|
|
33
33
|
_OnClauseArgument,
|
|
34
34
|
)
|
|
35
|
+
from sqlalchemy.sql.selectable import FromClause
|
|
35
36
|
from sqlalchemy.types import TypeEngine
|
|
36
37
|
|
|
37
38
|
from datachain.data_storage import schema
|
|
@@ -248,45 +249,56 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
248
249
|
|
|
249
250
|
def _regenerate_system_columns(
|
|
250
251
|
self,
|
|
251
|
-
selectable: sa.Select
|
|
252
|
+
selectable: sa.Select,
|
|
252
253
|
keep_existing_columns: bool = False,
|
|
254
|
+
regenerate_columns: Iterable[str] | None = None,
|
|
253
255
|
) -> sa.Select:
|
|
254
256
|
"""
|
|
255
|
-
Return a SELECT that regenerates
|
|
257
|
+
Return a SELECT that regenerates system columns deterministically.
|
|
256
258
|
|
|
257
|
-
If keep_existing_columns is True, existing
|
|
258
|
-
|
|
259
|
-
"""
|
|
260
|
-
base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
|
|
261
|
-
|
|
262
|
-
result_columns: dict[str, sa.ColumnElement] = {}
|
|
263
|
-
for col in base.c:
|
|
264
|
-
if col.name in result_columns:
|
|
265
|
-
raise ValueError(f"Duplicate column name {col.name} in SELECT")
|
|
266
|
-
if col.name in ("sys__id", "sys__rand"):
|
|
267
|
-
if keep_existing_columns:
|
|
268
|
-
result_columns[col.name] = col
|
|
269
|
-
else:
|
|
270
|
-
result_columns[col.name] = col
|
|
259
|
+
If keep_existing_columns is True, existing system columns will be kept as-is
|
|
260
|
+
even when they are listed in ``regenerate_columns``.
|
|
271
261
|
|
|
272
|
-
|
|
262
|
+
Args:
|
|
263
|
+
selectable: Base SELECT
|
|
264
|
+
keep_existing_columns: When True, reuse existing system columns even if
|
|
265
|
+
they are part of the regeneration set.
|
|
266
|
+
regenerate_columns: Names of system columns to regenerate. Defaults to
|
|
267
|
+
{"sys__id", "sys__rand"}. Columns not listed are left untouched.
|
|
268
|
+
"""
|
|
269
|
+
system_columns = {
|
|
273
270
|
sys_col.name: sys_col.type
|
|
274
271
|
for sys_col in self.schema.dataset_row_cls.sys_columns()
|
|
275
272
|
}
|
|
273
|
+
regenerate = set(regenerate_columns or system_columns)
|
|
274
|
+
generators = {
|
|
275
|
+
"sys__id": self._system_row_number_expr,
|
|
276
|
+
"sys__rand": self._system_random_expr,
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
base = cast("FromClause", selectable.subquery())
|
|
280
|
+
|
|
281
|
+
def build(name: str) -> sa.ColumnElement:
|
|
282
|
+
expr = generators[name]()
|
|
283
|
+
return sa.cast(expr, system_columns[name]).label(name)
|
|
284
|
+
|
|
285
|
+
columns: list[sa.ColumnElement] = []
|
|
286
|
+
present: set[str] = set()
|
|
287
|
+
changed = False
|
|
288
|
+
|
|
289
|
+
for col in base.c:
|
|
290
|
+
present.add(col.name)
|
|
291
|
+
regen = col.name in regenerate and not keep_existing_columns
|
|
292
|
+
columns.append(build(col.name) if regen else col)
|
|
293
|
+
changed |= regen
|
|
294
|
+
|
|
295
|
+
for name in regenerate - present:
|
|
296
|
+
columns.append(build(name))
|
|
297
|
+
changed = True
|
|
298
|
+
|
|
299
|
+
if not changed:
|
|
300
|
+
return selectable
|
|
276
301
|
|
|
277
|
-
# Add missing system columns if needed
|
|
278
|
-
if "sys__id" not in result_columns:
|
|
279
|
-
expr = self._system_row_number_expr()
|
|
280
|
-
expr = sa.cast(expr, system_types["sys__id"])
|
|
281
|
-
result_columns["sys__id"] = expr.label("sys__id")
|
|
282
|
-
if "sys__rand" not in result_columns:
|
|
283
|
-
expr = self._system_random_expr()
|
|
284
|
-
expr = sa.cast(expr, system_types["sys__rand"])
|
|
285
|
-
result_columns["sys__rand"] = expr.label("sys__rand")
|
|
286
|
-
|
|
287
|
-
# Wrap in subquery to materialize window functions, then wrap again in SELECT
|
|
288
|
-
# This ensures window functions are computed before INSERT...FROM SELECT
|
|
289
|
-
columns = list(result_columns.values())
|
|
290
302
|
inner = sa.select(*columns).select_from(base).subquery()
|
|
291
303
|
return sa.select(*inner.c).select_from(inner)
|
|
292
304
|
|
|
@@ -950,10 +962,15 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
950
962
|
SQLite TEMPORARY tables cannot be directly used as they are process-specific,
|
|
951
963
|
and UDFs are run in other processes when run in parallel.
|
|
952
964
|
"""
|
|
965
|
+
columns = [
|
|
966
|
+
c
|
|
967
|
+
for c in columns
|
|
968
|
+
if c.name not in [col.name for col in self.dataset_row_cls.sys_columns()]
|
|
969
|
+
]
|
|
953
970
|
tbl = sa.Table(
|
|
954
971
|
name or self.udf_table_name(),
|
|
955
972
|
sa.MetaData(),
|
|
956
|
-
|
|
973
|
+
*self.dataset_row_cls.sys_columns(),
|
|
957
974
|
*columns,
|
|
958
975
|
)
|
|
959
976
|
self.db.create_table(tbl, if_not_exists=True)
|
|
@@ -24,7 +24,7 @@ class CompareStatus(str, Enum):
|
|
|
24
24
|
SAME = "S"
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def _compare( # noqa: C901, PLR0912
|
|
27
|
+
def _compare( # noqa: C901
|
|
28
28
|
left: "DataChain",
|
|
29
29
|
right: "DataChain",
|
|
30
30
|
on: str | Sequence[str],
|
|
@@ -151,11 +151,7 @@ def _compare( # noqa: C901, PLR0912
|
|
|
151
151
|
if status_col:
|
|
152
152
|
cols_select.append(diff_col)
|
|
153
153
|
|
|
154
|
-
|
|
155
|
-
# TODO workaround when sys signal is not available in diff
|
|
156
|
-
dc_diff = dc_diff.settings(sys=True).select(*cols_select).settings(sys=False)
|
|
157
|
-
else:
|
|
158
|
-
dc_diff = dc_diff.select(*cols_select)
|
|
154
|
+
dc_diff = dc_diff.select(*cols_select)
|
|
159
155
|
|
|
160
156
|
# final schema is schema from the left chain with status column added if needed
|
|
161
157
|
dc_diff.signals_schema = (
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import posixpath
|
|
2
|
+
import re
|
|
2
3
|
from typing import TYPE_CHECKING
|
|
3
4
|
|
|
4
5
|
from datachain.lib.file import FileError
|
|
@@ -9,7 +10,7 @@ if TYPE_CHECKING:
|
|
|
9
10
|
from datachain.lib.file import Audio, AudioFile, File
|
|
10
11
|
|
|
11
12
|
try:
|
|
12
|
-
import torchaudio
|
|
13
|
+
import soundfile as sf
|
|
13
14
|
except ImportError as exc:
|
|
14
15
|
raise ImportError(
|
|
15
16
|
"Missing dependencies for processing audio.\n"
|
|
@@ -26,18 +27,25 @@ def audio_info(file: "File | AudioFile") -> "Audio":
|
|
|
26
27
|
|
|
27
28
|
try:
|
|
28
29
|
with file.open() as f:
|
|
29
|
-
info =
|
|
30
|
+
info = sf.info(f)
|
|
31
|
+
|
|
32
|
+
sample_rate = int(info.samplerate)
|
|
33
|
+
channels = int(info.channels)
|
|
34
|
+
frames = int(info.frames)
|
|
35
|
+
duration = float(info.duration)
|
|
30
36
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
37
|
+
# soundfile provides format and subtype
|
|
38
|
+
if info.format:
|
|
39
|
+
format_name = info.format.lower()
|
|
40
|
+
else:
|
|
41
|
+
format_name = file.get_file_ext().lower()
|
|
35
42
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
43
|
+
if not format_name:
|
|
44
|
+
format_name = "unknown"
|
|
45
|
+
codec_name = info.subtype if info.subtype else ""
|
|
39
46
|
|
|
40
|
-
|
|
47
|
+
# Calculate bit rate from subtype
|
|
48
|
+
bits_per_sample = _get_bits_per_sample(info.subtype)
|
|
41
49
|
bit_rate = (
|
|
42
50
|
bits_per_sample * sample_rate * channels if bits_per_sample > 0 else -1
|
|
43
51
|
)
|
|
@@ -58,44 +66,39 @@ def audio_info(file: "File | AudioFile") -> "Audio":
|
|
|
58
66
|
)
|
|
59
67
|
|
|
60
68
|
|
|
61
|
-
def
|
|
69
|
+
def _get_bits_per_sample(subtype: str) -> int:
|
|
62
70
|
"""
|
|
63
|
-
Map
|
|
71
|
+
Map soundfile subtype to bits per sample.
|
|
64
72
|
|
|
65
73
|
Args:
|
|
66
|
-
|
|
67
|
-
file_ext: The file extension as a fallback
|
|
74
|
+
subtype: The subtype string from soundfile
|
|
68
75
|
|
|
69
76
|
Returns:
|
|
70
|
-
|
|
77
|
+
Bits per sample, or 0 if unknown
|
|
71
78
|
"""
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"
|
|
79
|
+
if not subtype:
|
|
80
|
+
return 0
|
|
81
|
+
|
|
82
|
+
# Common PCM and floating-point subtypes
|
|
83
|
+
pcm_bits = {
|
|
84
|
+
"PCM_16": 16,
|
|
85
|
+
"PCM_24": 24,
|
|
86
|
+
"PCM_32": 32,
|
|
87
|
+
"PCM_S8": 8,
|
|
88
|
+
"PCM_U8": 8,
|
|
89
|
+
"FLOAT": 32,
|
|
90
|
+
"DOUBLE": 64,
|
|
81
91
|
}
|
|
82
92
|
|
|
83
|
-
if
|
|
84
|
-
return
|
|
93
|
+
if subtype in pcm_bits:
|
|
94
|
+
return pcm_bits[subtype]
|
|
85
95
|
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"wav": "wav",
|
|
91
|
-
"aiff": "aiff",
|
|
92
|
-
"au": "au",
|
|
93
|
-
"raw": "raw",
|
|
94
|
-
}
|
|
95
|
-
return pcm_formats.get(file_ext, "wav") # Default to wav for PCM
|
|
96
|
+
# Handle variants such as PCM_S16LE, PCM_F32LE, etc.
|
|
97
|
+
match = re.search(r"PCM_(?:[A-Z]*?)(\d+)", subtype)
|
|
98
|
+
if match:
|
|
99
|
+
return int(match.group(1))
|
|
96
100
|
|
|
97
|
-
|
|
98
|
-
return file_ext if file_ext else "unknown"
|
|
101
|
+
return 0
|
|
99
102
|
|
|
100
103
|
|
|
101
104
|
def audio_to_np(
|
|
@@ -114,27 +117,27 @@ def audio_to_np(
|
|
|
114
117
|
|
|
115
118
|
try:
|
|
116
119
|
with audio.open() as f:
|
|
117
|
-
info =
|
|
118
|
-
sample_rate = info.
|
|
120
|
+
info = sf.info(f)
|
|
121
|
+
sample_rate = info.samplerate
|
|
119
122
|
|
|
120
123
|
frame_offset = int(start * sample_rate)
|
|
121
124
|
num_frames = int(duration * sample_rate) if duration is not None else -1
|
|
122
125
|
|
|
123
126
|
# Reset file pointer to the beginning
|
|
124
|
-
# This is important to ensure we read from the correct position later
|
|
125
127
|
f.seek(0)
|
|
126
128
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
+
# Read audio data with offset and frame count
|
|
130
|
+
audio_np, sr = sf.read(
|
|
131
|
+
f,
|
|
132
|
+
start=frame_offset,
|
|
133
|
+
frames=num_frames,
|
|
134
|
+
always_2d=False,
|
|
135
|
+
dtype="float32",
|
|
129
136
|
)
|
|
130
137
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
audio_np = audio_np.T
|
|
135
|
-
else:
|
|
136
|
-
audio_np = audio_np.squeeze()
|
|
137
|
-
|
|
138
|
+
# soundfile returns shape (frames,) for mono or
|
|
139
|
+
# (frames, channels) for multi-channel
|
|
140
|
+
# We keep this format as it matches expected output
|
|
138
141
|
return audio_np, int(sr)
|
|
139
142
|
except Exception as exc:
|
|
140
143
|
raise FileError(
|
|
@@ -152,11 +155,9 @@ def audio_to_bytes(
|
|
|
152
155
|
|
|
153
156
|
If duration is None, converts from start to end of file.
|
|
154
157
|
If start is 0 and duration is None, converts entire file."""
|
|
155
|
-
y, sr = audio_to_np(audio, start, duration)
|
|
156
|
-
|
|
157
158
|
import io
|
|
158
159
|
|
|
159
|
-
|
|
160
|
+
y, sr = audio_to_np(audio, start, duration)
|
|
160
161
|
|
|
161
162
|
buffer = io.BytesIO()
|
|
162
163
|
sf.write(buffer, y, sr, format=format)
|
|
@@ -856,7 +856,9 @@ class DataChain:
|
|
|
856
856
|
udf_obj.to_udf_wrapper(self._settings.batch_size),
|
|
857
857
|
**self._settings.to_dict(),
|
|
858
858
|
),
|
|
859
|
-
signal_schema=
|
|
859
|
+
signal_schema=SignalSchema({"sys": Sys})
|
|
860
|
+
| self.signals_schema
|
|
861
|
+
| udf_obj.output,
|
|
860
862
|
)
|
|
861
863
|
|
|
862
864
|
def gen(
|
|
@@ -894,7 +896,7 @@ class DataChain:
|
|
|
894
896
|
udf_obj.to_udf_wrapper(self._settings.batch_size),
|
|
895
897
|
**self._settings.to_dict(),
|
|
896
898
|
),
|
|
897
|
-
signal_schema=udf_obj.output,
|
|
899
|
+
signal_schema=SignalSchema({"sys": Sys}) | udf_obj.output,
|
|
898
900
|
)
|
|
899
901
|
|
|
900
902
|
@delta_disabled
|
|
@@ -1031,7 +1033,7 @@ class DataChain:
|
|
|
1031
1033
|
partition_by=processed_partition_by,
|
|
1032
1034
|
**self._settings.to_dict(),
|
|
1033
1035
|
),
|
|
1034
|
-
signal_schema=udf_obj.output,
|
|
1036
|
+
signal_schema=SignalSchema({"sys": Sys}) | udf_obj.output,
|
|
1035
1037
|
)
|
|
1036
1038
|
|
|
1037
1039
|
def batch_map(
|
|
@@ -1097,11 +1099,7 @@ class DataChain:
|
|
|
1097
1099
|
sign = UdfSignature.parse(name, signal_map, func, params, output, is_generator)
|
|
1098
1100
|
DataModel.register(list(sign.output_schema.values.values()))
|
|
1099
1101
|
|
|
1100
|
-
|
|
1101
|
-
if self._sys:
|
|
1102
|
-
signals_schema = SignalSchema({"sys": Sys}) | signals_schema
|
|
1103
|
-
|
|
1104
|
-
params_schema = signals_schema.slice(
|
|
1102
|
+
params_schema = self.signals_schema.slice(
|
|
1105
1103
|
sign.params, self._setup, is_batch=is_batch
|
|
1106
1104
|
)
|
|
1107
1105
|
|
|
@@ -1156,11 +1154,9 @@ class DataChain:
|
|
|
1156
1154
|
)
|
|
1157
1155
|
)
|
|
1158
1156
|
|
|
1159
|
-
def select(self, *args: str
|
|
1157
|
+
def select(self, *args: str) -> "Self":
|
|
1160
1158
|
"""Select only a specified set of signals."""
|
|
1161
1159
|
new_schema = self.signals_schema.resolve(*args)
|
|
1162
|
-
if self._sys and _sys:
|
|
1163
|
-
new_schema = SignalSchema({"sys": Sys}) | new_schema
|
|
1164
1160
|
columns = new_schema.db_signals()
|
|
1165
1161
|
return self._evolve(
|
|
1166
1162
|
query=self._query.select(*columns), signal_schema=new_schema
|
|
@@ -1710,9 +1706,11 @@ class DataChain:
|
|
|
1710
1706
|
|
|
1711
1707
|
signals_schema = self.signals_schema.clone_without_sys_signals()
|
|
1712
1708
|
right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1709
|
+
|
|
1710
|
+
ds.signals_schema = signals_schema.merge(right_signals_schema, rname)
|
|
1711
|
+
|
|
1712
|
+
if not full:
|
|
1713
|
+
ds.signals_schema = SignalSchema({"sys": Sys}) | ds.signals_schema
|
|
1716
1714
|
|
|
1717
1715
|
return ds
|
|
1718
1716
|
|
|
@@ -1723,6 +1721,7 @@ class DataChain:
|
|
|
1723
1721
|
Parameters:
|
|
1724
1722
|
other: chain whose rows will be added to `self`.
|
|
1725
1723
|
"""
|
|
1724
|
+
self.signals_schema = self.signals_schema.clone_without_sys_signals()
|
|
1726
1725
|
return self._evolve(query=self._query.union(other._query))
|
|
1727
1726
|
|
|
1728
1727
|
def subtract( # type: ignore[override]
|