datachain 0.37.0__tar.gz → 0.37.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.37.0 → datachain-0.37.2}/.pre-commit-config.yaml +1 -1
- {datachain-0.37.0 → datachain-0.37.2}/PKG-INFO +2 -2
- {datachain-0.37.0 → datachain-0.37.2}/pyproject.toml +1 -1
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/delta.py +6 -35
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/diff/__init__.py +3 -1
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/datachain.py +2 -3
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/datasets.py +4 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/storage.py +6 -9
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/dataset.py +1 -1
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/PKG-INFO +2 -2
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/requires.txt +1 -1
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_delta.py +118 -5
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_checkpoints.py +8 -6
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_datachain_merge.py +41 -3
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_diff.py +19 -1
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_datachain_hash.py +1 -1
- {datachain-0.37.0 → datachain-0.37.2}/.cruft.json +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.gitattributes +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/codecov.yaml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/dependabot.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/release.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/tests.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/.gitignore +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/LICENSE +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/README.rst +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/api_hooks.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/assets/webhook_list.png +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/login.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/logout.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/team.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/auth/token.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/index.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/cancel.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/clusters.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/logs.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/ls.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/commands/job/run.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/contributing.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/examples.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/checkpoints.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/db_migrations.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/delta.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/env.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/index.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/namespaces.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/processing.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/remotes.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/guide/retry.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/index.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/overrides/main.html +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/quick-start.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/file.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/index.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/pose.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/segment.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/datachain.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/func.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/array.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/conditional.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/numeric.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/path.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/random.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/string.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/functions/window.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/index.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/toolkit.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/torch.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/references/udf.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/studio/webhooks.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/templates/main.dot +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/templates/operation.dot +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/templates/responses.def +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/docs/tutorials.md +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/mkdocs.yml +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/noxfile.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/setup.cfg +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/__main__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/asyn.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cache.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/dependency.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/checkpoint.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/cli/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/gcs.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/http.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/local.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/config.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/dataset.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/error.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/fs/reference.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/fs/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/array.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/base.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/func.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/path.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/random.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/string.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/func/window.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/hash_utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/job.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/audio.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/projects.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/video.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/listing.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/model/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/namespace.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/node.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/plugins.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/progress.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/project.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/py.typed +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/batch.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/params.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/queue.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/schema.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/session.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/script_meta.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/semver.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/types.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/studio.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain/utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/conftest.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/data.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/examples/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/examples/test_examples.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/data/lena.jpg +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_array.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_path.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_random.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/functions/test_string.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_audio.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_checkpoints.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_client.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_data_storage.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_datachain.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_file.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_hf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_image.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_listing.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_ls.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_metastore.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_mutate.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_pull.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_query.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_read_database.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_retry.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_session.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_temp_table_tracking.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_to_database.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_udf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_union.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_video.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/func/test_warehouse.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/test_atomicity.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/test_import_time.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/test_job_management_e2e.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/test_telemetry.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_batching.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client_http.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_config.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_func.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_job_management.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_metastore.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_semver.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_serializer.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_session.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_utils.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.37.0 → datachain-0.37.2}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.37.0
|
|
3
|
+
Version: 0.37.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -41,7 +41,7 @@ Requires-Dist: cloudpickle
|
|
|
41
41
|
Requires-Dist: pydantic
|
|
42
42
|
Requires-Dist: jmespath>=1.0
|
|
43
43
|
Requires-Dist: datamodel-code-generator>=0.25
|
|
44
|
-
Requires-Dist: Pillow<
|
|
44
|
+
Requires-Dist: Pillow<13,>=10.0.0
|
|
45
45
|
Requires-Dist: msgpack<2,>=1.0.4
|
|
46
46
|
Requires-Dist: psutil
|
|
47
47
|
Requires-Dist: huggingface_hub
|
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
import hashlib
|
|
2
1
|
from collections.abc import Sequence
|
|
3
2
|
from copy import copy
|
|
4
3
|
from functools import wraps
|
|
5
4
|
from typing import TYPE_CHECKING, TypeVar
|
|
6
5
|
|
|
7
|
-
from attrs import frozen
|
|
8
|
-
|
|
9
6
|
import datachain
|
|
10
7
|
from datachain.dataset import DatasetDependency, DatasetRecord
|
|
11
8
|
from datachain.error import DatasetNotFoundError
|
|
12
9
|
from datachain.project import Project
|
|
13
|
-
from datachain.query.dataset import Step, step_result
|
|
14
10
|
|
|
15
11
|
if TYPE_CHECKING:
|
|
16
12
|
from collections.abc import Callable
|
|
@@ -18,9 +14,7 @@ if TYPE_CHECKING:
|
|
|
18
14
|
|
|
19
15
|
from typing_extensions import ParamSpec
|
|
20
16
|
|
|
21
|
-
from datachain.catalog import Catalog
|
|
22
17
|
from datachain.lib.dc import DataChain
|
|
23
|
-
from datachain.query.dataset import QueryGenerator
|
|
24
18
|
|
|
25
19
|
P = ParamSpec("P")
|
|
26
20
|
|
|
@@ -49,38 +43,11 @@ def delta_disabled(
|
|
|
49
43
|
return _inner
|
|
50
44
|
|
|
51
45
|
|
|
52
|
-
@frozen
|
|
53
|
-
class _RegenerateSystemColumnsStep(Step):
|
|
54
|
-
catalog: "Catalog"
|
|
55
|
-
|
|
56
|
-
def hash_inputs(self) -> str:
|
|
57
|
-
return hashlib.sha256(b"regenerate_sys_columns").hexdigest()
|
|
58
|
-
|
|
59
|
-
def apply(self, query_generator: "QueryGenerator", temp_tables: list[str]):
|
|
60
|
-
selectable = query_generator.select()
|
|
61
|
-
regenerated = self.catalog.warehouse._regenerate_system_columns(
|
|
62
|
-
selectable,
|
|
63
|
-
keep_existing_columns=True,
|
|
64
|
-
regenerate_columns=None,
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
def q(*columns):
|
|
68
|
-
return regenerated.with_only_columns(*columns)
|
|
69
|
-
|
|
70
|
-
return step_result(q, regenerated.selected_columns)
|
|
71
|
-
|
|
72
|
-
|
|
73
46
|
def _append_steps(dc: "DataChain", other: "DataChain"):
|
|
74
47
|
"""Returns cloned chain with appended steps from other chain.
|
|
75
48
|
Steps are all those modification methods applied like filters, mappers etc.
|
|
76
49
|
"""
|
|
77
50
|
dc = dc.clone()
|
|
78
|
-
dc._query.steps.append(
|
|
79
|
-
_RegenerateSystemColumnsStep(
|
|
80
|
-
catalog=dc.session.catalog,
|
|
81
|
-
)
|
|
82
|
-
)
|
|
83
|
-
|
|
84
51
|
dc._query.steps += other._query.steps.copy()
|
|
85
52
|
dc.signals_schema = other.signals_schema
|
|
86
53
|
return dc
|
|
@@ -150,7 +117,9 @@ def _get_retry_chain(
|
|
|
150
117
|
error_records = result_dataset.filter(C(delta_retry) != "")
|
|
151
118
|
error_source_records = source_dc.merge(
|
|
152
119
|
error_records, on=on, right_on=right_on, inner=True
|
|
153
|
-
).select(
|
|
120
|
+
).select(
|
|
121
|
+
*list(source_dc.signals_schema.clone_without_sys_signals().values.keys())
|
|
122
|
+
)
|
|
154
123
|
retry_chain = error_source_records
|
|
155
124
|
|
|
156
125
|
# Handle missing records if delta_retry is True
|
|
@@ -200,7 +169,9 @@ def _get_source_info(
|
|
|
200
169
|
indirect=False,
|
|
201
170
|
)
|
|
202
171
|
|
|
203
|
-
source_ds_dep = next(
|
|
172
|
+
source_ds_dep = next(
|
|
173
|
+
(d for d in dependencies if d and d.name == source_ds.name), None
|
|
174
|
+
)
|
|
204
175
|
if not source_ds_dep:
|
|
205
176
|
# Starting dataset was removed, back off to normal dataset creation
|
|
206
177
|
return None, None, None, None, None
|
|
@@ -103,8 +103,10 @@ def _compare( # noqa: C901
|
|
|
103
103
|
left = left.mutate(**{ldiff_col: 1})
|
|
104
104
|
right = right.mutate(**{rdiff_col: 1})
|
|
105
105
|
|
|
106
|
-
if
|
|
106
|
+
if compare is None:
|
|
107
107
|
modified_cond = True
|
|
108
|
+
elif len(compare) == 0:
|
|
109
|
+
modified_cond = False
|
|
108
110
|
else:
|
|
109
111
|
modified_cond = or_( # type: ignore[assignment]
|
|
110
112
|
*[
|
|
@@ -1697,14 +1697,13 @@ class DataChain:
|
|
|
1697
1697
|
query.feature_schema = None
|
|
1698
1698
|
ds = self._evolve(query=query)
|
|
1699
1699
|
|
|
1700
|
+
# Note: merge drops sys signals from both sides, make sure to not include it
|
|
1701
|
+
# in the resulting schema
|
|
1700
1702
|
signals_schema = self.signals_schema.clone_without_sys_signals()
|
|
1701
1703
|
right_signals_schema = right_ds.signals_schema.clone_without_sys_signals()
|
|
1702
1704
|
|
|
1703
1705
|
ds.signals_schema = signals_schema.merge(right_signals_schema, rname)
|
|
1704
1706
|
|
|
1705
|
-
if not full:
|
|
1706
|
-
ds.signals_schema = SignalSchema({"sys": Sys}) | ds.signals_schema
|
|
1707
|
-
|
|
1708
1707
|
return ds
|
|
1709
1708
|
|
|
1710
1709
|
@delta_disabled
|
|
@@ -200,6 +200,10 @@ def read_dataset(
|
|
|
200
200
|
signals_schema |= SignalSchema.deserialize(query.feature_schema)
|
|
201
201
|
else:
|
|
202
202
|
signals_schema |= SignalSchema.from_column_types(query.column_types or {})
|
|
203
|
+
|
|
204
|
+
if delta:
|
|
205
|
+
signals_schema = signals_schema.clone_without_sys_signals()
|
|
206
|
+
|
|
203
207
|
chain = DataChain(query, _settings, signals_schema)
|
|
204
208
|
|
|
205
209
|
if delta:
|
|
@@ -187,6 +187,12 @@ def read_storage(
|
|
|
187
187
|
project=listing_project_name,
|
|
188
188
|
session=session,
|
|
189
189
|
settings=settings,
|
|
190
|
+
delta=delta,
|
|
191
|
+
delta_on=delta_on,
|
|
192
|
+
delta_result_on=delta_result_on,
|
|
193
|
+
delta_compare=delta_compare,
|
|
194
|
+
delta_retry=delta_retry,
|
|
195
|
+
delta_unsafe=delta_unsafe,
|
|
190
196
|
)
|
|
191
197
|
dc._query.update = update
|
|
192
198
|
dc.signals_schema = dc.signals_schema.mutate({f"{column}": file_type})
|
|
@@ -252,13 +258,4 @@ def read_storage(
|
|
|
252
258
|
|
|
253
259
|
assert storage_chain is not None
|
|
254
260
|
|
|
255
|
-
if delta:
|
|
256
|
-
storage_chain = storage_chain._as_delta(
|
|
257
|
-
on=delta_on,
|
|
258
|
-
right_on=delta_result_on,
|
|
259
|
-
compare=delta_compare,
|
|
260
|
-
delta_retry=delta_retry,
|
|
261
|
-
delta_unsafe=delta_unsafe,
|
|
262
|
-
)
|
|
263
|
-
|
|
264
261
|
return storage_chain
|
|
@@ -1065,7 +1065,7 @@ class SQLJoin(Step):
|
|
|
1065
1065
|
q1 = self.get_query(self.query1, temp_tables)
|
|
1066
1066
|
q2 = self.get_query(self.query2, temp_tables)
|
|
1067
1067
|
|
|
1068
|
-
q1_columns = _drop_system_columns(q1.c)
|
|
1068
|
+
q1_columns = _drop_system_columns(q1.c)
|
|
1069
1069
|
q1_column_names = {c.name for c in q1_columns}
|
|
1070
1070
|
|
|
1071
1071
|
q2_columns = []
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.37.0
|
|
3
|
+
Version: 0.37.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -41,7 +41,7 @@ Requires-Dist: cloudpickle
|
|
|
41
41
|
Requires-Dist: pydantic
|
|
42
42
|
Requires-Dist: jmespath>=1.0
|
|
43
43
|
Requires-Dist: datamodel-code-generator>=0.25
|
|
44
|
-
Requires-Dist: Pillow<
|
|
44
|
+
Requires-Dist: Pillow<13,>=10.0.0
|
|
45
45
|
Requires-Dist: msgpack<2,>=1.0.4
|
|
46
46
|
Requires-Dist: psutil
|
|
47
47
|
Requires-Dist: huggingface_hub
|
|
@@ -98,6 +98,70 @@ def test_delta_update_from_dataset(test_session, tmp_dir, tmp_path):
|
|
|
98
98
|
create_delta_dataset(ds_name)
|
|
99
99
|
|
|
100
100
|
|
|
101
|
+
def test_delta_falls_back_when_dependency_missing(test_session):
|
|
102
|
+
catalog = test_session.catalog
|
|
103
|
+
|
|
104
|
+
source_ds = "delta_removed_dep_source"
|
|
105
|
+
delta_ds = "delta_removed_dep_result"
|
|
106
|
+
process_log: list[int] = []
|
|
107
|
+
|
|
108
|
+
def record_processing(id: int) -> int:
|
|
109
|
+
process_log.append(id)
|
|
110
|
+
return id
|
|
111
|
+
|
|
112
|
+
# Create first source dataset and initial delta version that depends on it
|
|
113
|
+
dc.read_values(id=[1, 2], session=test_session).save(source_ds)
|
|
114
|
+
dc.read_dataset(
|
|
115
|
+
source_ds,
|
|
116
|
+
session=test_session,
|
|
117
|
+
delta=True,
|
|
118
|
+
delta_on="id",
|
|
119
|
+
).map(processed_id=record_processing).save(delta_ds)
|
|
120
|
+
|
|
121
|
+
assert _get_dependencies(catalog, delta_ds, "1.0.0") == [(source_ds, "1.0.0")]
|
|
122
|
+
assert set(
|
|
123
|
+
dc.read_dataset(delta_ds, version="1.0.0", session=test_session).to_values("id")
|
|
124
|
+
) == {1, 2}
|
|
125
|
+
assert sorted(process_log[:2]) == [1, 2]
|
|
126
|
+
|
|
127
|
+
dc.read_values(id=[1, 2, 10, 20, 30], session=test_session).save(source_ds)
|
|
128
|
+
|
|
129
|
+
# Drop the previous version so it is clear the dependency targets 1.0.1
|
|
130
|
+
dc.delete_dataset(source_ds, version="1.0.0", session=test_session)
|
|
131
|
+
|
|
132
|
+
with pytest.raises(DatasetNotFoundError):
|
|
133
|
+
dc.read_dataset(source_ds, session=test_session, version="1.0.0")
|
|
134
|
+
|
|
135
|
+
deps_after_removal = catalog.get_dataset_dependencies(
|
|
136
|
+
delta_ds,
|
|
137
|
+
"1.0.0",
|
|
138
|
+
namespace_name=catalog.metastore.default_project.namespace.name,
|
|
139
|
+
project_name=catalog.metastore.default_project.name,
|
|
140
|
+
indirect=False,
|
|
141
|
+
)
|
|
142
|
+
assert deps_after_removal == [None]
|
|
143
|
+
|
|
144
|
+
dc.read_dataset(
|
|
145
|
+
source_ds,
|
|
146
|
+
session=test_session,
|
|
147
|
+
delta=True,
|
|
148
|
+
delta_on="id",
|
|
149
|
+
).map(processed_id=record_processing).save(delta_ds)
|
|
150
|
+
|
|
151
|
+
# Delta logic should fall back to rebuilding from scratch with the new dependency
|
|
152
|
+
assert _get_dependencies(catalog, delta_ds, "1.0.1") == [(source_ds, "1.0.1")]
|
|
153
|
+
assert set(
|
|
154
|
+
dc.read_dataset(delta_ds, version="1.0.1", session=test_session).to_values("id")
|
|
155
|
+
) == {1, 2, 10, 20, 30}
|
|
156
|
+
# Previous version remains intact and still reflects the original source dataset
|
|
157
|
+
assert set(
|
|
158
|
+
dc.read_dataset(delta_ds, version="1.0.0", session=test_session).to_values("id")
|
|
159
|
+
) == {1, 2}
|
|
160
|
+
# Fallback rebuilds the dataset, so ids 1 and 2 appear twice across both runs.
|
|
161
|
+
assert sorted(process_log[:2]) == [1, 2]
|
|
162
|
+
assert sorted(process_log[2:]) == [1, 2, 10, 20, 30]
|
|
163
|
+
|
|
164
|
+
|
|
101
165
|
def test_delta_returns_correct_dataset_on_no_changes(test_session):
|
|
102
166
|
catalog = test_session.catalog
|
|
103
167
|
|
|
@@ -250,17 +314,66 @@ def test_delta_replay_regenerates_system_columns(test_session):
|
|
|
250
314
|
|
|
251
315
|
build_chain(delta=False).save(result_name)
|
|
252
316
|
|
|
253
|
-
build_chain(delta=True).save(
|
|
254
|
-
result_name,
|
|
255
|
-
delta=True,
|
|
256
|
-
delta_on="measurement_id",
|
|
257
|
-
)
|
|
317
|
+
build_chain(delta=True).save(result_name)
|
|
258
318
|
|
|
259
319
|
assert set(
|
|
260
320
|
dc.read_dataset(result_name, session=test_session).to_values("measurement_id")
|
|
261
321
|
) == {1, 2}
|
|
262
322
|
|
|
263
323
|
|
|
324
|
+
def test_storage_delta_replay_regenerates_system_columns(test_session, tmp_dir):
|
|
325
|
+
data_dir = tmp_dir / f"regen_storage_{uuid.uuid4().hex[:8]}"
|
|
326
|
+
data_dir.mkdir()
|
|
327
|
+
storage_uri = data_dir.as_uri()
|
|
328
|
+
result_name = f"regen_storage_result_{uuid.uuid4().hex[:8]}"
|
|
329
|
+
|
|
330
|
+
def write_payload(index: int) -> None:
|
|
331
|
+
(data_dir / f"item{index}.txt").write_text(f"payload-{index}")
|
|
332
|
+
|
|
333
|
+
write_payload(1)
|
|
334
|
+
write_payload(2)
|
|
335
|
+
|
|
336
|
+
def build_chain(delta: bool):
|
|
337
|
+
read_kwargs = {"session": test_session, "update": True}
|
|
338
|
+
if delta:
|
|
339
|
+
read_kwargs |= {
|
|
340
|
+
"delta": True,
|
|
341
|
+
"delta_on": ["file.path"],
|
|
342
|
+
"delta_result_on": ["file.path"],
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
def get_measurement_id(file: File) -> int:
|
|
346
|
+
match = re.search(r"item(\d+)\.txt$", file.path)
|
|
347
|
+
assert match
|
|
348
|
+
return int(match.group(1))
|
|
349
|
+
|
|
350
|
+
def get_num(file: File) -> int:
|
|
351
|
+
return get_measurement_id(file)
|
|
352
|
+
|
|
353
|
+
chain = dc.read_storage(storage_uri, **read_kwargs)
|
|
354
|
+
return (
|
|
355
|
+
chain.mutate(num=1)
|
|
356
|
+
.select_except("num")
|
|
357
|
+
.map(measurement_id=get_measurement_id)
|
|
358
|
+
.map(err=lambda file: "")
|
|
359
|
+
.map(num=get_num)
|
|
360
|
+
.filter(C.err == "")
|
|
361
|
+
.select_except("err")
|
|
362
|
+
.map(double=lambda num: num * 2, output=int)
|
|
363
|
+
.select_except("num")
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
build_chain(delta=False).save(result_name)
|
|
367
|
+
|
|
368
|
+
write_payload(3)
|
|
369
|
+
|
|
370
|
+
build_chain(delta=True).save(result_name)
|
|
371
|
+
|
|
372
|
+
assert set(
|
|
373
|
+
dc.read_dataset(result_name, session=test_session).to_values("measurement_id")
|
|
374
|
+
) == {1, 2, 3}
|
|
375
|
+
|
|
376
|
+
|
|
264
377
|
def test_delta_update_from_storage(test_session, tmp_dir, tmp_path):
|
|
265
378
|
ds_name = "delta_ds"
|
|
266
379
|
path = tmp_dir.as_uri()
|
|
@@ -35,7 +35,7 @@ def test_checkpoints(
|
|
|
35
35
|
catalog = test_session.catalog
|
|
36
36
|
metastore = catalog.metastore
|
|
37
37
|
|
|
38
|
-
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", reset_checkpoints)
|
|
38
|
+
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))
|
|
39
39
|
|
|
40
40
|
if with_delta:
|
|
41
41
|
chain = dc.read_dataset(
|
|
@@ -75,8 +75,9 @@ def test_checkpoints(
|
|
|
75
75
|
chain.save("nums3")
|
|
76
76
|
second_job_id = test_session.get_or_create_job().id
|
|
77
77
|
|
|
78
|
-
|
|
79
|
-
assert len(catalog.get_dataset("
|
|
78
|
+
expected_versions = 1 if with_delta or not reset_checkpoints else 2
|
|
79
|
+
assert len(catalog.get_dataset("nums1").versions) == expected_versions
|
|
80
|
+
assert len(catalog.get_dataset("nums2").versions) == expected_versions
|
|
80
81
|
assert len(catalog.get_dataset("nums3").versions) == 1
|
|
81
82
|
|
|
82
83
|
assert len(list(catalog.metastore.list_checkpoints(first_job_id))) == 2
|
|
@@ -88,7 +89,7 @@ def test_checkpoints_modified_chains(
|
|
|
88
89
|
test_session, monkeypatch, nums_dataset, reset_checkpoints
|
|
89
90
|
):
|
|
90
91
|
catalog = test_session.catalog
|
|
91
|
-
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", reset_checkpoints)
|
|
92
|
+
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))
|
|
92
93
|
|
|
93
94
|
chain = dc.read_dataset("nums", session=test_session)
|
|
94
95
|
|
|
@@ -120,7 +121,7 @@ def test_checkpoints_multiple_runs(
|
|
|
120
121
|
):
|
|
121
122
|
catalog = test_session.catalog
|
|
122
123
|
|
|
123
|
-
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", reset_checkpoints)
|
|
124
|
+
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(reset_checkpoints))
|
|
124
125
|
|
|
125
126
|
chain = dc.read_dataset("nums", session=test_session)
|
|
126
127
|
|
|
@@ -184,7 +185,7 @@ def test_checkpoints_check_valid_chain_is_returned(
|
|
|
184
185
|
monkeypatch,
|
|
185
186
|
nums_dataset,
|
|
186
187
|
):
|
|
187
|
-
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", False)
|
|
188
|
+
monkeypatch.setenv("DATACHAIN_CHECKPOINTS_RESET", str(False))
|
|
188
189
|
chain = dc.read_dataset("nums", session=test_session)
|
|
189
190
|
|
|
190
191
|
# -------------- FIRST RUN -------------------
|
|
@@ -197,6 +198,7 @@ def test_checkpoints_check_valid_chain_is_returned(
|
|
|
197
198
|
|
|
198
199
|
# checking that we return expected DataChain even though we skipped chain creation
|
|
199
200
|
# because of the checkpoints
|
|
201
|
+
assert ds.dataset is not None
|
|
200
202
|
assert ds.dataset.name == "nums1"
|
|
201
203
|
assert len(ds.dataset.versions) == 1
|
|
202
204
|
assert ds.order_by("num").to_list("num") == [(1,), (2,), (3,)]
|
|
@@ -140,7 +140,7 @@ def test_merge_similar_objects(test_session):
|
|
|
140
140
|
rname = "qq"
|
|
141
141
|
ch = ch1.merge(ch2, "emp.person.name", rname=rname)
|
|
142
142
|
|
|
143
|
-
assert list(ch.signals_schema.values.keys()) == ["sys", "emp", rname + "emp"]
|
|
143
|
+
assert list(ch.signals_schema.values.keys()) == ["emp", rname + "emp"]
|
|
144
144
|
|
|
145
145
|
empl = list(ch.to_list())
|
|
146
146
|
assert len(empl) == 4
|
|
@@ -175,7 +175,7 @@ def test_merge_similar_objects_in_memory():
|
|
|
175
175
|
assert ch.session.catalog.metastore.db.db_file == ":memory:"
|
|
176
176
|
assert ch.session.catalog.warehouse.db.db_file == ":memory:"
|
|
177
177
|
|
|
178
|
-
assert list(ch.signals_schema.values.keys()) == ["sys", "emp", rname + "emp"]
|
|
178
|
+
assert list(ch.signals_schema.values.keys()) == ["emp", rname + "emp"]
|
|
179
179
|
|
|
180
180
|
empl = list(ch.to_list())
|
|
181
181
|
assert len(empl) == 4
|
|
@@ -198,7 +198,6 @@ def test_merge_values(test_session):
|
|
|
198
198
|
ch = ch1.merge(ch2, "id")
|
|
199
199
|
|
|
200
200
|
assert list(ch.signals_schema.values.keys()) == [
|
|
201
|
-
"sys",
|
|
202
201
|
"id",
|
|
203
202
|
"descr",
|
|
204
203
|
"right_id",
|
|
@@ -339,3 +338,42 @@ def test_merge_on_expression(test_session):
|
|
|
339
338
|
count += 1
|
|
340
339
|
|
|
341
340
|
assert count == len(team) * len(team)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def test_merge_with_drops_sys_columns(test_session):
|
|
344
|
+
left = dc.read_values(id=[1, 1], lval=[10, 20], session=test_session)
|
|
345
|
+
right = dc.read_values(id=[1, 1], rval=["a", "b"], session=test_session)
|
|
346
|
+
|
|
347
|
+
merged = left.merge(right, on="id")
|
|
348
|
+
|
|
349
|
+
assert "sys" not in merged.signals_schema.values
|
|
350
|
+
|
|
351
|
+
cols = merged.settings(sys=True).to_pandas(flatten=True).columns
|
|
352
|
+
assert all(not str(col).startswith("sys") for col in cols)
|
|
353
|
+
|
|
354
|
+
ds_name = "merge_left_dups_sys_check_sys"
|
|
355
|
+
merged.save(ds_name)
|
|
356
|
+
|
|
357
|
+
df_with_sys = (
|
|
358
|
+
dc.read_dataset(ds_name, session=test_session)
|
|
359
|
+
.settings(sys=True)
|
|
360
|
+
.to_pandas(flatten=True)
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
sys_cols = [c for c in df_with_sys.columns if str(c).startswith("sys")]
|
|
364
|
+
assert sys_cols
|
|
365
|
+
|
|
366
|
+
def _col(name: str) -> str:
|
|
367
|
+
for col in df_with_sys.columns:
|
|
368
|
+
if str(col) == f"sys.{name}":
|
|
369
|
+
return str(col)
|
|
370
|
+
raise AssertionError(f"Missing sys column for {name}")
|
|
371
|
+
|
|
372
|
+
sys_id_col = _col("id")
|
|
373
|
+
sys_rand_col = _col("rand")
|
|
374
|
+
|
|
375
|
+
sys_ids = list(df_with_sys[sys_id_col])
|
|
376
|
+
assert len(sys_ids) == len(set(sys_ids))
|
|
377
|
+
|
|
378
|
+
sys_rand = list(df_with_sys[sys_rand_col])
|
|
379
|
+
assert len(sys_rand) == len(set(sys_rand))
|
|
@@ -256,6 +256,24 @@ def test_diff_on_equal_datasets(test_session, on_self):
|
|
|
256
256
|
assert diff.order_by("id").to_list(*collect_fields) == expected
|
|
257
257
|
|
|
258
258
|
|
|
259
|
+
def test_diff_only_on_columns_treated_as_same(test_session):
|
|
260
|
+
ds1 = dc.read_values(
|
|
261
|
+
id=[1, 2],
|
|
262
|
+
session=test_session,
|
|
263
|
+
)
|
|
264
|
+
ds2 = dc.read_values(
|
|
265
|
+
id=[1, 2],
|
|
266
|
+
session=test_session,
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
diff = ds1.diff(ds2, on=["id"], same=True, status_col="diff")
|
|
270
|
+
|
|
271
|
+
assert diff.order_by("id").to_list("diff", "id") == [
|
|
272
|
+
(CompareStatus.SAME, 1),
|
|
273
|
+
(CompareStatus.SAME, 2),
|
|
274
|
+
]
|
|
275
|
+
|
|
276
|
+
|
|
259
277
|
def test_diff_multiple_columns(test_session, str_default):
|
|
260
278
|
ds1 = dc.read_values(
|
|
261
279
|
id=[1, 2, 4],
|
|
@@ -382,7 +400,7 @@ def test_diff_missing_on(test_session):
|
|
|
382
400
|
ds2 = dc.read_values(id=[1, 2, 4], session=test_session)
|
|
383
401
|
|
|
384
402
|
with pytest.raises(ValueError) as exc_info:
|
|
385
|
-
ds1.diff(ds2, on=None)
|
|
403
|
+
ds1.diff(ds2, on=None) # type: ignore[arg-type]
|
|
386
404
|
|
|
387
405
|
assert str(exc_info.value) == "'on' must be specified"
|
|
388
406
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|