datachain 0.13.0__tar.gz → 0.13.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.13.0 → datachain-0.13.1}/PKG-INFO +3 -2
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/catalog.py +13 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/diff/__init__.py +8 -5
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/PKG-INFO +3 -2
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_datachain.py +10 -1
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_diff.py +89 -56
- {datachain-0.13.0 → datachain-0.13.1}/.cruft.json +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.gitattributes +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/codecov.yaml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/dependabot.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/release.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/tests.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.gitignore +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/.pre-commit-config.yaml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/LICENSE +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/README.rst +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/assets/datachain.svg +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/contributing.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/examples.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/index.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/overrides/main.html +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/quick-start.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/file.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/index.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/pose.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/segment.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/datachain.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/func.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/index.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/remotes.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/toolkit.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/torch.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/references/udf.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/docs/tutorials.md +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/wds.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/mkdocs.yml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/noxfile.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/pyproject.toml +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/setup.cfg +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/__main__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/asyn.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cache.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/cli/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/azure.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/gcs.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/hf.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/local.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/client/s3.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/config.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/dataset.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/error.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/fs/reference.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/fs/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/array.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/base.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/conditional.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/func.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/numeric.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/path.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/random.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/string.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/func/window.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/job.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/clip.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/dc.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/file.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/hf.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/image.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/listing.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/settings.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/tar.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/text.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/udf.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/video.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/listing.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/bbox.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/pose.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/segment.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/model/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/node.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/progress.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/py.typed +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/batch.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/dataset.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/metrics.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/params.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/queue.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/schema.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/session.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/udf.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/query/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/remote/studio.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/script_meta.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/types.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/sql/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/studio.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/telemetry.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain/utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/conftest.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/data.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/examples/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/examples/test_examples.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/examples/wds_data.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/data/lena.jpg +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_catalog.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_client.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_data_storage.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_datasets.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_file.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_hf.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_image.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_listing.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_ls.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_metrics.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_pull.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_pytorch.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_query.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_session.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_toolkit.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_video.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/func/test_warehouse.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/test_atomicity.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/test_cli_e2e.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/test_cli_studio.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/test_import_time.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/test_query_e2e.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/test_telemetry.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_asyn.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_cache.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_catalog.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_client.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_config.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_dataset.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_func.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_listing.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_metastore.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_query.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_query_params.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_serializer.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_session.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_utils.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.13.0 → datachain-0.13.1}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.13.0
|
|
3
|
+
Version: 0.13.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -107,6 +107,7 @@ Requires-Dist: accelerate; extra == "examples"
|
|
|
107
107
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
108
108
|
Requires-Dist: ultralytics; extra == "examples"
|
|
109
109
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
110
|
+
Dynamic: license-file
|
|
110
111
|
|
|
111
112
|
================
|
|
112
113
|
|logo| DataChain
|
|
@@ -795,6 +795,19 @@ class Catalog:
|
|
|
795
795
|
try:
|
|
796
796
|
dataset = self.get_dataset(name)
|
|
797
797
|
default_version = dataset.next_version
|
|
798
|
+
|
|
799
|
+
if (description or labels) and (
|
|
800
|
+
dataset.description != description or dataset.labels != labels
|
|
801
|
+
):
|
|
802
|
+
description = description or dataset.description
|
|
803
|
+
labels = labels or dataset.labels
|
|
804
|
+
|
|
805
|
+
self.update_dataset(
|
|
806
|
+
dataset,
|
|
807
|
+
description=description,
|
|
808
|
+
labels=labels,
|
|
809
|
+
)
|
|
810
|
+
|
|
798
811
|
except DatasetNotFoundError:
|
|
799
812
|
schema = {
|
|
800
813
|
c.name: c.type.to_dict() for c in columns if isinstance(c.type, SQLType)
|
|
@@ -74,6 +74,7 @@ def _compare( # noqa: C901
|
|
|
74
74
|
# all left and right columns
|
|
75
75
|
cols = left.signals_schema.clone_without_sys_signals().db_signals()
|
|
76
76
|
right_cols = right.signals_schema.clone_without_sys_signals().db_signals()
|
|
77
|
+
cols_select = list(left.signals_schema.clone_without_sys_signals().values.keys())
|
|
77
78
|
|
|
78
79
|
# getting correct on and right_on column names
|
|
79
80
|
on = left.signals_schema.resolve(*on).db_signals() # type: ignore[assignment]
|
|
@@ -131,10 +132,12 @@ def _compare( # noqa: C901
|
|
|
131
132
|
# when the row is deleted, we need to take column values from the right chain
|
|
132
133
|
.mutate(
|
|
133
134
|
**{
|
|
134
|
-
f"{
|
|
135
|
-
C(diff_col) == CompareStatus.DELETED,
|
|
135
|
+
f"{l_on}": ifelse(
|
|
136
|
+
C(diff_col) == CompareStatus.DELETED,
|
|
137
|
+
C(f"{rname + l_on if on == right_on else r_on}"),
|
|
138
|
+
C(l_on),
|
|
136
139
|
)
|
|
137
|
-
for
|
|
140
|
+
for l_on, r_on in zip(on, right_on) # type: ignore[arg-type]
|
|
138
141
|
}
|
|
139
142
|
)
|
|
140
143
|
.select_except(ldiff_col, rdiff_col)
|
|
@@ -150,9 +153,9 @@ def _compare( # noqa: C901
|
|
|
150
153
|
dc_diff = dc_diff.filter(C(diff_col) != CompareStatus.DELETED)
|
|
151
154
|
|
|
152
155
|
if status_col:
|
|
153
|
-
|
|
156
|
+
cols_select.append(diff_col)
|
|
154
157
|
|
|
155
|
-
dc_diff = dc_diff.select(*
|
|
158
|
+
dc_diff = dc_diff.select(*cols_select)
|
|
156
159
|
|
|
157
160
|
# final schema is schema from the left chain with status column added if needed
|
|
158
161
|
dc_diff.signals_schema = (
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.13.0
|
|
3
|
+
Version: 0.13.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -107,6 +107,7 @@ Requires-Dist: accelerate; extra == "examples"
|
|
|
107
107
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
108
108
|
Requires-Dist: ultralytics; extra == "examples"
|
|
109
109
|
Requires-Dist: open_clip_torch; extra == "examples"
|
|
110
|
+
Dynamic: license-file
|
|
110
111
|
|
|
111
112
|
================
|
|
112
113
|
|logo| DataChain
|
|
@@ -461,6 +461,16 @@ def test_save(test_session):
|
|
|
461
461
|
assert ds.description == "new description"
|
|
462
462
|
assert ds.labels == ["new_label", "old_label"]
|
|
463
463
|
|
|
464
|
+
chain.save(
|
|
465
|
+
name="new_name",
|
|
466
|
+
description="updated description",
|
|
467
|
+
labels=["new_label", "old_label", "new_label2"],
|
|
468
|
+
)
|
|
469
|
+
ds = test_session.catalog.get_dataset("new_name")
|
|
470
|
+
assert ds.name == "new_name"
|
|
471
|
+
assert ds.description == "updated description"
|
|
472
|
+
assert ds.labels == ["new_label", "old_label", "new_label2"]
|
|
473
|
+
|
|
464
474
|
|
|
465
475
|
def test_show_nested_empty(capsys, test_session):
|
|
466
476
|
files = [File(size=s, path=p) for p, s in zip(list("abcde"), range(5))]
|
|
@@ -1000,7 +1010,6 @@ def test_udf_distributed_interrupt(cloud_test_catalog_tmpfile, capfd, datachain_
|
|
|
1000
1010
|
with pytest.raises(RuntimeError, match=r"Worker Killed \(KeyboardInterrupt\)"):
|
|
1001
1011
|
dc.show()
|
|
1002
1012
|
captured = capfd.readouterr()
|
|
1003
|
-
assert "KeyboardInterrupt" in captured.err
|
|
1004
1013
|
assert "semaphore" not in captured.err
|
|
1005
1014
|
|
|
1006
1015
|
|
|
@@ -6,7 +6,7 @@ from pydantic import BaseModel
|
|
|
6
6
|
from datachain.diff import CompareStatus, compare_and_split
|
|
7
7
|
from datachain.lib.dc import DataChain
|
|
8
8
|
from datachain.lib.file import File
|
|
9
|
-
from datachain.sql.types import
|
|
9
|
+
from datachain.sql.types import Int, String
|
|
10
10
|
from tests.utils import sorted_dicts
|
|
11
11
|
|
|
12
12
|
|
|
@@ -14,11 +14,21 @@ def _as_utc(d):
|
|
|
14
14
|
return d.replace(tzinfo=timezone.utc)
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
@pytest.fixture
|
|
18
|
+
def str_default(test_session):
|
|
19
|
+
return String.default_value(test_session.catalog.warehouse.db.dialect)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
|
|
23
|
+
def int_default(test_session):
|
|
24
|
+
return Int.default_value(test_session.catalog.warehouse.db.dialect)
|
|
25
|
+
|
|
26
|
+
|
|
17
27
|
@pytest.mark.parametrize("added", (True, False))
|
|
18
28
|
@pytest.mark.parametrize("deleted", (True, False))
|
|
19
29
|
@pytest.mark.parametrize("modified", (True, False))
|
|
20
30
|
@pytest.mark.parametrize("same", (True, False))
|
|
21
|
-
def test_compare(test_session, added, deleted, modified, same):
|
|
31
|
+
def test_compare(test_session, str_default, added, deleted, modified, same):
|
|
22
32
|
ds1 = DataChain.from_values(
|
|
23
33
|
id=[1, 2, 4],
|
|
24
34
|
name=["John1", "Doe", "Andy"],
|
|
@@ -76,7 +86,7 @@ def test_compare(test_session, added, deleted, modified, same):
|
|
|
76
86
|
|
|
77
87
|
if deleted:
|
|
78
88
|
assert "diff" not in chains[CompareStatus.DELETED].signals_schema.db_signals()
|
|
79
|
-
expected.append((CompareStatus.DELETED, 3,
|
|
89
|
+
expected.append((CompareStatus.DELETED, 3, str_default))
|
|
80
90
|
|
|
81
91
|
if same:
|
|
82
92
|
assert "diff" not in chains[CompareStatus.SAME].signals_schema.db_signals()
|
|
@@ -85,7 +95,7 @@ def test_compare(test_session, added, deleted, modified, same):
|
|
|
85
95
|
assert list(diff.order_by("id").collect("diff", "id", "name")) == expected
|
|
86
96
|
|
|
87
97
|
|
|
88
|
-
def test_compare_no_status_col(test_session):
|
|
98
|
+
def test_compare_no_status_col(test_session, str_default):
|
|
89
99
|
ds1 = DataChain.from_values(
|
|
90
100
|
id=[1, 2, 4],
|
|
91
101
|
name=["John1", "Doe", "Andy"],
|
|
@@ -108,14 +118,14 @@ def test_compare_no_status_col(test_session):
|
|
|
108
118
|
expected = [
|
|
109
119
|
(1, "John1"),
|
|
110
120
|
(2, "Doe"),
|
|
111
|
-
(3,
|
|
121
|
+
(3, str_default),
|
|
112
122
|
(4, "Andy"),
|
|
113
123
|
]
|
|
114
124
|
|
|
115
125
|
assert list(diff.order_by("id").collect()) == expected
|
|
116
126
|
|
|
117
127
|
|
|
118
|
-
def test_compare_from_datasets(test_session):
|
|
128
|
+
def test_compare_from_datasets(test_session, str_default):
|
|
119
129
|
ds1 = DataChain.from_values(
|
|
120
130
|
id=[1, 2, 4],
|
|
121
131
|
name=["John1", "Doe", "Andy"],
|
|
@@ -137,13 +147,13 @@ def test_compare_from_datasets(test_session):
|
|
|
137
147
|
assert list(diff.order_by("id").collect("diff", "id", "name")) == [
|
|
138
148
|
(CompareStatus.MODIFIED, 1, "John1"),
|
|
139
149
|
(CompareStatus.ADDED, 2, "Doe"),
|
|
140
|
-
(CompareStatus.DELETED, 3,
|
|
150
|
+
(CompareStatus.DELETED, 3, str_default),
|
|
141
151
|
(CompareStatus.SAME, 4, "Andy"),
|
|
142
152
|
]
|
|
143
153
|
|
|
144
154
|
|
|
145
155
|
@pytest.mark.parametrize("right_name", ("other_name", "name"))
|
|
146
|
-
def test_compare_with_explicit_compare_fields(test_session, right_name):
|
|
156
|
+
def test_compare_with_explicit_compare_fields(test_session, str_default, right_name):
|
|
147
157
|
ds1 = DataChain.from_values(
|
|
148
158
|
id=[1, 2, 4],
|
|
149
159
|
name=["John1", "Doe", "Andy"],
|
|
@@ -169,17 +179,10 @@ def test_compare_with_explicit_compare_fields(test_session, right_name):
|
|
|
169
179
|
status_col="diff",
|
|
170
180
|
)
|
|
171
181
|
|
|
172
|
-
string_default = String.default_value(test_session.catalog.warehouse.db.dialect)
|
|
173
|
-
|
|
174
182
|
expected = [
|
|
175
183
|
(CompareStatus.MODIFIED, 1, "John1", "New York"),
|
|
176
184
|
(CompareStatus.ADDED, 2, "Doe", "Boston"),
|
|
177
|
-
(
|
|
178
|
-
CompareStatus.DELETED,
|
|
179
|
-
3,
|
|
180
|
-
string_default if right_name == "other_name" else "Mark",
|
|
181
|
-
"Seattle",
|
|
182
|
-
),
|
|
185
|
+
(CompareStatus.DELETED, 3, str_default, str_default),
|
|
183
186
|
(CompareStatus.SAME, 4, "Andy", "San Francisco"),
|
|
184
187
|
]
|
|
185
188
|
|
|
@@ -187,7 +190,7 @@ def test_compare_with_explicit_compare_fields(test_session, right_name):
|
|
|
187
190
|
assert list(diff.order_by("id").collect(*collect_fields)) == expected
|
|
188
191
|
|
|
189
192
|
|
|
190
|
-
def test_compare_different_left_right_on_columns(test_session):
|
|
193
|
+
def test_compare_different_left_right_on_columns(test_session, str_default):
|
|
191
194
|
ds1 = DataChain.from_values(
|
|
192
195
|
id=[1, 2, 4],
|
|
193
196
|
name=["John1", "Doe", "Andy"],
|
|
@@ -208,17 +211,15 @@ def test_compare_different_left_right_on_columns(test_session):
|
|
|
208
211
|
status_col="diff",
|
|
209
212
|
)
|
|
210
213
|
|
|
211
|
-
int_default = Int64.default_value(test_session.catalog.warehouse.db.dialect)
|
|
212
|
-
|
|
213
214
|
expected = [
|
|
214
|
-
(CompareStatus.SAME, 4, "Andy"),
|
|
215
|
-
(CompareStatus.ADDED, 2, "Doe"),
|
|
216
215
|
(CompareStatus.MODIFIED, 1, "John1"),
|
|
217
|
-
(CompareStatus.
|
|
216
|
+
(CompareStatus.ADDED, 2, "Doe"),
|
|
217
|
+
(CompareStatus.DELETED, 3, str_default),
|
|
218
|
+
(CompareStatus.SAME, 4, "Andy"),
|
|
218
219
|
]
|
|
219
220
|
|
|
220
221
|
collect_fields = ["diff", "id", "name"]
|
|
221
|
-
assert list(diff.order_by("
|
|
222
|
+
assert list(diff.order_by("id").collect(*collect_fields)) == expected
|
|
222
223
|
|
|
223
224
|
|
|
224
225
|
@pytest.mark.parametrize("on_self", (True, False))
|
|
@@ -255,7 +256,7 @@ def test_compare_on_equal_datasets(test_session, on_self):
|
|
|
255
256
|
assert list(diff.order_by("id").collect(*collect_fields)) == expected
|
|
256
257
|
|
|
257
258
|
|
|
258
|
-
def test_compare_multiple_columns(test_session):
|
|
259
|
+
def test_compare_multiple_columns(test_session, str_default):
|
|
259
260
|
ds1 = DataChain.from_values(
|
|
260
261
|
id=[1, 2, 4],
|
|
261
262
|
name=["John", "Doe", "Andy"],
|
|
@@ -275,14 +276,19 @@ def test_compare_multiple_columns(test_session):
|
|
|
275
276
|
[
|
|
276
277
|
{"diff": CompareStatus.MODIFIED, "id": 1, "name": "John", "city": "London"},
|
|
277
278
|
{"diff": CompareStatus.ADDED, "id": 2, "name": "Doe", "city": "New York"},
|
|
278
|
-
{
|
|
279
|
+
{
|
|
280
|
+
"diff": CompareStatus.DELETED,
|
|
281
|
+
"id": 3,
|
|
282
|
+
"name": str_default,
|
|
283
|
+
"city": str_default,
|
|
284
|
+
},
|
|
279
285
|
{"diff": CompareStatus.SAME, "id": 4, "name": "Andy", "city": "Tokyo"},
|
|
280
286
|
],
|
|
281
287
|
"id",
|
|
282
288
|
)
|
|
283
289
|
|
|
284
290
|
|
|
285
|
-
def test_compare_multiple_match_columns(test_session):
|
|
291
|
+
def test_compare_multiple_match_columns(test_session, str_default):
|
|
286
292
|
ds1 = DataChain.from_values(
|
|
287
293
|
id=[1, 2, 4],
|
|
288
294
|
name=["John", "Doe", "Andy"],
|
|
@@ -302,14 +308,19 @@ def test_compare_multiple_match_columns(test_session):
|
|
|
302
308
|
[
|
|
303
309
|
{"diff": CompareStatus.MODIFIED, "id": 1, "name": "John", "city": "London"},
|
|
304
310
|
{"diff": CompareStatus.ADDED, "id": 2, "name": "Doe", "city": "New York"},
|
|
305
|
-
{
|
|
311
|
+
{
|
|
312
|
+
"diff": CompareStatus.DELETED,
|
|
313
|
+
"id": 3,
|
|
314
|
+
"name": "John",
|
|
315
|
+
"city": str_default,
|
|
316
|
+
},
|
|
306
317
|
{"diff": CompareStatus.SAME, "id": 4, "name": "Andy", "city": "Tokyo"},
|
|
307
318
|
],
|
|
308
319
|
"id",
|
|
309
320
|
)
|
|
310
321
|
|
|
311
322
|
|
|
312
|
-
def test_compare_additional_column_on_left(test_session):
|
|
323
|
+
def test_compare_additional_column_on_left(test_session, str_default):
|
|
313
324
|
ds1 = DataChain.from_values(
|
|
314
325
|
id=[1, 2, 4],
|
|
315
326
|
name=["John", "Doe", "Andy"],
|
|
@@ -322,8 +333,6 @@ def test_compare_additional_column_on_left(test_session):
|
|
|
322
333
|
session=test_session,
|
|
323
334
|
).save("ds2")
|
|
324
335
|
|
|
325
|
-
string_default = String.default_value(test_session.catalog.warehouse.db.dialect)
|
|
326
|
-
|
|
327
336
|
diff = ds1.compare(ds2, same=True, on=["id"], status_col="diff")
|
|
328
337
|
|
|
329
338
|
assert sorted_dicts(diff.to_records(), "id") == sorted_dicts(
|
|
@@ -333,8 +342,8 @@ def test_compare_additional_column_on_left(test_session):
|
|
|
333
342
|
{
|
|
334
343
|
"diff": CompareStatus.DELETED,
|
|
335
344
|
"id": 3,
|
|
336
|
-
"name":
|
|
337
|
-
"city":
|
|
345
|
+
"name": str_default,
|
|
346
|
+
"city": str_default,
|
|
338
347
|
},
|
|
339
348
|
{"diff": CompareStatus.MODIFIED, "id": 4, "name": "Andy", "city": "Tokyo"},
|
|
340
349
|
],
|
|
@@ -342,7 +351,7 @@ def test_compare_additional_column_on_left(test_session):
|
|
|
342
351
|
)
|
|
343
352
|
|
|
344
353
|
|
|
345
|
-
def test_compare_additional_column_on_right(test_session):
|
|
354
|
+
def test_compare_additional_column_on_right(test_session, str_default):
|
|
346
355
|
ds1 = DataChain.from_values(
|
|
347
356
|
id=[1, 2, 4],
|
|
348
357
|
name=["John", "Doe", "Andy"],
|
|
@@ -361,7 +370,7 @@ def test_compare_additional_column_on_right(test_session):
|
|
|
361
370
|
[
|
|
362
371
|
{"diff": CompareStatus.MODIFIED, "id": 1, "name": "John"},
|
|
363
372
|
{"diff": CompareStatus.ADDED, "id": 2, "name": "Doe"},
|
|
364
|
-
{"diff": CompareStatus.DELETED, "id": 3, "name":
|
|
373
|
+
{"diff": CompareStatus.DELETED, "id": 3, "name": str_default},
|
|
365
374
|
{"diff": CompareStatus.MODIFIED, "id": 4, "name": "Andy"},
|
|
366
375
|
],
|
|
367
376
|
"id",
|
|
@@ -413,7 +422,8 @@ def test_compare_right_compare_wrong_length(test_session):
|
|
|
413
422
|
|
|
414
423
|
|
|
415
424
|
@pytest.mark.parametrize("status_col", ("diff", None))
|
|
416
|
-
|
|
425
|
+
@pytest.mark.parametrize("right_on", ("file2", None))
|
|
426
|
+
def test_diff(test_session, str_default, int_default, status_col, right_on):
|
|
417
427
|
fs1 = File(source="s1", path="p1", version="2", etag="e2")
|
|
418
428
|
fs1_updated = File(source="s1", path="p1", version="1", etag="e1")
|
|
419
429
|
fs2 = File(source="s2", path="p2", version="1", etag="e1")
|
|
@@ -421,43 +431,53 @@ def test_diff(test_session, status_col):
|
|
|
421
431
|
fs4 = File(source="s4", path="p4", version="1", etag="e1")
|
|
422
432
|
|
|
423
433
|
ds1 = DataChain.from_values(
|
|
424
|
-
|
|
425
|
-
)
|
|
426
|
-
ds2 = DataChain.from_values(
|
|
427
|
-
file=[fs1, fs3, fs4], score=[1, 3, 4], session=test_session
|
|
434
|
+
file1=[fs1_updated, fs2, fs4], score=[1, 2, 4], session=test_session
|
|
428
435
|
)
|
|
429
436
|
|
|
437
|
+
if right_on:
|
|
438
|
+
ds2 = DataChain.from_values(
|
|
439
|
+
file2=[fs1, fs3, fs4], score=[1, 3, 4], session=test_session
|
|
440
|
+
)
|
|
441
|
+
else:
|
|
442
|
+
ds2 = DataChain.from_values(
|
|
443
|
+
file1=[fs1, fs3, fs4], score=[1, 3, 4], session=test_session
|
|
444
|
+
)
|
|
445
|
+
|
|
430
446
|
diff = ds1.diff(
|
|
431
447
|
ds2,
|
|
432
448
|
added=True,
|
|
433
449
|
deleted=True,
|
|
434
450
|
modified=True,
|
|
435
451
|
same=True,
|
|
436
|
-
on="
|
|
452
|
+
on="file1",
|
|
453
|
+
right_on=right_on,
|
|
437
454
|
status_col=status_col,
|
|
438
455
|
)
|
|
439
456
|
|
|
440
457
|
expected = [
|
|
441
|
-
(CompareStatus.MODIFIED,
|
|
442
|
-
(CompareStatus.ADDED,
|
|
443
|
-
(CompareStatus.DELETED,
|
|
444
|
-
(CompareStatus.SAME,
|
|
458
|
+
(CompareStatus.MODIFIED, "s1", "p1", "1", "e1", 1),
|
|
459
|
+
(CompareStatus.ADDED, "s2", "p2", "1", "e1", 2),
|
|
460
|
+
(CompareStatus.DELETED, "s3", "p3", str_default, str_default, int_default),
|
|
461
|
+
(CompareStatus.SAME, "s4", "p4", "1", "e1", 4),
|
|
445
462
|
]
|
|
446
463
|
|
|
447
|
-
collect_fields = [
|
|
464
|
+
collect_fields = [
|
|
465
|
+
"diff",
|
|
466
|
+
"file1.source",
|
|
467
|
+
"file1.path",
|
|
468
|
+
"file1.version",
|
|
469
|
+
"file1.etag",
|
|
470
|
+
"score",
|
|
471
|
+
]
|
|
448
472
|
if not status_col:
|
|
449
473
|
expected = [row[1:] for row in expected]
|
|
450
474
|
collect_fields = collect_fields[1:]
|
|
451
475
|
|
|
452
|
-
|
|
453
|
-
for r in res:
|
|
454
|
-
r[-2].last_modified = _as_utc(r[-2].last_modified)
|
|
455
|
-
|
|
456
|
-
assert res == expected
|
|
476
|
+
assert list(diff.order_by("file1.source").collect(*collect_fields)) == expected
|
|
457
477
|
|
|
458
478
|
|
|
459
479
|
@pytest.mark.parametrize("status_col", ("diff", None))
|
|
460
|
-
def test_diff_nested(test_session, status_col):
|
|
480
|
+
def test_diff_nested(test_session, str_default, int_default, status_col):
|
|
461
481
|
class Nested(BaseModel):
|
|
462
482
|
file: File
|
|
463
483
|
|
|
@@ -491,12 +511,25 @@ def test_diff_nested(test_session, status_col):
|
|
|
491
511
|
(CompareStatus.SAME, fs4, 4),
|
|
492
512
|
]
|
|
493
513
|
|
|
494
|
-
|
|
514
|
+
expected = [
|
|
515
|
+
(CompareStatus.MODIFIED, "s1", "p1", "1", "e1", 1),
|
|
516
|
+
(CompareStatus.ADDED, "s2", "p2", "1", "e1", 2),
|
|
517
|
+
(CompareStatus.DELETED, "s3", "p3", str_default, str_default, int_default),
|
|
518
|
+
(CompareStatus.SAME, "s4", "p4", "1", "e1", 4),
|
|
519
|
+
]
|
|
520
|
+
|
|
521
|
+
collect_fields = [
|
|
522
|
+
"diff",
|
|
523
|
+
"nested.file.source",
|
|
524
|
+
"nested.file.path",
|
|
525
|
+
"nested.file.version",
|
|
526
|
+
"nested.file.etag",
|
|
527
|
+
"score",
|
|
528
|
+
]
|
|
495
529
|
if not status_col:
|
|
496
530
|
expected = [row[1:] for row in expected]
|
|
497
531
|
collect_fields = collect_fields[1:]
|
|
498
532
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
assert res == expected
|
|
533
|
+
assert (
|
|
534
|
+
list(diff.order_by("nested.file.source").collect(*collect_fields)) == expected
|
|
535
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|