datachain 0.23.0__tar.gz → 0.24.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.23.0 → datachain-0.24.0}/PKG-INFO +1 -1
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/catalog/catalog.py +25 -13
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/dataset.py +34 -5
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/datasets.py +52 -35
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/listings.py +2 -6
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/projects.py +1 -1
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/dataset.py +2 -8
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/remote/studio.py +4 -3
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain.egg-info/SOURCES.txt +2 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/conftest.py +169 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_dataset_query.py +2 -4
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_delta.py +3 -3
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_ls.py +4 -2
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_pull.py +61 -199
- datachain-0.24.0/tests/func/test_read_dataset_remote.py +555 -0
- datachain-0.24.0/tests/func/test_read_dataset_version_specifiers.py +88 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_retry.py +2 -2
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_datachain.py +10 -1
- {datachain-0.23.0 → datachain-0.24.0}/.cruft.json +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.gitattributes +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/codecov.yaml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/dependabot.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/workflows/release.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.gitignore +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/.pre-commit-config.yaml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/LICENSE +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/README.rst +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/index.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/commands/job/run.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/contributing.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/examples.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/delta.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/env.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/index.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/namespaces.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/processing.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/remotes.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/guide/retry.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/index.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/overrides/main.html +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/quick-start.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/datachain.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/func.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/index.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/toolkit.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/torch.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/references/udf.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/docs/tutorials.md +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/mkdocs.yml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/noxfile.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/pyproject.toml +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/setup.cfg +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/__main__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/asyn.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cache.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/local.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/config.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/delta.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/error.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/array.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/base.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/func.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/path.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/random.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/string.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/func/window.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/job.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/file.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/listing.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/namespace.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/node.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/progress.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/project.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/py.typed +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/params.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/session.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/semver.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/studio.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain/utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/data.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/examples/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_batching.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_client.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_datachain.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_file.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_hf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_image.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_listing.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_query.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_session.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_video.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/test_atomicity.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/test_import_time.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/test_telemetry.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_client.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_config.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_func.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_query.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_session.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.23.0 → datachain-0.24.0}/tests/utils.py +0 -0
|
@@ -49,6 +49,7 @@ from datachain.error import (
|
|
|
49
49
|
DatasetInvalidVersionError,
|
|
50
50
|
DatasetNotFoundError,
|
|
51
51
|
DatasetVersionNotFoundError,
|
|
52
|
+
NamespaceNotFoundError,
|
|
52
53
|
ProjectNotFoundError,
|
|
53
54
|
QueryScriptCancelError,
|
|
54
55
|
QueryScriptRunError,
|
|
@@ -1107,21 +1108,26 @@ class Catalog:
|
|
|
1107
1108
|
namespace_name: str,
|
|
1108
1109
|
project_name: str,
|
|
1109
1110
|
version: Optional[str] = None,
|
|
1111
|
+
pull_dataset: bool = False,
|
|
1112
|
+
update: bool = False,
|
|
1110
1113
|
) -> DatasetRecord:
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1114
|
+
if self.metastore.is_local_dataset(namespace_name) or not update:
|
|
1115
|
+
try:
|
|
1116
|
+
project = self.metastore.get_project(project_name, namespace_name)
|
|
1117
|
+
ds = self.get_dataset(name, project)
|
|
1118
|
+
if not version or ds.has_version(version):
|
|
1119
|
+
return ds
|
|
1120
|
+
except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
|
|
1121
|
+
pass
|
|
1122
|
+
|
|
1123
|
+
if self.metastore.is_local_dataset(namespace_name):
|
|
1124
|
+
raise DatasetNotFoundError(
|
|
1125
|
+
f"Dataset {name}"
|
|
1126
|
+
+ (f" version {version} " if version else " ")
|
|
1127
|
+
+ "not found"
|
|
1128
|
+
)
|
|
1119
1129
|
|
|
1120
|
-
|
|
1121
|
-
ProjectNotFoundError,
|
|
1122
|
-
DatasetNotFoundError,
|
|
1123
|
-
DatasetVersionNotFoundError,
|
|
1124
|
-
):
|
|
1130
|
+
if pull_dataset:
|
|
1125
1131
|
print("Dataset not found in local catalog, trying to get from studio")
|
|
1126
1132
|
remote_ds_uri = create_dataset_uri(
|
|
1127
1133
|
name, namespace_name, project_name, version
|
|
@@ -1136,6 +1142,8 @@ class Catalog:
|
|
|
1136
1142
|
name, self.metastore.get_project(project_name, namespace_name)
|
|
1137
1143
|
)
|
|
1138
1144
|
|
|
1145
|
+
return self.get_remote_dataset(namespace_name, project_name, name)
|
|
1146
|
+
|
|
1139
1147
|
def get_dataset_with_version_uuid(self, uuid: str) -> DatasetRecord:
|
|
1140
1148
|
"""Returns dataset that contains version with specific uuid"""
|
|
1141
1149
|
for dataset in self.ls_datasets():
|
|
@@ -1152,6 +1160,10 @@ class Catalog:
|
|
|
1152
1160
|
|
|
1153
1161
|
info_response = studio_client.dataset_info(namespace, project, name)
|
|
1154
1162
|
if not info_response.ok:
|
|
1163
|
+
if info_response.status == 404:
|
|
1164
|
+
raise DatasetNotFoundError(
|
|
1165
|
+
f"Dataset {namespace}.{project}.{name} not found"
|
|
1166
|
+
)
|
|
1155
1167
|
raise DataChainError(info_response.message)
|
|
1156
1168
|
|
|
1157
1169
|
dataset_info = info_response.data
|
|
@@ -12,6 +12,9 @@ from typing import (
|
|
|
12
12
|
)
|
|
13
13
|
from urllib.parse import urlparse
|
|
14
14
|
|
|
15
|
+
from packaging.specifiers import SpecifierSet
|
|
16
|
+
from packaging.version import Version
|
|
17
|
+
|
|
15
18
|
from datachain import semver
|
|
16
19
|
from datachain.error import DatasetVersionNotFoundError, InvalidDatasetNameError
|
|
17
20
|
from datachain.namespace import Namespace
|
|
@@ -661,13 +664,39 @@ class DatasetRecord:
|
|
|
661
664
|
return None
|
|
662
665
|
return max(versions).version
|
|
663
666
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
667
|
+
def latest_compatible_version(self, version_spec: str) -> Optional[str]:
|
|
668
|
+
"""
|
|
669
|
+
Returns the latest version that matches the given version specifier.
|
|
670
|
+
|
|
671
|
+
Supports Python version specifiers like:
|
|
672
|
+
- ">=1.0.0,<2.0.0" (compatible release range)
|
|
673
|
+
- "~=1.4.2" (compatible release clause)
|
|
674
|
+
- "==1.2.*" (prefix matching)
|
|
675
|
+
- ">1.0.0" (exclusive ordered comparison)
|
|
676
|
+
- ">=1.0.0" (inclusive ordered comparison)
|
|
677
|
+
- "!=1.3.0" (version exclusion)
|
|
678
|
+
|
|
679
|
+
Args:
|
|
680
|
+
version_spec: Version specifier string following PEP 440
|
|
681
|
+
|
|
682
|
+
Returns:
|
|
683
|
+
Latest compatible version string, or None if no compatible version found
|
|
684
|
+
"""
|
|
685
|
+
spec_set = SpecifierSet(version_spec)
|
|
686
|
+
|
|
687
|
+
# Convert dataset versions to packaging.Version objects
|
|
688
|
+
# and filter compatible ones
|
|
689
|
+
compatible_versions = []
|
|
690
|
+
for v in self.versions:
|
|
691
|
+
pkg_version = Version(v.version)
|
|
692
|
+
if spec_set.contains(pkg_version):
|
|
693
|
+
compatible_versions.append(v)
|
|
694
|
+
|
|
695
|
+
if not compatible_versions:
|
|
668
696
|
return None
|
|
669
697
|
|
|
670
|
-
|
|
698
|
+
# Return the latest compatible version
|
|
699
|
+
return max(compatible_versions).version
|
|
671
700
|
|
|
672
701
|
@classmethod
|
|
673
702
|
def from_dict(cls, d: dict[str, Any]) -> "DatasetRecord":
|
|
@@ -7,9 +7,6 @@ from datachain.error import (
|
|
|
7
7
|
ProjectNotFoundError,
|
|
8
8
|
)
|
|
9
9
|
from datachain.lib.dataset_info import DatasetInfo
|
|
10
|
-
from datachain.lib.file import (
|
|
11
|
-
File,
|
|
12
|
-
)
|
|
13
10
|
from datachain.lib.projects import get as get_project
|
|
14
11
|
from datachain.lib.settings import Settings
|
|
15
12
|
from datachain.lib.signal_schema import SignalSchema
|
|
@@ -34,7 +31,6 @@ def read_dataset(
|
|
|
34
31
|
version: Optional[Union[str, int]] = None,
|
|
35
32
|
session: Optional[Session] = None,
|
|
36
33
|
settings: Optional[dict] = None,
|
|
37
|
-
fallback_to_studio: bool = True,
|
|
38
34
|
delta: Optional[bool] = False,
|
|
39
35
|
delta_on: Optional[Union[str, Sequence[str]]] = (
|
|
40
36
|
"file.path",
|
|
@@ -44,6 +40,7 @@ def read_dataset(
|
|
|
44
40
|
delta_result_on: Optional[Union[str, Sequence[str]]] = None,
|
|
45
41
|
delta_compare: Optional[Union[str, Sequence[str]]] = None,
|
|
46
42
|
delta_retry: Optional[Union[bool, str]] = None,
|
|
43
|
+
update: bool = False,
|
|
47
44
|
) -> "DataChain":
|
|
48
45
|
"""Get data from a saved Dataset. It returns the chain itself.
|
|
49
46
|
If dataset or version is not found locally, it will try to pull it from Studio.
|
|
@@ -55,11 +52,12 @@ def read_dataset(
|
|
|
55
52
|
set; otherwise, default values will be applied.
|
|
56
53
|
namespace : optional name of namespace in which dataset to read is created
|
|
57
54
|
project : optional name of project in which dataset to read is created
|
|
58
|
-
version : dataset version
|
|
55
|
+
version : dataset version. Supports:
|
|
56
|
+
- Exact version strings: "1.2.3"
|
|
57
|
+
- Legacy integer versions: 1, 2, 3 (finds latest major version)
|
|
58
|
+
- Version specifiers (PEP 440): ">=1.0.0,<2.0.0", "~=1.4.2", "==1.2.*", etc.
|
|
59
59
|
session : Session to use for the chain.
|
|
60
60
|
settings : Settings to use for the chain.
|
|
61
|
-
fallback_to_studio : Try to pull dataset from Studio if not found locally.
|
|
62
|
-
Default is True.
|
|
63
61
|
delta: If True, only process new or changed files instead of reprocessing
|
|
64
62
|
everything. This saves time by skipping files that were already processed in
|
|
65
63
|
previous versions. The optimization is working when a new version of the
|
|
@@ -79,6 +77,10 @@ def read_dataset(
|
|
|
79
77
|
(error mode)
|
|
80
78
|
- True: Reprocess records missing from the result dataset (missing mode)
|
|
81
79
|
- None: No retry processing (default)
|
|
80
|
+
update: If True always checks for newer versions available on Studio, even if
|
|
81
|
+
some version of the dataset exists locally already. If False (default), it
|
|
82
|
+
will only fetch the dataset from Studio if it is not found locally.
|
|
83
|
+
|
|
82
84
|
|
|
83
85
|
Example:
|
|
84
86
|
```py
|
|
@@ -92,11 +94,22 @@ def read_dataset(
|
|
|
92
94
|
```
|
|
93
95
|
|
|
94
96
|
```py
|
|
95
|
-
chain = dc.read_dataset("my_cats",
|
|
97
|
+
chain = dc.read_dataset("my_cats", version="1.0.0")
|
|
96
98
|
```
|
|
97
99
|
|
|
98
100
|
```py
|
|
99
|
-
|
|
101
|
+
# Using version specifiers (PEP 440)
|
|
102
|
+
chain = dc.read_dataset("my_cats", version=">=1.0.0,<2.0.0")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
```py
|
|
106
|
+
# Legacy integer version support (finds latest in major version)
|
|
107
|
+
chain = dc.read_dataset("my_cats", version=1) # Latest 1.x.x version
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
```py
|
|
111
|
+
# Always check for newer versions matching a version specifier from Studio
|
|
112
|
+
chain = dc.read_dataset("my_cats", version=">=1.0.0", update=True)
|
|
100
113
|
```
|
|
101
114
|
|
|
102
115
|
```py
|
|
@@ -113,7 +126,6 @@ def read_dataset(
|
|
|
113
126
|
version="1.0.0",
|
|
114
127
|
session=session,
|
|
115
128
|
settings=settings,
|
|
116
|
-
fallback_to_studio=True,
|
|
117
129
|
)
|
|
118
130
|
```
|
|
119
131
|
"""
|
|
@@ -121,6 +133,8 @@ def read_dataset(
|
|
|
121
133
|
|
|
122
134
|
from .datachain import DataChain
|
|
123
135
|
|
|
136
|
+
telemetry.send_event_once("class", "datachain_init", name=name, version=version)
|
|
137
|
+
|
|
124
138
|
session = Session.get(session)
|
|
125
139
|
catalog = session.catalog
|
|
126
140
|
|
|
@@ -131,31 +145,37 @@ def read_dataset(
|
|
|
131
145
|
)
|
|
132
146
|
|
|
133
147
|
if version is not None:
|
|
148
|
+
dataset = session.catalog.get_dataset_with_remote_fallback(
|
|
149
|
+
name, namespace_name, project_name, update=update
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
# Convert legacy integer versions to version specifiers
|
|
153
|
+
# For backward compatibility we still allow users to put version as integer
|
|
154
|
+
# in which case we convert it to a version specifier that finds the latest
|
|
155
|
+
# version where major part is equal to that input version.
|
|
156
|
+
# For example if user sets version=2, we convert it to ">=2.0.0,<3.0.0"
|
|
157
|
+
# which will find something like 2.4.3 (assuming 2.4.3 is the biggest among
|
|
158
|
+
# all 2.* dataset versions)
|
|
159
|
+
if isinstance(version, int):
|
|
160
|
+
version_spec = f">={version}.0.0,<{version + 1}.0.0"
|
|
161
|
+
else:
|
|
162
|
+
version_spec = str(version)
|
|
163
|
+
|
|
164
|
+
from packaging.specifiers import InvalidSpecifier, SpecifierSet
|
|
165
|
+
|
|
134
166
|
try:
|
|
135
|
-
#
|
|
136
|
-
|
|
137
|
-
#
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
# major part is equal to that input, exception is thrown.
|
|
141
|
-
major = int(version)
|
|
142
|
-
try:
|
|
143
|
-
ds_project = get_project(project_name, namespace_name, session=session)
|
|
144
|
-
except ProjectNotFoundError:
|
|
145
|
-
raise DatasetNotFoundError(
|
|
146
|
-
f"Dataset {name} not found in namespace {namespace_name} and",
|
|
147
|
-
f" project {project_name}",
|
|
148
|
-
) from None
|
|
149
|
-
|
|
150
|
-
dataset = session.catalog.get_dataset(name, ds_project)
|
|
151
|
-
latest_major = dataset.latest_major_version(major)
|
|
152
|
-
if not latest_major:
|
|
167
|
+
# Try to parse as version specifier
|
|
168
|
+
SpecifierSet(version_spec)
|
|
169
|
+
# If it's a valid specifier set, find the latest compatible version
|
|
170
|
+
latest_compatible = dataset.latest_compatible_version(version_spec)
|
|
171
|
+
if not latest_compatible:
|
|
153
172
|
raise DatasetVersionNotFoundError(
|
|
154
|
-
f"
|
|
173
|
+
f"No dataset {name} version matching specifier {version_spec}"
|
|
155
174
|
)
|
|
156
|
-
version =
|
|
157
|
-
except
|
|
158
|
-
#
|
|
175
|
+
version = latest_compatible
|
|
176
|
+
except InvalidSpecifier:
|
|
177
|
+
# If not a valid specifier, treat as exact version string
|
|
178
|
+
# This handles cases like "1.2.3" which are exact versions, not specifiers
|
|
159
179
|
pass
|
|
160
180
|
|
|
161
181
|
if settings:
|
|
@@ -169,11 +189,8 @@ def read_dataset(
|
|
|
169
189
|
namespace_name=namespace_name,
|
|
170
190
|
version=version, # type: ignore[arg-type]
|
|
171
191
|
session=session,
|
|
172
|
-
indexing_column_types=File._datachain_column_types,
|
|
173
|
-
fallback_to_studio=fallback_to_studio,
|
|
174
192
|
)
|
|
175
193
|
|
|
176
|
-
telemetry.send_event_once("class", "datachain_init", name=name, version=version)
|
|
177
194
|
signals_schema = SignalSchema({"sys": Sys})
|
|
178
195
|
if query.feature_schema:
|
|
179
196
|
signals_schema |= SignalSchema.deserialize(query.feature_schema)
|
|
@@ -127,12 +127,8 @@ def read_listing_dataset(
|
|
|
127
127
|
if version is None:
|
|
128
128
|
version = dataset.latest_version
|
|
129
129
|
|
|
130
|
-
query = DatasetQuery(
|
|
131
|
-
|
|
132
|
-
session=session,
|
|
133
|
-
indexing_column_types=File._datachain_column_types,
|
|
134
|
-
fallback_to_studio=False,
|
|
135
|
-
)
|
|
130
|
+
query = DatasetQuery(name=name, session=session)
|
|
131
|
+
|
|
136
132
|
if settings:
|
|
137
133
|
cfg = {**settings}
|
|
138
134
|
if "prefetch" not in cfg:
|
|
@@ -54,7 +54,7 @@ def get(name: str, namespace: str, session: Optional[Session]) -> Project:
|
|
|
54
54
|
```py
|
|
55
55
|
import datachain as dc
|
|
56
56
|
from datachain.lib.projects import get as get_project
|
|
57
|
-
project
|
|
57
|
+
project = get_project("my-project", "local")
|
|
58
58
|
```
|
|
59
59
|
"""
|
|
60
60
|
return Session.get(session).catalog.metastore.get_project(name, namespace)
|
|
@@ -1099,13 +1099,9 @@ class DatasetQuery:
|
|
|
1099
1099
|
namespace_name: Optional[str] = None,
|
|
1100
1100
|
catalog: Optional["Catalog"] = None,
|
|
1101
1101
|
session: Optional[Session] = None,
|
|
1102
|
-
indexing_column_types: Optional[dict[str, Any]] = None,
|
|
1103
1102
|
in_memory: bool = False,
|
|
1104
|
-
fallback_to_studio: bool = True,
|
|
1105
1103
|
update: bool = False,
|
|
1106
1104
|
) -> None:
|
|
1107
|
-
from datachain.remote.studio import is_token_set
|
|
1108
|
-
|
|
1109
1105
|
self.session = Session.get(session, catalog=catalog, in_memory=in_memory)
|
|
1110
1106
|
self.catalog = catalog or self.session.catalog
|
|
1111
1107
|
self.steps: list[Step] = []
|
|
@@ -1137,18 +1133,16 @@ class DatasetQuery:
|
|
|
1137
1133
|
# not setting query step yet as listing dataset might not exist at
|
|
1138
1134
|
# this point
|
|
1139
1135
|
self.list_ds_name = name
|
|
1140
|
-
|
|
1136
|
+
else:
|
|
1141
1137
|
self._set_starting_step(
|
|
1142
1138
|
self.catalog.get_dataset_with_remote_fallback(
|
|
1143
1139
|
name,
|
|
1144
1140
|
namespace_name=namespace_name,
|
|
1145
1141
|
project_name=project_name,
|
|
1146
1142
|
version=version,
|
|
1143
|
+
pull_dataset=True,
|
|
1147
1144
|
)
|
|
1148
1145
|
)
|
|
1149
|
-
else:
|
|
1150
|
-
project = self.catalog.metastore.get_project(project_name, namespace_name)
|
|
1151
|
-
self._set_starting_step(self.catalog.get_dataset(name, project=project))
|
|
1152
1146
|
|
|
1153
1147
|
def _set_starting_step(self, ds: "DatasetRecord") -> None:
|
|
1154
1148
|
if not self.version:
|
|
@@ -78,10 +78,11 @@ def _parse_dates(obj: dict, date_fields: list[str]):
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
class Response(Generic[T]):
|
|
81
|
-
def __init__(self, data: T, ok: bool, message: str) -> None:
|
|
81
|
+
def __init__(self, data: T, ok: bool, message: str, status: int) -> None:
|
|
82
82
|
self.data = data
|
|
83
83
|
self.ok = ok
|
|
84
84
|
self.message = message
|
|
85
|
+
self.status = status
|
|
85
86
|
|
|
86
87
|
def __repr__(self):
|
|
87
88
|
return (
|
|
@@ -186,7 +187,7 @@ class StudioClient:
|
|
|
186
187
|
message = "Indexing in progress"
|
|
187
188
|
else:
|
|
188
189
|
message = content.get("message", "")
|
|
189
|
-
return Response(response_data, ok, message)
|
|
190
|
+
return Response(response_data, ok, message, response.status_code)
|
|
190
191
|
|
|
191
192
|
@retry_with_backoff(retries=3, errors=(HTTPError, Timeout))
|
|
192
193
|
def _send_request(
|
|
@@ -236,7 +237,7 @@ class StudioClient:
|
|
|
236
237
|
else:
|
|
237
238
|
message = ""
|
|
238
239
|
|
|
239
|
-
return Response(data, ok, message)
|
|
240
|
+
return Response(data, ok, message, response.status_code)
|
|
240
241
|
|
|
241
242
|
@staticmethod
|
|
242
243
|
def _unpacker_hook(code, data):
|
|
@@ -298,6 +298,8 @@ tests/func/test_pull.py
|
|
|
298
298
|
tests/func/test_pytorch.py
|
|
299
299
|
tests/func/test_query.py
|
|
300
300
|
tests/func/test_read_database.py
|
|
301
|
+
tests/func/test_read_dataset_remote.py
|
|
302
|
+
tests/func/test_read_dataset_version_specifiers.py
|
|
301
303
|
tests/func/test_retry.py
|
|
302
304
|
tests/func/test_session.py
|
|
303
305
|
tests/func/test_toolkit.py
|
|
@@ -904,3 +904,172 @@ def run_datachain_worker(datachain_job_id):
|
|
|
904
904
|
worker.wait(timeout=30) # seconds
|
|
905
905
|
except subprocess.TimeoutExpired:
|
|
906
906
|
os.kill(worker.pid, signal.SIGKILL)
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
# Common constants for remote dataset testing
|
|
910
|
+
REMOTE_DATASET_UUID = "20f5a2f1-fc9a-4e36-8b91-5a530f289451"
|
|
911
|
+
REMOTE_DATASET_UUID_V2 = "30f5a2f1-fc9a-4e36-8b91-5a530f289452"
|
|
912
|
+
REMOTE_NAMESPACE_UUID = "efbc3e84-3eb6-4be1-bec1-704b939e1fe4"
|
|
913
|
+
REMOTE_PROJECT_UUID = "0ed3a6c6-f2f7-45aa-869b-39219c86a9d4"
|
|
914
|
+
|
|
915
|
+
REMOTE_NAMESPACE_NAME = "dev"
|
|
916
|
+
REMOTE_PROJECT_NAME = "animals"
|
|
917
|
+
|
|
918
|
+
|
|
919
|
+
@pytest.fixture
|
|
920
|
+
def remote_dataset_schema():
|
|
921
|
+
"""Common schema for remote datasets."""
|
|
922
|
+
return {
|
|
923
|
+
"id": {"type": "UInt64"},
|
|
924
|
+
"sys__rand": {"type": "UInt64"},
|
|
925
|
+
"file__path": {"type": "String"},
|
|
926
|
+
"file__etag": {"type": "String"},
|
|
927
|
+
"file__version": {"type": "String"},
|
|
928
|
+
"file__is_latest": {"type": "Boolean"},
|
|
929
|
+
"file__last_modified": {"type": "DateTime"},
|
|
930
|
+
"file__size": {"type": "Int64"},
|
|
931
|
+
"file__location": {"type": "String"},
|
|
932
|
+
"file__source": {"type": "String"},
|
|
933
|
+
"version": {"type": "String"},
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
@pytest.fixture
|
|
938
|
+
def remote_file_feature_schema():
|
|
939
|
+
"""Common File feature schema for remote datasets."""
|
|
940
|
+
return {
|
|
941
|
+
"file": "File@v1",
|
|
942
|
+
"version": "str",
|
|
943
|
+
"_custom_types": {
|
|
944
|
+
"File@v1": {
|
|
945
|
+
"schema_version": 2,
|
|
946
|
+
"name": "File@v1",
|
|
947
|
+
"fields": {
|
|
948
|
+
"source": "str",
|
|
949
|
+
"path": "str",
|
|
950
|
+
"size": "int",
|
|
951
|
+
"version": "str",
|
|
952
|
+
"etag": "str",
|
|
953
|
+
"is_latest": "bool",
|
|
954
|
+
"last_modified": "datetime",
|
|
955
|
+
"location": "Union[dict, list[dict], NoneType]",
|
|
956
|
+
},
|
|
957
|
+
"bases": [
|
|
958
|
+
["File", "datachain.lib.file", "File@v1"],
|
|
959
|
+
["DataModel", "datachain.lib.data_model", "DataModel@v1"],
|
|
960
|
+
["BaseModel", "pydantic.main", None],
|
|
961
|
+
["object", "builtins", None],
|
|
962
|
+
],
|
|
963
|
+
"hidden_fields": [
|
|
964
|
+
"source",
|
|
965
|
+
"version",
|
|
966
|
+
"etag",
|
|
967
|
+
"is_latest",
|
|
968
|
+
"last_modified",
|
|
969
|
+
"location",
|
|
970
|
+
],
|
|
971
|
+
}
|
|
972
|
+
},
|
|
973
|
+
}
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
@pytest.fixture
|
|
977
|
+
def remote_namespace():
|
|
978
|
+
"""Remote namespace fixture for Studio API mocking."""
|
|
979
|
+
return {
|
|
980
|
+
"id": 1,
|
|
981
|
+
"uuid": REMOTE_NAMESPACE_UUID,
|
|
982
|
+
"name": REMOTE_NAMESPACE_NAME,
|
|
983
|
+
"descr": "Dev namespace",
|
|
984
|
+
"created_at": "2024-02-23T10:42:31.842944+00:00",
|
|
985
|
+
}
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
@pytest.fixture
|
|
989
|
+
def remote_project(remote_namespace):
|
|
990
|
+
"""Remote project fixture for Studio API mocking."""
|
|
991
|
+
return {
|
|
992
|
+
"id": 1,
|
|
993
|
+
"uuid": REMOTE_PROJECT_UUID,
|
|
994
|
+
"name": REMOTE_PROJECT_NAME,
|
|
995
|
+
"descr": "Animals project",
|
|
996
|
+
"created_at": "2024-02-23T10:42:31.842944+00:00",
|
|
997
|
+
"namespace": remote_namespace,
|
|
998
|
+
}
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
@pytest.fixture
|
|
1002
|
+
def compressed_parquet_data():
|
|
1003
|
+
"""
|
|
1004
|
+
Factory fixture that creates lz4-compressed parquet for datasets.
|
|
1005
|
+
Returns a function that can be called with different data.
|
|
1006
|
+
"""
|
|
1007
|
+
import io
|
|
1008
|
+
from datetime import datetime
|
|
1009
|
+
|
|
1010
|
+
import lz4.frame
|
|
1011
|
+
import pandas as pd
|
|
1012
|
+
|
|
1013
|
+
def create_compressed_parquet(data, src_uri=None):
|
|
1014
|
+
def _adapt_row(row):
|
|
1015
|
+
"""
|
|
1016
|
+
Adjusting row values to match remote response
|
|
1017
|
+
"""
|
|
1018
|
+
adapted = {}
|
|
1019
|
+
for k, v in row.items():
|
|
1020
|
+
if isinstance(v, datetime):
|
|
1021
|
+
adapted[k] = v.timestamp()
|
|
1022
|
+
elif v is None:
|
|
1023
|
+
adapted[k] = ""
|
|
1024
|
+
else:
|
|
1025
|
+
adapted[k] = v
|
|
1026
|
+
|
|
1027
|
+
adapted["sys__id"] = 1
|
|
1028
|
+
adapted["sys__rand"] = 1
|
|
1029
|
+
adapted["file__location"] = ""
|
|
1030
|
+
adapted["file__source"] = src_uri or ""
|
|
1031
|
+
adapted["file__version"] = ""
|
|
1032
|
+
return adapted
|
|
1033
|
+
|
|
1034
|
+
adapted_data = [_adapt_row(row) for row in data]
|
|
1035
|
+
df = pd.DataFrame.from_records(adapted_data)
|
|
1036
|
+
buffer = io.BytesIO()
|
|
1037
|
+
df.to_parquet(buffer, engine="auto")
|
|
1038
|
+
|
|
1039
|
+
return lz4.frame.compress(buffer.getvalue())
|
|
1040
|
+
|
|
1041
|
+
return create_compressed_parquet
|
|
1042
|
+
|
|
1043
|
+
|
|
1044
|
+
@pytest.fixture
|
|
1045
|
+
def dog_entries():
|
|
1046
|
+
"""Factory function to create version-specific dog entries."""
|
|
1047
|
+
from tests.data import ENTRIES
|
|
1048
|
+
|
|
1049
|
+
def _create_dog_entries(version="1.0.0"):
|
|
1050
|
+
return [
|
|
1051
|
+
{
|
|
1052
|
+
"file__path": e.path,
|
|
1053
|
+
"file__etag": e.etag,
|
|
1054
|
+
"file__version": e.version,
|
|
1055
|
+
"file__is_latest": e.is_latest,
|
|
1056
|
+
"file__last_modified": e.last_modified,
|
|
1057
|
+
"file__size": e.size,
|
|
1058
|
+
"version": version,
|
|
1059
|
+
}
|
|
1060
|
+
for e in ENTRIES
|
|
1061
|
+
if e.name.startswith("dog")
|
|
1062
|
+
]
|
|
1063
|
+
|
|
1064
|
+
return _create_dog_entries
|
|
1065
|
+
|
|
1066
|
+
|
|
1067
|
+
@pytest.fixture
|
|
1068
|
+
def mock_parquet_data(compressed_parquet_data, dog_entries, version="1.0.0"):
|
|
1069
|
+
return compressed_parquet_data(dog_entries(version))
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
@pytest.fixture
|
|
1073
|
+
def mock_parquet_data_cloud(compressed_parquet_data, dog_entries, cloud_test_catalog):
|
|
1074
|
+
src_uri = cloud_test_catalog.src_uri
|
|
1075
|
+
return compressed_parquet_data(dog_entries("1.0.0"), src_uri)
|
|
@@ -7,9 +7,7 @@ import pytest
|
|
|
7
7
|
import sqlalchemy
|
|
8
8
|
|
|
9
9
|
from datachain.dataset import DatasetDependencyType, DatasetStatus
|
|
10
|
-
from datachain.error import
|
|
11
|
-
DatasetVersionNotFoundError,
|
|
12
|
-
)
|
|
10
|
+
from datachain.error import DatasetNotFoundError
|
|
13
11
|
from datachain.lib.listing import parse_listing_uri
|
|
14
12
|
from datachain.query import C, DatasetQuery, Object, Stream
|
|
15
13
|
from datachain.sql.functions import path as pathfunc
|
|
@@ -70,7 +68,7 @@ def test_save_multiple_versions(cloud_test_catalog, animal_dataset):
|
|
|
70
68
|
assert DatasetQuery(name=ds_name, version="1.0.1", catalog=catalog).count() == 3
|
|
71
69
|
assert DatasetQuery(name=ds_name, version="1.0.2", catalog=catalog).count() == 3
|
|
72
70
|
|
|
73
|
-
with pytest.raises(
|
|
71
|
+
with pytest.raises(DatasetNotFoundError):
|
|
74
72
|
DatasetQuery(name=ds_name, version="4.0.0", catalog=catalog).count()
|
|
75
73
|
|
|
76
74
|
|
|
@@ -6,7 +6,7 @@ from PIL import Image
|
|
|
6
6
|
|
|
7
7
|
import datachain as dc
|
|
8
8
|
from datachain import func
|
|
9
|
-
from datachain.error import
|
|
9
|
+
from datachain.error import DatasetNotFoundError
|
|
10
10
|
from datachain.lib.dc import C
|
|
11
11
|
from datachain.lib.file import File, ImageFile
|
|
12
12
|
|
|
@@ -278,10 +278,10 @@ def test_delta_update_no_diff(test_session, tmp_dir, tmp_path):
|
|
|
278
278
|
"images/img9.jpg",
|
|
279
279
|
]
|
|
280
280
|
|
|
281
|
-
with pytest.raises(
|
|
281
|
+
with pytest.raises(DatasetNotFoundError) as exc_info:
|
|
282
282
|
dc.read_dataset(ds_name, version="1.0.1")
|
|
283
283
|
|
|
284
|
-
assert str(exc_info.value) == f"Dataset {ds_name}
|
|
284
|
+
assert str(exc_info.value) == f"Dataset {ds_name} version 1.0.1 not found"
|
|
285
285
|
|
|
286
286
|
|
|
287
287
|
@pytest.fixture
|
|
@@ -179,9 +179,10 @@ def test_ls_partial_indexing(cloud_test_catalog, cloud_type, capsys):
|
|
|
179
179
|
|
|
180
180
|
|
|
181
181
|
class MockResponse:
|
|
182
|
-
def __init__(self, content, ok=True):
|
|
182
|
+
def __init__(self, content, status_code, ok=True):
|
|
183
183
|
self.content = content
|
|
184
184
|
self.ok = ok
|
|
185
|
+
self.status_code = status_code
|
|
185
186
|
|
|
186
187
|
|
|
187
188
|
def mock_post(method, url, data=None, json=None, **kwargs):
|
|
@@ -196,7 +197,8 @@ def mock_post(method, url, data=None, json=None, **kwargs):
|
|
|
196
197
|
for d in REMOTE_DATA[path]
|
|
197
198
|
]
|
|
198
199
|
return MockResponse(
|
|
199
|
-
content=msgpack.packb({"data": data}, default=_pack_extended_types)
|
|
200
|
+
content=msgpack.packb({"data": data}, default=_pack_extended_types),
|
|
201
|
+
status_code=200,
|
|
200
202
|
)
|
|
201
203
|
|
|
202
204
|
|