datachain 0.18.3__tar.gz → 0.18.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.18.3 → datachain-0.18.4}/.github/codecov.yaml +7 -0
- {datachain-0.18.3/src/datachain.egg-info → datachain-0.18.4}/PKG-INFO +1 -1
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/catalog/catalog.py +34 -12
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/dataset.py +1 -1
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/listing.py +2 -3
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/remote/studio.py +4 -2
- {datachain-0.18.3 → datachain-0.18.4/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_catalog.py +1 -1
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_datachain.py +6 -9
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_dataset_query.py +4 -12
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_datasets.py +2 -2
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_listing.py +2 -8
- {datachain-0.18.3 → datachain-0.18.4}/.cruft.json +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.gitattributes +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/dependabot.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/workflows/release.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/workflows/tests.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.gitignore +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/.pre-commit-config.yaml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/LICENSE +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/README.rst +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/assets/datachain.svg +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/auth/login.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/auth/logout.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/auth/team.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/auth/token.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/job/cancel.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/job/logs.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/job/ls.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/commands/job/run.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/contributing.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/examples.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/overrides/main.html +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/quick-start.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/file.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/pose.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/segment.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/datachain.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/func.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/remotes.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/toolkit.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/torch.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/references/udf.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/docs/tutorials.md +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/multimodal/wds.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/mkdocs.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/noxfile.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/pyproject.toml +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/setup.cfg +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/__main__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/asyn.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cache.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/cli/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/azure.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/gcs.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/local.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/client/s3.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/config.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/delta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/error.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/fs/reference.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/fs/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/array.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/base.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/conditional.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/func.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/numeric.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/path.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/random.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/string.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/func/window.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/job.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/clip.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/file.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/image.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/settings.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/tar.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/text.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/udf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/video.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/listing.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/bbox.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/pose.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/segment.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/model/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/node.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/progress.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/py.typed +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/batch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/dataset.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/metrics.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/params.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/queue.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/session.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/udf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/query/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/script_meta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/semver.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/types.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/sql/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/studio.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/telemetry.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/conftest.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/data.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/examples/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/examples/test_examples.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/examples/wds_data.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/data/lena.jpg +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/model/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_batching.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_client.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_data_storage.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_delta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_file.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_func.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_image.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_ls.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_metrics.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_pull.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_pytorch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_read_database.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_session.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_toolkit.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_video.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/func/test_warehouse.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/scripts/feature_class.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/test_atomicity.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/test_cli_e2e.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/test_cli_studio.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/test_import_time.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/test_query_e2e.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/test_telemetry.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/model/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_asyn.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_cache.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_catalog.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_client.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_config.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_dataset.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_func.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_listing.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_metastore.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_query_params.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_semver.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_serializer.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_session.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.18.3 → datachain-0.18.4}/tests/utils.py +0 -0
|
@@ -9,6 +9,13 @@ coverage:
|
|
|
9
9
|
threshold: 10%
|
|
10
10
|
# non-blocking status checks
|
|
11
11
|
informational: true
|
|
12
|
+
only_pulls: true
|
|
13
|
+
|
|
14
|
+
comment:
|
|
15
|
+
# sections shown in the PR comment
|
|
16
|
+
layout: "reach, diff, flags, files"
|
|
17
|
+
# updates the comment on PRs when coverage changes
|
|
18
|
+
behavior: default
|
|
12
19
|
|
|
13
20
|
flags:
|
|
14
21
|
datachain:
|
|
@@ -66,6 +66,7 @@ if TYPE_CHECKING:
|
|
|
66
66
|
)
|
|
67
67
|
from datachain.dataset import DatasetListVersion
|
|
68
68
|
from datachain.job import Job
|
|
69
|
+
from datachain.lib.listing_info import ListingInfo
|
|
69
70
|
from datachain.listing import Listing
|
|
70
71
|
|
|
71
72
|
logger = logging.getLogger("datachain")
|
|
@@ -1116,13 +1117,16 @@ class Catalog:
|
|
|
1116
1117
|
return direct_dependencies
|
|
1117
1118
|
|
|
1118
1119
|
def ls_datasets(
|
|
1119
|
-
self,
|
|
1120
|
+
self,
|
|
1121
|
+
prefix: Optional[str] = None,
|
|
1122
|
+
include_listing: bool = False,
|
|
1123
|
+
studio: bool = False,
|
|
1120
1124
|
) -> Iterator[DatasetListRecord]:
|
|
1121
1125
|
from datachain.remote.studio import StudioClient
|
|
1122
1126
|
|
|
1123
1127
|
if studio:
|
|
1124
1128
|
client = StudioClient()
|
|
1125
|
-
response = client.ls_datasets()
|
|
1129
|
+
response = client.ls_datasets(prefix=prefix)
|
|
1126
1130
|
if not response.ok:
|
|
1127
1131
|
raise DataChainError(response.message)
|
|
1128
1132
|
if not response.data:
|
|
@@ -1133,6 +1137,8 @@ class Catalog:
|
|
|
1133
1137
|
for d in response.data
|
|
1134
1138
|
if not d.get("name", "").startswith(QUERY_DATASET_PREFIX)
|
|
1135
1139
|
)
|
|
1140
|
+
elif prefix:
|
|
1141
|
+
datasets = self.metastore.list_datasets_by_prefix(prefix)
|
|
1136
1142
|
else:
|
|
1137
1143
|
datasets = self.metastore.list_datasets()
|
|
1138
1144
|
|
|
@@ -1142,39 +1148,55 @@ class Catalog:
|
|
|
1142
1148
|
|
|
1143
1149
|
def list_datasets_versions(
|
|
1144
1150
|
self,
|
|
1151
|
+
prefix: Optional[str] = None,
|
|
1145
1152
|
include_listing: bool = False,
|
|
1153
|
+
with_job: bool = True,
|
|
1146
1154
|
studio: bool = False,
|
|
1147
1155
|
) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
|
|
1148
1156
|
"""Iterate over all dataset versions with related jobs."""
|
|
1149
1157
|
datasets = list(
|
|
1150
|
-
self.ls_datasets(
|
|
1158
|
+
self.ls_datasets(
|
|
1159
|
+
prefix=prefix, include_listing=include_listing, studio=studio
|
|
1160
|
+
)
|
|
1151
1161
|
)
|
|
1152
1162
|
|
|
1153
1163
|
# preselect dataset versions jobs from db to avoid multiple queries
|
|
1154
|
-
jobs_ids: set[str] = {
|
|
1155
|
-
v.job_id for ds in datasets for v in ds.versions if v.job_id
|
|
1156
|
-
}
|
|
1157
1164
|
jobs: dict[str, Job] = {}
|
|
1158
|
-
if jobs_ids:
|
|
1159
|
-
|
|
1165
|
+
if with_job:
|
|
1166
|
+
jobs_ids: set[str] = {
|
|
1167
|
+
v.job_id for ds in datasets for v in ds.versions if v.job_id
|
|
1168
|
+
}
|
|
1169
|
+
if jobs_ids:
|
|
1170
|
+
jobs = {
|
|
1171
|
+
j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))
|
|
1172
|
+
}
|
|
1160
1173
|
|
|
1161
1174
|
for d in datasets:
|
|
1162
1175
|
yield from (
|
|
1163
|
-
(d, v, jobs.get(str(v.job_id)) if v.job_id else None)
|
|
1176
|
+
(d, v, jobs.get(str(v.job_id)) if with_job and v.job_id else None)
|
|
1164
1177
|
for v in d.versions
|
|
1165
1178
|
)
|
|
1166
1179
|
|
|
1167
|
-
def listings(self):
|
|
1180
|
+
def listings(self, prefix: Optional[str] = None) -> list["ListingInfo"]:
|
|
1168
1181
|
"""
|
|
1169
1182
|
Returns list of ListingInfo objects which are representing specific
|
|
1170
1183
|
storage listing datasets
|
|
1171
1184
|
"""
|
|
1172
|
-
from datachain.lib.listing import is_listing_dataset
|
|
1185
|
+
from datachain.lib.listing import LISTING_PREFIX, is_listing_dataset
|
|
1173
1186
|
from datachain.lib.listing_info import ListingInfo
|
|
1174
1187
|
|
|
1188
|
+
if prefix and not prefix.startswith(LISTING_PREFIX):
|
|
1189
|
+
prefix = LISTING_PREFIX + prefix
|
|
1190
|
+
|
|
1191
|
+
listing_datasets_versions = self.list_datasets_versions(
|
|
1192
|
+
prefix=prefix,
|
|
1193
|
+
include_listing=True,
|
|
1194
|
+
with_job=False,
|
|
1195
|
+
)
|
|
1196
|
+
|
|
1175
1197
|
return [
|
|
1176
1198
|
ListingInfo.from_models(d, v, j)
|
|
1177
|
-
for d, v, j in
|
|
1199
|
+
for d, v, j in listing_datasets_versions
|
|
1178
1200
|
if is_listing_dataset(d.name)
|
|
1179
1201
|
]
|
|
1180
1202
|
|
|
@@ -93,7 +93,7 @@ class DatasetDependency:
|
|
|
93
93
|
if self.type == DatasetDependencyType.DATASET:
|
|
94
94
|
return self.name
|
|
95
95
|
|
|
96
|
-
list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/")
|
|
96
|
+
list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"))
|
|
97
97
|
assert list_dataset_name
|
|
98
98
|
return list_dataset_name
|
|
99
99
|
|
|
@@ -107,11 +107,10 @@ def ls(
|
|
|
107
107
|
return dc.filter(pathfunc.parent(_file_c("path")) == path.lstrip("/").rstrip("/*"))
|
|
108
108
|
|
|
109
109
|
|
|
110
|
-
def parse_listing_uri(uri: str
|
|
110
|
+
def parse_listing_uri(uri: str) -> tuple[str, str, str]:
|
|
111
111
|
"""
|
|
112
112
|
Parsing uri and returns listing dataset name, listing uri and listing path
|
|
113
113
|
"""
|
|
114
|
-
client_config = client_config or {}
|
|
115
114
|
storage_uri, path = Client.parse_url(uri)
|
|
116
115
|
if uses_glob(path):
|
|
117
116
|
lst_uri_path = posixpath.dirname(path)
|
|
@@ -175,7 +174,7 @@ def get_listing(
|
|
|
175
174
|
_, path = Client.parse_url(uri)
|
|
176
175
|
return None, uri, path, False
|
|
177
176
|
|
|
178
|
-
ds_name, list_uri, list_path = parse_listing_uri(uri
|
|
177
|
+
ds_name, list_uri, list_path = parse_listing_uri(uri)
|
|
179
178
|
listing = None
|
|
180
179
|
listings = [
|
|
181
180
|
ls for ls in catalog.listings() if not ls.is_expired and ls.contains(ds_name)
|
|
@@ -282,8 +282,10 @@ class StudioClient:
|
|
|
282
282
|
response = self._send_request_msgpack("datachain/ls", {"source": path})
|
|
283
283
|
yield path, response
|
|
284
284
|
|
|
285
|
-
def ls_datasets(self) -> Response[LsData]:
|
|
286
|
-
return self._send_request(
|
|
285
|
+
def ls_datasets(self, prefix: Optional[str] = None) -> Response[LsData]:
|
|
286
|
+
return self._send_request(
|
|
287
|
+
"datachain/datasets", {"prefix": prefix}, method="GET"
|
|
288
|
+
)
|
|
287
289
|
|
|
288
290
|
def edit_dataset(
|
|
289
291
|
self,
|
|
@@ -15,7 +15,7 @@ from tests.utils import DEFAULT_TREE, skip_if_not_sqlite, tree_from_path
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def listing_stats(uri, catalog):
|
|
18
|
-
list_dataset_name, _, _ = parse_listing_uri(uri
|
|
18
|
+
list_dataset_name, _, _ = parse_listing_uri(uri)
|
|
19
19
|
dataset = catalog.get_dataset(list_dataset_name)
|
|
20
20
|
dataset_version = dataset.get_version(dataset.latest_version)
|
|
21
21
|
return dataset_version.num_objects, dataset_version.size
|
|
@@ -114,12 +114,11 @@ def test_read_storage_reindex(tmp_dir, test_session):
|
|
|
114
114
|
|
|
115
115
|
|
|
116
116
|
def test_read_storage_reindex_expired(tmp_dir, test_session):
|
|
117
|
-
catalog = test_session.catalog
|
|
118
117
|
tmp_dir = tmp_dir / "parquets"
|
|
119
118
|
os.mkdir(tmp_dir)
|
|
120
119
|
uri = tmp_dir.as_uri()
|
|
121
120
|
|
|
122
|
-
lst_ds_name = parse_listing_uri(uri
|
|
121
|
+
lst_ds_name = parse_listing_uri(uri)[0]
|
|
123
122
|
|
|
124
123
|
pd.DataFrame({"name": ["Alice", "Bob"]}).to_parquet(tmp_dir / "test1.parquet")
|
|
125
124
|
assert dc.read_storage(uri, session=test_session).count() == 1
|
|
@@ -144,10 +143,9 @@ def test_read_storage_partials(cloud_test_catalog):
|
|
|
144
143
|
ctc = cloud_test_catalog
|
|
145
144
|
src_uri = ctc.src_uri
|
|
146
145
|
session = ctc.session
|
|
147
|
-
catalog = session.catalog
|
|
148
146
|
|
|
149
147
|
def _list_dataset_name(uri: str) -> str:
|
|
150
|
-
name = parse_listing_uri(uri
|
|
148
|
+
name = parse_listing_uri(uri)[0]
|
|
151
149
|
assert name
|
|
152
150
|
return name
|
|
153
151
|
|
|
@@ -188,10 +186,9 @@ def test_read_storage_partials_with_update(cloud_test_catalog):
|
|
|
188
186
|
ctc = cloud_test_catalog
|
|
189
187
|
src_uri = ctc.src_uri
|
|
190
188
|
session = ctc.session
|
|
191
|
-
catalog = session.catalog
|
|
192
189
|
|
|
193
190
|
def _list_dataset_name(uri: str) -> str:
|
|
194
|
-
name = parse_listing_uri(uri
|
|
191
|
+
name = parse_listing_uri(uri)[0]
|
|
195
192
|
assert name
|
|
196
193
|
return name
|
|
197
194
|
|
|
@@ -222,7 +219,7 @@ def test_read_storage_listing_happens_once(cloud_test_catalog, cloud_type):
|
|
|
222
219
|
dc_dogs = chain.filter(dc.C("file.path").glob("dogs*"))
|
|
223
220
|
dc_cats.union(dc_dogs).save(ds_name)
|
|
224
221
|
|
|
225
|
-
lst_ds_name = parse_listing_uri(uri
|
|
222
|
+
lst_ds_name = parse_listing_uri(uri)[0]
|
|
226
223
|
assert _get_listing_datasets(ctc.session) == [f"{lst_ds_name}@v1.0.0"]
|
|
227
224
|
|
|
228
225
|
|
|
@@ -230,7 +227,7 @@ def test_read_storage_dependencies(cloud_test_catalog, cloud_type):
|
|
|
230
227
|
ctc = cloud_test_catalog
|
|
231
228
|
src_uri = ctc.src_uri
|
|
232
229
|
uri = f"{src_uri}/cats"
|
|
233
|
-
dep_name, _, _ = parse_listing_uri(uri
|
|
230
|
+
dep_name, _, _ = parse_listing_uri(uri)
|
|
234
231
|
ds_name = "dep"
|
|
235
232
|
dc.read_storage(uri, session=ctc.session).save(ds_name)
|
|
236
233
|
dependencies = ctc.session.catalog.get_dataset_dependencies(ds_name, "1.0.0")
|
|
@@ -244,7 +241,7 @@ def test_persist_not_affects_dependencies(tmp_dir, test_session):
|
|
|
244
241
|
(tmp_dir / f"file{i}.txt").write_text(f"file{i}")
|
|
245
242
|
|
|
246
243
|
uri = tmp_dir.as_uri()
|
|
247
|
-
dep_name, _, _ = parse_listing_uri(uri
|
|
244
|
+
dep_name, _, _ = parse_listing_uri(uri)
|
|
248
245
|
chain = dc.read_storage(uri, session=test_session) # .persist()
|
|
249
246
|
# calling multiple persists to create temp datasets
|
|
250
247
|
chain = chain.persist()
|
|
@@ -965,9 +965,7 @@ def test_dataset_dependencies_one_storage_as_dependency(
|
|
|
965
965
|
ds_name = uuid.uuid4().hex
|
|
966
966
|
catalog = cloud_test_catalog.catalog
|
|
967
967
|
listing = catalog.listings()[0]
|
|
968
|
-
dep_name, _, _ = parse_listing_uri(
|
|
969
|
-
cloud_test_catalog.src_uri, catalog.client_config
|
|
970
|
-
)
|
|
968
|
+
dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
|
|
971
969
|
|
|
972
970
|
DatasetQuery(cats_dataset.name, catalog=catalog).save(ds_name)
|
|
973
971
|
|
|
@@ -996,9 +994,7 @@ def test_dataset_dependencies_one_registered_dataset_as_dependency(
|
|
|
996
994
|
catalog = cloud_test_catalog.catalog
|
|
997
995
|
listing = catalog.listings()[0]
|
|
998
996
|
|
|
999
|
-
dep_name, _, _ = parse_listing_uri(
|
|
1000
|
-
cloud_test_catalog.src_uri, catalog.client_config
|
|
1001
|
-
)
|
|
997
|
+
dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
|
|
1002
998
|
|
|
1003
999
|
DatasetQuery(name=dogs_dataset.name, catalog=catalog).save(ds_name)
|
|
1004
1000
|
|
|
@@ -1044,9 +1040,7 @@ def test_dataset_dependencies_multiple_direct_dataset_dependencies(
|
|
|
1044
1040
|
ds_name = uuid.uuid4().hex
|
|
1045
1041
|
catalog = cloud_test_catalog.catalog
|
|
1046
1042
|
listing = catalog.listings()[0]
|
|
1047
|
-
dep_name, _, _ = parse_listing_uri(
|
|
1048
|
-
cloud_test_catalog.src_uri, catalog.client_config
|
|
1049
|
-
)
|
|
1043
|
+
dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
|
|
1050
1044
|
|
|
1051
1045
|
dogs = DatasetQuery(name=dogs_dataset.name, version="1.0.0", catalog=catalog)
|
|
1052
1046
|
cats = DatasetQuery(name=cats_dataset.name, version="1.0.0", catalog=catalog)
|
|
@@ -1116,9 +1110,7 @@ def test_dataset_dependencies_multiple_union(
|
|
|
1116
1110
|
ds_name = uuid.uuid4().hex
|
|
1117
1111
|
catalog = cloud_test_catalog.catalog
|
|
1118
1112
|
listing = catalog.listings()[0]
|
|
1119
|
-
dep_name, _, _ = parse_listing_uri(
|
|
1120
|
-
cloud_test_catalog.src_uri, catalog.client_config
|
|
1121
|
-
)
|
|
1113
|
+
dep_name, _, _ = parse_listing_uri(cloud_test_catalog.src_uri)
|
|
1122
1114
|
|
|
1123
1115
|
dogs = DatasetQuery(name=dogs_dataset.name, version="1.0.0", catalog=catalog)
|
|
1124
1116
|
cats = DatasetQuery(name=cats_dataset.name, version="1.0.0", catalog=catalog)
|
|
@@ -666,12 +666,12 @@ def test_dataset_storage_dependencies(cloud_test_catalog, cloud_type, indirect):
|
|
|
666
666
|
session = ctc.session
|
|
667
667
|
catalog = session.catalog
|
|
668
668
|
uri = cloud_test_catalog.src_uri
|
|
669
|
-
dep_name, _, _ = parse_listing_uri(ctc.src_uri
|
|
669
|
+
dep_name, _, _ = parse_listing_uri(ctc.src_uri)
|
|
670
670
|
|
|
671
671
|
ds_name = "some_ds"
|
|
672
672
|
dc.read_storage(uri, session=session).save(ds_name)
|
|
673
673
|
|
|
674
|
-
lst_ds_name, _, _ = parse_listing_uri(uri
|
|
674
|
+
lst_ds_name, _, _ = parse_listing_uri(uri)
|
|
675
675
|
lst_dataset = catalog.metastore.get_dataset(lst_ds_name)
|
|
676
676
|
|
|
677
677
|
assert [
|
|
@@ -36,10 +36,7 @@ def test_listing_generator(cloud_test_catalog, cloud_type):
|
|
|
36
36
|
)
|
|
37
37
|
def test_parse_listing_uri(cloud_test_catalog, cloud_type):
|
|
38
38
|
ctc = cloud_test_catalog
|
|
39
|
-
|
|
40
|
-
dataset_name, listing_uri, listing_path = parse_listing_uri(
|
|
41
|
-
f"{ctc.src_uri}/dogs", catalog.client_config
|
|
42
|
-
)
|
|
39
|
+
dataset_name, listing_uri, listing_path = parse_listing_uri(f"{ctc.src_uri}/dogs")
|
|
43
40
|
assert dataset_name == f"lst__{ctc.src_uri}/dogs/"
|
|
44
41
|
assert listing_uri == f"{ctc.src_uri}/dogs/"
|
|
45
42
|
if cloud_type == "file":
|
|
@@ -55,10 +52,7 @@ def test_parse_listing_uri(cloud_test_catalog, cloud_type):
|
|
|
55
52
|
)
|
|
56
53
|
def test_parse_listing_uri_with_glob(cloud_test_catalog):
|
|
57
54
|
ctc = cloud_test_catalog
|
|
58
|
-
|
|
59
|
-
dataset_name, listing_uri, listing_path = parse_listing_uri(
|
|
60
|
-
f"{ctc.src_uri}/dogs/*", catalog.client_config
|
|
61
|
-
)
|
|
55
|
+
dataset_name, listing_uri, listing_path = parse_listing_uri(f"{ctc.src_uri}/dogs/*")
|
|
62
56
|
assert dataset_name == f"lst__{ctc.src_uri}/dogs/"
|
|
63
57
|
assert listing_uri == f"{ctc.src_uri}/dogs"
|
|
64
58
|
assert listing_path == "dogs/*"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|