datachain 0.15.0__tar.gz → 0.16.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.15.0/src/datachain.egg-info → datachain-0.16.0}/PKG-INFO +1 -1
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/catalog.py +9 -9
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/__init__.py +1 -1
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/datasets.py +3 -3
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/show.py +2 -2
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/__init__.py +2 -2
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/metastore.py +5 -5
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/dataset.py +8 -8
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dataset_info.py +18 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/datachain.py +4 -3
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/datasets.py +9 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/udf.py +2 -1
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/dataset.py +2 -2
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/remote/studio.py +2 -2
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/studio.py +2 -2
- {datachain-0.15.0 → datachain-0.16.0/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.15.0 → datachain-0.16.0}/tests/conftest.py +7 -7
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_datachain.py +4 -4
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_datasets.py +7 -7
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_pull.py +1 -1
- {datachain-0.15.0 → datachain-0.16.0}/tests/test_cli_studio.py +4 -4
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_datachain.py +35 -0
- {datachain-0.15.0 → datachain-0.16.0}/.cruft.json +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.gitattributes +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/codecov.yaml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/dependabot.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/release.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.gitignore +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/.pre-commit-config.yaml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/LICENSE +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/README.rst +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/contributing.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/examples.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/index.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/overrides/main.html +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/quick-start.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/datachain.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/func.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/index.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/remotes.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/toolkit.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/torch.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/references/udf.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/docs/tutorials.md +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/mkdocs.yml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/noxfile.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/pyproject.toml +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/setup.cfg +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/__main__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/asyn.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cache.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/local.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/config.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/error.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/array.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/base.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/func.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/path.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/random.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/string.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/func/window.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/job.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/file.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/node.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/progress.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/py.typed +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/params.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/session.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain/utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/data.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/examples/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_client.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_file.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_image.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_ls.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_session.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_video.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/test_atomicity.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/test_import_time.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/test_telemetry.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_client.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_config.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_func.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_query.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_session.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.15.0 → datachain-0.16.0}/tests/utils.py +0 -0
|
@@ -776,7 +776,7 @@ class Catalog:
|
|
|
776
776
|
listing: Optional[bool] = False,
|
|
777
777
|
uuid: Optional[str] = None,
|
|
778
778
|
description: Optional[str] = None,
|
|
779
|
-
|
|
779
|
+
attrs: Optional[list[str]] = None,
|
|
780
780
|
) -> "DatasetRecord":
|
|
781
781
|
"""
|
|
782
782
|
Creates new dataset of a specific version.
|
|
@@ -794,16 +794,16 @@ class Catalog:
|
|
|
794
794
|
dataset = self.get_dataset(name)
|
|
795
795
|
default_version = dataset.next_version
|
|
796
796
|
|
|
797
|
-
if (description or
|
|
798
|
-
dataset.description != description or dataset.
|
|
797
|
+
if (description or attrs) and (
|
|
798
|
+
dataset.description != description or dataset.attrs != attrs
|
|
799
799
|
):
|
|
800
800
|
description = description or dataset.description
|
|
801
|
-
|
|
801
|
+
attrs = attrs or dataset.attrs
|
|
802
802
|
|
|
803
803
|
self.update_dataset(
|
|
804
804
|
dataset,
|
|
805
805
|
description=description,
|
|
806
|
-
|
|
806
|
+
attrs=attrs,
|
|
807
807
|
)
|
|
808
808
|
|
|
809
809
|
except DatasetNotFoundError:
|
|
@@ -817,7 +817,7 @@ class Catalog:
|
|
|
817
817
|
schema=schema,
|
|
818
818
|
ignore_if_exists=True,
|
|
819
819
|
description=description,
|
|
820
|
-
|
|
820
|
+
attrs=attrs,
|
|
821
821
|
)
|
|
822
822
|
|
|
823
823
|
version = version or default_version
|
|
@@ -1334,15 +1334,15 @@ class Catalog:
|
|
|
1334
1334
|
name: str,
|
|
1335
1335
|
new_name: Optional[str] = None,
|
|
1336
1336
|
description: Optional[str] = None,
|
|
1337
|
-
|
|
1337
|
+
attrs: Optional[list[str]] = None,
|
|
1338
1338
|
) -> DatasetRecord:
|
|
1339
1339
|
update_data = {}
|
|
1340
1340
|
if new_name:
|
|
1341
1341
|
update_data["name"] = new_name
|
|
1342
1342
|
if description is not None:
|
|
1343
1343
|
update_data["description"] = description
|
|
1344
|
-
if
|
|
1345
|
-
update_data["
|
|
1344
|
+
if attrs is not None:
|
|
1345
|
+
update_data["attrs"] = attrs # type: ignore[assignment]
|
|
1346
1346
|
|
|
1347
1347
|
dataset = self.get_dataset(name)
|
|
1348
1348
|
return self.update_dataset(dataset, **update_data)
|
|
@@ -154,7 +154,7 @@ def edit_dataset(
|
|
|
154
154
|
name: str,
|
|
155
155
|
new_name: Optional[str] = None,
|
|
156
156
|
description: Optional[str] = None,
|
|
157
|
-
|
|
157
|
+
attrs: Optional[list[str]] = None,
|
|
158
158
|
studio: bool = False,
|
|
159
159
|
local: bool = False,
|
|
160
160
|
all: bool = True,
|
|
@@ -167,9 +167,9 @@ def edit_dataset(
|
|
|
167
167
|
|
|
168
168
|
if all or local:
|
|
169
169
|
try:
|
|
170
|
-
catalog.edit_dataset(name, new_name, description,
|
|
170
|
+
catalog.edit_dataset(name, new_name, description, attrs)
|
|
171
171
|
except DatasetNotFoundError:
|
|
172
172
|
print("Dataset not found in local", file=sys.stderr)
|
|
173
173
|
|
|
174
174
|
if (all or studio) and token:
|
|
175
|
-
edit_studio_dataset(team, name, new_name, description,
|
|
175
|
+
edit_studio_dataset(team, name, new_name, description, attrs)
|
|
@@ -42,8 +42,8 @@ def show(
|
|
|
42
42
|
print("Name: ", name)
|
|
43
43
|
if dataset.description:
|
|
44
44
|
print("Description: ", dataset.description)
|
|
45
|
-
if dataset.
|
|
46
|
-
print("
|
|
45
|
+
if dataset.attrs:
|
|
46
|
+
print("Attributes: ", ",".join(dataset.attrs))
|
|
47
47
|
print("\n")
|
|
48
48
|
|
|
49
49
|
show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
|
|
@@ -217,9 +217,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
217
217
|
help="Dataset description",
|
|
218
218
|
)
|
|
219
219
|
parse_edit_dataset.add_argument(
|
|
220
|
-
"--
|
|
220
|
+
"--attrs",
|
|
221
221
|
nargs="+",
|
|
222
|
-
help="Dataset
|
|
222
|
+
help="Dataset attributes",
|
|
223
223
|
)
|
|
224
224
|
parse_edit_dataset.add_argument(
|
|
225
225
|
"--studio",
|
|
@@ -120,7 +120,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
120
120
|
schema: Optional[dict[str, Any]] = None,
|
|
121
121
|
ignore_if_exists: bool = False,
|
|
122
122
|
description: Optional[str] = None,
|
|
123
|
-
|
|
123
|
+
attrs: Optional[list[str]] = None,
|
|
124
124
|
) -> DatasetRecord:
|
|
125
125
|
"""Creates new dataset."""
|
|
126
126
|
|
|
@@ -326,7 +326,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
326
326
|
Column("id", Integer, primary_key=True),
|
|
327
327
|
Column("name", Text, nullable=False),
|
|
328
328
|
Column("description", Text),
|
|
329
|
-
Column("
|
|
329
|
+
Column("attrs", JSON, nullable=True),
|
|
330
330
|
Column("status", Integer, nullable=False),
|
|
331
331
|
Column("feature_schema", JSON, nullable=True),
|
|
332
332
|
Column("created_at", DateTime(timezone=True)),
|
|
@@ -521,7 +521,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
521
521
|
schema: Optional[dict[str, Any]] = None,
|
|
522
522
|
ignore_if_exists: bool = False,
|
|
523
523
|
description: Optional[str] = None,
|
|
524
|
-
|
|
524
|
+
attrs: Optional[list[str]] = None,
|
|
525
525
|
**kwargs, # TODO registered = True / False
|
|
526
526
|
) -> DatasetRecord:
|
|
527
527
|
"""Creates new dataset."""
|
|
@@ -538,7 +538,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
538
538
|
query_script=query_script,
|
|
539
539
|
schema=json.dumps(schema or {}),
|
|
540
540
|
description=description,
|
|
541
|
-
|
|
541
|
+
attrs=json.dumps(attrs or []),
|
|
542
542
|
)
|
|
543
543
|
if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
|
|
544
544
|
# SQLite and PostgreSQL both support 'on_conflict_do_nothing',
|
|
@@ -621,7 +621,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
621
621
|
dataset_values = {}
|
|
622
622
|
for field, value in kwargs.items():
|
|
623
623
|
if field in self._dataset_fields[1:]:
|
|
624
|
-
if field in ["
|
|
624
|
+
if field in ["attrs", "schema"]:
|
|
625
625
|
values[field] = json.dumps(value) if value else None
|
|
626
626
|
else:
|
|
627
627
|
values[field] = value
|
|
@@ -329,7 +329,7 @@ class DatasetRecord:
|
|
|
329
329
|
id: int
|
|
330
330
|
name: str
|
|
331
331
|
description: Optional[str]
|
|
332
|
-
|
|
332
|
+
attrs: list[str]
|
|
333
333
|
schema: dict[str, Union[SQLType, type[SQLType]]]
|
|
334
334
|
feature_schema: dict
|
|
335
335
|
versions: list[DatasetVersion]
|
|
@@ -357,7 +357,7 @@ class DatasetRecord:
|
|
|
357
357
|
id: int,
|
|
358
358
|
name: str,
|
|
359
359
|
description: Optional[str],
|
|
360
|
-
|
|
360
|
+
attrs: str,
|
|
361
361
|
status: int,
|
|
362
362
|
feature_schema: Optional[str],
|
|
363
363
|
created_at: datetime,
|
|
@@ -387,7 +387,7 @@ class DatasetRecord:
|
|
|
387
387
|
version_schema: str,
|
|
388
388
|
version_job_id: Optional[str] = None,
|
|
389
389
|
) -> "DatasetRecord":
|
|
390
|
-
|
|
390
|
+
attrs_lst: list[str] = json.loads(attrs) if attrs else []
|
|
391
391
|
schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
|
|
392
392
|
version_schema_dct: dict[str, str] = (
|
|
393
393
|
json.loads(version_schema) if version_schema else {}
|
|
@@ -418,7 +418,7 @@ class DatasetRecord:
|
|
|
418
418
|
id,
|
|
419
419
|
name,
|
|
420
420
|
description,
|
|
421
|
-
|
|
421
|
+
attrs_lst,
|
|
422
422
|
cls.parse_schema(schema_dct), # type: ignore[arg-type]
|
|
423
423
|
json.loads(feature_schema) if feature_schema else {},
|
|
424
424
|
[dataset_version],
|
|
@@ -562,7 +562,7 @@ class DatasetListRecord:
|
|
|
562
562
|
id: int
|
|
563
563
|
name: str
|
|
564
564
|
description: Optional[str]
|
|
565
|
-
|
|
565
|
+
attrs: list[str]
|
|
566
566
|
versions: list[DatasetListVersion]
|
|
567
567
|
created_at: Optional[datetime] = None
|
|
568
568
|
|
|
@@ -572,7 +572,7 @@ class DatasetListRecord:
|
|
|
572
572
|
id: int,
|
|
573
573
|
name: str,
|
|
574
574
|
description: Optional[str],
|
|
575
|
-
|
|
575
|
+
attrs: str,
|
|
576
576
|
created_at: datetime,
|
|
577
577
|
version_id: int,
|
|
578
578
|
version_uuid: str,
|
|
@@ -588,7 +588,7 @@ class DatasetListRecord:
|
|
|
588
588
|
version_query_script: Optional[str],
|
|
589
589
|
version_job_id: Optional[str] = None,
|
|
590
590
|
) -> "DatasetListRecord":
|
|
591
|
-
|
|
591
|
+
attrs_lst: list[str] = json.loads(attrs) if attrs else []
|
|
592
592
|
|
|
593
593
|
dataset_version = DatasetListVersion.parse(
|
|
594
594
|
version_id,
|
|
@@ -610,7 +610,7 @@ class DatasetListRecord:
|
|
|
610
610
|
id,
|
|
611
611
|
name,
|
|
612
612
|
description,
|
|
613
|
-
|
|
613
|
+
attrs_lst,
|
|
614
614
|
[dataset_version],
|
|
615
615
|
created_at,
|
|
616
616
|
)
|
|
@@ -32,11 +32,28 @@ class DatasetInfo(DataModel):
|
|
|
32
32
|
metrics: dict[str, Any] = Field(default={})
|
|
33
33
|
error_message: str = Field(default="")
|
|
34
34
|
error_stack: str = Field(default="")
|
|
35
|
+
attrs: list[str] = Field(default=[])
|
|
35
36
|
|
|
36
37
|
@property
|
|
37
38
|
def is_temp(self) -> bool:
|
|
38
39
|
return Session.is_temp_dataset(self.name)
|
|
39
40
|
|
|
41
|
+
def has_attr(self, attr: str) -> bool:
    """Check whether this dataset carries the given attribute.

    Args:
        attr: Attribute query. Either a bare attribute such as ``"NLP"``,
            a ``name=value`` pair such as ``"location=US"``, or a wildcard
            ``name=*`` that matches any stored attribute with that name.

    Returns:
        True if at least one entry of ``self.attrs`` satisfies the query,
        False otherwise.
    """
    # Split on the FIRST "=" only, so that values which themselves contain
    # "=" (e.g. "url=a=b") are kept whole instead of being truncated.
    name, sep, value = attr.partition("=")
    if not sep:
        # Bare attribute: must match a stored attribute string exactly.
        return attr in self.attrs

    for stored in self.attrs:
        stored_name, stored_sep, stored_value = stored.partition("=")
        if stored_name != name:
            continue
        if value == "*":
            # Wildcard matches any stored attribute with this name,
            # including one stored without a value.
            return True
        if stored_sep and stored_value == value:
            return True

    return False
|
|
56
|
+
|
|
40
57
|
@staticmethod
|
|
41
58
|
def _validate_dict(
|
|
42
59
|
v: Optional[Union[str, dict]],
|
|
@@ -83,4 +100,5 @@ class DatasetInfo(DataModel):
|
|
|
83
100
|
metrics=job.metrics if job else {},
|
|
84
101
|
error_message=version.error_message,
|
|
85
102
|
error_stack=version.error_stack,
|
|
103
|
+
attrs=dataset.attrs,
|
|
86
104
|
)
|
|
@@ -459,7 +459,7 @@ class DataChain:
|
|
|
459
459
|
name: str,
|
|
460
460
|
version: Optional[int] = None,
|
|
461
461
|
description: Optional[str] = None,
|
|
462
|
-
|
|
462
|
+
attrs: Optional[list[str]] = None,
|
|
463
463
|
**kwargs,
|
|
464
464
|
) -> "Self":
|
|
465
465
|
"""Save to a Dataset. It returns the chain itself.
|
|
@@ -468,7 +468,8 @@ class DataChain:
|
|
|
468
468
|
name : dataset name.
|
|
469
469
|
version : version of a dataset. Default - the last version that exist.
|
|
470
470
|
description : description of a dataset.
|
|
471
|
-
|
|
471
|
+
attrs : attributes of a dataset. They can be without value, e.g "NLP",
|
|
472
|
+
or with a value, e.g "location=US".
|
|
472
473
|
"""
|
|
473
474
|
schema = self.signals_schema.clone_without_sys_signals().serialize()
|
|
474
475
|
return self._evolve(
|
|
@@ -476,7 +477,7 @@ class DataChain:
|
|
|
476
477
|
name=name,
|
|
477
478
|
version=version,
|
|
478
479
|
description=description,
|
|
479
|
-
|
|
480
|
+
attrs=attrs,
|
|
480
481
|
feature_schema=schema,
|
|
481
482
|
**kwargs,
|
|
482
483
|
)
|
|
@@ -102,6 +102,7 @@ def datasets(
|
|
|
102
102
|
column: Optional[str] = None,
|
|
103
103
|
include_listing: bool = False,
|
|
104
104
|
studio: bool = False,
|
|
105
|
+
attrs: Optional[list[str]] = None,
|
|
105
106
|
) -> "DataChain":
|
|
106
107
|
"""Generate chain with list of registered datasets.
|
|
107
108
|
|
|
@@ -114,6 +115,10 @@ def datasets(
|
|
|
114
115
|
include_listing: If True, includes listing datasets. Defaults to False.
|
|
115
116
|
studio: If True, returns datasets from Studio only,
|
|
116
117
|
otherwise returns all local datasets. Defaults to False.
|
|
118
|
+
attrs: Optional list of attributes to filter datasets on. It can be just
|
|
119
|
+
attribute without value e.g "NLP", or attribute with value
|
|
120
|
+
e.g "location=US". Attribute with value can also accept "*" to target
|
|
121
|
+
all that have specific name e.g "location=*"
|
|
117
122
|
|
|
118
123
|
Returns:
|
|
119
124
|
DataChain: A new DataChain instance containing dataset information.
|
|
@@ -139,6 +144,10 @@ def datasets(
|
|
|
139
144
|
]
|
|
140
145
|
datasets_values = [d for d in datasets_values if not d.is_temp]
|
|
141
146
|
|
|
147
|
+
if attrs:
|
|
148
|
+
for attr in attrs:
|
|
149
|
+
datasets_values = [d for d in datasets_values if d.has_attr(attr)]
|
|
150
|
+
|
|
142
151
|
if not column:
|
|
143
152
|
# flattening dataset fields
|
|
144
153
|
schema = {
|
|
@@ -474,8 +474,9 @@ class Generator(UDFBase):
|
|
|
474
474
|
remove_prefetched=bool(self.prefetch) and not cache,
|
|
475
475
|
)
|
|
476
476
|
with closing(prepared_inputs):
|
|
477
|
-
for row in
|
|
477
|
+
for row in prepared_inputs:
|
|
478
478
|
yield _process_row(row)
|
|
479
|
+
processed_cb.relative_update(1)
|
|
479
480
|
|
|
480
481
|
self.teardown()
|
|
481
482
|
|
|
@@ -1680,7 +1680,7 @@ class DatasetQuery:
|
|
|
1680
1680
|
version: Optional[int] = None,
|
|
1681
1681
|
feature_schema: Optional[dict] = None,
|
|
1682
1682
|
description: Optional[str] = None,
|
|
1683
|
-
|
|
1683
|
+
attrs: Optional[list[str]] = None,
|
|
1684
1684
|
**kwargs,
|
|
1685
1685
|
) -> "Self":
|
|
1686
1686
|
"""Save the query as a dataset."""
|
|
@@ -1714,7 +1714,7 @@ class DatasetQuery:
|
|
|
1714
1714
|
feature_schema=feature_schema,
|
|
1715
1715
|
columns=columns,
|
|
1716
1716
|
description=description,
|
|
1717
|
-
|
|
1717
|
+
attrs=attrs,
|
|
1718
1718
|
**kwargs,
|
|
1719
1719
|
)
|
|
1720
1720
|
version = version or dataset.latest_version
|
|
@@ -290,13 +290,13 @@ class StudioClient:
|
|
|
290
290
|
name: str,
|
|
291
291
|
new_name: Optional[str] = None,
|
|
292
292
|
description: Optional[str] = None,
|
|
293
|
-
|
|
293
|
+
attrs: Optional[list[str]] = None,
|
|
294
294
|
) -> Response[DatasetInfoData]:
|
|
295
295
|
body = {
|
|
296
296
|
"new_name": new_name,
|
|
297
297
|
"dataset_name": name,
|
|
298
298
|
"description": description,
|
|
299
|
-
"
|
|
299
|
+
"attrs": attrs,
|
|
300
300
|
}
|
|
301
301
|
|
|
302
302
|
return self._send_request(
|
|
@@ -187,10 +187,10 @@ def edit_studio_dataset(
|
|
|
187
187
|
name: str,
|
|
188
188
|
new_name: Optional[str] = None,
|
|
189
189
|
description: Optional[str] = None,
|
|
190
|
-
|
|
190
|
+
attrs: Optional[list[str]] = None,
|
|
191
191
|
):
|
|
192
192
|
client = StudioClient(team=team_name)
|
|
193
|
-
response = client.edit_dataset(name, new_name, description,
|
|
193
|
+
response = client.edit_dataset(name, new_name, description, attrs)
|
|
194
194
|
if not response.ok:
|
|
195
195
|
raise DataChainError(response.message)
|
|
196
196
|
|
|
@@ -550,7 +550,7 @@ def animal_dataset(listed_bucket, cloud_test_catalog):
|
|
|
550
550
|
src_uri = cloud_test_catalog.src_uri
|
|
551
551
|
dataset = catalog.create_dataset_from_sources(name, [src_uri], recursive=True)
|
|
552
552
|
return catalog.update_dataset(
|
|
553
|
-
dataset, {"description": "animal dataset", "
|
|
553
|
+
dataset, {"description": "animal dataset", "attrs": ["cats", "dogs"]}
|
|
554
554
|
)
|
|
555
555
|
|
|
556
556
|
|
|
@@ -563,7 +563,7 @@ def dogs_dataset(listed_bucket, cloud_test_catalog):
|
|
|
563
563
|
name, [f"{src_uri}/dogs/*"], recursive=True
|
|
564
564
|
)
|
|
565
565
|
return catalog.update_dataset(
|
|
566
|
-
dataset, {"description": "dogs dataset", "
|
|
566
|
+
dataset, {"description": "dogs dataset", "attrs": ["dogs", "dataset"]}
|
|
567
567
|
)
|
|
568
568
|
|
|
569
569
|
|
|
@@ -576,7 +576,7 @@ def cats_dataset(listed_bucket, cloud_test_catalog):
|
|
|
576
576
|
name, [f"{src_uri}/cats/*"], recursive=True
|
|
577
577
|
)
|
|
578
578
|
return catalog.update_dataset(
|
|
579
|
-
dataset, {"description": "cats dataset", "
|
|
579
|
+
dataset, {"description": "cats dataset", "attrs": ["cats", "dataset"]}
|
|
580
580
|
)
|
|
581
581
|
|
|
582
582
|
|
|
@@ -586,7 +586,7 @@ def dataset_record():
|
|
|
586
586
|
id=1,
|
|
587
587
|
name=f"ds_{uuid.uuid4().hex}",
|
|
588
588
|
description="",
|
|
589
|
-
|
|
589
|
+
attrs=[],
|
|
590
590
|
versions=[],
|
|
591
591
|
status=1,
|
|
592
592
|
schema={},
|
|
@@ -651,7 +651,7 @@ def studio_datasets(requests_mock, studio_token):
|
|
|
651
651
|
"id": 1,
|
|
652
652
|
"name": "dogs",
|
|
653
653
|
"description": "dogs dataset",
|
|
654
|
-
"
|
|
654
|
+
"attrs": ["dogs", "dataset"],
|
|
655
655
|
"versions": [
|
|
656
656
|
{
|
|
657
657
|
"version": 1,
|
|
@@ -676,7 +676,7 @@ def studio_datasets(requests_mock, studio_token):
|
|
|
676
676
|
"id": 2,
|
|
677
677
|
"name": "cats",
|
|
678
678
|
"description": "cats dataset",
|
|
679
|
-
"
|
|
679
|
+
"attrs": ["cats", "dataset"],
|
|
680
680
|
"versions": [
|
|
681
681
|
{
|
|
682
682
|
"version": 1,
|
|
@@ -691,7 +691,7 @@ def studio_datasets(requests_mock, studio_token):
|
|
|
691
691
|
"id": 3,
|
|
692
692
|
"name": "both",
|
|
693
693
|
"description": "both dataset",
|
|
694
|
-
"
|
|
694
|
+
"attrs": ["both", "dataset"],
|
|
695
695
|
"versions": [
|
|
696
696
|
{
|
|
697
697
|
"version": 1,
|
|
@@ -560,23 +560,23 @@ def test_save(test_session):
|
|
|
560
560
|
name="new_name",
|
|
561
561
|
version=1,
|
|
562
562
|
description="new description",
|
|
563
|
-
|
|
563
|
+
attrs=["new_label", "old_label"],
|
|
564
564
|
)
|
|
565
565
|
|
|
566
566
|
ds = test_session.catalog.get_dataset("new_name")
|
|
567
567
|
assert ds.name == "new_name"
|
|
568
568
|
assert ds.description == "new description"
|
|
569
|
-
assert ds.
|
|
569
|
+
assert ds.attrs == ["new_label", "old_label"]
|
|
570
570
|
|
|
571
571
|
chain.save(
|
|
572
572
|
name="new_name",
|
|
573
573
|
description="updated description",
|
|
574
|
-
|
|
574
|
+
attrs=["new_label", "old_label", "new_label2"],
|
|
575
575
|
)
|
|
576
576
|
ds = test_session.catalog.get_dataset("new_name")
|
|
577
577
|
assert ds.name == "new_name"
|
|
578
578
|
assert ds.description == "updated description"
|
|
579
|
-
assert ds.
|
|
579
|
+
assert ds.attrs == ["new_label", "old_label", "new_label2"]
|
|
580
580
|
|
|
581
581
|
|
|
582
582
|
def test_show_nested_empty(capsys, test_session):
|
|
@@ -170,7 +170,7 @@ def test_create_dataset_from_sources(listed_bucket, cloud_test_catalog):
|
|
|
170
170
|
assert dataset.name == dataset_name
|
|
171
171
|
assert dataset.description is None
|
|
172
172
|
assert dataset.versions_values == [1]
|
|
173
|
-
assert dataset.
|
|
173
|
+
assert dataset.attrs == []
|
|
174
174
|
assert dataset.status == DatasetStatus.COMPLETE
|
|
175
175
|
|
|
176
176
|
assert dataset_version.status == DatasetStatus.COMPLETE
|
|
@@ -207,7 +207,7 @@ def test_create_dataset_from_sources_dataset(cloud_test_catalog, dogs_dataset):
|
|
|
207
207
|
assert dataset.name == dataset_name
|
|
208
208
|
assert dataset.description is None
|
|
209
209
|
assert dataset.versions_values == [1]
|
|
210
|
-
assert dataset.
|
|
210
|
+
assert dataset.attrs == []
|
|
211
211
|
assert dataset.status == DatasetStatus.COMPLETE
|
|
212
212
|
|
|
213
213
|
assert dataset_version.status == DatasetStatus.COMPLETE
|
|
@@ -546,14 +546,14 @@ def test_edit_dataset(cloud_test_catalog, dogs_dataset):
|
|
|
546
546
|
dogs_dataset.name,
|
|
547
547
|
new_name=dataset_new_name,
|
|
548
548
|
description="new description",
|
|
549
|
-
|
|
549
|
+
attrs=["cats", "birds"],
|
|
550
550
|
)
|
|
551
551
|
|
|
552
552
|
dataset = catalog.get_dataset(dataset_new_name)
|
|
553
553
|
assert dataset.versions_values == [1]
|
|
554
554
|
assert dataset.name == dataset_new_name
|
|
555
555
|
assert dataset.description == "new description"
|
|
556
|
-
assert dataset.
|
|
556
|
+
assert dataset.attrs == ["cats", "birds"]
|
|
557
557
|
|
|
558
558
|
# check if dataset tables are renamed correctly
|
|
559
559
|
old_dataset_table_name = catalog.warehouse.dataset_table_name(dataset_old_name, 1)
|
|
@@ -589,7 +589,7 @@ def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
|
|
|
589
589
|
)
|
|
590
590
|
|
|
591
591
|
|
|
592
|
-
def
|
|
592
|
+
def test_edit_dataset_remove_attrs_and_description(cloud_test_catalog, dogs_dataset):
|
|
593
593
|
dataset_new_name = uuid.uuid4().hex
|
|
594
594
|
catalog = cloud_test_catalog.catalog
|
|
595
595
|
|
|
@@ -597,14 +597,14 @@ def test_edit_dataset_remove_labels_and_description(cloud_test_catalog, dogs_dat
|
|
|
597
597
|
dogs_dataset.name,
|
|
598
598
|
new_name=dataset_new_name,
|
|
599
599
|
description="",
|
|
600
|
-
|
|
600
|
+
attrs=[],
|
|
601
601
|
)
|
|
602
602
|
|
|
603
603
|
dataset = catalog.get_dataset(dataset_new_name)
|
|
604
604
|
assert dataset.versions_values == [1]
|
|
605
605
|
assert dataset.name == dataset_new_name
|
|
606
606
|
assert dataset.description == ""
|
|
607
|
-
assert dataset.
|
|
607
|
+
assert dataset.attrs == []
|
|
608
608
|
|
|
609
609
|
|
|
610
610
|
def test_ls_dataset_rows(cloud_test_catalog, dogs_dataset):
|
|
@@ -247,7 +247,7 @@ def test_studio_edit_dataset(capsys, mocker):
|
|
|
247
247
|
"new_name": "new-name",
|
|
248
248
|
"team_name": "team_name",
|
|
249
249
|
"description": None,
|
|
250
|
-
"
|
|
250
|
+
"attrs": None,
|
|
251
251
|
}
|
|
252
252
|
|
|
253
253
|
# With all arguments
|
|
@@ -261,8 +261,8 @@ def test_studio_edit_dataset(capsys, mocker):
|
|
|
261
261
|
"new-name",
|
|
262
262
|
"--description",
|
|
263
263
|
"description",
|
|
264
|
-
"--
|
|
265
|
-
"
|
|
264
|
+
"--attrs",
|
|
265
|
+
"attr1",
|
|
266
266
|
"--team",
|
|
267
267
|
"team_name",
|
|
268
268
|
"--studio",
|
|
@@ -275,7 +275,7 @@ def test_studio_edit_dataset(capsys, mocker):
|
|
|
275
275
|
"dataset_name": "name",
|
|
276
276
|
"new_name": "new-name",
|
|
277
277
|
"description": "description",
|
|
278
|
-
"
|
|
278
|
+
"attrs": ["attr1"],
|
|
279
279
|
"team_name": "team_name",
|
|
280
280
|
}
|
|
281
281
|
|
|
@@ -356,6 +356,41 @@ def test_datasets_in_memory():
|
|
|
356
356
|
assert datasets[0].num_objects == 6
|
|
357
357
|
|
|
358
358
|
|
|
359
|
+
@pytest.mark.parametrize(
    "attrs,result",
    [
        (["number"], ["evens", "primes"]),
        (["num=prime"], ["primes"]),
        (["num=even"], ["evens"]),
        (["num=*"], ["evens", "primes"]),
        (["num=*", "small"], ["primes"]),
        (["letter"], ["letters"]),
        (["missing"], []),
        (["num=*", "missing"], []),
        (None, ["evens", "letters", "primes"]),
        ([], ["evens", "letters", "primes"]),
    ],
)
def test_datasets_filtering(test_session, attrs, result):
    """Filtering ``dc.datasets`` by ``attrs`` returns only matching datasets.

    Saves three datasets with different attributes, then checks that the
    names listed for the parametrized ``attrs`` filter equal ``result``
    (order-insensitive).
    """
    # Precondition: no dataset named "fibonacci" is registered yet.
    listed = dc.datasets(column="dataset", session=test_session)
    fibonacci = [info for info in listed.collect("dataset") if info.name == "fibonacci"]
    assert len(fibonacci) == 0

    primes = dc.read_values(num=[1, 2, 3], session=test_session)
    primes.save("primes", attrs=["number", "num=prime", "small"])

    evens = dc.read_values(num=[2, 4, 6], session=test_session)
    evens.save("evens", attrs=["number", "num=even"])

    letters = dc.read_values(letter=["a", "b", "c"], session=test_session)
    letters.save("letters", attrs=["letter"])

    names = dc.datasets(attrs=attrs).collect("name")
    assert sorted(names) == sorted(result)
|
|
392
|
+
|
|
393
|
+
|
|
359
394
|
def test_listings(test_session, tmp_dir):
|
|
360
395
|
df = pd.DataFrame(DF_DATA)
|
|
361
396
|
df.to_parquet(tmp_dir / "df.parquet")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|