datachain 0.31.3__tar.gz → 0.32.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.31.3 → datachain-0.32.0}/PKG-INFO +1 -1
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/__init__.py +2 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/catalog/catalog.py +22 -58
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/metastore.py +79 -15
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/error.py +8 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/file.py +95 -18
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/namespaces.py +57 -2
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/projects.py +47 -1
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/namespace.py +19 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_file.py +68 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_namespace.py +45 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_project.py +60 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_query.py +3 -22
- {datachain-0.31.3 → datachain-0.32.0}/.cruft.json +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.gitattributes +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/codecov.yaml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/dependabot.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/workflows/release.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.gitignore +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/.pre-commit-config.yaml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/LICENSE +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/README.rst +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/api_hooks.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/index.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/commands/job/run.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/contributing.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/examples.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/delta.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/env.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/index.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/namespaces.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/processing.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/remotes.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/guide/retry.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/index.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/overrides/main.html +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/quick-start.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/datachain.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/func.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/array.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/conditional.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/numeric.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/path.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/random.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/string.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/functions/window.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/index.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/toolkit.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/torch.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/references/udf.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/templates/main.dot +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/templates/operation.dot +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/templates/responses.def +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/docs/tutorials.md +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/mkdocs.yml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/noxfile.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/pyproject.toml +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/setup.cfg +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/__main__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/asyn.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cache.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/local.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/config.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/dataset.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/delta.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/array.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/base.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/func.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/path.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/random.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/string.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/func/window.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/job.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/audio.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/listing.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/node.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/progress.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/project.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/py.typed +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/dataset.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/params.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/session.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/semver.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/studio.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/conftest.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/data.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/examples/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_audio.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_batching.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_client.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_delta.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_hf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_image.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_listing.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_ls.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_mutate.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_pull.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_query.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_retry.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_session.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_to_database.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_video.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/test_atomicity.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/test_import_time.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/test_telemetry.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_client.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_config.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_func.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_session.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.31.3 → datachain-0.32.0}/tests/utils.py +0 -0
|
@@ -37,6 +37,7 @@ from datachain.lib.file import (
|
|
|
37
37
|
VideoFrame,
|
|
38
38
|
)
|
|
39
39
|
from datachain.lib.model_store import ModelStore
|
|
40
|
+
from datachain.lib.namespaces import delete as delete_namespace
|
|
40
41
|
from datachain.lib.projects import create as create_project
|
|
41
42
|
from datachain.lib.udf import Aggregator, Generator, Mapper
|
|
42
43
|
from datachain.lib.utils import AbstractUDF, DataChainError
|
|
@@ -74,6 +75,7 @@ __all__ = [
|
|
|
74
75
|
"create_project",
|
|
75
76
|
"datasets",
|
|
76
77
|
"delete_dataset",
|
|
78
|
+
"delete_namespace",
|
|
77
79
|
"is_chain_type",
|
|
78
80
|
"is_studio",
|
|
79
81
|
"listings",
|
|
@@ -144,26 +144,19 @@ def shutdown_process(
|
|
|
144
144
|
return proc.wait()
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def
|
|
147
|
+
def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
|
|
148
148
|
buffer = b""
|
|
149
|
+
while byt := stream.read(1): # Read one byte at a time
|
|
150
|
+
buffer += byt
|
|
149
151
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
buffer += byt
|
|
153
|
-
|
|
154
|
-
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
155
|
-
line = buffer.decode("utf-8", errors="replace")
|
|
156
|
-
callback(line)
|
|
157
|
-
buffer = b"" # Clear buffer for the next line
|
|
158
|
-
|
|
159
|
-
if buffer: # Handle any remaining data in the buffer
|
|
160
|
-
line = buffer.decode("utf-8", errors="replace")
|
|
152
|
+
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
153
|
+
line = buffer.decode("utf-8")
|
|
161
154
|
callback(line)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
155
|
+
buffer = b"" # Clear buffer for next line
|
|
156
|
+
|
|
157
|
+
if buffer: # Handle any remaining data in the buffer
|
|
158
|
+
line = buffer.decode("utf-8")
|
|
159
|
+
callback(line)
|
|
167
160
|
|
|
168
161
|
|
|
169
162
|
class DatasetRowsFetcher(NodesThreadPool):
|
|
@@ -1767,13 +1760,13 @@ class Catalog:
|
|
|
1767
1760
|
recursive=recursive,
|
|
1768
1761
|
)
|
|
1769
1762
|
|
|
1770
|
-
@staticmethod
|
|
1771
1763
|
def query(
|
|
1764
|
+
self,
|
|
1772
1765
|
query_script: str,
|
|
1773
1766
|
env: Optional[Mapping[str, str]] = None,
|
|
1774
1767
|
python_executable: str = sys.executable,
|
|
1775
|
-
|
|
1776
|
-
|
|
1768
|
+
capture_output: bool = False,
|
|
1769
|
+
output_hook: Callable[[str], None] = noop,
|
|
1777
1770
|
params: Optional[dict[str, str]] = None,
|
|
1778
1771
|
job_id: Optional[str] = None,
|
|
1779
1772
|
interrupt_timeout: Optional[int] = None,
|
|
@@ -1788,18 +1781,13 @@ class Catalog:
|
|
|
1788
1781
|
},
|
|
1789
1782
|
)
|
|
1790
1783
|
popen_kwargs: dict[str, Any] = {}
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
popen_kwargs = {"stdout": subprocess.PIPE}
|
|
1794
|
-
if stderr_callback is not None:
|
|
1795
|
-
popen_kwargs["stderr"] = subprocess.PIPE
|
|
1784
|
+
if capture_output:
|
|
1785
|
+
popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
|
|
1796
1786
|
|
|
1797
1787
|
def raise_termination_signal(sig: int, _: Any) -> NoReturn:
|
|
1798
1788
|
raise TerminationSignal(sig)
|
|
1799
1789
|
|
|
1800
|
-
|
|
1801
|
-
stderr_thread: Optional[Thread] = None
|
|
1802
|
-
|
|
1790
|
+
thread: Optional[Thread] = None
|
|
1803
1791
|
with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
|
|
1804
1792
|
logger.info("Starting process %s", proc.pid)
|
|
1805
1793
|
|
|
@@ -1813,20 +1801,10 @@ class Catalog:
|
|
|
1813
1801
|
orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
|
|
1814
1802
|
signal.signal(signal.SIGTERM, raise_termination_signal)
|
|
1815
1803
|
try:
|
|
1816
|
-
if
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
daemon=True,
|
|
1821
|
-
)
|
|
1822
|
-
stdout_thread.start()
|
|
1823
|
-
if stderr_callback is not None:
|
|
1824
|
-
stderr_thread = Thread(
|
|
1825
|
-
target=process_output,
|
|
1826
|
-
args=(proc.stderr, stderr_callback),
|
|
1827
|
-
daemon=True,
|
|
1828
|
-
)
|
|
1829
|
-
stderr_thread.start()
|
|
1804
|
+
if capture_output:
|
|
1805
|
+
args = (proc.stdout, output_hook)
|
|
1806
|
+
thread = Thread(target=_process_stream, args=args, daemon=True)
|
|
1807
|
+
thread.start()
|
|
1830
1808
|
|
|
1831
1809
|
proc.wait()
|
|
1832
1810
|
except TerminationSignal as exc:
|
|
@@ -1844,22 +1822,8 @@ class Catalog:
|
|
|
1844
1822
|
finally:
|
|
1845
1823
|
signal.signal(signal.SIGTERM, orig_sigterm_handler)
|
|
1846
1824
|
signal.signal(signal.SIGINT, orig_sigint_handler)
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
if stdout_thread is not None:
|
|
1850
|
-
stdout_thread.join(timeout=thread_join_timeout_seconds)
|
|
1851
|
-
if stdout_thread.is_alive():
|
|
1852
|
-
logger.warning(
|
|
1853
|
-
"stdout thread is still alive after %s seconds",
|
|
1854
|
-
thread_join_timeout_seconds,
|
|
1855
|
-
)
|
|
1856
|
-
if stderr_thread is not None:
|
|
1857
|
-
stderr_thread.join(timeout=thread_join_timeout_seconds)
|
|
1858
|
-
if stderr_thread.is_alive():
|
|
1859
|
-
logger.warning(
|
|
1860
|
-
"stderr thread is still alive after %s seconds",
|
|
1861
|
-
thread_join_timeout_seconds,
|
|
1862
|
-
)
|
|
1825
|
+
if thread:
|
|
1826
|
+
thread.join() # wait for the reader thread
|
|
1863
1827
|
|
|
1864
1828
|
logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
|
|
1865
1829
|
if proc.returncode in (
|
|
@@ -22,6 +22,7 @@ from sqlalchemy import (
|
|
|
22
22
|
UniqueConstraint,
|
|
23
23
|
select,
|
|
24
24
|
)
|
|
25
|
+
from sqlalchemy.sql import func as f
|
|
25
26
|
|
|
26
27
|
from datachain.data_storage import JobQueryType, JobStatus
|
|
27
28
|
from datachain.data_storage.serializer import Serializable
|
|
@@ -37,7 +38,9 @@ from datachain.dataset import (
|
|
|
37
38
|
from datachain.error import (
|
|
38
39
|
DatasetNotFoundError,
|
|
39
40
|
DatasetVersionNotFoundError,
|
|
41
|
+
NamespaceDeleteNotAllowedError,
|
|
40
42
|
NamespaceNotFoundError,
|
|
43
|
+
ProjectDeleteNotAllowedError,
|
|
41
44
|
ProjectNotFoundError,
|
|
42
45
|
TableMissingError,
|
|
43
46
|
)
|
|
@@ -141,6 +144,10 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
141
144
|
def get_namespace(self, name: str, conn=None) -> Namespace:
|
|
142
145
|
"""Gets a single namespace by name"""
|
|
143
146
|
|
|
147
|
+
@abstractmethod
|
|
148
|
+
def remove_namespace(self, namespace_id: int, conn=None) -> None:
|
|
149
|
+
"""Removes a single namespace by id"""
|
|
150
|
+
|
|
144
151
|
@abstractmethod
|
|
145
152
|
def list_namespaces(self, conn=None) -> list[Namespace]:
|
|
146
153
|
"""Gets a list of all namespaces"""
|
|
@@ -190,10 +197,30 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
190
197
|
It also creates project if not found and create flag is set to True.
|
|
191
198
|
"""
|
|
192
199
|
|
|
200
|
+
def is_default_project(self, project_name: str, namespace_name: str) -> bool:
|
|
201
|
+
return (
|
|
202
|
+
project_name == self.default_project_name
|
|
203
|
+
and namespace_name == self.default_namespace_name
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
def is_listing_project(self, project_name: str, namespace_name: str) -> bool:
|
|
207
|
+
return (
|
|
208
|
+
project_name == self.listing_project_name
|
|
209
|
+
and namespace_name == self.system_namespace_name
|
|
210
|
+
)
|
|
211
|
+
|
|
193
212
|
@abstractmethod
|
|
194
213
|
def get_project_by_id(self, project_id: int, conn=None) -> Project:
|
|
195
214
|
"""Gets a single project by id"""
|
|
196
215
|
|
|
216
|
+
@abstractmethod
|
|
217
|
+
def count_projects(self, namespace_id: Optional[int] = None) -> int:
|
|
218
|
+
"""Counts projects in some namespace or in general."""
|
|
219
|
+
|
|
220
|
+
@abstractmethod
|
|
221
|
+
def remove_project(self, project_id: int, conn=None) -> None:
|
|
222
|
+
"""Removes a single project by id"""
|
|
223
|
+
|
|
197
224
|
@abstractmethod
|
|
198
225
|
def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
|
|
199
226
|
"""Gets list of projects in some namespace or in general (in all namespaces)"""
|
|
@@ -270,6 +297,10 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
270
297
|
) -> Iterator[DatasetListRecord]:
|
|
271
298
|
"""Lists all datasets in some project or in all projects."""
|
|
272
299
|
|
|
300
|
+
@abstractmethod
|
|
301
|
+
def count_datasets(self, project_id: Optional[int] = None) -> int:
|
|
302
|
+
"""Counts datasets in some project or in all projects."""
|
|
303
|
+
|
|
273
304
|
@abstractmethod
|
|
274
305
|
def list_datasets_by_prefix(
|
|
275
306
|
self, prefix: str, project_id: Optional[int] = None
|
|
@@ -735,6 +766,18 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
735
766
|
|
|
736
767
|
return self.get_namespace(name)
|
|
737
768
|
|
|
769
|
+
def remove_namespace(self, namespace_id: int, conn=None) -> None:
|
|
770
|
+
num_projects = self.count_projects(namespace_id)
|
|
771
|
+
if num_projects > 0:
|
|
772
|
+
raise NamespaceDeleteNotAllowedError(
|
|
773
|
+
f"Namespace cannot be removed. It contains {num_projects} project(s). "
|
|
774
|
+
"Please remove the project(s) first."
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
n = self._namespaces
|
|
778
|
+
with self.db.transaction():
|
|
779
|
+
self.db.execute(self._namespaces_delete().where(n.c.id == namespace_id))
|
|
780
|
+
|
|
738
781
|
def get_namespace(self, name: str, conn=None) -> Namespace:
|
|
739
782
|
"""Gets a single namespace by name"""
|
|
740
783
|
n = self._namespaces
|
|
@@ -796,18 +839,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
796
839
|
|
|
797
840
|
return self.get_project(name, namespace.name)
|
|
798
841
|
|
|
799
|
-
def _is_listing_project(self, project_name: str, namespace_name: str) -> bool:
|
|
800
|
-
return (
|
|
801
|
-
project_name == self.listing_project_name
|
|
802
|
-
and namespace_name == self.system_namespace_name
|
|
803
|
-
)
|
|
804
|
-
|
|
805
|
-
def _is_default_project(self, project_name: str, namespace_name: str) -> bool:
|
|
806
|
-
return (
|
|
807
|
-
project_name == self.default_project_name
|
|
808
|
-
and namespace_name == self.default_namespace_name
|
|
809
|
-
)
|
|
810
|
-
|
|
811
842
|
def get_project(
|
|
812
843
|
self, name: str, namespace_name: str, create: bool = False, conn=None
|
|
813
844
|
) -> Project:
|
|
@@ -816,7 +847,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
816
847
|
p = self._projects
|
|
817
848
|
validate = True
|
|
818
849
|
|
|
819
|
-
if self.
|
|
850
|
+
if self.is_listing_project(name, namespace_name) or self.is_default_project(
|
|
820
851
|
name, namespace_name
|
|
821
852
|
):
|
|
822
853
|
# we are always creating default and listing projects if they don't exist
|
|
@@ -858,7 +889,31 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
858
889
|
raise ProjectNotFoundError(f"Project with id {project_id} not found.")
|
|
859
890
|
return self.project_class.parse(*rows[0])
|
|
860
891
|
|
|
861
|
-
def
|
|
892
|
+
def count_projects(self, namespace_id: Optional[int] = None) -> int:
|
|
893
|
+
p = self._projects
|
|
894
|
+
query = self._projects_select()
|
|
895
|
+
if namespace_id:
|
|
896
|
+
query = query.where(p.c.namespace_id == namespace_id)
|
|
897
|
+
|
|
898
|
+
query = select(f.count(1)).select_from(query.subquery())
|
|
899
|
+
|
|
900
|
+
return next(self.db.execute(query))[0]
|
|
901
|
+
|
|
902
|
+
def remove_project(self, project_id: int, conn=None) -> None:
|
|
903
|
+
num_datasets = self.count_datasets(project_id)
|
|
904
|
+
if num_datasets > 0:
|
|
905
|
+
raise ProjectDeleteNotAllowedError(
|
|
906
|
+
f"Project cannot be removed. It contains {num_datasets} dataset(s). "
|
|
907
|
+
"Please remove the dataset(s) first."
|
|
908
|
+
)
|
|
909
|
+
|
|
910
|
+
p = self._projects
|
|
911
|
+
with self.db.transaction():
|
|
912
|
+
self.db.execute(self._projects_delete().where(p.c.id == project_id))
|
|
913
|
+
|
|
914
|
+
def list_projects(
|
|
915
|
+
self, namespace_id: Optional[int] = None, conn=None
|
|
916
|
+
) -> list[Project]:
|
|
862
917
|
"""
|
|
863
918
|
Gets a list of projects inside some namespace, or in all namespaces
|
|
864
919
|
"""
|
|
@@ -1189,7 +1244,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1189
1244
|
def list_datasets(
|
|
1190
1245
|
self, project_id: Optional[int] = None
|
|
1191
1246
|
) -> Iterator["DatasetListRecord"]:
|
|
1192
|
-
"""Lists all datasets."""
|
|
1193
1247
|
d = self._datasets
|
|
1194
1248
|
query = self._base_list_datasets_query().order_by(
|
|
1195
1249
|
self._datasets.c.name, self._datasets_versions.c.version
|
|
@@ -1198,6 +1252,16 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1198
1252
|
query = query.where(d.c.project_id == project_id)
|
|
1199
1253
|
yield from self._parse_dataset_list(self.db.execute(query))
|
|
1200
1254
|
|
|
1255
|
+
def count_datasets(self, project_id: Optional[int] = None) -> int:
|
|
1256
|
+
d = self._datasets
|
|
1257
|
+
query = self._datasets_select()
|
|
1258
|
+
if project_id:
|
|
1259
|
+
query = query.where(d.c.project_id == project_id)
|
|
1260
|
+
|
|
1261
|
+
query = select(f.count(1)).select_from(query.subquery())
|
|
1262
|
+
|
|
1263
|
+
return next(self.db.execute(query))[0]
|
|
1264
|
+
|
|
1201
1265
|
def list_datasets_by_prefix(
|
|
1202
1266
|
self, prefix: str, project_id: Optional[int] = None, conn=None
|
|
1203
1267
|
) -> Iterator["DatasetListRecord"]:
|
|
@@ -34,6 +34,14 @@ class ProjectCreateNotAllowedError(NotAllowedError):
|
|
|
34
34
|
pass
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
class ProjectDeleteNotAllowedError(NotAllowedError):
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NamespaceDeleteNotAllowedError(NotAllowedError):
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
|
|
37
45
|
class ProjectNotFoundError(NotFoundError):
|
|
38
46
|
pass
|
|
39
47
|
|
|
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
|
|
|
35
35
|
from datachain.catalog import Catalog
|
|
36
36
|
from datachain.client.fsspec import Client
|
|
37
37
|
from datachain.dataset import RowDict
|
|
38
|
+
from datachain.query.session import Session
|
|
38
39
|
|
|
39
40
|
sha256 = partial(hashlib.sha256, usedforsecurity=False)
|
|
40
41
|
|
|
@@ -252,6 +253,15 @@ class File(DataModel):
|
|
|
252
253
|
"last_modified",
|
|
253
254
|
]
|
|
254
255
|
|
|
256
|
+
# Allowed kwargs we forward to TextIOWrapper
|
|
257
|
+
_TEXT_WRAPPER_ALLOWED: ClassVar[tuple[str, ...]] = (
|
|
258
|
+
"encoding",
|
|
259
|
+
"errors",
|
|
260
|
+
"newline",
|
|
261
|
+
"line_buffering",
|
|
262
|
+
"write_through",
|
|
263
|
+
)
|
|
264
|
+
|
|
255
265
|
@staticmethod
|
|
256
266
|
def _validate_dict(
|
|
257
267
|
v: Optional[Union[str, dict, list[dict]]],
|
|
@@ -328,7 +338,6 @@ class File(DataModel):
|
|
|
328
338
|
from datachain.catalog.loader import get_catalog
|
|
329
339
|
|
|
330
340
|
catalog = get_catalog()
|
|
331
|
-
|
|
332
341
|
from datachain.client.fsspec import Client
|
|
333
342
|
|
|
334
343
|
client_cls = Client.get_implementation(path)
|
|
@@ -341,6 +350,27 @@ class File(DataModel):
|
|
|
341
350
|
file._set_stream(catalog)
|
|
342
351
|
return file
|
|
343
352
|
|
|
353
|
+
@classmethod
|
|
354
|
+
def at(cls, uri: str, session: Optional["Session"] = None) -> "Self":
|
|
355
|
+
"""Construct a File from a full URI in one call.
|
|
356
|
+
|
|
357
|
+
Example:
|
|
358
|
+
file = File.at("s3://bucket/path/to/output.png")
|
|
359
|
+
with file.open("wb") as f: ...
|
|
360
|
+
"""
|
|
361
|
+
from datachain.client.fsspec import Client
|
|
362
|
+
from datachain.query.session import Session
|
|
363
|
+
|
|
364
|
+
if session is None:
|
|
365
|
+
session = Session.get()
|
|
366
|
+
catalog = session.catalog
|
|
367
|
+
|
|
368
|
+
client_cls = Client.get_implementation(uri)
|
|
369
|
+
source, rel_path = client_cls.split_url(uri)
|
|
370
|
+
file = cls(source=client_cls.get_uri(source), path=rel_path)
|
|
371
|
+
file._set_stream(catalog)
|
|
372
|
+
return file
|
|
373
|
+
|
|
344
374
|
@classmethod
|
|
345
375
|
def _from_row(cls, row: "RowDict") -> "Self":
|
|
346
376
|
return cls(**{key: row[key] for key in cls._datachain_column_types})
|
|
@@ -354,28 +384,70 @@ class File(DataModel):
|
|
|
354
384
|
return str(PurePosixPath(self.path).parent)
|
|
355
385
|
|
|
356
386
|
@contextmanager
|
|
357
|
-
def open(self, mode:
|
|
358
|
-
"""Open the file and return a file object.
|
|
359
|
-
if self.location:
|
|
360
|
-
with VFileRegistry.open(self, self.location) as f: # type: ignore[arg-type]
|
|
361
|
-
yield f
|
|
387
|
+
def open(self, mode: str = "rb", **open_kwargs) -> Iterator[Any]:
|
|
388
|
+
"""Open the file and return a file-like object.
|
|
362
389
|
|
|
363
|
-
|
|
390
|
+
Supports both read ("rb", "r") and write modes (e.g. "wb", "w", "ab").
|
|
391
|
+
When opened in a write mode, metadata is refreshed after closing.
|
|
392
|
+
"""
|
|
393
|
+
writing = any(ch in mode for ch in "wax+")
|
|
394
|
+
if self.location and writing:
|
|
395
|
+
raise VFileError(
|
|
396
|
+
"Writing to virtual file is not supported",
|
|
397
|
+
self.source,
|
|
398
|
+
self.path,
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
if self._catalog is None:
|
|
402
|
+
raise RuntimeError("Cannot open file: catalog is not set")
|
|
403
|
+
|
|
404
|
+
client: Client = self._catalog.get_client(self.source)
|
|
405
|
+
|
|
406
|
+
if not writing:
|
|
407
|
+
if self.location:
|
|
408
|
+
with VFileRegistry.open(self, self.location) as f: # type: ignore[arg-type]
|
|
409
|
+
yield self._wrap_text(f, mode, open_kwargs)
|
|
410
|
+
return
|
|
364
411
|
if self._caching_enabled:
|
|
365
412
|
self.ensure_cached()
|
|
366
|
-
client: Client = self._catalog.get_client(self.source)
|
|
367
413
|
with client.open_object(
|
|
368
414
|
self, use_cache=self._caching_enabled, cb=self._download_cb
|
|
369
415
|
) as f:
|
|
370
|
-
yield
|
|
416
|
+
yield self._wrap_text(f, mode, open_kwargs)
|
|
417
|
+
return
|
|
418
|
+
|
|
419
|
+
# write path
|
|
420
|
+
full_path = client.get_full_path(self.get_path_normalized())
|
|
421
|
+
with client.fs.open(full_path, mode, **open_kwargs) as f:
|
|
422
|
+
yield self._wrap_text(f, mode, open_kwargs)
|
|
423
|
+
|
|
424
|
+
# refresh metadata
|
|
425
|
+
info = client.fs.info(full_path)
|
|
426
|
+
refreshed = client.info_to_file(info, self.get_path_normalized())
|
|
427
|
+
for k, v in refreshed.model_dump().items():
|
|
428
|
+
setattr(self, k, v)
|
|
429
|
+
|
|
430
|
+
def _wrap_text(self, f: Any, mode: str, open_kwargs: dict[str, Any]) -> Any:
|
|
431
|
+
"""Return stream possibly wrapped for text."""
|
|
432
|
+
if "b" in mode or isinstance(f, io.TextIOBase):
|
|
433
|
+
return f
|
|
434
|
+
filtered = {
|
|
435
|
+
k: open_kwargs[k] for k in self._TEXT_WRAPPER_ALLOWED if k in open_kwargs
|
|
436
|
+
}
|
|
437
|
+
return io.TextIOWrapper(f, **filtered)
|
|
371
438
|
|
|
372
439
|
def read_bytes(self, length: int = -1):
|
|
373
440
|
"""Returns file contents as bytes."""
|
|
374
441
|
with self.open() as stream:
|
|
375
442
|
return stream.read(length)
|
|
376
443
|
|
|
377
|
-
def read_text(self):
|
|
378
|
-
"""
|
|
444
|
+
def read_text(self, **open_kwargs):
|
|
445
|
+
"""Return file contents decoded as text.
|
|
446
|
+
|
|
447
|
+
**open_kwargs : Any
|
|
448
|
+
Extra keyword arguments forwarded to ``open(mode="r", ...)``
|
|
449
|
+
(e.g. ``encoding="utf-8"``, ``errors="ignore"``)
|
|
450
|
+
"""
|
|
379
451
|
if self.location:
|
|
380
452
|
raise VFileError(
|
|
381
453
|
"Reading text from virtual file is not supported",
|
|
@@ -383,7 +455,7 @@ class File(DataModel):
|
|
|
383
455
|
self.path,
|
|
384
456
|
)
|
|
385
457
|
|
|
386
|
-
with self.open(mode="r") as stream:
|
|
458
|
+
with self.open(mode="r", **open_kwargs) as stream:
|
|
387
459
|
return stream.read()
|
|
388
460
|
|
|
389
461
|
def read(self, length: int = -1):
|
|
@@ -701,14 +773,19 @@ class TextFile(File):
|
|
|
701
773
|
"""`DataModel` for reading text files."""
|
|
702
774
|
|
|
703
775
|
@contextmanager
|
|
704
|
-
def open(self, mode:
|
|
705
|
-
"""Open the file and return a file object
|
|
706
|
-
|
|
776
|
+
def open(self, mode: str = "r", **open_kwargs) -> Iterator[Any]:
|
|
777
|
+
"""Open the file and return a file-like object.
|
|
778
|
+
Default to text mode"""
|
|
779
|
+
with super().open(mode=mode, **open_kwargs) as stream:
|
|
707
780
|
yield stream
|
|
708
781
|
|
|
709
|
-
def read_text(self):
|
|
710
|
-
"""
|
|
711
|
-
|
|
782
|
+
def read_text(self, **open_kwargs):
|
|
783
|
+
"""Return file contents as text.
|
|
784
|
+
|
|
785
|
+
**open_kwargs : Any
|
|
786
|
+
Extra keyword arguments forwarded to ``open()`` (e.g. encoding).
|
|
787
|
+
"""
|
|
788
|
+
with self.open(**open_kwargs) as stream:
|
|
712
789
|
return stream.read()
|
|
713
790
|
|
|
714
791
|
def save(self, destination: str, client_config: Optional[dict] = None):
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
-
from datachain.error import
|
|
4
|
-
|
|
3
|
+
from datachain.error import (
|
|
4
|
+
NamespaceCreateNotAllowedError,
|
|
5
|
+
NamespaceDeleteNotAllowedError,
|
|
6
|
+
)
|
|
7
|
+
from datachain.lib.projects import delete as delete_project
|
|
8
|
+
from datachain.namespace import Namespace, parse_name
|
|
5
9
|
from datachain.query import Session
|
|
6
10
|
|
|
7
11
|
|
|
@@ -71,3 +75,54 @@ def ls(session: Optional[Session] = None) -> list[Namespace]:
|
|
|
71
75
|
```
|
|
72
76
|
"""
|
|
73
77
|
return Session.get(session).catalog.metastore.list_namespaces()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def delete(name: str, session: Optional[Session]) -> None:
|
|
81
|
+
"""
|
|
82
|
+
Removes a namespace by name.
|
|
83
|
+
|
|
84
|
+
Raises:
|
|
85
|
+
NamespaceNotFoundError: If the namespace does not exist.
|
|
86
|
+
NamespaceDeleteNotAllowedError: If the namespace is non-empty,
|
|
87
|
+
is the default namespace, or is a system namespace,
|
|
88
|
+
as these cannot be removed.
|
|
89
|
+
|
|
90
|
+
Parameters:
|
|
91
|
+
name : The name of the namespace.
|
|
92
|
+
session : Session to use for getting namespace.
|
|
93
|
+
|
|
94
|
+
Example:
|
|
95
|
+
```py
|
|
96
|
+
import datachain as dc
|
|
97
|
+
from datachain.lib.namespaces import delete as delete_namespace
|
|
98
|
+
delete_namespace("dev")
|
|
99
|
+
```
|
|
100
|
+
"""
|
|
101
|
+
session = Session.get(session)
|
|
102
|
+
metastore = session.catalog.metastore
|
|
103
|
+
|
|
104
|
+
namespace_name, project_name = parse_name(name)
|
|
105
|
+
|
|
106
|
+
if project_name:
|
|
107
|
+
return delete_project(project_name, namespace_name, session)
|
|
108
|
+
|
|
109
|
+
namespace = metastore.get_namespace(name)
|
|
110
|
+
|
|
111
|
+
if name == metastore.system_namespace_name:
|
|
112
|
+
raise NamespaceDeleteNotAllowedError(
|
|
113
|
+
f"Namespace {metastore.system_namespace_name} cannot be removed"
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
if name == metastore.default_namespace_name:
|
|
117
|
+
raise NamespaceDeleteNotAllowedError(
|
|
118
|
+
f"Namespace {metastore.default_namespace_name} cannot be removed"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
num_projects = metastore.count_projects(namespace.id)
|
|
122
|
+
if num_projects > 0:
|
|
123
|
+
raise NamespaceDeleteNotAllowedError(
|
|
124
|
+
f"Namespace cannot be removed. It contains {num_projects} project(s). "
|
|
125
|
+
"Please remove the project(s) first."
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
metastore.remove_namespace(namespace.id)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
-
from datachain.error import ProjectCreateNotAllowedError
|
|
3
|
+
from datachain.error import ProjectCreateNotAllowedError, ProjectDeleteNotAllowedError
|
|
4
4
|
from datachain.project import Project
|
|
5
5
|
from datachain.query import Session
|
|
6
6
|
|
|
@@ -86,3 +86,49 @@ def ls(
|
|
|
86
86
|
namespace_id = session.catalog.metastore.get_namespace(namespace).id
|
|
87
87
|
|
|
88
88
|
return session.catalog.metastore.list_projects(namespace_id)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def delete(name: str, namespace: str, session: Optional[Session] = None) -> None:
|
|
92
|
+
"""
|
|
93
|
+
Removes a project by name within a namespace.
|
|
94
|
+
|
|
95
|
+
Raises:
|
|
96
|
+
ProjectNotFoundError: If the project does not exist.
|
|
97
|
+
ProjectDeleteNotAllowedError: If the project is non-empty,
|
|
98
|
+
is the default project, or is a listing project,
|
|
99
|
+
as these cannot be removed.
|
|
100
|
+
|
|
101
|
+
Parameters:
|
|
102
|
+
name : The name of the project.
|
|
103
|
+
namespace : The name of the namespace.
|
|
104
|
+
session : Session to use for getting project.
|
|
105
|
+
|
|
106
|
+
Example:
|
|
107
|
+
```py
|
|
108
|
+
import datachain as dc
|
|
109
|
+
dc.delete_project("my-project", "local")
|
|
110
|
+
```
|
|
111
|
+
"""
|
|
112
|
+
session = Session.get(session)
|
|
113
|
+
metastore = session.catalog.metastore
|
|
114
|
+
|
|
115
|
+
project = metastore.get_project(name, namespace)
|
|
116
|
+
|
|
117
|
+
if metastore.is_listing_project(name, namespace):
|
|
118
|
+
raise ProjectDeleteNotAllowedError(
|
|
119
|
+
f"Project {metastore.listing_project_name} cannot be removed"
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
if metastore.is_default_project(name, namespace):
|
|
123
|
+
raise ProjectDeleteNotAllowedError(
|
|
124
|
+
f"Project {metastore.default_project_name} cannot be removed"
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
num_datasets = metastore.count_datasets(project.id)
|
|
128
|
+
if num_datasets > 0:
|
|
129
|
+
raise ProjectDeleteNotAllowedError(
|
|
130
|
+
f"Project cannot be removed. It contains {num_datasets} dataset(s). "
|
|
131
|
+
"Please remove the dataset(s) first."
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
metastore.remove_project(project.id)
|
|
@@ -9,6 +9,25 @@ N = TypeVar("N", bound="Namespace")
|
|
|
9
9
|
NAMESPACE_NAME_RESERVED_CHARS = [".", "@"]
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
def parse_name(name: str) -> tuple[str, Optional[str]]:
|
|
13
|
+
"""
|
|
14
|
+
Parses namespace name into namespace and optional project name.
|
|
15
|
+
If both namespace and project are defined in name, they need to be split by dot
|
|
16
|
+
e.g. dev.my-project
|
|
17
|
+
Valid inputs:
|
|
18
|
+
- dev.my-project
|
|
19
|
+
- dev
|
|
20
|
+
"""
|
|
21
|
+
parts = name.split(".")
|
|
22
|
+
if len(parts) == 1:
|
|
23
|
+
return name, None
|
|
24
|
+
if len(parts) == 2:
|
|
25
|
+
return parts[0], parts[1]
|
|
26
|
+
raise InvalidNamespaceNameError(
|
|
27
|
+
f"Invalid namespace format: {name}. Expected 'namespace' or 'ns1.ns2'."
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
12
31
|
@dataclass(frozen=True)
|
|
13
32
|
class Namespace:
|
|
14
33
|
id: int
|