datachain 0.31.4__tar.gz → 0.32.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.31.4 → datachain-0.32.1}/PKG-INFO +11 -23
- {datachain-0.31.4 → datachain-0.32.1}/README.rst +10 -22
- {datachain-0.31.4 → datachain-0.32.1}/docs/api_hooks.py +7 -0
- datachain-0.32.1/docs/assets/webhook_dialog.png +0 -0
- datachain-0.32.1/docs/assets/webhook_list.png +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/namespaces.md +23 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/datachain.md +2 -0
- datachain-0.32.1/docs/studio/webhooks.md +265 -0
- {datachain-0.31.4 → datachain-0.32.1}/mkdocs.yml +1 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/__init__.py +2 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/metastore.py +79 -15
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/error.py +8 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/python_to_sql.py +18 -4
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/namespaces.py +56 -2
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/projects.py +47 -1
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/namespace.py +19 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/PKG-INFO +11 -23
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/SOURCES.txt +3 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_namespace.py +45 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_project.py +60 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_python_to_sql.py +19 -0
- {datachain-0.31.4 → datachain-0.32.1}/.cruft.json +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.gitattributes +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/codecov.yaml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/dependabot.yml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/release.yml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/tests.yml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.gitignore +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/.pre-commit-config.yaml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/LICENSE +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/assets/datachain.svg +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/login.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/logout.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/team.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/auth/token.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/index.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/cancel.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/clusters.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/logs.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/ls.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/commands/job/run.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/contributing.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/examples.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/db_migrations.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/delta.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/env.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/index.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/processing.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/remotes.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/guide/retry.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/index.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/overrides/main.html +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/quick-start.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/file.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/index.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/pose.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/segment.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/func.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/array.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/conditional.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/numeric.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/path.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/random.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/string.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/functions/window.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/index.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/toolkit.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/torch.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/references/udf.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/templates/main.dot +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/templates/operation.dot +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/templates/responses.def +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/docs/tutorials.md +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/wds.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/noxfile.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/pyproject.toml +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/setup.cfg +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/__main__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/asyn.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cache.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/cli/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/azure.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/gcs.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/hf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/local.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/client/s3.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/config.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/dataset.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/delta.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/fs/reference.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/fs/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/array.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/base.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/conditional.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/func.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/numeric.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/path.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/random.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/string.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/func/window.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/job.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/audio.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/clip.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/file.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/hf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/image.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/listing.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/settings.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/tar.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/text.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/udf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/video.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/listing.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/bbox.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/pose.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/segment.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/model/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/node.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/progress.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/project.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/py.typed +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/batch.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/dataset.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/metrics.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/params.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/queue.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/schema.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/session.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/udf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/query/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/remote/studio.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/script_meta.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/semver.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/types.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/sql/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/studio.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/telemetry.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain/utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/conftest.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/data.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/examples/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/examples/test_examples.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/examples/wds_data.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/data/lena.jpg +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_array.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_path.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_random.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/functions/test_string.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_audio.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_batching.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_catalog.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_client.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_data_storage.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_datachain.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_datasets.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_delta.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_file.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_hf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_image.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_listing.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_ls.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_metastore.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_metrics.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_mutate.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_pull.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_pytorch.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_query.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_read_database.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_retry.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_session.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_to_database.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_toolkit.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_video.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/func/test_warehouse.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/test_atomicity.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/test_cli_e2e.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/test_cli_studio.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/test_import_time.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/test_query_e2e.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/test_telemetry.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_asyn.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_cache.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_catalog.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_client.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_config.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_dataset.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_func.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_listing.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_metastore.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_query.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_query_params.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_semver.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_serializer.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_session.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_utils.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.31.4 → datachain-0.32.1}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.32.1
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -210,45 +210,33 @@ datasets that evolve over time and may occasionally have processing errors.
|
|
|
210
210
|
.. code:: py
|
|
211
211
|
|
|
212
212
|
import datachain as dc
|
|
213
|
-
from datachain import C, File
|
|
214
213
|
|
|
215
|
-
def process_file(file: File):
|
|
216
|
-
"""
|
|
214
|
+
def process_file(file: dc.File) -> tuple[str, str, str]:
|
|
215
|
+
"""Analyze a file, may occasionally fail."""
|
|
217
216
|
try:
|
|
218
217
|
# Your processing logic here
|
|
219
218
|
content = file.read_text()
|
|
220
|
-
result =
|
|
221
|
-
return
|
|
222
|
-
"content": content,
|
|
223
|
-
"result": result,
|
|
224
|
-
"error": None # No error
|
|
225
|
-
}
|
|
219
|
+
result = content.upper()
|
|
220
|
+
return content, result, "" # No error
|
|
226
221
|
except Exception as e:
|
|
227
222
|
# Return an error that will trigger reprocessing next time
|
|
228
|
-
return
|
|
229
|
-
"content": None,
|
|
230
|
-
"result": None,
|
|
231
|
-
"error": str(e) # Error field will trigger retry
|
|
232
|
-
}
|
|
223
|
+
return "", "", str(e) # Error field will trigger retry
|
|
233
224
|
|
|
234
225
|
# Process files efficiently with delta and retry
|
|
226
|
+
# Run it many times, keep adding files, to see delta and retry in action
|
|
235
227
|
chain = (
|
|
236
228
|
dc.read_storage(
|
|
237
229
|
"data/",
|
|
238
230
|
update=True,
|
|
239
231
|
delta=True, # Process only new/changed files
|
|
240
232
|
delta_on="file.path", # Identify files by path
|
|
241
|
-
|
|
233
|
+
delta_retry="error", # Process files with error again
|
|
242
234
|
)
|
|
243
|
-
.map(
|
|
244
|
-
.
|
|
245
|
-
content=C("processed_result.content"),
|
|
246
|
-
result=C("processed_result.result"),
|
|
247
|
-
error=C("processed_result.error")
|
|
248
|
-
)
|
|
249
|
-
.save(name="processed_data")
|
|
235
|
+
.map(process_file, output=("content", "result", "error"))
|
|
236
|
+
.save("processed-data")
|
|
250
237
|
)
|
|
251
238
|
|
|
239
|
+
|
|
252
240
|
Example: LLM based text-file evaluation
|
|
253
241
|
---------------------------------------
|
|
254
242
|
|
|
@@ -89,45 +89,33 @@ datasets that evolve over time and may occasionally have processing errors.
|
|
|
89
89
|
.. code:: py
|
|
90
90
|
|
|
91
91
|
import datachain as dc
|
|
92
|
-
from datachain import C, File
|
|
93
92
|
|
|
94
|
-
def process_file(file: File):
|
|
95
|
-
"""
|
|
93
|
+
def process_file(file: dc.File) -> tuple[str, str, str]:
|
|
94
|
+
"""Analyze a file, may occasionally fail."""
|
|
96
95
|
try:
|
|
97
96
|
# Your processing logic here
|
|
98
97
|
content = file.read_text()
|
|
99
|
-
result =
|
|
100
|
-
return
|
|
101
|
-
"content": content,
|
|
102
|
-
"result": result,
|
|
103
|
-
"error": None # No error
|
|
104
|
-
}
|
|
98
|
+
result = content.upper()
|
|
99
|
+
return content, result, "" # No error
|
|
105
100
|
except Exception as e:
|
|
106
101
|
# Return an error that will trigger reprocessing next time
|
|
107
|
-
return
|
|
108
|
-
"content": None,
|
|
109
|
-
"result": None,
|
|
110
|
-
"error": str(e) # Error field will trigger retry
|
|
111
|
-
}
|
|
102
|
+
return "", "", str(e) # Error field will trigger retry
|
|
112
103
|
|
|
113
104
|
# Process files efficiently with delta and retry
|
|
105
|
+
# Run it many times, keep adding files, to see delta and retry in action
|
|
114
106
|
chain = (
|
|
115
107
|
dc.read_storage(
|
|
116
108
|
"data/",
|
|
117
109
|
update=True,
|
|
118
110
|
delta=True, # Process only new/changed files
|
|
119
111
|
delta_on="file.path", # Identify files by path
|
|
120
|
-
|
|
112
|
+
delta_retry="error", # Process files with error again
|
|
121
113
|
)
|
|
122
|
-
.map(
|
|
123
|
-
.
|
|
124
|
-
content=C("processed_result.content"),
|
|
125
|
-
result=C("processed_result.result"),
|
|
126
|
-
error=C("processed_result.error")
|
|
127
|
-
)
|
|
128
|
-
.save(name="processed_data")
|
|
114
|
+
.map(process_file, output=("content", "result", "error"))
|
|
115
|
+
.save("processed-data")
|
|
129
116
|
)
|
|
130
117
|
|
|
118
|
+
|
|
131
119
|
Example: LLM based text-file evaluation
|
|
132
120
|
---------------------------------------
|
|
133
121
|
|
|
@@ -4,6 +4,13 @@ def on_pre_build(**kwargs):
|
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
6
|
|
|
7
|
+
# Skip if files already exist
|
|
8
|
+
if os.path.exists("docs/openapi.json") and os.path.exists(
|
|
9
|
+
"docs/studio/api/index.md"
|
|
10
|
+
):
|
|
11
|
+
print("API docs already exist, skipping generation")
|
|
12
|
+
return
|
|
13
|
+
|
|
7
14
|
# Download OpenAPI spec
|
|
8
15
|
response = requests.get(
|
|
9
16
|
"https://studio.datachain.ai/api/openapi.json",
|
|
Binary file
|
|
Binary file
|
|
@@ -159,3 +159,26 @@ dc.read_values(scores=[0.8, 1.5, 2.1]).save("metrics")
|
|
|
159
159
|
|
|
160
160
|
ds = dc.read_dataset("local.local.metrics")
|
|
161
161
|
ds.show()
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Removing Namespaces and Projects
|
|
165
|
+
|
|
166
|
+
Use `delete_namespace` to remove an empty namespace or an empty project within a namespace. Delete will fail if the target is not empty.
|
|
167
|
+
|
|
168
|
+
### Signature
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
def delete_namespace(name: str, session: Optional[Session]) -> None:
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
- **`<namespace>`** — deletes the namespace (must contain no projects or datasets).
|
|
175
|
+
- **`<namespace>.<project>`** — deletes the project (must contain no datasets).
|
|
176
|
+
|
|
177
|
+
### Examples
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
import datachain as dc
|
|
181
|
+
|
|
182
|
+
dc.delete_namespace("dev.my-project") # delete project
|
|
183
|
+
dc.delete_namespace("dev") # delete namespace
|
|
184
|
+
```
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
# Webhooks in Studio
|
|
2
|
+
|
|
3
|
+
## About webhooks
|
|
4
|
+
|
|
5
|
+
Webhooks provide a way for notifications to be delivered to an external web server whenever certain events occur in [Studio](https://studio.datachain.ai). With webhooks, you configure once which events or activities you want to hear about.
|
|
6
|
+
|
|
7
|
+
When you create a webhook, you specify a URL and any additional information you want us to send, along with the events that you want to listen for in Datachain. When one of those events occurs, Datachain Studio will send an HTTP request with the data about the event to the URL that you specified. If your server is set up to listen for webhook deliveries at that URL, it can take action when it receives one.
|
|
8
|
+
|
|
9
|
+
For example, you can subscribe your webhook to events that occur when a job is created, starts running, completes, fails, and so on. You can then use this webhook to be notified whenever a job fails.
|
|
10
|
+
|
|
11
|
+
### Alternative
|
|
12
|
+
As an alternative to webhooks, you can also use the [CLI commands](../commands/index.md) to get the job information, or some of our available [API endpoints](api/index.md). However, webhooks require less effort than polling an API, since they deliver near-real-time updates.
|
|
13
|
+
|
|
14
|
+
## Available event type
|
|
15
|
+
As of now, your server can receive two different types of events.
|
|
16
|
+
|
|
17
|
+
### JOB
|
|
18
|
+
|
|
19
|
+
Whenever a job is created or its status changes, you will receive the JOB webhook event. The payload you get with the job webhook looks like this:
|
|
20
|
+
|
|
21
|
+
Header: `http-x-datachain-event`: `JOB`
|
|
22
|
+
|
|
23
|
+
Payload:
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"action": "job_status",
|
|
27
|
+
"job": {
|
|
28
|
+
"id": "da59df47-d121-4eb6-aa76-dc452755544e",
|
|
29
|
+
"status": "COMPLETE",
|
|
30
|
+
"error_message": "",
|
|
31
|
+
"created_at": "2021-07-27T16:02:08.070557",
|
|
32
|
+
"updated_at": "2021-07-27T16:22:08.070557",
|
|
33
|
+
"finished_at": "2021-07-27T16:22:08.070557",
|
|
34
|
+
"url": "https://studio.datachain.ai/team/TeamName/datasets/jobs/da59df47-d121-4eb6-aa76-dc452755544e"
|
|
35
|
+
},
|
|
36
|
+
"timestamp": "2021-07-27T16:22:08.070557"
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### PING
|
|
41
|
+
Whenever you add your webhook to your team, Studio sends a PING event to check the delivery to the server. You can check the recent deliveries to check if the webhook is successfully connected.
|
|
42
|
+
|
|
43
|
+
Header: `http-x-datachain-event`: `PING`.
|
|
44
|
+
|
|
45
|
+
Payload:
|
|
46
|
+
```json
|
|
47
|
+
{
|
|
48
|
+
"action": "PING"
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
## Creating webhooks
|
|
54
|
+
|
|
55
|
+
You should have admin access to a team to create the webhooks in the team. To create a webhook, go to settings for the team and under the section Webhooks, click on Add new Webhook.
|
|
56
|
+

|
|
57
|
+
|
|
58
|
+
Enter the necessary information to create the webhooks.
|
|
59
|
+
|
|
60
|
+
- **URL:** Enter the valid URL where you’d like to receive the webhook payload.
|
|
61
|
+
- **Secret:** A string to use as a secret key. You should choose a random string of text with high entropy. You can use the webhook secret to [validate incoming requests](#validating-webhook-deliveries) to those only originating from Datachain Studio.
|
|
62
|
+
- **Events:** Under events, select the events you would like to trigger the webhook.
|
|
63
|
+
- **JOB:**
|
|
64
|
+
- CREATED: When a job is created but not yet scheduled to run
|
|
65
|
+
- SCHEDULED: Job has been scheduled to run
|
|
66
|
+
- QUEUED: Job is in the queue waiting to be processed
|
|
67
|
+
- INIT: Job is initializing (starting up)
|
|
68
|
+
- RUNNING: When a job starts running
|
|
69
|
+
- COMPLETE: Job has completed successfully
|
|
70
|
+
- FAILED: Job failed with error
|
|
71
|
+
- CANCELED: Job has been canceled successfully
|
|
72
|
+
- CANCELING: Job has been scheduled to cancel
|
|
73
|
+
- TASK: A scheduled task is created.
|
|
74
|
+
|
|
75
|
+
- SSL Verification: By default, we verify SSL certificates when delivering payloads. SSL verification helps ensure that hook payloads are delivered to your URL endpoint securely, keeping your data away from prying eyes. Disabling this option is **not recommended**.
|
|
76
|
+
- HTTP Method: By default, we make a `POST` request, but you can specify another HTTP method if necessary.
|
|
77
|
+
- Content Type: Optionally, select the data format you want to receive the webhook payload in
|
|
78
|
+
- **application/json** will deliver the JSON payload directly as the body of the `POST` request.
|
|
79
|
+
- **application/x-www-form-urlencoded** will send the JSON payload as a form parameter called `payload`.
|
|
80
|
+
|
|
81
|
+

|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
## Handling webhook deliveries
|
|
85
|
+
|
|
86
|
+
When you create a webhook, you specify a URL and subscribe to event types. When any event that your webhook is subscribed to occurs, Datachain Studio will send an HTTP request with the data about the event to the URL that you specified. If your server is set up to listen at that URL, it can take action when it receives one.
|
|
87
|
+
|
|
88
|
+
### Setup
|
|
89
|
+
|
|
90
|
+
In order to test your webhook locally, you can use a webhook proxy URL to forward the webhooks from Studio to your computer or codespace. We are using [smee.io](http://smee.io) to provide a webhook proxy url and forward webhooks.
|
|
91
|
+
|
|
92
|
+
1. Go to [smee.io](http://smee.io)
|
|
93
|
+
2. Start a new channel
|
|
94
|
+
3. Copy the full URL under the webhook proxy URL. We will use this URL in the following setup steps.
|
|
95
|
+
4. Install smee-client if it is not already installed using `npm install --global smee-client`
|
|
96
|
+
5. To receive forwarded webhooks from smee.io, run the following command in your terminal. Replace the `WEBHOOK_PROXY_URL` with your webhook proxy URL from earlier.
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
smee --path /webhook --port 3000 --url WEBHOOK_PROXY_URL
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
6. Keep this running while you test out your webhook. When you want to stop forwarding the webhooks, enter Ctrl + C
|
|
103
|
+
7. Create a webhook using the steps described above, or edit an existing one, and set its URL to the webhook proxy URL from earlier.
|
|
104
|
+
8. Write code to handle webhook deliveries
|
|
105
|
+
1. Initialize your server to listen for requests to your webhook URL
|
|
106
|
+
2. Read HTTP headers and body from request
|
|
107
|
+
3. Take desired action in response to the request.
|
|
108
|
+
|
|
109
|
+
You can use any programming language that you can run on your server.
|
|
110
|
+
|
|
111
|
+
### Example Code
|
|
112
|
+
|
|
113
|
+
#### Python
|
|
114
|
+
|
|
115
|
+
This example uses Python and the Flask library to handle the routes and HTTP requests.
|
|
116
|
+
|
|
117
|
+
To use this, you must install the Flask library in your project. For example:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
pip install Flask
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Create a Python file with the following contents. Modify the code to handle only the event types that your webhook is subscribed to, as well as the ping event that Studio sends when you create a webhook. This example handles job and ping events.
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
# You installed the `flask` library earlier.
|
|
127
|
+
from flask import Flask, request
|
|
128
|
+
|
|
129
|
+
# This defines the port where your server should listen.
|
|
130
|
+
# 3000 matches the port that you specified for webhook forwarding.
|
|
131
|
+
#
|
|
132
|
+
# Once you deploy your code to a server,
|
|
133
|
+
# Change this to match the port where your server is listening.
|
|
134
|
+
port = 3000
|
|
135
|
+
secret = "secretString"
|
|
136
|
+
|
|
137
|
+
# This initializes a new Flask application.
|
|
138
|
+
app = Flask(__name__)
|
|
139
|
+
|
|
140
|
+
# This defines a POST route at the `/webhook` path.
|
|
141
|
+
# It matches the path you specified for the smee.io forwarding.
|
|
142
|
+
#
|
|
143
|
+
# Once you deploy your code to a server and update your webhook URL,
|
|
144
|
+
# Change this to match the path portion of the URL for your webhook.
|
|
145
|
+
@app.route('/webhook', methods=['POST'])
|
|
146
|
+
def webhook():
|
|
147
|
+
# Respond to indicate that delivery was successfully received.
|
|
148
|
+
# Your server should respond with a 2XX response
|
|
149
|
+
# within 10 seconds of receiving a webhook delivery.
|
|
150
|
+
# If your server takes longer than that to respond,
|
|
151
|
+
# then Studio terminates the connection.
|
|
152
|
+
|
|
153
|
+
# Check `http-x-datachain-event` header for the event type.
|
|
154
|
+
datachain_event = request.headers.get('http-x-datachain-event')
|
|
155
|
+
|
|
156
|
+
# You should add logic to handle each event type
|
|
157
|
+
# that your webhook is subscribed to.
|
|
158
|
+
# For example, this code handles the `JOB` and `PING` events.
|
|
159
|
+
if datachain_event == 'JOB':
|
|
160
|
+
data = request.get_json()
|
|
161
|
+
action = data.get('action')
|
|
162
|
+
if action == 'job_status':
|
|
163
|
+
print(
|
|
164
|
+
f"Job status for job {data['job']['id']} was" \
|
|
165
|
+
f" changed to {data['job']['status']}"
|
|
166
|
+
)
|
|
167
|
+
else:
|
|
168
|
+
print(f"Unhandled action for the job event: {action}")
|
|
169
|
+
elif datachain_event == 'PING':
|
|
170
|
+
print('Ping event received')
|
|
171
|
+
else:
|
|
172
|
+
print(f"Unhandled event: {datachain_event}")
|
|
173
|
+
|
|
174
|
+
return '', 202 # 202 Accepted status code
|
|
175
|
+
|
|
176
|
+
# This starts the server.
|
|
177
|
+
if __name__ == '__main__':
|
|
178
|
+
app.run(host='0.0.0.0', port=port, debug=True)
|
|
179
|
+
print(f"Server is running on port {port}")
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
To test the code, run the file using `python FILENAME`. Make sure that you are forwarding the webhooks in a separate terminal.
|
|
184
|
+
|
|
185
|
+
When you run a job in Studio, you will see output similar to the following:
|
|
186
|
+
|
|
187
|
+
```prolog
|
|
188
|
+
Ping event received
|
|
189
|
+
Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to CREATED
|
|
190
|
+
Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to SCHEDULED
|
|
191
|
+
Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to QUEUED
|
|
192
|
+
Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to INIT
|
|
193
|
+
Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to RUNNING
|
|
194
|
+
Job status for job a852ee4a-091a-456f-ba1a-c809f7e804f3 was changed to COMPLETE
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Validating webhook deliveries
|
|
198
|
+
|
|
199
|
+
Once your server is configured to receive payloads, it will listen for any delivery that’s sent to the endpoint you configured. To ensure that your server only processes webhook deliveries that were sent by Datachain Studio and to ensure that the delivery was not tampered with, you should validate webhook signature before processing the delivery further.
|
|
200
|
+
|
|
201
|
+
Studio will use the secret you added when creating your webhook to create a hash signature that’s sent to you with each payload. The hash signature will appear in each delivery as the value of `X-datachain-signature-256` header.
|
|
202
|
+
|
|
203
|
+
In your code that handles webhook deliveries, you should calculate a hash using your secret token and compare the hash Studio sent with the expected hash that you calculate and ensure they match.
|
|
204
|
+
|
|
205
|
+
Notes:
|
|
206
|
+
|
|
207
|
+
- Studio uses HMAC hex digest to compute the hash
|
|
208
|
+
- The hash signature always starts with `sha256=`
|
|
209
|
+
- The hash signature is generated using webhook’s secret token and payload contents.
|
|
210
|
+
- Never use a plain `==` operator. Instead consider using a method like [`secure_compare`](https://www.rubydoc.info/gems/rack/Rack%2FUtils:secure_compare), [`crypto.timingSafeEqual`](https://nodejs.org/api/crypto.html#cryptotimingsafeequala-b), or Python's `hmac.compare_digest` (used in the example below), which performs a "constant time" string comparison to help mitigate certain timing attacks against regular equality operators, or regular loops in JIT-optimized languages.
|
|
211
|
+
|
|
212
|
+
Updating the example above:
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
import hashlib
|
|
216
|
+
import hmac
|
|
217
|
+
from flask import abort
|
|
218
|
+
|
|
219
|
+
def verify_signature(payload_body, secret_token, signature_header):
|
|
220
|
+
"""Verify the payload was sent from Studio by validating SHA256.
|
|
221
|
+
|
|
222
|
+
Raise and return 403 if not authorized.
|
|
223
|
+
|
|
224
|
+
Args:
|
|
225
|
+
payload_body: request body to verify (request.body())
|
|
226
|
+
secret_token: Studio webhook token (WEBHOOK_SECRET)
|
|
227
|
+
signature_header: header (x-datachain-signature-256)
|
|
228
|
+
"""
|
|
229
|
+
if not signature_header:
|
|
230
|
+
abort(403, "X-datachain-signature-256 is missing!")
|
|
231
|
+
hash_object = hmac.new(
|
|
232
|
+
secret_token.encode('utf-8'),
|
|
233
|
+
msg=payload_body,
|
|
234
|
+
digestmod=hashlib.sha256
|
|
235
|
+
)
|
|
236
|
+
expected_signature = "sha256=" + hash_object.hexdigest()
|
|
237
|
+
if not hmac.compare_digest(
|
|
238
|
+
expected_signature, signature_header
|
|
239
|
+
):
|
|
240
|
+
abort(403, "Request signatures didn't match!")
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Add the following call in the api receiver.
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
# Get the signature header
|
|
247
|
+
signature = request.headers.get('X-Datachain-Signature-256')
|
|
248
|
+
|
|
249
|
+
# Re-enable signature verification with improved JSON handling
|
|
250
|
+
if signature:
|
|
251
|
+
verify_signature(request.get_data(), secret, signature)
|
|
252
|
+
else:
|
|
253
|
+
print("Warning: No signature header found")
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## Best practices for using Webhooks
|
|
257
|
+
|
|
258
|
+
1. You should only subscribe to the webhook events that you need. This will reduce the amount of work your server needs to do.
|
|
259
|
+
2. The webhook secret should be a random string of text with high entropy. You should securely store your webhook secret in a way that your server can access.
|
|
260
|
+
3. You should ensure that your server uses an HTTPS connection. By default, Studio will verify SSL certificates when delivering webhooks. Studio recommends that you leave SSL verification enabled.
|
|
261
|
+
4. Your server should respond with a 2XX response within 10 seconds of receiving a webhook delivery. If your server takes longer than that to respond, then Studio terminates the connection and considers the delivery a failure.
|
|
262
|
+
5. Check the event header and action type before processing the event.
|
|
263
|
+
6. Make sure your endpoints are idempotent, meaning that if multiple requests for the same event are received, the server handles them without duplicating work.
|
|
264
|
+
7. Datachain Studio may deliver webhooks in a different order than the order in which the events took place. If you need to know when the event occurred relative to another event, you should use the timestamps that are included in the delivery payload.
|
|
265
|
+
8. Ten consecutive failed deliveries to a webhook will disable deliveries for that webhook.
|
|
@@ -37,6 +37,7 @@ from datachain.lib.file import (
|
|
|
37
37
|
VideoFrame,
|
|
38
38
|
)
|
|
39
39
|
from datachain.lib.model_store import ModelStore
|
|
40
|
+
from datachain.lib.namespaces import delete_namespace
|
|
40
41
|
from datachain.lib.projects import create as create_project
|
|
41
42
|
from datachain.lib.udf import Aggregator, Generator, Mapper
|
|
42
43
|
from datachain.lib.utils import AbstractUDF, DataChainError
|
|
@@ -74,6 +75,7 @@ __all__ = [
|
|
|
74
75
|
"create_project",
|
|
75
76
|
"datasets",
|
|
76
77
|
"delete_dataset",
|
|
78
|
+
"delete_namespace",
|
|
77
79
|
"is_chain_type",
|
|
78
80
|
"is_studio",
|
|
79
81
|
"listings",
|
|
@@ -22,6 +22,7 @@ from sqlalchemy import (
|
|
|
22
22
|
UniqueConstraint,
|
|
23
23
|
select,
|
|
24
24
|
)
|
|
25
|
+
from sqlalchemy.sql import func as f
|
|
25
26
|
|
|
26
27
|
from datachain.data_storage import JobQueryType, JobStatus
|
|
27
28
|
from datachain.data_storage.serializer import Serializable
|
|
@@ -37,7 +38,9 @@ from datachain.dataset import (
|
|
|
37
38
|
from datachain.error import (
|
|
38
39
|
DatasetNotFoundError,
|
|
39
40
|
DatasetVersionNotFoundError,
|
|
41
|
+
NamespaceDeleteNotAllowedError,
|
|
40
42
|
NamespaceNotFoundError,
|
|
43
|
+
ProjectDeleteNotAllowedError,
|
|
41
44
|
ProjectNotFoundError,
|
|
42
45
|
TableMissingError,
|
|
43
46
|
)
|
|
@@ -141,6 +144,10 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
141
144
|
def get_namespace(self, name: str, conn=None) -> Namespace:
|
|
142
145
|
"""Gets a single namespace by name"""
|
|
143
146
|
|
|
147
|
+
@abstractmethod
|
|
148
|
+
def remove_namespace(self, namespace_id: int, conn=None) -> None:
|
|
149
|
+
"""Removes a single namespace by id"""
|
|
150
|
+
|
|
144
151
|
@abstractmethod
|
|
145
152
|
def list_namespaces(self, conn=None) -> list[Namespace]:
|
|
146
153
|
"""Gets a list of all namespaces"""
|
|
@@ -190,10 +197,30 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
190
197
|
It also creates project if not found and create flag is set to True.
|
|
191
198
|
"""
|
|
192
199
|
|
|
200
|
+
def is_default_project(self, project_name: str, namespace_name: str) -> bool:
|
|
201
|
+
return (
|
|
202
|
+
project_name == self.default_project_name
|
|
203
|
+
and namespace_name == self.default_namespace_name
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
def is_listing_project(self, project_name: str, namespace_name: str) -> bool:
|
|
207
|
+
return (
|
|
208
|
+
project_name == self.listing_project_name
|
|
209
|
+
and namespace_name == self.system_namespace_name
|
|
210
|
+
)
|
|
211
|
+
|
|
193
212
|
@abstractmethod
|
|
194
213
|
def get_project_by_id(self, project_id: int, conn=None) -> Project:
|
|
195
214
|
"""Gets a single project by id"""
|
|
196
215
|
|
|
216
|
+
@abstractmethod
|
|
217
|
+
def count_projects(self, namespace_id: Optional[int] = None) -> int:
|
|
218
|
+
"""Counts projects in some namespace or in general."""
|
|
219
|
+
|
|
220
|
+
@abstractmethod
|
|
221
|
+
def remove_project(self, project_id: int, conn=None) -> None:
|
|
222
|
+
"""Removes a single project by id"""
|
|
223
|
+
|
|
197
224
|
@abstractmethod
|
|
198
225
|
def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
|
|
199
226
|
"""Gets list of projects in some namespace or in general (in all namespaces)"""
|
|
@@ -270,6 +297,10 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
270
297
|
) -> Iterator[DatasetListRecord]:
|
|
271
298
|
"""Lists all datasets in some project or in all projects."""
|
|
272
299
|
|
|
300
|
+
@abstractmethod
|
|
301
|
+
def count_datasets(self, project_id: Optional[int] = None) -> int:
|
|
302
|
+
"""Counts datasets in some project or in all projects."""
|
|
303
|
+
|
|
273
304
|
@abstractmethod
|
|
274
305
|
def list_datasets_by_prefix(
|
|
275
306
|
self, prefix: str, project_id: Optional[int] = None
|
|
@@ -735,6 +766,18 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
735
766
|
|
|
736
767
|
return self.get_namespace(name)
|
|
737
768
|
|
|
769
|
+
def remove_namespace(self, namespace_id: int, conn=None) -> None:
|
|
770
|
+
num_projects = self.count_projects(namespace_id)
|
|
771
|
+
if num_projects > 0:
|
|
772
|
+
raise NamespaceDeleteNotAllowedError(
|
|
773
|
+
f"Namespace cannot be removed. It contains {num_projects} project(s). "
|
|
774
|
+
"Please remove the project(s) first."
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
n = self._namespaces
|
|
778
|
+
with self.db.transaction():
|
|
779
|
+
self.db.execute(self._namespaces_delete().where(n.c.id == namespace_id))
|
|
780
|
+
|
|
738
781
|
def get_namespace(self, name: str, conn=None) -> Namespace:
|
|
739
782
|
"""Gets a single namespace by name"""
|
|
740
783
|
n = self._namespaces
|
|
@@ -796,18 +839,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
796
839
|
|
|
797
840
|
return self.get_project(name, namespace.name)
|
|
798
841
|
|
|
799
|
-
def _is_listing_project(self, project_name: str, namespace_name: str) -> bool:
|
|
800
|
-
return (
|
|
801
|
-
project_name == self.listing_project_name
|
|
802
|
-
and namespace_name == self.system_namespace_name
|
|
803
|
-
)
|
|
804
|
-
|
|
805
|
-
def _is_default_project(self, project_name: str, namespace_name: str) -> bool:
|
|
806
|
-
return (
|
|
807
|
-
project_name == self.default_project_name
|
|
808
|
-
and namespace_name == self.default_namespace_name
|
|
809
|
-
)
|
|
810
|
-
|
|
811
842
|
def get_project(
|
|
812
843
|
self, name: str, namespace_name: str, create: bool = False, conn=None
|
|
813
844
|
) -> Project:
|
|
@@ -816,7 +847,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
816
847
|
p = self._projects
|
|
817
848
|
validate = True
|
|
818
849
|
|
|
819
|
-
if self.
|
|
850
|
+
if self.is_listing_project(name, namespace_name) or self.is_default_project(
|
|
820
851
|
name, namespace_name
|
|
821
852
|
):
|
|
822
853
|
# we are always creating default and listing projects if they don't exist
|
|
@@ -858,7 +889,31 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
858
889
|
raise ProjectNotFoundError(f"Project with id {project_id} not found.")
|
|
859
890
|
return self.project_class.parse(*rows[0])
|
|
860
891
|
|
|
861
|
-
def
|
|
892
|
+
def count_projects(self, namespace_id: Optional[int] = None) -> int:
|
|
893
|
+
p = self._projects
|
|
894
|
+
query = self._projects_select()
|
|
895
|
+
if namespace_id:
|
|
896
|
+
query = query.where(p.c.namespace_id == namespace_id)
|
|
897
|
+
|
|
898
|
+
query = select(f.count(1)).select_from(query.subquery())
|
|
899
|
+
|
|
900
|
+
return next(self.db.execute(query))[0]
|
|
901
|
+
|
|
902
|
+
def remove_project(self, project_id: int, conn=None) -> None:
|
|
903
|
+
num_datasets = self.count_datasets(project_id)
|
|
904
|
+
if num_datasets > 0:
|
|
905
|
+
raise ProjectDeleteNotAllowedError(
|
|
906
|
+
f"Project cannot be removed. It contains {num_datasets} dataset(s). "
|
|
907
|
+
"Please remove the dataset(s) first."
|
|
908
|
+
)
|
|
909
|
+
|
|
910
|
+
p = self._projects
|
|
911
|
+
with self.db.transaction():
|
|
912
|
+
self.db.execute(self._projects_delete().where(p.c.id == project_id))
|
|
913
|
+
|
|
914
|
+
def list_projects(
|
|
915
|
+
self, namespace_id: Optional[int] = None, conn=None
|
|
916
|
+
) -> list[Project]:
|
|
862
917
|
"""
|
|
863
918
|
Gets a list of projects inside some namespace, or in all namespaces
|
|
864
919
|
"""
|
|
@@ -1189,7 +1244,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1189
1244
|
def list_datasets(
|
|
1190
1245
|
self, project_id: Optional[int] = None
|
|
1191
1246
|
) -> Iterator["DatasetListRecord"]:
|
|
1192
|
-
"""Lists all datasets."""
|
|
1193
1247
|
d = self._datasets
|
|
1194
1248
|
query = self._base_list_datasets_query().order_by(
|
|
1195
1249
|
self._datasets.c.name, self._datasets_versions.c.version
|
|
@@ -1198,6 +1252,16 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1198
1252
|
query = query.where(d.c.project_id == project_id)
|
|
1199
1253
|
yield from self._parse_dataset_list(self.db.execute(query))
|
|
1200
1254
|
|
|
1255
|
+
def count_datasets(self, project_id: Optional[int] = None) -> int:
|
|
1256
|
+
d = self._datasets
|
|
1257
|
+
query = self._datasets_select()
|
|
1258
|
+
if project_id:
|
|
1259
|
+
query = query.where(d.c.project_id == project_id)
|
|
1260
|
+
|
|
1261
|
+
query = select(f.count(1)).select_from(query.subquery())
|
|
1262
|
+
|
|
1263
|
+
return next(self.db.execute(query))[0]
|
|
1264
|
+
|
|
1201
1265
|
def list_datasets_by_prefix(
|
|
1202
1266
|
self, prefix: str, project_id: Optional[int] = None, conn=None
|
|
1203
1267
|
) -> Iterator["DatasetListRecord"]:
|
|
@@ -34,6 +34,14 @@ class ProjectCreateNotAllowedError(NotAllowedError):
|
|
|
34
34
|
pass
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
class ProjectDeleteNotAllowedError(NotAllowedError):
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NamespaceDeleteNotAllowedError(NotAllowedError):
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
|
|
37
45
|
class ProjectNotFoundError(NotFoundError):
|
|
38
46
|
pass
|
|
39
47
|
|