datachain 0.22.0__tar.gz → 0.23.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.22.0 → datachain-0.23.0}/PKG-INFO +1 -1
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/env.md +4 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/namespaces.md +43 -1
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/catalog.py +33 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/datasets.py +4 -10
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/metastore.py +13 -2
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/sqlite.py +6 -2
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/dataset.py +3 -1
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/datachain.py +6 -12
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/datasets.py +8 -9
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/records.py +1 -1
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/signal_schema.py +8 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_datachain.py +46 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_datachain.py +111 -1
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_dataset.py +1 -1
- {datachain-0.22.0 → datachain-0.23.0}/.cruft.json +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.gitattributes +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/codecov.yaml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/dependabot.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/release.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.gitignore +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/.pre-commit-config.yaml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/LICENSE +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/README.rst +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/index.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/commands/job/run.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/contributing.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/examples.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/delta.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/index.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/processing.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/remotes.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/guide/retry.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/index.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/overrides/main.html +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/quick-start.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/datachain.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/func.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/index.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/toolkit.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/torch.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/references/udf.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/docs/tutorials.md +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/mkdocs.yml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/noxfile.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/pyproject.toml +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/setup.cfg +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/__main__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/asyn.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cache.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/local.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/config.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/delta.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/error.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/array.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/base.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/func.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/path.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/random.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/string.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/func/window.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/job.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/file.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/projects.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/listing.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/namespace.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/node.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/progress.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/project.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/py.typed +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/dataset.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/params.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/session.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/semver.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/studio.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain/utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/conftest.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/data.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/examples/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_batching.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_client.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_datasets.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_delta.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_file.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_hf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_image.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_listing.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_ls.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_pull.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_query.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_retry.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_session.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_video.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/test_atomicity.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/test_import_time.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/test_telemetry.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_client.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_config.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_func.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_query.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_session.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.22.0 → datachain-0.23.0}/tests/utils.py +0 -0
|
@@ -15,4 +15,8 @@ List of environment variables used to configure DataChain behavior.
|
|
|
15
15
|
- `DATACHAIN_STUDIO_TOKEN` – Authentication token for Studio.
|
|
16
16
|
- `DATACHAIN_STUDIO_TEAM` – Studio team name.
|
|
17
17
|
|
|
18
|
+
### Namespaces and projects
|
|
19
|
+
- `DATACHAIN_NAMESPACE` – Namespace name to use as default.
|
|
20
|
+
- `DATACHAIN_PROJECT` – Project name to use as default, or a namespace name and project name joined by `.` to set both (for example, `DATACHAIN_PROJECT=dev.analytics`).
|
|
21
|
+
|
|
18
22
|
Note: Some environment variables are used internally and may not be documented here. For the most up-to-date list, refer to the source code.
|
|
@@ -82,6 +82,49 @@ This is equivalent to saving to `dev.analytics.metrics`.
|
|
|
82
82
|
|
|
83
83
|
In CLI, `.settings()` is only supported when both `namespace` and `project` are set to `"local"`.
|
|
84
84
|
|
|
85
|
+
## Setting Namespace and Project via Environment Variables
|
|
86
|
+
|
|
87
|
+
In addition to using `.settings()`, you can configure the namespace and project using environment variables:
|
|
88
|
+
|
|
89
|
+
- `DATACHAIN_NAMESPACE` sets the namespace.
|
|
90
|
+
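- `DATACHAIN_PROJECT` sets the project name, or both the namespace and project using the format `namespace.project`. When the `namespace.project` format is used, its namespace takes precedence over `DATACHAIN_NAMESPACE`.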
|
|
91
|
+
|
|
92
|
+
### Examples
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
# Set namespace only
|
|
96
|
+
export DATACHAIN_NAMESPACE=dev
|
|
97
|
+
|
|
98
|
+
# Set project only
|
|
99
|
+
export DATACHAIN_PROJECT=analytics
|
|
100
|
+
|
|
101
|
+
# Set both namespace and project
|
|
102
|
+
export DATACHAIN_PROJECT=dev.analytics
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## How Namespace and Project Are Resolved
|
|
106
|
+
|
|
107
|
+
When determining which namespace and project to use, DataChain applies the following precedence (highest first):
|
|
108
|
+
|
|
109
|
+
1. **Fully qualified dataset name**
|
|
110
|
+
If the dataset name includes both the namespace and project, these values take highest precedence.
|
|
111
|
+
```python
|
|
112
|
+
dc.read_dataset("dev.analytics.metrics")
```
|
|
113
|
+
|
|
114
|
+
2. **Explicit settings in code**
|
|
115
|
+
Values provided via `.settings()` or passed directly to `read_dataset()` or similar methods.
|
|
116
|
+
```python
|
|
117
|
+
dc.settings(namespace="dev", project="analytics")
|
|
118
|
+
dc.read_dataset("metrics", namespace="dev", project="analytics")
|
|
119
|
+
```
|
|
120
|
+
3. **Environment variables**
|
|
121
|
+
Namespace and project set using environment variables:
|
|
122
|
+
```console
|
|
123
|
+
export DATACHAIN_PROJECT=dev.analytics
|
|
124
|
+
```
|
|
125
|
+
4. **Defaults**
|
|
126
|
+
If none of the above are provided, DataChain falls back to the default namespace and project.
|
|
127
|
+
|
|
85
128
|
## Reading a Dataset from a Project
|
|
86
129
|
|
|
87
130
|
To read a dataset from a specific namespace and project:
|
|
@@ -116,4 +159,3 @@ dc.read_values(scores=[0.8, 1.5, 2.1]).save("metrics")
|
|
|
116
159
|
|
|
117
160
|
ds = dc.read_dataset("local.local.metrics")
|
|
118
161
|
ds.show()
|
|
119
|
-
```
|
|
@@ -1059,6 +1059,39 @@ class Catalog:
|
|
|
1059
1059
|
|
|
1060
1060
|
return self.get_dataset(name, project)
|
|
1061
1061
|
|
|
1062
|
+
def get_full_dataset_name(
|
|
1063
|
+
self,
|
|
1064
|
+
name: str,
|
|
1065
|
+
project_name: Optional[str] = None,
|
|
1066
|
+
namespace_name: Optional[str] = None,
|
|
1067
|
+
) -> tuple[str, str, str]:
|
|
1068
|
+
"""
|
|
1069
|
+
Returns dataset name together with separated namespace and project name.
|
|
1070
|
+
It takes into account all the ways namespace and project can be added.
|
|
1071
|
+
"""
|
|
1072
|
+
parsed_namespace_name, parsed_project_name, name = parse_dataset_name(name)
|
|
1073
|
+
|
|
1074
|
+
namespace_env = os.environ.get("DATACHAIN_NAMESPACE")
|
|
1075
|
+
project_env = os.environ.get("DATACHAIN_PROJECT")
|
|
1076
|
+
if project_env and len(project_env.split(".")) == 2:
|
|
1077
|
+
# we allow setting both namespace and project in DATACHAIN_PROJECT
|
|
1078
|
+
namespace_env, project_env = project_env.split(".")
|
|
1079
|
+
|
|
1080
|
+
namespace_name = (
|
|
1081
|
+
parsed_namespace_name
|
|
1082
|
+
or namespace_name
|
|
1083
|
+
or namespace_env
|
|
1084
|
+
or self.metastore.default_namespace_name
|
|
1085
|
+
)
|
|
1086
|
+
project_name = (
|
|
1087
|
+
parsed_project_name
|
|
1088
|
+
or project_name
|
|
1089
|
+
or project_env
|
|
1090
|
+
or self.metastore.default_project_name
|
|
1091
|
+
)
|
|
1092
|
+
|
|
1093
|
+
return namespace_name, project_name, name
|
|
1094
|
+
|
|
1062
1095
|
def get_dataset(
|
|
1063
1096
|
self, name: str, project: Optional[Project] = None
|
|
1064
1097
|
) -> DatasetRecord:
|
|
@@ -8,7 +8,6 @@ if TYPE_CHECKING:
|
|
|
8
8
|
|
|
9
9
|
from datachain.cli.utils import determine_flavors
|
|
10
10
|
from datachain.config import Config
|
|
11
|
-
from datachain.dataset import parse_dataset_name
|
|
12
11
|
from datachain.error import DataChainError, DatasetNotFoundError
|
|
13
12
|
from datachain.studio import list_datasets as list_datasets_studio
|
|
14
13
|
|
|
@@ -106,9 +105,8 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
|
|
|
106
105
|
|
|
107
106
|
|
|
108
107
|
def list_datasets_local_versions(catalog: "Catalog", name: str):
|
|
109
|
-
namespace_name, project_name, name =
|
|
110
|
-
|
|
111
|
-
project_name = project_name or catalog.metastore.default_project_name
|
|
108
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(name)
|
|
109
|
+
|
|
112
110
|
project = catalog.metastore.get_project(project_name, namespace_name)
|
|
113
111
|
ds = catalog.get_dataset(name, project)
|
|
114
112
|
for v in ds.versions:
|
|
@@ -137,9 +135,7 @@ def rm_dataset(
|
|
|
137
135
|
studio: Optional[bool] = False,
|
|
138
136
|
team: Optional[str] = None,
|
|
139
137
|
):
|
|
140
|
-
namespace_name, project_name, name =
|
|
141
|
-
namespace_name = namespace_name or catalog.metastore.default_namespace_name
|
|
142
|
-
project_name = project_name or catalog.metastore.default_project_name
|
|
138
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(name)
|
|
143
139
|
|
|
144
140
|
if not catalog.metastore.is_local_dataset(namespace_name) and studio:
|
|
145
141
|
from datachain.studio import remove_studio_dataset
|
|
@@ -166,9 +162,7 @@ def edit_dataset(
|
|
|
166
162
|
attrs: Optional[list[str]] = None,
|
|
167
163
|
team: Optional[str] = None,
|
|
168
164
|
):
|
|
169
|
-
namespace_name, project_name, name =
|
|
170
|
-
namespace_name = namespace_name or catalog.metastore.default_namespace_name
|
|
171
|
-
project_name = project_name or catalog.metastore.default_project_name
|
|
165
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(name)
|
|
172
166
|
|
|
173
167
|
if catalog.metastore.is_local_dataset(namespace_name):
|
|
174
168
|
try:
|
|
@@ -132,6 +132,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
132
132
|
description: Optional[str] = None,
|
|
133
133
|
uuid: Optional[str] = None,
|
|
134
134
|
ignore_if_exists: bool = True,
|
|
135
|
+
validate: bool = True,
|
|
135
136
|
**kwargs,
|
|
136
137
|
) -> Namespace:
|
|
137
138
|
"""Creates new namespace"""
|
|
@@ -192,6 +193,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
192
193
|
description: Optional[str] = None,
|
|
193
194
|
uuid: Optional[str] = None,
|
|
194
195
|
ignore_if_exists: bool = True,
|
|
196
|
+
validate: bool = True,
|
|
195
197
|
**kwargs,
|
|
196
198
|
) -> Project:
|
|
197
199
|
"""Creates new project in specific namespace"""
|
|
@@ -725,8 +727,11 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
725
727
|
description: Optional[str] = None,
|
|
726
728
|
uuid: Optional[str] = None,
|
|
727
729
|
ignore_if_exists: bool = True,
|
|
730
|
+
validate: bool = True,
|
|
728
731
|
**kwargs,
|
|
729
732
|
) -> Namespace:
|
|
733
|
+
if validate:
|
|
734
|
+
Namespace.validate_name(name)
|
|
730
735
|
query = self._namespaces_insert().values(
|
|
731
736
|
name=name,
|
|
732
737
|
uuid=uuid or str(uuid4()),
|
|
@@ -775,12 +780,15 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
775
780
|
description: Optional[str] = None,
|
|
776
781
|
uuid: Optional[str] = None,
|
|
777
782
|
ignore_if_exists: bool = True,
|
|
783
|
+
validate: bool = True,
|
|
778
784
|
**kwargs,
|
|
779
785
|
) -> Project:
|
|
786
|
+
if validate:
|
|
787
|
+
Project.validate_name(name)
|
|
780
788
|
try:
|
|
781
789
|
namespace = self.get_namespace(namespace_name)
|
|
782
790
|
except NamespaceNotFoundError:
|
|
783
|
-
namespace = self.create_namespace(namespace_name)
|
|
791
|
+
namespace = self.create_namespace(namespace_name, validate=validate)
|
|
784
792
|
|
|
785
793
|
query = self._projects_insert().values(
|
|
786
794
|
namespace_id=namespace.id,
|
|
@@ -817,11 +825,14 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
817
825
|
"""Gets a single project inside some namespace by name"""
|
|
818
826
|
n = self._namespaces
|
|
819
827
|
p = self._projects
|
|
828
|
+
validate = True
|
|
829
|
+
|
|
820
830
|
if self._is_listing_project(name, namespace_name) or self._is_default_project(
|
|
821
831
|
name, namespace_name
|
|
822
832
|
):
|
|
823
833
|
# we are always creating default and listing projects if they don't exist
|
|
824
834
|
create = True
|
|
835
|
+
validate = False
|
|
825
836
|
|
|
826
837
|
query = self._projects_select(
|
|
827
838
|
*(getattr(n.c, f) for f in self._namespaces_fields),
|
|
@@ -834,7 +845,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
834
845
|
rows = list(self.db.execute(query, conn=conn))
|
|
835
846
|
if not rows:
|
|
836
847
|
if create:
|
|
837
|
-
return self.create_project(namespace_name, name)
|
|
848
|
+
return self.create_project(namespace_name, name, validate=validate)
|
|
838
849
|
raise ProjectNotFoundError(
|
|
839
850
|
f"Project {name} in namespace {namespace_name} not found."
|
|
840
851
|
)
|
|
@@ -468,8 +468,12 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
468
468
|
be created implicitly though, to keep the same fully qualified name with
|
|
469
469
|
Studio dataset.
|
|
470
470
|
"""
|
|
471
|
-
system_namespace = self.create_namespace(
|
|
472
|
-
|
|
471
|
+
system_namespace = self.create_namespace(
|
|
472
|
+
Namespace.system(), "System namespace", validate=False
|
|
473
|
+
)
|
|
474
|
+
self.create_project(
|
|
475
|
+
system_namespace.name, Project.listing(), "Listing project", validate=False
|
|
476
|
+
)
|
|
473
477
|
|
|
474
478
|
def _check_schema_version(self) -> None:
|
|
475
479
|
"""
|
|
@@ -81,8 +81,10 @@ def create_dataset_uri(
|
|
|
81
81
|
def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
|
|
82
82
|
"""Parses dataset name and returns namespace, project and name"""
|
|
83
83
|
if not name:
|
|
84
|
-
raise
|
|
84
|
+
raise InvalidDatasetNameError("Name must be defined to parse it")
|
|
85
85
|
split = name.split(".")
|
|
86
|
+
if len(split) > 3:
|
|
87
|
+
raise InvalidDatasetNameError(f"Invalid dataset name {name}")
|
|
86
88
|
name = split[-1]
|
|
87
89
|
project_name = split[-2] if len(split) > 1 else None
|
|
88
90
|
namespace_name = split[-3] if len(split) > 2 else None
|
|
@@ -24,7 +24,7 @@ from pydantic import BaseModel
|
|
|
24
24
|
from tqdm import tqdm
|
|
25
25
|
|
|
26
26
|
from datachain import semver
|
|
27
|
-
from datachain.dataset import DatasetRecord
|
|
27
|
+
from datachain.dataset import DatasetRecord
|
|
28
28
|
from datachain.delta import delta_disabled
|
|
29
29
|
from datachain.error import ProjectCreateNotAllowedError, ProjectNotFoundError
|
|
30
30
|
from datachain.func import literal
|
|
@@ -557,6 +557,7 @@ class DataChain:
|
|
|
557
557
|
update_version: which part of the dataset version to automatically increase.
|
|
558
558
|
Available values: `major`, `minor` or `patch`. Default is `patch`.
|
|
559
559
|
"""
|
|
560
|
+
catalog = self.session.catalog
|
|
560
561
|
if version is not None:
|
|
561
562
|
semver.validate(version)
|
|
562
563
|
|
|
@@ -570,17 +571,10 @@ class DataChain:
|
|
|
570
571
|
" patch"
|
|
571
572
|
)
|
|
572
573
|
|
|
573
|
-
namespace_name, project_name, name =
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
or self._settings.namespace
|
|
578
|
-
or self.session.catalog.metastore.default_namespace_name
|
|
579
|
-
)
|
|
580
|
-
project_name = (
|
|
581
|
-
project_name
|
|
582
|
-
or self._settings.project
|
|
583
|
-
or self.session.catalog.metastore.default_project_name
|
|
574
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(
|
|
575
|
+
name,
|
|
576
|
+
namespace_name=self._settings.namespace,
|
|
577
|
+
project_name=self._settings.project,
|
|
584
578
|
)
|
|
585
579
|
|
|
586
580
|
try:
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from collections.abc import Sequence
|
|
2
2
|
from typing import TYPE_CHECKING, Optional, Union, get_origin, get_type_hints
|
|
3
3
|
|
|
4
|
-
from datachain.dataset import parse_dataset_name
|
|
5
4
|
from datachain.error import (
|
|
6
5
|
DatasetNotFoundError,
|
|
7
6
|
DatasetVersionNotFoundError,
|
|
@@ -125,11 +124,11 @@ def read_dataset(
|
|
|
125
124
|
session = Session.get(session)
|
|
126
125
|
catalog = session.catalog
|
|
127
126
|
|
|
128
|
-
namespace_name, project_name, name =
|
|
129
|
-
|
|
130
|
-
|
|
127
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(
|
|
128
|
+
name,
|
|
129
|
+
project_name=project,
|
|
130
|
+
namespace_name=namespace,
|
|
131
131
|
)
|
|
132
|
-
project_name = project_name or project or catalog.metastore.default_project_name
|
|
133
132
|
|
|
134
133
|
if version is not None:
|
|
135
134
|
try:
|
|
@@ -320,11 +319,11 @@ def delete_dataset(
|
|
|
320
319
|
session = Session.get(session, in_memory=in_memory)
|
|
321
320
|
catalog = session.catalog
|
|
322
321
|
|
|
323
|
-
namespace_name, project_name, name =
|
|
324
|
-
|
|
325
|
-
|
|
322
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(
|
|
323
|
+
name,
|
|
324
|
+
project_name=project,
|
|
325
|
+
namespace_name=namespace,
|
|
326
326
|
)
|
|
327
|
-
project_name = project_name or project or catalog.metastore.default_project_name
|
|
328
327
|
|
|
329
328
|
if not catalog.metastore.is_local_dataset(namespace_name) and studio:
|
|
330
329
|
return remove_studio_dataset(
|
|
@@ -97,4 +97,4 @@ def read_records(
|
|
|
97
97
|
for chunk in batched(records, INSERT_BATCH_SIZE):
|
|
98
98
|
warehouse.insert_rows(table, chunk)
|
|
99
99
|
warehouse.insert_rows_done(table)
|
|
100
|
-
return read_dataset(name=dsr.
|
|
100
|
+
return read_dataset(name=dsr.full_name, session=session, settings=settings)
|
|
@@ -25,6 +25,7 @@ from pydantic import BaseModel, Field, create_model
|
|
|
25
25
|
from sqlalchemy import ColumnElement
|
|
26
26
|
from typing_extensions import Literal as LiteralEx
|
|
27
27
|
|
|
28
|
+
from datachain.func import literal
|
|
28
29
|
from datachain.func.func import Func
|
|
29
30
|
from datachain.lib.convert.python_to_sql import python_to_sql
|
|
30
31
|
from datachain.lib.convert.sql_to_python import sql_to_python
|
|
@@ -659,6 +660,7 @@ class SignalSchema:
|
|
|
659
660
|
|
|
660
661
|
def mutate(self, args_map: dict) -> "SignalSchema":
|
|
661
662
|
new_values = self.values.copy()
|
|
663
|
+
primitives = (bool, str, int, float)
|
|
662
664
|
|
|
663
665
|
for name, value in args_map.items():
|
|
664
666
|
if isinstance(value, Column) and value.name in self.values:
|
|
@@ -679,6 +681,12 @@ class SignalSchema:
|
|
|
679
681
|
# adding new signal with function
|
|
680
682
|
new_values[name] = value.get_result_type(self)
|
|
681
683
|
continue
|
|
684
|
+
if isinstance(value, primitives):
|
|
685
|
+
# For primitives, store the type, not the value
|
|
686
|
+
val = literal(value)
|
|
687
|
+
val.type = python_to_sql(type(value))()
|
|
688
|
+
new_values[name] = sql_to_python(val)
|
|
689
|
+
continue
|
|
682
690
|
if isinstance(value, ColumnElement):
|
|
683
691
|
# adding new signal
|
|
684
692
|
new_values[name] = sql_to_python(value)
|
|
@@ -756,6 +756,52 @@ def test_mutate_existing_column(test_session):
|
|
|
756
756
|
assert ds.order_by("ids").to_list() == [(2,), (3,), (4,)]
|
|
757
757
|
|
|
758
758
|
|
|
759
|
+
def test_mutate_with_primitives_save_load(test_session):
|
|
760
|
+
"""Test that mutate with primitive values properly persists schema
|
|
761
|
+
through save/load cycle."""
|
|
762
|
+
original_data = [1, 2, 3]
|
|
763
|
+
|
|
764
|
+
# Create dataset with multiple primitive columns added via mutate
|
|
765
|
+
ds = dc.read_values(data=original_data, session=test_session).mutate(
|
|
766
|
+
str_col="test_string",
|
|
767
|
+
int_col=42,
|
|
768
|
+
float_col=3.14,
|
|
769
|
+
bool_col=True,
|
|
770
|
+
)
|
|
771
|
+
|
|
772
|
+
# Verify schema before saving
|
|
773
|
+
schema = ds.signals_schema.values
|
|
774
|
+
assert schema.get("str_col") is str
|
|
775
|
+
assert schema.get("int_col") is int
|
|
776
|
+
assert schema.get("float_col") is float
|
|
777
|
+
assert schema.get("bool_col") is bool
|
|
778
|
+
|
|
779
|
+
ds.save("test_mutate_primitives")
|
|
780
|
+
|
|
781
|
+
# Load the dataset back
|
|
782
|
+
loaded_ds = dc.read_dataset("test_mutate_primitives", session=test_session)
|
|
783
|
+
|
|
784
|
+
# Verify schema after loading
|
|
785
|
+
loaded_schema = loaded_ds.signals_schema.values
|
|
786
|
+
assert loaded_schema.get("str_col") is str
|
|
787
|
+
assert loaded_schema.get("int_col") is int
|
|
788
|
+
assert loaded_schema.get("float_col") is float
|
|
789
|
+
assert loaded_schema.get("bool_col") is bool
|
|
790
|
+
|
|
791
|
+
# Verify data integrity
|
|
792
|
+
results = set(loaded_ds.to_list())
|
|
793
|
+
assert len(results) == 3
|
|
794
|
+
|
|
795
|
+
# Expected tuples: (data, str_col, int_col, float_col, bool_col)
|
|
796
|
+
expected_results = {
|
|
797
|
+
(1, "test_string", 42, 3.14, True),
|
|
798
|
+
(2, "test_string", 42, 3.14, True),
|
|
799
|
+
(3, "test_string", 42, 3.14, True),
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
assert results == expected_results
|
|
803
|
+
|
|
804
|
+
|
|
759
805
|
@pytest.mark.parametrize("processes", [False, 2, True])
|
|
760
806
|
@pytest.mark.xdist_group(name="tmpfile")
|
|
761
807
|
def test_parallel(processes, test_session_tmpfile):
|
|
@@ -20,6 +20,9 @@ from datachain.error import (
|
|
|
20
20
|
DatasetInvalidVersionError,
|
|
21
21
|
DatasetNotFoundError,
|
|
22
22
|
DatasetVersionNotFoundError,
|
|
23
|
+
InvalidDatasetNameError,
|
|
24
|
+
InvalidNamespaceNameError,
|
|
25
|
+
InvalidProjectNameError,
|
|
23
26
|
ProjectCreateNotAllowedError,
|
|
24
27
|
)
|
|
25
28
|
from datachain.lib.data_model import DataModel
|
|
@@ -3425,7 +3428,9 @@ def test_save_specify_only_non_default_project(
|
|
|
3425
3428
|
default_namespace_name = catalog.metastore.default_namespace_name
|
|
3426
3429
|
|
|
3427
3430
|
if project_created_upfront:
|
|
3428
|
-
catalog.metastore.create_project(
|
|
3431
|
+
catalog.metastore.create_project(
|
|
3432
|
+
default_namespace_name, "numbers", validate=False
|
|
3433
|
+
)
|
|
3429
3434
|
|
|
3430
3435
|
ds = dc.read_values(fib=[1, 1, 2, 3, 5, 8], session=test_session)
|
|
3431
3436
|
if use_settings:
|
|
@@ -3445,6 +3450,111 @@ def test_save_specify_only_non_default_project(
|
|
|
3445
3450
|
dc.read_dataset(name="fibonacci")
|
|
3446
3451
|
|
|
3447
3452
|
|
|
3453
|
+
@pytest.mark.parametrize(
|
|
3454
|
+
(
|
|
3455
|
+
"ds_name_namespace,ds_name_project,"
|
|
3456
|
+
"settings_namespace,settings_project,"
|
|
3457
|
+
"env_namespace,env_project,"
|
|
3458
|
+
"result_ds_namespace,result_ds_project"
|
|
3459
|
+
),
|
|
3460
|
+
[
|
|
3461
|
+
("n3", "p3", "n2", "p2", "n1", "p1", "n3", "p3"),
|
|
3462
|
+
("", "", "n2", "p2", "n1", "p1", "n2", "p2"),
|
|
3463
|
+
("", "", "", "", "n1", "p1", "n1", "p1"),
|
|
3464
|
+
("", "", "", "", "n5", "n1.p1", "n1", "p1"),
|
|
3465
|
+
("", "", "", "", "", "n1.p1", "n1", "p1"),
|
|
3466
|
+
("", "", "", "", "", "n5.p5", "n5", "p5"),
|
|
3467
|
+
("n3", "p3", "n2", "p2", "", "", "n3", "p3"),
|
|
3468
|
+
("n3", "p3", "", "", "", "", "n3", "p3"),
|
|
3469
|
+
("n3", "p3", "", "", "n1", "p1", "n3", "p3"),
|
|
3470
|
+
("", "", "", "", "", "", "", ""),
|
|
3471
|
+
],
|
|
3472
|
+
)
|
|
3473
|
+
def test_save_all_ways_to_set_project(
|
|
3474
|
+
test_session,
|
|
3475
|
+
monkeypatch,
|
|
3476
|
+
ds_name_namespace,
|
|
3477
|
+
ds_name_project,
|
|
3478
|
+
settings_namespace,
|
|
3479
|
+
settings_project,
|
|
3480
|
+
env_namespace,
|
|
3481
|
+
env_project,
|
|
3482
|
+
result_ds_namespace,
|
|
3483
|
+
result_ds_project,
|
|
3484
|
+
):
|
|
3485
|
+
def _full_name(namespace, project, name) -> str:
|
|
3486
|
+
if namespace and project:
|
|
3487
|
+
return f"{namespace}.{project}.{name}"
|
|
3488
|
+
return name
|
|
3489
|
+
|
|
3490
|
+
metastore = test_session.catalog.metastore
|
|
3491
|
+
ds_name = "numbers"
|
|
3492
|
+
|
|
3493
|
+
monkeypatch.setenv("DATACHAIN_NAMESPACE", env_namespace)
|
|
3494
|
+
monkeypatch.setenv("DATACHAIN_PROJECT", env_project)
|
|
3495
|
+
|
|
3496
|
+
if not result_ds_namespace and not result_ds_project:
|
|
3497
|
+
# special case when nothing is defined - we set default ones
|
|
3498
|
+
result_ds_namespace = metastore.default_namespace_name
|
|
3499
|
+
result_ds_project = metastore.default_project_name
|
|
3500
|
+
|
|
3501
|
+
ds = (
|
|
3502
|
+
dc.read_values(num=[1, 2, 3, 4], session=test_session)
|
|
3503
|
+
.settings(namespace=settings_namespace, project=settings_project)
|
|
3504
|
+
.save(_full_name(ds_name_namespace, ds_name_project, ds_name))
|
|
3505
|
+
)
|
|
3506
|
+
|
|
3507
|
+
assert ds.dataset.project == metastore.get_project(
|
|
3508
|
+
result_ds_project, result_ds_namespace
|
|
3509
|
+
)
|
|
3510
|
+
dc.read_dataset(_full_name(result_ds_namespace, result_ds_project, ds_name))
|
|
3511
|
+
|
|
3512
|
+
|
|
3513
|
+
@pytest.mark.parametrize(
|
|
3514
|
+
(
|
|
3515
|
+
"ds_name_namespace,ds_name_project,"
|
|
3516
|
+
"settings_namespace,settings_project,"
|
|
3517
|
+
"env_namespace,env_project,"
|
|
3518
|
+
"error"
|
|
3519
|
+
),
|
|
3520
|
+
[
|
|
3521
|
+
("n3.n3", "p3", "n2", "p2", "n1", "p1", InvalidDatasetNameError),
|
|
3522
|
+
("n3", "p3.p3", "n2", "p2", "n1", "p1", InvalidDatasetNameError),
|
|
3523
|
+
("", "", "n2.n2", "p2", "n1", "p1", InvalidNamespaceNameError),
|
|
3524
|
+
("", "", "n2", "p2.p2", "n1", "p1", InvalidProjectNameError),
|
|
3525
|
+
("", "", "", "", "n1.n1", "p1", InvalidNamespaceNameError),
|
|
3526
|
+
("", "", "", "", "n1", "p1.p1.p1", InvalidProjectNameError),
|
|
3527
|
+
],
|
|
3528
|
+
)
|
|
3529
|
+
def test_save_all_ways_to_set_project_invalid_name(
|
|
3530
|
+
test_session,
|
|
3531
|
+
monkeypatch,
|
|
3532
|
+
ds_name_namespace,
|
|
3533
|
+
ds_name_project,
|
|
3534
|
+
settings_namespace,
|
|
3535
|
+
settings_project,
|
|
3536
|
+
env_namespace,
|
|
3537
|
+
env_project,
|
|
3538
|
+
error,
|
|
3539
|
+
):
|
|
3540
|
+
def _full_name(namespace, project, name) -> str:
|
|
3541
|
+
if namespace and project:
|
|
3542
|
+
return f"{namespace}.{project}.{name}"
|
|
3543
|
+
return name
|
|
3544
|
+
|
|
3545
|
+
ds_name = "numbers"
|
|
3546
|
+
|
|
3547
|
+
monkeypatch.setenv("DATACHAIN_NAMESPACE", env_namespace)
|
|
3548
|
+
monkeypatch.setenv("DATACHAIN_PROJECT", env_project)
|
|
3549
|
+
|
|
3550
|
+
with pytest.raises(error):
|
|
3551
|
+
(
|
|
3552
|
+
dc.read_values(num=[1, 2, 3, 4], session=test_session)
|
|
3553
|
+
.settings(namespace=settings_namespace, project=settings_project)
|
|
3554
|
+
.save(_full_name(ds_name_namespace, ds_name_project, ds_name))
|
|
3555
|
+
)
|
|
3556
|
+
|
|
3557
|
+
|
|
3448
3558
|
@pytest.mark.parametrize("allow_create_project", [False])
|
|
3449
3559
|
@skip_if_not_sqlite
|
|
3450
3560
|
def test_save_create_project_not_allowed(test_session, allow_create_project):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|