datachain 0.24.5__tar.gz → 0.25.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.24.5 → datachain-0.25.0}/PKG-INFO +1 -1
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/env.md +1 -1
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/__init__.py +2 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/catalog.py +5 -20
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/metastore.py +30 -1
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/warehouse.py +16 -17
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/__init__.py +2 -1
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/datasets.py +55 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.24.5 → datachain-0.25.0}/tests/conftest.py +6 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_datasets.py +101 -14
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_datachain.py +7 -1
- {datachain-0.24.5 → datachain-0.25.0}/tests/utils.py +8 -0
- {datachain-0.24.5 → datachain-0.25.0}/.cruft.json +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.gitattributes +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/codecov.yaml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/dependabot.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/release.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/tests.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.gitignore +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/.pre-commit-config.yaml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/LICENSE +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/README.rst +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/assets/datachain.svg +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/login.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/logout.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/team.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/auth/token.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/index.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/cancel.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/clusters.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/logs.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/ls.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/commands/job/run.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/contributing.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/examples.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/db_migrations.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/delta.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/index.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/namespaces.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/processing.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/remotes.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/guide/retry.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/index.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/overrides/main.html +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/quick-start.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/file.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/index.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/pose.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/segment.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/datachain.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/func.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/index.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/toolkit.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/torch.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/references/udf.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/docs/tutorials.md +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/wds.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/mkdocs.yml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/noxfile.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/pyproject.toml +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/setup.cfg +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/__main__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/asyn.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cache.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/cli/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/azure.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/gcs.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/hf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/local.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/client/s3.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/config.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/dataset.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/delta.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/error.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/fs/reference.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/fs/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/array.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/base.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/conditional.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/func.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/numeric.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/path.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/random.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/string.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/func/window.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/job.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/clip.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/file.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/hf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/image.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/listing.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/projects.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/settings.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/tar.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/text.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/udf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/video.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/listing.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/bbox.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/pose.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/segment.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/model/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/namespace.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/node.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/progress.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/project.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/py.typed +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/batch.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/dataset.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/metrics.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/params.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/queue.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/schema.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/session.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/udf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/query/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/remote/studio.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/script_meta.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/semver.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/types.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/sql/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/studio.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/telemetry.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain/utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/data.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/examples/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/examples/test_examples.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/examples/wds_data.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/data/lena.jpg +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_array.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_path.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_random.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/functions/test_string.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_batching.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_catalog.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_client.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_data_storage.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_datachain.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_delta.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_file.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_hf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_image.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_listing.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_ls.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_metastore.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_metrics.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_pull.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_pytorch.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_query.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_read_database.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_retry.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_session.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_toolkit.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_video.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/func/test_warehouse.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/test_atomicity.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/test_cli_e2e.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/test_cli_studio.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/test_import_time.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/test_query_e2e.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/test_telemetry.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_asyn.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_cache.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_catalog.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_client.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_config.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_dataset.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_func.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_listing.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_metastore.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_query.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_query_params.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_semver.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_serializer.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_session.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_utils.py +0 -0
- {datachain-0.24.5 → datachain-0.25.0}/tests/unit/test_warehouse.py +0 -0
|
@@ -4,7 +4,7 @@ List of environment variables used to configure DataChain behavior.
|
|
|
4
4
|
|
|
5
5
|
### Core Configuration
|
|
6
6
|
|
|
7
|
-
- `DATACHAIN_ROOT_DIR` – Specifies the root directory where DataChain will create the `.datachain` folder to store its internal data. (default:
|
|
7
|
+
- `DATACHAIN_ROOT_DIR` – Specifies the root directory where DataChain will create the `.datachain` folder to store its internal data. (default: user home directory).
|
|
8
8
|
- `DATACHAIN_SYSTEM_CONFIG_DIR` – Overrides the system-wide configuration directory (default depends on the platform).
|
|
9
9
|
- `DATACHAIN_GLOBAL_CONFIG_DIR` – Overrides the user's global configuration directory (default depends on the platform).
|
|
10
10
|
- `DATACHAIN_NO_ANALYTICS` – Disables telemetry.
|
|
@@ -7,6 +7,7 @@ from datachain.lib.dc import (
|
|
|
7
7
|
datasets,
|
|
8
8
|
delete_dataset,
|
|
9
9
|
listings,
|
|
10
|
+
move_dataset,
|
|
10
11
|
read_csv,
|
|
11
12
|
read_database,
|
|
12
13
|
read_dataset,
|
|
@@ -69,6 +70,7 @@ __all__ = [
|
|
|
69
70
|
"is_chain_type",
|
|
70
71
|
"listings",
|
|
71
72
|
"metrics",
|
|
73
|
+
"move_dataset",
|
|
72
74
|
"param",
|
|
73
75
|
"read_csv",
|
|
74
76
|
"read_database",
|
|
@@ -956,26 +956,9 @@ class Catalog:
|
|
|
956
956
|
self, dataset: DatasetRecord, conn=None, **kwargs
|
|
957
957
|
) -> DatasetRecord:
|
|
958
958
|
"""Updates dataset fields."""
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
old_name = dataset.name
|
|
963
|
-
new_name = kwargs["name"]
|
|
964
|
-
|
|
965
|
-
dataset = self.metastore.update_dataset(dataset, conn=conn, **kwargs)
|
|
966
|
-
|
|
967
|
-
if old_name and new_name:
|
|
968
|
-
# updating name must result in updating dataset table names as well
|
|
969
|
-
for version in [v.version for v in dataset.versions]:
|
|
970
|
-
self.warehouse.rename_dataset_table(
|
|
971
|
-
dataset,
|
|
972
|
-
old_name,
|
|
973
|
-
new_name,
|
|
974
|
-
old_version=version,
|
|
975
|
-
new_version=version,
|
|
976
|
-
)
|
|
977
|
-
|
|
978
|
-
return dataset
|
|
959
|
+
dataset_updated = self.metastore.update_dataset(dataset, conn=conn, **kwargs)
|
|
960
|
+
self.warehouse.rename_dataset_tables(dataset, dataset_updated)
|
|
961
|
+
return dataset_updated
|
|
979
962
|
|
|
980
963
|
def remove_dataset_version(
|
|
981
964
|
self, dataset: DatasetRecord, version: str, drop_rows: Optional[bool] = True
|
|
@@ -1555,12 +1538,14 @@ class Catalog:
|
|
|
1555
1538
|
remote_ds.project.namespace.name,
|
|
1556
1539
|
description=remote_ds.project.namespace.descr,
|
|
1557
1540
|
uuid=remote_ds.project.namespace.uuid,
|
|
1541
|
+
validate=False,
|
|
1558
1542
|
)
|
|
1559
1543
|
project = self.metastore.create_project(
|
|
1560
1544
|
namespace.name,
|
|
1561
1545
|
remote_ds.project.name,
|
|
1562
1546
|
description=remote_ds.project.descr,
|
|
1563
1547
|
uuid=remote_ds.project.uuid,
|
|
1548
|
+
validate=False,
|
|
1564
1549
|
)
|
|
1565
1550
|
|
|
1566
1551
|
try:
|
|
@@ -207,6 +207,10 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
207
207
|
It also creates project if not found and create flag is set to True.
|
|
208
208
|
"""
|
|
209
209
|
|
|
210
|
+
@abstractmethod
|
|
211
|
+
def get_project_by_id(self, project_id: int, conn=None) -> Project:
|
|
212
|
+
"""Gets a single project by id"""
|
|
213
|
+
|
|
210
214
|
@abstractmethod
|
|
211
215
|
def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
|
|
212
216
|
"""Gets list of projects in some namespace or in general (in all namespaces)"""
|
|
@@ -851,6 +855,24 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
851
855
|
)
|
|
852
856
|
return self.project_class.parse(*rows[0])
|
|
853
857
|
|
|
858
|
+
def get_project_by_id(self, project_id: int, conn=None) -> Project:
|
|
859
|
+
"""Gets a single project by id"""
|
|
860
|
+
n = self._namespaces
|
|
861
|
+
p = self._projects
|
|
862
|
+
|
|
863
|
+
query = self._projects_select(
|
|
864
|
+
*(getattr(n.c, f) for f in self._namespaces_fields),
|
|
865
|
+
*(getattr(p.c, f) for f in self._projects_fields),
|
|
866
|
+
)
|
|
867
|
+
query = query.select_from(n.join(p, n.c.id == p.c.namespace_id)).where(
|
|
868
|
+
p.c.id == project_id
|
|
869
|
+
)
|
|
870
|
+
|
|
871
|
+
rows = list(self.db.execute(query, conn=conn))
|
|
872
|
+
if not rows:
|
|
873
|
+
raise ProjectNotFoundError(f"Project with id {project_id} not found.")
|
|
874
|
+
return self.project_class.parse(*rows[0])
|
|
875
|
+
|
|
854
876
|
def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
|
|
855
877
|
"""
|
|
856
878
|
Gets a list of projects inside some namespace, or in all namespaces
|
|
@@ -1008,6 +1030,11 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1008
1030
|
else:
|
|
1009
1031
|
values[field] = json.dumps(value)
|
|
1010
1032
|
dataset_values[field] = DatasetRecord.parse_schema(value)
|
|
1033
|
+
elif field == "project_id":
|
|
1034
|
+
if not value:
|
|
1035
|
+
raise ValueError("Cannot set empty project_id for dataset")
|
|
1036
|
+
dataset_values["project"] = self.get_project_by_id(value)
|
|
1037
|
+
values[field] = value
|
|
1011
1038
|
else:
|
|
1012
1039
|
values[field] = value
|
|
1013
1040
|
dataset_values[field] = value
|
|
@@ -1017,7 +1044,9 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1017
1044
|
|
|
1018
1045
|
d = self._datasets
|
|
1019
1046
|
self.db.execute(
|
|
1020
|
-
self._datasets_update()
|
|
1047
|
+
self._datasets_update()
|
|
1048
|
+
.where(d.c.name == dataset.name, d.c.project_id == dataset.project.id)
|
|
1049
|
+
.values(values),
|
|
1021
1050
|
conn=conn,
|
|
1022
1051
|
) # type: ignore [attr-defined]
|
|
1023
1052
|
|
|
@@ -356,24 +356,23 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
356
356
|
self, dataset: DatasetRecord, version: str
|
|
357
357
|
) -> list[StorageURI]: ...
|
|
358
358
|
|
|
359
|
-
def rename_dataset_table(
|
|
360
|
-
self,
|
|
361
|
-
dataset: DatasetRecord,
|
|
362
|
-
old_name: str,
|
|
363
|
-
new_name: str,
|
|
364
|
-
old_version: str,
|
|
365
|
-
new_version: str,
|
|
359
|
+
def rename_dataset_tables(
|
|
360
|
+
self, dataset: DatasetRecord, dataset_updated: DatasetRecord
|
|
366
361
|
) -> None:
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
)
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
362
|
+
"""
|
|
363
|
+
Renames all dataset version tables when parts of the dataset that
|
|
364
|
+
are used in constructing table name are updated.
|
|
365
|
+
If nothing important is changed, nothing will be renamed (no DB calls
|
|
366
|
+
will be made at all).
|
|
367
|
+
"""
|
|
368
|
+
for version in [v.version for v in dataset_updated.versions]:
|
|
369
|
+
if not dataset.has_version(version):
|
|
370
|
+
continue
|
|
371
|
+
src = self.dataset_table_name(dataset, version)
|
|
372
|
+
dest = self.dataset_table_name(dataset_updated, version)
|
|
373
|
+
if src == dest:
|
|
374
|
+
continue
|
|
375
|
+
self.db.rename_table(src, dest)
|
|
377
376
|
|
|
378
377
|
def dataset_rows_count(self, dataset: DatasetRecord, version=None) -> int:
|
|
379
378
|
"""Returns total number of rows in a dataset"""
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from .csv import read_csv
|
|
2
2
|
from .database import read_database
|
|
3
3
|
from .datachain import C, Column, DataChain
|
|
4
|
-
from .datasets import datasets, delete_dataset, read_dataset
|
|
4
|
+
from .datasets import datasets, delete_dataset, move_dataset, read_dataset
|
|
5
5
|
from .hf import read_hf
|
|
6
6
|
from .json import read_json
|
|
7
7
|
from .listings import listings
|
|
@@ -22,6 +22,7 @@ __all__ = [
|
|
|
22
22
|
"datasets",
|
|
23
23
|
"delete_dataset",
|
|
24
24
|
"listings",
|
|
25
|
+
"move_dataset",
|
|
25
26
|
"read_csv",
|
|
26
27
|
"read_database",
|
|
27
28
|
"read_dataset",
|
|
@@ -361,3 +361,58 @@ def delete_dataset(
|
|
|
361
361
|
else:
|
|
362
362
|
version = None
|
|
363
363
|
catalog.remove_dataset(name, ds_project, version=version, force=force)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def move_dataset(
|
|
367
|
+
src: str,
|
|
368
|
+
dest: str,
|
|
369
|
+
session: Optional[Session] = None,
|
|
370
|
+
in_memory: bool = False,
|
|
371
|
+
) -> None:
|
|
372
|
+
"""Moves an entire dataset between namespaces and projects.
|
|
373
|
+
|
|
374
|
+
Args:
|
|
375
|
+
src: The source dataset name. This can be a fully qualified name that includes
|
|
376
|
+
the namespace and project, or a regular name. If a regular name is used,
|
|
377
|
+
default values will be applied. The source dataset will no longer exist
|
|
378
|
+
after the move.
|
|
379
|
+
dest: The destination dataset name. This can also be a fully qualified
|
|
380
|
+
name with a namespace and project, or just a regular name (default values
|
|
381
|
+
will be used in that case). The original dataset will be moved here.
|
|
382
|
+
session: An optional session instance. If not provided, the default session
|
|
383
|
+
will be used.
|
|
384
|
+
in_memory: If True, creates an in-memory session. Defaults to False.
|
|
385
|
+
|
|
386
|
+
Returns:
|
|
387
|
+
None
|
|
388
|
+
|
|
389
|
+
Examples:
|
|
390
|
+
```python
|
|
391
|
+
import datachain as dc
|
|
392
|
+
dc.move_dataset("cats", "new_cats")
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
```python
|
|
396
|
+
import datachain as dc
|
|
397
|
+
dc.move_dataset("dev.animals.cats", "prod.animals.cats")
|
|
398
|
+
```
|
|
399
|
+
"""
|
|
400
|
+
session = Session.get(session, in_memory=in_memory)
|
|
401
|
+
catalog = session.catalog
|
|
402
|
+
|
|
403
|
+
namespace, project, name = catalog.get_full_dataset_name(src)
|
|
404
|
+
dest_namespace, dest_project, dest_name = catalog.get_full_dataset_name(dest)
|
|
405
|
+
|
|
406
|
+
dataset = catalog.get_dataset(
|
|
407
|
+
name, catalog.metastore.get_project(project, namespace)
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
catalog.update_dataset(
|
|
411
|
+
dataset,
|
|
412
|
+
name=dest_name,
|
|
413
|
+
project_id=catalog.metastore.get_project(
|
|
414
|
+
dest_project,
|
|
415
|
+
dest_namespace,
|
|
416
|
+
create=catalog.metastore.project_allowed_to_create,
|
|
417
|
+
).id,
|
|
418
|
+
)
|
|
@@ -576,6 +576,12 @@ def mock_allowed_to_create_namespace(allow_create_namespace):
|
|
|
576
576
|
yield
|
|
577
577
|
|
|
578
578
|
|
|
579
|
+
@pytest.fixture
|
|
580
|
+
def mock_is_local_dataset():
|
|
581
|
+
with patch.object(AbstractMetastore, "is_local_dataset", return_value=True):
|
|
582
|
+
yield
|
|
583
|
+
|
|
584
|
+
|
|
579
585
|
@pytest.fixture
|
|
580
586
|
def project(test_session):
|
|
581
587
|
return dc.create_project("dev", "animals", "Animals project")
|
|
@@ -11,12 +11,13 @@ from datachain.dataset import DatasetDependencyType, DatasetStatus
|
|
|
11
11
|
from datachain.error import (
|
|
12
12
|
DatasetInvalidVersionError,
|
|
13
13
|
DatasetNotFoundError,
|
|
14
|
+
ProjectNotFoundError,
|
|
14
15
|
)
|
|
15
16
|
from datachain.lib.file import File
|
|
16
17
|
from datachain.lib.listing import parse_listing_uri
|
|
17
18
|
from datachain.query.dataset import DatasetQuery
|
|
18
19
|
from datachain.sql.types import Float32, Int, Int64
|
|
19
|
-
from tests.utils import assert_row_names, dataset_dependency_asdict
|
|
20
|
+
from tests.utils import assert_row_names, dataset_dependency_asdict, table_row_count
|
|
20
21
|
|
|
21
22
|
FILE_SCHEMA = {
|
|
22
23
|
f"file__{name}": _type if _type != Int else Int64
|
|
@@ -169,14 +170,6 @@ def test_get_dataset(cloud_test_catalog, dogs_dataset):
|
|
|
169
170
|
catalog.get_dataset("wrong name", dogs_dataset.project)
|
|
170
171
|
|
|
171
172
|
|
|
172
|
-
# Returns None if the table does not exist
|
|
173
|
-
def get_table_row_count(db, table_name):
|
|
174
|
-
if not db.has_table(table_name):
|
|
175
|
-
return None
|
|
176
|
-
query = sa.select(sa.func.count()).select_from(sa.table(table_name))
|
|
177
|
-
return next(db.execute(query), (None,))[0]
|
|
178
|
-
|
|
179
|
-
|
|
180
173
|
def test_create_dataset_from_sources(listed_bucket, cloud_test_catalog, project):
|
|
181
174
|
dataset_name = uuid.uuid4().hex
|
|
182
175
|
src_uri = cloud_test_catalog.src_uri
|
|
@@ -327,7 +320,7 @@ def test_remove_dataset(cloud_test_catalog, dogs_dataset):
|
|
|
327
320
|
catalog.get_dataset(dogs_dataset.name, dogs_dataset.project)
|
|
328
321
|
|
|
329
322
|
dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
|
|
330
|
-
assert get_table_row_count(catalog.warehouse.db, dataset_table_name) is None
|
|
323
|
+
assert table_row_count(catalog.warehouse.db, dataset_table_name) is None
|
|
331
324
|
|
|
332
325
|
assert (
|
|
333
326
|
catalog.metastore.get_direct_dataset_dependencies(dogs_dataset, "1.0.0") == []
|
|
@@ -391,14 +384,108 @@ def test_edit_dataset(cloud_test_catalog, dogs_dataset):
|
|
|
391
384
|
old_dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
|
|
392
385
|
new_dataset_table_name = catalog.warehouse.dataset_table_name(dataset, "1.0.0")
|
|
393
386
|
|
|
394
|
-
assert get_table_row_count(catalog.warehouse.db, old_dataset_table_name) is None
|
|
395
|
-
expected_table_row_count = get_table_row_count(
|
|
387
|
+
assert table_row_count(catalog.warehouse.db, old_dataset_table_name) is None
|
|
388
|
+
expected_table_row_count = table_row_count(
|
|
396
389
|
catalog.warehouse.db, new_dataset_table_name
|
|
397
390
|
)
|
|
398
391
|
assert expected_table_row_count
|
|
399
392
|
assert dataset.get_version("1.0.0").num_objects == expected_table_row_count
|
|
400
393
|
|
|
401
394
|
|
|
395
|
+
@pytest.mark.parametrize(
|
|
396
|
+
"old_name,new_name",
|
|
397
|
+
[
|
|
398
|
+
("old.old.numbers", "new.new.numbers"),
|
|
399
|
+
("old.old.numbers", "new.new.numbers_new"),
|
|
400
|
+
("old.old.numbers", "old.new.numbers"),
|
|
401
|
+
("old.old.numbers", "old.old.numbers"),
|
|
402
|
+
("numbers", "numbers2"),
|
|
403
|
+
("numbers", "numbers"),
|
|
404
|
+
],
|
|
405
|
+
)
|
|
406
|
+
def test_move_dataset(
|
|
407
|
+
test_session,
|
|
408
|
+
old_name,
|
|
409
|
+
new_name,
|
|
410
|
+
mock_is_local_dataset,
|
|
411
|
+
):
|
|
412
|
+
catalog = test_session.catalog
|
|
413
|
+
|
|
414
|
+
# create 2 versions of dataset in old project
|
|
415
|
+
for _ in range(2):
|
|
416
|
+
(dc.read_values(num=[1, 2, 3], session=test_session).save(old_name))
|
|
417
|
+
|
|
418
|
+
dataset = dc.read_dataset(old_name).dataset
|
|
419
|
+
|
|
420
|
+
dc.move_dataset(old_name, new_name, session=test_session)
|
|
421
|
+
|
|
422
|
+
if old_name != new_name:
|
|
423
|
+
# check that old dataset doesn't exist any more
|
|
424
|
+
with pytest.raises(DatasetNotFoundError):
|
|
425
|
+
dc.read_dataset(old_name).save("wrong")
|
|
426
|
+
|
|
427
|
+
dataset_updated = dc.read_dataset(new_name).dataset
|
|
428
|
+
|
|
429
|
+
# check if dataset tables are renamed correctly as well
|
|
430
|
+
for version in [v.version for v in dataset.versions]:
|
|
431
|
+
old_table_name = catalog.warehouse.dataset_table_name(dataset, version)
|
|
432
|
+
new_table_name = catalog.warehouse.dataset_table_name(dataset_updated, version)
|
|
433
|
+
if old_name == new_name:
|
|
434
|
+
assert old_table_name == new_table_name
|
|
435
|
+
else:
|
|
436
|
+
assert table_row_count(catalog.warehouse.db, old_table_name) is None
|
|
437
|
+
|
|
438
|
+
assert table_row_count(catalog.warehouse.db, new_table_name) == 3
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def test_move_dataset_then_save_into(test_session):
|
|
442
|
+
old_name = "old.old.numbers"
|
|
443
|
+
new_name = "new.new.numbers"
|
|
444
|
+
|
|
445
|
+
# create 2 versions of dataset in old project
|
|
446
|
+
for _ in range(2):
|
|
447
|
+
dc.read_values(num=[1, 2, 3], session=test_session).save(old_name)
|
|
448
|
+
|
|
449
|
+
dc.move_dataset(old_name, new_name, session=test_session)
|
|
450
|
+
dc.read_values(num=[1, 2, 3], session=test_session).save(new_name)
|
|
451
|
+
|
|
452
|
+
ds = dc.datasets(column="dataset", session=test_session)
|
|
453
|
+
datasets = [
|
|
454
|
+
d
|
|
455
|
+
for d in ds.to_values("dataset")
|
|
456
|
+
if d.name == "numbers" and d.project == "new" and d.namespace == "new"
|
|
457
|
+
]
|
|
458
|
+
|
|
459
|
+
assert len(datasets) == 3
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def test_move_dataset_wrong_old_project(test_session, project):
|
|
463
|
+
dc.read_values(num=[1, 2, 3], session=test_session).save("old.old.numbers")
|
|
464
|
+
|
|
465
|
+
with pytest.raises(ProjectNotFoundError):
|
|
466
|
+
dc.move_dataset("wrong.wrong.numbers", "new.new.numbers", session=test_session)
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def test_move_dataset_error_in_session_moved_dataset_removed(catalog):
|
|
470
|
+
from datachain.query.session import Session
|
|
471
|
+
|
|
472
|
+
old_name = "old.old.numbers"
|
|
473
|
+
new_name = "new.new.numbers"
|
|
474
|
+
|
|
475
|
+
with pytest.raises(DatasetNotFoundError):
|
|
476
|
+
with Session("new", catalog=catalog) as test_session:
|
|
477
|
+
dc.read_values(num=[1, 2, 3]).save("aa")
|
|
478
|
+
dc.read_values(num=[1, 2, 3], session=test_session).save(old_name)
|
|
479
|
+
dc.move_dataset(old_name, new_name, session=test_session)
|
|
480
|
+
|
|
481
|
+
# throws DatasetNotFoundError
|
|
482
|
+
dc.read_dataset("wrong", session=test_session)
|
|
483
|
+
|
|
484
|
+
ds = dc.datasets(column="dataset")
|
|
485
|
+
datasets = [d for d in ds.to_values("dataset")] # noqa: C416
|
|
486
|
+
assert len(datasets) == 0
|
|
487
|
+
|
|
488
|
+
|
|
402
489
|
def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
|
|
403
490
|
dataset_new_name = dogs_dataset.name
|
|
404
491
|
catalog = cloud_test_catalog.catalog
|
|
@@ -414,12 +501,12 @@ def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
|
|
|
414
501
|
old_dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
|
|
415
502
|
new_dataset_table_name = catalog.warehouse.dataset_table_name(dataset, "1.0.0")
|
|
416
503
|
|
|
417
|
-
expected_table_row_count = get_table_row_count(
|
|
504
|
+
expected_table_row_count = table_row_count(
|
|
418
505
|
catalog.warehouse.db, old_dataset_table_name
|
|
419
506
|
)
|
|
420
507
|
assert expected_table_row_count
|
|
421
508
|
assert dataset.get_version("1.0.0").num_objects == expected_table_row_count
|
|
422
|
-
assert expected_table_row_count == get_table_row_count(
|
|
509
|
+
assert expected_table_row_count == table_row_count(
|
|
423
510
|
catalog.warehouse.db, new_dataset_table_name
|
|
424
511
|
)
|
|
425
512
|
|
|
@@ -43,7 +43,13 @@ from datachain.lib.udf_signature import UdfSignatureError
|
|
|
43
43
|
from datachain.lib.utils import DataChainColumnError, DataChainParamsError
|
|
44
44
|
from datachain.sql.types import Float, Int64, String
|
|
45
45
|
from datachain.utils import STUDIO_URL
|
|
46
|
-
from tests.utils import
|
|
46
|
+
from tests.utils import (
|
|
47
|
+
ANY_VALUE,
|
|
48
|
+
df_equal,
|
|
49
|
+
skip_if_not_sqlite,
|
|
50
|
+
sort_df,
|
|
51
|
+
sorted_dicts,
|
|
52
|
+
)
|
|
47
53
|
|
|
48
54
|
DF_DATA = {
|
|
49
55
|
"first_name": ["Alice", "Bob", "Charlie", "David", "Eva"],
|
|
@@ -10,6 +10,7 @@ from time import sleep, time
|
|
|
10
10
|
from typing import Any, Callable, Optional
|
|
11
11
|
|
|
12
12
|
import pytest
|
|
13
|
+
import sqlalchemy as sa
|
|
13
14
|
from PIL import Image
|
|
14
15
|
|
|
15
16
|
import datachain as dc
|
|
@@ -231,3 +232,10 @@ def sort_df(df):
|
|
|
231
232
|
def df_equal(df1, df2) -> bool:
|
|
232
233
|
"""Helper function to check if two dataframes are equal regardless of ordering"""
|
|
233
234
|
return sort_df(df1).equals(sort_df(df2))
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def table_row_count(db, table_name) -> Optional[int]:
|
|
238
|
+
if not db.has_table(table_name):
|
|
239
|
+
return None
|
|
240
|
+
query = sa.select(sa.func.count()).select_from(sa.table(table_name))
|
|
241
|
+
return next(db.execute(query), (None,))[0]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|