datachain 0.24.6__tar.gz → 0.25.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- {datachain-0.24.6 → datachain-0.25.1}/.github/workflows/benchmarks.yml +0 -3
- {datachain-0.24.6 → datachain-0.25.1}/.github/workflows/tests-studio.yml +15 -2
- {datachain-0.24.6 → datachain-0.25.1}/.github/workflows/tests.yml +16 -12
- {datachain-0.24.6 → datachain-0.25.1}/PKG-INFO +3 -2
- {datachain-0.24.6 → datachain-0.25.1}/pyproject.toml +3 -1
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/__init__.py +2 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/catalog/catalog.py +3 -20
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/metastore.py +30 -1
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/warehouse.py +16 -17
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/arrow.py +9 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/__init__.py +2 -1
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/datasets.py +55 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/hf.py +18 -21
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain.egg-info/PKG-INFO +3 -2
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain.egg-info/requires.txt +4 -1
- {datachain-0.24.6 → datachain-0.25.1}/tests/conftest.py +6 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_datasets.py +101 -14
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_hf.py +16 -1
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_datachain.py +23 -1
- {datachain-0.24.6 → datachain-0.25.1}/tests/utils.py +8 -0
- {datachain-0.24.6 → datachain-0.25.1}/.cruft.json +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.gitattributes +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.github/codecov.yaml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.github/dependabot.yml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.github/workflows/release.yml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.gitignore +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/.pre-commit-config.yaml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/LICENSE +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/README.rst +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/assets/datachain.svg +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/auth/login.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/auth/logout.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/auth/team.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/auth/token.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/index.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/job/cancel.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/job/clusters.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/job/logs.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/job/ls.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/commands/job/run.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/contributing.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/examples.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/db_migrations.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/delta.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/env.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/index.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/namespaces.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/processing.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/remotes.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/guide/retry.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/index.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/overrides/main.html +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/quick-start.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/file.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/index.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/pose.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/segment.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/datachain.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/func.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/index.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/toolkit.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/torch.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/references/udf.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/docs/tutorials.md +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/multimodal/wds.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/mkdocs.yml +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/noxfile.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/setup.cfg +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/__main__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/asyn.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cache.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/cli/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/azure.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/gcs.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/hf.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/local.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/client/s3.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/config.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/dataset.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/delta.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/error.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/fs/reference.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/fs/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/array.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/base.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/conditional.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/func.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/numeric.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/path.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/random.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/string.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/func/window.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/job.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/clip.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/file.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/image.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/listing.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/projects.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/settings.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/tar.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/text.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/udf.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/video.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/listing.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/bbox.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/pose.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/segment.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/model/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/namespace.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/node.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/progress.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/project.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/py.typed +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/batch.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/dataset.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/metrics.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/params.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/queue.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/schema.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/session.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/udf.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/query/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/remote/studio.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/script_meta.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/semver.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/types.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/sql/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/studio.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/telemetry.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain/utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/data.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/examples/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/examples/test_examples.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/examples/wds_data.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/data/lena.jpg +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/test_array.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/test_path.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/test_random.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/functions/test_string.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/model/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_batching.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_catalog.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_client.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_data_storage.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_datachain.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_delta.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_file.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_image.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_listing.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_ls.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_metastore.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_metrics.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_pull.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_pytorch.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_query.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_read_database.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_retry.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_session.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_toolkit.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_video.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/func/test_warehouse.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/scripts/feature_class.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/test_atomicity.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/test_cli_e2e.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/test_cli_studio.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/test_import_time.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/test_query_e2e.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/test_telemetry.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/model/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_asyn.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_cache.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_catalog.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_client.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_config.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_dataset.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_func.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_listing.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_metastore.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_query.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_query_params.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_semver.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_serializer.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_session.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_utils.py +0 -0
- {datachain-0.24.6 → datachain-0.25.1}/tests/unit/test_warehouse.py +0 -0

.github/workflows/benchmarks.yml

```diff
@@ -30,9 +30,6 @@ jobs:
           enable-cache: true
           cache-suffix: benchmarks
           cache-dependency-glob: pyproject.toml
-          # revert after this is fixed
-          # https://github.com/wntrblm/nox/issues/953
-          version: ">=0.6,<0.7"

       - name: Install nox and dvc
         run: uv pip install dvc[gs] nox --system
```

.github/workflows/tests-studio.yml

```diff
@@ -75,8 +75,21 @@
           path: './backend/datachain'
           fetch-depth: 0

-      - name:
-
+      - name: Install FFmpeg on Windows
+        if: runner.os == 'Windows'
+        run: choco install ffmpeg
+
+      - name: Install FFmpeg on macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install ffmpeg
+          echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
+
+      - name: Install FFmpeg on Ubuntu
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt update
+          sudo apt install -y ffmpeg

       - name: Set up Python ${{ matrix.pyv }}
         uses: actions/setup-python@v5
```

.github/workflows/tests.yml

```diff
@@ -34,9 +34,6 @@ jobs:
           enable-cache: true
           cache-suffix: lint
           cache-dependency-glob: pyproject.toml
-          # revert after this is fixed
-          # https://github.com/wntrblm/nox/issues/953
-          version: ">=0.6,<0.7"

       - name: Install nox
         run: uv pip install nox --system
@@ -81,9 +78,6 @@
           fetch-depth: 0
           ref: ${{ github.event.pull_request.head.sha || github.ref }}

-      - name: Set up FFmpeg
-        uses: AnimMouse/setup-ffmpeg@v1
-
       - name: Set up Python ${{ matrix.pyv }}
         uses: actions/setup-python@v5
         with:
@@ -95,9 +89,22 @@
           enable-cache: true
           cache-suffix: tests-${{ matrix.pyv }}
           cache-dependency-glob: pyproject.toml
-
-
-
+
+      - name: Install FFmpeg on Windows
+        if: runner.os == 'Windows'
+        run: choco install ffmpeg
+
+      - name: Install FFmpeg on macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install ffmpeg
+          echo 'DYLD_FALLBACK_LIBRARY_PATH=/opt/homebrew/lib' >> "$GITHUB_ENV"
+
+      - name: Install FFmpeg on Ubuntu
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt update
+          sudo apt install -y ffmpeg

       - name: Install nox
         run: uv pip install nox --system
@@ -165,9 +172,6 @@
           enable-cache: true
           cache-suffix: examples-${{ matrix.pyv }}
           cache-dependency-glob: pyproject.toml
-          # revert after this is fixed
-          # https://github.com/wntrblm/nox/issues/953
-          version: ">=0.6,<0.7"

       - name: Install nox
         run: uv pip install nox --system
```

PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.24.6
+Version: 0.25.1
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -70,7 +70,8 @@ Provides-Extra: vector
 Requires-Dist: usearch; extra == "vector"
 Provides-Extra: hf
 Requires-Dist: numba>=0.60.0; extra == "hf"
-Requires-Dist: datasets[
+Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
+Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
 Requires-Dist: fsspec>=2024.12.0; extra == "hf"
 Provides-Extra: video
 Requires-Dist: ffmpeg-python; extra == "video"
```

pyproject.toml

```diff
@@ -81,7 +81,9 @@ vector = [
 ]
 hf = [
   "numba>=0.60.0",
-  "datasets[
+  "datasets[vision]>=4.0.0",
+  # https://github.com/pytorch/torchcodec/issues/640
+  "datasets[audio]>=4.0.0 ; (sys_platform == 'linux' or sys_platform == 'darwin')",
   "fsspec>=2024.12.0"
 ]
 video = [
```

src/datachain/__init__.py

```diff
@@ -7,6 +7,7 @@ from datachain.lib.dc import (
     datasets,
     delete_dataset,
     listings,
+    move_dataset,
     read_csv,
     read_database,
     read_dataset,
@@ -69,6 +70,7 @@ __all__ = [
     "is_chain_type",
     "listings",
     "metrics",
+    "move_dataset",
     "param",
     "read_csv",
     "read_database",
```

src/datachain/catalog/catalog.py

```diff
@@ -956,26 +956,9 @@ class Catalog:
         self, dataset: DatasetRecord, conn=None, **kwargs
     ) -> DatasetRecord:
         """Updates dataset fields."""
-
-
-
-        old_name = dataset.name
-        new_name = kwargs["name"]
-
-        dataset = self.metastore.update_dataset(dataset, conn=conn, **kwargs)
-
-        if old_name and new_name:
-            # updating name must result in updating dataset table names as well
-            for version in [v.version for v in dataset.versions]:
-                self.warehouse.rename_dataset_table(
-                    dataset,
-                    old_name,
-                    new_name,
-                    old_version=version,
-                    new_version=version,
-                )
-
-        return dataset
+        dataset_updated = self.metastore.update_dataset(dataset, conn=conn, **kwargs)
+        self.warehouse.rename_dataset_tables(dataset, dataset_updated)
+        return dataset_updated

     def remove_dataset_version(
         self, dataset: DatasetRecord, version: str, drop_rows: Optional[bool] = True
```

src/datachain/data_storage/metastore.py

```diff
@@ -207,6 +207,10 @@ class AbstractMetastore(ABC, Serializable):
         It also creates project if not found and create flag is set to True.
         """

+    @abstractmethod
+    def get_project_by_id(self, project_id: int, conn=None) -> Project:
+        """Gets a single project by id"""
+
     @abstractmethod
     def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
         """Gets list of projects in some namespace or in general (in all namespaces)"""
@@ -851,6 +855,24 @@ class AbstractDBMetastore(AbstractMetastore):
         )
         return self.project_class.parse(*rows[0])

+    def get_project_by_id(self, project_id: int, conn=None) -> Project:
+        """Gets a single project by id"""
+        n = self._namespaces
+        p = self._projects
+
+        query = self._projects_select(
+            *(getattr(n.c, f) for f in self._namespaces_fields),
+            *(getattr(p.c, f) for f in self._projects_fields),
+        )
+        query = query.select_from(n.join(p, n.c.id == p.c.namespace_id)).where(
+            p.c.id == project_id
+        )
+
+        rows = list(self.db.execute(query, conn=conn))
+        if not rows:
+            raise ProjectNotFoundError(f"Project with id {project_id} not found.")
+        return self.project_class.parse(*rows[0])
+
     def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
         """
         Gets a list of projects inside some namespace, or in all namespaces
@@ -1008,6 +1030,11 @@ class AbstractDBMetastore(AbstractMetastore):
                 else:
                     values[field] = json.dumps(value)
                     dataset_values[field] = DatasetRecord.parse_schema(value)
+            elif field == "project_id":
+                if not value:
+                    raise ValueError("Cannot set empty project_id for dataset")
+                dataset_values["project"] = self.get_project_by_id(value)
+                values[field] = value
             else:
                 values[field] = value
                 dataset_values[field] = value
@@ -1017,7 +1044,9 @@ class AbstractDBMetastore(AbstractMetastore):

         d = self._datasets
         self.db.execute(
-            self._datasets_update()
+            self._datasets_update()
+            .where(d.c.name == dataset.name, d.c.project_id == dataset.project.id)
+            .values(values),
             conn=conn,
         )  # type: ignore [attr-defined]

```

src/datachain/data_storage/warehouse.py

```diff
@@ -356,24 +356,23 @@ class AbstractWarehouse(ABC, Serializable):
         self, dataset: DatasetRecord, version: str
     ) -> list[StorageURI]: ...

-    def
-        self,
-        dataset: DatasetRecord,
-        old_name: str,
-        new_name: str,
-        old_version: str,
-        new_version: str,
+    def rename_dataset_tables(
+        self, dataset: DatasetRecord, dataset_updated: DatasetRecord
     ) -> None:
-
-
-
-
-        )
-
-
-
-
-
+        """
+        Renames all dataset version tables when parts of the dataset that
+        are used in constructing table name are updated.
+        If nothing important is changed, nothing will be renamed (no DB calls
+        will be made at all).
+        """
+        for version in [v.version for v in dataset_updated.versions]:
+            if not dataset.has_version(version):
+                continue
+            src = self.dataset_table_name(dataset, version)
+            dest = self.dataset_table_name(dataset_updated, version)
+            if src == dest:
+                continue
+            self.db.rename_table(src, dest)

     def dataset_rows_count(self, dataset: DatasetRecord, version=None) -> int:
         """Returns total number of rows in a dataset"""
```

src/datachain/lib/arrow.py

```diff
@@ -126,7 +126,16 @@ class ArrowGenerator(Generator):
             if isinstance(kwargs.get("format"), CsvFileFormat):
                 kwargs["format"] = "csv"
             arrow_file = ArrowRow(file=file, index=index, kwargs=kwargs)
+
+            if self.output_schema and hasattr(vals[0], "source"):
+                # if we are reading parquet file written by datachain it might have
+                # source inside of it already, so we should not duplicate it, instead
+                # we are re-creating it of the self.source flag
+                vals[0].source = arrow_file  # type: ignore[attr-defined]
+
+                return vals
             return [arrow_file, *vals]
+
         return vals

     def _process_non_datachain_record(
```

src/datachain/lib/dc/__init__.py

```diff
@@ -1,7 +1,7 @@
 from .csv import read_csv
 from .database import read_database
 from .datachain import C, Column, DataChain
-from .datasets import datasets, delete_dataset, read_dataset
+from .datasets import datasets, delete_dataset, move_dataset, read_dataset
 from .hf import read_hf
 from .json import read_json
 from .listings import listings
@@ -22,6 +22,7 @@ __all__ = [
     "datasets",
     "delete_dataset",
     "listings",
+    "move_dataset",
     "read_csv",
     "read_database",
     "read_dataset",
```

src/datachain/lib/dc/datasets.py

````diff
@@ -361,3 +361,58 @@ def delete_dataset(
     else:
         version = None
     catalog.remove_dataset(name, ds_project, version=version, force=force)
+
+
+def move_dataset(
+    src: str,
+    dest: str,
+    session: Optional[Session] = None,
+    in_memory: bool = False,
+) -> None:
+    """Moves an entire dataset between namespaces and projects.
+
+    Args:
+        src: The source dataset name. This can be a fully qualified name that includes
+            the namespace and project, or a regular name. If a regular name is used,
+            default values will be applied. The source dataset will no longer exist
+            after the move.
+        dst: The destination dataset name. This can also be a fully qualified
+            name with a namespace and project, or just a regular name (default values
+            will be used in that case). The original dataset will be moved here.
+        session: An optional session instance. If not provided, the default session
+            will be used.
+        in_memory: If True, creates an in-memory session. Defaults to False.
+
+    Returns:
+        None
+
+    Examples:
+        ```python
+        import datachain as dc
+        dc.move_dataset("cats", "new_cats")
+        ```
+
+        ```python
+        import datachain as dc
+        dc.move_dataset("dev.animals.cats", "prod.animals.cats")
+        ```
+    """
+    session = Session.get(session, in_memory=in_memory)
+    catalog = session.catalog
+
+    namespace, project, name = catalog.get_full_dataset_name(src)
+    dest_namespace, dest_project, dest_name = catalog.get_full_dataset_name(dest)
+
+    dataset = catalog.get_dataset(
+        name, catalog.metastore.get_project(project, namespace)
+    )
+
+    catalog.update_dataset(
+        dataset,
+        name=dest_name,
+        project_id=catalog.metastore.get_project(
+            dest_project,
+            dest_namespace,
+            create=catalog.metastore.project_allowed_to_create,
+        ).id,
+    )
````
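
Taken together with the `__init__.py` hunks above, the new helper is callable as `dc.move_dataset`. Below is a minimal usage sketch, assuming a local setup where namespaces and projects may be created; the dataset names are illustrative, mirroring the docstring examples:

```python
import datachain as dc

# save a small dataset under a fully qualified "namespace.project.name"
dc.read_values(num=[1, 2, 3]).save("dev.animals.cats")

# move it to another namespace/project; the source name stops existing
dc.move_dataset("dev.animals.cats", "prod.animals.cats")

# the data is now only reachable under the destination name
print(dc.read_dataset("prod.animals.cats").to_values("num"))
```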

src/datachain/lib/hf.py

```diff
@@ -11,7 +11,7 @@ try:
         Image,
         IterableDataset,
         IterableDatasetDict,
-
+        List,
         Value,
         load_dataset,
     )
@@ -59,7 +59,6 @@ class HFImage(DataModel):


 class HFAudio(DataModel):
-    path: str
     array: list[float]
     sampling_rate: int

@@ -116,26 +115,24 @@ def stream_splits(ds: Union[str, HFDatasetType], *args, **kwargs):
     return {"": ds}


-def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
-    if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D)):
+def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
+    if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D, List)):
         return val
     if isinstance(feat, ClassLabel):
         return HFClassLabel(string=feat.names[val], integer=val)
-    if isinstance(feat,
-
-
-
-
-
-
-        return anno(**sdict)
-    return val
+    if isinstance(feat, dict):
+        sdict = {}
+        for sname in val:
+            sfeat = feat[sname]
+            sanno = anno.model_fields[sname].annotation
+            sdict[sname] = [convert_feature(v, sfeat, sanno) for v in val[sname]]
+        return anno(**sdict)
     if isinstance(feat, Image):
         if isinstance(val, dict):
             return HFImage(img=val["bytes"])
         return HFImage(img=image_to_bytes(val))
     if isinstance(feat, Audio):
-        return HFAudio(
+        return HFAudio(array=val["array"], sampling_rate=val["sampling_rate"])


 def get_output_schema(features: Features) -> dict[str, DataType]:
@@ -151,13 +148,13 @@ def _feature_to_chain_type(name: str, val: Any) -> DataType:  # noqa: PLR0911
         return arrow_type_mapper(val.pa_type)
     if isinstance(val, ClassLabel):
         return HFClassLabel
-    if isinstance(val,
-
-
-
-
-
+    if isinstance(val, dict):
+        sequence_dict = {}
+        for sname, sval in val.items():
+            dtype = _feature_to_chain_type(sname, sval)
+            sequence_dict[sname] = dtype  # type: ignore[valid-type]
+        return dict_to_data_model(name, sequence_dict)  # type: ignore[arg-type]
+    if isinstance(val, List):
         return list[_feature_to_chain_type(name, val.feature)]  # type: ignore[arg-type,misc,return-value]
     if isinstance(val, Array2D):
         dtype = arrow_type_mapper(string_to_arrow(val.dtype))
```
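
The hf.py changes above adapt the helpers to the `datasets` 4.x feature API: the `List` feature type is now handled directly, nested features arriving as plain dicts are converted into generated data models, and `HFAudio` no longer carries a `path` field. A minimal, hypothetical sketch of exercising this path through `read_hf` (exported from `datachain.lib.dc` per the hunk above; the dataset name and split are placeholders):

```python
import datachain as dc

# read_hf streams the dataset via datasets.load_dataset and derives the
# DataChain schema from its Features: ClassLabel columns map to HFClassLabel,
# Audio columns to HFAudio (array + sampling_rate only in 0.25.1), and nested
# dict features to generated data models.
chain = dc.read_hf("some_org/some_dataset", split="train")
chain.limit(5).show()
```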

src/datachain.egg-info/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.24.6
+Version: 0.25.1
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -70,7 +70,8 @@ Provides-Extra: vector
 Requires-Dist: usearch; extra == "vector"
 Provides-Extra: hf
 Requires-Dist: numba>=0.60.0; extra == "hf"
-Requires-Dist: datasets[
+Requires-Dist: datasets[vision]>=4.0.0; extra == "hf"
+Requires-Dist: datasets[audio]>=4.0.0; (sys_platform == "linux" or sys_platform == "darwin") and extra == "hf"
 Requires-Dist: fsspec>=2024.12.0; extra == "hf"
 Provides-Extra: video
 Requires-Dist: ffmpeg-python; extra == "video"
```
|
@@ -63,9 +63,12 @@ open_clip_torch
|
|
|
63
63
|
|
|
64
64
|
[hf]
|
|
65
65
|
numba>=0.60.0
|
|
66
|
-
datasets[
|
|
66
|
+
datasets[vision]>=4.0.0
|
|
67
67
|
fsspec>=2024.12.0
|
|
68
68
|
|
|
69
|
+
[hf:sys_platform == "linux" or sys_platform == "darwin"]
|
|
70
|
+
datasets[audio]>=4.0.0
|
|
71
|
+
|
|
69
72
|
[remote]
|
|
70
73
|
lz4
|
|
71
74
|
requests>=2.22.0
|
|

tests/conftest.py

```diff
@@ -576,6 +576,12 @@ def mock_allowed_to_create_namespace(allow_create_namespace):
     yield


+@pytest.fixture
+def mock_is_local_dataset():
+    with patch.object(AbstractMetastore, "is_local_dataset", return_value=True):
+        yield
+
+
 @pytest.fixture
 def project(test_session):
     return dc.create_project("dev", "animals", "Animals project")
```

tests/func/test_datasets.py

```diff
@@ -11,12 +11,13 @@ from datachain.dataset import DatasetDependencyType, DatasetStatus
 from datachain.error import (
     DatasetInvalidVersionError,
     DatasetNotFoundError,
+    ProjectNotFoundError,
 )
 from datachain.lib.file import File
 from datachain.lib.listing import parse_listing_uri
 from datachain.query.dataset import DatasetQuery
 from datachain.sql.types import Float32, Int, Int64
-from tests.utils import assert_row_names, dataset_dependency_asdict
+from tests.utils import assert_row_names, dataset_dependency_asdict, table_row_count

 FILE_SCHEMA = {
     f"file__{name}": _type if _type != Int else Int64
@@ -169,14 +170,6 @@ def test_get_dataset(cloud_test_catalog, dogs_dataset):
         catalog.get_dataset("wrong name", dogs_dataset.project)


-# Returns None if the table does not exist
-def get_table_row_count(db, table_name):
-    if not db.has_table(table_name):
-        return None
-    query = sa.select(sa.func.count()).select_from(sa.table(table_name))
-    return next(db.execute(query), (None,))[0]
-
-
 def test_create_dataset_from_sources(listed_bucket, cloud_test_catalog, project):
     dataset_name = uuid.uuid4().hex
     src_uri = cloud_test_catalog.src_uri
@@ -327,7 +320,7 @@ def test_remove_dataset(cloud_test_catalog, dogs_dataset):
         catalog.get_dataset(dogs_dataset.name, dogs_dataset.project)

     dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
-    assert
+    assert table_row_count(catalog.warehouse.db, dataset_table_name) is None

     assert (
         catalog.metastore.get_direct_dataset_dependencies(dogs_dataset, "1.0.0") == []
@@ -391,14 +384,108 @@ def test_edit_dataset(cloud_test_catalog, dogs_dataset):
     old_dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
     new_dataset_table_name = catalog.warehouse.dataset_table_name(dataset, "1.0.0")

-    assert
-    expected_table_row_count =
+    assert table_row_count(catalog.warehouse.db, old_dataset_table_name) is None
+    expected_table_row_count = table_row_count(
         catalog.warehouse.db, new_dataset_table_name
     )
     assert expected_table_row_count
     assert dataset.get_version("1.0.0").num_objects == expected_table_row_count


+@pytest.mark.parametrize(
+    "old_name,new_name",
+    [
+        ("old.old.numbers", "new.new.numbers"),
+        ("old.old.numbers", "new.new.numbers_new"),
+        ("old.old.numbers", "old.new.numbers"),
+        ("old.old.numbers", "old.old.numbers"),
+        ("numbers", "numbers2"),
+        ("numbers", "numbers"),
+    ],
+)
+def test_move_dataset(
+    test_session,
+    old_name,
+    new_name,
+    mock_is_local_dataset,
+):
+    catalog = test_session.catalog
+
+    # create 2 versions of dataset in old project
+    for _ in range(2):
+        (dc.read_values(num=[1, 2, 3], session=test_session).save(old_name))
+
+    dataset = dc.read_dataset(old_name).dataset
+
+    dc.move_dataset(old_name, new_name, session=test_session)
+
+    if old_name != new_name:
+        # check that old dataset doesn't exist any more
+        with pytest.raises(DatasetNotFoundError):
+            dc.read_dataset(old_name).save("wrong")
+
+    dataset_updated = dc.read_dataset(new_name).dataset
+
+    # check if dataset tables are renamed correctly as well
+    for version in [v.version for v in dataset.versions]:
+        old_table_name = catalog.warehouse.dataset_table_name(dataset, version)
+        new_table_name = catalog.warehouse.dataset_table_name(dataset_updated, version)
+        if old_name == new_name:
+            assert old_table_name == new_table_name
+        else:
+            assert table_row_count(catalog.warehouse.db, old_table_name) is None
+
+        assert table_row_count(catalog.warehouse.db, new_table_name) == 3
+
+
+def test_move_dataset_then_save_into(test_session):
+    old_name = "old.old.numbers"
+    new_name = "new.new.numbers"
+
+    # create 2 versions of dataset in old project
+    for _ in range(2):
+        dc.read_values(num=[1, 2, 3], session=test_session).save(old_name)
+
+    dc.move_dataset(old_name, new_name, session=test_session)
+    dc.read_values(num=[1, 2, 3], session=test_session).save(new_name)
+
+    ds = dc.datasets(column="dataset", session=test_session)
+    datasets = [
+        d
+        for d in ds.to_values("dataset")
+        if d.name == "numbers" and d.project == "new" and d.namespace == "new"
+    ]
+
+    assert len(datasets) == 3
+
+
+def test_move_dataset_wrong_old_project(test_session, project):
+    dc.read_values(num=[1, 2, 3], session=test_session).save("old.old.numbers")
+
+    with pytest.raises(ProjectNotFoundError):
+        dc.move_dataset("wrong.wrong.numbers", "new.new.numbers", session=test_session)
+
+
+def test_move_dataset_error_in_session_moved_dataset_removed(catalog):
+    from datachain.query.session import Session
+
+    old_name = "old.old.numbers"
+    new_name = "new.new.numbers"
+
+    with pytest.raises(DatasetNotFoundError):
+        with Session("new", catalog=catalog) as test_session:
+            dc.read_values(num=[1, 2, 3]).save("aa")
+            dc.read_values(num=[1, 2, 3], session=test_session).save(old_name)
+            dc.move_dataset(old_name, new_name, session=test_session)
+
+            # throws DatasetNotFoundError
+            dc.read_dataset("wrong", session=test_session)
+
+    ds = dc.datasets(column="dataset")
+    datasets = [d for d in ds.to_values("dataset")]  # noqa: C416
+    assert len(datasets) == 0
+
+
 def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
     dataset_new_name = dogs_dataset.name
     catalog = cloud_test_catalog.catalog
@@ -414,12 +501,12 @@ def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset):
     old_dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0")
     new_dataset_table_name = catalog.warehouse.dataset_table_name(dataset, "1.0.0")

-    expected_table_row_count =
+    expected_table_row_count = table_row_count(
         catalog.warehouse.db, old_dataset_table_name
     )
     assert expected_table_row_count
     assert dataset.get_version("1.0.0").num_objects == expected_table_row_count
-    assert expected_table_row_count ==
+    assert expected_table_row_count == table_row_count(
         catalog.warehouse.db, new_dataset_table_name
     )

```