datachain 0.24.1__tar.gz → 0.24.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.24.1 → datachain-0.24.3}/.pre-commit-config.yaml +2 -2
- {datachain-0.24.1 → datachain-0.24.3}/PKG-INFO +1 -1
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/catalog/catalog.py +11 -2
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/metastore.py +3 -1
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/sqlite.py +9 -6
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/warehouse.py +6 -4
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/listing.py +10 -3
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/dataset.py +11 -10
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_datachain.py +85 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_delta.py +7 -1
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_datachain.py +24 -0
- {datachain-0.24.1 → datachain-0.24.3}/.cruft.json +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.gitattributes +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/codecov.yaml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/dependabot.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/workflows/release.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/workflows/tests.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/.gitignore +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/LICENSE +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/README.rst +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/assets/datachain.svg +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/auth/login.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/auth/logout.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/auth/team.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/auth/token.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/index.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/job/cancel.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/job/clusters.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/job/logs.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/job/ls.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/commands/job/run.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/contributing.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/examples.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/db_migrations.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/delta.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/env.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/index.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/namespaces.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/processing.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/remotes.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/guide/retry.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/index.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/overrides/main.html +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/quick-start.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/file.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/index.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/pose.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/segment.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/datachain.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/func.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/index.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/toolkit.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/torch.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/references/udf.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/docs/tutorials.md +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/multimodal/wds.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/mkdocs.yml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/noxfile.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/pyproject.toml +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/setup.cfg +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/__main__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/asyn.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cache.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/cli/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/azure.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/gcs.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/hf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/local.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/client/s3.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/config.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/dataset.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/delta.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/error.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/fs/reference.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/fs/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/array.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/base.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/conditional.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/func.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/numeric.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/path.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/random.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/string.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/func/window.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/job.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/clip.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/file.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/hf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/image.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/listing.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/projects.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/settings.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/tar.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/text.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/udf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/video.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/bbox.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/pose.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/segment.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/model/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/namespace.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/node.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/progress.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/project.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/py.typed +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/batch.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/metrics.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/params.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/queue.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/schema.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/session.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/udf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/query/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/remote/studio.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/script_meta.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/semver.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/types.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/sql/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/studio.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/telemetry.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain/utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/conftest.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/data.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/examples/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/examples/test_examples.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/examples/wds_data.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/data/lena.jpg +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/test_array.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/test_path.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/test_random.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/functions/test_string.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/model/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_batching.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_catalog.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_client.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_data_storage.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_datasets.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_file.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_hf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_image.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_listing.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_ls.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_metastore.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_metrics.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_pull.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_pytorch.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_query.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_read_database.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_retry.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_session.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_toolkit.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_video.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/func/test_warehouse.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/scripts/feature_class.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/test_atomicity.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/test_cli_e2e.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/test_cli_studio.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/test_import_time.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/test_query_e2e.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/test_telemetry.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/model/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_asyn.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_cache.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_catalog.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_client.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_config.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_dataset.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_func.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_listing.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_metastore.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_query.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_query_params.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_semver.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_serializer.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_session.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_utils.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.24.1 → datachain-0.24.3}/tests/utils.py +0 -0
|
@@ -24,7 +24,7 @@ repos:
|
|
|
24
24
|
- id: trailing-whitespace
|
|
25
25
|
exclude: '^LICENSES/'
|
|
26
26
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
27
|
-
rev: 'v0.12.
|
|
27
|
+
rev: 'v0.12.1'
|
|
28
28
|
hooks:
|
|
29
29
|
- id: ruff
|
|
30
30
|
args: [--fix, --exit-non-zero-on-fix]
|
|
@@ -35,7 +35,7 @@ repos:
|
|
|
35
35
|
- id: codespell
|
|
36
36
|
additional_dependencies: ["tomli"]
|
|
37
37
|
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
|
|
38
|
-
rev: v2.
|
|
38
|
+
rev: v2.15.0
|
|
39
39
|
hooks:
|
|
40
40
|
- id: pretty-format-toml
|
|
41
41
|
args: [--autofix, --no-sort]
|
|
@@ -1098,9 +1098,18 @@ class Catalog:
|
|
|
1098
1098
|
) -> DatasetRecord:
|
|
1099
1099
|
from datachain.lib.listing import is_listing_dataset
|
|
1100
1100
|
|
|
1101
|
+
project = project or self.metastore.default_project
|
|
1102
|
+
|
|
1101
1103
|
if is_listing_dataset(name):
|
|
1102
1104
|
project = self.metastore.listing_project
|
|
1103
|
-
|
|
1105
|
+
|
|
1106
|
+
try:
|
|
1107
|
+
return self.metastore.get_dataset(name, project.id if project else None)
|
|
1108
|
+
except DatasetNotFoundError:
|
|
1109
|
+
raise DatasetNotFoundError(
|
|
1110
|
+
f"Dataset {name} not found in namespace {project.namespace.name}"
|
|
1111
|
+
f" and project {project.name}"
|
|
1112
|
+
) from None
|
|
1104
1113
|
|
|
1105
1114
|
def get_dataset_with_remote_fallback(
|
|
1106
1115
|
self,
|
|
@@ -1124,7 +1133,7 @@ class Catalog:
|
|
|
1124
1133
|
raise DatasetNotFoundError(
|
|
1125
1134
|
f"Dataset {name}"
|
|
1126
1135
|
+ (f" version {version} " if version else " ")
|
|
1127
|
-
+ "not found"
|
|
1136
|
+
+ f"not found in namespace {namespace_name} and project {project_name}"
|
|
1128
1137
|
)
|
|
1129
1138
|
|
|
1130
1139
|
if pull_dataset:
|
|
@@ -1194,14 +1194,16 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1194
1194
|
Gets a single dataset in project by dataset name.
|
|
1195
1195
|
"""
|
|
1196
1196
|
project_id = project_id or self.default_project.id
|
|
1197
|
+
|
|
1197
1198
|
d = self._datasets
|
|
1198
1199
|
query = self._base_dataset_query()
|
|
1199
1200
|
query = query.where(d.c.name == name, d.c.project_id == project_id) # type: ignore [attr-defined]
|
|
1200
1201
|
ds = self._parse_dataset(self.db.execute(query, conn=conn))
|
|
1201
1202
|
if not ds:
|
|
1202
1203
|
raise DatasetNotFoundError(
|
|
1203
|
-
f"Dataset {name} not found in project {project_id}"
|
|
1204
|
+
f"Dataset {name} not found in project with id {project_id}"
|
|
1204
1205
|
)
|
|
1206
|
+
|
|
1205
1207
|
return ds
|
|
1206
1208
|
|
|
1207
1209
|
def remove_dataset_version(
|
|
@@ -774,7 +774,15 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
774
774
|
query: Select,
|
|
775
775
|
progress_cb: Optional[Callable[[int], None]] = None,
|
|
776
776
|
) -> None:
|
|
777
|
-
|
|
777
|
+
col_id = (
|
|
778
|
+
query.selected_columns.sys__id
|
|
779
|
+
if "sys__id" in query.selected_columns
|
|
780
|
+
else None
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
# If there is no sys__id column, we cannot copy the table in batches,
|
|
784
|
+
# and we need to copy all rows at once. Same if there is a group by clause.
|
|
785
|
+
if col_id is None or len(query._group_by_clause) > 0:
|
|
778
786
|
select_q = query.with_only_columns(
|
|
779
787
|
*[c for c in query.selected_columns if c.name != "sys__id"]
|
|
780
788
|
)
|
|
@@ -782,12 +790,7 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
782
790
|
self.db.execute(q)
|
|
783
791
|
return
|
|
784
792
|
|
|
785
|
-
if "sys__id" in query.selected_columns:
|
|
786
|
-
col_id = query.selected_columns.sys__id
|
|
787
|
-
else:
|
|
788
|
-
col_id = sqlalchemy.column("sys__id")
|
|
789
793
|
select_ids = query.with_only_columns(col_id)
|
|
790
|
-
|
|
791
794
|
ids = self.db.execute(select_ids).fetchall()
|
|
792
795
|
|
|
793
796
|
select_q = (
|
|
@@ -218,7 +218,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
218
218
|
limit = query._limit
|
|
219
219
|
paginated_query = query.limit(page_size)
|
|
220
220
|
|
|
221
|
-
offset = 0
|
|
221
|
+
offset = query._offset or 0
|
|
222
222
|
num_yielded = 0
|
|
223
223
|
|
|
224
224
|
# Ensure we're using a thread-local connection
|
|
@@ -234,13 +234,13 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
234
234
|
# Cursor results are not thread-safe, so we convert them to a list
|
|
235
235
|
results = list(wh.dataset_rows_select(paginated_query.offset(offset)))
|
|
236
236
|
|
|
237
|
-
processed =
|
|
237
|
+
processed = 0
|
|
238
238
|
for row in results:
|
|
239
|
-
processed
|
|
239
|
+
processed += 1
|
|
240
240
|
yield row
|
|
241
241
|
num_yielded += 1
|
|
242
242
|
|
|
243
|
-
if
|
|
243
|
+
if processed < page_size:
|
|
244
244
|
break # no more results
|
|
245
245
|
offset += page_size
|
|
246
246
|
|
|
@@ -343,6 +343,8 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
343
343
|
if (id_col := get_query_id_column(query)) is None:
|
|
344
344
|
raise RuntimeError("sys__id column not found in query")
|
|
345
345
|
|
|
346
|
+
query = query._clone().offset(None).limit(None).order_by(None)
|
|
347
|
+
|
|
346
348
|
if is_batched:
|
|
347
349
|
for batch in ids:
|
|
348
350
|
yield list(self.dataset_rows_select(query.where(id_col.in_(batch))))
|
|
@@ -65,10 +65,17 @@ class Listing:
|
|
|
65
65
|
|
|
66
66
|
@cached_property
|
|
67
67
|
def dataset(self) -> "DatasetRecord":
|
|
68
|
+
from datachain.error import DatasetNotFoundError
|
|
69
|
+
|
|
68
70
|
assert self.dataset_name
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
71
|
+
project = self.metastore.listing_project
|
|
72
|
+
try:
|
|
73
|
+
return self.metastore.get_dataset(self.dataset_name, project.id)
|
|
74
|
+
except DatasetNotFoundError:
|
|
75
|
+
raise DatasetNotFoundError(
|
|
76
|
+
f"Dataset {self.dataset_name} not found in namespace"
|
|
77
|
+
f" {project.namespace.name} and project {project.name}"
|
|
78
|
+
) from None
|
|
72
79
|
|
|
73
80
|
@cached_property
|
|
74
81
|
def dataset_rows(self):
|
|
@@ -11,6 +11,7 @@ from collections.abc import Generator, Iterable, Iterator, Sequence
|
|
|
11
11
|
from copy import copy
|
|
12
12
|
from functools import wraps
|
|
13
13
|
from secrets import token_hex
|
|
14
|
+
from types import GeneratorType
|
|
14
15
|
from typing import (
|
|
15
16
|
TYPE_CHECKING,
|
|
16
17
|
Any,
|
|
@@ -557,8 +558,8 @@ class UDFStep(Step, ABC):
|
|
|
557
558
|
"""
|
|
558
559
|
assert self.partition_by is not None
|
|
559
560
|
|
|
560
|
-
if isinstance(self.partition_by,
|
|
561
|
-
list_partition_by = self.partition_by
|
|
561
|
+
if isinstance(self.partition_by, (list, tuple, GeneratorType)):
|
|
562
|
+
list_partition_by = list(self.partition_by)
|
|
562
563
|
else:
|
|
563
564
|
list_partition_by = [self.partition_by]
|
|
564
565
|
|
|
@@ -575,7 +576,10 @@ class UDFStep(Step, ABC):
|
|
|
575
576
|
f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
|
|
576
577
|
]
|
|
577
578
|
self.catalog.warehouse.db.execute(
|
|
578
|
-
tbl.insert().from_select(
|
|
579
|
+
tbl.insert().from_select(
|
|
580
|
+
cols,
|
|
581
|
+
query.offset(None).limit(None).with_only_columns(*cols),
|
|
582
|
+
)
|
|
579
583
|
)
|
|
580
584
|
|
|
581
585
|
return tbl
|
|
@@ -601,13 +605,10 @@ class UDFStep(Step, ABC):
|
|
|
601
605
|
if self.partition_by is not None:
|
|
602
606
|
partition_tbl = self.create_partitions_table(query)
|
|
603
607
|
temp_tables.append(partition_tbl.name)
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
.outerjoin(partition_tbl, partition_tbl.c.sys__id == subq.c.sys__id)
|
|
609
|
-
.add_columns(*partition_columns())
|
|
610
|
-
)
|
|
608
|
+
query = query.outerjoin(
|
|
609
|
+
partition_tbl,
|
|
610
|
+
partition_tbl.c.sys__id == query.selected_columns.sys__id,
|
|
611
|
+
).add_columns(*partition_columns())
|
|
611
612
|
|
|
612
613
|
query, tables = self.process_input_query(query)
|
|
613
614
|
temp_tables.extend(t.name for t in tables)
|
|
@@ -236,6 +236,22 @@ def test_read_storage_dependencies(cloud_test_catalog, cloud_type):
|
|
|
236
236
|
assert dependencies[0].name == dep_name
|
|
237
237
|
|
|
238
238
|
|
|
239
|
+
def test_persist_after_mutate(test_session):
|
|
240
|
+
chain = (
|
|
241
|
+
dc.read_values(fib=[1, 1, 2, 3, 5, 8, 13, 21], session=test_session)
|
|
242
|
+
.map(mod3=lambda fib: fib % 3, output=int)
|
|
243
|
+
.group_by(
|
|
244
|
+
cnt=dc.func.count(),
|
|
245
|
+
partition_by="mod3",
|
|
246
|
+
)
|
|
247
|
+
.mutate(x=1)
|
|
248
|
+
.persist()
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
assert chain.count() == 3
|
|
252
|
+
assert set(chain.to_values("mod3")) == {0, 1, 2}
|
|
253
|
+
|
|
254
|
+
|
|
239
255
|
def test_persist_not_affects_dependencies(tmp_dir, test_session):
|
|
240
256
|
for i in range(4):
|
|
241
257
|
(tmp_dir / f"file{i}.txt").write_text(f"file{i}")
|
|
@@ -2324,3 +2340,72 @@ def test_agg(catalog_tmpfile, parallel):
|
|
|
2324
2340
|
],
|
|
2325
2341
|
"parent",
|
|
2326
2342
|
)
|
|
2343
|
+
|
|
2344
|
+
|
|
2345
|
+
@pytest.mark.parametrize("parallel", [1, 2])
|
|
2346
|
+
@pytest.mark.parametrize(
|
|
2347
|
+
"offset,limit,files",
|
|
2348
|
+
[
|
|
2349
|
+
(None, 1000, [f"file{i:02d}" for i in range(100)]),
|
|
2350
|
+
(None, 3, ["file00", "file01", "file02"]),
|
|
2351
|
+
(0, 3, ["file00", "file01", "file02"]),
|
|
2352
|
+
(97, 1000, ["file97", "file98", "file99"]),
|
|
2353
|
+
(1, 2, ["file01", "file02"]),
|
|
2354
|
+
(50, 3, ["file50", "file51", "file52"]),
|
|
2355
|
+
(None, 0, []),
|
|
2356
|
+
(50, 0, []),
|
|
2357
|
+
],
|
|
2358
|
+
)
|
|
2359
|
+
def test_agg_offset_limit(catalog_tmpfile, parallel, offset, limit, files):
|
|
2360
|
+
def process(filename: list[str]) -> Iterator[tuple[str, int]]:
|
|
2361
|
+
yield filename[0], len(filename)
|
|
2362
|
+
|
|
2363
|
+
ds = dc.read_values(
|
|
2364
|
+
filename=[f"file{i:02d}" for i in range(100)],
|
|
2365
|
+
value=list(range(100)),
|
|
2366
|
+
session=catalog_tmpfile.session,
|
|
2367
|
+
)
|
|
2368
|
+
if offset is not None:
|
|
2369
|
+
ds = ds.offset(offset)
|
|
2370
|
+
if limit is not None:
|
|
2371
|
+
ds = ds.limit(limit)
|
|
2372
|
+
ds = (
|
|
2373
|
+
ds.settings(parallel=parallel)
|
|
2374
|
+
.agg(
|
|
2375
|
+
process,
|
|
2376
|
+
output={"filename": str, "count": int},
|
|
2377
|
+
partition_by="filename",
|
|
2378
|
+
)
|
|
2379
|
+
.save("my-ds")
|
|
2380
|
+
)
|
|
2381
|
+
|
|
2382
|
+
records = list(ds.to_records())
|
|
2383
|
+
assert len(records) == len(files)
|
|
2384
|
+
assert all(row["count"] == 1 for row in records)
|
|
2385
|
+
assert sorted(row["filename"] for row in records) == sorted(files)
|
|
2386
|
+
|
|
2387
|
+
|
|
2388
|
+
@pytest.mark.parametrize("parallel", [1, 2])
|
|
2389
|
+
@pytest.mark.parametrize("sample", [0, 1, 3, 10, 50, 100])
|
|
2390
|
+
def test_agg_sample(catalog_tmpfile, parallel, sample):
|
|
2391
|
+
def process(filename: list[str]) -> Iterator[tuple[str, int]]:
|
|
2392
|
+
yield filename[0], len(filename)
|
|
2393
|
+
|
|
2394
|
+
ds = (
|
|
2395
|
+
dc.read_values(
|
|
2396
|
+
filename=[f"file{i:02d}" for i in range(100)],
|
|
2397
|
+
session=catalog_tmpfile.session,
|
|
2398
|
+
)
|
|
2399
|
+
.sample(sample)
|
|
2400
|
+
.settings(parallel=parallel)
|
|
2401
|
+
.agg(
|
|
2402
|
+
process,
|
|
2403
|
+
output={"filename": str, "count": int},
|
|
2404
|
+
partition_by="filename",
|
|
2405
|
+
)
|
|
2406
|
+
.save("my-ds")
|
|
2407
|
+
)
|
|
2408
|
+
|
|
2409
|
+
records = list(ds.to_records())
|
|
2410
|
+
assert len(records) == sample
|
|
2411
|
+
assert all(row["count"] == 1 for row in records)
|
|
@@ -248,6 +248,9 @@ def test_delta_update_check_num_calls(test_session, tmp_dir, tmp_path, capsys):
|
|
|
248
248
|
|
|
249
249
|
|
|
250
250
|
def test_delta_update_no_diff(test_session, tmp_dir, tmp_path):
|
|
251
|
+
catalog = test_session.catalog
|
|
252
|
+
default_namespace_name = catalog.metastore.default_namespace_name
|
|
253
|
+
default_project_name = catalog.metastore.default_project_name
|
|
251
254
|
ds_name = "delta_ds"
|
|
252
255
|
path = tmp_dir.as_uri()
|
|
253
256
|
tmp_dir = tmp_dir / "images"
|
|
@@ -296,7 +299,10 @@ def test_delta_update_no_diff(test_session, tmp_dir, tmp_path):
|
|
|
296
299
|
with pytest.raises(DatasetNotFoundError) as exc_info:
|
|
297
300
|
dc.read_dataset(ds_name, version="1.0.1")
|
|
298
301
|
|
|
299
|
-
assert str(exc_info.value) ==
|
|
302
|
+
assert str(exc_info.value) == (
|
|
303
|
+
f"Dataset {ds_name} version 1.0.1 not found in namespace "
|
|
304
|
+
f"{default_namespace_name} and project {default_project_name}"
|
|
305
|
+
)
|
|
300
306
|
|
|
301
307
|
|
|
302
308
|
@pytest.fixture
|
|
@@ -16,6 +16,7 @@ from pydantic import BaseModel
|
|
|
16
16
|
|
|
17
17
|
import datachain as dc
|
|
18
18
|
from datachain import Column
|
|
19
|
+
from datachain.data_storage import AbstractMetastore
|
|
19
20
|
from datachain.error import (
|
|
20
21
|
DatasetInvalidVersionError,
|
|
21
22
|
DatasetNotFoundError,
|
|
@@ -3428,6 +3429,29 @@ def test_save_to_non_default_namespace_and_project(
|
|
|
3428
3429
|
dc.read_dataset(name="fibonacci")
|
|
3429
3430
|
|
|
3430
3431
|
|
|
3432
|
+
def test_dataset_not_found_in_default_project(test_session):
|
|
3433
|
+
metastore = test_session.catalog.metastore
|
|
3434
|
+
with pytest.raises(DatasetNotFoundError) as excinfo:
|
|
3435
|
+
with patch.object(AbstractMetastore, "is_local_dataset", return_value=True):
|
|
3436
|
+
dc.read_dataset("fibonacci")
|
|
3437
|
+
assert str(excinfo.value) == (
|
|
3438
|
+
f"Dataset fibonacci not found in namespace {metastore.default_namespace_name}"
|
|
3439
|
+
f" and project {metastore.default_project_name}"
|
|
3440
|
+
)
|
|
3441
|
+
|
|
3442
|
+
|
|
3443
|
+
@pytest.mark.parametrize("project_created", (True, False))
|
|
3444
|
+
def test_dataset_not_found_in_non_default_project(test_session, project_created):
|
|
3445
|
+
if project_created:
|
|
3446
|
+
dc.create_project("dev", "numbers")
|
|
3447
|
+
with pytest.raises(DatasetNotFoundError) as excinfo:
|
|
3448
|
+
with patch.object(AbstractMetastore, "is_local_dataset", return_value=True):
|
|
3449
|
+
dc.read_dataset("dev.numbers.fibonacci")
|
|
3450
|
+
assert str(excinfo.value) == (
|
|
3451
|
+
"Dataset fibonacci not found in namespace dev and project numbers"
|
|
3452
|
+
)
|
|
3453
|
+
|
|
3454
|
+
|
|
3431
3455
|
@pytest.mark.parametrize("use_settings", (True, False))
|
|
3432
3456
|
@pytest.mark.parametrize("project_created_upfront", (True, False))
|
|
3433
3457
|
def test_save_specify_only_non_default_project(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|