datachain 0.24.3__tar.gz → 0.24.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.24.3 → datachain-0.24.4}/.github/workflows/tests-studio.yml +1 -1
- {datachain-0.24.3 → datachain-0.24.4}/PKG-INFO +1 -1
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/catalog/catalog.py +8 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/dataset.py +1 -1
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/datachain.py +26 -1
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/datasets.py +1 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/namespace.py +1 -1
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/project.py +1 -1
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/dataset.py +5 -1
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_read_dataset_remote.py +49 -4
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_datachain.py +57 -0
- {datachain-0.24.3 → datachain-0.24.4}/.cruft.json +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.gitattributes +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/codecov.yaml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/dependabot.yml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/workflows/release.yml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/workflows/tests.yml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.gitignore +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/.pre-commit-config.yaml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/LICENSE +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/README.rst +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/assets/datachain.svg +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/auth/login.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/auth/logout.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/auth/team.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/auth/token.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/index.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/job/cancel.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/job/clusters.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/job/logs.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/job/ls.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/commands/job/run.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/contributing.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/examples.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/db_migrations.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/delta.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/env.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/index.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/namespaces.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/processing.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/remotes.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/guide/retry.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/index.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/overrides/main.html +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/quick-start.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/file.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/index.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/pose.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/segment.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/datachain.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/func.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/index.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/toolkit.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/torch.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/references/udf.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/docs/tutorials.md +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/multimodal/wds.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/mkdocs.yml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/noxfile.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/pyproject.toml +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/setup.cfg +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/__main__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/asyn.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cache.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/cli/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/azure.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/gcs.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/hf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/local.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/client/s3.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/config.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/delta.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/error.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/fs/reference.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/fs/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/array.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/base.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/conditional.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/func.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/numeric.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/path.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/random.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/string.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/func/window.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/job.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/clip.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/file.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/hf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/image.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/listing.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/projects.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/settings.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/tar.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/text.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/udf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/video.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/listing.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/bbox.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/pose.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/segment.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/model/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/node.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/progress.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/py.typed +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/batch.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/metrics.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/params.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/queue.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/schema.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/session.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/udf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/query/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/remote/studio.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/script_meta.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/semver.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/types.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/sql/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/studio.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/telemetry.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain/utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/conftest.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/data.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/examples/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/examples/test_examples.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/examples/wds_data.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/data/lena.jpg +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/test_array.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/test_path.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/test_random.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/functions/test_string.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/model/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_batching.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_catalog.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_client.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_data_storage.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_datachain.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_datasets.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_delta.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_file.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_hf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_image.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_listing.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_ls.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_metastore.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_metrics.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_pull.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_pytorch.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_query.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_read_database.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_retry.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_session.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_toolkit.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_video.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/func/test_warehouse.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/scripts/feature_class.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/test_atomicity.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/test_cli_e2e.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/test_cli_studio.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/test_import_time.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/test_query_e2e.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/test_telemetry.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/model/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_asyn.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_cache.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_catalog.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_client.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_config.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_dataset.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_func.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_listing.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_metastore.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_query.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_query_params.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_semver.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_serializer.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_session.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_utils.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.24.3 → datachain-0.24.4}/tests/utils.py +0 -0
|
@@ -98,7 +98,7 @@ jobs:
|
|
|
98
98
|
- name: Run tests
|
|
99
99
|
# Generate `.test_durations` file with `pytest --store-durations --durations-path ../.github/.test_durations ...`
|
|
100
100
|
run: >
|
|
101
|
-
|
|
101
|
+
DATACHAIN_METASTORE_ARG_USERNAME=john
|
|
102
102
|
PYTHONPATH="$(pwd)/..:${PYTHONPATH}"
|
|
103
103
|
pytest
|
|
104
104
|
--config-file=pyproject.toml -rs
|
|
@@ -1120,6 +1120,14 @@ class Catalog:
|
|
|
1120
1120
|
pull_dataset: bool = False,
|
|
1121
1121
|
update: bool = False,
|
|
1122
1122
|
) -> DatasetRecord:
|
|
1123
|
+
# Intentionally ignore update flag if version is provided. Here only exact
|
|
1124
|
+
# version can be provided and update then doesn't make sense.
|
|
1125
|
+
# It corresponds to a query like this for example:
|
|
1126
|
+
#
|
|
1127
|
+
# dc.read_dataset("some.remote.dataset", version="1.0.0", update=True)
|
|
1128
|
+
if version:
|
|
1129
|
+
update = False
|
|
1130
|
+
|
|
1123
1131
|
if self.metastore.is_local_dataset(namespace_name) or not update:
|
|
1124
1132
|
try:
|
|
1125
1133
|
project = self.metastore.get_project(project_name, namespace_name)
|
|
@@ -21,6 +21,7 @@ from typing import (
|
|
|
21
21
|
import orjson
|
|
22
22
|
import sqlalchemy
|
|
23
23
|
from pydantic import BaseModel
|
|
24
|
+
from sqlalchemy.sql.elements import ColumnElement
|
|
24
25
|
from tqdm import tqdm
|
|
25
26
|
|
|
26
27
|
from datachain import semver
|
|
@@ -806,11 +807,35 @@ class DataChain:
|
|
|
806
807
|
chain.save("new_dataset")
|
|
807
808
|
```
|
|
808
809
|
"""
|
|
810
|
+
# Convert string partition_by parameters to Column objects
|
|
811
|
+
processed_partition_by = partition_by
|
|
812
|
+
if partition_by is not None:
|
|
813
|
+
if isinstance(partition_by, (str, Function, ColumnElement)):
|
|
814
|
+
list_partition_by = [partition_by]
|
|
815
|
+
else:
|
|
816
|
+
list_partition_by = list(partition_by)
|
|
817
|
+
|
|
818
|
+
processed_partition_columns: list[ColumnElement] = []
|
|
819
|
+
for col in list_partition_by:
|
|
820
|
+
if isinstance(col, str):
|
|
821
|
+
col_db_name = ColumnMeta.to_db_name(col)
|
|
822
|
+
col_type = self.signals_schema.get_column_type(col_db_name)
|
|
823
|
+
column = Column(col_db_name, python_to_sql(col_type))
|
|
824
|
+
processed_partition_columns.append(column)
|
|
825
|
+
elif isinstance(col, Function):
|
|
826
|
+
column = col.get_column(self.signals_schema)
|
|
827
|
+
processed_partition_columns.append(column)
|
|
828
|
+
else:
|
|
829
|
+
# Assume it's already a ColumnElement
|
|
830
|
+
processed_partition_columns.append(col)
|
|
831
|
+
|
|
832
|
+
processed_partition_by = processed_partition_columns
|
|
833
|
+
|
|
809
834
|
udf_obj = self._udf_to_obj(Aggregator, func, params, output, signal_map)
|
|
810
835
|
return self._evolve(
|
|
811
836
|
query=self._query.generate(
|
|
812
837
|
udf_obj.to_udf_wrapper(),
|
|
813
|
-
partition_by=partition_by,
|
|
838
|
+
partition_by=processed_partition_by,
|
|
814
839
|
**self._settings.to_dict(),
|
|
815
840
|
),
|
|
816
841
|
signal_schema=udf_obj.output,
|
|
@@ -82,7 +82,10 @@ if TYPE_CHECKING:
|
|
|
82
82
|
INSERT_BATCH_SIZE = 10000
|
|
83
83
|
|
|
84
84
|
PartitionByType = Union[
|
|
85
|
-
|
|
85
|
+
str,
|
|
86
|
+
Function,
|
|
87
|
+
ColumnElement,
|
|
88
|
+
Sequence[Union[str, Function, ColumnElement]],
|
|
86
89
|
]
|
|
87
90
|
JoinPredicateType = Union[str, ColumnClause, ColumnElement]
|
|
88
91
|
DatasetDependencyType = tuple["DatasetRecord", str]
|
|
@@ -1142,6 +1145,7 @@ class DatasetQuery:
|
|
|
1142
1145
|
project_name=project_name,
|
|
1143
1146
|
version=version,
|
|
1144
1147
|
pull_dataset=True,
|
|
1148
|
+
update=update,
|
|
1145
1149
|
)
|
|
1146
1150
|
)
|
|
1147
1151
|
|
|
@@ -362,8 +362,16 @@ def test_read_dataset_remote_update_flag(
|
|
|
362
362
|
assert dc.datasets().to_values("version") == ["1.0.0"]
|
|
363
363
|
assert ds1.to_values("version")[0] == "1.0.0"
|
|
364
364
|
|
|
365
|
+
# Read without update and version returns a cached version
|
|
366
|
+
ds1 = dc.read_dataset(
|
|
367
|
+
f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
|
|
368
|
+
session=test_session,
|
|
369
|
+
)
|
|
370
|
+
assert dc.datasets().to_values("version") == ["1.0.0"]
|
|
371
|
+
assert ds1.to_values("version")[0] == "1.0.0"
|
|
372
|
+
|
|
365
373
|
# Second read with update=True with the exact version
|
|
366
|
-
# returns the same
|
|
374
|
+
# returns the same dataset version
|
|
367
375
|
ds2 = dc.read_dataset(
|
|
368
376
|
f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
|
|
369
377
|
version="1.0.0",
|
|
@@ -385,9 +393,7 @@ def test_read_dataset_remote_update_flag(
|
|
|
385
393
|
assert dc.datasets().to_values("version") == ["1.0.0"]
|
|
386
394
|
assert ds3.to_values("version")[0] == "1.0.0"
|
|
387
395
|
|
|
388
|
-
# Finally, read with update=
|
|
389
|
-
# that allows for newer version still bring the same version
|
|
390
|
-
# as the one already downloaded
|
|
396
|
+
# Finally, read with update=True brings the latest version
|
|
391
397
|
ds4 = dc.read_dataset(
|
|
392
398
|
f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
|
|
393
399
|
version=">=1.0.0",
|
|
@@ -399,6 +405,45 @@ def test_read_dataset_remote_update_flag(
|
|
|
399
405
|
assert dc.datasets().to_values("version") == ["1.0.0", "2.0.0"]
|
|
400
406
|
|
|
401
407
|
|
|
408
|
+
@skip_if_not_sqlite
|
|
409
|
+
def test_read_dataset_remote_update_flag_no_version(
|
|
410
|
+
studio_token,
|
|
411
|
+
test_session,
|
|
412
|
+
remote_dataset_multi_version,
|
|
413
|
+
mock_dataset_info_endpoint,
|
|
414
|
+
mock_export_endpoint_with_urls,
|
|
415
|
+
mock_export_status_completed,
|
|
416
|
+
mock_s3_parquet_download,
|
|
417
|
+
mock_dataset_rows_fetcher_status_check,
|
|
418
|
+
requests_mock,
|
|
419
|
+
):
|
|
420
|
+
"""Test read_dataset with update=True flag to force remote check."""
|
|
421
|
+
|
|
422
|
+
# Mock the Studio API responses
|
|
423
|
+
mock_dataset_info_endpoint(remote_dataset_multi_version)
|
|
424
|
+
mock_s3_parquet_download()
|
|
425
|
+
|
|
426
|
+
# First read - downloads version 1.0.0
|
|
427
|
+
ds1 = dc.read_dataset(
|
|
428
|
+
f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
|
|
429
|
+
version="1.0.0",
|
|
430
|
+
session=test_session,
|
|
431
|
+
)
|
|
432
|
+
assert dc.datasets().to_values("version") == ["1.0.0"]
|
|
433
|
+
assert ds1.to_values("version")[0] == "1.0.0"
|
|
434
|
+
|
|
435
|
+
# Read with update=True w/o version specifier also
|
|
436
|
+
# checks the most recent remote version and brings it
|
|
437
|
+
ds4 = dc.read_dataset(
|
|
438
|
+
f"{REMOTE_NAMESPACE_NAME}.{REMOTE_PROJECT_NAME}.dogs",
|
|
439
|
+
update=True,
|
|
440
|
+
session=test_session,
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
assert ds4.to_values("version")[0] == "2.0.0"
|
|
444
|
+
assert dc.datasets().to_values("version") == ["1.0.0", "2.0.0"]
|
|
445
|
+
|
|
446
|
+
|
|
402
447
|
@skip_if_not_sqlite
|
|
403
448
|
def test_read_dataset_remote_version_specifiers(
|
|
404
449
|
studio_token,
|
|
@@ -3595,3 +3595,60 @@ def test_save_create_project_not_allowed(test_session, allow_create_project):
|
|
|
3595
3595
|
dc.read_values(fib=[1, 1, 2, 3, 5, 8], session=test_session).save(
|
|
3596
3596
|
"dev.numbers.fibonacci"
|
|
3597
3597
|
)
|
|
3598
|
+
|
|
3599
|
+
|
|
3600
|
+
def test_agg_partition_by_string_notation(test_session):
|
|
3601
|
+
"""Test that agg method supports string notation for partition_by."""
|
|
3602
|
+
|
|
3603
|
+
class _ImageGroup(BaseModel):
|
|
3604
|
+
name: str
|
|
3605
|
+
size: int
|
|
3606
|
+
|
|
3607
|
+
def func(key, val) -> Iterator[tuple[File, _ImageGroup]]:
|
|
3608
|
+
n = "-".join(key)
|
|
3609
|
+
v = sum(val)
|
|
3610
|
+
yield File(path=n), _ImageGroup(name=n, size=v)
|
|
3611
|
+
|
|
3612
|
+
keys = ["n1", "n2", "n1"]
|
|
3613
|
+
values = [1, 5, 9]
|
|
3614
|
+
|
|
3615
|
+
# Test using string notation (NEW functionality)
|
|
3616
|
+
ds = dc.read_values(key=keys, val=values, session=test_session).agg(
|
|
3617
|
+
x=func,
|
|
3618
|
+
partition_by="key", # String notation instead of C("key")
|
|
3619
|
+
)
|
|
3620
|
+
|
|
3621
|
+
assert ds.order_by("x_1.name").to_values("x_1.name") == ["n1-n1", "n2"]
|
|
3622
|
+
assert ds.order_by("x_1.size").to_values("x_1.size") == [5, 10]
|
|
3623
|
+
|
|
3624
|
+
|
|
3625
|
+
def test_agg_partition_by_string_sequence(test_session):
|
|
3626
|
+
"""Test that agg method supports sequence of strings for partition_by."""
|
|
3627
|
+
|
|
3628
|
+
class _ImageGroup(BaseModel):
|
|
3629
|
+
name: str
|
|
3630
|
+
size: int
|
|
3631
|
+
|
|
3632
|
+
def func(key1, key2, val) -> Iterator[tuple[File, _ImageGroup]]:
|
|
3633
|
+
n = f"{key1[0]}-{key2[0]}"
|
|
3634
|
+
v = sum(val)
|
|
3635
|
+
yield File(path=n), _ImageGroup(name=n, size=v)
|
|
3636
|
+
|
|
3637
|
+
key1_values = ["a", "a", "b"]
|
|
3638
|
+
key2_values = ["x", "y", "x"]
|
|
3639
|
+
values = [1, 5, 9]
|
|
3640
|
+
|
|
3641
|
+
# Test using sequence of strings (NEW functionality)
|
|
3642
|
+
ds = dc.read_values(
|
|
3643
|
+
key1=key1_values, key2=key2_values, val=values, session=test_session
|
|
3644
|
+
).agg(
|
|
3645
|
+
x=func,
|
|
3646
|
+
partition_by=["key1", "key2"], # Sequence of strings
|
|
3647
|
+
)
|
|
3648
|
+
|
|
3649
|
+
result_names = ds.order_by("x_1.name").to_values("x_1.name")
|
|
3650
|
+
result_sizes = ds.order_by("x_1.size").to_values("x_1.size")
|
|
3651
|
+
|
|
3652
|
+
# Should have 3 partitions: (a,x), (a,y), (b,x)
|
|
3653
|
+
assert len(result_names) == 3
|
|
3654
|
+
assert len(result_sizes) == 3
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|