datachain 0.18.3__tar.gz → 0.18.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.18.3 → datachain-0.18.5}/.github/codecov.yaml +7 -0
- {datachain-0.18.3 → datachain-0.18.5}/.pre-commit-config.yaml +1 -1
- {datachain-0.18.3/src/datachain.egg-info → datachain-0.18.5}/PKG-INFO +3 -3
- {datachain-0.18.3 → datachain-0.18.5}/pyproject.toml +2 -2
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/catalog.py +36 -22
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/azure.py +1 -1
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/gcs.py +1 -1
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/s3.py +5 -3
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/metastore.py +87 -42
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/dataset.py +1 -1
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/aggregate.py +64 -38
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/array.py +102 -73
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/conditional.py +71 -51
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/func.py +1 -1
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/numeric.py +55 -36
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/path.py +32 -20
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/random.py +2 -2
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/string.py +59 -37
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/window.py +7 -8
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/datachain.py +9 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/listing.py +2 -3
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/bbox.py +6 -4
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/pose.py +6 -4
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/segment.py +6 -4
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/remote/studio.py +4 -2
- {datachain-0.18.3 → datachain-0.18.5/src/datachain.egg-info}/PKG-INFO +3 -3
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/SOURCES.txt +9 -1
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/requires.txt +2 -2
- datachain-0.18.5/tests/func/functions/test_aggregate.py +173 -0
- datachain-0.18.5/tests/func/functions/test_array.py +424 -0
- datachain-0.18.5/tests/func/functions/test_conditional.py +178 -0
- datachain-0.18.5/tests/func/functions/test_numeric.py +127 -0
- datachain-0.18.5/tests/func/functions/test_path.py +108 -0
- datachain-0.18.5/tests/func/functions/test_random.py +20 -0
- datachain-0.18.5/tests/func/functions/test_string.py +213 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/test_yolo.py +73 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_catalog.py +1 -1
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_client.py +6 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_datachain.py +6 -9
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_dataset_query.py +4 -12
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_datasets.py +2 -2
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_listing.py +2 -8
- datachain-0.18.5/tests/func/test_metastore.py +889 -0
- datachain-0.18.5/tests/unit/sql/sqlite/__init__.py +0 -0
- datachain-0.18.3/tests/func/test_func.py +0 -223
- {datachain-0.18.3 → datachain-0.18.5}/.cruft.json +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.gitattributes +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/dependabot.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/release.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/tests.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/.gitignore +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/LICENSE +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/README.rst +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/assets/datachain.svg +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/login.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/logout.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/team.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/auth/token.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/cancel.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/logs.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/ls.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/commands/job/run.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/contributing.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/examples.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/overrides/main.html +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/quick-start.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/file.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/pose.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/segment.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/datachain.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/func.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/index.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/remotes.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/toolkit.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/torch.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/references/udf.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/docs/tutorials.md +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/wds.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/mkdocs.yml +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/noxfile.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/setup.cfg +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/__main__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/asyn.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cache.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/cli/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/client/local.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/config.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/delta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/error.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/fs/reference.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/fs/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/func/base.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/job.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/clip.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/file.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/image.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/settings.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/tar.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/text.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/udf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/video.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/listing.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/bbox.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/pose.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/segment.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/model/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/node.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/progress.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/py.typed +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/batch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/dataset.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/metrics.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/params.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/queue.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/session.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/udf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/query/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/script_meta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/semver.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/types.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/sql/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/studio.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/telemetry.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain/utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/conftest.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/data.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/examples/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/examples/test_examples.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/examples/wds_data.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/data/lena.jpg +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.18.3/tests/func/model → datachain-0.18.5/tests/func/functions}/__init__.py +0 -0
- {datachain-0.18.3/tests/unit → datachain-0.18.5/tests/func/model}/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_batching.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_data_storage.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_delta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_file.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_image.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_ls.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_metrics.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_pull.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_pytorch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_read_database.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_session.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_toolkit.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_video.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/func/test_warehouse.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/test_atomicity.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/test_cli_e2e.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/test_cli_studio.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/test_import_time.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/test_query_e2e.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/test_telemetry.py +0 -0
- {datachain-0.18.3/tests/unit/lib → datachain-0.18.5/tests/unit}/__init__.py +0 -0
- {datachain-0.18.3/tests/unit/model → datachain-0.18.5/tests/unit/lib}/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.18.3/tests/unit/sql → datachain-0.18.5/tests/unit/model}/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.18.3/tests/unit/sql/sqlite → datachain-0.18.5/tests/unit/sql}/__init__.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_asyn.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_cache.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_catalog.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_client.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_config.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_dataset.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_func.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_listing.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_metastore.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_query.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_query_params.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_semver.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_serializer.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_session.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_utils.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.18.3 → datachain-0.18.5}/tests/utils.py +0 -0
|
@@ -9,6 +9,13 @@ coverage:
|
|
|
9
9
|
threshold: 10%
|
|
10
10
|
# non-blocking status checks
|
|
11
11
|
informational: true
|
|
12
|
+
only_pulls: true
|
|
13
|
+
|
|
14
|
+
comment:
|
|
15
|
+
# sections shown in the PR comment
|
|
16
|
+
layout: "reach, diff, flags, files"
|
|
17
|
+
# updates the comment on PRs when coverage changes
|
|
18
|
+
behavior: default
|
|
12
19
|
|
|
13
20
|
flags:
|
|
14
21
|
datachain:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.18.3
|
|
3
|
+
Version: 0.18.5
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -23,7 +23,7 @@ Requires-Dist: tqdm
|
|
|
23
23
|
Requires-Dist: numpy<3,>=1
|
|
24
24
|
Requires-Dist: pandas>=2.0.0
|
|
25
25
|
Requires-Dist: packaging
|
|
26
|
-
Requires-Dist: pyarrow
|
|
26
|
+
Requires-Dist: pyarrow
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: python-dateutil>=2
|
|
29
29
|
Requires-Dist: attrs>=21.3.0
|
|
@@ -38,7 +38,7 @@ Requires-Dist: sqlalchemy>=2
|
|
|
38
38
|
Requires-Dist: multiprocess==0.70.16
|
|
39
39
|
Requires-Dist: cloudpickle
|
|
40
40
|
Requires-Dist: orjson>=3.10.5
|
|
41
|
-
Requires-Dist: pydantic
|
|
41
|
+
Requires-Dist: pydantic
|
|
42
42
|
Requires-Dist: jmespath>=1.0
|
|
43
43
|
Requires-Dist: datamodel-code-generator>=0.25
|
|
44
44
|
Requires-Dist: Pillow<12,>=10.0.0
|
|
@@ -27,7 +27,7 @@ dependencies = [
|
|
|
27
27
|
"numpy>=1,<3",
|
|
28
28
|
"pandas>=2.0.0",
|
|
29
29
|
"packaging",
|
|
30
|
-
"pyarrow
|
|
30
|
+
"pyarrow",
|
|
31
31
|
"typing-extensions",
|
|
32
32
|
"python-dateutil>=2",
|
|
33
33
|
"attrs>=21.3.0",
|
|
@@ -42,7 +42,7 @@ dependencies = [
|
|
|
42
42
|
"multiprocess==0.70.16",
|
|
43
43
|
"cloudpickle",
|
|
44
44
|
"orjson>=3.10.5",
|
|
45
|
-
"pydantic
|
|
45
|
+
"pydantic",
|
|
46
46
|
"jmespath>=1.0",
|
|
47
47
|
"datamodel-code-generator>=0.25",
|
|
48
48
|
"Pillow>=10.0.0,<12",
|
|
@@ -66,6 +66,7 @@ if TYPE_CHECKING:
|
|
|
66
66
|
)
|
|
67
67
|
from datachain.dataset import DatasetListVersion
|
|
68
68
|
from datachain.job import Job
|
|
69
|
+
from datachain.lib.listing_info import ListingInfo
|
|
69
70
|
from datachain.listing import Listing
|
|
70
71
|
|
|
71
72
|
logger = logging.getLogger("datachain")
|
|
@@ -910,11 +911,7 @@ class Catalog:
|
|
|
910
911
|
values["num_objects"] = None
|
|
911
912
|
values["size"] = None
|
|
912
913
|
values["preview"] = None
|
|
913
|
-
self.metastore.update_dataset_version(
|
|
914
|
-
dataset,
|
|
915
|
-
version,
|
|
916
|
-
**values,
|
|
917
|
-
)
|
|
914
|
+
self.metastore.update_dataset_version(dataset, version, **values)
|
|
918
915
|
return
|
|
919
916
|
|
|
920
917
|
if not dataset_version.num_objects:
|
|
@@ -934,11 +931,7 @@ class Catalog:
|
|
|
934
931
|
if not values:
|
|
935
932
|
return
|
|
936
933
|
|
|
937
|
-
self.metastore.update_dataset_version(
|
|
938
|
-
dataset,
|
|
939
|
-
version,
|
|
940
|
-
**values,
|
|
941
|
-
)
|
|
934
|
+
self.metastore.update_dataset_version(dataset, version, **values)
|
|
942
935
|
|
|
943
936
|
def update_dataset(
|
|
944
937
|
self, dataset: DatasetRecord, conn=None, **kwargs
|
|
@@ -1116,13 +1109,16 @@ class Catalog:
|
|
|
1116
1109
|
return direct_dependencies
|
|
1117
1110
|
|
|
1118
1111
|
def ls_datasets(
|
|
1119
|
-
self,
|
|
1112
|
+
self,
|
|
1113
|
+
prefix: Optional[str] = None,
|
|
1114
|
+
include_listing: bool = False,
|
|
1115
|
+
studio: bool = False,
|
|
1120
1116
|
) -> Iterator[DatasetListRecord]:
|
|
1121
1117
|
from datachain.remote.studio import StudioClient
|
|
1122
1118
|
|
|
1123
1119
|
if studio:
|
|
1124
1120
|
client = StudioClient()
|
|
1125
|
-
response = client.ls_datasets()
|
|
1121
|
+
response = client.ls_datasets(prefix=prefix)
|
|
1126
1122
|
if not response.ok:
|
|
1127
1123
|
raise DataChainError(response.message)
|
|
1128
1124
|
if not response.data:
|
|
@@ -1133,6 +1129,8 @@ class Catalog:
|
|
|
1133
1129
|
for d in response.data
|
|
1134
1130
|
if not d.get("name", "").startswith(QUERY_DATASET_PREFIX)
|
|
1135
1131
|
)
|
|
1132
|
+
elif prefix:
|
|
1133
|
+
datasets = self.metastore.list_datasets_by_prefix(prefix)
|
|
1136
1134
|
else:
|
|
1137
1135
|
datasets = self.metastore.list_datasets()
|
|
1138
1136
|
|
|
@@ -1142,39 +1140,55 @@ class Catalog:
|
|
|
1142
1140
|
|
|
1143
1141
|
def list_datasets_versions(
|
|
1144
1142
|
self,
|
|
1143
|
+
prefix: Optional[str] = None,
|
|
1145
1144
|
include_listing: bool = False,
|
|
1145
|
+
with_job: bool = True,
|
|
1146
1146
|
studio: bool = False,
|
|
1147
1147
|
) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
|
|
1148
1148
|
"""Iterate over all dataset versions with related jobs."""
|
|
1149
1149
|
datasets = list(
|
|
1150
|
-
self.ls_datasets(
|
|
1150
|
+
self.ls_datasets(
|
|
1151
|
+
prefix=prefix, include_listing=include_listing, studio=studio
|
|
1152
|
+
)
|
|
1151
1153
|
)
|
|
1152
1154
|
|
|
1153
1155
|
# preselect dataset versions jobs from db to avoid multiple queries
|
|
1154
|
-
jobs_ids: set[str] = {
|
|
1155
|
-
v.job_id for ds in datasets for v in ds.versions if v.job_id
|
|
1156
|
-
}
|
|
1157
1156
|
jobs: dict[str, Job] = {}
|
|
1158
|
-
if
|
|
1159
|
-
|
|
1157
|
+
if with_job:
|
|
1158
|
+
jobs_ids: set[str] = {
|
|
1159
|
+
v.job_id for ds in datasets for v in ds.versions if v.job_id
|
|
1160
|
+
}
|
|
1161
|
+
if jobs_ids:
|
|
1162
|
+
jobs = {
|
|
1163
|
+
j.id: j for j in self.metastore.list_jobs_by_ids(list(jobs_ids))
|
|
1164
|
+
}
|
|
1160
1165
|
|
|
1161
1166
|
for d in datasets:
|
|
1162
1167
|
yield from (
|
|
1163
|
-
(d, v, jobs.get(str(v.job_id)) if v.job_id else None)
|
|
1168
|
+
(d, v, jobs.get(str(v.job_id)) if with_job and v.job_id else None)
|
|
1164
1169
|
for v in d.versions
|
|
1165
1170
|
)
|
|
1166
1171
|
|
|
1167
|
-
def listings(self):
|
|
1172
|
+
def listings(self, prefix: Optional[str] = None) -> list["ListingInfo"]:
|
|
1168
1173
|
"""
|
|
1169
1174
|
Returns list of ListingInfo objects which are representing specific
|
|
1170
1175
|
storage listing datasets
|
|
1171
1176
|
"""
|
|
1172
|
-
from datachain.lib.listing import is_listing_dataset
|
|
1177
|
+
from datachain.lib.listing import LISTING_PREFIX, is_listing_dataset
|
|
1173
1178
|
from datachain.lib.listing_info import ListingInfo
|
|
1174
1179
|
|
|
1180
|
+
if prefix and not prefix.startswith(LISTING_PREFIX):
|
|
1181
|
+
prefix = LISTING_PREFIX + prefix
|
|
1182
|
+
|
|
1183
|
+
listing_datasets_versions = self.list_datasets_versions(
|
|
1184
|
+
prefix=prefix,
|
|
1185
|
+
include_listing=True,
|
|
1186
|
+
with_job=False,
|
|
1187
|
+
)
|
|
1188
|
+
|
|
1175
1189
|
return [
|
|
1176
1190
|
ListingInfo.from_models(d, v, j)
|
|
1177
|
-
for d, v, j in
|
|
1191
|
+
for d, v, j in listing_datasets_versions
|
|
1178
1192
|
if is_listing_dataset(d.name)
|
|
1179
1193
|
]
|
|
1180
1194
|
|
|
@@ -74,7 +74,7 @@ class GCSClient(Client):
|
|
|
74
74
|
try:
|
|
75
75
|
await self._get_pages(prefix, page_queue)
|
|
76
76
|
found = await consumer
|
|
77
|
-
if not found:
|
|
77
|
+
if not found and prefix:
|
|
78
78
|
raise FileNotFoundError(f"Unable to resolve remote path: {prefix}")
|
|
79
79
|
finally:
|
|
80
80
|
consumer.cancel() # In case _get_pages() raised
|
|
@@ -80,7 +80,7 @@ class ClientS3(Client):
|
|
|
80
80
|
finally:
|
|
81
81
|
await page_queue.put(None)
|
|
82
82
|
|
|
83
|
-
async def process_pages(page_queue, result_queue):
|
|
83
|
+
async def process_pages(page_queue, result_queue, prefix):
|
|
84
84
|
found = False
|
|
85
85
|
with tqdm(desc=f"Listing {self.uri}", unit=" objects", leave=False) as pbar:
|
|
86
86
|
while (res := await page_queue.get()) is not None:
|
|
@@ -94,7 +94,7 @@ class ClientS3(Client):
|
|
|
94
94
|
if entries:
|
|
95
95
|
await result_queue.put(entries)
|
|
96
96
|
pbar.update(len(entries))
|
|
97
|
-
if not found:
|
|
97
|
+
if not found and prefix:
|
|
98
98
|
raise FileNotFoundError(f"Unable to resolve remote path: {prefix}")
|
|
99
99
|
|
|
100
100
|
try:
|
|
@@ -118,7 +118,9 @@ class ClientS3(Client):
|
|
|
118
118
|
Delimiter="",
|
|
119
119
|
)
|
|
120
120
|
page_queue: asyncio.Queue[list] = asyncio.Queue(2)
|
|
121
|
-
consumer = asyncio.create_task(
|
|
121
|
+
consumer = asyncio.create_task(
|
|
122
|
+
process_pages(page_queue, result_queue, prefix)
|
|
123
|
+
)
|
|
122
124
|
try:
|
|
123
125
|
await get_pages(it, page_queue)
|
|
124
126
|
await consumer
|
|
@@ -36,6 +36,7 @@ from datachain.dataset import (
|
|
|
36
36
|
)
|
|
37
37
|
from datachain.error import (
|
|
38
38
|
DatasetNotFoundError,
|
|
39
|
+
DatasetVersionNotFoundError,
|
|
39
40
|
TableMissingError,
|
|
40
41
|
)
|
|
41
42
|
from datachain.job import Job
|
|
@@ -273,7 +274,6 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
273
274
|
self,
|
|
274
275
|
job_id: str,
|
|
275
276
|
status: Optional[JobStatus] = None,
|
|
276
|
-
exit_code: Optional[int] = None,
|
|
277
277
|
error_message: Optional[str] = None,
|
|
278
278
|
error_stack: Optional[str] = None,
|
|
279
279
|
finished_at: Optional[datetime] = None,
|
|
@@ -620,22 +620,36 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
620
620
|
self, dataset: DatasetRecord, conn=None, **kwargs
|
|
621
621
|
) -> DatasetRecord:
|
|
622
622
|
"""Updates dataset fields."""
|
|
623
|
-
values = {}
|
|
624
|
-
dataset_values = {}
|
|
623
|
+
values: dict[str, Any] = {}
|
|
624
|
+
dataset_values: dict[str, Any] = {}
|
|
625
625
|
for field, value in kwargs.items():
|
|
626
|
-
if field in self._dataset_fields
|
|
627
|
-
|
|
628
|
-
|
|
626
|
+
if field in ("id", "created_at") or field not in self._dataset_fields:
|
|
627
|
+
continue # these fields are read-only or not applicable
|
|
628
|
+
|
|
629
|
+
if value is None and field in ("name", "status", "sources", "query_script"):
|
|
630
|
+
raise ValueError(f"Field {field} cannot be None")
|
|
631
|
+
if field == "name" and not value:
|
|
632
|
+
raise ValueError("name cannot be empty")
|
|
633
|
+
|
|
634
|
+
if field == "attrs":
|
|
635
|
+
if value is None:
|
|
636
|
+
values[field] = None
|
|
629
637
|
else:
|
|
630
|
-
values[field] = value
|
|
631
|
-
|
|
632
|
-
|
|
638
|
+
values[field] = json.dumps(value)
|
|
639
|
+
dataset_values[field] = value
|
|
640
|
+
elif field == "schema":
|
|
641
|
+
if value is None:
|
|
642
|
+
values[field] = None
|
|
643
|
+
dataset_values[field] = None
|
|
633
644
|
else:
|
|
634
|
-
|
|
645
|
+
values[field] = json.dumps(value)
|
|
646
|
+
dataset_values[field] = DatasetRecord.parse_schema(value)
|
|
647
|
+
else:
|
|
648
|
+
values[field] = value
|
|
649
|
+
dataset_values[field] = value
|
|
635
650
|
|
|
636
651
|
if not values:
|
|
637
|
-
#
|
|
638
|
-
return dataset
|
|
652
|
+
return dataset # nothing to update
|
|
639
653
|
|
|
640
654
|
d = self._datasets
|
|
641
655
|
self.db.execute(
|
|
@@ -651,36 +665,70 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
651
665
|
self, dataset: DatasetRecord, version: str, conn=None, **kwargs
|
|
652
666
|
) -> DatasetVersion:
|
|
653
667
|
"""Updates dataset fields."""
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
values = {}
|
|
657
|
-
version_values: dict = {}
|
|
668
|
+
values: dict[str, Any] = {}
|
|
669
|
+
version_values: dict[str, Any] = {}
|
|
658
670
|
for field, value in kwargs.items():
|
|
659
|
-
if
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
671
|
+
if (
|
|
672
|
+
field in ("id", "created_at")
|
|
673
|
+
or field not in self._dataset_version_fields
|
|
674
|
+
):
|
|
675
|
+
continue # these fields are read-only or not applicable
|
|
676
|
+
|
|
677
|
+
if value is None and field in (
|
|
678
|
+
"status",
|
|
679
|
+
"sources",
|
|
680
|
+
"query_script",
|
|
681
|
+
"error_message",
|
|
682
|
+
"error_stack",
|
|
683
|
+
"script_output",
|
|
684
|
+
"uuid",
|
|
685
|
+
):
|
|
686
|
+
raise ValueError(f"Field {field} cannot be None")
|
|
687
|
+
|
|
688
|
+
if field == "schema":
|
|
689
|
+
values[field] = json.dumps(value) if value else None
|
|
690
|
+
version_values[field] = (
|
|
691
|
+
DatasetRecord.parse_schema(value) if value else None
|
|
692
|
+
)
|
|
693
|
+
elif field == "feature_schema":
|
|
694
|
+
if value is None:
|
|
695
|
+
values[field] = None
|
|
696
|
+
else:
|
|
697
|
+
values[field] = json.dumps(value)
|
|
698
|
+
version_values[field] = value
|
|
699
|
+
elif field == "preview":
|
|
700
|
+
if value is None:
|
|
701
|
+
values[field] = None
|
|
702
|
+
elif not isinstance(value, list):
|
|
703
|
+
raise ValueError(
|
|
704
|
+
f"Field '{field}' must be a list, got {type(value).__name__}"
|
|
705
|
+
)
|
|
669
706
|
else:
|
|
670
|
-
values[field] = value
|
|
671
|
-
|
|
707
|
+
values[field] = json.dumps(value, cls=JSONSerialize)
|
|
708
|
+
version_values["_preview_data"] = value
|
|
709
|
+
else:
|
|
710
|
+
values[field] = value
|
|
711
|
+
version_values[field] = value
|
|
672
712
|
|
|
673
|
-
if values:
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
)
|
|
681
|
-
|
|
713
|
+
if not values:
|
|
714
|
+
return dataset.get_version(version)
|
|
715
|
+
|
|
716
|
+
dv = self._datasets_versions
|
|
717
|
+
self.db.execute(
|
|
718
|
+
self._datasets_versions_update()
|
|
719
|
+
.where(dv.c.dataset_id == dataset.id, dv.c.version == version)
|
|
720
|
+
.values(values),
|
|
721
|
+
conn=conn,
|
|
722
|
+
) # type: ignore [attr-defined]
|
|
723
|
+
|
|
724
|
+
for v in dataset.versions:
|
|
725
|
+
if v.version == version:
|
|
726
|
+
v.update(**version_values)
|
|
727
|
+
return v
|
|
682
728
|
|
|
683
|
-
|
|
729
|
+
raise DatasetVersionNotFoundError(
|
|
730
|
+
f"Dataset {dataset.name} does not have version {version}"
|
|
731
|
+
)
|
|
684
732
|
|
|
685
733
|
def _parse_dataset(self, rows) -> Optional[DatasetRecord]:
|
|
686
734
|
versions = [self.dataset_class.parse(*r) for r in rows]
|
|
@@ -812,7 +860,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
812
860
|
update_data["error_message"] = error_message
|
|
813
861
|
update_data["error_stack"] = error_stack
|
|
814
862
|
|
|
815
|
-
self.update_dataset(dataset, conn=conn, **update_data)
|
|
863
|
+
dataset = self.update_dataset(dataset, conn=conn, **update_data)
|
|
816
864
|
|
|
817
865
|
if version:
|
|
818
866
|
self.update_dataset_version(dataset, version, conn=conn, **update_data)
|
|
@@ -1064,7 +1112,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1064
1112
|
self,
|
|
1065
1113
|
job_id: str,
|
|
1066
1114
|
status: Optional[JobStatus] = None,
|
|
1067
|
-
exit_code: Optional[int] = None,
|
|
1068
1115
|
error_message: Optional[str] = None,
|
|
1069
1116
|
error_stack: Optional[str] = None,
|
|
1070
1117
|
finished_at: Optional[datetime] = None,
|
|
@@ -1075,8 +1122,6 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
1075
1122
|
values: dict = {}
|
|
1076
1123
|
if status is not None:
|
|
1077
1124
|
values["status"] = status
|
|
1078
|
-
if exit_code is not None:
|
|
1079
|
-
values["exit_code"] = exit_code
|
|
1080
1125
|
if error_message is not None:
|
|
1081
1126
|
values["error_message"] = error_message
|
|
1082
1127
|
if error_stack is not None:
|
|
@@ -93,7 +93,7 @@ class DatasetDependency:
|
|
|
93
93
|
if self.type == DatasetDependencyType.DATASET:
|
|
94
94
|
return self.name
|
|
95
95
|
|
|
96
|
-
list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/")
|
|
96
|
+
list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"))
|
|
97
97
|
assert list_dataset_name
|
|
98
98
|
return list_dataset_name
|
|
99
99
|
|