datachain 0.32.1__tar.gz → 0.32.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.32.1 → datachain-0.32.2}/.pre-commit-config.yaml +1 -1
- {datachain-0.32.1 → datachain-0.32.2}/PKG-INFO +2 -2
- {datachain-0.32.1 → datachain-0.32.2}/docs/studio/webhooks.md +12 -1
- {datachain-0.32.1 → datachain-0.32.2}/pyproject.toml +1 -1
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/dataset.py +2 -2
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/parquet.py +20 -5
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/storage.py +12 -6
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/storage_pattern.py +50 -99
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/namespaces.py +1 -1
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain.egg-info/PKG-INFO +2 -2
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain.egg-info/requires.txt +1 -1
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_metastore.py +1 -1
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_storage_pattern.py +61 -5
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_audio.py +1 -1
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_datachain.py +5 -5
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_namespace.py +7 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_storage_pattern.py +88 -22
- {datachain-0.32.1 → datachain-0.32.2}/.cruft.json +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.gitattributes +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/codecov.yaml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/dependabot.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/workflows/release.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/workflows/tests.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/.gitignore +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/LICENSE +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/README.rst +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/api_hooks.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/assets/webhook_list.png +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/auth/login.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/auth/logout.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/auth/team.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/auth/token.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/index.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/job/cancel.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/job/clusters.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/job/logs.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/job/ls.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/commands/job/run.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/contributing.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/examples.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/db_migrations.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/delta.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/env.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/index.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/namespaces.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/processing.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/remotes.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/guide/retry.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/index.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/overrides/main.html +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/quick-start.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/file.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/index.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/pose.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/segment.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/datachain.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/func.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/array.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/conditional.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/numeric.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/path.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/random.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/string.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/functions/window.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/index.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/toolkit.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/torch.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/references/udf.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/templates/main.dot +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/templates/operation.dot +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/templates/responses.def +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/docs/tutorials.md +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/mkdocs.yml +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/noxfile.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/setup.cfg +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/__main__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/asyn.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cache.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/cli/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/gcs.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/local.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/config.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/delta.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/error.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/fs/reference.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/fs/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/array.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/base.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/func.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/path.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/random.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/string.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/func/window.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/job.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/audio.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/projects.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/video.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/listing.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/model/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/namespace.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/node.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/progress.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/project.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/py.typed +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/batch.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/dataset.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/params.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/queue.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/schema.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/session.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/query/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/script_meta.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/semver.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/types.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/studio.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain/utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/conftest.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/data.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/examples/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/examples/test_examples.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/data/lena.jpg +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/test_array.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/test_path.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/test_random.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/functions/test_string.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/model/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_audio.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_batching.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_client.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_data_storage.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_datachain.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_delta.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_file.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_hf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_image.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_listing.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_ls.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_mutate.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_pull.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_query.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_read_database.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_retry.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_session.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_to_database.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_video.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/func/test_warehouse.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/test_atomicity.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/test_import_time.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/test_telemetry.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/model/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_client.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_config.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_func.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_metastore.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_query.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_semver.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_serializer.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_session.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_utils.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.32.1 → datachain-0.32.2}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.32.1
|
|
3
|
+
Version: 0.32.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -102,7 +102,7 @@ Requires-Dist: scipy; extra == "tests"
|
|
|
102
102
|
Requires-Dist: ultralytics; extra == "tests"
|
|
103
103
|
Provides-Extra: dev
|
|
104
104
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
105
|
-
Requires-Dist: mypy==1.
|
|
105
|
+
Requires-Dist: mypy==1.18.1; extra == "dev"
|
|
106
106
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
107
107
|
Requires-Dist: types-dateparser; extra == "dev"
|
|
108
108
|
Requires-Dist: types-pytz; extra == "dev"
|
|
@@ -28,12 +28,14 @@ Payload:
|
|
|
28
28
|
"id": "da59df47-d121-4eb6-aa76-dc452755544e",
|
|
29
29
|
"status": "COMPLETE",
|
|
30
30
|
"error_message": "",
|
|
31
|
+
"name": "job_query.py",
|
|
31
32
|
"created_at": "2021-07-27T16:02:08.070557",
|
|
32
33
|
"updated_at": "2021-07-27T16:22:08.070557",
|
|
33
34
|
"finished_at": "2021-07-27T16:22:08.070557",
|
|
34
35
|
"url": "https://studio.datachain.ai/team/TeamName/datasets/jobs/da59df47-d121-4eb6-aa76-dc452755544e"
|
|
35
36
|
},
|
|
36
37
|
"timestamp": "2021-07-27T16:22:08.070557",
|
|
38
|
+
"text": "Job job_query.py (da59df47-d121-4eb6-aa76-dc452755544e) changed its status to COMPLETE"
|
|
37
39
|
}
|
|
38
40
|
```
|
|
39
41
|
|
|
@@ -45,7 +47,8 @@ Header: `http-x-datachain-event`: `PING`.
|
|
|
45
47
|
Payload:
|
|
46
48
|
```json
|
|
47
49
|
{
|
|
48
|
-
"action": "PING"
|
|
50
|
+
"action": "PING",
|
|
51
|
+
"message": "Webhook connection test successful"
|
|
49
52
|
}
|
|
50
53
|
```
|
|
51
54
|
|
|
@@ -253,6 +256,14 @@ else:
|
|
|
253
256
|
print("Warning: No signature header found")
|
|
254
257
|
```
|
|
255
258
|
|
|
259
|
+
## Slack Integration
|
|
260
|
+
|
|
261
|
+
You can also use this webhook feature to send messages to Slack. To integrate Slack with Studio:
|
|
262
|
+
|
|
263
|
+
1. Follow the [Slack documentation](https://docs.slack.dev/messaging/sending-messages-using-incoming-webhooks/) to create an incoming webhook, and copy the webhook address, which has the following format: `https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX`.
|
|
264
|
+
2. Use the address to create a webhook in Studio as described [above](#creating-webhooks).
|
|
265
|
+
3. You should now be able to view the messages in the connected Slack channel.
|
|
266
|
+
|
|
256
267
|
## Best practices for using Webhooks
|
|
257
268
|
|
|
258
269
|
1. You should only subscribe to the webhook events that you need. This will reduce the amount of work your server needs to do.
|
|
@@ -619,7 +619,7 @@ class DatasetRecord:
|
|
|
619
619
|
if not self.versions:
|
|
620
620
|
return "1.0.0"
|
|
621
621
|
|
|
622
|
-
major,
|
|
622
|
+
major, _, _ = semver.parse(self.latest_version)
|
|
623
623
|
return semver.create(major + 1, 0, 0)
|
|
624
624
|
|
|
625
625
|
@property
|
|
@@ -630,7 +630,7 @@ class DatasetRecord:
|
|
|
630
630
|
if not self.versions:
|
|
631
631
|
return "1.0.0"
|
|
632
632
|
|
|
633
|
-
major, minor,
|
|
633
|
+
major, minor, _ = semver.parse(self.latest_version)
|
|
634
634
|
return semver.create(major, minor + 1, 0)
|
|
635
635
|
|
|
636
636
|
@property
|
|
@@ -26,8 +26,14 @@ def read_parquet(
|
|
|
26
26
|
"""Generate chain from parquet files.
|
|
27
27
|
|
|
28
28
|
Parameters:
|
|
29
|
-
path: Storage
|
|
30
|
-
|
|
29
|
+
path: Storage path(s) or URI(s). Can be a local path or start with a
|
|
30
|
+
storage prefix like `s3://`, `gs://`, `az://`, `hf://` or `file:///`.
|
|
31
|
+
Supports glob patterns:
|
|
32
|
+
- `*` : wildcard
|
|
33
|
+
- `**` : recursive wildcard
|
|
34
|
+
- `?` : single character
|
|
35
|
+
- `{a,b}` : brace expansion list
|
|
36
|
+
- `{1..9}` : brace numeric or alphabetic range
|
|
31
37
|
partitioning: Any pyarrow partitioning schema.
|
|
32
38
|
output: Dictionary defining column names and their corresponding types.
|
|
33
39
|
column: Created column name.
|
|
@@ -43,10 +49,19 @@ def read_parquet(
|
|
|
43
49
|
dc.read_parquet("s3://mybucket/file.parquet")
|
|
44
50
|
```
|
|
45
51
|
|
|
46
|
-
|
|
52
|
+
All files from a directory:
|
|
47
53
|
```py
|
|
48
|
-
|
|
49
|
-
|
|
54
|
+
dc.read_parquet("s3://mybucket/dir/")
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Only parquet files from a directory and all its subdirectories:
|
|
58
|
+
```py
|
|
59
|
+
dc.read_parquet("s3://mybucket/dir/**/*.parquet")
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Using filename patterns - numeric, list, starting with zeros:
|
|
63
|
+
```py
|
|
64
|
+
dc.read_parquet("s3://mybucket/202{1..4}/{yellow,green}-{01..12}.parquet")
|
|
50
65
|
```
|
|
51
66
|
"""
|
|
52
67
|
from .storage import read_storage
|
|
@@ -51,7 +51,8 @@ def read_storage(
|
|
|
51
51
|
- `*` : wildcard
|
|
52
52
|
- `**` : recursive wildcard
|
|
53
53
|
- `?` : single character
|
|
54
|
-
- `{a,b}` : brace expansion
|
|
54
|
+
- `{a,b}` : brace expansion list
|
|
55
|
+
- `{1..9}` : brace numeric or alphabetic range
|
|
55
56
|
type: read file as "binary", "text", or "image" data. Default is "binary".
|
|
56
57
|
recursive: search recursively for the given path.
|
|
57
58
|
column: Column name that will contain File objects. Default is "file".
|
|
@@ -88,27 +89,32 @@ def read_storage(
|
|
|
88
89
|
Simple call from s3:
|
|
89
90
|
```python
|
|
90
91
|
import datachain as dc
|
|
91
|
-
|
|
92
|
+
dc.read_storage("s3://my-bucket/my-dir")
|
|
92
93
|
```
|
|
93
94
|
|
|
94
95
|
Match all .json files recursively using glob pattern
|
|
95
96
|
```py
|
|
96
|
-
|
|
97
|
+
dc.read_storage("gs://bucket/meta/**/*.json")
|
|
97
98
|
```
|
|
98
99
|
|
|
99
100
|
Match image file extensions for directories with pattern
|
|
100
101
|
```py
|
|
101
|
-
|
|
102
|
+
dc.read_storage("s3://bucket/202?/**/*.{jpg,jpeg,png}")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
By ranges in filenames:
|
|
106
|
+
```py
|
|
107
|
+
dc.read_storage("s3://bucket/202{1..4}/**/*.{jpg,jpeg,png}")
|
|
102
108
|
```
|
|
103
109
|
|
|
104
110
|
Multiple URIs:
|
|
105
111
|
```python
|
|
106
|
-
|
|
112
|
+
dc.read_storage(["s3://my-bkt/dir1", "s3://bucket2/dir2/dir3"])
|
|
107
113
|
```
|
|
108
114
|
|
|
109
115
|
With AWS S3-compatible storage:
|
|
110
116
|
```python
|
|
111
|
-
|
|
117
|
+
dc.read_storage(
|
|
112
118
|
"s3://my-bucket/my-dir",
|
|
113
119
|
client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
|
|
114
120
|
)
|
|
@@ -12,61 +12,37 @@ def validate_cloud_bucket_name(uri: str) -> None:
|
|
|
12
12
|
"""
|
|
13
13
|
Validate that cloud storage bucket names don't contain glob patterns.
|
|
14
14
|
|
|
15
|
-
Args:
|
|
16
|
-
uri: URI to validate
|
|
17
|
-
|
|
18
15
|
Raises:
|
|
19
16
|
ValueError: If a cloud storage bucket name contains glob patterns
|
|
20
17
|
"""
|
|
21
18
|
if not is_cloud_uri(uri):
|
|
22
19
|
return
|
|
23
20
|
|
|
24
|
-
# Extract bucket name (everything between :// and first /)
|
|
25
21
|
if "://" in uri:
|
|
26
22
|
scheme_end = uri.index("://") + 3
|
|
27
23
|
path_part = uri[scheme_end:]
|
|
28
24
|
|
|
29
|
-
# Get the bucket name (first segment)
|
|
30
25
|
if "/" in path_part:
|
|
31
26
|
bucket_name = path_part.split("/")[0]
|
|
32
27
|
else:
|
|
33
28
|
bucket_name = path_part
|
|
34
29
|
|
|
35
|
-
# Check if bucket name contains glob patterns
|
|
36
30
|
glob_chars = ["*", "?", "[", "]", "{", "}"]
|
|
37
31
|
if any(char in bucket_name for char in glob_chars):
|
|
38
32
|
raise ValueError(f"Glob patterns in bucket names are not supported: {uri}")
|
|
39
33
|
|
|
40
34
|
|
|
41
35
|
def split_uri_pattern(uri: str) -> tuple[str, Union[str, None]]:
|
|
42
|
-
"""
|
|
43
|
-
Split a URI into base path and glob pattern.
|
|
44
|
-
|
|
45
|
-
Args:
|
|
46
|
-
uri: URI that may contain glob patterns (*, **, ?, {})
|
|
47
|
-
|
|
48
|
-
Returns:
|
|
49
|
-
Tuple of (base_uri, pattern) where pattern is None if no glob pattern found
|
|
50
|
-
|
|
51
|
-
Examples:
|
|
52
|
-
"s3://bucket/dir/*.mp3" -> ("s3://bucket/dir", "*.mp3")
|
|
53
|
-
"s3://bucket/**/*.mp3" -> ("s3://bucket", "**/*.mp3")
|
|
54
|
-
"s3://bucket/dir" -> ("s3://bucket/dir", None)
|
|
55
|
-
"""
|
|
36
|
+
"""Split a URI into base path and glob pattern."""
|
|
56
37
|
if not any(char in uri for char in ["*", "?", "[", "{", "}"]):
|
|
57
38
|
return uri, None
|
|
58
39
|
|
|
59
|
-
# Handle different URI schemes
|
|
60
40
|
if "://" in uri:
|
|
61
|
-
# Split into scheme and path
|
|
62
41
|
scheme_end = uri.index("://") + 3
|
|
63
42
|
scheme_part = uri[:scheme_end]
|
|
64
43
|
path_part = uri[scheme_end:]
|
|
65
|
-
|
|
66
|
-
# Find where the glob pattern starts
|
|
67
44
|
path_segments = path_part.split("/")
|
|
68
45
|
|
|
69
|
-
# Find first segment with glob pattern
|
|
70
46
|
pattern_start_idx = None
|
|
71
47
|
for i, segment in enumerate(path_segments):
|
|
72
48
|
# Check for glob patterns including brace expansion
|
|
@@ -77,9 +53,7 @@ def split_uri_pattern(uri: str) -> tuple[str, Union[str, None]]:
|
|
|
77
53
|
if pattern_start_idx is None:
|
|
78
54
|
return uri, None
|
|
79
55
|
|
|
80
|
-
# Split into base and pattern
|
|
81
56
|
if pattern_start_idx == 0:
|
|
82
|
-
# Pattern at root of bucket
|
|
83
57
|
base = scheme_part + path_segments[0]
|
|
84
58
|
pattern = "/".join(path_segments[1:]) if len(path_segments) > 1 else "*"
|
|
85
59
|
else:
|
|
@@ -87,13 +61,11 @@ def split_uri_pattern(uri: str) -> tuple[str, Union[str, None]]:
|
|
|
87
61
|
pattern = "/".join(path_segments[pattern_start_idx:])
|
|
88
62
|
|
|
89
63
|
return base, pattern
|
|
90
|
-
|
|
64
|
+
|
|
91
65
|
path_segments = uri.split("/")
|
|
92
66
|
|
|
93
|
-
# Find first segment with glob pattern
|
|
94
67
|
pattern_start_idx = None
|
|
95
68
|
for i, segment in enumerate(path_segments):
|
|
96
|
-
# Check for glob patterns including brace expansion
|
|
97
69
|
if glob.has_magic(segment) or "{" in segment:
|
|
98
70
|
pattern_start_idx = i
|
|
99
71
|
break
|
|
@@ -101,7 +73,6 @@ def split_uri_pattern(uri: str) -> tuple[str, Union[str, None]]:
|
|
|
101
73
|
if pattern_start_idx is None:
|
|
102
74
|
return uri, None
|
|
103
75
|
|
|
104
|
-
# Split into base and pattern
|
|
105
76
|
base = "/".join(path_segments[:pattern_start_idx]) if pattern_start_idx > 0 else "/"
|
|
106
77
|
pattern = "/".join(path_segments[pattern_start_idx:])
|
|
107
78
|
|
|
@@ -109,51 +80,30 @@ def split_uri_pattern(uri: str) -> tuple[str, Union[str, None]]:
|
|
|
109
80
|
|
|
110
81
|
|
|
111
82
|
def should_use_recursion(pattern: str, user_recursive: bool) -> bool:
|
|
112
|
-
"""
|
|
113
|
-
Determine if we should use recursive listing based on the pattern.
|
|
114
|
-
|
|
115
|
-
Args:
|
|
116
|
-
pattern: The glob pattern extracted from URI
|
|
117
|
-
user_recursive: User's recursive preference
|
|
118
|
-
|
|
119
|
-
Returns:
|
|
120
|
-
True if recursive listing should be used
|
|
121
|
-
|
|
122
|
-
Examples:
|
|
123
|
-
"*" -> False (single level only)
|
|
124
|
-
"*.mp3" -> False (single level only)
|
|
125
|
-
"**/*.mp3" -> True (globstar requires recursion)
|
|
126
|
-
"dir/*/file.txt" -> True (multi-level pattern)
|
|
127
|
-
"""
|
|
128
83
|
if not user_recursive:
|
|
129
|
-
# If user explicitly wants non-recursive, respect that
|
|
130
84
|
return False
|
|
131
85
|
|
|
132
|
-
# If pattern contains globstar, definitely need recursion
|
|
133
86
|
if "**" in pattern:
|
|
134
87
|
return True
|
|
135
88
|
|
|
136
|
-
# If pattern contains path separators, it needs recursion
|
|
137
|
-
# Single-level patterns like "*", "*.txt", "file?" should not be recursive
|
|
138
89
|
return "/" in pattern
|
|
139
90
|
|
|
140
91
|
|
|
141
92
|
def expand_brace_pattern(pattern: str) -> list[str]:
|
|
142
93
|
"""
|
|
143
|
-
Recursively expand brace patterns
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
Returns:
|
|
150
|
-
List of expanded patterns
|
|
94
|
+
Recursively expand brace patterns into multiple glob patterns.
|
|
95
|
+
Supports:
|
|
96
|
+
- Comma-separated lists: *.{mp3,wav}
|
|
97
|
+
- Numeric ranges: file{1..10}
|
|
98
|
+
- Zero-padded numeric ranges: file{01..10}
|
|
99
|
+
- Character ranges: file{a..z}
|
|
151
100
|
|
|
152
101
|
Examples:
|
|
153
102
|
"*.{mp3,wav}" -> ["*.mp3", "*.wav"]
|
|
103
|
+
"file{1..3}" -> ["file1", "file2", "file3"]
|
|
104
|
+
"file{01..03}" -> ["file01", "file02", "file03"]
|
|
105
|
+
"file{a..c}" -> ["filea", "fileb", "filec"]
|
|
154
106
|
"{a,b}/{c,d}" -> ["a/c", "a/d", "b/c", "b/d"]
|
|
155
|
-
"*.txt" -> ["*.txt"]
|
|
156
|
-
"{{a,b}}" -> ["{a}", "{b}"] # Handle double braces
|
|
157
107
|
"""
|
|
158
108
|
if "{" not in pattern or "}" not in pattern:
|
|
159
109
|
return [pattern]
|
|
@@ -162,11 +112,9 @@ def expand_brace_pattern(pattern: str) -> list[str]:
|
|
|
162
112
|
|
|
163
113
|
|
|
164
114
|
def _expand_single_braces(pattern: str) -> list[str]:
|
|
165
|
-
"""Helper to expand single-level braces."""
|
|
166
115
|
if "{" not in pattern or "}" not in pattern:
|
|
167
116
|
return [pattern]
|
|
168
117
|
|
|
169
|
-
# Find the first complete brace pattern
|
|
170
118
|
start = pattern.index("{")
|
|
171
119
|
end = start
|
|
172
120
|
depth = 0
|
|
@@ -184,46 +132,66 @@ def _expand_single_braces(pattern: str) -> list[str]:
|
|
|
184
132
|
|
|
185
133
|
prefix = pattern[:start]
|
|
186
134
|
suffix = pattern[end + 1 :]
|
|
187
|
-
|
|
135
|
+
brace_content = pattern[start + 1 : end]
|
|
136
|
+
|
|
137
|
+
if ".." in brace_content:
|
|
138
|
+
options = _expand_range(brace_content)
|
|
139
|
+
else:
|
|
140
|
+
options = [opt.strip() for opt in brace_content.split(",")]
|
|
188
141
|
|
|
189
|
-
# Generate all combinations and recursively expand
|
|
190
142
|
expanded = []
|
|
191
143
|
for option in options:
|
|
192
|
-
combined = prefix + option
|
|
193
|
-
# Recursively expand any remaining braces
|
|
144
|
+
combined = prefix + option + suffix
|
|
194
145
|
expanded.extend(_expand_single_braces(combined))
|
|
195
146
|
|
|
196
147
|
return expanded
|
|
197
148
|
|
|
198
149
|
|
|
199
|
-
def
|
|
200
|
-
""
|
|
150
|
+
def _expand_range(range_spec: str) -> list[str]: # noqa: PLR0911
|
|
151
|
+
if ".." not in range_spec:
|
|
152
|
+
return [range_spec]
|
|
201
153
|
|
|
202
|
-
|
|
203
|
-
|
|
154
|
+
parts = range_spec.split("..")
|
|
155
|
+
if len(parts) != 2:
|
|
156
|
+
return [range_spec]
|
|
204
157
|
|
|
205
|
-
|
|
206
|
-
filter_pattern: Pattern that may contain globstars (**)
|
|
158
|
+
start, end = parts[0], parts[1]
|
|
207
159
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
160
|
+
if start.isdigit() and end.isdigit():
|
|
161
|
+
pad_width = max(len(start), len(end)) if start[0] == "0" or end[0] == "0" else 0
|
|
162
|
+
start_num = int(start)
|
|
163
|
+
end_num = int(end)
|
|
164
|
+
|
|
165
|
+
if start_num <= end_num:
|
|
166
|
+
if pad_width > 0:
|
|
167
|
+
return [str(i).zfill(pad_width) for i in range(start_num, end_num + 1)]
|
|
168
|
+
return [str(i) for i in range(start_num, end_num + 1)]
|
|
169
|
+
if pad_width > 0:
|
|
170
|
+
return [str(i).zfill(pad_width) for i in range(start_num, end_num - 1, -1)]
|
|
171
|
+
return [str(i) for i in range(start_num, end_num - 1, -1)]
|
|
172
|
+
|
|
173
|
+
if len(start) == 1 and len(end) == 1 and start.isalpha() and end.isalpha():
|
|
174
|
+
start_ord = ord(start)
|
|
175
|
+
end_ord = ord(end)
|
|
176
|
+
|
|
177
|
+
if start_ord <= end_ord:
|
|
178
|
+
return [chr(i) for i in range(start_ord, end_ord + 1)]
|
|
179
|
+
return [chr(i) for i in range(start_ord, end_ord - 1, -1)]
|
|
180
|
+
|
|
181
|
+
return [range_spec]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def convert_globstar_to_glob(filter_pattern: str) -> str:
|
|
211
185
|
if "**" not in filter_pattern:
|
|
212
186
|
return filter_pattern
|
|
213
187
|
|
|
214
188
|
parts = filter_pattern.split("/")
|
|
215
189
|
globstar_positions = [i for i, p in enumerate(parts) if p == "**"]
|
|
216
190
|
|
|
217
|
-
# Handle different cases based on number of globstars
|
|
218
191
|
num_globstars = len(globstar_positions)
|
|
219
192
|
|
|
220
193
|
if num_globstars <= 1:
|
|
221
|
-
# Special case: pattern like **/* means zero or more directories
|
|
222
|
-
# This is tricky because GLOB can't express "zero or more"
|
|
223
|
-
# We need different handling based on the pattern structure
|
|
224
|
-
|
|
225
194
|
if filter_pattern == "**/*":
|
|
226
|
-
# Match everything
|
|
227
195
|
return "*"
|
|
228
196
|
if filter_pattern.startswith("**/"):
|
|
229
197
|
remaining = filter_pattern[3:]
|
|
@@ -236,20 +204,11 @@ def convert_globstar_to_glob(filter_pattern: str) -> str:
|
|
|
236
204
|
# that works with recursive listing
|
|
237
205
|
# Special handling: if it's a simple extension pattern, match broadly
|
|
238
206
|
if remaining.startswith("*."):
|
|
239
|
-
# Pattern like **/*.ext - match any file with this extension
|
|
240
|
-
# This matches *.ext at current level and deeper with recursion:
|
|
241
207
|
return remaining
|
|
242
|
-
# Pattern like **/temp?.* - match as filename in subdirs
|
|
243
208
|
return f"*/{remaining}"
|
|
244
209
|
|
|
245
|
-
# Default: Zero or one globstar - simple replacement
|
|
246
210
|
return filter_pattern.replace("**", "*")
|
|
247
211
|
|
|
248
|
-
# Multiple globstars - need more careful handling
|
|
249
|
-
# For patterns like **/level?/backup/**/*.ext
|
|
250
|
-
# We want to match any path containing /level?/backup/ and ending with .ext
|
|
251
|
-
|
|
252
|
-
# Find middle directories (between first and last **)
|
|
253
212
|
middle_parts = []
|
|
254
213
|
start_idx = globstar_positions[0] + 1
|
|
255
214
|
end_idx = globstar_positions[-1]
|
|
@@ -258,17 +217,12 @@ def convert_globstar_to_glob(filter_pattern: str) -> str:
|
|
|
258
217
|
middle_parts.append(parts[i])
|
|
259
218
|
|
|
260
219
|
if not middle_parts:
|
|
261
|
-
# No fixed middle parts, just use wildcards
|
|
262
220
|
result = filter_pattern.replace("**", "*")
|
|
263
221
|
else:
|
|
264
|
-
# Create pattern that matches the middle parts
|
|
265
222
|
middle_pattern = "/".join(middle_parts)
|
|
266
|
-
# Get the file pattern at the end if any
|
|
267
223
|
last_part = parts[-1] if parts[-1] != "**" else "*"
|
|
268
224
|
|
|
269
|
-
# Match any path containing this pattern
|
|
270
225
|
if last_part != "*":
|
|
271
|
-
# Has specific file pattern
|
|
272
226
|
result = f"*{middle_pattern}*{last_part}"
|
|
273
227
|
else:
|
|
274
228
|
result = f"*{middle_pattern}*"
|
|
@@ -287,14 +241,11 @@ def apply_glob_filter(
|
|
|
287
241
|
|
|
288
242
|
chain = ls(dc, list_path, recursive=use_recursive, column=column)
|
|
289
243
|
|
|
290
|
-
# If pattern doesn't contain path separator and list_path is not empty,
|
|
291
|
-
# prepend the list_path to make the pattern match correctly
|
|
292
244
|
if list_path and "/" not in pattern:
|
|
293
245
|
filter_pattern = f"{list_path.rstrip('/')}/{pattern}"
|
|
294
246
|
else:
|
|
295
247
|
filter_pattern = pattern
|
|
296
248
|
|
|
297
|
-
# Convert globstar patterns to GLOB-compatible patterns
|
|
298
249
|
glob_pattern = convert_globstar_to_glob(filter_pattern)
|
|
299
250
|
|
|
300
251
|
return chain.filter(Column(f"{column}.path").glob(glob_pattern))
|
|
@@ -77,7 +77,7 @@ def ls(session: Optional[Session] = None) -> list[Namespace]:
|
|
|
77
77
|
return Session.get(session).catalog.metastore.list_namespaces()
|
|
78
78
|
|
|
79
79
|
|
|
80
|
-
def delete_namespace(name: str, session: Optional[Session]) -> None:
|
|
80
|
+
def delete_namespace(name: str, session: Optional[Session] = None) -> None:
|
|
81
81
|
"""
|
|
82
82
|
Removes a namespace by name.
|
|
83
83
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.32.1
|
|
3
|
+
Version: 0.32.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -102,7 +102,7 @@ Requires-Dist: scipy; extra == "tests"
|
|
|
102
102
|
Requires-Dist: ultralytics; extra == "tests"
|
|
103
103
|
Provides-Extra: dev
|
|
104
104
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
105
|
-
Requires-Dist: mypy==1.
|
|
105
|
+
Requires-Dist: mypy==1.18.1; extra == "dev"
|
|
106
106
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
107
107
|
Requires-Dist: types-dateparser; extra == "dev"
|
|
108
108
|
Requires-Dist: types-pytz; extra == "dev"
|
|
@@ -282,7 +282,7 @@ def test_remove_dataset(metastore):
|
|
|
282
282
|
}
|
|
283
283
|
|
|
284
284
|
metastore.remove_dataset(ds)
|
|
285
|
-
with pytest.raises(Exception, match="Dataset .+ not found"):
|
|
285
|
+
with pytest.raises(Exception, match=r"Dataset .+ not found"):
|
|
286
286
|
metastore.get_dataset(ds.name)
|
|
287
287
|
|
|
288
288
|
# dependencies should also be deleted and cleaned up
|
|
@@ -77,7 +77,6 @@ def tmp_dir(tmp_path):
|
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
def test_simple_wildcard(tmp_dir):
|
|
80
|
-
# Single level wildcard
|
|
81
80
|
result = dc.read_storage(f"{tmp_dir}/deep/level1/temp/*.tmp")
|
|
82
81
|
names = {f.name for f in result.to_values("file")}
|
|
83
82
|
assert names == {"temp1.tmp", "temp2.tmp"}
|
|
@@ -224,18 +223,23 @@ def test_mixed_pattern_types(tmp_dir):
|
|
|
224
223
|
|
|
225
224
|
def test_glob_pattern_in_bucket_name_raises_error():
|
|
226
225
|
with pytest.raises(
|
|
227
|
-
ValueError, match="Glob patterns in bucket names are not supported.*bucket
|
|
226
|
+
ValueError, match=r"Glob patterns in bucket names are not supported.*bucket-\*"
|
|
228
227
|
):
|
|
229
228
|
dc.read_storage("s3://bucket-*/data/file.txt")
|
|
230
229
|
|
|
231
230
|
with pytest.raises(
|
|
232
|
-
ValueError, match="Glob patterns in bucket names are not supported.*bucket
|
|
231
|
+
ValueError, match=r"Glob patterns in bucket names are not supported.*bucket-\?"
|
|
233
232
|
):
|
|
234
233
|
dc.read_storage("s3://bucket-?/files/*.txt")
|
|
235
234
|
|
|
236
235
|
with pytest.raises(
|
|
237
236
|
ValueError,
|
|
238
|
-
|
|
237
|
+
# Brace expansion appears literally in the message, we only need to
|
|
238
|
+
# escape braces for the regex engine, not double escape like before.
|
|
239
|
+
match=(
|
|
240
|
+
r"Glob patterns in bucket names are not supported.*"
|
|
241
|
+
r"bucket-\{dev,prod\}/logs/.*"
|
|
242
|
+
),
|
|
239
243
|
):
|
|
240
244
|
dc.read_storage("s3://bucket-{dev,prod}/logs/*.log")
|
|
241
245
|
|
|
@@ -256,6 +260,58 @@ def test_hugging_face_glob_patterns():
|
|
|
256
260
|
|
|
257
261
|
with pytest.raises(
|
|
258
262
|
ValueError,
|
|
259
|
-
match="Glob patterns in bucket names are not supported.*hf://datasets",
|
|
263
|
+
match=r"Glob patterns in bucket names are not supported.*hf://datasets",
|
|
260
264
|
):
|
|
261
265
|
validate_cloud_bucket_name("hf://datasets*/username/repo-name/data/file.txt")
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def test_brace_expansion_numeric_ranges(tmp_dir):
|
|
269
|
+
(tmp_dir / "deep").mkdir(exist_ok=True)
|
|
270
|
+
|
|
271
|
+
for i in range(1, 6):
|
|
272
|
+
(tmp_dir / "deep" / f"file{i}.txt").write_text(f"content {i}")
|
|
273
|
+
|
|
274
|
+
result = dc.read_storage(f"{tmp_dir}/deep/file{{1..3}}.txt")
|
|
275
|
+
files = sorted(f.name for f in result.to_values("file"))
|
|
276
|
+
assert files == ["file1.txt", "file2.txt", "file3.txt"]
|
|
277
|
+
|
|
278
|
+
for i in range(1, 10):
|
|
279
|
+
(tmp_dir / "deep" / f"data{str(i).zfill(2)}.log").write_text(f"log {i}")
|
|
280
|
+
|
|
281
|
+
result = dc.read_storage(f"{tmp_dir}/deep/data{{01..05}}.log")
|
|
282
|
+
files = sorted(f.name for f in result.to_values("file"))
|
|
283
|
+
assert files == [
|
|
284
|
+
"data01.log",
|
|
285
|
+
"data02.log",
|
|
286
|
+
"data03.log",
|
|
287
|
+
"data04.log",
|
|
288
|
+
"data05.log",
|
|
289
|
+
]
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def test_brace_expansion_character_ranges(tmp_dir):
|
|
293
|
+
(tmp_dir / "deep").mkdir(exist_ok=True)
|
|
294
|
+
for char in "abcde":
|
|
295
|
+
dir_path = tmp_dir / "deep" / f"dir-{char}"
|
|
296
|
+
dir_path.mkdir()
|
|
297
|
+
(dir_path / "file.txt").write_text(f"content {char}")
|
|
298
|
+
|
|
299
|
+
result = dc.read_storage(f"{tmp_dir}/deep/dir-{{a..c}}/file.txt")
|
|
300
|
+
dirs = sorted(f.source.split("/")[-1] for f in result.to_values("file"))
|
|
301
|
+
assert dirs == ["dir-a", "dir-b", "dir-c"]
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def test_brace_expansion_combined_patterns(tmp_dir):
|
|
305
|
+
(tmp_dir / "deep").mkdir(exist_ok=True)
|
|
306
|
+
for year in ["2005"]:
|
|
307
|
+
for month in range(1, 13):
|
|
308
|
+
filename = f"data-{year}-{str(month).zfill(2)}.csv"
|
|
309
|
+
(tmp_dir / "deep" / filename).write_text(f"data {year}-{month}")
|
|
310
|
+
|
|
311
|
+
result = dc.read_storage(f"{tmp_dir}/deep/data-2005-{{01..03}}.csv")
|
|
312
|
+
files = sorted([f.name for f in result.to_values("file")])
|
|
313
|
+
assert files == ["data-2005-01.csv", "data-2005-02.csv", "data-2005-03.csv"]
|
|
314
|
+
|
|
315
|
+
result = dc.read_storage(f"{tmp_dir}/deep/data-*-{{10..12}}.csv")
|
|
316
|
+
files = sorted(f.name for f in result.to_values("file"))
|
|
317
|
+
assert files == ["data-2005-10.csv", "data-2005-11.csv", "data-2005-12.csv"]
|
|
@@ -178,7 +178,7 @@ def test_save_audio_validation(audio_file, tmp_path):
|
|
|
178
178
|
with pytest.raises(ValueError, match="start time must be non-negative"):
|
|
179
179
|
save_audio(audio_file, output=str(tmp_path), start=-1.0, end=1.0)
|
|
180
180
|
|
|
181
|
-
with pytest.raises(ValueError, match="Can't save audio.*invalid time range"):
|
|
181
|
+
with pytest.raises(ValueError, match=r"Can't save audio.*invalid time range"):
|
|
182
182
|
save_audio(audio_file, output=str(tmp_path), start=2.0, end=1.0)
|
|
183
183
|
|
|
184
184
|
|
|
@@ -2182,18 +2182,18 @@ def test_union_different_columns(test_session):
|
|
|
2182
2182
|
chain2 = dc.read_values(value=[3, 4], session=test_session)
|
|
2183
2183
|
chain3 = dc.read_values(other=["a", "different", "thing"], session=test_session)
|
|
2184
2184
|
with pytest.raises(
|
|
2185
|
-
ValueError, match="Cannot perform union. name only present in left"
|
|
2185
|
+
ValueError, match=r"Cannot perform union. name only present in left"
|
|
2186
2186
|
):
|
|
2187
2187
|
chain1.union(chain2).show()
|
|
2188
2188
|
with pytest.raises(
|
|
2189
|
-
ValueError, match="Cannot perform union. name only present in right"
|
|
2189
|
+
ValueError, match=r"Cannot perform union. name only present in right"
|
|
2190
2190
|
):
|
|
2191
2191
|
chain2.union(chain1).show()
|
|
2192
2192
|
with pytest.raises(
|
|
2193
2193
|
ValueError,
|
|
2194
|
-
match="Cannot perform union. "
|
|
2195
|
-
"other only present in left. "
|
|
2196
|
-
"name, value only present in right",
|
|
2194
|
+
match=r"Cannot perform union. "
|
|
2195
|
+
r"other only present in left. "
|
|
2196
|
+
r"name, value only present in right",
|
|
2197
2197
|
):
|
|
2198
2198
|
chain3.union(chain1).show()
|
|
2199
2199
|
|