datachain 0.31.3__tar.gz → 0.31.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.31.3 → datachain-0.31.4}/PKG-INFO +1 -1
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/catalog.py +22 -58
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/file.py +95 -18
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_file.py +68 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_query.py +3 -22
- {datachain-0.31.3 → datachain-0.31.4}/.cruft.json +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.gitattributes +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/codecov.yaml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/dependabot.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/release.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/tests.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.gitignore +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/.pre-commit-config.yaml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/LICENSE +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/README.rst +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/api_hooks.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/assets/datachain.svg +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/login.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/logout.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/team.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/auth/token.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/index.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/cancel.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/clusters.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/logs.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/ls.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/commands/job/run.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/contributing.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/examples.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/db_migrations.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/delta.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/env.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/index.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/namespaces.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/processing.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/remotes.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/guide/retry.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/index.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/overrides/main.html +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/quick-start.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/file.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/index.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/pose.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/segment.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/datachain.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/func.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/array.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/conditional.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/numeric.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/path.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/random.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/string.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/functions/window.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/index.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/toolkit.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/torch.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/references/udf.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/templates/main.dot +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/templates/operation.dot +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/templates/responses.def +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/docs/tutorials.md +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/wds.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/mkdocs.yml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/noxfile.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/pyproject.toml +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/setup.cfg +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/__main__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/asyn.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cache.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/cli/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/azure.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/gcs.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/hf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/local.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/client/s3.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/config.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/dataset.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/delta.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/error.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/fs/reference.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/fs/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/array.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/base.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/func.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/numeric.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/path.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/random.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/string.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/func/window.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/job.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/audio.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/clip.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/hf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/image.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/listing.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/projects.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/settings.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/tar.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/text.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/udf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/video.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/listing.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/pose.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/segment.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/model/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/namespace.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/node.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/progress.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/project.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/py.typed +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/batch.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/dataset.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/metrics.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/params.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/queue.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/schema.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/session.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/udf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/query/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/remote/studio.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/script_meta.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/semver.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/types.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/sql/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/studio.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/telemetry.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain/utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/conftest.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/data.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/examples/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/examples/test_examples.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/examples/wds_data.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/data/lena.jpg +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_array.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_path.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_random.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/functions/test_string.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_audio.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_batching.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_catalog.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_client.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_data_storage.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_delta.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_hf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_image.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_listing.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_ls.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_metastore.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_metrics.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_mutate.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_pull.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_pytorch.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_query.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_read_database.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_retry.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_session.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_to_database.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_toolkit.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_video.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/func/test_warehouse.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/test_atomicity.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/test_cli_e2e.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/test_cli_studio.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/test_import_time.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/test_query_e2e.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/test_telemetry.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_asyn.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_cache.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_catalog.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_client.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_config.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_dataset.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_func.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_listing.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_metastore.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_query_params.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_semver.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_serializer.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_session.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_utils.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.31.3 → datachain-0.31.4}/tests/utils.py +0 -0
|
@@ -144,26 +144,19 @@ def shutdown_process(
|
|
|
144
144
|
return proc.wait()
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def
|
|
147
|
+
def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
|
|
148
148
|
buffer = b""
|
|
149
|
+
while byt := stream.read(1): # Read one byte at a time
|
|
150
|
+
buffer += byt
|
|
149
151
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
buffer += byt
|
|
153
|
-
|
|
154
|
-
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
155
|
-
line = buffer.decode("utf-8", errors="replace")
|
|
156
|
-
callback(line)
|
|
157
|
-
buffer = b"" # Clear buffer for the next line
|
|
158
|
-
|
|
159
|
-
if buffer: # Handle any remaining data in the buffer
|
|
160
|
-
line = buffer.decode("utf-8", errors="replace")
|
|
152
|
+
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
153
|
+
line = buffer.decode("utf-8")
|
|
161
154
|
callback(line)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
155
|
+
buffer = b"" # Clear buffer for next line
|
|
156
|
+
|
|
157
|
+
if buffer: # Handle any remaining data in the buffer
|
|
158
|
+
line = buffer.decode("utf-8")
|
|
159
|
+
callback(line)
|
|
167
160
|
|
|
168
161
|
|
|
169
162
|
class DatasetRowsFetcher(NodesThreadPool):
|
|
@@ -1767,13 +1760,13 @@ class Catalog:
|
|
|
1767
1760
|
recursive=recursive,
|
|
1768
1761
|
)
|
|
1769
1762
|
|
|
1770
|
-
@staticmethod
|
|
1771
1763
|
def query(
|
|
1764
|
+
self,
|
|
1772
1765
|
query_script: str,
|
|
1773
1766
|
env: Optional[Mapping[str, str]] = None,
|
|
1774
1767
|
python_executable: str = sys.executable,
|
|
1775
|
-
|
|
1776
|
-
|
|
1768
|
+
capture_output: bool = False,
|
|
1769
|
+
output_hook: Callable[[str], None] = noop,
|
|
1777
1770
|
params: Optional[dict[str, str]] = None,
|
|
1778
1771
|
job_id: Optional[str] = None,
|
|
1779
1772
|
interrupt_timeout: Optional[int] = None,
|
|
@@ -1788,18 +1781,13 @@ class Catalog:
|
|
|
1788
1781
|
},
|
|
1789
1782
|
)
|
|
1790
1783
|
popen_kwargs: dict[str, Any] = {}
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
popen_kwargs = {"stdout": subprocess.PIPE}
|
|
1794
|
-
if stderr_callback is not None:
|
|
1795
|
-
popen_kwargs["stderr"] = subprocess.PIPE
|
|
1784
|
+
if capture_output:
|
|
1785
|
+
popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
|
|
1796
1786
|
|
|
1797
1787
|
def raise_termination_signal(sig: int, _: Any) -> NoReturn:
|
|
1798
1788
|
raise TerminationSignal(sig)
|
|
1799
1789
|
|
|
1800
|
-
|
|
1801
|
-
stderr_thread: Optional[Thread] = None
|
|
1802
|
-
|
|
1790
|
+
thread: Optional[Thread] = None
|
|
1803
1791
|
with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
|
|
1804
1792
|
logger.info("Starting process %s", proc.pid)
|
|
1805
1793
|
|
|
@@ -1813,20 +1801,10 @@ class Catalog:
|
|
|
1813
1801
|
orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
|
|
1814
1802
|
signal.signal(signal.SIGTERM, raise_termination_signal)
|
|
1815
1803
|
try:
|
|
1816
|
-
if
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
daemon=True,
|
|
1821
|
-
)
|
|
1822
|
-
stdout_thread.start()
|
|
1823
|
-
if stderr_callback is not None:
|
|
1824
|
-
stderr_thread = Thread(
|
|
1825
|
-
target=process_output,
|
|
1826
|
-
args=(proc.stderr, stderr_callback),
|
|
1827
|
-
daemon=True,
|
|
1828
|
-
)
|
|
1829
|
-
stderr_thread.start()
|
|
1804
|
+
if capture_output:
|
|
1805
|
+
args = (proc.stdout, output_hook)
|
|
1806
|
+
thread = Thread(target=_process_stream, args=args, daemon=True)
|
|
1807
|
+
thread.start()
|
|
1830
1808
|
|
|
1831
1809
|
proc.wait()
|
|
1832
1810
|
except TerminationSignal as exc:
|
|
@@ -1844,22 +1822,8 @@ class Catalog:
|
|
|
1844
1822
|
finally:
|
|
1845
1823
|
signal.signal(signal.SIGTERM, orig_sigterm_handler)
|
|
1846
1824
|
signal.signal(signal.SIGINT, orig_sigint_handler)
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
if stdout_thread is not None:
|
|
1850
|
-
stdout_thread.join(timeout=thread_join_timeout_seconds)
|
|
1851
|
-
if stdout_thread.is_alive():
|
|
1852
|
-
logger.warning(
|
|
1853
|
-
"stdout thread is still alive after %s seconds",
|
|
1854
|
-
thread_join_timeout_seconds,
|
|
1855
|
-
)
|
|
1856
|
-
if stderr_thread is not None:
|
|
1857
|
-
stderr_thread.join(timeout=thread_join_timeout_seconds)
|
|
1858
|
-
if stderr_thread.is_alive():
|
|
1859
|
-
logger.warning(
|
|
1860
|
-
"stderr thread is still alive after %s seconds",
|
|
1861
|
-
thread_join_timeout_seconds,
|
|
1862
|
-
)
|
|
1825
|
+
if thread:
|
|
1826
|
+
thread.join() # wait for the reader thread
|
|
1863
1827
|
|
|
1864
1828
|
logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
|
|
1865
1829
|
if proc.returncode in (
|
|
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
|
|
|
35
35
|
from datachain.catalog import Catalog
|
|
36
36
|
from datachain.client.fsspec import Client
|
|
37
37
|
from datachain.dataset import RowDict
|
|
38
|
+
from datachain.query.session import Session
|
|
38
39
|
|
|
39
40
|
sha256 = partial(hashlib.sha256, usedforsecurity=False)
|
|
40
41
|
|
|
@@ -252,6 +253,15 @@ class File(DataModel):
|
|
|
252
253
|
"last_modified",
|
|
253
254
|
]
|
|
254
255
|
|
|
256
|
+
# Allowed kwargs we forward to TextIOWrapper
|
|
257
|
+
_TEXT_WRAPPER_ALLOWED: ClassVar[tuple[str, ...]] = (
|
|
258
|
+
"encoding",
|
|
259
|
+
"errors",
|
|
260
|
+
"newline",
|
|
261
|
+
"line_buffering",
|
|
262
|
+
"write_through",
|
|
263
|
+
)
|
|
264
|
+
|
|
255
265
|
@staticmethod
|
|
256
266
|
def _validate_dict(
|
|
257
267
|
v: Optional[Union[str, dict, list[dict]]],
|
|
@@ -328,7 +338,6 @@ class File(DataModel):
|
|
|
328
338
|
from datachain.catalog.loader import get_catalog
|
|
329
339
|
|
|
330
340
|
catalog = get_catalog()
|
|
331
|
-
|
|
332
341
|
from datachain.client.fsspec import Client
|
|
333
342
|
|
|
334
343
|
client_cls = Client.get_implementation(path)
|
|
@@ -341,6 +350,27 @@ class File(DataModel):
|
|
|
341
350
|
file._set_stream(catalog)
|
|
342
351
|
return file
|
|
343
352
|
|
|
353
|
+
@classmethod
|
|
354
|
+
def at(cls, uri: str, session: Optional["Session"] = None) -> "Self":
|
|
355
|
+
"""Construct a File from a full URI in one call.
|
|
356
|
+
|
|
357
|
+
Example:
|
|
358
|
+
file = File.at("s3://bucket/path/to/output.png")
|
|
359
|
+
with file.open("wb") as f: ...
|
|
360
|
+
"""
|
|
361
|
+
from datachain.client.fsspec import Client
|
|
362
|
+
from datachain.query.session import Session
|
|
363
|
+
|
|
364
|
+
if session is None:
|
|
365
|
+
session = Session.get()
|
|
366
|
+
catalog = session.catalog
|
|
367
|
+
|
|
368
|
+
client_cls = Client.get_implementation(uri)
|
|
369
|
+
source, rel_path = client_cls.split_url(uri)
|
|
370
|
+
file = cls(source=client_cls.get_uri(source), path=rel_path)
|
|
371
|
+
file._set_stream(catalog)
|
|
372
|
+
return file
|
|
373
|
+
|
|
344
374
|
@classmethod
|
|
345
375
|
def _from_row(cls, row: "RowDict") -> "Self":
|
|
346
376
|
return cls(**{key: row[key] for key in cls._datachain_column_types})
|
|
@@ -354,28 +384,70 @@ class File(DataModel):
|
|
|
354
384
|
return str(PurePosixPath(self.path).parent)
|
|
355
385
|
|
|
356
386
|
@contextmanager
|
|
357
|
-
def open(self, mode:
|
|
358
|
-
"""Open the file and return a file object.
|
|
359
|
-
if self.location:
|
|
360
|
-
with VFileRegistry.open(self, self.location) as f: # type: ignore[arg-type]
|
|
361
|
-
yield f
|
|
387
|
+
def open(self, mode: str = "rb", **open_kwargs) -> Iterator[Any]:
|
|
388
|
+
"""Open the file and return a file-like object.
|
|
362
389
|
|
|
363
|
-
|
|
390
|
+
Supports both read ("rb", "r") and write modes (e.g. "wb", "w", "ab").
|
|
391
|
+
When opened in a write mode, metadata is refreshed after closing.
|
|
392
|
+
"""
|
|
393
|
+
writing = any(ch in mode for ch in "wax+")
|
|
394
|
+
if self.location and writing:
|
|
395
|
+
raise VFileError(
|
|
396
|
+
"Writing to virtual file is not supported",
|
|
397
|
+
self.source,
|
|
398
|
+
self.path,
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
if self._catalog is None:
|
|
402
|
+
raise RuntimeError("Cannot open file: catalog is not set")
|
|
403
|
+
|
|
404
|
+
client: Client = self._catalog.get_client(self.source)
|
|
405
|
+
|
|
406
|
+
if not writing:
|
|
407
|
+
if self.location:
|
|
408
|
+
with VFileRegistry.open(self, self.location) as f: # type: ignore[arg-type]
|
|
409
|
+
yield self._wrap_text(f, mode, open_kwargs)
|
|
410
|
+
return
|
|
364
411
|
if self._caching_enabled:
|
|
365
412
|
self.ensure_cached()
|
|
366
|
-
client: Client = self._catalog.get_client(self.source)
|
|
367
413
|
with client.open_object(
|
|
368
414
|
self, use_cache=self._caching_enabled, cb=self._download_cb
|
|
369
415
|
) as f:
|
|
370
|
-
yield
|
|
416
|
+
yield self._wrap_text(f, mode, open_kwargs)
|
|
417
|
+
return
|
|
418
|
+
|
|
419
|
+
# write path
|
|
420
|
+
full_path = client.get_full_path(self.get_path_normalized())
|
|
421
|
+
with client.fs.open(full_path, mode, **open_kwargs) as f:
|
|
422
|
+
yield self._wrap_text(f, mode, open_kwargs)
|
|
423
|
+
|
|
424
|
+
# refresh metadata
|
|
425
|
+
info = client.fs.info(full_path)
|
|
426
|
+
refreshed = client.info_to_file(info, self.get_path_normalized())
|
|
427
|
+
for k, v in refreshed.model_dump().items():
|
|
428
|
+
setattr(self, k, v)
|
|
429
|
+
|
|
430
|
+
def _wrap_text(self, f: Any, mode: str, open_kwargs: dict[str, Any]) -> Any:
|
|
431
|
+
"""Return stream possibly wrapped for text."""
|
|
432
|
+
if "b" in mode or isinstance(f, io.TextIOBase):
|
|
433
|
+
return f
|
|
434
|
+
filtered = {
|
|
435
|
+
k: open_kwargs[k] for k in self._TEXT_WRAPPER_ALLOWED if k in open_kwargs
|
|
436
|
+
}
|
|
437
|
+
return io.TextIOWrapper(f, **filtered)
|
|
371
438
|
|
|
372
439
|
def read_bytes(self, length: int = -1):
|
|
373
440
|
"""Returns file contents as bytes."""
|
|
374
441
|
with self.open() as stream:
|
|
375
442
|
return stream.read(length)
|
|
376
443
|
|
|
377
|
-
def read_text(self):
|
|
378
|
-
"""
|
|
444
|
+
def read_text(self, **open_kwargs):
|
|
445
|
+
"""Return file contents decoded as text.
|
|
446
|
+
|
|
447
|
+
**open_kwargs : Any
|
|
448
|
+
Extra keyword arguments forwarded to ``open(mode="r", ...)``
|
|
449
|
+
(e.g. ``encoding="utf-8"``, ``errors="ignore"``)
|
|
450
|
+
"""
|
|
379
451
|
if self.location:
|
|
380
452
|
raise VFileError(
|
|
381
453
|
"Reading text from virtual file is not supported",
|
|
@@ -383,7 +455,7 @@ class File(DataModel):
|
|
|
383
455
|
self.path,
|
|
384
456
|
)
|
|
385
457
|
|
|
386
|
-
with self.open(mode="r") as stream:
|
|
458
|
+
with self.open(mode="r", **open_kwargs) as stream:
|
|
387
459
|
return stream.read()
|
|
388
460
|
|
|
389
461
|
def read(self, length: int = -1):
|
|
@@ -701,14 +773,19 @@ class TextFile(File):
|
|
|
701
773
|
"""`DataModel` for reading text files."""
|
|
702
774
|
|
|
703
775
|
@contextmanager
|
|
704
|
-
def open(self, mode:
|
|
705
|
-
"""Open the file and return a file object
|
|
706
|
-
|
|
776
|
+
def open(self, mode: str = "r", **open_kwargs) -> Iterator[Any]:
|
|
777
|
+
"""Open the file and return a file-like object.
|
|
778
|
+
Default to text mode"""
|
|
779
|
+
with super().open(mode=mode, **open_kwargs) as stream:
|
|
707
780
|
yield stream
|
|
708
781
|
|
|
709
|
-
def read_text(self):
|
|
710
|
-
"""
|
|
711
|
-
|
|
782
|
+
def read_text(self, **open_kwargs):
|
|
783
|
+
"""Return file contents as text.
|
|
784
|
+
|
|
785
|
+
**open_kwargs : Any
|
|
786
|
+
Extra keyword arguments forwarded to ``open()`` (e.g. encoding).
|
|
787
|
+
"""
|
|
788
|
+
with self.open(**open_kwargs) as stream:
|
|
712
789
|
return stream.read()
|
|
713
790
|
|
|
714
791
|
def save(self, destination: str, client_config: Optional[dict] = None):
|
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
import io
|
|
2
|
+
|
|
1
3
|
import pytest
|
|
2
4
|
import pytz
|
|
3
5
|
|
|
4
6
|
import datachain as dc
|
|
5
7
|
from datachain.data_storage.sqlite import SQLiteWarehouse
|
|
6
8
|
from datachain.lib.file import File, FileError
|
|
9
|
+
from datachain.query import C
|
|
7
10
|
from datachain.utils import TIME_ZERO
|
|
8
11
|
|
|
9
12
|
|
|
@@ -91,3 +94,68 @@ def test_upload(cloud_test_catalog):
|
|
|
91
94
|
assert f.read() == img_bytes
|
|
92
95
|
|
|
93
96
|
client.fs.rm(dest, recursive=True)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def test_open_write_binary(cloud_test_catalog):
|
|
100
|
+
ctc = cloud_test_catalog
|
|
101
|
+
catalog = ctc.catalog
|
|
102
|
+
src_uri = ctc.src_uri
|
|
103
|
+
data = b"hello via open()"
|
|
104
|
+
file_path = f"{src_uri}/test-open-write-bytes.bin"
|
|
105
|
+
|
|
106
|
+
file = File.at(file_path, ctc.session)
|
|
107
|
+
with file.open("wb") as f:
|
|
108
|
+
f.write(data)
|
|
109
|
+
|
|
110
|
+
assert file.size == len(data)
|
|
111
|
+
assert file.read() == data
|
|
112
|
+
|
|
113
|
+
# Query storage for exactly that relative path.
|
|
114
|
+
# Metadata already refreshed by open() write path.
|
|
115
|
+
rel_path = file.path
|
|
116
|
+
chain = dc.read_storage(src_uri, session=ctc.session).filter(
|
|
117
|
+
C("file.path") == rel_path
|
|
118
|
+
)
|
|
119
|
+
results = list(chain.to_values("file"))
|
|
120
|
+
assert len(results) == 1
|
|
121
|
+
match = results[0]
|
|
122
|
+
for field_name in File.model_fields:
|
|
123
|
+
if field_name == "last_modified":
|
|
124
|
+
# Allow up to 1s difference across backends
|
|
125
|
+
# (some backends don't keep microsecond precision, we keep it simple here)
|
|
126
|
+
assert match.last_modified.timestamp() == pytest.approx(
|
|
127
|
+
file.last_modified.timestamp(), abs=1
|
|
128
|
+
)
|
|
129
|
+
else:
|
|
130
|
+
assert getattr(match, field_name) == getattr(file, field_name), (
|
|
131
|
+
f"Mismatch in field '{field_name}'"
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
catalog.get_client(src_uri).fs.rm(file_path)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_open_write_text(cloud_test_catalog):
|
|
138
|
+
ctc = cloud_test_catalog
|
|
139
|
+
catalog = ctc.catalog
|
|
140
|
+
src_uri = ctc.src_uri
|
|
141
|
+
file_path = f"{src_uri}/test-open-write-text.txt"
|
|
142
|
+
# Unicode content to exercise non-default (utf-16) encoding round trip
|
|
143
|
+
content = "Привет Мир\nSecond line"
|
|
144
|
+
|
|
145
|
+
file = File.at(file_path, ctc.session)
|
|
146
|
+
with file.open("w", encoding="utf-16-le") as f:
|
|
147
|
+
written_chars = f.write(content)
|
|
148
|
+
|
|
149
|
+
assert written_chars == len(content)
|
|
150
|
+
assert file.read_text(encoding="utf-16-le") == content
|
|
151
|
+
|
|
152
|
+
# Compute expected byte size using identical TextIOWrapper logic
|
|
153
|
+
buf = io.BytesIO()
|
|
154
|
+
tw = io.TextIOWrapper(buf, encoding="utf-16-le")
|
|
155
|
+
tw.write(content)
|
|
156
|
+
tw.flush()
|
|
157
|
+
expected_size = len(buf.getvalue())
|
|
158
|
+
tw.close()
|
|
159
|
+
assert file.size == expected_size
|
|
160
|
+
|
|
161
|
+
catalog.get_client(src_uri).fs.rm(file_path)
|
|
@@ -42,31 +42,12 @@ def test_args(catalog, mock_popen):
|
|
|
42
42
|
mock_popen.assert_called_once_with(["mypython", "-c", "pass"], env=expected_env)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def test_capture_stdout(catalog, mock_popen):
|
|
46
|
-
mock_popen.stdout = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
|
|
47
|
-
stdout = []
|
|
48
|
-
|
|
49
|
-
catalog.query("pass", stdout_callback=stdout.append)
|
|
50
|
-
assert stdout == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def test_capture_stderr(catalog, mock_popen):
|
|
54
|
-
mock_popen.stderr = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
|
|
55
|
-
stderr = []
|
|
56
|
-
|
|
57
|
-
catalog.query("pass", stderr_callback=stderr.append)
|
|
58
|
-
assert stderr == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
|
|
59
|
-
|
|
60
|
-
|
|
61
45
|
def test_capture_output(catalog, mock_popen):
|
|
62
46
|
mock_popen.stdout = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
|
|
63
|
-
|
|
64
|
-
stdout = []
|
|
65
|
-
stderr = []
|
|
47
|
+
lines = []
|
|
66
48
|
|
|
67
|
-
catalog.query("pass",
|
|
68
|
-
assert
|
|
69
|
-
assert stderr == ["foo\n", "bar"]
|
|
49
|
+
catalog.query("pass", capture_output=True, output_hook=lines.append)
|
|
50
|
+
assert lines == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
|
|
70
51
|
|
|
71
52
|
|
|
72
53
|
def test_canceled_by_user(catalog, mock_popen):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|