datachain 0.34.0__tar.gz → 0.34.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.34.0 → datachain-0.34.2}/PKG-INFO +1 -1
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/catalog/catalog.py +22 -58
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/catalog/loader.py +5 -0
- datachain-0.34.2/src/datachain/data_storage/serializer.py +119 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/data_storage/sqlite.py +13 -1
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/datachain.py +11 -1
- datachain-0.34.2/src/datachain/plugins.py +30 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/conftest.py +1 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_datachain.py +20 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_datachain.py +27 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_database_engine.py +13 -11
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_metastore.py +12 -11
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_query.py +3 -22
- datachain-0.34.2/tests/unit/test_serializer.py +218 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_warehouse.py +11 -10
- datachain-0.34.0/src/datachain/data_storage/serializer.py +0 -29
- datachain-0.34.0/tests/unit/test_serializer.py +0 -92
- {datachain-0.34.0 → datachain-0.34.2}/.cruft.json +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.gitattributes +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/codecov.yaml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/dependabot.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/workflows/release.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/workflows/tests.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.gitignore +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/.pre-commit-config.yaml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/LICENSE +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/README.rst +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/api_hooks.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/assets/webhook_list.png +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/auth/login.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/auth/logout.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/auth/team.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/auth/token.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/index.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/job/cancel.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/job/clusters.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/job/logs.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/job/ls.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/commands/job/run.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/contributing.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/examples.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/db_migrations.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/delta.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/env.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/index.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/namespaces.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/processing.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/remotes.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/guide/retry.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/index.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/overrides/main.html +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/quick-start.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/file.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/index.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/pose.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/segment.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/datachain.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/func.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/array.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/conditional.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/numeric.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/path.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/random.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/string.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/functions/window.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/index.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/toolkit.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/torch.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/references/udf.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/studio/webhooks.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/templates/main.dot +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/templates/operation.dot +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/templates/responses.def +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/docs/tutorials.md +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/mkdocs.yml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/noxfile.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/pyproject.toml +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/setup.cfg +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/__main__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/asyn.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cache.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/checkpoint.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/cli/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/gcs.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/http.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/local.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/config.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/data_storage/warehouse.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/dataset.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/delta.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/error.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/fs/reference.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/fs/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/array.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/base.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/func.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/path.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/random.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/string.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/func/window.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/hash_utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/job.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/audio.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/projects.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/video.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/listing.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/model/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/namespace.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/node.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/progress.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/project.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/py.typed +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/batch.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/dataset.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/dispatch.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/params.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/queue.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/schema.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/session.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/query/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/script_meta.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/semver.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/types.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/studio.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain/utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/data.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/examples/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/examples/test_examples.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/data/lena.jpg +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/test_array.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/test_path.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/test_random.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/functions/test_string.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/model/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_audio.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_batching.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_client.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_data_storage.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_delta.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_file.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_hf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_image.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_listing.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_ls.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_metastore.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_mutate.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_pull.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_query.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_read_database.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_retry.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_session.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_to_database.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_video.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/func/test_warehouse.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/test_atomicity.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/test_import_time.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/test_telemetry.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/model/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_client.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_client_http.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_config.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_datachain_hash.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_func.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_hash_utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_query_steps_hash.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_semver.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_session.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/unit/test_utils.py +0 -0
- {datachain-0.34.0 → datachain-0.34.2}/tests/utils.py +0 -0
|
@@ -144,26 +144,19 @@ def shutdown_process(
|
|
|
144
144
|
return proc.wait()
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def
|
|
147
|
+
def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
|
|
148
148
|
buffer = b""
|
|
149
|
+
while byt := stream.read(1): # Read one byte at a time
|
|
150
|
+
buffer += byt
|
|
149
151
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
buffer += byt
|
|
153
|
-
|
|
154
|
-
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
155
|
-
line = buffer.decode("utf-8", errors="replace")
|
|
156
|
-
callback(line)
|
|
157
|
-
buffer = b"" # Clear buffer for the next line
|
|
158
|
-
|
|
159
|
-
if buffer: # Handle any remaining data in the buffer
|
|
160
|
-
line = buffer.decode("utf-8", errors="replace")
|
|
152
|
+
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
153
|
+
line = buffer.decode("utf-8")
|
|
161
154
|
callback(line)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
155
|
+
buffer = b"" # Clear buffer for next line
|
|
156
|
+
|
|
157
|
+
if buffer: # Handle any remaining data in the buffer
|
|
158
|
+
line = buffer.decode("utf-8")
|
|
159
|
+
callback(line)
|
|
167
160
|
|
|
168
161
|
|
|
169
162
|
class DatasetRowsFetcher(NodesThreadPool):
|
|
@@ -1767,13 +1760,13 @@ class Catalog:
|
|
|
1767
1760
|
recursive=recursive,
|
|
1768
1761
|
)
|
|
1769
1762
|
|
|
1770
|
-
@staticmethod
|
|
1771
1763
|
def query(
|
|
1764
|
+
self,
|
|
1772
1765
|
query_script: str,
|
|
1773
1766
|
env: Optional[Mapping[str, str]] = None,
|
|
1774
1767
|
python_executable: str = sys.executable,
|
|
1775
|
-
|
|
1776
|
-
|
|
1768
|
+
capture_output: bool = False,
|
|
1769
|
+
output_hook: Callable[[str], None] = noop,
|
|
1777
1770
|
params: Optional[dict[str, str]] = None,
|
|
1778
1771
|
job_id: Optional[str] = None,
|
|
1779
1772
|
interrupt_timeout: Optional[int] = None,
|
|
@@ -1788,18 +1781,13 @@ class Catalog:
|
|
|
1788
1781
|
},
|
|
1789
1782
|
)
|
|
1790
1783
|
popen_kwargs: dict[str, Any] = {}
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
popen_kwargs = {"stdout": subprocess.PIPE}
|
|
1794
|
-
if stderr_callback is not None:
|
|
1795
|
-
popen_kwargs["stderr"] = subprocess.PIPE
|
|
1784
|
+
if capture_output:
|
|
1785
|
+
popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
|
|
1796
1786
|
|
|
1797
1787
|
def raise_termination_signal(sig: int, _: Any) -> NoReturn:
|
|
1798
1788
|
raise TerminationSignal(sig)
|
|
1799
1789
|
|
|
1800
|
-
|
|
1801
|
-
stderr_thread: Optional[Thread] = None
|
|
1802
|
-
|
|
1790
|
+
thread: Optional[Thread] = None
|
|
1803
1791
|
with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
|
|
1804
1792
|
logger.info("Starting process %s", proc.pid)
|
|
1805
1793
|
|
|
@@ -1813,20 +1801,10 @@ class Catalog:
|
|
|
1813
1801
|
orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
|
|
1814
1802
|
signal.signal(signal.SIGTERM, raise_termination_signal)
|
|
1815
1803
|
try:
|
|
1816
|
-
if
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
daemon=True,
|
|
1821
|
-
)
|
|
1822
|
-
stdout_thread.start()
|
|
1823
|
-
if stderr_callback is not None:
|
|
1824
|
-
stderr_thread = Thread(
|
|
1825
|
-
target=process_output,
|
|
1826
|
-
args=(proc.stderr, stderr_callback),
|
|
1827
|
-
daemon=True,
|
|
1828
|
-
)
|
|
1829
|
-
stderr_thread.start()
|
|
1804
|
+
if capture_output:
|
|
1805
|
+
args = (proc.stdout, output_hook)
|
|
1806
|
+
thread = Thread(target=_process_stream, args=args, daemon=True)
|
|
1807
|
+
thread.start()
|
|
1830
1808
|
|
|
1831
1809
|
proc.wait()
|
|
1832
1810
|
except TerminationSignal as exc:
|
|
@@ -1844,22 +1822,8 @@ class Catalog:
|
|
|
1844
1822
|
finally:
|
|
1845
1823
|
signal.signal(signal.SIGTERM, orig_sigterm_handler)
|
|
1846
1824
|
signal.signal(signal.SIGINT, orig_sigint_handler)
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
if stdout_thread is not None:
|
|
1850
|
-
stdout_thread.join(timeout=thread_join_timeout_seconds)
|
|
1851
|
-
if stdout_thread.is_alive():
|
|
1852
|
-
logger.warning(
|
|
1853
|
-
"stdout thread is still alive after %s seconds",
|
|
1854
|
-
thread_join_timeout_seconds,
|
|
1855
|
-
)
|
|
1856
|
-
if stderr_thread is not None:
|
|
1857
|
-
stderr_thread.join(timeout=thread_join_timeout_seconds)
|
|
1858
|
-
if stderr_thread.is_alive():
|
|
1859
|
-
logger.warning(
|
|
1860
|
-
"stderr thread is still alive after %s seconds",
|
|
1861
|
-
thread_join_timeout_seconds,
|
|
1862
|
-
)
|
|
1825
|
+
if thread:
|
|
1826
|
+
thread.join() # wait for the reader thread
|
|
1863
1827
|
|
|
1864
1828
|
logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
|
|
1865
1829
|
if proc.returncode in (
|
|
@@ -3,6 +3,7 @@ import sys
|
|
|
3
3
|
from importlib import import_module
|
|
4
4
|
from typing import TYPE_CHECKING, Any, Optional
|
|
5
5
|
|
|
6
|
+
from datachain.plugins import ensure_plugins_loaded
|
|
6
7
|
from datachain.utils import get_envs_by_prefix
|
|
7
8
|
|
|
8
9
|
if TYPE_CHECKING:
|
|
@@ -24,6 +25,8 @@ IN_MEMORY_ERROR_MESSAGE = "In-memory is only supported on SQLite"
|
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
|
|
28
|
+
ensure_plugins_loaded()
|
|
29
|
+
|
|
27
30
|
from datachain.data_storage import AbstractMetastore
|
|
28
31
|
from datachain.data_storage.serializer import deserialize
|
|
29
32
|
|
|
@@ -64,6 +67,8 @@ def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
|
|
|
64
67
|
|
|
65
68
|
|
|
66
69
|
def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
|
|
70
|
+
ensure_plugins_loaded()
|
|
71
|
+
|
|
67
72
|
from datachain.data_storage import AbstractWarehouse
|
|
68
73
|
from datachain.data_storage.serializer import deserialize
|
|
69
74
|
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from typing import Any, ClassVar
|
|
6
|
+
|
|
7
|
+
from datachain.plugins import ensure_plugins_loaded
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CallableRegistry:
|
|
11
|
+
_registry: ClassVar[dict[str, Callable]] = {}
|
|
12
|
+
|
|
13
|
+
@classmethod
|
|
14
|
+
def register(cls, callable_obj: Callable, name: str) -> str:
|
|
15
|
+
cls._registry[name] = callable_obj
|
|
16
|
+
return name
|
|
17
|
+
|
|
18
|
+
@classmethod
|
|
19
|
+
def get(cls, name: str) -> Callable:
|
|
20
|
+
return cls._registry[name]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Serializable:
|
|
24
|
+
@classmethod
|
|
25
|
+
@abstractmethod
|
|
26
|
+
def serialize_callable_name(cls) -> str:
|
|
27
|
+
"""Return the registered name used for this class' factory callable."""
|
|
28
|
+
|
|
29
|
+
@abstractmethod
|
|
30
|
+
def clone_params(self) -> tuple[Callable[..., Any], list[Any], dict[str, Any]]:
|
|
31
|
+
"""Return (callable, args, kwargs) necessary to recreate this object."""
|
|
32
|
+
|
|
33
|
+
def _prepare(self, params: tuple) -> dict:
|
|
34
|
+
callable, args, kwargs = params
|
|
35
|
+
callable_name = callable.__self__.serialize_callable_name()
|
|
36
|
+
return {
|
|
37
|
+
"callable": callable_name,
|
|
38
|
+
"args": args,
|
|
39
|
+
"kwargs": {
|
|
40
|
+
k: self._prepare(v) if isinstance(v, tuple) else v
|
|
41
|
+
for k, v in kwargs.items()
|
|
42
|
+
},
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
def serialize(self) -> str:
|
|
46
|
+
"""Return a base64-encoded JSON string with registered callable + params."""
|
|
47
|
+
_ensure_default_callables_registered()
|
|
48
|
+
data = self.clone_params()
|
|
49
|
+
return base64.b64encode(json.dumps(self._prepare(data)).encode()).decode()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def deserialize(s: str) -> Serializable:
|
|
53
|
+
"""Deserialize from base64-encoded JSON using only registered callables.
|
|
54
|
+
|
|
55
|
+
Nested serialized objects are instantiated automatically except for those
|
|
56
|
+
passed via clone parameter tuples (keys ending with ``_clone_params``),
|
|
57
|
+
which must remain as (callable, args, kwargs) for later factory usage.
|
|
58
|
+
"""
|
|
59
|
+
ensure_plugins_loaded()
|
|
60
|
+
_ensure_default_callables_registered()
|
|
61
|
+
decoded = base64.b64decode(s.encode())
|
|
62
|
+
data = json.loads(decoded.decode())
|
|
63
|
+
|
|
64
|
+
def _is_serialized(obj: Any) -> bool:
|
|
65
|
+
return isinstance(obj, dict) and {"callable", "args", "kwargs"}.issubset(
|
|
66
|
+
obj.keys()
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
def _reconstruct(obj: Any, nested: bool = False) -> Any:
|
|
70
|
+
if not _is_serialized(obj):
|
|
71
|
+
return obj
|
|
72
|
+
callable_name: str = obj["callable"]
|
|
73
|
+
args: list[Any] = obj["args"]
|
|
74
|
+
kwargs: dict[str, Any] = obj["kwargs"]
|
|
75
|
+
# Recurse only inside kwargs because serialize() only nests through kwargs
|
|
76
|
+
for k, v in list(kwargs.items()):
|
|
77
|
+
if _is_serialized(v):
|
|
78
|
+
kwargs[k] = _reconstruct(v, True)
|
|
79
|
+
callable_obj = CallableRegistry.get(callable_name)
|
|
80
|
+
if nested:
|
|
81
|
+
return (callable_obj, args, kwargs)
|
|
82
|
+
# Otherwise instantiate
|
|
83
|
+
return callable_obj(*args, **kwargs)
|
|
84
|
+
|
|
85
|
+
if not _is_serialized(data):
|
|
86
|
+
raise ValueError("Invalid serialized data format")
|
|
87
|
+
return _reconstruct(data, False)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class _DefaultsState:
|
|
91
|
+
registered = False
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _ensure_default_callables_registered() -> None:
|
|
95
|
+
if _DefaultsState.registered:
|
|
96
|
+
return
|
|
97
|
+
|
|
98
|
+
from datachain.data_storage.sqlite import (
|
|
99
|
+
SQLiteDatabaseEngine,
|
|
100
|
+
SQLiteMetastore,
|
|
101
|
+
SQLiteWarehouse,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# Register (idempotent by name overwrite is fine) using class-level
|
|
105
|
+
# serialization names to avoid hard-coded literals here.
|
|
106
|
+
CallableRegistry.register(
|
|
107
|
+
SQLiteDatabaseEngine.from_db_file,
|
|
108
|
+
SQLiteDatabaseEngine.serialize_callable_name(),
|
|
109
|
+
)
|
|
110
|
+
CallableRegistry.register(
|
|
111
|
+
SQLiteMetastore.init_after_clone,
|
|
112
|
+
SQLiteMetastore.serialize_callable_name(),
|
|
113
|
+
)
|
|
114
|
+
CallableRegistry.register(
|
|
115
|
+
SQLiteWarehouse.init_after_clone,
|
|
116
|
+
SQLiteWarehouse.serialize_callable_name(),
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
_DefaultsState.registered = True
|
|
@@ -201,10 +201,14 @@ class SQLiteDatabaseEngine(DatabaseEngine):
|
|
|
201
201
|
"""
|
|
202
202
|
return (
|
|
203
203
|
SQLiteDatabaseEngine.from_db_file,
|
|
204
|
-
[self.db_file],
|
|
204
|
+
[str(self.db_file)],
|
|
205
205
|
{},
|
|
206
206
|
)
|
|
207
207
|
|
|
208
|
+
@classmethod
|
|
209
|
+
def serialize_callable_name(cls) -> str:
|
|
210
|
+
return "sqlite.from_db_file"
|
|
211
|
+
|
|
208
212
|
def _reconnect(self) -> None:
|
|
209
213
|
if not self.is_closed:
|
|
210
214
|
raise RuntimeError("Cannot reconnect on still-open DB!")
|
|
@@ -403,6 +407,10 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
403
407
|
},
|
|
404
408
|
)
|
|
405
409
|
|
|
410
|
+
@classmethod
|
|
411
|
+
def serialize_callable_name(cls) -> str:
|
|
412
|
+
return "sqlite.metastore.init_after_clone"
|
|
413
|
+
|
|
406
414
|
@classmethod
|
|
407
415
|
def init_after_clone(
|
|
408
416
|
cls,
|
|
@@ -610,6 +618,10 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
610
618
|
{"db_clone_params": self.db.clone_params()},
|
|
611
619
|
)
|
|
612
620
|
|
|
621
|
+
@classmethod
|
|
622
|
+
def serialize_callable_name(cls) -> str:
|
|
623
|
+
return "sqlite.warehouse.init_after_clone"
|
|
624
|
+
|
|
613
625
|
@classmethod
|
|
614
626
|
def init_after_clone(
|
|
615
627
|
cls,
|
|
@@ -1963,12 +1963,15 @@ class DataChain:
|
|
|
1963
1963
|
self,
|
|
1964
1964
|
flatten: bool = False,
|
|
1965
1965
|
include_hidden: bool = True,
|
|
1966
|
+
as_object: bool = False,
|
|
1966
1967
|
) -> "pd.DataFrame":
|
|
1967
1968
|
"""Return a pandas DataFrame from the chain.
|
|
1968
1969
|
|
|
1969
1970
|
Parameters:
|
|
1970
1971
|
flatten: Whether to use a multiindex or flatten column names.
|
|
1971
1972
|
include_hidden: Whether to include hidden columns.
|
|
1973
|
+
as_object: Whether to emit a dataframe backed by Python objects
|
|
1974
|
+
rather than pandas-inferred dtypes.
|
|
1972
1975
|
|
|
1973
1976
|
Returns:
|
|
1974
1977
|
pd.DataFrame: A pandas DataFrame representation of the chain.
|
|
@@ -1984,6 +1987,9 @@ class DataChain:
|
|
|
1984
1987
|
columns = pd.MultiIndex.from_tuples(map(tuple, headers))
|
|
1985
1988
|
|
|
1986
1989
|
results = self.results(include_hidden=include_hidden)
|
|
1990
|
+
if as_object:
|
|
1991
|
+
df = pd.DataFrame(results, columns=columns, dtype=object)
|
|
1992
|
+
return df.where(pd.notna(df), None)
|
|
1987
1993
|
return pd.DataFrame.from_records(results, columns=columns)
|
|
1988
1994
|
|
|
1989
1995
|
def show(
|
|
@@ -2006,7 +2012,11 @@ class DataChain:
|
|
|
2006
2012
|
import pandas as pd
|
|
2007
2013
|
|
|
2008
2014
|
dc = self.limit(limit) if limit > 0 else self # type: ignore[misc]
|
|
2009
|
-
df = dc.to_pandas(
|
|
2015
|
+
df = dc.to_pandas(
|
|
2016
|
+
flatten,
|
|
2017
|
+
include_hidden=include_hidden,
|
|
2018
|
+
as_object=True,
|
|
2019
|
+
)
|
|
2010
2020
|
|
|
2011
2021
|
if df.empty:
|
|
2012
2022
|
print("Empty result")
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Plugin loader for DataChain callables.
|
|
2
|
+
|
|
3
|
+
Discovers and invokes entry points in the group "datachain.callables" once
|
|
4
|
+
per process. This enables external packages (e.g., Studio) to register
|
|
5
|
+
their callables with the serializer registry without explicit imports.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from importlib import metadata as importlib_metadata
|
|
9
|
+
|
|
10
|
+
_plugins_loaded = False
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def ensure_plugins_loaded() -> None:
|
|
14
|
+
global _plugins_loaded # noqa: PLW0603
|
|
15
|
+
if _plugins_loaded:
|
|
16
|
+
return
|
|
17
|
+
|
|
18
|
+
# Compatible across importlib.metadata versions
|
|
19
|
+
eps_obj = importlib_metadata.entry_points()
|
|
20
|
+
if hasattr(eps_obj, "select"):
|
|
21
|
+
eps_list = eps_obj.select(group="datachain.callables")
|
|
22
|
+
else:
|
|
23
|
+
# Compatibility for older versions of importlib_metadata, Python 3.9
|
|
24
|
+
eps_list = eps_obj.get("datachain.callables", []) # type: ignore[attr-defined]
|
|
25
|
+
|
|
26
|
+
for ep in eps_list:
|
|
27
|
+
func = ep.load()
|
|
28
|
+
func()
|
|
29
|
+
|
|
30
|
+
_plugins_loaded = True
|
|
@@ -126,6 +126,7 @@ def clean_environment(
|
|
|
126
126
|
working_dir = str(tmp_path_factory.mktemp("default_working_dir"))
|
|
127
127
|
monkeypatch_session.chdir(working_dir)
|
|
128
128
|
monkeypatch_session.delenv(DataChainDir.ENV_VAR, raising=False)
|
|
129
|
+
monkeypatch_session.delenv(DataChainDir.ENV_VAR_DATACHAIN_ROOT, raising=False)
|
|
129
130
|
|
|
130
131
|
|
|
131
132
|
@pytest.fixture
|
|
@@ -35,6 +35,7 @@ from tests.utils import (
|
|
|
35
35
|
TARRED_TREE,
|
|
36
36
|
df_equal,
|
|
37
37
|
images_equal,
|
|
38
|
+
skip_if_not_sqlite,
|
|
38
39
|
sorted_dicts,
|
|
39
40
|
text_embedding,
|
|
40
41
|
)
|
|
@@ -559,6 +560,25 @@ def test_show(capsys, test_session):
|
|
|
559
560
|
assert f"{i} {first_name[i]}" in normalized_output
|
|
560
561
|
|
|
561
562
|
|
|
563
|
+
@skip_if_not_sqlite
|
|
564
|
+
def test_show_preserves_none(capsys, test_session):
|
|
565
|
+
chain = dc.read_values(
|
|
566
|
+
score=[1, None],
|
|
567
|
+
ts=[
|
|
568
|
+
datetime(2020, 1, 1, tzinfo=timezone.utc),
|
|
569
|
+
None,
|
|
570
|
+
],
|
|
571
|
+
session=test_session,
|
|
572
|
+
)
|
|
573
|
+
|
|
574
|
+
chain.show()
|
|
575
|
+
|
|
576
|
+
captured = capsys.readouterr().out
|
|
577
|
+
assert "NaN" not in captured
|
|
578
|
+
assert "NaT" not in captured
|
|
579
|
+
assert captured.count("None") >= 2
|
|
580
|
+
|
|
581
|
+
|
|
562
582
|
def test_show_without_temp_datasets(capsys, test_session):
|
|
563
583
|
dc.read_values(
|
|
564
584
|
key=[1, 2, 3, 4], session=test_session
|
|
@@ -3,6 +3,7 @@ import json
|
|
|
3
3
|
import math
|
|
4
4
|
import os
|
|
5
5
|
import re
|
|
6
|
+
from collections import Counter
|
|
6
7
|
from collections.abc import Generator, Iterator
|
|
7
8
|
from unittest.mock import ANY, patch
|
|
8
9
|
|
|
@@ -564,6 +565,32 @@ def test_from_features_simple_types_in_memory():
|
|
|
564
565
|
assert df["odds"].tolist() == values
|
|
565
566
|
|
|
566
567
|
|
|
568
|
+
@skip_if_not_sqlite
|
|
569
|
+
def test_to_pandas_as_object_preserves_none(test_session):
|
|
570
|
+
timestamp = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
|
|
571
|
+
chain = dc.read_values(
|
|
572
|
+
id=[1, None],
|
|
573
|
+
value=[3.14, None],
|
|
574
|
+
ts=[timestamp, None],
|
|
575
|
+
session=test_session,
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
df_default = chain.to_pandas()
|
|
579
|
+
assert df_default["id"].dtype != object
|
|
580
|
+
assert df_default["value"].dtype != object
|
|
581
|
+
assert df_default["id"].isna().sum() == 1
|
|
582
|
+
assert df_default["value"].isna().sum() == 1
|
|
583
|
+
assert pd.isna(df_default.loc[df_default["id"].isna(), "ts"]).all()
|
|
584
|
+
|
|
585
|
+
df_object = chain.to_pandas(as_object=True)
|
|
586
|
+
assert df_object["id"].dtype == object
|
|
587
|
+
assert df_object["value"].dtype == object
|
|
588
|
+
assert df_object["ts"].dtype == object
|
|
589
|
+
assert Counter(df_object["id"].tolist()) == Counter([1, None])
|
|
590
|
+
assert Counter(df_object["value"].tolist()) == Counter([3.14, None])
|
|
591
|
+
assert Counter(df_object["ts"].tolist()) == Counter([timestamp, None])
|
|
592
|
+
|
|
593
|
+
|
|
567
594
|
def test_from_features_more_simple_types(test_session):
|
|
568
595
|
ds = dc.read_values(
|
|
569
596
|
t1=features,
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import json
|
|
2
3
|
import os
|
|
3
|
-
import pickle
|
|
4
4
|
|
|
5
5
|
import pytest
|
|
6
6
|
from sqlalchemy import Column, Integer, Table
|
|
7
7
|
|
|
8
8
|
from datachain.data_storage.serializer import deserialize
|
|
9
|
-
from datachain.data_storage.sqlite import
|
|
9
|
+
from datachain.data_storage.sqlite import (
|
|
10
|
+
SQLiteDatabaseEngine,
|
|
11
|
+
get_db_file_in_memory,
|
|
12
|
+
)
|
|
10
13
|
from tests.utils import skip_if_not_sqlite
|
|
11
14
|
|
|
12
15
|
|
|
@@ -24,6 +27,7 @@ def test_init_clone(tmp_dir, db_file, expected_db_file):
|
|
|
24
27
|
expected_db_file = os.fspath(tmp_dir / expected_db_file)
|
|
25
28
|
|
|
26
29
|
with SQLiteDatabaseEngine.from_db_file(db_file) as db:
|
|
30
|
+
assert isinstance(db, SQLiteDatabaseEngine)
|
|
27
31
|
assert db.db_file == expected_db_file
|
|
28
32
|
|
|
29
33
|
# Test clone
|
|
@@ -53,17 +57,15 @@ def test_get_db_file_in_memory(db_file, in_memory, expected):
|
|
|
53
57
|
|
|
54
58
|
|
|
55
59
|
def test_serialize(sqlite_db):
|
|
56
|
-
#
|
|
60
|
+
# JSON serialization format
|
|
57
61
|
serialized = sqlite_db.serialize()
|
|
58
62
|
assert serialized
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
assert
|
|
63
|
-
assert
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
# Test deserialization
|
|
63
|
+
raw = base64.b64decode(serialized.encode())
|
|
64
|
+
data = json.loads(raw.decode())
|
|
65
|
+
assert data["callable"] == "sqlite.from_db_file"
|
|
66
|
+
assert data["args"] == [":memory:"]
|
|
67
|
+
assert data["kwargs"] == {}
|
|
68
|
+
|
|
67
69
|
obj3 = deserialize(serialized)
|
|
68
70
|
assert isinstance(obj3, SQLiteDatabaseEngine)
|
|
69
71
|
assert obj3.db_file == ":memory:"
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import base64
|
|
2
|
-
import
|
|
2
|
+
import json
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
5
|
|
|
@@ -24,18 +24,19 @@ def test_sqlite_metastore(sqlite_db):
|
|
|
24
24
|
assert obj2.db.db_file == sqlite_db.db_file
|
|
25
25
|
assert obj2.clone_params() == obj.clone_params()
|
|
26
26
|
|
|
27
|
-
# Test serialization
|
|
27
|
+
# Test serialization JSON format
|
|
28
28
|
serialized = obj.serialize()
|
|
29
29
|
assert serialized
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
assert
|
|
34
|
-
assert
|
|
35
|
-
|
|
36
|
-
assert
|
|
37
|
-
|
|
38
|
-
|
|
30
|
+
raw = base64.b64decode(serialized.encode())
|
|
31
|
+
data = json.loads(raw.decode())
|
|
32
|
+
assert data["callable"] == "sqlite.metastore.init_after_clone"
|
|
33
|
+
assert data["args"] == []
|
|
34
|
+
assert data["kwargs"]["uri"] == uri
|
|
35
|
+
nested = data["kwargs"]["db_clone_params"]
|
|
36
|
+
assert nested["callable"] == "sqlite.from_db_file"
|
|
37
|
+
assert nested["args"] == [":memory:"]
|
|
38
|
+
assert nested["kwargs"] == {}
|
|
39
|
+
|
|
39
40
|
obj3 = deserialize(serialized)
|
|
40
41
|
assert isinstance(obj3, SQLiteMetastore)
|
|
41
42
|
assert obj3.uri == uri
|
|
@@ -42,31 +42,12 @@ def test_args(catalog, mock_popen):
|
|
|
42
42
|
mock_popen.assert_called_once_with(["mypython", "-c", "pass"], env=expected_env)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def test_capture_stdout(catalog, mock_popen):
|
|
46
|
-
mock_popen.stdout = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
|
|
47
|
-
stdout = []
|
|
48
|
-
|
|
49
|
-
catalog.query("pass", stdout_callback=stdout.append)
|
|
50
|
-
assert stdout == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def test_capture_stderr(catalog, mock_popen):
|
|
54
|
-
mock_popen.stderr = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
|
|
55
|
-
stderr = []
|
|
56
|
-
|
|
57
|
-
catalog.query("pass", stderr_callback=stderr.append)
|
|
58
|
-
assert stderr == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
|
|
59
|
-
|
|
60
|
-
|
|
61
45
|
def test_capture_output(catalog, mock_popen):
|
|
62
46
|
mock_popen.stdout = io.BytesIO(b"Hello, World!\rLorem Ipsum\nDolor Sit Amet\nconse")
|
|
63
|
-
|
|
64
|
-
stdout = []
|
|
65
|
-
stderr = []
|
|
47
|
+
lines = []
|
|
66
48
|
|
|
67
|
-
catalog.query("pass",
|
|
68
|
-
assert
|
|
69
|
-
assert stderr == ["foo\n", "bar"]
|
|
49
|
+
catalog.query("pass", capture_output=True, output_hook=lines.append)
|
|
50
|
+
assert lines == ["Hello, World!\r", "Lorem Ipsum\n", "Dolor Sit Amet\n", "conse"]
|
|
70
51
|
|
|
71
52
|
|
|
72
53
|
def test_canceled_by_user(catalog, mock_popen):
|