datachain 0.35.0__tar.gz → 0.35.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.35.0 → datachain-0.35.2}/PKG-INFO +1 -1
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/catalog/catalog.py +58 -22
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/warehouse.py +34 -18
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/batch.py +1 -2
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/dataset.py +12 -22
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/dispatch.py +25 -35
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain.egg-info/SOURCES.txt +1 -2
- {datachain-0.35.0 → datachain-0.35.2}/tests/conftest.py +46 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_datachain.py +1 -373
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_udf.py +10 -0
- datachain-0.35.2/tests/unit/test_batching.py +229 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_datachain_hash.py +1 -1
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_hash_utils.py +6 -6
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_query.py +22 -3
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_query_steps_hash.py +4 -4
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_utils.py +5 -7
- datachain-0.35.2/tests/unit/test_warehouse.py +173 -0
- datachain-0.35.0/src/datachain/query/utils.py +0 -38
- datachain-0.35.0/tests/func/test_batching.py +0 -242
- datachain-0.35.0/tests/unit/test_warehouse.py +0 -43
- {datachain-0.35.0 → datachain-0.35.2}/.cruft.json +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.gitattributes +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/codecov.yaml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/dependabot.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/workflows/release.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/workflows/tests.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.gitignore +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/.pre-commit-config.yaml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/LICENSE +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/README.rst +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/api_hooks.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/assets/datachain.svg +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/assets/webhook_dialog.png +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/assets/webhook_list.png +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/auth/login.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/auth/logout.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/auth/team.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/auth/token.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/index.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/job/cancel.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/job/clusters.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/job/logs.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/job/ls.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/commands/job/run.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/contributing.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/examples.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/db_migrations.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/delta.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/env.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/index.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/namespaces.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/processing.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/remotes.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/guide/retry.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/index.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/overrides/main.html +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/quick-start.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/file.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/index.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/pose.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/segment.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/datachain.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/func.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/array.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/conditional.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/numeric.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/path.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/random.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/string.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/functions/window.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/index.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/toolkit.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/torch.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/references/udf.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/studio/api/.gitkeep +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/studio/webhooks.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/templates/main.dot +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/templates/operation.dot +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/templates/responses.def +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/docs/tutorials.md +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/multimodal/wds.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/mkdocs.yml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/noxfile.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/pyproject.toml +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/setup.cfg +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/__main__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/asyn.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cache.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/checkpoint.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/cli/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/azure.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/gcs.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/hf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/http.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/local.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/client/s3.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/config.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/dataset.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/delta.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/error.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/fs/reference.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/fs/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/array.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/base.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/conditional.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/func.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/numeric.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/path.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/random.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/string.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/func/window.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/hash_utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/job.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/audio.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/clip.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/database.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/datachain.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/records.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/storage_pattern.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/file.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/hf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/image.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/listing.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/projects.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/settings.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/tar.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/text.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/udf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/video.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/listing.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/bbox.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/pose.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/segment.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/model/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/namespace.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/node.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/plugins.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/progress.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/project.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/py.typed +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/metrics.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/params.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/queue.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/schema.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/session.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/query/udf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/remote/studio.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/script_meta.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/semver.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/types.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/sql/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/studio.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/telemetry.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain/utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/data.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/examples/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/examples/test_examples.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/examples/wds_data.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/data/lena.jpg +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/test_array.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/test_path.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/test_random.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/functions/test_string.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/model/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_audio.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_catalog.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_client.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_data_storage.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_datasets.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_delta.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_file.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_hf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_image.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_listing.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_ls.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_metastore.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_metrics.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_mutate.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_pull.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_pytorch.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_query.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_read_database.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_retry.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_session.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_storage_pattern.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_to_database.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_toolkit.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_union.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_video.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/func/test_warehouse.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/scripts/feature_class.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/test_atomicity.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/test_cli_e2e.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/test_cli_studio.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/test_import_time.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/test_query_e2e.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/test_telemetry.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_checkpoints.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_settings.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_storage_pattern.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/model/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_asyn.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_cache.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_catalog.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_client.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_client_http.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_config.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_dataset.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_func.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_listing.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_metastore.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_query_params.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_semver.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_serializer.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/unit/test_session.py +0 -0
- {datachain-0.35.0 → datachain-0.35.2}/tests/utils.py +0 -0
|
@@ -133,19 +133,26 @@ def shutdown_process(
|
|
|
133
133
|
return proc.wait()
|
|
134
134
|
|
|
135
135
|
|
|
136
|
-
def
|
|
136
|
+
def process_output(stream: IO[bytes], callback: Callable[[str], None]) -> None:
|
|
137
137
|
buffer = b""
|
|
138
|
-
while byt := stream.read(1): # Read one byte at a time
|
|
139
|
-
buffer += byt
|
|
140
138
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
buffer = b"" # Clear buffer for next line
|
|
139
|
+
try:
|
|
140
|
+
while byt := stream.read(1): # Read one byte at a time
|
|
141
|
+
buffer += byt
|
|
145
142
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
143
|
+
if byt in (b"\n", b"\r"): # Check for newline or carriage return
|
|
144
|
+
line = buffer.decode("utf-8", errors="replace")
|
|
145
|
+
callback(line)
|
|
146
|
+
buffer = b"" # Clear buffer for the next line
|
|
147
|
+
|
|
148
|
+
if buffer: # Handle any remaining data in the buffer
|
|
149
|
+
line = buffer.decode("utf-8", errors="replace")
|
|
150
|
+
callback(line)
|
|
151
|
+
finally:
|
|
152
|
+
try:
|
|
153
|
+
stream.close() # Ensure output is closed
|
|
154
|
+
except Exception: # noqa: BLE001, S110
|
|
155
|
+
pass
|
|
149
156
|
|
|
150
157
|
|
|
151
158
|
class DatasetRowsFetcher(NodesThreadPool):
|
|
@@ -1747,13 +1754,13 @@ class Catalog:
|
|
|
1747
1754
|
recursive=recursive,
|
|
1748
1755
|
)
|
|
1749
1756
|
|
|
1757
|
+
@staticmethod
|
|
1750
1758
|
def query(
|
|
1751
|
-
self,
|
|
1752
1759
|
query_script: str,
|
|
1753
1760
|
env: Mapping[str, str] | None = None,
|
|
1754
1761
|
python_executable: str = sys.executable,
|
|
1755
|
-
|
|
1756
|
-
|
|
1762
|
+
stdout_callback: Callable[[str], None] | None = None,
|
|
1763
|
+
stderr_callback: Callable[[str], None] | None = None,
|
|
1757
1764
|
params: dict[str, str] | None = None,
|
|
1758
1765
|
job_id: str | None = None,
|
|
1759
1766
|
reset: bool = False,
|
|
@@ -1773,13 +1780,18 @@ class Catalog:
|
|
|
1773
1780
|
},
|
|
1774
1781
|
)
|
|
1775
1782
|
popen_kwargs: dict[str, Any] = {}
|
|
1776
|
-
|
|
1777
|
-
|
|
1783
|
+
|
|
1784
|
+
if stdout_callback is not None:
|
|
1785
|
+
popen_kwargs = {"stdout": subprocess.PIPE}
|
|
1786
|
+
if stderr_callback is not None:
|
|
1787
|
+
popen_kwargs["stderr"] = subprocess.PIPE
|
|
1778
1788
|
|
|
1779
1789
|
def raise_termination_signal(sig: int, _: Any) -> NoReturn:
|
|
1780
1790
|
raise TerminationSignal(sig)
|
|
1781
1791
|
|
|
1782
|
-
|
|
1792
|
+
stdout_thread: Thread | None = None
|
|
1793
|
+
stderr_thread: Thread | None = None
|
|
1794
|
+
|
|
1783
1795
|
with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc: # noqa: S603
|
|
1784
1796
|
logger.info("Starting process %s", proc.pid)
|
|
1785
1797
|
|
|
@@ -1793,10 +1805,20 @@ class Catalog:
|
|
|
1793
1805
|
orig_sigterm_handler = signal.getsignal(signal.SIGTERM)
|
|
1794
1806
|
signal.signal(signal.SIGTERM, raise_termination_signal)
|
|
1795
1807
|
try:
|
|
1796
|
-
if
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1808
|
+
if stdout_callback is not None:
|
|
1809
|
+
stdout_thread = Thread(
|
|
1810
|
+
target=process_output,
|
|
1811
|
+
args=(proc.stdout, stdout_callback),
|
|
1812
|
+
daemon=True,
|
|
1813
|
+
)
|
|
1814
|
+
stdout_thread.start()
|
|
1815
|
+
if stderr_callback is not None:
|
|
1816
|
+
stderr_thread = Thread(
|
|
1817
|
+
target=process_output,
|
|
1818
|
+
args=(proc.stderr, stderr_callback),
|
|
1819
|
+
daemon=True,
|
|
1820
|
+
)
|
|
1821
|
+
stderr_thread.start()
|
|
1800
1822
|
|
|
1801
1823
|
proc.wait()
|
|
1802
1824
|
except TerminationSignal as exc:
|
|
@@ -1814,8 +1836,22 @@ class Catalog:
|
|
|
1814
1836
|
finally:
|
|
1815
1837
|
signal.signal(signal.SIGTERM, orig_sigterm_handler)
|
|
1816
1838
|
signal.signal(signal.SIGINT, orig_sigint_handler)
|
|
1817
|
-
|
|
1818
|
-
|
|
1839
|
+
# wait for the reader thread
|
|
1840
|
+
thread_join_timeout_seconds = 30
|
|
1841
|
+
if stdout_thread is not None:
|
|
1842
|
+
stdout_thread.join(timeout=thread_join_timeout_seconds)
|
|
1843
|
+
if stdout_thread.is_alive():
|
|
1844
|
+
logger.warning(
|
|
1845
|
+
"stdout thread is still alive after %s seconds",
|
|
1846
|
+
thread_join_timeout_seconds,
|
|
1847
|
+
)
|
|
1848
|
+
if stderr_thread is not None:
|
|
1849
|
+
stderr_thread.join(timeout=thread_join_timeout_seconds)
|
|
1850
|
+
if stderr_thread.is_alive():
|
|
1851
|
+
logger.warning(
|
|
1852
|
+
"stderr thread is still alive after %s seconds",
|
|
1853
|
+
thread_join_timeout_seconds,
|
|
1854
|
+
)
|
|
1819
1855
|
|
|
1820
1856
|
logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
|
|
1821
1857
|
if proc.returncode in (
|
|
@@ -22,7 +22,6 @@ from datachain.lib.signal_schema import SignalSchema
|
|
|
22
22
|
from datachain.node import DirType, DirTypeGroup, Node, NodeWithPath, get_path
|
|
23
23
|
from datachain.query.batch import RowsOutput
|
|
24
24
|
from datachain.query.schema import ColumnMeta
|
|
25
|
-
from datachain.query.utils import get_query_id_column
|
|
26
25
|
from datachain.sql.functions import path as pathfunc
|
|
27
26
|
from datachain.sql.types import Int, SQLType
|
|
28
27
|
from datachain.utils import sql_escape_like
|
|
@@ -228,7 +227,8 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
228
227
|
while True:
|
|
229
228
|
if limit is not None:
|
|
230
229
|
limit -= num_yielded
|
|
231
|
-
|
|
230
|
+
num_yielded = 0
|
|
231
|
+
if limit <= 0:
|
|
232
232
|
break
|
|
233
233
|
if limit < page_size:
|
|
234
234
|
paginated_query = paginated_query.limit(None).limit(limit)
|
|
@@ -246,32 +246,48 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
246
246
|
break # no more results
|
|
247
247
|
offset += page_size
|
|
248
248
|
|
|
249
|
-
def _regenerate_system_columns(
|
|
250
|
-
|
|
249
|
+
def _regenerate_system_columns(
|
|
250
|
+
self,
|
|
251
|
+
selectable: sa.Select | sa.CTE,
|
|
252
|
+
keep_existing_columns: bool = False,
|
|
253
|
+
) -> sa.Select:
|
|
254
|
+
"""
|
|
255
|
+
Return a SELECT that regenerates sys__id and sys__rand deterministically.
|
|
251
256
|
|
|
257
|
+
If keep_existing_columns is True, existing sys__id and sys__rand columns
|
|
258
|
+
will be kept as-is if they exist in the input selectable.
|
|
259
|
+
"""
|
|
252
260
|
base = selectable.subquery() if hasattr(selectable, "subquery") else selectable
|
|
253
261
|
|
|
262
|
+
result_columns: dict[str, sa.ColumnElement] = {}
|
|
263
|
+
for col in base.c:
|
|
264
|
+
if col.name in result_columns:
|
|
265
|
+
raise ValueError(f"Duplicate column name {col.name} in SELECT")
|
|
266
|
+
if col.name in ("sys__id", "sys__rand"):
|
|
267
|
+
if keep_existing_columns:
|
|
268
|
+
result_columns[col.name] = col
|
|
269
|
+
else:
|
|
270
|
+
result_columns[col.name] = col
|
|
271
|
+
|
|
254
272
|
system_types: dict[str, sa.types.TypeEngine] = {
|
|
255
273
|
sys_col.name: sys_col.type
|
|
256
274
|
for sys_col in self.schema.dataset_row_cls.sys_columns()
|
|
257
275
|
}
|
|
258
276
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
result_columns.append(expr.label("sys__rand"))
|
|
269
|
-
else:
|
|
270
|
-
result_columns.append(col)
|
|
277
|
+
# Add missing system columns if needed
|
|
278
|
+
if "sys__id" not in result_columns:
|
|
279
|
+
expr = self._system_row_number_expr()
|
|
280
|
+
expr = sa.cast(expr, system_types["sys__id"])
|
|
281
|
+
result_columns["sys__id"] = expr.label("sys__id")
|
|
282
|
+
if "sys__rand" not in result_columns:
|
|
283
|
+
expr = self._system_random_expr()
|
|
284
|
+
expr = sa.cast(expr, system_types["sys__rand"])
|
|
285
|
+
result_columns["sys__rand"] = expr.label("sys__rand")
|
|
271
286
|
|
|
272
287
|
# Wrap in subquery to materialize window functions, then wrap again in SELECT
|
|
273
288
|
# This ensures window functions are computed before INSERT...FROM SELECT
|
|
274
|
-
|
|
289
|
+
columns = list(result_columns.values())
|
|
290
|
+
inner = sa.select(*columns).select_from(base).subquery()
|
|
275
291
|
return sa.select(*inner.c).select_from(inner)
|
|
276
292
|
|
|
277
293
|
def _system_row_number_expr(self):
|
|
@@ -380,7 +396,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
380
396
|
"""
|
|
381
397
|
Fetch dataset rows from database using a list of IDs.
|
|
382
398
|
"""
|
|
383
|
-
if (id_col :=
|
|
399
|
+
if (id_col := query.selected_columns.get("sys__id")) is None:
|
|
384
400
|
raise RuntimeError("sys__id column not found in query")
|
|
385
401
|
|
|
386
402
|
query = query._clone().offset(None).limit(None).order_by(None)
|
|
@@ -6,7 +6,6 @@ from collections.abc import Callable, Generator, Sequence
|
|
|
6
6
|
import sqlalchemy as sa
|
|
7
7
|
|
|
8
8
|
from datachain.data_storage.schema import PARTITION_COLUMN_ID
|
|
9
|
-
from datachain.query.utils import get_query_column
|
|
10
9
|
|
|
11
10
|
RowsOutputBatch = Sequence[Sequence]
|
|
12
11
|
RowsOutput = Sequence | RowsOutputBatch
|
|
@@ -106,7 +105,7 @@ class Partition(BatchingStrategy):
|
|
|
106
105
|
query: sa.Select,
|
|
107
106
|
id_col: sa.ColumnElement | None = None,
|
|
108
107
|
) -> Generator[RowsOutput, None, None]:
|
|
109
|
-
if (partition_col :=
|
|
108
|
+
if (partition_col := query.selected_columns.get(PARTITION_COLUMN_ID)) is None:
|
|
110
109
|
raise RuntimeError("partition column not found in query")
|
|
111
110
|
|
|
112
111
|
ids_only = False
|
|
@@ -438,6 +438,9 @@ class UDFStep(Step, ABC):
|
|
|
438
438
|
"""
|
|
439
439
|
|
|
440
440
|
def populate_udf_table(self, udf_table: "Table", query: Select) -> None:
|
|
441
|
+
if "sys__id" not in query.selected_columns:
|
|
442
|
+
raise RuntimeError("Query must have sys__id column to run UDF")
|
|
443
|
+
|
|
441
444
|
if (rows_total := self.catalog.warehouse.query_count(query)) == 0:
|
|
442
445
|
return
|
|
443
446
|
|
|
@@ -580,13 +583,10 @@ class UDFStep(Step, ABC):
|
|
|
580
583
|
"""
|
|
581
584
|
Create temporary table with group by partitions.
|
|
582
585
|
"""
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
assert any(c.name == "sys__id" for c in query.selected_columns), (
|
|
588
|
-
"Query must have sys__id column to use partitioning."
|
|
589
|
-
)
|
|
586
|
+
if self.partition_by is None:
|
|
587
|
+
raise RuntimeError("Query must have partition_by set to use partitioning")
|
|
588
|
+
if (id_col := query.selected_columns.get("sys__id")) is None:
|
|
589
|
+
raise RuntimeError("Query must have sys__id column to use partitioning")
|
|
590
590
|
|
|
591
591
|
if isinstance(self.partition_by, (list, tuple, GeneratorType)):
|
|
592
592
|
list_partition_by = list(self.partition_by)
|
|
@@ -602,7 +602,7 @@ class UDFStep(Step, ABC):
|
|
|
602
602
|
|
|
603
603
|
# fill table with partitions
|
|
604
604
|
cols = [
|
|
605
|
-
|
|
605
|
+
id_col,
|
|
606
606
|
f.dense_rank().over(order_by=partition_by).label(PARTITION_COLUMN_ID),
|
|
607
607
|
]
|
|
608
608
|
self.catalog.warehouse.db.execute(
|
|
@@ -634,21 +634,11 @@ class UDFStep(Step, ABC):
|
|
|
634
634
|
|
|
635
635
|
# Apply partitioning if needed.
|
|
636
636
|
if self.partition_by is not None:
|
|
637
|
-
if
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
columns = [
|
|
642
|
-
c if isinstance(c, Column) else Column(c.name, c.type)
|
|
643
|
-
for c in query.subquery().columns
|
|
644
|
-
]
|
|
645
|
-
temp_table = self.catalog.warehouse.create_dataset_rows_table(
|
|
646
|
-
self.catalog.warehouse.temp_table_name(),
|
|
647
|
-
columns=columns,
|
|
637
|
+
if "sys__id" not in query.selected_columns:
|
|
638
|
+
_query = query = self.catalog.warehouse._regenerate_system_columns(
|
|
639
|
+
query,
|
|
640
|
+
keep_existing_columns=True,
|
|
648
641
|
)
|
|
649
|
-
temp_tables.append(temp_table.name)
|
|
650
|
-
self.catalog.warehouse.copy_table(temp_table, query)
|
|
651
|
-
_query = query = temp_table.select()
|
|
652
642
|
|
|
653
643
|
partition_tbl = self.create_partitions_table(query)
|
|
654
644
|
temp_tables.append(partition_tbl.name)
|
|
@@ -22,7 +22,6 @@ from datachain.query.dataset import (
|
|
|
22
22
|
)
|
|
23
23
|
from datachain.query.queue import get_from_queue, put_into_queue
|
|
24
24
|
from datachain.query.udf import UdfInfo
|
|
25
|
-
from datachain.query.utils import get_query_id_column
|
|
26
25
|
from datachain.utils import batched, flatten, safe_closing
|
|
27
26
|
|
|
28
27
|
if TYPE_CHECKING:
|
|
@@ -55,6 +54,9 @@ def udf_entrypoint() -> int:
|
|
|
55
54
|
udf_info: UdfInfo = load(stdin.buffer)
|
|
56
55
|
|
|
57
56
|
query = udf_info["query"]
|
|
57
|
+
if "sys__id" not in query.selected_columns:
|
|
58
|
+
raise RuntimeError("sys__id column is required in UDF query")
|
|
59
|
+
|
|
58
60
|
batching = udf_info["batching"]
|
|
59
61
|
is_generator = udf_info["is_generator"]
|
|
60
62
|
|
|
@@ -65,15 +67,16 @@ def udf_entrypoint() -> int:
|
|
|
65
67
|
wh_cls, wh_args, wh_kwargs = udf_info["warehouse_clone_params"]
|
|
66
68
|
warehouse: AbstractWarehouse = wh_cls(*wh_args, **wh_kwargs)
|
|
67
69
|
|
|
68
|
-
id_col = get_query_id_column(query)
|
|
69
|
-
|
|
70
70
|
with contextlib.closing(
|
|
71
|
-
batching(
|
|
71
|
+
batching(
|
|
72
|
+
warehouse.dataset_select_paginated,
|
|
73
|
+
query,
|
|
74
|
+
id_col=query.selected_columns.sys__id,
|
|
75
|
+
)
|
|
72
76
|
) as udf_inputs:
|
|
73
77
|
try:
|
|
74
78
|
UDFDispatcher(udf_info).run_udf(
|
|
75
79
|
udf_inputs,
|
|
76
|
-
ids_only=id_col is not None,
|
|
77
80
|
download_cb=download_cb,
|
|
78
81
|
processed_cb=processed_cb,
|
|
79
82
|
generated_cb=generated_cb,
|
|
@@ -147,10 +150,10 @@ class UDFDispatcher:
|
|
|
147
150
|
self.udf_fields,
|
|
148
151
|
)
|
|
149
152
|
|
|
150
|
-
def _run_worker(self
|
|
153
|
+
def _run_worker(self) -> None:
|
|
151
154
|
try:
|
|
152
155
|
worker = self._create_worker()
|
|
153
|
-
worker.run(
|
|
156
|
+
worker.run()
|
|
154
157
|
except (Exception, KeyboardInterrupt) as e:
|
|
155
158
|
if self.done_queue:
|
|
156
159
|
put_into_queue(
|
|
@@ -164,7 +167,6 @@ class UDFDispatcher:
|
|
|
164
167
|
def run_udf(
|
|
165
168
|
self,
|
|
166
169
|
input_rows: Iterable["RowsOutput"],
|
|
167
|
-
ids_only: bool,
|
|
168
170
|
download_cb: Callback = DEFAULT_CALLBACK,
|
|
169
171
|
processed_cb: Callback = DEFAULT_CALLBACK,
|
|
170
172
|
generated_cb: Callback = DEFAULT_CALLBACK,
|
|
@@ -178,9 +180,7 @@ class UDFDispatcher:
|
|
|
178
180
|
|
|
179
181
|
if n_workers == 1:
|
|
180
182
|
# no need to spawn worker processes if we are running in a single process
|
|
181
|
-
self.run_udf_single(
|
|
182
|
-
input_rows, ids_only, download_cb, processed_cb, generated_cb
|
|
183
|
-
)
|
|
183
|
+
self.run_udf_single(input_rows, download_cb, processed_cb, generated_cb)
|
|
184
184
|
else:
|
|
185
185
|
if self.buffer_size < n_workers:
|
|
186
186
|
raise RuntimeError(
|
|
@@ -189,13 +189,12 @@ class UDFDispatcher:
|
|
|
189
189
|
)
|
|
190
190
|
|
|
191
191
|
self.run_udf_parallel(
|
|
192
|
-
n_workers, input_rows,
|
|
192
|
+
n_workers, input_rows, download_cb, processed_cb, generated_cb
|
|
193
193
|
)
|
|
194
194
|
|
|
195
195
|
def run_udf_single(
|
|
196
196
|
self,
|
|
197
197
|
input_rows: Iterable["RowsOutput"],
|
|
198
|
-
ids_only: bool,
|
|
199
198
|
download_cb: Callback = DEFAULT_CALLBACK,
|
|
200
199
|
processed_cb: Callback = DEFAULT_CALLBACK,
|
|
201
200
|
generated_cb: Callback = DEFAULT_CALLBACK,
|
|
@@ -204,18 +203,15 @@ class UDFDispatcher:
|
|
|
204
203
|
# Rebuild schemas in single process too for consistency (cheap, idempotent).
|
|
205
204
|
ModelStore.rebuild_all()
|
|
206
205
|
|
|
207
|
-
if
|
|
206
|
+
if not self.is_batching:
|
|
208
207
|
input_rows = flatten(input_rows)
|
|
209
208
|
|
|
210
209
|
def get_inputs() -> Iterable["RowsOutput"]:
|
|
211
210
|
warehouse = self.catalog.warehouse.clone()
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
)
|
|
217
|
-
else:
|
|
218
|
-
yield from input_rows
|
|
211
|
+
for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
|
|
212
|
+
yield from warehouse.dataset_rows_select_from_ids(
|
|
213
|
+
self.query, ids, self.is_batching
|
|
214
|
+
)
|
|
219
215
|
|
|
220
216
|
prefetch = udf.prefetch
|
|
221
217
|
with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
|
|
@@ -249,7 +245,6 @@ class UDFDispatcher:
|
|
|
249
245
|
self,
|
|
250
246
|
n_workers: int,
|
|
251
247
|
input_rows: Iterable["RowsOutput"],
|
|
252
|
-
ids_only: bool,
|
|
253
248
|
download_cb: Callback = DEFAULT_CALLBACK,
|
|
254
249
|
processed_cb: Callback = DEFAULT_CALLBACK,
|
|
255
250
|
generated_cb: Callback = DEFAULT_CALLBACK,
|
|
@@ -258,9 +253,7 @@ class UDFDispatcher:
|
|
|
258
253
|
self.done_queue = self.ctx.Queue()
|
|
259
254
|
|
|
260
255
|
pool = [
|
|
261
|
-
self.ctx.Process(
|
|
262
|
-
name=f"Worker-UDF-{i}", target=self._run_worker, args=[ids_only]
|
|
263
|
-
)
|
|
256
|
+
self.ctx.Process(name=f"Worker-UDF-{i}", target=self._run_worker)
|
|
264
257
|
for i in range(n_workers)
|
|
265
258
|
]
|
|
266
259
|
for p in pool:
|
|
@@ -406,13 +399,13 @@ class UDFWorker:
|
|
|
406
399
|
self.processed_cb = ProcessedCallback("processed", self.done_queue)
|
|
407
400
|
self.generated_cb = ProcessedCallback("generated", self.done_queue)
|
|
408
401
|
|
|
409
|
-
def run(self
|
|
402
|
+
def run(self) -> None:
|
|
410
403
|
prefetch = self.udf.prefetch
|
|
411
404
|
with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
|
|
412
405
|
catalog = clone_catalog_with_cache(self.catalog, _cache)
|
|
413
406
|
udf_results = self.udf.run(
|
|
414
407
|
self.udf_fields,
|
|
415
|
-
self.get_inputs(
|
|
408
|
+
self.get_inputs(),
|
|
416
409
|
catalog,
|
|
417
410
|
self.cache,
|
|
418
411
|
download_cb=self.download_cb,
|
|
@@ -434,13 +427,10 @@ class UDFWorker:
|
|
|
434
427
|
put_into_queue(self.done_queue, {"status": OK_STATUS})
|
|
435
428
|
yield row
|
|
436
429
|
|
|
437
|
-
def get_inputs(self
|
|
430
|
+
def get_inputs(self) -> Iterable["RowsOutput"]:
|
|
438
431
|
warehouse = self.catalog.warehouse.clone()
|
|
439
432
|
while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
)
|
|
445
|
-
else:
|
|
446
|
-
yield from batch
|
|
433
|
+
for ids in batched(batch, DEFAULT_BATCH_SIZE):
|
|
434
|
+
yield from warehouse.dataset_rows_select_from_ids(
|
|
435
|
+
self.query, ids, self.is_batching
|
|
436
|
+
)
|
|
@@ -249,7 +249,6 @@ src/datachain/query/queue.py
|
|
|
249
249
|
src/datachain/query/schema.py
|
|
250
250
|
src/datachain/query/session.py
|
|
251
251
|
src/datachain/query/udf.py
|
|
252
|
-
src/datachain/query/utils.py
|
|
253
252
|
src/datachain/remote/__init__.py
|
|
254
253
|
src/datachain/remote/studio.py
|
|
255
254
|
src/datachain/sql/__init__.py
|
|
@@ -301,7 +300,6 @@ tests/examples/wds_data.py
|
|
|
301
300
|
tests/func/__init__.py
|
|
302
301
|
tests/func/fake-service-account-credentials.json
|
|
303
302
|
tests/func/test_audio.py
|
|
304
|
-
tests/func/test_batching.py
|
|
305
303
|
tests/func/test_catalog.py
|
|
306
304
|
tests/func/test_client.py
|
|
307
305
|
tests/func/test_cloud_transfer.py
|
|
@@ -361,6 +359,7 @@ tests/scripts/feature_class_parallel_data_model.py
|
|
|
361
359
|
tests/scripts/name_len_slow.py
|
|
362
360
|
tests/unit/__init__.py
|
|
363
361
|
tests/unit/test_asyn.py
|
|
362
|
+
tests/unit/test_batching.py
|
|
364
363
|
tests/unit/test_cache.py
|
|
365
364
|
tests/unit/test_catalog.py
|
|
366
365
|
tests/unit/test_catalog_loader.py
|
|
@@ -1065,6 +1065,52 @@ def dog_entries():
|
|
|
1065
1065
|
return _create_dog_entries
|
|
1066
1066
|
|
|
1067
1067
|
|
|
1068
|
+
PRIMES_UP_TO_73 = (
|
|
1069
|
+
2,
|
|
1070
|
+
3,
|
|
1071
|
+
5,
|
|
1072
|
+
7,
|
|
1073
|
+
11,
|
|
1074
|
+
13,
|
|
1075
|
+
17,
|
|
1076
|
+
19,
|
|
1077
|
+
23,
|
|
1078
|
+
29,
|
|
1079
|
+
31,
|
|
1080
|
+
37,
|
|
1081
|
+
41,
|
|
1082
|
+
43,
|
|
1083
|
+
47,
|
|
1084
|
+
53,
|
|
1085
|
+
59,
|
|
1086
|
+
61,
|
|
1087
|
+
67,
|
|
1088
|
+
71,
|
|
1089
|
+
73,
|
|
1090
|
+
)
|
|
1091
|
+
|
|
1092
|
+
|
|
1093
|
+
@pytest.fixture
|
|
1094
|
+
def numbers_ds(test_session) -> Generator[DatasetRecord, None, None]:
|
|
1095
|
+
numbers = list(range(1, 74))
|
|
1096
|
+
ds = dc.read_values(
|
|
1097
|
+
number=numbers,
|
|
1098
|
+
parity=["odd" if n % 2 else "even" for n in numbers],
|
|
1099
|
+
primality=["prime" if n in PRIMES_UP_TO_73 else "composite" for n in numbers],
|
|
1100
|
+
last_digit=[n % 10 for n in numbers],
|
|
1101
|
+
session=test_session,
|
|
1102
|
+
).save("numbers_dataset")
|
|
1103
|
+
assert ds.dataset is not None
|
|
1104
|
+
yield ds.dataset
|
|
1105
|
+
dc.delete_dataset(ds.dataset.name, force=True)
|
|
1106
|
+
|
|
1107
|
+
|
|
1108
|
+
@pytest.fixture
|
|
1109
|
+
def numbers_table(warehouse, numbers_ds) -> Generator[sqlalchemy.Table, None, None]:
|
|
1110
|
+
table_name = warehouse.dataset_table_name(numbers_ds, numbers_ds.latest_version)
|
|
1111
|
+
yield warehouse.get_table(table_name)
|
|
1112
|
+
|
|
1113
|
+
|
|
1068
1114
|
@pytest.fixture
|
|
1069
1115
|
def mock_parquet_data(compressed_parquet_data, dog_entries, version="1.0.0"):
|
|
1070
1116
|
return compressed_parquet_data(dog_entries(version))
|