datachain 0.30.6__tar.gz → 0.30.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.30.6 → datachain-0.30.7}/.pre-commit-config.yaml +1 -1
- {datachain-0.30.6 → datachain-0.30.7}/PKG-INFO +1 -1
- {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/udfs/parallel.py +2 -2
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/sqlite.py +18 -15
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/warehouse.py +7 -1
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/database.py +2 -2
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/datachain.py +28 -28
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/records.py +2 -4
- datachain-0.30.7/src/datachain/lib/settings.py +214 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/udf.py +3 -20
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/batch.py +2 -2
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/dataset.py +44 -17
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/dispatch.py +6 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/udf.py +2 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/utils.py +9 -10
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/PKG-INFO +1 -1
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_datachain.py +5 -5
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_to_database.py +1 -1
- datachain-0.30.7/tests/func/test_warehouse.py +87 -0
- datachain-0.30.7/tests/unit/lib/test_settings.py +472 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_utils.py +1 -1
- datachain-0.30.6/src/datachain/lib/settings.py +0 -111
- datachain-0.30.6/tests/func/test_warehouse.py +0 -35
- datachain-0.30.6/tests/unit/lib/test_settings.py +0 -61
- {datachain-0.30.6 → datachain-0.30.7}/.cruft.json +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.gitattributes +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/codecov.yaml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/dependabot.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/release.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/tests-studio.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/tests.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/.gitignore +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/LICENSE +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/README.rst +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/assets/datachain-white.svg +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/assets/datachain.svg +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/login.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/logout.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/team.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/auth/token.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/index.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/cancel.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/clusters.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/logs.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/ls.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/commands/job/run.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/contributing.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/css/github-permalink-style.css +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/examples.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/db_migrations.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/delta.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/env.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/index.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/namespaces.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/processing.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/remotes.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/guide/retry.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/index.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/overrides/main.html +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/quick-start.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/arrowrow.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/bbox.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/file.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/imagefile.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/index.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/pose.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/segment.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/tarvfile.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/textfile.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/data-types/videofile.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/datachain.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/func.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/aggregate.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/array.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/conditional.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/numeric.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/path.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/random.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/string.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/functions/window.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/index.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/toolkit.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/torch.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/references/udf.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/docs/tutorials.md +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/ultralytics-bbox.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/ultralytics-pose.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/computer_vision/ultralytics-segment.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/nested_datamodel.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/incremental_processing/delta.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/incremental_processing/retry.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/incremental_processing/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/llm_and_nlp/claude-query.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/llm_and_nlp/hf-dataset-llm-eval.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/audio-to-text.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/wds.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/mkdocs.yml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/noxfile.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/pyproject.toml +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/setup.cfg +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/__main__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/asyn.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cache.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/catalog.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/datasets.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/du.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/index.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/ls.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/misc.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/query.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/commands/show.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/job.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/studio.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/parser/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/cli/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/azure.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/fsspec.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/gcs.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/hf.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/local.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/client/s3.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/config.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/dataset.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/delta.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/diff/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/error.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/fs/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/fs/reference.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/fs/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/aggregate.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/array.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/base.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/conditional.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/func.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/numeric.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/path.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/random.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/string.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/func/window.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/job.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/audio.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/clip.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/csv.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/datasets.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/hf.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/json.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/listings.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/pandas.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/parquet.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/storage.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/dc/values.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/file.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/hf.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/image.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/listing.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/listing_info.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/namespaces.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/projects.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/tar.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/text.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/video.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/listing.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/bbox.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/pose.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/segment.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/bbox.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/pose.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/ultralytics/segment.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/model/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/namespace.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/node.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/progress.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/project.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/py.typed +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/metrics.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/params.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/queue.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/schema.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/session.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/query/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/remote/studio.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/script_meta.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/semver.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/aggregate.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/numeric.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/postgresql_dialect.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/postgresql_types.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/types.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/sql/utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/studio.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/telemetry.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/toolkit/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/toolkit/split.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/SOURCES.txt +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/.dvc/.gitignore +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/.dvc/config +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/.gitignore +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/datasets/laion-tiny.npz.dvc +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/test_datachain.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/conftest.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/data.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/examples/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/examples/test_examples.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/examples/wds_data.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/data/Big_Buck_Bunny_360_10s_1MB.mp4 +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/data/lena.jpg +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/fake-service-account-credentials.json +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_aggregate.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_array.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_conditional.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_numeric.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_path.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_random.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/functions/test_string.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/running-mask0.png +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/running-mask1.png +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/running.jpg +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/data/ships.jpg +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/model/test_yolo.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_audio.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_batching.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_catalog.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_client.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_cloud_transfer.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_data_storage.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_datachain_merge.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_datasets.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_delta.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_file.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_hf.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_hidden_field.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_image.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_listing.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_ls.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_meta_formats.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_metastore.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_metrics.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_mutate.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_pull.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_pytorch.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_query.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_read_database.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_read_dataset_remote.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_read_dataset_version_specifiers.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_retry.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_session.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_studio_datetime_parsing.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_toolkit.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/func/test_video.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class_exception.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/test_atomicity.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/test_cli_e2e.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/test_cli_studio.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/test_import_time.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/test_query_e2e.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/test_telemetry.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_audio.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_datachain.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_diff.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_hf.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_listing_info.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_namespace.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_partition_by.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_project.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_python_to_sql.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_schema.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_udf.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_bbox.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_pose.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_segment.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/model/test_utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/sqlite/test_types.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_asyn.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_cache.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_catalog.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_cli_datasets.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_client.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_client_gcs.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_config.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_database_engine.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_dataset.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_func.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_listing.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_metastore.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_pytorch.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_query.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_query_params.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_script_meta.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_semver.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_serializer.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_session.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/unit/test_warehouse.py +0 -0
- {datachain-0.30.6 → datachain-0.30.7}/tests/utils.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
This is a simple UDF to demonstrate local parallel processing with multiprocessing.
|
|
3
3
|
|
|
4
|
-
In add_signals specify either parallel
|
|
4
|
+
In add_signals specify either parallel=True to use processes equal to the number
|
|
5
5
|
of CPUs/cores on your current machine, or parallel=N for N processes.
|
|
6
6
|
The default if parallel is not specified is to run single-threaded.
|
|
7
7
|
|
|
@@ -33,7 +33,7 @@ def path_len_benchmark(path: str) -> int:
|
|
|
33
33
|
(
|
|
34
34
|
dc.read_storage("gs://datachain-demo/dogs-and-cats/", anon=True)
|
|
35
35
|
# Try to disable to see the difference in performance
|
|
36
|
-
.settings(parallel
|
|
36
|
+
.settings(parallel=True)
|
|
37
37
|
.map(path_len=path_len_benchmark, params=["file.path"])
|
|
38
38
|
.show()
|
|
39
39
|
)
|
|
@@ -37,6 +37,7 @@ from datachain import semver
|
|
|
37
37
|
from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
|
|
38
38
|
from datachain.data_storage.db_engine import DatabaseEngine
|
|
39
39
|
from datachain.data_storage.schema import DefaultSchema
|
|
40
|
+
from datachain.data_storage.warehouse import INSERT_BATCH_SIZE
|
|
40
41
|
from datachain.dataset import DatasetRecord, StorageURI
|
|
41
42
|
from datachain.error import DataChainError, OutdatedDatabaseSchemaError
|
|
42
43
|
from datachain.namespace import Namespace
|
|
@@ -44,7 +45,7 @@ from datachain.project import Project
|
|
|
44
45
|
from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
|
|
45
46
|
from datachain.sql.sqlite.base import load_usearch_extension
|
|
46
47
|
from datachain.sql.types import SQLType
|
|
47
|
-
from datachain.utils import DataChainDir, batched_it
|
|
48
|
+
from datachain.utils import DataChainDir, batched, batched_it
|
|
48
49
|
|
|
49
50
|
if TYPE_CHECKING:
|
|
50
51
|
from sqlalchemy.dialects.sqlite import Insert
|
|
@@ -712,19 +713,21 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
712
713
|
def prepare_entries(self, entries: "Iterable[File]") -> Iterable[dict[str, Any]]:
|
|
713
714
|
return (e.model_dump() for e in entries)
|
|
714
715
|
|
|
715
|
-
def insert_rows(
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
716
|
+
def insert_rows(
|
|
717
|
+
self,
|
|
718
|
+
table: Table,
|
|
719
|
+
rows: Iterable[dict[str, Any]],
|
|
720
|
+
batch_size: int = INSERT_BATCH_SIZE,
|
|
721
|
+
) -> None:
|
|
722
|
+
for row_chunk in batched(rows, batch_size):
|
|
723
|
+
with self.db.transaction() as conn:
|
|
724
|
+
# transactions speeds up inserts significantly as there is no separate
|
|
725
|
+
# transaction created for each insert row
|
|
726
|
+
self.db.executemany(
|
|
727
|
+
table.insert().values({f: bindparam(f) for f in row_chunk[0]}),
|
|
728
|
+
row_chunk,
|
|
729
|
+
conn=conn,
|
|
730
|
+
)
|
|
728
731
|
|
|
729
732
|
def insert_dataset_rows(self, df, dataset: DatasetRecord, version: str) -> int:
|
|
730
733
|
dr = self.dataset_rows(dataset, version)
|
|
@@ -797,7 +800,7 @@ class SQLiteWarehouse(AbstractWarehouse):
|
|
|
797
800
|
.limit(None)
|
|
798
801
|
)
|
|
799
802
|
|
|
800
|
-
for batch in batched_it(ids,
|
|
803
|
+
for batch in batched_it(ids, INSERT_BATCH_SIZE):
|
|
801
804
|
batch_ids = [row[0] for row in batch]
|
|
802
805
|
select_q._where_criteria = (col_id.in_(batch_ids),)
|
|
803
806
|
q = table.insert().from_select(list(select_q.selected_columns), select_q)
|
|
@@ -43,6 +43,7 @@ if TYPE_CHECKING:
|
|
|
43
43
|
logger = logging.getLogger("datachain")
|
|
44
44
|
|
|
45
45
|
SELECT_BATCH_SIZE = 100_000 # number of rows to fetch at a time
|
|
46
|
+
INSERT_BATCH_SIZE = 10_000 # number of rows to insert at a time
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
class AbstractWarehouse(ABC, Serializable):
|
|
@@ -415,7 +416,12 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
415
416
|
"""Convert File entries so they can be passed on to `insert_rows()`"""
|
|
416
417
|
|
|
417
418
|
@abstractmethod
|
|
418
|
-
def insert_rows(
|
|
419
|
+
def insert_rows(
|
|
420
|
+
self,
|
|
421
|
+
table: sa.Table,
|
|
422
|
+
rows: Iterable[dict[str, Any]],
|
|
423
|
+
batch_size: int = INSERT_BATCH_SIZE,
|
|
424
|
+
) -> None:
|
|
419
425
|
"""Does batch inserts of any kind of rows into table"""
|
|
420
426
|
|
|
421
427
|
def insert_rows_done(self, table: sa.Table) -> None:
|
|
@@ -73,7 +73,7 @@ def to_database(
|
|
|
73
73
|
table_name: str,
|
|
74
74
|
connection: "ConnectionType",
|
|
75
75
|
*,
|
|
76
|
-
|
|
76
|
+
batch_size: int = DEFAULT_DATABASE_BATCH_SIZE,
|
|
77
77
|
on_conflict: Optional[str] = None,
|
|
78
78
|
conflict_columns: Optional[list[str]] = None,
|
|
79
79
|
column_mapping: Optional[dict[str, Optional[str]]] = None,
|
|
@@ -124,7 +124,7 @@ def to_database(
|
|
|
124
124
|
table.create(conn, checkfirst=True)
|
|
125
125
|
|
|
126
126
|
rows_iter = chain._leaf_values()
|
|
127
|
-
for batch in batched(rows_iter,
|
|
127
|
+
for batch in batched(rows_iter, batch_size):
|
|
128
128
|
rows_affected = _process_batch(
|
|
129
129
|
conn,
|
|
130
130
|
table,
|
|
@@ -342,15 +342,15 @@ class DataChain:
|
|
|
342
342
|
|
|
343
343
|
def settings(
|
|
344
344
|
self,
|
|
345
|
-
cache=None,
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
prefetch: Optional[int] = None,
|
|
350
|
-
sys: Optional[bool] = None,
|
|
345
|
+
cache: Optional[bool] = None,
|
|
346
|
+
prefetch: Optional[Union[bool, int]] = None,
|
|
347
|
+
parallel: Optional[Union[bool, int]] = None,
|
|
348
|
+
workers: Optional[int] = None,
|
|
351
349
|
namespace: Optional[str] = None,
|
|
352
350
|
project: Optional[str] = None,
|
|
353
|
-
|
|
351
|
+
min_task_size: Optional[int] = None,
|
|
352
|
+
batch_size: Optional[int] = None,
|
|
353
|
+
sys: Optional[bool] = None,
|
|
354
354
|
) -> "Self":
|
|
355
355
|
"""Change settings for chain.
|
|
356
356
|
|
|
@@ -359,23 +359,23 @@ class DataChain:
|
|
|
359
359
|
|
|
360
360
|
Parameters:
|
|
361
361
|
cache : data caching. (default=False)
|
|
362
|
+
prefetch : number of workers to use for downloading files in advance.
|
|
363
|
+
This is enabled by default and uses 2 workers.
|
|
364
|
+
To disable prefetching, set it to 0 or False.
|
|
362
365
|
parallel : number of thread for processors. True is a special value to
|
|
363
366
|
enable all available CPUs. (default=1)
|
|
364
367
|
workers : number of distributed workers. Only for Studio mode. (default=1)
|
|
365
|
-
min_task_size : minimum number of tasks. (default=1)
|
|
366
|
-
prefetch : number of workers to use for downloading files in advance.
|
|
367
|
-
This is enabled by default and uses 2 workers.
|
|
368
|
-
To disable prefetching, set it to 0.
|
|
369
368
|
namespace : namespace name.
|
|
370
369
|
project : project name.
|
|
371
|
-
|
|
370
|
+
min_task_size : minimum number of tasks. (default=1)
|
|
371
|
+
batch_size : row limit per insert to balance speed and memory usage.
|
|
372
372
|
(default=2000)
|
|
373
373
|
|
|
374
374
|
Example:
|
|
375
375
|
```py
|
|
376
376
|
chain = (
|
|
377
377
|
chain
|
|
378
|
-
.settings(cache=True, parallel=8,
|
|
378
|
+
.settings(cache=True, parallel=8, batch_size=300)
|
|
379
379
|
.map(laion=process_webdataset(spec=WDSLaion), params="file")
|
|
380
380
|
)
|
|
381
381
|
```
|
|
@@ -385,14 +385,14 @@ class DataChain:
|
|
|
385
385
|
settings = copy.copy(self._settings)
|
|
386
386
|
settings.add(
|
|
387
387
|
Settings(
|
|
388
|
-
cache,
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
388
|
+
cache=cache,
|
|
389
|
+
prefetch=prefetch,
|
|
390
|
+
parallel=parallel,
|
|
391
|
+
workers=workers,
|
|
392
|
+
namespace=namespace,
|
|
393
|
+
project=project,
|
|
394
|
+
min_task_size=min_task_size,
|
|
395
|
+
batch_size=batch_size,
|
|
396
396
|
)
|
|
397
397
|
)
|
|
398
398
|
return self._evolve(settings=settings, _sys=sys)
|
|
@@ -745,7 +745,7 @@ class DataChain:
|
|
|
745
745
|
|
|
746
746
|
return self._evolve(
|
|
747
747
|
query=self._query.add_signals(
|
|
748
|
-
udf_obj.to_udf_wrapper(self._settings.
|
|
748
|
+
udf_obj.to_udf_wrapper(self._settings.batch_size),
|
|
749
749
|
**self._settings.to_dict(),
|
|
750
750
|
),
|
|
751
751
|
signal_schema=self.signals_schema | udf_obj.output,
|
|
@@ -783,7 +783,7 @@ class DataChain:
|
|
|
783
783
|
udf_obj.prefetch = prefetch
|
|
784
784
|
return self._evolve(
|
|
785
785
|
query=self._query.generate(
|
|
786
|
-
udf_obj.to_udf_wrapper(self._settings.
|
|
786
|
+
udf_obj.to_udf_wrapper(self._settings.batch_size),
|
|
787
787
|
**self._settings.to_dict(),
|
|
788
788
|
),
|
|
789
789
|
signal_schema=udf_obj.output,
|
|
@@ -919,7 +919,7 @@ class DataChain:
|
|
|
919
919
|
udf_obj = self._udf_to_obj(Aggregator, func, params, output, signal_map)
|
|
920
920
|
return self._evolve(
|
|
921
921
|
query=self._query.generate(
|
|
922
|
-
udf_obj.to_udf_wrapper(self._settings.
|
|
922
|
+
udf_obj.to_udf_wrapper(self._settings.batch_size),
|
|
923
923
|
partition_by=processed_partition_by,
|
|
924
924
|
**self._settings.to_dict(),
|
|
925
925
|
),
|
|
@@ -968,7 +968,7 @@ class DataChain:
|
|
|
968
968
|
|
|
969
969
|
return self._evolve(
|
|
970
970
|
query=self._query.add_signals(
|
|
971
|
-
udf_obj.to_udf_wrapper(self._settings.
|
|
971
|
+
udf_obj.to_udf_wrapper(self._settings.batch_size, batch=batch),
|
|
972
972
|
**self._settings.to_dict(),
|
|
973
973
|
),
|
|
974
974
|
signal_schema=self.signals_schema | udf_obj.output,
|
|
@@ -2314,7 +2314,7 @@ class DataChain:
|
|
|
2314
2314
|
table_name: str,
|
|
2315
2315
|
connection: "ConnectionType",
|
|
2316
2316
|
*,
|
|
2317
|
-
|
|
2317
|
+
batch_size: int = DEFAULT_DATABASE_BATCH_SIZE,
|
|
2318
2318
|
on_conflict: Optional[str] = None,
|
|
2319
2319
|
conflict_columns: Optional[list[str]] = None,
|
|
2320
2320
|
column_mapping: Optional[dict[str, Optional[str]]] = None,
|
|
@@ -2336,7 +2336,7 @@ class DataChain:
|
|
|
2336
2336
|
library. If a DBAPI2 object, only sqlite3 is supported. The user is
|
|
2337
2337
|
responsible for engine disposal and connection closure for the
|
|
2338
2338
|
SQLAlchemy connectable; str connections are closed automatically.
|
|
2339
|
-
|
|
2339
|
+
batch_size: Number of rows to insert per batch for optimal performance.
|
|
2340
2340
|
Larger batches are faster but use more memory. Default: 10,000.
|
|
2341
2341
|
on_conflict: Strategy for handling duplicate rows (requires table
|
|
2342
2342
|
constraints):
|
|
@@ -2417,7 +2417,7 @@ class DataChain:
|
|
|
2417
2417
|
self,
|
|
2418
2418
|
table_name,
|
|
2419
2419
|
connection,
|
|
2420
|
-
|
|
2420
|
+
batch_size=batch_size,
|
|
2421
2421
|
on_conflict=on_conflict,
|
|
2422
2422
|
conflict_columns=conflict_columns,
|
|
2423
2423
|
column_mapping=column_mapping,
|
|
@@ -31,7 +31,7 @@ def read_records(
|
|
|
31
31
|
|
|
32
32
|
Parameters:
|
|
33
33
|
to_insert : records (or a single record) to insert. Each record is
|
|
34
|
-
a dictionary of signals and
|
|
34
|
+
a dictionary of signals and their values.
|
|
35
35
|
schema : describes chain signals and their corresponding types
|
|
36
36
|
|
|
37
37
|
Example:
|
|
@@ -45,7 +45,6 @@ def read_records(
|
|
|
45
45
|
"""
|
|
46
46
|
from datachain.query.dataset import adjust_outputs, get_col_types
|
|
47
47
|
from datachain.sql.types import SQLType
|
|
48
|
-
from datachain.utils import batched
|
|
49
48
|
|
|
50
49
|
from .datasets import read_dataset
|
|
51
50
|
|
|
@@ -96,7 +95,6 @@ def read_records(
|
|
|
96
95
|
{c.name: c.type for c in columns if isinstance(c.type, SQLType)},
|
|
97
96
|
)
|
|
98
97
|
records = (adjust_outputs(warehouse, record, col_types) for record in to_insert)
|
|
99
|
-
|
|
100
|
-
warehouse.insert_rows(table, chunk)
|
|
98
|
+
warehouse.insert_rows(table, records, batch_size=READ_RECORDS_BATCH_SIZE)
|
|
101
99
|
warehouse.insert_rows_done(table)
|
|
102
100
|
return read_dataset(name=dsr.full_name, session=session, settings=settings)
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
from typing import Any, Optional, Union
|
|
2
|
+
|
|
3
|
+
from datachain.lib.utils import DataChainParamsError
|
|
4
|
+
|
|
5
|
+
DEFAULT_CACHE = False
|
|
6
|
+
DEFAULT_PREFETCH = 2
|
|
7
|
+
DEFAULT_BATCH_SIZE = 2_000
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SettingsError(DataChainParamsError):
|
|
11
|
+
def __init__(self, msg: str) -> None:
|
|
12
|
+
super().__init__(f"Dataset settings error: {msg}")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Settings:
|
|
16
|
+
"""Settings for datachain."""
|
|
17
|
+
|
|
18
|
+
_cache: Optional[bool]
|
|
19
|
+
_prefetch: Optional[int]
|
|
20
|
+
_parallel: Optional[Union[bool, int]]
|
|
21
|
+
_workers: Optional[int]
|
|
22
|
+
_namespace: Optional[str]
|
|
23
|
+
_project: Optional[str]
|
|
24
|
+
_min_task_size: Optional[int]
|
|
25
|
+
_batch_size: Optional[int]
|
|
26
|
+
|
|
27
|
+
def __init__( # noqa: C901, PLR0912
|
|
28
|
+
self,
|
|
29
|
+
cache: Optional[bool] = None,
|
|
30
|
+
prefetch: Optional[Union[bool, int]] = None,
|
|
31
|
+
parallel: Optional[Union[bool, int]] = None,
|
|
32
|
+
workers: Optional[int] = None,
|
|
33
|
+
namespace: Optional[str] = None,
|
|
34
|
+
project: Optional[str] = None,
|
|
35
|
+
min_task_size: Optional[int] = None,
|
|
36
|
+
batch_size: Optional[int] = None,
|
|
37
|
+
) -> None:
|
|
38
|
+
if cache is None:
|
|
39
|
+
self._cache = None
|
|
40
|
+
else:
|
|
41
|
+
if not isinstance(cache, bool):
|
|
42
|
+
raise SettingsError(
|
|
43
|
+
"'cache' argument must be bool"
|
|
44
|
+
f" while {cache.__class__.__name__} was given"
|
|
45
|
+
)
|
|
46
|
+
self._cache = cache
|
|
47
|
+
|
|
48
|
+
if prefetch is None or prefetch is True:
|
|
49
|
+
self._prefetch = None
|
|
50
|
+
elif prefetch is False:
|
|
51
|
+
self._prefetch = 0 # disable prefetch (False == 0)
|
|
52
|
+
else:
|
|
53
|
+
if not isinstance(prefetch, int):
|
|
54
|
+
raise SettingsError(
|
|
55
|
+
"'prefetch' argument must be int or bool"
|
|
56
|
+
f" while {prefetch.__class__.__name__} was given"
|
|
57
|
+
)
|
|
58
|
+
if prefetch < 0:
|
|
59
|
+
raise SettingsError(
|
|
60
|
+
"'prefetch' argument must be non-negative integer"
|
|
61
|
+
f", {prefetch} was given"
|
|
62
|
+
)
|
|
63
|
+
self._prefetch = prefetch
|
|
64
|
+
|
|
65
|
+
if parallel is None or parallel is False:
|
|
66
|
+
self._parallel = None
|
|
67
|
+
elif parallel is True:
|
|
68
|
+
self._parallel = True
|
|
69
|
+
else:
|
|
70
|
+
if not isinstance(parallel, int):
|
|
71
|
+
raise SettingsError(
|
|
72
|
+
"'parallel' argument must be int or bool"
|
|
73
|
+
f" while {parallel.__class__.__name__} was given"
|
|
74
|
+
)
|
|
75
|
+
if parallel <= 0:
|
|
76
|
+
raise SettingsError(
|
|
77
|
+
"'parallel' argument must be positive integer"
|
|
78
|
+
f", {parallel} was given"
|
|
79
|
+
)
|
|
80
|
+
self._parallel = parallel
|
|
81
|
+
|
|
82
|
+
if workers is None:
|
|
83
|
+
self._workers = None
|
|
84
|
+
else:
|
|
85
|
+
if not isinstance(workers, int) or isinstance(workers, bool):
|
|
86
|
+
raise SettingsError(
|
|
87
|
+
"'workers' argument must be int"
|
|
88
|
+
f" while {workers.__class__.__name__} was given"
|
|
89
|
+
)
|
|
90
|
+
if workers <= 0:
|
|
91
|
+
raise SettingsError(
|
|
92
|
+
f"'workers' argument must be positive integer, {workers} was given"
|
|
93
|
+
)
|
|
94
|
+
self._workers = workers
|
|
95
|
+
|
|
96
|
+
if namespace is None:
|
|
97
|
+
self._namespace = None
|
|
98
|
+
else:
|
|
99
|
+
if not isinstance(namespace, str):
|
|
100
|
+
raise SettingsError(
|
|
101
|
+
"'namespace' argument must be str"
|
|
102
|
+
f", {namespace.__class__.__name__} was given"
|
|
103
|
+
)
|
|
104
|
+
self._namespace = namespace
|
|
105
|
+
|
|
106
|
+
if project is None:
|
|
107
|
+
self._project = None
|
|
108
|
+
else:
|
|
109
|
+
if not isinstance(project, str):
|
|
110
|
+
raise SettingsError(
|
|
111
|
+
"'project' argument must be str"
|
|
112
|
+
f", {project.__class__.__name__} was given"
|
|
113
|
+
)
|
|
114
|
+
self._project = project
|
|
115
|
+
|
|
116
|
+
if min_task_size is None:
|
|
117
|
+
self._min_task_size = None
|
|
118
|
+
else:
|
|
119
|
+
if not isinstance(min_task_size, int) or isinstance(min_task_size, bool):
|
|
120
|
+
raise SettingsError(
|
|
121
|
+
"'min_task_size' argument must be int"
|
|
122
|
+
f", {min_task_size.__class__.__name__} was given"
|
|
123
|
+
)
|
|
124
|
+
if min_task_size <= 0:
|
|
125
|
+
raise SettingsError(
|
|
126
|
+
"'min_task_size' argument must be positive integer"
|
|
127
|
+
f", {min_task_size} was given"
|
|
128
|
+
)
|
|
129
|
+
self._min_task_size = min_task_size
|
|
130
|
+
|
|
131
|
+
if batch_size is None:
|
|
132
|
+
self._batch_size = None
|
|
133
|
+
else:
|
|
134
|
+
if not isinstance(batch_size, int) or isinstance(batch_size, bool):
|
|
135
|
+
raise SettingsError(
|
|
136
|
+
"'batch_size' argument must be int"
|
|
137
|
+
f", {batch_size.__class__.__name__} was given"
|
|
138
|
+
)
|
|
139
|
+
if batch_size <= 0:
|
|
140
|
+
raise SettingsError(
|
|
141
|
+
"'batch_size' argument must be positive integer"
|
|
142
|
+
f", {batch_size} was given"
|
|
143
|
+
)
|
|
144
|
+
self._batch_size = batch_size
|
|
145
|
+
|
|
146
|
+
@property
|
|
147
|
+
def cache(self) -> bool:
|
|
148
|
+
return self._cache if self._cache is not None else DEFAULT_CACHE
|
|
149
|
+
|
|
150
|
+
@property
|
|
151
|
+
def prefetch(self) -> Optional[int]:
|
|
152
|
+
return self._prefetch if self._prefetch is not None else DEFAULT_PREFETCH
|
|
153
|
+
|
|
154
|
+
@property
|
|
155
|
+
def parallel(self) -> Optional[Union[bool, int]]:
|
|
156
|
+
return self._parallel if self._parallel is not None else None
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def workers(self) -> Optional[int]:
|
|
160
|
+
return self._workers if self._workers is not None else None
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def namespace(self) -> Optional[str]:
|
|
164
|
+
return self._namespace if self._namespace is not None else None
|
|
165
|
+
|
|
166
|
+
@property
|
|
167
|
+
def project(self) -> Optional[str]:
|
|
168
|
+
return self._project if self._project is not None else None
|
|
169
|
+
|
|
170
|
+
@property
|
|
171
|
+
def min_task_size(self) -> Optional[int]:
|
|
172
|
+
return self._min_task_size if self._min_task_size is not None else None
|
|
173
|
+
|
|
174
|
+
@property
|
|
175
|
+
def batch_size(self) -> int:
|
|
176
|
+
return self._batch_size if self._batch_size is not None else DEFAULT_BATCH_SIZE
|
|
177
|
+
|
|
178
|
+
def to_dict(self) -> dict[str, Any]:
|
|
179
|
+
res: dict[str, Any] = {}
|
|
180
|
+
if self._cache is not None:
|
|
181
|
+
res["cache"] = self.cache
|
|
182
|
+
if self._prefetch is not None:
|
|
183
|
+
res["prefetch"] = self.prefetch
|
|
184
|
+
if self._parallel is not None:
|
|
185
|
+
res["parallel"] = self.parallel
|
|
186
|
+
if self._workers is not None:
|
|
187
|
+
res["workers"] = self.workers
|
|
188
|
+
if self._min_task_size is not None:
|
|
189
|
+
res["min_task_size"] = self.min_task_size
|
|
190
|
+
if self._namespace is not None:
|
|
191
|
+
res["namespace"] = self.namespace
|
|
192
|
+
if self._project is not None:
|
|
193
|
+
res["project"] = self.project
|
|
194
|
+
if self._batch_size is not None:
|
|
195
|
+
res["batch_size"] = self.batch_size
|
|
196
|
+
return res
|
|
197
|
+
|
|
198
|
+
def add(self, settings: "Settings") -> None:
|
|
199
|
+
if settings._cache is not None:
|
|
200
|
+
self._cache = settings._cache
|
|
201
|
+
if settings._prefetch is not None:
|
|
202
|
+
self._prefetch = settings._prefetch
|
|
203
|
+
if settings._parallel is not None:
|
|
204
|
+
self._parallel = settings._parallel
|
|
205
|
+
if settings._workers is not None:
|
|
206
|
+
self._workers = settings._workers
|
|
207
|
+
if settings._namespace is not None:
|
|
208
|
+
self._namespace = settings._namespace
|
|
209
|
+
if settings._project is not None:
|
|
210
|
+
self._project = settings._project
|
|
211
|
+
if settings._min_task_size is not None:
|
|
212
|
+
self._min_task_size = settings._min_task_size
|
|
213
|
+
if settings._batch_size is not None:
|
|
214
|
+
self._batch_size = settings._batch_size
|
|
@@ -54,23 +54,11 @@ UDFOutputSpec = Mapping[str, ColumnType]
|
|
|
54
54
|
UDFResult = dict[str, Any]
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
@attrs.define
|
|
58
|
-
class UDFProperties:
|
|
59
|
-
udf: "UDFAdapter"
|
|
60
|
-
|
|
61
|
-
def get_batching(self, use_partitioning: bool = False) -> BatchingStrategy:
|
|
62
|
-
return self.udf.get_batching(use_partitioning)
|
|
63
|
-
|
|
64
|
-
@property
|
|
65
|
-
def batch_rows(self):
|
|
66
|
-
return self.udf.batch_rows
|
|
67
|
-
|
|
68
|
-
|
|
69
57
|
@attrs.define(slots=False)
|
|
70
58
|
class UDFAdapter:
|
|
71
59
|
inner: "UDFBase"
|
|
72
60
|
output: UDFOutputSpec
|
|
73
|
-
|
|
61
|
+
batch_size: Optional[int] = None
|
|
74
62
|
batch: int = 1
|
|
75
63
|
|
|
76
64
|
def get_batching(self, use_partitioning: bool = False) -> BatchingStrategy:
|
|
@@ -83,11 +71,6 @@ class UDFAdapter:
|
|
|
83
71
|
return Batch(self.batch)
|
|
84
72
|
raise ValueError(f"invalid batch size {self.batch}")
|
|
85
73
|
|
|
86
|
-
@property
|
|
87
|
-
def properties(self):
|
|
88
|
-
# For backwards compatibility.
|
|
89
|
-
return UDFProperties(self)
|
|
90
|
-
|
|
91
74
|
def run(
|
|
92
75
|
self,
|
|
93
76
|
udf_fields: "Sequence[str]",
|
|
@@ -237,13 +220,13 @@ class UDFBase(AbstractUDF):
|
|
|
237
220
|
|
|
238
221
|
def to_udf_wrapper(
|
|
239
222
|
self,
|
|
240
|
-
|
|
223
|
+
batch_size: Optional[int] = None,
|
|
241
224
|
batch: int = 1,
|
|
242
225
|
) -> UDFAdapter:
|
|
243
226
|
return UDFAdapter(
|
|
244
227
|
self,
|
|
245
228
|
self.output.to_udf_spec(),
|
|
246
|
-
|
|
229
|
+
batch_size,
|
|
247
230
|
batch,
|
|
248
231
|
)
|
|
249
232
|
|
|
@@ -81,8 +81,8 @@ class Batch(BatchingStrategy):
|
|
|
81
81
|
# select rows in batches
|
|
82
82
|
results = []
|
|
83
83
|
|
|
84
|
-
with contextlib.closing(execute(query, page_size=page_size)) as
|
|
85
|
-
for row in
|
|
84
|
+
with contextlib.closing(execute(query, page_size=page_size)) as rows:
|
|
85
|
+
for row in rows:
|
|
86
86
|
results.append(row)
|
|
87
87
|
if len(results) >= self.count:
|
|
88
88
|
batch, results = results[: self.count], results[self.count :]
|