datachain 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- {datachain-0.3.0 → datachain-0.3.1}/.github/workflows/tests.yml +20 -1
- {datachain-0.3.0/src/datachain.egg-info → datachain-0.3.1}/PKG-INFO +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/catalog/catalog.py +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/client/fsspec.py +1 -4
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/client/local.py +2 -7
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/warehouse.py +8 -14
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/dc.py +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/udf.py +21 -14
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/batch.py +45 -41
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/dataset.py +13 -6
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/dispatch.py +53 -68
- datachain-0.3.1/src/datachain/query/queue.py +120 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/udf.py +23 -8
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/utils.py +17 -2
- {datachain-0.3.0 → datachain-0.3.1/src/datachain.egg-info}/PKG-INFO +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain.egg-info/SOURCES.txt +1 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/conftest.py +2 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_catalog.py +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_pull.py +7 -7
- {datachain-0.3.0 → datachain-0.3.1}/tests/test_query_e2e.py +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_datachain.py +3 -3
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_client.py +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_database_engine.py +2 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_id_generator.py +2 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_listing.py +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_storage.py +1 -1
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_udf.py +2 -2
- {datachain-0.3.0 → datachain-0.3.1}/tests/utils.py +5 -3
- {datachain-0.3.0 → datachain-0.3.1}/.cruft.json +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.gitattributes +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/ISSUE_TEMPLATE/empty_issue.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/codecov.yaml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/dependabot.yml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/workflows/benchmarks.yml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/workflows/release.yml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.github/workflows/update-template.yaml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.gitignore +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/.pre-commit-config.yaml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/CODE_OF_CONDUCT.rst +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/CONTRIBUTING.rst +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/LICENSE +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/README.rst +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/assets/captioned_cartoons.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/assets/datachain.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/assets/flowchart.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/index.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/references/datachain.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/references/datatype.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/references/file.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/references/index.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/references/sql.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/references/torch.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/docs/references/udf.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/blip2_image_desc_lib.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/.gitignore +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/1-quick-start.ipynb +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/2-working-with-image-datachains.ipynb +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/3-train-model.ipynb +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/4-inference.ipynb +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/README.md +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/requirements.txt +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/scripts/1-quick-start.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/scripts/2-basic-operations.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/scripts/2-embeddings.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/scripts/3-split-train-test.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/scripts/3-train-model.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/src/clustering.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/src/train.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/basic-operations.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/core-concepts.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/datachain-logo.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/datachain-overview.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/dataset-1.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/dataset-2.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/dataset-3.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/fashion_product_images/static/images/studio.png +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/iptc_exif_xmp_lib.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/llava2_image_desc_lib.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/computer_vision/openimage-detect.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/get_started/common_sql_functions.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/get_started/json-csv-reader.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/get_started/json-metadata-tutorial.ipynb +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/get_started/torch-loader.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/get_started/udfs/parallel.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/get_started/udfs/simple.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/get_started/udfs/stateful.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/llm/llm_chatbot_evaluation.ipynb +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/llm_and_nlp/llm-claude-aggregate-query.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/llm_and_nlp/llm-claude-simple-query.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/llm_and_nlp/llm-claude.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/llm_and_nlp/unstructured-text.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/multimodal/clip_fine_tuning.ipynb +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/multimodal/clip_inference.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/multimodal/hf_pipeline.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/multimodal/openai_image_desc_lib.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/multimodal/wds.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/examples/multimodal/wds_filtered.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/mkdocs.yml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/noxfile.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/pyproject.toml +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/setup.cfg +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/__main__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/asyn.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/cache.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/catalog/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/catalog/datasource.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/catalog/loader.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/catalog/subclass.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/cli.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/cli_utils.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/client/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/client/azure.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/client/fileslice.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/client/gcs.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/client/s3.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/config.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/db_engine.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/id_generator.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/job.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/metastore.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/schema.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/serializer.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/data_storage/sqlite.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/dataset.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/error.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/job.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/arrow.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/clip.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/convert/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/convert/flatten.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/convert/python_to_sql.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/convert/sql_to_python.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/convert/unflatten.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/convert/values_to_tuples.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/data_model.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/dataset_info.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/file.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/image.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/meta_formats.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/model_store.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/pytorch.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/settings.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/signal_schema.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/text.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/udf_signature.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/utils.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/vfile.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/webdataset.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/lib/webdataset_laion.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/listing.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/node.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/nodes_fetcher.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/nodes_thread_pool.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/progress.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/py.typed +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/builtins.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/metrics.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/params.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/schema.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/query/session.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/remote/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/remote/studio.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/default/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/default/base.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/functions/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/functions/array.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/functions/conditional.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/functions/path.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/functions/random.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/functions/string.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/selectable.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/sqlite/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/sqlite/base.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/sqlite/types.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/sqlite/vector.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/types.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/sql/utils.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/storage.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain/torch/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain.egg-info/dependency_links.txt +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain.egg-info/entry_points.txt +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain.egg-info/requires.txt +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/src/datachain.egg-info/top_level.txt +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/benchmarks/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/benchmarks/conftest.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/benchmarks/test_ls.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/benchmarks/test_version.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/data.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/examples/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/examples/test_wds_e2e.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/examples/wds_data.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_client.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_datachain.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_dataset_query.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_datasets.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_feature_pickling.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_ls.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_pytorch.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/func/test_query.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/scripts/feature_class.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/scripts/feature_class_parallel.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/scripts/feature_class_parallel_data_model.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/scripts/name_len_slow.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/test_cli_e2e.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/conftest.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_arrow.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_clip.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_datachain_bootstrap.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_datachain_merge.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_feature.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_feature_utils.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_file.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_image.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_signal_schema.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_sql_to_python.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_text.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_udf_signature.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_utils.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/lib/test_webdataset.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/sqlite/__init__.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/sqlite/test_utils.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/test_array.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/test_conditional.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/test_path.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/test_random.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/test_selectable.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/sql/test_string.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_asyn.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_cache.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_catalog.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_catalog_loader.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_cli_parsing.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_client_s3.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_data_storage.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_dataset.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_dispatch.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_fileslice.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_metastore.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_module_exports.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_query_metrics.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_query_params.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_serializer.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_session.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_utils.py +0 -0
- {datachain-0.3.0 → datachain-0.3.1}/tests/unit/test_warehouse.py +0 -0
|
@@ -8,6 +8,7 @@ on:
|
|
|
8
8
|
|
|
9
9
|
env:
|
|
10
10
|
FORCE_COLOR: "1"
|
|
11
|
+
BRANCH: ${{ github.head_ref || github.ref_name }}
|
|
11
12
|
|
|
12
13
|
concurrency:
|
|
13
14
|
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
|
@@ -144,12 +145,30 @@ jobs:
|
|
|
144
145
|
- 6379:6379
|
|
145
146
|
steps:
|
|
146
147
|
|
|
148
|
+
- name: Studio branch name
|
|
149
|
+
env:
|
|
150
|
+
BRANCH: ${{ env.BRANCH }}
|
|
151
|
+
STUDIO_READ_ACCESS_TOKEN: ${{ secrets.ITERATIVE_STUDIO_READ_ACCESS_TOKEN }}
|
|
152
|
+
run: |
|
|
153
|
+
echo "DataChain branch: $BRANCH"
|
|
154
|
+
if [[ "$BRANCH" == "main" ]]
|
|
155
|
+
then
|
|
156
|
+
STUDIO_BRANCH=develop
|
|
157
|
+
elif git ls-remote --heads https://"$STUDIO_READ_ACCESS_TOKEN"@github.com/iterative/studio.git "$BRANCH" | grep -F "$BRANCH" 2>&1>/dev/null
|
|
158
|
+
then
|
|
159
|
+
STUDIO_BRANCH="$BRANCH"
|
|
160
|
+
else
|
|
161
|
+
STUDIO_BRANCH=develop
|
|
162
|
+
fi
|
|
163
|
+
echo "STUDIO_BRANCH=$STUDIO_BRANCH" >> $GITHUB_ENV
|
|
164
|
+
echo "Studio branch: $STUDIO_BRANCH"
|
|
165
|
+
|
|
147
166
|
- name: Check out Studio
|
|
148
167
|
uses: actions/checkout@v4
|
|
149
168
|
with:
|
|
150
169
|
fetch-depth: 0
|
|
151
170
|
repository: iterative/studio
|
|
152
|
-
ref:
|
|
171
|
+
ref: ${{ env.STUDIO_BRANCH }}
|
|
153
172
|
token: ${{ secrets.ITERATIVE_STUDIO_READ_ACCESS_TOKEN }}
|
|
154
173
|
|
|
155
174
|
- name: Check out repository
|
|
@@ -676,7 +676,7 @@ class Catalog:
|
|
|
676
676
|
|
|
677
677
|
def parse_url(self, uri: str, **config: Any) -> tuple[Client, str]:
|
|
678
678
|
config = config or self.client_config
|
|
679
|
-
return Client.parse_url(uri, self.metastore, self.cache, **config)
|
|
679
|
+
return Client.parse_url(uri, self.cache, **config)
|
|
680
680
|
|
|
681
681
|
def get_client(self, uri: StorageURI, **config: Any) -> Client:
|
|
682
682
|
"""
|
|
@@ -37,7 +37,6 @@ from datachain.storage import StorageURI
|
|
|
37
37
|
if TYPE_CHECKING:
|
|
38
38
|
from fsspec.spec import AbstractFileSystem
|
|
39
39
|
|
|
40
|
-
from datachain.data_storage import AbstractMetastore
|
|
41
40
|
|
|
42
41
|
logger = logging.getLogger("datachain")
|
|
43
42
|
|
|
@@ -116,13 +115,12 @@ class Client(ABC):
|
|
|
116
115
|
@staticmethod
|
|
117
116
|
def parse_url(
|
|
118
117
|
source: str,
|
|
119
|
-
metastore: "AbstractMetastore",
|
|
120
118
|
cache: DataChainCache,
|
|
121
119
|
**kwargs,
|
|
122
120
|
) -> tuple["Client", str]:
|
|
123
121
|
cls = Client.get_implementation(source)
|
|
124
122
|
storage_url, rel_path = cls.split_url(source)
|
|
125
|
-
client = cls.from_name(storage_url, metastore, cache, kwargs)
|
|
123
|
+
client = cls.from_name(storage_url, cache, kwargs)
|
|
126
124
|
return client, rel_path
|
|
127
125
|
|
|
128
126
|
@classmethod
|
|
@@ -136,7 +134,6 @@ class Client(ABC):
|
|
|
136
134
|
def from_name(
|
|
137
135
|
cls,
|
|
138
136
|
name: str,
|
|
139
|
-
metastore: "AbstractMetastore",
|
|
140
137
|
cache: DataChainCache,
|
|
141
138
|
kwargs: dict[str, Any],
|
|
142
139
|
) -> "Client":
|
|
@@ -2,7 +2,7 @@ import os
|
|
|
2
2
|
import posixpath
|
|
3
3
|
from datetime import datetime, timezone
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import TYPE_CHECKING, Any
|
|
5
|
+
from typing import Any
|
|
6
6
|
from urllib.parse import urlparse
|
|
7
7
|
|
|
8
8
|
from fsspec.implementations.local import LocalFileSystem
|
|
@@ -12,9 +12,6 @@ from datachain.storage import StorageURI
|
|
|
12
12
|
|
|
13
13
|
from .fsspec import Client
|
|
14
14
|
|
|
15
|
-
if TYPE_CHECKING:
|
|
16
|
-
from datachain.data_storage import AbstractMetastore
|
|
17
|
-
|
|
18
15
|
|
|
19
16
|
class FileClient(Client):
|
|
20
17
|
FS_CLASS = LocalFileSystem
|
|
@@ -97,9 +94,7 @@ class FileClient(Client):
|
|
|
97
94
|
return cls.root_dir(), uri.removeprefix(cls.root_path().as_uri())
|
|
98
95
|
|
|
99
96
|
@classmethod
|
|
100
|
-
def from_name(
|
|
101
|
-
cls, name: str, metastore: "AbstractMetastore", cache, kwargs
|
|
102
|
-
) -> "FileClient":
|
|
97
|
+
def from_name(cls, name: str, cache, kwargs) -> "FileClient":
|
|
103
98
|
use_symlinks = kwargs.pop("use_symlinks", False)
|
|
104
99
|
return cls(name, kwargs, cache, use_symlinks=use_symlinks)
|
|
105
100
|
|
|
@@ -17,7 +17,7 @@ from sqlalchemy.sql.expression import true
|
|
|
17
17
|
|
|
18
18
|
from datachain.client import Client
|
|
19
19
|
from datachain.data_storage.serializer import Serializable
|
|
20
|
-
from datachain.dataset import DatasetRecord
|
|
20
|
+
from datachain.dataset import DatasetRecord
|
|
21
21
|
from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
|
|
22
22
|
from datachain.sql.functions import path as pathfunc
|
|
23
23
|
from datachain.sql.types import Int, SQLType
|
|
@@ -201,23 +201,17 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
201
201
|
def dataset_select_paginated(
|
|
202
202
|
self,
|
|
203
203
|
query,
|
|
204
|
-
limit: Optional[int] = None,
|
|
205
|
-
order_by: tuple["ColumnElement[Any]", ...] = (),
|
|
206
204
|
page_size: int = SELECT_BATCH_SIZE,
|
|
207
|
-
) -> Generator[
|
|
205
|
+
) -> Generator[Sequence, None, None]:
|
|
208
206
|
"""
|
|
209
207
|
This is equivalent to `db.execute`, but for selecting rows in batches
|
|
210
208
|
"""
|
|
211
|
-
|
|
212
|
-
|
|
209
|
+
limit = query._limit
|
|
210
|
+
paginated_query = query.limit(page_size)
|
|
213
211
|
|
|
214
|
-
if not
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
ordering = order_by # type: ignore[assignment]
|
|
218
|
-
|
|
219
|
-
# reset query order by and apply new order by id
|
|
220
|
-
paginated_query = query.order_by(None).order_by(*ordering).limit(page_size)
|
|
212
|
+
if not paginated_query._order_by_clauses:
|
|
213
|
+
# default order by is order by `sys__id`
|
|
214
|
+
paginated_query = paginated_query.order_by(query.selected_columns.sys__id)
|
|
221
215
|
|
|
222
216
|
results = None
|
|
223
217
|
offset = 0
|
|
@@ -236,7 +230,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
236
230
|
processed = False
|
|
237
231
|
for row in results:
|
|
238
232
|
processed = True
|
|
239
|
-
yield
|
|
233
|
+
yield row
|
|
240
234
|
num_yielded += 1
|
|
241
235
|
|
|
242
236
|
if not processed:
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
import traceback
|
|
3
|
-
from collections.abc import Iterable, Iterator
|
|
4
3
|
from typing import TYPE_CHECKING, Callable, Optional
|
|
5
4
|
|
|
6
5
|
from fsspec.callbacks import DEFAULT_CALLBACK, Callback
|
|
@@ -14,16 +13,19 @@ from datachain.lib.model_store import ModelStore
|
|
|
14
13
|
from datachain.lib.signal_schema import SignalSchema
|
|
15
14
|
from datachain.lib.udf_signature import UdfSignature
|
|
16
15
|
from datachain.lib.utils import AbstractUDF, DataChainError, DataChainParamsError
|
|
17
|
-
from datachain.query.batch import
|
|
16
|
+
from datachain.query.batch import UDFInputBatch
|
|
18
17
|
from datachain.query.schema import ColumnParameter
|
|
19
18
|
from datachain.query.udf import UDFBase as _UDFBase
|
|
20
|
-
from datachain.query.udf import UDFProperties
|
|
19
|
+
from datachain.query.udf import UDFProperties
|
|
21
20
|
|
|
22
21
|
if TYPE_CHECKING:
|
|
22
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
23
|
+
|
|
23
24
|
from typing_extensions import Self
|
|
24
25
|
|
|
25
26
|
from datachain.catalog import Catalog
|
|
26
|
-
from datachain.query.batch import
|
|
27
|
+
from datachain.query.batch import RowsOutput, UDFInput
|
|
28
|
+
from datachain.query.udf import UDFResult
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
class UdfError(DataChainParamsError):
|
|
@@ -42,22 +44,27 @@ class UDFAdapter(_UDFBase):
|
|
|
42
44
|
|
|
43
45
|
def run(
|
|
44
46
|
self,
|
|
45
|
-
|
|
47
|
+
udf_fields: "Sequence[str]",
|
|
48
|
+
udf_inputs: "Iterable[RowsOutput]",
|
|
46
49
|
catalog: "Catalog",
|
|
47
50
|
is_generator: bool,
|
|
48
51
|
cache: bool,
|
|
49
52
|
download_cb: Callback = DEFAULT_CALLBACK,
|
|
50
53
|
processed_cb: Callback = DEFAULT_CALLBACK,
|
|
51
|
-
) -> Iterator[Iterable[
|
|
54
|
+
) -> "Iterator[Iterable[UDFResult]]":
|
|
52
55
|
self.inner._catalog = catalog
|
|
53
56
|
if hasattr(self.inner, "setup") and callable(self.inner.setup):
|
|
54
57
|
self.inner.setup()
|
|
55
58
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
yield from super().run(
|
|
60
|
+
udf_fields,
|
|
61
|
+
udf_inputs,
|
|
62
|
+
catalog,
|
|
63
|
+
is_generator,
|
|
64
|
+
cache,
|
|
65
|
+
download_cb,
|
|
66
|
+
processed_cb,
|
|
67
|
+
)
|
|
61
68
|
|
|
62
69
|
if hasattr(self.inner, "teardown") and callable(self.inner.teardown):
|
|
63
70
|
self.inner.teardown()
|
|
@@ -65,12 +72,12 @@ class UDFAdapter(_UDFBase):
|
|
|
65
72
|
def run_once(
|
|
66
73
|
self,
|
|
67
74
|
catalog: "Catalog",
|
|
68
|
-
arg: "
|
|
75
|
+
arg: "UDFInput",
|
|
69
76
|
is_generator: bool = False,
|
|
70
77
|
cache: bool = False,
|
|
71
78
|
cb: Callback = DEFAULT_CALLBACK,
|
|
72
|
-
) -> Iterable[UDFResult]:
|
|
73
|
-
if isinstance(arg,
|
|
79
|
+
) -> "Iterable[UDFResult]":
|
|
80
|
+
if isinstance(arg, UDFInputBatch):
|
|
74
81
|
udf_inputs = [
|
|
75
82
|
self.bind_parameters(catalog, row, cache=cache, cb=cb)
|
|
76
83
|
for row in arg.rows
|
|
@@ -5,21 +5,29 @@ from collections.abc import Generator, Sequence
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import TYPE_CHECKING, Callable, Optional, Union
|
|
7
7
|
|
|
8
|
-
import sqlalchemy as sa
|
|
9
|
-
|
|
10
8
|
from datachain.data_storage.schema import PARTITION_COLUMN_ID
|
|
11
9
|
from datachain.data_storage.warehouse import SELECT_BATCH_SIZE
|
|
12
10
|
|
|
13
11
|
if TYPE_CHECKING:
|
|
12
|
+
from sqlalchemy import Select
|
|
13
|
+
|
|
14
14
|
from datachain.dataset import RowDict
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
@dataclass
|
|
18
|
-
class
|
|
18
|
+
class RowsOutputBatch:
|
|
19
|
+
rows: Sequence[Sequence]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
RowsOutput = Union[Sequence, RowsOutputBatch]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
|
|
26
|
+
class UDFInputBatch:
|
|
19
27
|
rows: Sequence["RowDict"]
|
|
20
28
|
|
|
21
29
|
|
|
22
|
-
|
|
30
|
+
UDFInput = Union["RowDict", UDFInputBatch]
|
|
23
31
|
|
|
24
32
|
|
|
25
33
|
class BatchingStrategy(ABC):
|
|
@@ -28,9 +36,9 @@ class BatchingStrategy(ABC):
|
|
|
28
36
|
@abstractmethod
|
|
29
37
|
def __call__(
|
|
30
38
|
self,
|
|
31
|
-
execute: Callable,
|
|
32
|
-
query:
|
|
33
|
-
) -> Generator[
|
|
39
|
+
execute: Callable[..., Generator[Sequence, None, None]],
|
|
40
|
+
query: "Select",
|
|
41
|
+
) -> Generator[RowsOutput, None, None]:
|
|
34
42
|
"""Apply the provided parameters to the UDF."""
|
|
35
43
|
|
|
36
44
|
|
|
@@ -42,10 +50,10 @@ class NoBatching(BatchingStrategy):
|
|
|
42
50
|
|
|
43
51
|
def __call__(
|
|
44
52
|
self,
|
|
45
|
-
execute: Callable,
|
|
46
|
-
query:
|
|
47
|
-
) -> Generator[
|
|
48
|
-
return execute(query
|
|
53
|
+
execute: Callable[..., Generator[Sequence, None, None]],
|
|
54
|
+
query: "Select",
|
|
55
|
+
) -> Generator[Sequence, None, None]:
|
|
56
|
+
return execute(query)
|
|
49
57
|
|
|
50
58
|
|
|
51
59
|
class Batch(BatchingStrategy):
|
|
@@ -59,31 +67,24 @@ class Batch(BatchingStrategy):
|
|
|
59
67
|
|
|
60
68
|
def __call__(
|
|
61
69
|
self,
|
|
62
|
-
execute: Callable,
|
|
63
|
-
query:
|
|
64
|
-
) -> Generator[
|
|
70
|
+
execute: Callable[..., Generator[Sequence, None, None]],
|
|
71
|
+
query: "Select",
|
|
72
|
+
) -> Generator[RowsOutputBatch, None, None]:
|
|
65
73
|
# choose page size that is a multiple of the batch size
|
|
66
74
|
page_size = math.ceil(SELECT_BATCH_SIZE / self.count) * self.count
|
|
67
75
|
|
|
68
76
|
# select rows in batches
|
|
69
|
-
results: list[
|
|
70
|
-
|
|
71
|
-
with contextlib.closing(
|
|
72
|
-
execute(
|
|
73
|
-
query,
|
|
74
|
-
page_size=page_size,
|
|
75
|
-
limit=query._limit,
|
|
76
|
-
order_by=query._order_by_clauses,
|
|
77
|
-
)
|
|
78
|
-
) as rows:
|
|
77
|
+
results: list[Sequence] = []
|
|
78
|
+
|
|
79
|
+
with contextlib.closing(execute(query, page_size=page_size)) as rows:
|
|
79
80
|
for row in rows:
|
|
80
81
|
results.append(row)
|
|
81
82
|
if len(results) >= self.count:
|
|
82
83
|
batch, results = results[: self.count], results[self.count :]
|
|
83
|
-
yield
|
|
84
|
+
yield RowsOutputBatch(batch)
|
|
84
85
|
|
|
85
86
|
if len(results) > 0:
|
|
86
|
-
yield
|
|
87
|
+
yield RowsOutputBatch(results)
|
|
87
88
|
|
|
88
89
|
|
|
89
90
|
class Partition(BatchingStrategy):
|
|
@@ -95,27 +96,30 @@ class Partition(BatchingStrategy):
|
|
|
95
96
|
|
|
96
97
|
def __call__(
|
|
97
98
|
self,
|
|
98
|
-
execute: Callable,
|
|
99
|
-
query:
|
|
100
|
-
) -> Generator[
|
|
99
|
+
execute: Callable[..., Generator[Sequence, None, None]],
|
|
100
|
+
query: "Select",
|
|
101
|
+
) -> Generator[RowsOutputBatch, None, None]:
|
|
101
102
|
current_partition: Optional[int] = None
|
|
102
|
-
batch: list[
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
103
|
+
batch: list[Sequence] = []
|
|
104
|
+
|
|
105
|
+
query_fields = [str(c.name) for c in query.selected_columns]
|
|
106
|
+
partition_column_idx = query_fields.index(PARTITION_COLUMN_ID)
|
|
107
|
+
|
|
108
|
+
ordered_query = query.order_by(None).order_by(
|
|
109
|
+
PARTITION_COLUMN_ID,
|
|
110
|
+
"sys__id",
|
|
111
|
+
*query._order_by_clauses,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
with contextlib.closing(execute(ordered_query)) as rows:
|
|
111
115
|
for row in rows:
|
|
112
|
-
partition = row[
|
|
116
|
+
partition = row[partition_column_idx]
|
|
113
117
|
if current_partition != partition:
|
|
114
118
|
current_partition = partition
|
|
115
119
|
if len(batch) > 0:
|
|
116
|
-
yield
|
|
120
|
+
yield RowsOutputBatch(batch)
|
|
117
121
|
batch = []
|
|
118
122
|
batch.append(row)
|
|
119
123
|
|
|
120
124
|
if len(batch) > 0:
|
|
121
|
-
yield
|
|
125
|
+
yield RowsOutputBatch(batch)
|
|
@@ -461,6 +461,8 @@ class UDFStep(Step, ABC):
|
|
|
461
461
|
|
|
462
462
|
processes = determine_processes(self.parallel)
|
|
463
463
|
|
|
464
|
+
udf_fields = [str(c.name) for c in query.selected_columns]
|
|
465
|
+
|
|
464
466
|
try:
|
|
465
467
|
if workers:
|
|
466
468
|
from datachain.catalog.loader import get_distributed_class
|
|
@@ -473,6 +475,7 @@ class UDFStep(Step, ABC):
|
|
|
473
475
|
query,
|
|
474
476
|
workers,
|
|
475
477
|
processes,
|
|
478
|
+
udf_fields=udf_fields,
|
|
476
479
|
is_generator=self.is_generator,
|
|
477
480
|
use_partitioning=use_partitioning,
|
|
478
481
|
cache=self.cache,
|
|
@@ -489,6 +492,7 @@ class UDFStep(Step, ABC):
|
|
|
489
492
|
"warehouse_clone_params": self.catalog.warehouse.clone_params(),
|
|
490
493
|
"table": udf_table,
|
|
491
494
|
"query": query,
|
|
495
|
+
"udf_fields": udf_fields,
|
|
492
496
|
"batching": batching,
|
|
493
497
|
"processes": processes,
|
|
494
498
|
"is_generator": self.is_generator,
|
|
@@ -528,6 +532,7 @@ class UDFStep(Step, ABC):
|
|
|
528
532
|
generated_cb = get_generated_callback(self.is_generator)
|
|
529
533
|
try:
|
|
530
534
|
udf_results = udf.run(
|
|
535
|
+
udf_fields,
|
|
531
536
|
udf_inputs,
|
|
532
537
|
self.catalog,
|
|
533
538
|
self.is_generator,
|
|
@@ -1244,21 +1249,23 @@ class DatasetQuery:
|
|
|
1244
1249
|
actual_params = [normalize_param(p) for p in params]
|
|
1245
1250
|
try:
|
|
1246
1251
|
query = self.apply_steps().select()
|
|
1252
|
+
query_fields = [str(c.name) for c in query.selected_columns]
|
|
1247
1253
|
|
|
1248
|
-
def row_iter() -> Generator[
|
|
1254
|
+
def row_iter() -> Generator[Sequence, None, None]:
|
|
1249
1255
|
# warehouse isn't threadsafe, we need to clone() it
|
|
1250
1256
|
# in the thread that uses the results
|
|
1251
1257
|
with self.catalog.warehouse.clone() as warehouse:
|
|
1252
|
-
gen = warehouse.dataset_select_paginated(
|
|
1253
|
-
query, limit=query._limit, order_by=query._order_by_clauses
|
|
1254
|
-
)
|
|
1258
|
+
gen = warehouse.dataset_select_paginated(query)
|
|
1255
1259
|
with contextlib.closing(gen) as rows:
|
|
1256
1260
|
yield from rows
|
|
1257
1261
|
|
|
1258
|
-
async def get_params(row:
|
|
1262
|
+
async def get_params(row: Sequence) -> tuple:
|
|
1263
|
+
row_dict = RowDict(zip(query_fields, row))
|
|
1259
1264
|
return tuple(
|
|
1260
1265
|
[
|
|
1261
|
-
await p.get_value_async(
|
|
1266
|
+
await p.get_value_async(
|
|
1267
|
+
self.catalog, row_dict, mapper, **kwargs
|
|
1268
|
+
)
|
|
1262
1269
|
for p in actual_params
|
|
1263
1270
|
]
|
|
1264
1271
|
)
|