pipefunc 0.92.0__tar.gz → 0.93.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pipefunc-0.92.0 → pipefunc-0.93.0}/PKG-INFO +1 -1
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/function-io.md +37 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/faq.md +4 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipefunc.py +17 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_base.py +1 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_utils.py +46 -2
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_version.py +1 -1
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/cache.py +4 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run.py +27 -1
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_file.py +11 -3
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/typing.py +16 -1
- pipefunc-0.93.0/tests/test_polars_parquet.py +234 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.devcontainer/Dockerfile +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.devcontainer/devcontainer.json +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.gitattributes +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/doc-string-check.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/generate-release-notes.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/pr-title-checker-config.json +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/release-drafter.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/renovate.json +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/update-environment.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/codeql.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/codspeed.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/doc-string-check.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pr-title-checker.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pytest-micromamba.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pytest-uv.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pythonpublish.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/release-drafter.yaml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/toc.yaml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/update-environment.yaml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.gitignore +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.pre-commit-config.yaml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.readthedocs.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.vscode/launch.json +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/.vscode/settings.json +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/AUTHORS.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/CHANGELOG.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/LICENSE +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/MANIFEST.in +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/README.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/.gitignore +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/Makefile +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/environment-sphinx.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/_static/custom.css +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/changelog.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/adaptive-integration.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/caching.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/chaining.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/cli.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/error-handling.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/execution-and-parallelism.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/index.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/map_async_in_script.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mapspec.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mcp.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mcp_agent.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mcp_server.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/overhead-and-efficiency.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/parameter-scopes.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/parameter-sweeps.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/resource-management.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/run-status.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/simplifying-pipelines.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/slurm.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/testing.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/type-checking.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/variants.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/conf.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/basic-usage.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/image-processing.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/index.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/nlp-text-summarization.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/physics-simulation.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/sensor-data-processing.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/weather-simulation.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/index.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/index.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.cache.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.helpers.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.lazy.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.adaptive.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.adaptive_scheduler.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.xarray.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.mcp.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.resources.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.sweep.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.testing.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.typing.md +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/environment.yml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/example.ipynb +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/get-notebooks.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/noxfile.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_adaptive.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_error_handling.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipefunc_utils.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_autodoc.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_cache.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_cli.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_mapspec.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_pydantic.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_simplify.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_types.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_validation.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_plotting.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_plotting_utils.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_profile.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_run_status.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_run_status_cli.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_run_status_heartbeat.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_variant_pipeline.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/async_status_widget.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/helpers.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/output_tabs.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_base.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_headless.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_ipywidgets.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_rich.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/exceptions.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/helpers.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/lazy.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/LICENSE +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_adaptive_scheduler_slurm_executor.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_load.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_mapspec.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_prepare.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_progress.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_result.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run_eager.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run_eager_async.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run_info.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_shapes.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_base.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_dict.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_zarr.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_types.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/adaptive.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/adaptive_scheduler.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/xarray.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/mcp.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/py.typed +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/resources.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/sweep.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/testing.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/pyproject.toml +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/conftest.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/helpers.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling_basic.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling_parallel_regressions.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling_potential_issues.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_propagation_reason_selection.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_regressions_propagated_root_causes.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_run_info_backward_compat.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/storage/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/storage/test_error_handling_storage.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/test_error_handling_scan_inputs.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/test_all_storage.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/test_file.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/test_zarr.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_adaptive.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_adaptive_scheduler.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_adaptive_slurm_executor.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_async_map_block.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_cache_result_dict.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_eager_scheduler.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_fixed_indices.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_map.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_map_chunking.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_mapspec.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_regular_regressions.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_result.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_resume_validation.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_run_dynamic_internal_shape.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_sweeps.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_xarray.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/regressions/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/regressions/test_cache_reraise_single.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/regressions/test_regressions_cache_error_handling.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_adaptive_run.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_async_status_widget.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_autodoc.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_benchmark.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cache.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cache_memoize.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cache_to_hashable.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cli.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_helpers.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_helpers_chain.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_lazy.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_mcp.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_namedtuple_output.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_nested_pipefunc.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_output_tabs_widget.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_perf.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipefunc.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipefunc_annotations.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipefunc_signature.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_annotations.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_cache.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_lazy.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_mapspec.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_post_execution_hook.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_resources.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_update.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_plotting.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_plotting_utils.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pydantic.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_resources.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_run_status_cli.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_simplify.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_sweep.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_testing.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_typing.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_typing_future_annotations.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_utils.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_variant_pipeline.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_variant_pipeline_widgets.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_widget_progress_tracker_ipywidgets.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_widget_progress_tracker_rich.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/__init__.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/test_error_info_and_snapshots.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/test_output_picker_error_snapshot.py +0 -0
- {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/test_propagated_snapshot_serialization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pipefunc
|
|
3
|
-
Version: 0.92.0
|
|
3
|
+
Version: 0.93.0
|
|
4
4
|
Summary: A Python library for defining, managing, and executing function pipelines.
|
|
5
5
|
Project-URL: homepage, https://pipefunc.readthedocs.io/
|
|
6
6
|
Project-URL: documentation, https://pipefunc.readthedocs.io/
|
|
@@ -530,3 +530,40 @@ print(result["result"].output)
|
|
|
530
530
|
6. **`pipeline.map`:** We call `pipeline.map` as before, but now we only need to specify the `internal_shapes` of the lists, not the shape of the status. The `internal_shapes` argument is only needed when you return a list, and it cannot be inferred from the inputs.
|
|
531
531
|
|
|
532
532
|
This pattern provides a clean and manageable way to work with functions that logically produce multiple outputs of varying shapes within the current capabilities of `pipefunc`.
|
|
533
|
+
|
|
534
|
+
## Working with `polars` DataFrames (Parquet storage and `LazyFrame` inputs)
|
|
535
|
+
|
|
536
|
+
`pipefunc` has first-class support for [polars](https://pola.rs/):
|
|
537
|
+
|
|
538
|
+
1. **Parquet on disk**: when a function returns a `polars.DataFrame` and the results are stored on disk (e.g., `storage="file_array"` in `pipeline.map`), the output is serialized as a [Parquet](https://parquet.apache.org/) file instead of a pickle. Parquet files are compact, fast to read, and can be inspected with external tools like DuckDB. If Parquet serialization fails (e.g., for unsupported dtypes), `pipefunc` transparently falls back to `cloudpickle`.
|
|
539
|
+
2. **Lazy inputs**: annotate a parameter as `polars.LazyFrame` to receive the upstream `polars.DataFrame` output lazily. When the upstream output is stored on disk as Parquet, the function receives `pl.scan_parquet(...)`, so the full DataFrame is never materialized in memory and polars can apply predicate and projection pushdown. Otherwise (e.g., in-memory storage or `pipeline.run`), the DataFrame is converted with `.lazy()`.
|
|
540
|
+
|
|
541
|
+
Type validation understands this conversion: a function returning `pl.DataFrame` may feed into a parameter annotated as `pl.LazyFrame`.
|
|
542
|
+
|
|
543
|
+
```{code-cell} ipython3
|
|
544
|
+
import polars as pl
|
|
545
|
+
|
|
546
|
+
from pipefunc import Pipeline, pipefunc
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
@pipefunc(output_name="df")
|
|
550
|
+
def make_df() -> pl.DataFrame:
|
|
551
|
+
return pl.DataFrame({"x": [1, 2, 3], "y": [10.0, 20.0, 30.0]})
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
@pipefunc(output_name="mean_y")
|
|
555
|
+
def mean_y(df: pl.LazyFrame) -> float: # annotate as LazyFrame to load lazily
|
|
556
|
+
return df.select(pl.col("y").mean()).collect().item()
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
pipeline = Pipeline([make_df, mean_y])
|
|
560
|
+
result = pipeline.map({}, run_folder="my_run_folder", parallel=False, show_progress=False)
|
|
561
|
+
print(result["mean_y"].output)
|
|
562
|
+
```
|
|
563
|
+
|
|
564
|
+
The `df` output above is stored as a Parquet file in the run folder, and `mean_y` receives a `pl.LazyFrame` that scans it.
|
|
565
|
+
|
|
566
|
+
```{note}
|
|
567
|
+
Only top-level `polars.DataFrame` return values are stored as Parquet; DataFrames nested inside other objects (lists, dicts, dataclasses) are pickled as usual.
|
|
568
|
+
The `pl.LazyFrame` conversion applies to parameters annotated *exactly* as `pl.LazyFrame`.
|
|
569
|
+
```
|
|
@@ -234,6 +234,10 @@ This section has been moved to [Function Inputs and Outputs](./concepts/function
|
|
|
234
234
|
|
|
235
235
|
This section has been moved to [Function Inputs and Outputs](./concepts/function-io.md#pipefuncs-with-multiple-outputs-of-different-shapes).
|
|
236
236
|
|
|
237
|
+
## How does `pipefunc` work with `polars` DataFrames?
|
|
238
|
+
|
|
239
|
+
See [Function Inputs and Outputs](./concepts/function-io.md#working-with-polars-dataframes-parquet-storage-and-lazyframe-inputs) for Parquet storage and lazy (`pl.LazyFrame`) inputs.
|
|
240
|
+
|
|
237
241
|
## Simplifying Pipelines
|
|
238
242
|
|
|
239
243
|
This section has been moved to [Simplifying Pipelines](./concepts/simplifying-pipelines.md).
|
|
@@ -37,6 +37,7 @@ from pipefunc._utils import (
|
|
|
37
37
|
clear_cached_properties,
|
|
38
38
|
format_function_call,
|
|
39
39
|
is_classmethod,
|
|
40
|
+
is_lazyframe_annotation,
|
|
40
41
|
is_pydantic_base_model,
|
|
41
42
|
requires,
|
|
42
43
|
)
|
|
@@ -876,6 +877,22 @@ class PipeFunc(Generic[P, R]):
|
|
|
876
877
|
type_hints = safe_get_type_hints(func, include_extras=True)
|
|
877
878
|
return {self.renames.get(k, k): v for k, v in type_hints.items() if k != "return"}
|
|
878
879
|
|
|
880
|
+
@functools.cached_property
|
|
881
|
+
def _lazyframe_parameters(self) -> tuple[str, ...]:
|
|
882
|
+
"""Names of parameters annotated as `polars.LazyFrame`."""
|
|
883
|
+
return tuple(p for p, a in self.parameter_annotations.items() if is_lazyframe_annotation(a))
|
|
884
|
+
|
|
885
|
+
def _convert_lazyframe_kwargs(self, kwargs: dict[str, Any]) -> None:
|
|
886
|
+
"""Convert `pl.DataFrame` values to `pl.LazyFrame` where the annotation asks for it."""
|
|
887
|
+
if not self._lazyframe_parameters: # fast path, avoids per-element overhead
|
|
888
|
+
return
|
|
889
|
+
import polars as pl
|
|
890
|
+
|
|
891
|
+
for p in self._lazyframe_parameters:
|
|
892
|
+
value = kwargs.get(p)
|
|
893
|
+
if isinstance(value, pl.DataFrame):
|
|
894
|
+
kwargs[p] = value.lazy()
|
|
895
|
+
|
|
879
896
|
@functools.cached_property
|
|
880
897
|
def output_annotation(self) -> dict[str, Any]:
|
|
881
898
|
"""Return the type annotation of the wrapped function's output."""
|
|
@@ -615,6 +615,7 @@ class Pipeline:
|
|
|
615
615
|
raise ValueError(msg)
|
|
616
616
|
func_args[arg] = value
|
|
617
617
|
used_parameters.add(arg)
|
|
618
|
+
func._convert_lazyframe_kwargs(func_args)
|
|
618
619
|
return func_args
|
|
619
620
|
|
|
620
621
|
def _current_cache(self) -> LRUCache | HybridCache | DiskCache | SimpleCache | None:
|
|
@@ -52,9 +52,23 @@ def at_least_tuple(x: Any) -> tuple[Any, ...]:
|
|
|
52
52
|
return x if isinstance(x, tuple) else (x,)
|
|
53
53
|
|
|
54
54
|
|
|
55
|
+
PARQUET_MAGIC = b"PAR1"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def is_parquet_file(path: Path) -> bool:
|
|
59
|
+
"""Check whether the file at ``path`` is a Parquet file (by magic bytes)."""
|
|
60
|
+
try:
|
|
61
|
+
with path.open("rb") as f:
|
|
62
|
+
return f.read(len(PARQUET_MAGIC)) == PARQUET_MAGIC
|
|
63
|
+
except OSError: # pragma: no cover
|
|
64
|
+
return False
|
|
65
|
+
|
|
66
|
+
|
|
55
67
|
def load(path: Path, *, cache: bool = False) -> Any:
|
|
56
|
-
"""Load
|
|
68
|
+
"""Load an object from a path.
|
|
57
69
|
|
|
70
|
+
Reads Parquet files (written by `dump` for ``polars.DataFrame`` objects)
|
|
71
|
+
as ``polars.DataFrame``, everything else as cloudpickle.
|
|
58
72
|
If ``cache`` is ``True``, the object will be cached in memory.
|
|
59
73
|
"""
|
|
60
74
|
if cache:
|
|
@@ -62,12 +76,33 @@ def load(path: Path, *, cache: bool = False) -> Any:
|
|
|
62
76
|
return _cached_load(cache_key)
|
|
63
77
|
|
|
64
78
|
with path.open("rb") as f:
|
|
79
|
+
is_parquet = f.read(len(PARQUET_MAGIC)) == PARQUET_MAGIC
|
|
80
|
+
f.seek(0)
|
|
81
|
+
if is_parquet:
|
|
82
|
+
import polars as pl
|
|
83
|
+
|
|
84
|
+
return pl.read_parquet(f)
|
|
65
85
|
return cloudpickle.load(f)
|
|
66
86
|
|
|
67
87
|
|
|
68
88
|
def dump(obj: Any, path: Path) -> None:
|
|
69
|
-
"""Dump an object to a path
|
|
89
|
+
"""Dump an object to a path.
|
|
90
|
+
|
|
91
|
+
``polars.DataFrame`` objects are stored as Parquet (falling back to
|
|
92
|
+
cloudpickle if Parquet serialization fails, e.g., for ``pl.Object``
|
|
93
|
+
dtype columns); everything else is stored with cloudpickle.
|
|
94
|
+
"""
|
|
70
95
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
96
|
+
if is_imported("polars"):
|
|
97
|
+
import polars as pl
|
|
98
|
+
|
|
99
|
+
if isinstance(obj, pl.DataFrame):
|
|
100
|
+
try:
|
|
101
|
+
obj.write_parquet(path)
|
|
102
|
+
except Exception: # noqa: BLE001, e.g., unsupported dtypes like pl.Object
|
|
103
|
+
path.unlink(missing_ok=True)
|
|
104
|
+
else:
|
|
105
|
+
return
|
|
71
106
|
with path.open("wb") as f:
|
|
72
107
|
cloudpickle.dump(obj, f)
|
|
73
108
|
|
|
@@ -629,3 +664,12 @@ def pandas_to_polars(df: Any) -> Any:
|
|
|
629
664
|
# Fallback to manual conversion if pyarrow is not available
|
|
630
665
|
# This happens when pandas has nullable types but pyarrow is not installed
|
|
631
666
|
return pl.DataFrame({col: df[col].to_numpy() for col in df.columns})
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def is_lazyframe_annotation(annotation: Any) -> bool:
|
|
670
|
+
"""Check whether ``annotation`` is ``polars.LazyFrame``."""
|
|
671
|
+
if not is_imported("polars"):
|
|
672
|
+
return False
|
|
673
|
+
import polars as pl
|
|
674
|
+
|
|
675
|
+
return annotation is pl.LazyFrame
|
|
@@ -885,6 +885,10 @@ def to_hashable( # noqa: C901, PLR0911, PLR0912
|
|
|
885
885
|
if isinstance(obj, polars.DataFrame):
|
|
886
886
|
hsh = to_hashable(obj.to_dict(as_series=False), fallback_to_pickle)
|
|
887
887
|
return (m, tp, hsh)
|
|
888
|
+
if isinstance(obj, polars.LazyFrame):
|
|
889
|
+
# Hash the serialized query plan; collecting the data here would
|
|
890
|
+
# defeat the purpose of using a LazyFrame.
|
|
891
|
+
return (m, tp, obj.serialize())
|
|
888
892
|
|
|
889
893
|
if fallback_to_pickle:
|
|
890
894
|
try:
|
|
@@ -30,6 +30,7 @@ from pipefunc._utils import (
|
|
|
30
30
|
dump,
|
|
31
31
|
ensure_block_allowed,
|
|
32
32
|
get_ncores,
|
|
33
|
+
is_parquet_file,
|
|
33
34
|
is_running_in_ipynb,
|
|
34
35
|
prod,
|
|
35
36
|
)
|
|
@@ -716,7 +717,10 @@ def _func_kwargs(func: PipeFunc, run_info: RunInfo, store: dict[str, StoreType])
|
|
|
716
717
|
elif p in run_info.inputs:
|
|
717
718
|
kwargs[p] = run_info.inputs[p]
|
|
718
719
|
elif p in run_info.all_output_names:
|
|
719
|
-
kwargs[p] = _load_from_store(p, store).value
|
|
720
|
+
if (lazy_frame := _maybe_scan_parquet(func, p, store)) is not None:
|
|
721
|
+
kwargs[p] = lazy_frame
|
|
722
|
+
else:
|
|
723
|
+
kwargs[p] = _load_from_store(p, store).value
|
|
720
724
|
elif p in run_info.defaults and p not in run_info.all_output_names:
|
|
721
725
|
kwargs[p] = run_info.defaults[p]
|
|
722
726
|
else: # pragma: no cover
|
|
@@ -727,6 +731,26 @@ def _func_kwargs(func: PipeFunc, run_info: RunInfo, store: dict[str, StoreType])
|
|
|
727
731
|
return kwargs
|
|
728
732
|
|
|
729
733
|
|
|
734
|
+
def _maybe_scan_parquet(func: PipeFunc, parameter: str, store: dict[str, StoreType]) -> Any:
|
|
735
|
+
"""Return a `pl.LazyFrame` scanning the stored Parquet file, if applicable.
|
|
736
|
+
|
|
737
|
+
Only applies when the parameter is annotated as `pl.LazyFrame`, is not
|
|
738
|
+
indexed by the function's mapspec, and the upstream output is stored on
|
|
739
|
+
disk as a Parquet file (see `pipefunc._utils.dump`). This avoids
|
|
740
|
+
materializing the full `pl.DataFrame` in memory.
|
|
741
|
+
"""
|
|
742
|
+
if parameter not in func._lazyframe_parameters:
|
|
743
|
+
return None
|
|
744
|
+
if func.mapspec is not None and parameter in func.mapspec.input_names:
|
|
745
|
+
return None
|
|
746
|
+
storage = store[parameter]
|
|
747
|
+
if not isinstance(storage, Path) or not storage.is_file() or not is_parquet_file(storage):
|
|
748
|
+
return None
|
|
749
|
+
import polars as pl
|
|
750
|
+
|
|
751
|
+
return pl.scan_parquet(storage)
|
|
752
|
+
|
|
753
|
+
|
|
730
754
|
def _select_kwargs(
|
|
731
755
|
func: PipeFunc,
|
|
732
756
|
kwargs: dict[str, Any],
|
|
@@ -740,6 +764,7 @@ def _select_kwargs(
|
|
|
740
764
|
normalized_keys = {k: v[0] if len(v) == 1 else v for k, v in input_keys.items()}
|
|
741
765
|
selected = {k: v[normalized_keys[k]] if k in normalized_keys else v for k, v in kwargs.items()}
|
|
742
766
|
_load_data(selected)
|
|
767
|
+
func._convert_lazyframe_kwargs(selected)
|
|
743
768
|
return selected
|
|
744
769
|
|
|
745
770
|
|
|
@@ -1695,6 +1720,7 @@ def _execute_single(
|
|
|
1695
1720
|
|
|
1696
1721
|
# Otherwise, run the function
|
|
1697
1722
|
_load_data(kwargs)
|
|
1723
|
+
func._convert_lazyframe_kwargs(kwargs)
|
|
1698
1724
|
if error_handling == "raise":
|
|
1699
1725
|
return _get_or_set_cache(func, kwargs, cache, _CTX_RAISE, "raise")
|
|
1700
1726
|
|
|
@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Any
|
|
|
11
11
|
import cloudpickle # type: ignore[import-untyped]
|
|
12
12
|
import numpy as np
|
|
13
13
|
|
|
14
|
-
from pipefunc._utils import dump, load
|
|
14
|
+
from pipefunc._utils import PARQUET_MAGIC, dump, load
|
|
15
15
|
|
|
16
16
|
from ._base import (
|
|
17
17
|
StorageBase,
|
|
@@ -326,8 +326,16 @@ def _load_all(filenames: Iterator[Path]) -> list[Any]:
|
|
|
326
326
|
def maybe_read(f: Path) -> Any | None:
|
|
327
327
|
return _read(f) if f.is_file() else None
|
|
328
328
|
|
|
329
|
-
def maybe_load(x:
|
|
330
|
-
|
|
329
|
+
def maybe_load(x: bytes | None) -> Any | None:
|
|
330
|
+
if x is None:
|
|
331
|
+
return None
|
|
332
|
+
if x.startswith(PARQUET_MAGIC):
|
|
333
|
+
import io
|
|
334
|
+
|
|
335
|
+
import polars as pl
|
|
336
|
+
|
|
337
|
+
return pl.read_parquet(io.BytesIO(x))
|
|
338
|
+
return cloudpickle.loads(x)
|
|
331
339
|
|
|
332
340
|
# Delegate file reading to the threadpool but deserialize sequentially,
|
|
333
341
|
# as this is pure Python and CPU bound
|
|
@@ -20,6 +20,8 @@ from typing import (
|
|
|
20
20
|
|
|
21
21
|
import numpy as np
|
|
22
22
|
|
|
23
|
+
from pipefunc._utils import is_imported
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
class NoAnnotation:
|
|
25
27
|
"""Marker class for missing type annotations."""
|
|
@@ -209,7 +211,7 @@ def _handle_generic_types(
|
|
|
209
211
|
return None
|
|
210
212
|
|
|
211
213
|
|
|
212
|
-
def is_type_compatible(
|
|
214
|
+
def is_type_compatible( # noqa: PLR0911
|
|
213
215
|
incoming_type: Any,
|
|
214
216
|
required_type: Any,
|
|
215
217
|
memo: TypeCheckMemo | None = None,
|
|
@@ -228,6 +230,10 @@ def is_type_compatible(
|
|
|
228
230
|
|
|
229
231
|
if _check_identical_or_any(incoming_type, required_type):
|
|
230
232
|
return True
|
|
233
|
+
if _is_polars_dataframe_to_lazyframe(incoming_type, required_type):
|
|
234
|
+
# pipefunc converts `pl.DataFrame` values to `pl.LazyFrame` at execution
|
|
235
|
+
# time when the consuming parameter is annotated as `pl.LazyFrame`.
|
|
236
|
+
return True
|
|
231
237
|
if (result := _is_typevar_compatible(incoming_type, required_type, memo)) is not None:
|
|
232
238
|
return result
|
|
233
239
|
if (result := _handle_union_types(incoming_type, required_type, memo)) is not None:
|
|
@@ -237,6 +243,15 @@ def is_type_compatible(
|
|
|
237
243
|
return False
|
|
238
244
|
|
|
239
245
|
|
|
246
|
+
def _is_polars_dataframe_to_lazyframe(incoming_type: Any, required_type: Any) -> bool:
|
|
247
|
+
"""Check for the special-cased `pl.DataFrame` output -> `pl.LazyFrame` input edge."""
|
|
248
|
+
if not is_imported("polars"):
|
|
249
|
+
return False
|
|
250
|
+
import polars as pl
|
|
251
|
+
|
|
252
|
+
return incoming_type is pl.DataFrame and required_type is pl.LazyFrame
|
|
253
|
+
|
|
254
|
+
|
|
240
255
|
def _is_typevar_compatible(
|
|
241
256
|
incoming_type: Any,
|
|
242
257
|
required_type: Any,
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""Tests for Parquet serialization and `pl.LazyFrame` support (issue #879)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib.util
|
|
6
|
+
import sys
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
import numpy as np # noqa: TC002, needed at runtime to resolve `np.ndarray` annotations
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from pipefunc import Pipeline, pipefunc
|
|
13
|
+
from pipefunc._utils import PARQUET_MAGIC, dump, is_parquet_file, load
|
|
14
|
+
from pipefunc.map import load_outputs
|
|
15
|
+
from pipefunc.typing import is_type_compatible
|
|
16
|
+
|
|
17
|
+
has_polars = importlib.util.find_spec("polars") is not None
|
|
18
|
+
pytestmark = pytest.mark.skipif(not has_polars, reason="polars not installed")
|
|
19
|
+
|
|
20
|
+
if has_polars:
|
|
21
|
+
import polars as pl
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_dump_dataframe_as_parquet(tmp_path: Path) -> None:
|
|
28
|
+
df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
|
|
29
|
+
path = tmp_path / "df.cloudpickle"
|
|
30
|
+
dump(df, path)
|
|
31
|
+
assert path.read_bytes()[:4] == PARQUET_MAGIC
|
|
32
|
+
assert is_parquet_file(path)
|
|
33
|
+
loaded = load(path)
|
|
34
|
+
assert isinstance(loaded, pl.DataFrame)
|
|
35
|
+
assert loaded.equals(df)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_dump_non_dataframe_still_pickles(tmp_path: Path) -> None:
|
|
39
|
+
path = tmp_path / "obj.cloudpickle"
|
|
40
|
+
dump({"a": 1}, path)
|
|
41
|
+
assert not is_parquet_file(path)
|
|
42
|
+
assert load(path) == {"a": 1}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_dump_falls_back_to_pickle_on_parquet_failure(
|
|
46
|
+
tmp_path: Path,
|
|
47
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
48
|
+
) -> None:
|
|
49
|
+
df = pl.DataFrame({"a": [1, 2]})
|
|
50
|
+
|
|
51
|
+
def fail(*args: Any, **kwargs: Any) -> None:
|
|
52
|
+
msg = "boom"
|
|
53
|
+
raise ValueError(msg)
|
|
54
|
+
|
|
55
|
+
monkeypatch.setattr(pl.DataFrame, "write_parquet", fail)
|
|
56
|
+
path = tmp_path / "df.cloudpickle"
|
|
57
|
+
dump(df, path)
|
|
58
|
+
assert path.read_bytes()[:4] != PARQUET_MAGIC
|
|
59
|
+
loaded = load(path)
|
|
60
|
+
assert isinstance(loaded, pl.DataFrame)
|
|
61
|
+
assert loaded.equals(df)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_load_with_cache(tmp_path: Path) -> None:
|
|
65
|
+
df = pl.DataFrame({"a": [1, 2]})
|
|
66
|
+
path = tmp_path / "df.cloudpickle"
|
|
67
|
+
dump(df, path)
|
|
68
|
+
assert load(path, cache=True).equals(df)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_file_array_with_dataframes(tmp_path: Path) -> None:
|
|
72
|
+
from pipefunc.map._storage_array._file import FileArray
|
|
73
|
+
|
|
74
|
+
arr = FileArray(tmp_path / "arr", shape=(2,))
|
|
75
|
+
arr.dump((0,), pl.DataFrame({"a": [1]}))
|
|
76
|
+
arr.dump((1,), pl.DataFrame({"a": [2]}))
|
|
77
|
+
assert is_parquet_file(arr._index_to_file(0))
|
|
78
|
+
element = arr[0,]
|
|
79
|
+
assert isinstance(element, pl.DataFrame)
|
|
80
|
+
assert element["a"].to_list() == [1]
|
|
81
|
+
# `to_array` exercises the threaded `_load_all` byte-sniffing path
|
|
82
|
+
full = arr.to_array()
|
|
83
|
+
assert all(isinstance(x, pl.DataFrame) for x in full)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_dataframe_to_lazyframe_type_compatible() -> None:
|
|
87
|
+
assert is_type_compatible(pl.DataFrame, pl.LazyFrame)
|
|
88
|
+
assert not is_type_compatible(pl.LazyFrame, pl.DataFrame)
|
|
89
|
+
assert not is_type_compatible(int, pl.LazyFrame)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_map_lazyframe_input_scans_parquet(tmp_path: Path) -> None:
|
|
93
|
+
@pipefunc(output_name="df")
|
|
94
|
+
def make_df() -> pl.DataFrame:
|
|
95
|
+
return pl.DataFrame({"a": [1, 2, 3]})
|
|
96
|
+
|
|
97
|
+
@pipefunc(output_name="total")
|
|
98
|
+
def consume(df: pl.LazyFrame) -> int:
|
|
99
|
+
assert isinstance(df, pl.LazyFrame)
|
|
100
|
+
# The plan must be a Parquet scan, not an in-memory DataFrame
|
|
101
|
+
assert "DF" not in df.explain(optimized=False)
|
|
102
|
+
return df.select(pl.col("a").sum()).collect().item()
|
|
103
|
+
|
|
104
|
+
pipeline = Pipeline([make_df, consume]) # validates type annotations
|
|
105
|
+
result = pipeline.map({}, run_folder=tmp_path, parallel=False, show_progress=False)
|
|
106
|
+
assert result["total"].output == 6
|
|
107
|
+
df_path = tmp_path / "outputs" / "df.cloudpickle"
|
|
108
|
+
assert is_parquet_file(df_path)
|
|
109
|
+
loaded = load_outputs("df", run_folder=tmp_path)
|
|
110
|
+
assert isinstance(loaded, pl.DataFrame)
|
|
111
|
+
assert loaded["a"].to_list() == [1, 2, 3]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def test_map_lazyframe_input_without_run_folder() -> None:
|
|
115
|
+
@pipefunc(output_name="df")
|
|
116
|
+
def make_df() -> pl.DataFrame:
|
|
117
|
+
return pl.DataFrame({"a": [1, 2, 3]})
|
|
118
|
+
|
|
119
|
+
@pipefunc(output_name="total")
|
|
120
|
+
def consume(df: pl.LazyFrame) -> int:
|
|
121
|
+
assert isinstance(df, pl.LazyFrame)
|
|
122
|
+
return df.select(pl.col("a").sum()).collect().item()
|
|
123
|
+
|
|
124
|
+
pipeline = Pipeline([make_df, consume])
|
|
125
|
+
result = pipeline.map({}, parallel=False, show_progress=False, storage="dict")
|
|
126
|
+
assert result["total"].output == 6
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_map_elementwise_lazyframe(tmp_path: Path) -> None:
|
|
130
|
+
@pipefunc(output_name="df", mapspec="x[i] -> df[i]")
|
|
131
|
+
def make_df(x: int) -> pl.DataFrame:
|
|
132
|
+
return pl.DataFrame({"a": [x, x * 2]})
|
|
133
|
+
|
|
134
|
+
@pipefunc(output_name="total", mapspec="df[i] -> total[i]")
|
|
135
|
+
def consume(df: pl.LazyFrame) -> int:
|
|
136
|
+
assert isinstance(df, pl.LazyFrame)
|
|
137
|
+
return df.select(pl.col("a").sum()).collect().item()
|
|
138
|
+
|
|
139
|
+
pipeline = Pipeline([make_df, consume])
|
|
140
|
+
result = pipeline.map(
|
|
141
|
+
{"x": [1, 10]},
|
|
142
|
+
run_folder=tmp_path,
|
|
143
|
+
parallel=True,
|
|
144
|
+
show_progress=False,
|
|
145
|
+
)
|
|
146
|
+
assert result["total"].output.tolist() == [3, 30]
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_map_reduction_keeps_dataframes(tmp_path: Path) -> None:
|
|
150
|
+
@pipefunc(output_name="df", mapspec="x[i] -> df[i]")
|
|
151
|
+
def make_df(x: int) -> pl.DataFrame:
|
|
152
|
+
return pl.DataFrame({"a": [x]})
|
|
153
|
+
|
|
154
|
+
@pipefunc(output_name="n")
|
|
155
|
+
def reduce_all(df: np.ndarray) -> int:
|
|
156
|
+
assert all(isinstance(d, pl.DataFrame) for d in df)
|
|
157
|
+
return len(df)
|
|
158
|
+
|
|
159
|
+
pipeline = Pipeline([make_df, reduce_all])
|
|
160
|
+
result = pipeline.map(
|
|
161
|
+
{"x": [1, 10]},
|
|
162
|
+
run_folder=tmp_path,
|
|
163
|
+
parallel=False,
|
|
164
|
+
show_progress=False,
|
|
165
|
+
)
|
|
166
|
+
assert result["n"].output == 2
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def test_run_lazyframe_input() -> None:
|
|
170
|
+
@pipefunc(output_name="df")
|
|
171
|
+
def make_df() -> pl.DataFrame:
|
|
172
|
+
return pl.DataFrame({"a": [1, 2, 3]})
|
|
173
|
+
|
|
174
|
+
@pipefunc(output_name="total")
|
|
175
|
+
def consume(df: pl.LazyFrame) -> int:
|
|
176
|
+
assert isinstance(df, pl.LazyFrame)
|
|
177
|
+
return df.select(pl.col("a").sum()).collect().item()
|
|
178
|
+
|
|
179
|
+
pipeline = Pipeline([make_df, consume])
|
|
180
|
+
assert pipeline.run("total", kwargs={}) == 6
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def test_run_lazyframe_from_input_kwarg() -> None:
|
|
184
|
+
@pipefunc(output_name="total")
|
|
185
|
+
def consume(df: pl.LazyFrame) -> int:
|
|
186
|
+
assert isinstance(df, pl.LazyFrame)
|
|
187
|
+
return df.select(pl.col("a").sum()).collect().item()
|
|
188
|
+
|
|
189
|
+
pipeline = Pipeline([consume])
|
|
190
|
+
df = pl.DataFrame({"a": [1, 2, 3]})
|
|
191
|
+
assert pipeline.run("total", kwargs={"df": df}) == 6
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def test_map_lazyframe_from_input_kwarg(tmp_path: Path) -> None:
|
|
195
|
+
@pipefunc(output_name="total")
|
|
196
|
+
def consume(df: pl.LazyFrame) -> int:
|
|
197
|
+
assert isinstance(df, pl.LazyFrame)
|
|
198
|
+
return df.select(pl.col("a").sum()).collect().item()
|
|
199
|
+
|
|
200
|
+
pipeline = Pipeline([consume])
|
|
201
|
+
df = pl.DataFrame({"a": [1, 2, 3]})
|
|
202
|
+
result = pipeline.map({"df": df}, run_folder=tmp_path, parallel=False, show_progress=False)
|
|
203
|
+
assert result["total"].output == 6
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def test_lazyframe_passthrough() -> None:
|
|
207
|
+
@pipefunc(output_name="lf")
|
|
208
|
+
def make_lf() -> pl.LazyFrame:
|
|
209
|
+
return pl.DataFrame({"a": [1, 2, 3]}).lazy()
|
|
210
|
+
|
|
211
|
+
@pipefunc(output_name="total")
|
|
212
|
+
def consume(lf: pl.LazyFrame) -> int:
|
|
213
|
+
assert isinstance(lf, pl.LazyFrame)
|
|
214
|
+
return lf.select(pl.col("a").sum()).collect().item()
|
|
215
|
+
|
|
216
|
+
pipeline = Pipeline([make_lf, consume])
|
|
217
|
+
result = pipeline.map({}, parallel=False, show_progress=False, storage="dict")
|
|
218
|
+
assert result["total"].output == 6
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def test_to_hashable_lazyframe() -> None:
|
|
222
|
+
from pipefunc.cache import to_hashable
|
|
223
|
+
|
|
224
|
+
lf = pl.DataFrame({"a": [1, 2]}).lazy()
|
|
225
|
+
key = to_hashable(lf)
|
|
226
|
+
assert hash(key) == hash(to_hashable(pl.DataFrame({"a": [1, 2]}).lazy()))
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def test_helpers_when_polars_not_imported(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
230
|
+
from pipefunc._utils import is_lazyframe_annotation
|
|
231
|
+
|
|
232
|
+
monkeypatch.delitem(sys.modules, "polars")
|
|
233
|
+
assert not is_lazyframe_annotation(pl.LazyFrame)
|
|
234
|
+
assert not is_type_compatible(pl.DataFrame, pl.LazyFrame)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|