pipefunc 0.92.0__tar.gz → 0.93.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. {pipefunc-0.92.0 → pipefunc-0.93.0}/PKG-INFO +1 -1
  2. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/function-io.md +37 -0
  3. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/faq.md +4 -0
  4. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipefunc.py +17 -0
  5. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_base.py +1 -0
  6. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_utils.py +46 -2
  7. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_version.py +1 -1
  8. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/cache.py +4 -0
  9. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run.py +27 -1
  10. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_file.py +11 -3
  11. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/typing.py +16 -1
  12. pipefunc-0.93.0/tests/test_polars_parquet.py +234 -0
  13. {pipefunc-0.92.0 → pipefunc-0.93.0}/.devcontainer/Dockerfile +0 -0
  14. {pipefunc-0.92.0 → pipefunc-0.93.0}/.devcontainer/devcontainer.json +0 -0
  15. {pipefunc-0.92.0 → pipefunc-0.93.0}/.gitattributes +0 -0
  16. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/doc-string-check.py +0 -0
  17. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/generate-release-notes.py +0 -0
  18. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/pr-title-checker-config.json +0 -0
  19. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/release-drafter.yml +0 -0
  20. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/renovate.json +0 -0
  21. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/update-environment.py +0 -0
  22. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/codeql.yml +0 -0
  23. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/codspeed.yml +0 -0
  24. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/doc-string-check.yml +0 -0
  25. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pr-title-checker.yml +0 -0
  26. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pytest-micromamba.yml +0 -0
  27. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pytest-uv.yml +0 -0
  28. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/pythonpublish.yml +0 -0
  29. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/release-drafter.yaml +0 -0
  30. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/toc.yaml +0 -0
  31. {pipefunc-0.92.0 → pipefunc-0.93.0}/.github/workflows/update-environment.yaml +0 -0
  32. {pipefunc-0.92.0 → pipefunc-0.93.0}/.gitignore +0 -0
  33. {pipefunc-0.92.0 → pipefunc-0.93.0}/.pre-commit-config.yaml +0 -0
  34. {pipefunc-0.92.0 → pipefunc-0.93.0}/.readthedocs.yml +0 -0
  35. {pipefunc-0.92.0 → pipefunc-0.93.0}/.vscode/launch.json +0 -0
  36. {pipefunc-0.92.0 → pipefunc-0.93.0}/.vscode/settings.json +0 -0
  37. {pipefunc-0.92.0 → pipefunc-0.93.0}/AUTHORS.md +0 -0
  38. {pipefunc-0.92.0 → pipefunc-0.93.0}/CHANGELOG.md +0 -0
  39. {pipefunc-0.92.0 → pipefunc-0.93.0}/LICENSE +0 -0
  40. {pipefunc-0.92.0 → pipefunc-0.93.0}/MANIFEST.in +0 -0
  41. {pipefunc-0.92.0 → pipefunc-0.93.0}/README.md +0 -0
  42. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/.gitignore +0 -0
  43. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/Makefile +0 -0
  44. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/environment-sphinx.yml +0 -0
  45. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/_static/custom.css +0 -0
  46. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/changelog.md +0 -0
  47. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/adaptive-integration.md +0 -0
  48. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/caching.md +0 -0
  49. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/chaining.md +0 -0
  50. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/cli.md +0 -0
  51. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/error-handling.md +0 -0
  52. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/execution-and-parallelism.md +0 -0
  53. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/index.md +0 -0
  54. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/map_async_in_script.py +0 -0
  55. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mapspec.md +0 -0
  56. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mcp.md +0 -0
  57. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mcp_agent.py +0 -0
  58. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/mcp_server.py +0 -0
  59. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/overhead-and-efficiency.md +0 -0
  60. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/parameter-scopes.md +0 -0
  61. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/parameter-sweeps.md +0 -0
  62. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/resource-management.md +0 -0
  63. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/run-status.md +0 -0
  64. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/simplifying-pipelines.md +0 -0
  65. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/slurm.md +0 -0
  66. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/testing.md +0 -0
  67. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/type-checking.md +0 -0
  68. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/concepts/variants.md +0 -0
  69. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/conf.py +0 -0
  70. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/basic-usage.md +0 -0
  71. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/image-processing.md +0 -0
  72. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/index.md +0 -0
  73. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/nlp-text-summarization.md +0 -0
  74. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/physics-simulation.md +0 -0
  75. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/sensor-data-processing.md +0 -0
  76. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/examples/weather-simulation.md +0 -0
  77. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/index.md +0 -0
  78. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/index.md +0 -0
  79. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.cache.md +0 -0
  80. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.helpers.md +0 -0
  81. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.lazy.md +0 -0
  82. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.adaptive.md +0 -0
  83. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.adaptive_scheduler.md +0 -0
  84. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.md +0 -0
  85. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.map.xarray.md +0 -0
  86. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.mcp.md +0 -0
  87. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.md +0 -0
  88. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.resources.md +0 -0
  89. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.sweep.md +0 -0
  90. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.testing.md +0 -0
  91. {pipefunc-0.92.0 → pipefunc-0.93.0}/docs/source/reference/pipefunc.typing.md +0 -0
  92. {pipefunc-0.92.0 → pipefunc-0.93.0}/environment.yml +0 -0
  93. {pipefunc-0.92.0 → pipefunc-0.93.0}/example.ipynb +0 -0
  94. {pipefunc-0.92.0 → pipefunc-0.93.0}/get-notebooks.py +0 -0
  95. {pipefunc-0.92.0 → pipefunc-0.93.0}/noxfile.py +0 -0
  96. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/__init__.py +0 -0
  97. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_adaptive.py +0 -0
  98. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_error_handling.py +0 -0
  99. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipefunc_utils.py +0 -0
  100. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/__init__.py +0 -0
  101. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_autodoc.py +0 -0
  102. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_cache.py +0 -0
  103. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_cli.py +0 -0
  104. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_mapspec.py +0 -0
  105. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_pydantic.py +0 -0
  106. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_simplify.py +0 -0
  107. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_types.py +0 -0
  108. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_pipeline/_validation.py +0 -0
  109. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_plotting.py +0 -0
  110. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_plotting_utils.py +0 -0
  111. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_profile.py +0 -0
  112. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_run_status.py +0 -0
  113. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_run_status_cli.py +0 -0
  114. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_run_status_heartbeat.py +0 -0
  115. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_variant_pipeline.py +0 -0
  116. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/__init__.py +0 -0
  117. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/async_status_widget.py +0 -0
  118. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/helpers.py +0 -0
  119. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/output_tabs.py +0 -0
  120. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_base.py +0 -0
  121. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_headless.py +0 -0
  122. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_ipywidgets.py +0 -0
  123. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/_widgets/progress_rich.py +0 -0
  124. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/exceptions.py +0 -0
  125. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/helpers.py +0 -0
  126. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/lazy.py +0 -0
  127. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/LICENSE +0 -0
  128. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/__init__.py +0 -0
  129. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_adaptive_scheduler_slurm_executor.py +0 -0
  130. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_load.py +0 -0
  131. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_mapspec.py +0 -0
  132. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_prepare.py +0 -0
  133. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_progress.py +0 -0
  134. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_result.py +0 -0
  135. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run_eager.py +0 -0
  136. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run_eager_async.py +0 -0
  137. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_run_info.py +0 -0
  138. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_shapes.py +0 -0
  139. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/__init__.py +0 -0
  140. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_base.py +0 -0
  141. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_dict.py +0 -0
  142. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_storage_array/_zarr.py +0 -0
  143. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/_types.py +0 -0
  144. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/adaptive.py +0 -0
  145. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/adaptive_scheduler.py +0 -0
  146. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/map/xarray.py +0 -0
  147. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/mcp.py +0 -0
  148. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/py.typed +0 -0
  149. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/resources.py +0 -0
  150. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/sweep.py +0 -0
  151. {pipefunc-0.92.0 → pipefunc-0.93.0}/pipefunc/testing.py +0 -0
  152. {pipefunc-0.92.0 → pipefunc-0.93.0}/pyproject.toml +0 -0
  153. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/__init__.py +0 -0
  154. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/conftest.py +0 -0
  155. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/helpers.py +0 -0
  156. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/__init__.py +0 -0
  157. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/__init__.py +0 -0
  158. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling.py +0 -0
  159. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling_basic.py +0 -0
  160. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling_parallel_regressions.py +0 -0
  161. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_error_handling_potential_issues.py +0 -0
  162. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_propagation_reason_selection.py +0 -0
  163. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_regressions_propagated_root_causes.py +0 -0
  164. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/map/test_run_info_backward_compat.py +0 -0
  165. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/storage/__init__.py +0 -0
  166. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/storage/test_error_handling_storage.py +0 -0
  167. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/integration/test_error_handling_scan_inputs.py +0 -0
  168. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/__init__.py +0 -0
  169. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/__init__.py +0 -0
  170. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/test_all_storage.py +0 -0
  171. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/test_file.py +0 -0
  172. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/storage/test_zarr.py +0 -0
  173. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_adaptive.py +0 -0
  174. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_adaptive_scheduler.py +0 -0
  175. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_adaptive_slurm_executor.py +0 -0
  176. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_async_map_block.py +0 -0
  177. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_cache_result_dict.py +0 -0
  178. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_eager_scheduler.py +0 -0
  179. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_fixed_indices.py +0 -0
  180. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_map.py +0 -0
  181. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_map_chunking.py +0 -0
  182. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_mapspec.py +0 -0
  183. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_regular_regressions.py +0 -0
  184. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_result.py +0 -0
  185. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_resume_validation.py +0 -0
  186. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_run_dynamic_internal_shape.py +0 -0
  187. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_sweeps.py +0 -0
  188. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/map/test_xarray.py +0 -0
  189. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/regressions/__init__.py +0 -0
  190. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/regressions/test_cache_reraise_single.py +0 -0
  191. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/regressions/test_regressions_cache_error_handling.py +0 -0
  192. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_adaptive_run.py +0 -0
  193. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_async_status_widget.py +0 -0
  194. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_autodoc.py +0 -0
  195. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_benchmark.py +0 -0
  196. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cache.py +0 -0
  197. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cache_memoize.py +0 -0
  198. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cache_to_hashable.py +0 -0
  199. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_cli.py +0 -0
  200. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_helpers.py +0 -0
  201. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_helpers_chain.py +0 -0
  202. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_lazy.py +0 -0
  203. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_mcp.py +0 -0
  204. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_namedtuple_output.py +0 -0
  205. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_nested_pipefunc.py +0 -0
  206. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_output_tabs_widget.py +0 -0
  207. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_perf.py +0 -0
  208. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipefunc.py +0 -0
  209. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipefunc_annotations.py +0 -0
  210. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipefunc_signature.py +0 -0
  211. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline.py +0 -0
  212. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_annotations.py +0 -0
  213. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_cache.py +0 -0
  214. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_lazy.py +0 -0
  215. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_mapspec.py +0 -0
  216. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_post_execution_hook.py +0 -0
  217. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_resources.py +0 -0
  218. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pipeline_update.py +0 -0
  219. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_plotting.py +0 -0
  220. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_plotting_utils.py +0 -0
  221. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_pydantic.py +0 -0
  222. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_resources.py +0 -0
  223. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_run_status_cli.py +0 -0
  224. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_simplify.py +0 -0
  225. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_sweep.py +0 -0
  226. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_testing.py +0 -0
  227. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_typing.py +0 -0
  228. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_typing_future_annotations.py +0 -0
  229. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_utils.py +0 -0
  230. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_variant_pipeline.py +0 -0
  231. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_variant_pipeline_widgets.py +0 -0
  232. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_widget_progress_tracker_ipywidgets.py +0 -0
  233. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/test_widget_progress_tracker_rich.py +0 -0
  234. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/__init__.py +0 -0
  235. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/__init__.py +0 -0
  236. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/test_error_info_and_snapshots.py +0 -0
  237. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/test_output_picker_error_snapshot.py +0 -0
  238. {pipefunc-0.92.0 → pipefunc-0.93.0}/tests/unit/error_handling/test_propagated_snapshot_serialization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pipefunc
3
- Version: 0.92.0
3
+ Version: 0.93.0
4
4
  Summary: A Python library for defining, managing, and executing function pipelines.
5
5
  Project-URL: homepage, https://pipefunc.readthedocs.io/
6
6
  Project-URL: documentation, https://pipefunc.readthedocs.io/
@@ -530,3 +530,40 @@ print(result["result"].output)
530
530
  6. **`pipeline.map`:** We call `pipeline.map` as before, but now we only need to specify the `internal_shapes` of the lists, not the shape of the status. The `internal_shapes` argument is only needed when you return a list, and it cannot be inferred from the inputs.
531
531
 
532
532
  This pattern provides a clean and manageable way to work with functions that logically produce multiple outputs of varying shapes within the current capabilities of `pipefunc`.
533
+
534
+ ## Working with `polars` DataFrames (Parquet storage and `LazyFrame` inputs)
535
+
536
+ `pipefunc` has first-class support for [polars](https://pola.rs/):
537
+
538
+ 1. **Parquet on disk**: when a function returns a `polars.DataFrame` and the results are stored on disk (e.g., `storage="file_array"` in `pipeline.map`), the output is serialized as a [Parquet](https://parquet.apache.org/) file instead of a pickle. Parquet files are compact, fast to read, and can be inspected with external tools like DuckDB. If Parquet serialization fails (e.g., for unsupported dtypes), `pipefunc` transparently falls back to `cloudpickle`.
539
+ 2. **Lazy inputs**: annotate a parameter as `polars.LazyFrame` to receive the upstream `polars.DataFrame` output lazily. When the upstream output is stored on disk as Parquet, the function receives `pl.scan_parquet(...)`, so the full DataFrame is never materialized in memory and polars can apply predicate and projection pushdown. Otherwise (e.g., in-memory storage or `pipeline.run`), the DataFrame is converted with `.lazy()`.
540
+
541
+ Type validation understands this conversion: a function returning `pl.DataFrame` may feed into a parameter annotated as `pl.LazyFrame`.
542
+
543
+ ```{code-cell} ipython3
544
+ import polars as pl
545
+
546
+ from pipefunc import Pipeline, pipefunc
547
+
548
+
549
+ @pipefunc(output_name="df")
550
+ def make_df() -> pl.DataFrame:
551
+ return pl.DataFrame({"x": [1, 2, 3], "y": [10.0, 20.0, 30.0]})
552
+
553
+
554
+ @pipefunc(output_name="mean_y")
555
+ def mean_y(df: pl.LazyFrame) -> float: # annotate as LazyFrame to load lazily
556
+ return df.select(pl.col("y").mean()).collect().item()
557
+
558
+
559
+ pipeline = Pipeline([make_df, mean_y])
560
+ result = pipeline.map({}, run_folder="my_run_folder", parallel=False, show_progress=False)
561
+ print(result["mean_y"].output)
562
+ ```
563
+
564
+ The `df` output above is stored as a Parquet file in the run folder, and `mean_y` receives a `pl.LazyFrame` that scans it.
565
+
566
+ ```{note}
567
+ Only top-level `polars.DataFrame` return values are stored as Parquet; DataFrames nested inside other objects (lists, dicts, dataclasses) are pickled as usual.
568
+ The `pl.LazyFrame` conversion applies to parameters annotated *exactly* as `pl.LazyFrame`.
569
+ ```
@@ -234,6 +234,10 @@ This section has been moved to [Function Inputs and Outputs](./concepts/function
234
234
 
235
235
  This section has been moved to [Function Inputs and Outputs](./concepts/function-io.md#pipefuncs-with-multiple-outputs-of-different-shapes).
236
236
 
237
+ ## How does `pipefunc` work with `polars` DataFrames?
238
+
239
+ See [Function Inputs and Outputs](./concepts/function-io.md#working-with-polars-dataframes-parquet-storage-and-lazyframe-inputs) for Parquet storage and lazy (`pl.LazyFrame`) inputs.
240
+
237
241
  ## Simplifying Pipelines
238
242
 
239
243
  This section has been moved to [Simplifying Pipelines](./concepts/simplifying-pipelines.md).
@@ -37,6 +37,7 @@ from pipefunc._utils import (
37
37
  clear_cached_properties,
38
38
  format_function_call,
39
39
  is_classmethod,
40
+ is_lazyframe_annotation,
40
41
  is_pydantic_base_model,
41
42
  requires,
42
43
  )
@@ -876,6 +877,22 @@ class PipeFunc(Generic[P, R]):
876
877
  type_hints = safe_get_type_hints(func, include_extras=True)
877
878
  return {self.renames.get(k, k): v for k, v in type_hints.items() if k != "return"}
878
879
 
880
+ @functools.cached_property
881
+ def _lazyframe_parameters(self) -> tuple[str, ...]:
882
+ """Names of parameters annotated as `polars.LazyFrame`."""
883
+ return tuple(p for p, a in self.parameter_annotations.items() if is_lazyframe_annotation(a))
884
+
885
+ def _convert_lazyframe_kwargs(self, kwargs: dict[str, Any]) -> None:
886
+ """Convert `pl.DataFrame` values to `pl.LazyFrame` where the annotation asks for it."""
887
+ if not self._lazyframe_parameters: # fast path, avoids per-element overhead
888
+ return
889
+ import polars as pl
890
+
891
+ for p in self._lazyframe_parameters:
892
+ value = kwargs.get(p)
893
+ if isinstance(value, pl.DataFrame):
894
+ kwargs[p] = value.lazy()
895
+
879
896
  @functools.cached_property
880
897
  def output_annotation(self) -> dict[str, Any]:
881
898
  """Return the type annotation of the wrapped function's output."""
@@ -615,6 +615,7 @@ class Pipeline:
615
615
  raise ValueError(msg)
616
616
  func_args[arg] = value
617
617
  used_parameters.add(arg)
618
+ func._convert_lazyframe_kwargs(func_args)
618
619
  return func_args
619
620
 
620
621
  def _current_cache(self) -> LRUCache | HybridCache | DiskCache | SimpleCache | None:
@@ -52,9 +52,23 @@ def at_least_tuple(x: Any) -> tuple[Any, ...]:
52
52
  return x if isinstance(x, tuple) else (x,)
53
53
 
54
54
 
55
+ PARQUET_MAGIC = b"PAR1"
56
+
57
+
58
+ def is_parquet_file(path: Path) -> bool:
59
+ """Check whether the file at ``path`` is a Parquet file (by magic bytes)."""
60
+ try:
61
+ with path.open("rb") as f:
62
+ return f.read(len(PARQUET_MAGIC)) == PARQUET_MAGIC
63
+ except OSError: # pragma: no cover
64
+ return False
65
+
66
+
55
67
  def load(path: Path, *, cache: bool = False) -> Any:
56
- """Load a cloudpickled object from a path.
68
+ """Load an object from a path.
57
69
 
70
+ Reads Parquet files (written by `dump` for ``polars.DataFrame`` objects)
71
+ as ``polars.DataFrame``, everything else as cloudpickle.
58
72
  If ``cache`` is ``True``, the object will be cached in memory.
59
73
  """
60
74
  if cache:
@@ -62,12 +76,33 @@ def load(path: Path, *, cache: bool = False) -> Any:
62
76
  return _cached_load(cache_key)
63
77
 
64
78
  with path.open("rb") as f:
79
+ is_parquet = f.read(len(PARQUET_MAGIC)) == PARQUET_MAGIC
80
+ f.seek(0)
81
+ if is_parquet:
82
+ import polars as pl
83
+
84
+ return pl.read_parquet(f)
65
85
  return cloudpickle.load(f)
66
86
 
67
87
 
68
88
  def dump(obj: Any, path: Path) -> None:
69
- """Dump an object to a path using cloudpickle."""
89
+ """Dump an object to a path.
90
+
91
+ ``polars.DataFrame`` objects are stored as Parquet (falling back to
92
+ cloudpickle if Parquet serialization fails, e.g., for ``pl.Object``
93
+ dtype columns); everything else is stored with cloudpickle.
94
+ """
70
95
  path.parent.mkdir(parents=True, exist_ok=True)
96
+ if is_imported("polars"):
97
+ import polars as pl
98
+
99
+ if isinstance(obj, pl.DataFrame):
100
+ try:
101
+ obj.write_parquet(path)
102
+ except Exception: # noqa: BLE001, e.g., unsupported dtypes like pl.Object
103
+ path.unlink(missing_ok=True)
104
+ else:
105
+ return
71
106
  with path.open("wb") as f:
72
107
  cloudpickle.dump(obj, f)
73
108
 
@@ -629,3 +664,12 @@ def pandas_to_polars(df: Any) -> Any:
629
664
  # Fallback to manual conversion if pyarrow is not available
630
665
  # This happens when pandas has nullable types but pyarrow is not installed
631
666
  return pl.DataFrame({col: df[col].to_numpy() for col in df.columns})
667
+
668
+
669
+ def is_lazyframe_annotation(annotation: Any) -> bool:
670
+ """Check whether ``annotation`` is ``polars.LazyFrame``."""
671
+ if not is_imported("polars"):
672
+ return False
673
+ import polars as pl
674
+
675
+ return annotation is pl.LazyFrame
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from pathlib import Path
4
4
 
5
5
  # Is set during `onbuild` if `pip install pipefunc` is used
6
- __version__ = "0.92.0"
6
+ __version__ = "0.93.0"
7
7
 
8
8
  if not __version__:
9
9
  try:
@@ -885,6 +885,10 @@ def to_hashable( # noqa: C901, PLR0911, PLR0912
885
885
  if isinstance(obj, polars.DataFrame):
886
886
  hsh = to_hashable(obj.to_dict(as_series=False), fallback_to_pickle)
887
887
  return (m, tp, hsh)
888
+ if isinstance(obj, polars.LazyFrame):
889
+ # Hash the serialized query plan; collecting the data here would
890
+ # defeat the purpose of using a LazyFrame.
891
+ return (m, tp, obj.serialize())
888
892
 
889
893
  if fallback_to_pickle:
890
894
  try:
@@ -30,6 +30,7 @@ from pipefunc._utils import (
30
30
  dump,
31
31
  ensure_block_allowed,
32
32
  get_ncores,
33
+ is_parquet_file,
33
34
  is_running_in_ipynb,
34
35
  prod,
35
36
  )
@@ -716,7 +717,10 @@ def _func_kwargs(func: PipeFunc, run_info: RunInfo, store: dict[str, StoreType])
716
717
  elif p in run_info.inputs:
717
718
  kwargs[p] = run_info.inputs[p]
718
719
  elif p in run_info.all_output_names:
719
- kwargs[p] = _load_from_store(p, store).value
720
+ if (lazy_frame := _maybe_scan_parquet(func, p, store)) is not None:
721
+ kwargs[p] = lazy_frame
722
+ else:
723
+ kwargs[p] = _load_from_store(p, store).value
720
724
  elif p in run_info.defaults and p not in run_info.all_output_names:
721
725
  kwargs[p] = run_info.defaults[p]
722
726
  else: # pragma: no cover
@@ -727,6 +731,26 @@ def _func_kwargs(func: PipeFunc, run_info: RunInfo, store: dict[str, StoreType])
727
731
  return kwargs
728
732
 
729
733
 
734
+ def _maybe_scan_parquet(func: PipeFunc, parameter: str, store: dict[str, StoreType]) -> Any:
735
+ """Return a `pl.LazyFrame` scanning the stored Parquet file, if applicable.
736
+
737
+ Only applies when the parameter is annotated as `pl.LazyFrame`, is not
738
+ indexed by the function's mapspec, and the upstream output is stored on
739
+ disk as a Parquet file (see `pipefunc._utils.dump`). This avoids
740
+ materializing the full `pl.DataFrame` in memory.
741
+ """
742
+ if parameter not in func._lazyframe_parameters:
743
+ return None
744
+ if func.mapspec is not None and parameter in func.mapspec.input_names:
745
+ return None
746
+ storage = store[parameter]
747
+ if not isinstance(storage, Path) or not storage.is_file() or not is_parquet_file(storage):
748
+ return None
749
+ import polars as pl
750
+
751
+ return pl.scan_parquet(storage)
752
+
753
+
730
754
  def _select_kwargs(
731
755
  func: PipeFunc,
732
756
  kwargs: dict[str, Any],
@@ -740,6 +764,7 @@ def _select_kwargs(
740
764
  normalized_keys = {k: v[0] if len(v) == 1 else v for k, v in input_keys.items()}
741
765
  selected = {k: v[normalized_keys[k]] if k in normalized_keys else v for k, v in kwargs.items()}
742
766
  _load_data(selected)
767
+ func._convert_lazyframe_kwargs(selected)
743
768
  return selected
744
769
 
745
770
 
@@ -1695,6 +1720,7 @@ def _execute_single(
1695
1720
 
1696
1721
  # Otherwise, run the function
1697
1722
  _load_data(kwargs)
1723
+ func._convert_lazyframe_kwargs(kwargs)
1698
1724
  if error_handling == "raise":
1699
1725
  return _get_or_set_cache(func, kwargs, cache, _CTX_RAISE, "raise")
1700
1726
 
@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Any
11
11
  import cloudpickle # type: ignore[import-untyped]
12
12
  import numpy as np
13
13
 
14
- from pipefunc._utils import dump, load
14
+ from pipefunc._utils import PARQUET_MAGIC, dump, load
15
15
 
16
16
  from ._base import (
17
17
  StorageBase,
@@ -326,8 +326,16 @@ def _load_all(filenames: Iterator[Path]) -> list[Any]:
326
326
  def maybe_read(f: Path) -> Any | None:
327
327
  return _read(f) if f.is_file() else None
328
328
 
329
- def maybe_load(x: str | None) -> Any | None:
330
- return cloudpickle.loads(x) if x is not None else None
329
+ def maybe_load(x: bytes | None) -> Any | None:
330
+ if x is None:
331
+ return None
332
+ if x.startswith(PARQUET_MAGIC):
333
+ import io
334
+
335
+ import polars as pl
336
+
337
+ return pl.read_parquet(io.BytesIO(x))
338
+ return cloudpickle.loads(x)
331
339
 
332
340
  # Delegate file reading to the threadpool but deserialize sequentially,
333
341
  # as this is pure Python and CPU bound
@@ -20,6 +20,8 @@ from typing import (
20
20
 
21
21
  import numpy as np
22
22
 
23
+ from pipefunc._utils import is_imported
24
+
23
25
 
24
26
  class NoAnnotation:
25
27
  """Marker class for missing type annotations."""
@@ -209,7 +211,7 @@ def _handle_generic_types(
209
211
  return None
210
212
 
211
213
 
212
- def is_type_compatible(
214
+ def is_type_compatible( # noqa: PLR0911
213
215
  incoming_type: Any,
214
216
  required_type: Any,
215
217
  memo: TypeCheckMemo | None = None,
@@ -228,6 +230,10 @@ def is_type_compatible(
228
230
 
229
231
  if _check_identical_or_any(incoming_type, required_type):
230
232
  return True
233
+ if _is_polars_dataframe_to_lazyframe(incoming_type, required_type):
234
+ # pipefunc converts `pl.DataFrame` values to `pl.LazyFrame` at execution
235
+ # time when the consuming parameter is annotated as `pl.LazyFrame`.
236
+ return True
231
237
  if (result := _is_typevar_compatible(incoming_type, required_type, memo)) is not None:
232
238
  return result
233
239
  if (result := _handle_union_types(incoming_type, required_type, memo)) is not None:
@@ -237,6 +243,15 @@ def is_type_compatible(
237
243
  return False
238
244
 
239
245
 
246
def _is_polars_dataframe_to_lazyframe(incoming_type: Any, required_type: Any) -> bool:
    """Tell whether this is the special-cased `pl.DataFrame` -> `pl.LazyFrame` edge.

    Returns ``False`` without importing polars when ``is_imported("polars")``
    is falsy; otherwise both types are compared by identity.
    """
    if is_imported("polars"):
        import polars as pl

        return required_type is pl.LazyFrame and incoming_type is pl.DataFrame
    return False
253
+
254
+
240
255
  def _is_typevar_compatible(
241
256
  incoming_type: Any,
242
257
  required_type: Any,
@@ -0,0 +1,234 @@
1
+ """Tests for Parquet serialization and `pl.LazyFrame` support (issue #879)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib.util
6
+ import sys
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ import numpy as np # noqa: TC002, needed at runtime to resolve `np.ndarray` annotations
10
+ import pytest
11
+
12
+ from pipefunc import Pipeline, pipefunc
13
+ from pipefunc._utils import PARQUET_MAGIC, dump, is_parquet_file, load
14
+ from pipefunc.map import load_outputs
15
+ from pipefunc.typing import is_type_compatible
16
+
17
+ has_polars = importlib.util.find_spec("polars") is not None
18
+ pytestmark = pytest.mark.skipif(not has_polars, reason="polars not installed")
19
+
20
+ if has_polars:
21
+ import polars as pl
22
+
23
+ if TYPE_CHECKING:
24
+ from pathlib import Path
25
+
26
+
27
def test_dump_dataframe_as_parquet(tmp_path: Path) -> None:
    """A dumped ``pl.DataFrame`` starts with the Parquet magic and round-trips via ``load``."""
    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    target = tmp_path / "df.cloudpickle"
    dump(frame, target)

    header = target.read_bytes()[:4]
    assert header == PARQUET_MAGIC
    assert is_parquet_file(target)

    restored = load(target)
    assert isinstance(restored, pl.DataFrame)
    assert restored.equals(frame)
36
+
37
+
38
def test_dump_non_dataframe_still_pickles(tmp_path: Path) -> None:
    """A plain dict is not written as Parquet and loads back unchanged."""
    target = tmp_path / "obj.cloudpickle"
    dump({"a": 1}, target)
    assert not is_parquet_file(target)
    restored = load(target)
    assert restored == {"a": 1}
43
+
44
+
45
def test_dump_falls_back_to_pickle_on_parquet_failure(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """If ``write_parquet`` raises, ``dump`` still writes a loadable, non-Parquet file."""

    def fail(*args: Any, **kwargs: Any) -> None:
        msg = "boom"
        raise ValueError(msg)

    frame = pl.DataFrame({"a": [1, 2]})
    target = tmp_path / "df.cloudpickle"

    # Force the Parquet writer to blow up for the duration of this test.
    monkeypatch.setattr(pl.DataFrame, "write_parquet", fail)
    dump(frame, target)

    header = target.read_bytes()[:4]
    assert header != PARQUET_MAGIC

    restored = load(target)
    assert isinstance(restored, pl.DataFrame)
    assert restored.equals(frame)
62
+
63
+
64
def test_load_with_cache(tmp_path: Path) -> None:
    """``load(..., cache=True)`` returns a frame equal to the one that was dumped."""
    frame = pl.DataFrame({"a": [1, 2]})
    target = tmp_path / "df.cloudpickle"
    dump(frame, target)
    restored = load(target, cache=True)
    assert restored.equals(frame)
69
+
70
+
71
def test_file_array_with_dataframes(tmp_path: Path) -> None:
    """``FileArray`` stores DataFrames as Parquet and returns DataFrames on both read paths."""
    from pipefunc.map._storage_array._file import FileArray

    arr = FileArray(tmp_path / "arr", shape=(2,))
    for index, value in enumerate((1, 2)):
        arr.dump((index,), pl.DataFrame({"a": [value]}))

    first_file = arr._index_to_file(0)
    assert is_parquet_file(first_file)

    first = arr[(0,)]
    assert isinstance(first, pl.DataFrame)
    assert first["a"].to_list() == [1]

    # `to_array` exercises the threaded `_load_all` byte-sniffing path
    for item in arr.to_array():
        assert isinstance(item, pl.DataFrame)
84
+
85
+
86
def test_dataframe_to_lazyframe_type_compatible() -> None:
    """Only the ``pl.DataFrame`` -> ``pl.LazyFrame`` direction counts as compatible."""
    assert is_type_compatible(pl.DataFrame, pl.LazyFrame)

    rejected = (
        (pl.LazyFrame, pl.DataFrame),
        (int, pl.LazyFrame),
    )
    for incoming, required in rejected:
        assert not is_type_compatible(incoming, required)
90
+
91
+
92
def test_map_lazyframe_input_scans_parquet(tmp_path: Path) -> None:
    """With a run folder, the ``pl.LazyFrame`` consumer sees a scan and ``df`` is stored as Parquet."""

    @pipefunc(output_name="df")
    def make_df() -> pl.DataFrame:
        return pl.DataFrame({"a": [1, 2, 3]})

    @pipefunc(output_name="total")
    def consume(df: pl.LazyFrame) -> int:
        assert isinstance(df, pl.LazyFrame)
        # The plan must be a Parquet scan, not an in-memory DataFrame
        plan = df.explain(optimized=False)
        assert "DF" not in plan
        summed = df.select(pl.col("a").sum())
        return summed.collect().item()

    # Building the pipeline validates the type annotations.
    pipeline = Pipeline([make_df, consume])
    result = pipeline.map({}, run_folder=tmp_path, parallel=False, show_progress=False)
    total = result["total"].output
    assert total == 6

    stored = tmp_path / "outputs" / "df.cloudpickle"
    assert is_parquet_file(stored)

    restored = load_outputs("df", run_folder=tmp_path)
    assert isinstance(restored, pl.DataFrame)
    assert restored["a"].to_list() == [1, 2, 3]
112
+
113
+
114
def test_map_lazyframe_input_without_run_folder() -> None:
    """With ``storage="dict"`` and no run folder, the consumer still receives a ``pl.LazyFrame``."""

    @pipefunc(output_name="df")
    def make_df() -> pl.DataFrame:
        return pl.DataFrame({"a": [1, 2, 3]})

    @pipefunc(output_name="total")
    def consume(df: pl.LazyFrame) -> int:
        assert isinstance(df, pl.LazyFrame)
        summed = df.select(pl.col("a").sum())
        return summed.collect().item()

    pipeline = Pipeline([make_df, consume])
    result = pipeline.map({}, parallel=False, show_progress=False, storage="dict")
    total = result["total"].output
    assert total == 6
127
+
128
+
129
def test_map_elementwise_lazyframe(tmp_path: Path) -> None:
    """Element-wise mapspecs hand each per-index DataFrame to the consumer as a ``pl.LazyFrame``."""

    @pipefunc(output_name="df", mapspec="x[i] -> df[i]")
    def make_df(x: int) -> pl.DataFrame:
        return pl.DataFrame({"a": [x, x * 2]})

    @pipefunc(output_name="total", mapspec="df[i] -> total[i]")
    def consume(df: pl.LazyFrame) -> int:
        assert isinstance(df, pl.LazyFrame)
        summed = df.select(pl.col("a").sum())
        return summed.collect().item()

    pipeline = Pipeline([make_df, consume])
    inputs = {"x": [1, 10]}
    result = pipeline.map(
        inputs,
        run_folder=tmp_path,
        parallel=True,
        show_progress=False,
    )
    totals = result["total"].output.tolist()
    assert totals == [3, 30]
147
+
148
+
149
def test_map_reduction_keeps_dataframes(tmp_path: Path) -> None:
    """A reducing consumer annotated ``np.ndarray`` receives the mapped DataFrames unchanged."""

    @pipefunc(output_name="df", mapspec="x[i] -> df[i]")
    def make_df(x: int) -> pl.DataFrame:
        return pl.DataFrame({"a": [x]})

    @pipefunc(output_name="n")
    def reduce_all(df: np.ndarray) -> int:
        for d in df:
            assert isinstance(d, pl.DataFrame)
        return len(df)

    pipeline = Pipeline([make_df, reduce_all])
    inputs = {"x": [1, 10]}
    result = pipeline.map(
        inputs,
        run_folder=tmp_path,
        parallel=False,
        show_progress=False,
    )
    count = result["n"].output
    assert count == 2
167
+
168
+
169
def test_run_lazyframe_input() -> None:
    """``Pipeline.run`` hands an upstream DataFrame to a ``pl.LazyFrame`` parameter as a LazyFrame."""

    @pipefunc(output_name="df")
    def make_df() -> pl.DataFrame:
        return pl.DataFrame({"a": [1, 2, 3]})

    @pipefunc(output_name="total")
    def consume(df: pl.LazyFrame) -> int:
        assert isinstance(df, pl.LazyFrame)
        summed = df.select(pl.col("a").sum())
        return summed.collect().item()

    pipeline = Pipeline([make_df, consume])
    total = pipeline.run("total", kwargs={})
    assert total == 6
181
+
182
+
183
def test_run_lazyframe_from_input_kwarg() -> None:
    """A DataFrame passed as a root kwarg to ``run`` reaches the consumer as a ``pl.LazyFrame``."""

    @pipefunc(output_name="total")
    def consume(df: pl.LazyFrame) -> int:
        assert isinstance(df, pl.LazyFrame)
        summed = df.select(pl.col("a").sum())
        return summed.collect().item()

    pipeline = Pipeline([consume])
    frame = pl.DataFrame({"a": [1, 2, 3]})
    total = pipeline.run("total", kwargs={"df": frame})
    assert total == 6
192
+
193
+
194
def test_map_lazyframe_from_input_kwarg(tmp_path: Path) -> None:
    """A DataFrame passed as a root input to ``map`` reaches the consumer as a ``pl.LazyFrame``."""

    @pipefunc(output_name="total")
    def consume(df: pl.LazyFrame) -> int:
        assert isinstance(df, pl.LazyFrame)
        summed = df.select(pl.col("a").sum())
        return summed.collect().item()

    pipeline = Pipeline([consume])
    frame = pl.DataFrame({"a": [1, 2, 3]})
    result = pipeline.map(
        {"df": frame},
        run_folder=tmp_path,
        parallel=False,
        show_progress=False,
    )
    total = result["total"].output
    assert total == 6
204
+
205
+
206
def test_lazyframe_passthrough() -> None:
    """A producer that already returns a ``pl.LazyFrame`` feeds a LazyFrame consumer."""

    @pipefunc(output_name="lf")
    def make_lf() -> pl.LazyFrame:
        eager = pl.DataFrame({"a": [1, 2, 3]})
        return eager.lazy()

    @pipefunc(output_name="total")
    def consume(lf: pl.LazyFrame) -> int:
        assert isinstance(lf, pl.LazyFrame)
        summed = lf.select(pl.col("a").sum())
        return summed.collect().item()

    pipeline = Pipeline([make_lf, consume])
    result = pipeline.map({}, parallel=False, show_progress=False, storage="dict")
    total = result["total"].output
    assert total == 6
219
+
220
+
221
def test_to_hashable_lazyframe() -> None:
    """Two separately built LazyFrames over the same data hash equally via ``to_hashable``."""
    from pipefunc.cache import to_hashable

    first_lf = pl.DataFrame({"a": [1, 2]}).lazy()
    first_key = to_hashable(first_lf)
    first_hash = hash(first_key)

    second_lf = pl.DataFrame({"a": [1, 2]}).lazy()
    second_hash = hash(to_hashable(second_lf))

    assert first_hash == second_hash
227
+
228
+
229
def test_helpers_when_polars_not_imported(monkeypatch: pytest.MonkeyPatch) -> None:
    """With ``polars`` removed from ``sys.modules``, the polars-aware helpers report falsy."""
    from pipefunc._utils import is_lazyframe_annotation

    # `pl` stays bound in this module; only the `sys.modules` entry is dropped.
    monkeypatch.delitem(sys.modules, "polars")

    annotation_result = is_lazyframe_annotation(pl.LazyFrame)
    assert not annotation_result

    compatible = is_type_compatible(pl.DataFrame, pl.LazyFrame)
    assert not compatible
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes