pipefunc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pipefunc-0.1.0/AUTHORS.md +2 -0
- pipefunc-0.1.0/LICENSE +7 -0
- pipefunc-0.1.0/MANIFEST.in +3 -0
- pipefunc-0.1.0/PKG-INFO +171 -0
- pipefunc-0.1.0/README.md +140 -0
- pipefunc-0.1.0/pipefunc/__init__.py +25 -0
- pipefunc-0.1.0/pipefunc/_cache.py +266 -0
- pipefunc-0.1.0/pipefunc/_perf.py +151 -0
- pipefunc-0.1.0/pipefunc/_pipefunc.py +1243 -0
- pipefunc-0.1.0/pipefunc/_plotting.py +179 -0
- pipefunc-0.1.0/pipefunc/_sweep.py +367 -0
- pipefunc-0.1.0/pipefunc/_version.py +9 -0
- pipefunc-0.1.0/pipefunc.egg-info/PKG-INFO +171 -0
- pipefunc-0.1.0/pipefunc.egg-info/SOURCES.txt +23 -0
- pipefunc-0.1.0/pipefunc.egg-info/dependency_links.txt +1 -0
- pipefunc-0.1.0/pipefunc.egg-info/entry_points.txt +2 -0
- pipefunc-0.1.0/pipefunc.egg-info/requires.txt +35 -0
- pipefunc-0.1.0/pipefunc.egg-info/top_level.txt +1 -0
- pipefunc-0.1.0/pyproject.toml +123 -0
- pipefunc-0.1.0/setup.cfg +9 -0
- pipefunc-0.1.0/tests/test_cache.py +136 -0
- pipefunc-0.1.0/tests/test_perf.py +51 -0
- pipefunc-0.1.0/tests/test_pipefunc.py +478 -0
- pipefunc-0.1.0/tests/test_sweep.py +183 -0
pipefunc-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2023, Bas Nijholt
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
pipefunc-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pipefunc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python library for defining, managing, and executing function pipelines.
|
|
5
|
+
Maintainer-email: Bas Nijholt <bas@nijho.lt>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: homepage, https://pipefunc.readthedocs.io/
|
|
8
|
+
Project-URL: documentation, https://pipefunc.readthedocs.io/
|
|
9
|
+
Project-URL: repository, https://github.com/basnijholt/pipefunc
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering
|
|
22
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
Provides-Extra: plotting
|
|
26
|
+
Provides-Extra: test
|
|
27
|
+
Provides-Extra: docs
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
License-File: AUTHORS.md
|
|
31
|
+
|
|
32
|
+
# pipefunc: function composition magic for Python
|
|
33
|
+
|
|
34
|
+
> Lightweight function pipeline creation: π Less Bookkeeping, π― More Doing
|
|
35
|
+
|
|
36
|
+

|
|
37
|
+
|
|
38
|
+
<!-- toc-start -->
|
|
39
|
+
## :books: Table of Contents
|
|
40
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
|
41
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
|
42
|
+
|
|
43
|
+
- [:thinking: What is this?](#thinking-what-is-this)
|
|
44
|
+
- [:rocket: Key Features](#rocket-key-features)
|
|
45
|
+
- [:test_tube: How does it work?](#test_tube-how-does-it-work)
|
|
46
|
+
- [:notebook: Jupyter Notebook Example](#notebook-jupyter-notebook-example)
|
|
47
|
+
- [:computer: Installation](#computer-installation)
|
|
48
|
+
- [:hammer_and_wrench: Development](#hammer_and_wrench-development)
|
|
49
|
+
|
|
50
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
|
51
|
+
<!-- toc-end -->
|
|
52
|
+
|
|
53
|
+
## :thinking: What is this?
|
|
54
|
+
|
|
55
|
+
`pipefunc` is a Python library designed to create and manage complex networks of interdependent functions, often known as function pipelines.
|
|
56
|
+
|
|
57
|
+
In a function pipeline, each function can have dependencies on the results of other functions. Managing these dependencies, ensuring each function has the inputs it needs, and determining the order of execution can become an annoying bookkeeping task in complex cases.
|
|
58
|
+
|
|
59
|
+
`pipefunc` simplifies this process by allowing you to declare the dependencies of each function and automatically organizing the execution order to satisfy these dependencies. Additionally, the library provides features for visualizing the function pipeline, simplifying the pipeline graph, caching function results for efficiency, and profiling resource usage for optimization.
|
|
60
|
+
|
|
61
|
+
For example, imagine you have a set of functions where `function B` needs the output from `function A`, and `function C` needs the outputs from both `function A` and `function B`. `pipefunc` allows you to specify these dependencies when you create the functions and then automatically manages their execution. It also provides tools for visualizing this function network, simplifying it if possible, and understanding the resource usage of each function.
|
|
62
|
+
|
|
63
|
+
The library is designed to be an efficient and flexible tool for managing complex function dependencies in an intuitive and clear way. Whether you're dealing with data processing tasks, scientific computations, machine learning (AI) workflows, or other scenarios where functions depend on one another, `pipefunc` can help streamline your code and improve your productivity.
|
|
64
|
+
|
|
65
|
+
## :rocket: Key Features
|
|
66
|
+
|
|
67
|
+
Some of the key features of `pipefunc` include:
|
|
68
|
+
|
|
69
|
+
1. π **Function Composition and Pipelining:** The core functionality of `pipefunc` is to create a pipeline of functions, allowing you to feed the output of one function into another, and execute them in the right order.
|
|
70
|
+
1. π **Visualizing Pipelines:** `pipefunc` can generate a visual graph of the function pipeline, making it easier to understand the flow of data.
|
|
71
|
+
1. π‘ **Flexible Function Arguments:** `pipefunc` lets you call a function with different combinations of arguments, automatically determining which other functions to call based on the arguments you provide.
|
|
72
|
+
1. π₯ **Multiple Outputs:** `pipefunc` supports functions that return multiple results, allowing each result to be used as input to other functions.
|
|
73
|
+
1. β‘οΈ **Reducing Pipelines:** `pipefunc` can simplify a complex pipeline by merging nodes, improving computational efficiency at the cost of losing visibility into some intermediate steps.
|
|
74
|
+
1. ποΈ **Resources Report:** `pipefunc` provides a report on the performance of your pipeline, including CPU usage, memory usage, and execution time, helping you identify bottlenecks and optimize your code.
|
|
75
|
+
1. π **Parallel Execution and Caching:** `pipefunc` supports parallel execution of functions, and caching of results to avoid redundant computation.
|
|
76
|
+
1. π **Parameter Sweeps:** `pipefunc` provides a utility for generating combinations of parameters to use in a parameter sweep, along with the ability to cache results to optimize the sweep.
|
|
77
|
+
1. π οΈ **Flexibility and Ease of Use:** `pipefunc` is a lightweight, flexible, and powerful tool for managing complex function dependencies in a clear and intuitive way, designed to improve your productivity in any scenario where functions depend on one another.
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
## :test_tube: How does it work?
|
|
81
|
+
|
|
82
|
+
pipefunc provides a Pipeline class that you use to define your function pipeline.
|
|
83
|
+
You add functions to the pipeline using the `pipefunc` decorator, which also lets you specify a function's output name and dependencies.
|
|
84
|
+
Once your pipeline is defined, you can execute it for specific output values, simplify it by combining functions with the same root arguments, visualize it as a directed graph, and profile the resource usage of the pipeline functions.
|
|
85
|
+
For more detailed usage instructions and examples, please check the usage example provided in the package.
|
|
86
|
+
|
|
87
|
+
Here is a simple example usage of pipefunc to illustrate its primary features:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from pipefunc import pipefunc, Pipeline
|
|
91
|
+
|
|
92
|
+
# Define three functions that will be a part of the pipeline
|
|
93
|
+
@pipefunc(output_name="c")
|
|
94
|
+
def f_c(a, b):
|
|
95
|
+
return a + b
|
|
96
|
+
|
|
97
|
+
@pipefunc(output_name="d")
|
|
98
|
+
def f_d(b, c):
|
|
99
|
+
return b * c
|
|
100
|
+
|
|
101
|
+
@pipefunc(output_name="e")
|
|
102
|
+
def f_e(c, d, x=1):
|
|
103
|
+
return c * d * x
|
|
104
|
+
|
|
105
|
+
# Create a pipeline with these functions
|
|
106
|
+
funcs = [f_c, f_d, f_e]
|
|
107
|
+
pipeline = Pipeline(funcs, profile=True)
|
|
108
|
+
|
|
109
|
+
# You can access and call these functions using the func method
|
|
110
|
+
h_d = pipeline.func("d")
|
|
111
|
+
assert h_d(a=2, b=3) == 15
|
|
112
|
+
|
|
113
|
+
h_e = pipeline.func("e")
|
|
114
|
+
assert h_e(a=2, b=3, x=1) == 75
|
|
115
|
+
assert h_e(c=5, d=15, x=1) == 75
|
|
116
|
+
|
|
117
|
+
# Visualize the pipeline
|
|
118
|
+
pipeline.visualize()
|
|
119
|
+
|
|
120
|
+
# Get all possible argument mappings for each function
|
|
121
|
+
all_args = pipeline.all_arg_combinations()
|
|
122
|
+
print(all_args)
|
|
123
|
+
|
|
124
|
+
# Show resource reporting (only works if profile=True)
|
|
125
|
+
pipeline.resources_report()
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
This example demonstrates defining a pipeline with `f_c`, `f_d`, `f_e` functions, accessing and executing these functions using the pipeline, visualizing the pipeline graph, getting all possible argument mappings, and reporting on the resource usage.
|
|
129
|
+
This basic example should give you an idea of how to use pipefunc to construct and manage function pipelines.
|
|
130
|
+
|
|
131
|
+
## :notebook: Jupyter Notebook Example
|
|
132
|
+
|
|
133
|
+
See the detailed usage example and more in our [example.ipynb](https://github.com/basnijholt/pipefunc/blob/main/example.ipynb).
|
|
134
|
+
|
|
135
|
+
## :computer: Installation
|
|
136
|
+
|
|
137
|
+
Install the **latest stable** version from conda (recommended):
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
conda install "pipefunc[plotting]"
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
or from PyPI:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
pip install pipefunc
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
or install **main** with:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
pip install -U https://github.com/basnijholt/pipefunc/archive/main.zip
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
or clone the repository and do a dev install (recommended for dev):
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
git clone git@github.com:basnijholt/pipefunc.git
|
|
159
|
+
cd pipefunc
|
|
160
|
+
pip install -e ".[dev,test,plotting]"
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## :hammer_and_wrench: Development
|
|
164
|
+
|
|
165
|
+
We use [`pre-commit`](https://pre-commit.com/) to manage pre-commit hooks, which helps us ensure that our code is always clean and compliant with our coding standards.
|
|
166
|
+
To set it up, install pre-commit with pip and then run the install command:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
pip install pre-commit
|
|
170
|
+
pre-commit install
|
|
171
|
+
```
|
pipefunc-0.1.0/README.md
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# pipefunc: function composition magic for Python
|
|
2
|
+
|
|
3
|
+
> Lightweight function pipeline creation: π Less Bookkeeping, π― More Doing
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
<!-- toc-start -->
|
|
8
|
+
## :books: Table of Contents
|
|
9
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
|
10
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
|
11
|
+
|
|
12
|
+
- [:thinking: What is this?](#thinking-what-is-this)
|
|
13
|
+
- [:rocket: Key Features](#rocket-key-features)
|
|
14
|
+
- [:test_tube: How does it work?](#test_tube-how-does-it-work)
|
|
15
|
+
- [:notebook: Jupyter Notebook Example](#notebook-jupyter-notebook-example)
|
|
16
|
+
- [:computer: Installation](#computer-installation)
|
|
17
|
+
- [:hammer_and_wrench: Development](#hammer_and_wrench-development)
|
|
18
|
+
|
|
19
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
|
20
|
+
<!-- toc-end -->
|
|
21
|
+
|
|
22
|
+
## :thinking: What is this?
|
|
23
|
+
|
|
24
|
+
`pipefunc` is a Python library designed to create and manage complex networks of interdependent functions, often known as function pipelines.
|
|
25
|
+
|
|
26
|
+
In a function pipeline, each function can have dependencies on the results of other functions. Managing these dependencies, ensuring each function has the inputs it needs, and determining the order of execution can become an annoying bookkeeping task in complex cases.
|
|
27
|
+
|
|
28
|
+
`pipefunc` simplifies this process by allowing you to declare the dependencies of each function and automatically organizing the execution order to satisfy these dependencies. Additionally, the library provides features for visualizing the function pipeline, simplifying the pipeline graph, caching function results for efficiency, and profiling resource usage for optimization.
|
|
29
|
+
|
|
30
|
+
For example, imagine you have a set of functions where `function B` needs the output from `function A`, and `function C` needs the outputs from both `function A` and `function B`. `pipefunc` allows you to specify these dependencies when you create the functions and then automatically manages their execution. It also provides tools for visualizing this function network, simplifying it if possible, and understanding the resource usage of each function.
|
|
31
|
+
|
|
32
|
+
The library is designed to be an efficient and flexible tool for managing complex function dependencies in an intuitive and clear way. Whether you're dealing with data processing tasks, scientific computations, machine learning (AI) workflows, or other scenarios where functions depend on one another, `pipefunc` can help streamline your code and improve your productivity.
|
|
33
|
+
|
|
34
|
+
## :rocket: Key Features
|
|
35
|
+
|
|
36
|
+
Some of the key features of `pipefunc` include:
|
|
37
|
+
|
|
38
|
+
1. π **Function Composition and Pipelining:** The core functionality of `pipefunc` is to create a pipeline of functions, allowing you to feed the output of one function into another, and execute them in the right order.
|
|
39
|
+
1. π **Visualizing Pipelines:** `pipefunc` can generate a visual graph of the function pipeline, making it easier to understand the flow of data.
|
|
40
|
+
1. π‘ **Flexible Function Arguments:** `pipefunc` lets you call a function with different combinations of arguments, automatically determining which other functions to call based on the arguments you provide.
|
|
41
|
+
1. π₯ **Multiple Outputs:** `pipefunc` supports functions that return multiple results, allowing each result to be used as input to other functions.
|
|
42
|
+
1. β‘οΈ **Reducing Pipelines:** `pipefunc` can simplify a complex pipeline by merging nodes, improving computational efficiency at the cost of losing visibility into some intermediate steps.
|
|
43
|
+
1. ποΈ **Resources Report:** `pipefunc` provides a report on the performance of your pipeline, including CPU usage, memory usage, and execution time, helping you identify bottlenecks and optimize your code.
|
|
44
|
+
1. π **Parallel Execution and Caching:** `pipefunc` supports parallel execution of functions, and caching of results to avoid redundant computation.
|
|
45
|
+
1. π **Parameter Sweeps:** `pipefunc` provides a utility for generating combinations of parameters to use in a parameter sweep, along with the ability to cache results to optimize the sweep.
|
|
46
|
+
1. π οΈ **Flexibility and Ease of Use:** `pipefunc` is a lightweight, flexible, and powerful tool for managing complex function dependencies in a clear and intuitive way, designed to improve your productivity in any scenario where functions depend on one another.
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## :test_tube: How does it work?
|
|
50
|
+
|
|
51
|
+
pipefunc provides a Pipeline class that you use to define your function pipeline.
|
|
52
|
+
You add functions to the pipeline using the `pipefunc` decorator, which also lets you specify a function's output name and dependencies.
|
|
53
|
+
Once your pipeline is defined, you can execute it for specific output values, simplify it by combining functions with the same root arguments, visualize it as a directed graph, and profile the resource usage of the pipeline functions.
|
|
54
|
+
For more detailed usage instructions and examples, please check the usage example provided in the package.
|
|
55
|
+
|
|
56
|
+
Here is a simple example usage of pipefunc to illustrate its primary features:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from pipefunc import pipefunc, Pipeline
|
|
60
|
+
|
|
61
|
+
# Define three functions that will be a part of the pipeline
|
|
62
|
+
@pipefunc(output_name="c")
|
|
63
|
+
def f_c(a, b):
|
|
64
|
+
return a + b
|
|
65
|
+
|
|
66
|
+
@pipefunc(output_name="d")
|
|
67
|
+
def f_d(b, c):
|
|
68
|
+
return b * c
|
|
69
|
+
|
|
70
|
+
@pipefunc(output_name="e")
|
|
71
|
+
def f_e(c, d, x=1):
|
|
72
|
+
return c * d * x
|
|
73
|
+
|
|
74
|
+
# Create a pipeline with these functions
|
|
75
|
+
funcs = [f_c, f_d, f_e]
|
|
76
|
+
pipeline = Pipeline(funcs, profile=True)
|
|
77
|
+
|
|
78
|
+
# You can access and call these functions using the func method
|
|
79
|
+
h_d = pipeline.func("d")
|
|
80
|
+
assert h_d(a=2, b=3) == 15
|
|
81
|
+
|
|
82
|
+
h_e = pipeline.func("e")
|
|
83
|
+
assert h_e(a=2, b=3, x=1) == 75
|
|
84
|
+
assert h_e(c=5, d=15, x=1) == 75
|
|
85
|
+
|
|
86
|
+
# Visualize the pipeline
|
|
87
|
+
pipeline.visualize()
|
|
88
|
+
|
|
89
|
+
# Get all possible argument mappings for each function
|
|
90
|
+
all_args = pipeline.all_arg_combinations()
|
|
91
|
+
print(all_args)
|
|
92
|
+
|
|
93
|
+
# Show resource reporting (only works if profile=True)
|
|
94
|
+
pipeline.resources_report()
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
This example demonstrates defining a pipeline with `f_c`, `f_d`, `f_e` functions, accessing and executing these functions using the pipeline, visualizing the pipeline graph, getting all possible argument mappings, and reporting on the resource usage.
|
|
98
|
+
This basic example should give you an idea of how to use pipefunc to construct and manage function pipelines.
|
|
99
|
+
|
|
100
|
+
## :notebook: Jupyter Notebook Example
|
|
101
|
+
|
|
102
|
+
See the detailed usage example and more in our [example.ipynb](https://github.com/basnijholt/pipefunc/blob/main/example.ipynb).
|
|
103
|
+
|
|
104
|
+
## :computer: Installation
|
|
105
|
+
|
|
106
|
+
Install the **latest stable** version from conda (recommended):
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
conda install "pipefunc[plotting]"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
or from PyPI:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install pipefunc
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
or install **main** with:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
pip install -U https://github.com/basnijholt/pipefunc/archive/main.zip
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
or clone the repository and do a dev install (recommended for dev):
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
git clone git@github.com:basnijholt/pipefunc.git
|
|
128
|
+
cd pipefunc
|
|
129
|
+
pip install -e ".[dev,test,plotting]"
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## :hammer_and_wrench: Development
|
|
133
|
+
|
|
134
|
+
We use [`pre-commit`](https://pre-commit.com/) to manage pre-commit hooks, which helps us ensure that our code is always clean and compliant with our coding standards.
|
|
135
|
+
To set it up, install pre-commit with pip and then run the install command:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
pip install pre-commit
|
|
139
|
+
pre-commit install
|
|
140
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""PipeFunc: A Python library for defining, managing, and executing function pipelines."""
|
|
2
|
+
|
|
3
|
+
from pipefunc._pipefunc import Pipeline, PipelineFunction, pipefunc
|
|
4
|
+
from pipefunc._sweep import (
|
|
5
|
+
MultiSweep,
|
|
6
|
+
Sweep,
|
|
7
|
+
count_sweep,
|
|
8
|
+
generate_sweep,
|
|
9
|
+
get_precalculation_order,
|
|
10
|
+
set_cache_for_sweep,
|
|
11
|
+
)
|
|
12
|
+
from pipefunc._version import __version__
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"__version__",
|
|
16
|
+
"count_sweep",
|
|
17
|
+
"generate_sweep",
|
|
18
|
+
"get_precalculation_order",
|
|
19
|
+
"pipefunc",
|
|
20
|
+
"Pipeline",
|
|
21
|
+
"PipelineFunction",
|
|
22
|
+
"set_cache_for_sweep",
|
|
23
|
+
"Sweep",
|
|
24
|
+
"MultiSweep",
|
|
25
|
+
]
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextlib import nullcontext
|
|
4
|
+
from multiprocessing import Manager
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import cloudpickle
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from collections.abc import Hashable
|
|
11
|
+
from multiprocessing.managers import ListProxy
|
|
12
|
+
|
|
13
|
+
# Sentinel stored in place of a literal ``None`` result: ``LRUCache.put``
# substitutes it when caching ``None`` and ``LRUCache.get`` converts it back,
# so a cached ``None`` is not mistaken for a missing key inside the cache.
_NONE_RETURN_STR = "__ReturnsNone__"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HybridCache:
    """A hybrid cache implementation.

    This uses a combination of Least Frequently Used (LFU) and
    Least Computationally Expensive (LCE) strategies for invalidating
    cache entries.

    The cache invalidation strategy calculates a score for each entry based on
    its access frequency and computation duration. The entry with the lowest
    score will be invalidated when the cache reaches its maximum size.

    Attributes
    ----------
    max_size
        The maximum number of entries the cache can store.
    access_weight
        The weight given to the access frequency in the score calculation.
    duration_weight
        The weight given to the computation duration in the score calculation.
    """

    def __init__(
        self,
        max_size: int = 128,
        access_weight: float = 0.5,
        duration_weight: float = 0.5,
        *,
        shared: bool = True,
    ) -> None:
        """Initialize the HybridCache instance."""
        if shared:
            # Manager proxies make the cache usable from multiple processes;
            # the manager's Lock serializes every access.
            manager = Manager()
            self._cache = manager.dict()
            self._access_counts = manager.dict()
            self._computation_durations = manager.dict()
            self.lock = manager.Lock()
        else:
            # Single-process mode: plain dicts and a no-op context manager.
            self._cache = {}  # type: ignore[assignment]
            self._access_counts = {}  # type: ignore[assignment]
            self._computation_durations = {}  # type: ignore[assignment]
            self.lock = nullcontext()  # type: ignore[assignment]
        self.max_size: int = max_size
        self.access_weight: float = access_weight
        self.duration_weight: float = duration_weight
        self.shared: bool = shared

    @property
    def cache(self) -> dict[Hashable, Any]:
        """Return the cache entries (a snapshot copy when shared)."""
        if not self.shared:
            assert isinstance(self._cache, dict)
            return self._cache
        with self.lock:
            return dict(self._cache.items())

    @property
    def access_counts(self) -> dict[Hashable, int]:
        """Return the access counts of the cache entries."""
        if not self.shared:
            assert isinstance(self._access_counts, dict)
            return self._access_counts
        with self.lock:
            return dict(self._access_counts.items())

    @property
    def computation_durations(self) -> dict[Hashable, float]:
        """Return the computation durations of the cache entries."""
        if not self.shared:
            assert isinstance(self._computation_durations, dict)
            return self._computation_durations
        with self.lock:
            return dict(self._computation_durations.items())

    def get(self, key: Hashable) -> Any | None:
        """Retrieve a value from the cache by its key.

        If the key is present in the cache, its access count is incremented.

        Parameters
        ----------
        key
            The key associated with the value in the cache.

        Returns
        -------
        Any
            The value associated with the key if the key is present in the
            cache, otherwise None.
        """
        with self.lock:
            if key in self._cache:
                self._access_counts[key] += 1
                return self._cache[key]
        return None  # pragma: no cover

    def put(self, key: Hashable, value: Any, duration: float) -> None:
        """Add a value to the cache with its associated key and duration.

        If the cache is full, the entry with the lowest score based on the
        access frequency and computation duration will be invalidated.

        Parameters
        ----------
        key
            The key associated with the value.
        value
            The value to store in the cache.
        duration
            The duration of the computation that generated the value.
        """
        with self.lock:
            if len(self._cache) >= self.max_size:
                self._expire()
            self._cache[key] = value
            self._access_counts[key] = 1
            self._computation_durations[key] = duration

    def _expire(self) -> None:
        """Invalidate the entry with the lowest combined LFU/LCE score.

        Must be called while holding ``self.lock`` with a non-empty cache.
        """
        # Normalize access frequencies and computation durations so the two
        # criteria are on comparable scales before weighting.
        total_access_count = sum(self._access_counts.values())
        total_duration = sum(self._computation_durations.values())
        # BUG FIX: total_duration can be 0.0 when every stored computation was
        # (effectively) instantaneous; guard against ZeroDivisionError.
        # total_access_count is always >= 1 per entry (put() initializes to 1).
        duration_divisor = total_duration if total_duration > 0 else 1.0
        normalized_access_counts = {
            k: v / total_access_count for k, v in self._access_counts.items()
        }
        normalized_durations = {
            k: v / duration_divisor
            for k, v in self._computation_durations.items()
        }

        # Weighted sum of both normalized criteria; the lowest score loses.
        scores = {
            k: self.access_weight * normalized_access_counts[k]
            + self.duration_weight * normalized_durations[k]
            for k in self._access_counts
        }

        lowest_score_key = min(scores, key=lambda k: scores[k])
        del self._cache[lowest_score_key]
        del self._access_counts[lowest_score_key]
        del self._computation_durations[lowest_score_key]

    def __contains__(self, key: Hashable) -> bool:
        """Check if a key is present in the cache.

        Parameters
        ----------
        key
            The key to check for in the cache.

        Returns
        -------
        bool
            True if the key is present in the cache, otherwise False.
        """
        return key in self._cache

    def __str__(self) -> str:
        """Return a string representation of the HybridCache.

        The string representation includes information about the cache, access
        counts, and computation durations for each key.

        Returns
        -------
        str
            A string representation of the HybridCache.
        """
        cache_str = f"Cache: {self._cache}\n"
        access_counts_str = f"Access Counts: {self._access_counts}\n"
        computation_durations_str = (
            f"Computation Durations: {self._computation_durations}\n"
        )
        return cache_str + access_counts_str + computation_durations_str
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class LRUCache:
    """A shared memory LRU cache implementation.

    Parameters
    ----------
    max_size
        Cache size of the LRU cache, by default 128.
    with_cloudpickle
        Use cloudpickle for storing the data in memory.
    shared
        Whether the cache should be shared between multiple processes.
    """

    def __init__(
        self,
        *,
        max_size: int = 128,
        with_cloudpickle: bool = False,
        shared: bool = True,
    ) -> None:
        """Initialize the cache."""
        self.max_size = max_size
        self._with_cloudpickle = with_cloudpickle
        self.shared = shared
        if max_size <= 0:  # BUG FIX: also reject negative sizes, not only 0
            msg = "max_size must be greater than 0"
            raise ValueError(msg)
        if shared:
            # Manager proxies allow sharing across processes; the manager's
            # Lock serializes access to the dict and the recency queue.
            manager = Manager()
            self._cache_dict = manager.dict()
            self._cache_queue = manager.list()
            self._cache_lock = manager.Lock()
        else:
            # Single-process mode: plain containers and a no-op lock.
            self._cache_dict = {}  # type: ignore[assignment]
            self._cache_queue = []  # type: ignore[assignment]
            self._cache_lock = nullcontext()  # type: ignore[assignment]

    def get(self, key: Hashable) -> Any | None:
        """Get a value from the cache by key; return ``None`` on a miss.

        A stored ``None`` is kept internally as a sentinel string and restored
        here, so cached ``None`` results round-trip correctly (callers cannot
        distinguish a cached ``None`` from a miss, however).
        """
        # BUG FIX: the return annotation previously claimed
        # ``tuple[bool, Any]`` but a bare value (or None) is returned.
        with self._cache_lock:
            value = self._cache_dict.get(key)
            if value is not None:  # Hit: move key to back of the queue (MRU)
                self._cache_queue.remove(key)
                self._cache_queue.append(key)
        if value is not None:
            if value == _NONE_RETURN_STR:
                value = None
            elif self._with_cloudpickle:
                value = cloudpickle.loads(value)
        return value

    def put(self, key: Hashable, value: Any) -> ListProxy[Any] | list[Any]:
        """Insert a key value pair into the cache.

        Evicts the least recently used entry when the cache is full.  Returns
        the internal recency queue (front = least recently used).
        """
        if value is None:
            # ``None`` cannot be stored directly (a miss also yields None from
            # the dict lookup), so substitute a sentinel string.
            value = _NONE_RETURN_STR
        elif self._with_cloudpickle:
            value = cloudpickle.dumps(value)
        with self._cache_lock:
            already_present = key in self._cache_dict
            self._cache_dict[key] = value
            if already_present:
                # BUG FIX: re-inserting an existing key used to append a
                # duplicate queue entry, desynchronizing queue and dict on a
                # later eviction; refresh the key's recency position instead.
                self._cache_queue.remove(key)
                self._cache_queue.append(key)
            elif len(self._cache_queue) < self.max_size:
                self._cache_queue.append(key)
            else:
                # Full: drop the least recently used key (front of the queue).
                key_to_evict = self._cache_queue.pop(0)
                self._cache_dict.pop(key_to_evict)
                self._cache_queue.append(key)
        return self._cache_queue

    def __contains__(self, key: Hashable) -> bool:
        """Check if a key is present in the cache."""
        return key in self._cache_dict

    @property
    def cache(self) -> dict:
        """Returns a copy of the cache."""
        return dict(self._cache_dict.items())
|