powerfunc 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- powerfunc-0.3.0/.github/workflows/ci.yml +63 -0
- powerfunc-0.3.0/PKG-INFO +108 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/README.md +2 -2
- {powerfunc-0.2.0 → powerfunc-0.3.0}/documentation/advanced_usage.md +7 -7
- powerfunc-0.3.0/documentation/integrations/snakemake.md +44 -0
- powerfunc-0.3.0/documentation/providers/gcp.md +171 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/documentation/providers/modal.md +10 -7
- {powerfunc-0.2.0 → powerfunc-0.3.0}/documentation/readme.md +14 -6
- {powerfunc-0.2.0 → powerfunc-0.3.0}/pyproject.toml +16 -10
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/compute.py +15 -8
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/decorator.py +10 -0
- powerfunc-0.3.0/src/powerfunc/integrations/__init__.py +1 -0
- powerfunc-0.3.0/src/powerfunc/integrations/snakemake.py +118 -0
- powerfunc-0.3.0/src/powerfunc/providers/gcp_batch.py +436 -0
- powerfunc-0.3.0/src/powerfunc/providers/gcp_cloud_run.py +391 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/providers/modal.py +17 -12
- powerfunc-0.3.0/tests/powerfunc_tests/core/__init__.py +0 -0
- {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_basic.py +1 -1
- {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_cli.py +1 -1
- {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_conversions.py +1 -1
- {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/data}/generate_data_files.py +1 -1
- powerfunc-0.3.0/tests/powerfunc_tests/gcp/__init__.py +0 -0
- powerfunc-0.3.0/tests/powerfunc_tests/gcp/conftest.py +42 -0
- powerfunc-0.3.0/tests/powerfunc_tests/gcp/gcp_jobs.py +73 -0
- powerfunc-0.3.0/tests/powerfunc_tests/gcp/gcp_shared.py +27 -0
- powerfunc-0.3.0/tests/powerfunc_tests/gcp/test_gcp_batch.py +70 -0
- powerfunc-0.3.0/tests/powerfunc_tests/gcp/test_gcp_cloud_run.py +52 -0
- powerfunc-0.3.0/tests/powerfunc_tests/modal/__init__.py +0 -0
- powerfunc-0.3.0/tests/powerfunc_tests/modal/test_modal.py +44 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/__init__.py +0 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/functions.py +25 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/input.smk +8 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/input_params_and_output.smk +11 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/multiple_outputs_rejected.smk +8 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/named_outputs_rejected.smk +10 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/params.smk +8 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/positional_rejected.smk +9 -0
- powerfunc-0.3.0/tests/powerfunc_tests/snakemake/test_snakemake.py +95 -0
- powerfunc-0.3.0/tox.ini +43 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/uv.lock +573 -931
- powerfunc-0.2.0/.claude/settings.local.json +0 -8
- powerfunc-0.2.0/PKG-INFO +0 -27
- powerfunc-0.2.0/documentation/providers/gcp.md +0 -121
- powerfunc-0.2.0/src/powerfunc/providers/gcp.py +0 -251
- {powerfunc-0.2.0 → powerfunc-0.3.0}/.gitignore +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/.pre-commit-config.yaml +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/LICENSE.txt +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/__init__.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/command_line.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/configuration.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/conversions.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/__init__.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/arrow.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/csv_reader.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/dask.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/pandas.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/polars.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/providers/__init__.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/py.typed +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/__init__.py +0 -0
- {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/cli_example.py +0 -0
- {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_remote.py +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.arrow +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.csv +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.feather +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.pandas.json +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.parquet +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.polars.json +0 -0
- {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.xlsx +0 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
# One run per branch. Keying on the head branch (not github.ref) makes the
|
|
8
|
+
# push and pull_request events for the same branch share a group, so we get a
|
|
9
|
+
# single run per commit instead of two, and a new push cancels an in-progress
|
|
10
|
+
# run for the same branch so live remote tests can't pile up.
|
|
11
|
+
concurrency:
|
|
12
|
+
group: ci-${{ github.event.pull_request.head.ref || github.ref_name }}
|
|
13
|
+
cancel-in-progress: true
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
lint:
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
timeout-minutes: 5
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
- uses: astral-sh/setup-uv@v6
|
|
22
|
+
with:
|
|
23
|
+
enable-cache: true
|
|
24
|
+
- run: uvx pre-commit run --all-files --show-diff-on-failure
|
|
25
|
+
|
|
26
|
+
test-core:
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
timeout-minutes: 10
|
|
29
|
+
steps:
|
|
30
|
+
- uses: actions/checkout@v4
|
|
31
|
+
- uses: astral-sh/setup-uv@v6
|
|
32
|
+
with:
|
|
33
|
+
enable-cache: true
|
|
34
|
+
- run: uvx --with tox-uv tox run -e core
|
|
35
|
+
|
|
36
|
+
test-gcp:
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
timeout-minutes: 15
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
- uses: astral-sh/setup-uv@v6
|
|
42
|
+
with:
|
|
43
|
+
enable-cache: true
|
|
44
|
+
- name: Run GCP tests
|
|
45
|
+
env:
|
|
46
|
+
GOOGLE_CLOUD_API_KEY: ${{ secrets.GOOGLE_CLOUD_API_KEY }}
|
|
47
|
+
GCP_BUCKET: gs://powerfunc-temporary-348293
|
|
48
|
+
GCP_REGION: europe-west4
|
|
49
|
+
run: uvx --with tox-uv tox run -e gcp
|
|
50
|
+
|
|
51
|
+
test-modal:
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
timeout-minutes: 10
|
|
54
|
+
steps:
|
|
55
|
+
- uses: actions/checkout@v4
|
|
56
|
+
- uses: astral-sh/setup-uv@v6
|
|
57
|
+
with:
|
|
58
|
+
enable-cache: true
|
|
59
|
+
- name: Run Modal tests
|
|
60
|
+
env:
|
|
61
|
+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
|
|
62
|
+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
|
|
63
|
+
run: uvx --with tox-uv tox run -e modal
|
powerfunc-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: powerfunc
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Make functions run however you like — CLI, pipeline, or remote compute
|
|
5
|
+
Project-URL: Homepage, https://github.com/ddrakard/powerfunc
|
|
6
|
+
Project-URL: Repository, https://github.com/ddrakard/powerfunc
|
|
7
|
+
Project-URL: Documentation, https://github.com/ddrakard/powerfunc/blob/main/documentation/readme.md
|
|
8
|
+
License-File: LICENSE.txt
|
|
9
|
+
Requires-Python: >=3.9
|
|
10
|
+
Requires-Dist: cloudpathlib[all]
|
|
11
|
+
Requires-Dist: cloudpickle
|
|
12
|
+
Requires-Dist: fsspec[adl,gcs,s3]
|
|
13
|
+
Requires-Dist: jsonargparse>=4.49.0
|
|
14
|
+
Requires-Dist: pydantic>=2
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
17
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
18
|
+
Requires-Dist: tox; extra == 'dev'
|
|
19
|
+
Requires-Dist: tox-uv; extra == 'dev'
|
|
20
|
+
Provides-Extra: gcp
|
|
21
|
+
Requires-Dist: google-auth; extra == 'gcp'
|
|
22
|
+
Requires-Dist: google-cloud-batch; extra == 'gcp'
|
|
23
|
+
Requires-Dist: google-cloud-compute; extra == 'gcp'
|
|
24
|
+
Requires-Dist: google-cloud-run; extra == 'gcp'
|
|
25
|
+
Provides-Extra: modal
|
|
26
|
+
Requires-Dist: modal; extra == 'modal'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# powerfunc
|
|
30
|
+
|
|
31
|
+
Add superpowers to your functions. Run them from the CLI, on cloud data, or on cloud compute.
|
|
32
|
+
|
|
33
|
+
## Features
|
|
34
|
+
|
|
35
|
+
- Load and save function arguments and results to file paths, local or in cloud storage.
|
|
36
|
+
- Invoke functions on the command line.
|
|
37
|
+
- Remote execution on cloud compute.
|
|
38
|
+
- YAML configuration file support.
|
|
39
|
+
- Supports [pandas](https://pandas.pydata.org/), [polars](https://pola.rs/), [PyArrow](https://pypi.org/project/pyarrow/), [Dask](https://www.dask.org/), and the Python [`csv` module](https://docs.python.org/3/library/csv.html) out of the box.
|
|
40
|
+
- Supports cloud providers [Google Cloud](https://cloud.google.com) and [Modal](https://modal.com/).
|
|
41
|
+
- Extensible for new data types and cloud providers.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install powerfunc
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
With [Google Cloud](https://cloud.google.com) remote execution:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install 'powerfunc[gcp]'
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
With [Modal](https://modal.com/) remote execution:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install 'powerfunc[modal]'
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Example usage
|
|
62
|
+
|
|
63
|
+
Decorate a normal function with `@powerfunc`:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import pandas as pd
|
|
67
|
+
from powerfunc import powerfunc
|
|
68
|
+
|
|
69
|
+
@powerfunc
|
|
70
|
+
def sum_col(df: pd.DataFrame) -> float:
|
|
71
|
+
return float(df["value"].sum())
|
|
72
|
+
|
|
73
|
+
df = pd.DataFrame({"value": [1, 2, 3]})
|
|
74
|
+
print(sum_col(df))
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Then use it on files, local or in the cloud:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
print(sum_col("data.csv"))
|
|
81
|
+
print(sum_col("https://storage.googleapis.com/powerfunc/data.csv"))
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Add one line at the end of your file to make it runnable on the command line:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from powerfunc import powerfunc
|
|
88
|
+
|
|
89
|
+
...
|
|
90
|
+
|
|
91
|
+
powerfunc.enable_cli()
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
```sh
|
|
95
|
+
python myfile.py gs://powerfunc/data.csv
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Support
|
|
99
|
+
|
|
100
|
+
This project is in early development. Breaking changes may happen at any time.
|
|
101
|
+
|
|
102
|
+
## Documentation
|
|
103
|
+
|
|
104
|
+
For complete information on using powerfunc, including executing on cloud compute, please see the [documentation](documentation/readme.md).
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
Released under the [MIT](LICENSE.txt) license.
|
|
@@ -21,13 +21,13 @@ pip install powerfunc
|
|
|
21
21
|
With [Google Cloud](https://cloud.google.com) remote execution:
|
|
22
22
|
|
|
23
23
|
```bash
|
|
24
|
-
pip install powerfunc[gcp]
|
|
24
|
+
pip install 'powerfunc[gcp]'
|
|
25
25
|
```
|
|
26
26
|
|
|
27
27
|
With [Modal](https://modal.com/) remote execution:
|
|
28
28
|
|
|
29
29
|
```bash
|
|
30
|
-
pip install powerfunc[modal]
|
|
30
|
+
pip install 'powerfunc[modal]'
|
|
31
31
|
```
|
|
32
32
|
|
|
33
33
|
## Example usage
|
|
@@ -12,10 +12,10 @@ Subclass `ComputeSpecification` to define reusable compute configurations:
|
|
|
12
12
|
```python
|
|
13
13
|
from dataclasses import field
|
|
14
14
|
from pydantic.dataclasses import dataclass
|
|
15
|
-
from powerfunc.compute import ComputeSpecification, Provider
|
|
16
|
-
from powerfunc.providers.
|
|
15
|
+
from powerfunc.compute import ComputeSpecification, CpuCount, DockerImageUri, GpuModel, MemorySize, Provider
|
|
16
|
+
from powerfunc.providers.gcp_cloud_run import GCPCloudRunProvider
|
|
17
17
|
|
|
18
|
-
provider =
|
|
18
|
+
provider = GCPCloudRunProvider(
|
|
19
19
|
project="my-project",
|
|
20
20
|
region="us-central1",
|
|
21
21
|
temporary_bucket_path="gs://my-bucket/tmp",
|
|
@@ -23,10 +23,10 @@ provider = GCPProvider(
|
|
|
23
23
|
|
|
24
24
|
@dataclass
|
|
25
25
|
class MyGpuSpec(ComputeSpecification):
|
|
26
|
-
cpu:
|
|
27
|
-
memory:
|
|
28
|
-
image:
|
|
29
|
-
gpu:
|
|
26
|
+
cpu: CpuCount = 8.0
|
|
27
|
+
memory: MemorySize = 32768
|
|
28
|
+
image: DockerImageUri = "nvidia/cuda:12.1.0-base-ubuntu22.04"
|
|
29
|
+
gpu: GpuModel = "a100"
|
|
30
30
|
provider: Provider = field(default_factory=lambda: provider)
|
|
31
31
|
|
|
32
32
|
MY_GPU = MyGpuSpec()
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Snakemake
|
|
2
|
+
|
|
3
|
+
powerfunc functions can be called directly from a [Snakemake](https://snakemake.readthedocs.io/)
|
|
4
|
+
rule. Call `.snakemake()` inside a `run:` block and the rule's inputs, params and outputs are
|
|
5
|
+
bound automatically.
|
|
6
|
+
|
|
7
|
+
## Usage
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
# functions.py
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from powerfunc import powerfunc
|
|
13
|
+
|
|
14
|
+
@powerfunc
|
|
15
|
+
def sum_col(df: pd.DataFrame) -> pd.DataFrame:
|
|
16
|
+
return df.sum().to_frame().T
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
# Snakefile
|
|
21
|
+
from functions import sum_col
|
|
22
|
+
|
|
23
|
+
rule sum:
|
|
24
|
+
input: "data.csv"
|
|
25
|
+
output: "result.csv"
|
|
26
|
+
run:
|
|
27
|
+
sum_col.snakemake()
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Binding rules
|
|
31
|
+
|
|
32
|
+
Inputs (`input`) and params (`params`) are bound to the function's arguments:
|
|
33
|
+
|
|
34
|
+
- When **both** inputs and params are present, they must all be **named** (keyword).
|
|
35
|
+
Positional entries are only allowed when one of the two is absent.
|
|
36
|
+
- Inputs are passed as paths and are read through powerfunc's
|
|
37
|
+
[converters](../readme.md#automatic-reading-and-writing-data), so a `pd.DataFrame`
|
|
38
|
+
parameter receives the loaded file. Params are passed through unchanged.
|
|
39
|
+
|
|
40
|
+
Outputs (`output`) are bound as follows:
|
|
41
|
+
|
|
42
|
+
- A single **unnamed** output receives the function's **return value**, written through
|
|
43
|
+
powerfunc's converters.
|
|
44
|
+
- Named outputs and multiple outputs are not supported.
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# Google Cloud Platform
|
|
2
|
+
|
|
3
|
+
powerfunc supports two GCP execution backends: [Cloud Run Jobs](https://cloud.google.com/run/docs/create-jobs) and [Batch](https://cloud.google.com/batch/docs). Both use the same authentication, bucket-based data transfer, and `ComputeSpecification` model.
|
|
4
|
+
|
|
5
|
+
## Contents
|
|
6
|
+
|
|
7
|
+
- [Installation](#installation)
|
|
8
|
+
- [Setup](#setup)
|
|
9
|
+
- [Cloud Run](#cloud-run)
|
|
10
|
+
- [Batch](#batch)
|
|
11
|
+
- [Environment variables](#environment-variables)
|
|
12
|
+
- [Predefined configurations](#predefined-configurations)
|
|
13
|
+
- [Supported GPUs](#supported-gpus)
|
|
14
|
+
- [Known limitations](#known-limitations)
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
Make sure to install with the `gcp` option.
|
|
19
|
+
|
|
20
|
+
```sh
|
|
21
|
+
pip install 'powerfunc[gcp]'
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
or
|
|
25
|
+
|
|
26
|
+
```sh
|
|
27
|
+
uv add 'powerfunc[gcp]'
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Setup
|
|
31
|
+
|
|
32
|
+
You need to have a Google Cloud account with a project inside it where the remote execution can happen. You must authenticate with the [Google Cloud CLI](https://cloud.google.com/sdk/docs/install) using:
|
|
33
|
+
|
|
34
|
+
```sh
|
|
35
|
+
gcloud auth application-default login
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
powerfunc transfers data in and out of the compute job using a [Google Cloud Storage bucket](https://docs.cloud.google.com/storage/docs/buckets). Therefore, you need to have or create a bucket that powerfunc can use, and pass its path to the provider as `temporary_bucket_path`.
|
|
39
|
+
|
|
40
|
+
### Command line usage
|
|
41
|
+
|
|
42
|
+
When running from the command line, [module-style invocation](https://docs.python.org/3/using/cmdline.html#cmdoption-m) `python -m my.module` must be used instead of direct script execution `python myscript.py`.
|
|
43
|
+
|
|
44
|
+
## Cloud Run
|
|
45
|
+
|
|
46
|
+
Cloud Run Jobs is a serverless execution backend. It scales to zero, has fast cold starts for cached images, and supports GPUs.
|
|
47
|
+
|
|
48
|
+
Configure in `powerfunc.yaml`:
|
|
49
|
+
|
|
50
|
+
```yaml
|
|
51
|
+
compute:
|
|
52
|
+
class_path: powerfunc.providers.gcp_cloud_run.GcpCloudRunCpuSmall
|
|
53
|
+
init_args:
|
|
54
|
+
timeout: 600
|
|
55
|
+
provider:
|
|
56
|
+
class_path: powerfunc.providers.gcp_cloud_run.GCPCloudRunProvider
|
|
57
|
+
init_args:
|
|
58
|
+
project: my-gcp-project
|
|
59
|
+
region: us-central1
|
|
60
|
+
temporary_bucket_path: gs://my-bucket/powerfunc-temp
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Or directly in Python:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from powerfunc.providers.gcp_cloud_run import GCPCloudRunProvider, GcpCloudRunCpuSmall
|
|
67
|
+
|
|
68
|
+
provider = GCPCloudRunProvider(
|
|
69
|
+
project="my-gcp-project",
|
|
70
|
+
region="us-central1",
|
|
71
|
+
temporary_bucket_path="gs://my-bucket/",
|
|
72
|
+
)
|
|
73
|
+
compute = GcpCloudRunCpuSmall(timeout=600, provider=provider)
|
|
74
|
+
|
|
75
|
+
result = sum_col("gs://bucket/data.csv", compute=compute)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Cloud Run Jobs can require some time to start, particularly for a docker image that has not been used recently.
|
|
79
|
+
|
|
80
|
+
## Batch
|
|
81
|
+
|
|
82
|
+
Batch provisions Compute Engine VMs. It supports GPUs, higher resource limits than Cloud Run, spot (preemptible) VMs, and explicit machine type selection.
|
|
83
|
+
|
|
84
|
+
Configure in `powerfunc.yaml`:
|
|
85
|
+
|
|
86
|
+
```yaml
|
|
87
|
+
compute:
|
|
88
|
+
class_path: powerfunc.providers.gcp_batch.GcpBatchCpuSmall
|
|
89
|
+
init_args:
|
|
90
|
+
timeout: 600
|
|
91
|
+
provider:
|
|
92
|
+
class_path: powerfunc.providers.gcp_batch.GCPBatchProvider
|
|
93
|
+
init_args:
|
|
94
|
+
project: my-gcp-project
|
|
95
|
+
region: us-central1
|
|
96
|
+
temporary_bucket_path: gs://my-bucket/powerfunc-temp
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Or directly in Python:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from powerfunc.providers.gcp_batch import GCPBatchProvider, GcpBatchCpuSmall
|
|
103
|
+
|
|
104
|
+
provider = GCPBatchProvider(
|
|
105
|
+
project="my-gcp-project",
|
|
106
|
+
region="us-central1",
|
|
107
|
+
temporary_bucket_path="gs://my-bucket/",
|
|
108
|
+
spot=True, # optional: use spot (preemptible) VMs
|
|
109
|
+
machine_type="n1-standard-4", # optional: Batch auto-selects if omitted
|
|
110
|
+
)
|
|
111
|
+
compute = GcpBatchCpuSmall(timeout=600, provider=provider)
|
|
112
|
+
|
|
113
|
+
result = sum_col("gs://bucket/data.csv", compute=compute)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
`machine_type` is optional — Batch selects a machine from the requested CPU/memory when it
|
|
117
|
+
is not given. For GPUs, a compatible `machine_type` is usually required.
|
|
118
|
+
|
|
119
|
+
## Environment variables
|
|
120
|
+
|
|
121
|
+
Both Cloud Run and Batch support passing environment variables to the remote container via the `environment_variables` provider option:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
provider = GCPCloudRunProvider(
|
|
125
|
+
project="my-project",
|
|
126
|
+
region="us-central1",
|
|
127
|
+
temporary_bucket_path="gs://my-bucket/tmp",
|
|
128
|
+
environment_variables={"MY_API_KEY": "secret123"},
|
|
129
|
+
)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Or in `powerfunc.yaml`:
|
|
133
|
+
|
|
134
|
+
```yaml
|
|
135
|
+
provider:
|
|
136
|
+
init_args:
|
|
137
|
+
environment_variables:
|
|
138
|
+
MY_API_KEY: secret123
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Supported GPUs
|
|
142
|
+
|
|
143
|
+
Both Cloud Run and Batch support: `t4`, `a100`, `l4`, `v100`.
|
|
144
|
+
|
|
145
|
+
## Predefined configurations
|
|
146
|
+
|
|
147
|
+
| Class | Provider | CPU | Memory | Image | GPU |
|
|
148
|
+
|---|---|---|---|---|---|
|
|
149
|
+
| `GcpCloudRunCpuSmall` | Cloud Run | 1 vCPU | 2GB | `python:3.12-slim` | — |
|
|
150
|
+
| `GcpCloudRunGpu` | Cloud Run | 4 vCPU | 16GB | `nvidia/cuda:12.1.0-base-ubuntu22.04` | L4 |
|
|
151
|
+
| `GcpBatchCpuSmall` | Batch | 1 vCPU | 2GB | `python:3.12-slim` | — |
|
|
152
|
+
| `GcpBatchGpu` | Batch | 4 vCPU | 16GB | `nvidia/cuda:12.1.0-base-ubuntu22.04` | L4 |
|
|
153
|
+
|
|
154
|
+
All presets require a `timeout` argument (in seconds), e.g. `GcpCloudRunCpuSmall(timeout=600)` or `GcpBatchCpuSmall(timeout=600)`.
|
|
155
|
+
|
|
156
|
+
## Known limitations
|
|
157
|
+
|
|
158
|
+
### Codebase synchronisation
|
|
159
|
+
|
|
160
|
+
A simple codebase synchronisation method is used. It is not robust enough to work with all codebases and scenarios. For complex setups, an understanding of the underlying infrastructure technologies (such as Python packaging and [Docker](https://www.docker.com/)) will likely be needed. To enable rapid execution for complex codebases, additional setup is needed, such as preparing Docker images.
|
|
161
|
+
|
|
162
|
+
### Anonymous usage
|
|
163
|
+
|
|
164
|
+
For public GCS buckets without credentials, configure fsspec before importing powerfunc:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
import fsspec
|
|
168
|
+
fsspec.config.conf["gs"] = {"token": "anon"}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Parquet files on `gs://` hang without credentials** for pandas and polars — their parquet readers use pyarrow's C++ GCS filesystem which has no anonymous mode and no timeout. Use `gcloud auth application-default login` to resolve this, or access parquet via HTTPS instead.
|
|
@@ -14,13 +14,13 @@ This page describes how to execute powerfunc functions remotely on [Modal](https
|
|
|
14
14
|
Make sure to install with the `modal` option.
|
|
15
15
|
|
|
16
16
|
```sh
|
|
17
|
-
pip install powerfunc[modal]
|
|
17
|
+
pip install 'powerfunc[modal]'
|
|
18
18
|
```
|
|
19
19
|
|
|
20
20
|
or
|
|
21
21
|
|
|
22
22
|
```sh
|
|
23
|
-
uv add powerfunc[modal]
|
|
23
|
+
uv add 'powerfunc[modal]'
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
You must have a Modal account. Then authenticate to Modal:
|
|
@@ -33,13 +33,13 @@ python -m modal setup
|
|
|
33
33
|
|
|
34
34
|
```python
|
|
35
35
|
from powerfunc import powerfunc
|
|
36
|
-
from powerfunc.providers.modal import
|
|
36
|
+
from powerfunc.providers.modal import ModalCpuSmall
|
|
37
37
|
|
|
38
38
|
@powerfunc
|
|
39
39
|
def sum_col(df: pd.DataFrame) -> float:
|
|
40
40
|
return float(df["value"].sum())
|
|
41
41
|
|
|
42
|
-
result = sum_col("data.csv", compute=
|
|
42
|
+
result = sum_col("data.csv", compute=ModalCpuSmall(timeout=600))
|
|
43
43
|
```
|
|
44
44
|
|
|
45
45
|
Or set a default in `powerfunc.yaml` so all calls run on Modal without passing `compute=`:
|
|
@@ -47,6 +47,8 @@ Or set a default in `powerfunc.yaml` so all calls run on Modal without passing `
|
|
|
47
47
|
```yaml
|
|
48
48
|
compute:
|
|
49
49
|
class_path: powerfunc.providers.modal.ModalCpuSmall
|
|
50
|
+
init_args:
|
|
51
|
+
timeout: 600
|
|
50
52
|
```
|
|
51
53
|
|
|
52
54
|
## Dependencies
|
|
@@ -56,7 +58,7 @@ By default the container uses Modal's `debian_slim` base image. If your function
|
|
|
56
58
|
```python
|
|
57
59
|
from powerfunc.providers.modal import ModalProvider, ModalCpuSmall
|
|
58
60
|
|
|
59
|
-
compute = ModalCpuSmall(provider=ModalProvider(pip_packages=("pandas", "pyarrow")))
|
|
61
|
+
compute = ModalCpuSmall(timeout=600, provider=ModalProvider(pip_packages=("pandas", "pyarrow")))
|
|
60
62
|
result = sum_col("data.csv", compute=compute)
|
|
61
63
|
```
|
|
62
64
|
|
|
@@ -65,7 +67,7 @@ To use a different base image, set `image` on the compute specification:
|
|
|
65
67
|
```python
|
|
66
68
|
from powerfunc.providers.modal import ModalCpuSmall
|
|
67
69
|
|
|
68
|
-
compute = ModalCpuSmall(image="python:3.12-slim")
|
|
70
|
+
compute = ModalCpuSmall(timeout=600, image="python:3.12-slim")
|
|
69
71
|
result = sum_col("data.csv", compute=compute)
|
|
70
72
|
```
|
|
71
73
|
|
|
@@ -75,6 +77,7 @@ Or in `powerfunc.yaml`:
|
|
|
75
77
|
compute:
|
|
76
78
|
class_path: powerfunc.providers.modal.ModalCpuSmall
|
|
77
79
|
init_args:
|
|
80
|
+
timeout: 600
|
|
78
81
|
image: "python:3.12-slim"
|
|
79
82
|
provider:
|
|
80
83
|
class_path: powerfunc.providers.modal.ModalProvider
|
|
@@ -89,4 +92,4 @@ compute:
|
|
|
89
92
|
| `ModalCpuSmall` | 1 vCPU | 1GB | — |
|
|
90
93
|
| `ModalGpuA100` | 8 vCPU | 80GB | A100 |
|
|
91
94
|
|
|
92
|
-
|
|
95
|
+
All presets require a `timeout` argument (in seconds), e.g. `ModalCpuSmall(timeout=600)`.
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
- [Cloud providers and remote execution](#cloud-providers-and-remote-execution)
|
|
11
11
|
- [Configuration](#configuration)
|
|
12
12
|
- [Supported formats](#supported-formats)
|
|
13
|
+
- [Integrations](#integrations)
|
|
13
14
|
- [Known limitations](#known-limitations)
|
|
14
15
|
- [Advanced usage](#advanced-usage)
|
|
15
16
|
|
|
@@ -24,13 +25,13 @@ pip install powerfunc
|
|
|
24
25
|
For GCP remote execution:
|
|
25
26
|
|
|
26
27
|
```sh
|
|
27
|
-
pip install powerfunc[gcp]
|
|
28
|
+
pip install 'powerfunc[gcp]'
|
|
28
29
|
```
|
|
29
30
|
|
|
30
31
|
For Modal remote execution:
|
|
31
32
|
|
|
32
33
|
```sh
|
|
33
|
-
pip install powerfunc[modal]
|
|
34
|
+
pip install 'powerfunc[modal]'
|
|
34
35
|
```
|
|
35
36
|
|
|
36
37
|
Or using [uv](https://github.com/astral-sh/uv):
|
|
@@ -40,11 +41,11 @@ uv add powerfunc
|
|
|
40
41
|
```
|
|
41
42
|
|
|
42
43
|
```sh
|
|
43
|
-
uv add powerfunc[gcp]
|
|
44
|
+
uv add 'powerfunc[gcp]'
|
|
44
45
|
```
|
|
45
46
|
|
|
46
47
|
```sh
|
|
47
|
-
uv add powerfunc[modal]
|
|
48
|
+
uv add 'powerfunc[modal]'
|
|
48
49
|
```
|
|
49
50
|
|
|
50
51
|
## Basic usage
|
|
@@ -151,10 +152,11 @@ For example:
|
|
|
151
152
|
|
|
152
153
|
```yaml
|
|
153
154
|
compute:
|
|
154
|
-
class_path: powerfunc.providers.
|
|
155
|
+
class_path: powerfunc.providers.gcp_cloud_run.GcpCloudRunCpuSmall
|
|
155
156
|
init_args:
|
|
157
|
+
timeout: 600
|
|
156
158
|
provider:
|
|
157
|
-
class_path: powerfunc.providers.
|
|
159
|
+
class_path: powerfunc.providers.gcp_cloud_run.GCPCloudRunProvider
|
|
158
160
|
init_args:
|
|
159
161
|
project: my-gcp-project
|
|
160
162
|
region: us-central1
|
|
@@ -192,6 +194,12 @@ python my_script.py sum_col data.csv --config my_config.yaml
|
|
|
192
194
|
| `dask.dataframe.DataFrame` | `.csv`, `.parquet` |
|
|
193
195
|
| `csv.reader` | `.csv` |
|
|
194
196
|
|
|
197
|
+
## Integrations
|
|
198
|
+
|
|
199
|
+
powerfunc functions can be driven by external workflow tools:
|
|
200
|
+
|
|
201
|
+
- [Snakemake](integrations/snakemake.md) — bind a rule's inputs, params and outputs with `function.snakemake()`.
|
|
202
|
+
|
|
195
203
|
## Known limitations
|
|
196
204
|
|
|
197
205
|
### Anonymous `gs://` access
|
|
@@ -4,12 +4,14 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "powerfunc"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Make functions run however you like — CLI, pipeline, or remote compute"
|
|
9
|
+
readme = "README.md"
|
|
9
10
|
requires-python = ">=3.9"
|
|
10
11
|
dependencies = [
|
|
11
12
|
"pydantic>=2",
|
|
12
13
|
"cloudpathlib[all]",
|
|
14
|
+
"fsspec[gcs,s3,adl]",
|
|
13
15
|
"jsonargparse>=4.49.0",
|
|
14
16
|
"cloudpickle",
|
|
15
17
|
]
|
|
@@ -17,25 +19,25 @@ dependencies = [
|
|
|
17
19
|
[project.optional-dependencies]
|
|
18
20
|
gcp = [
|
|
19
21
|
"google-cloud-run",
|
|
22
|
+
"google-cloud-batch",
|
|
23
|
+
"google-cloud-compute",
|
|
20
24
|
"google-auth",
|
|
21
25
|
]
|
|
22
26
|
modal = [
|
|
23
27
|
"modal",
|
|
24
28
|
]
|
|
25
29
|
dev = [
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"polars",
|
|
29
|
-
"pyarrow",
|
|
30
|
-
"openpyxl",
|
|
31
|
-
"xlsxwriter",
|
|
32
|
-
"fastexcel",
|
|
33
|
-
"dask[dataframe]",
|
|
34
|
-
"gcsfs",
|
|
30
|
+
"tox",
|
|
31
|
+
"tox-uv",
|
|
35
32
|
"ruff",
|
|
36
33
|
"pre-commit",
|
|
37
34
|
]
|
|
38
35
|
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://github.com/ddrakard/powerfunc"
|
|
38
|
+
Repository = "https://github.com/ddrakard/powerfunc"
|
|
39
|
+
Documentation = "https://github.com/ddrakard/powerfunc/blob/main/documentation/readme.md"
|
|
40
|
+
|
|
39
41
|
[tool.ruff]
|
|
40
42
|
target-version = "py39"
|
|
41
43
|
line-length = 100
|
|
@@ -43,6 +45,10 @@ line-length = 100
|
|
|
43
45
|
[tool.ruff.lint]
|
|
44
46
|
select = ["E", "F", "W", "I", "B", "C4", "UP", "RUF"]
|
|
45
47
|
|
|
48
|
+
[tool.pytest.ini_options]
|
|
49
|
+
pythonpath = ["tests"]
|
|
50
|
+
addopts = "--import-mode=importlib"
|
|
51
|
+
|
|
46
52
|
[dependency-groups]
|
|
47
53
|
dev = [
|
|
48
54
|
"cloudpickle>=3.1.2",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import dataclasses
|
|
2
|
-
from typing import Annotated, Any, Callable, Optional
|
|
2
|
+
from typing import Annotated, Any, Callable, Optional, TypeAlias
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, GetCoreSchemaHandler
|
|
5
5
|
from pydantic.dataclasses import dataclass as pydantic_dataclass
|
|
@@ -7,6 +7,12 @@ from pydantic_core import core_schema
|
|
|
7
7
|
|
|
8
8
|
from powerfunc.command_line import ExpectedException
|
|
9
9
|
|
|
10
|
+
Timeout: TypeAlias = Annotated[float, Field(gt=0, description="Maximum job duration in seconds")]
|
|
11
|
+
CpuCount: TypeAlias = Annotated[float, Field(gt=0, description="Number of vCPUs")]
|
|
12
|
+
MemorySize: TypeAlias = Annotated[int, Field(gt=0, description="RAM in MB")]
|
|
13
|
+
DockerImageUri: TypeAlias = Annotated[str, Field(description="Container image URI")]
|
|
14
|
+
GpuModel: TypeAlias = Annotated[Optional[str], Field(description="GPU model name")]
|
|
15
|
+
|
|
10
16
|
|
|
11
17
|
class Provider:
|
|
12
18
|
"""Base compute provider. Subclass and implement call()."""
|
|
@@ -34,20 +40,21 @@ class UndefinedProvider(Provider):
|
|
|
34
40
|
class ComputeSpecification:
|
|
35
41
|
"""Specifies compute resources for remote execution."""
|
|
36
42
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
43
|
+
timeout: Timeout
|
|
44
|
+
cpu: CpuCount
|
|
45
|
+
memory: MemorySize
|
|
46
|
+
image: DockerImageUri = ""
|
|
40
47
|
provider: Provider = dataclasses.field(default_factory=UndefinedProvider)
|
|
41
|
-
gpu:
|
|
48
|
+
gpu: GpuModel = None
|
|
42
49
|
|
|
43
50
|
|
|
44
51
|
@pydantic_dataclass
|
|
45
52
|
class CpuSmall(ComputeSpecification):
|
|
46
53
|
"""1 vCPU, 2GB RAM, Python 3.12 slim."""
|
|
47
54
|
|
|
48
|
-
cpu:
|
|
49
|
-
memory:
|
|
50
|
-
image:
|
|
55
|
+
cpu: CpuCount = 1.0
|
|
56
|
+
memory: MemorySize = 2048
|
|
57
|
+
image: DockerImageUri = "python:3.12-slim"
|
|
51
58
|
|
|
52
59
|
|
|
53
60
|
user_identifier: Optional[str] = None
|