powerfunc 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. powerfunc-0.3.0/.github/workflows/ci.yml +63 -0
  2. powerfunc-0.3.0/PKG-INFO +108 -0
  3. {powerfunc-0.2.0 → powerfunc-0.3.0}/README.md +2 -2
  4. {powerfunc-0.2.0 → powerfunc-0.3.0}/documentation/advanced_usage.md +7 -7
  5. powerfunc-0.3.0/documentation/integrations/snakemake.md +44 -0
  6. powerfunc-0.3.0/documentation/providers/gcp.md +171 -0
  7. {powerfunc-0.2.0 → powerfunc-0.3.0}/documentation/providers/modal.md +10 -7
  8. {powerfunc-0.2.0 → powerfunc-0.3.0}/documentation/readme.md +14 -6
  9. {powerfunc-0.2.0 → powerfunc-0.3.0}/pyproject.toml +16 -10
  10. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/compute.py +15 -8
  11. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/decorator.py +10 -0
  12. powerfunc-0.3.0/src/powerfunc/integrations/__init__.py +1 -0
  13. powerfunc-0.3.0/src/powerfunc/integrations/snakemake.py +118 -0
  14. powerfunc-0.3.0/src/powerfunc/providers/gcp_batch.py +436 -0
  15. powerfunc-0.3.0/src/powerfunc/providers/gcp_cloud_run.py +391 -0
  16. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/providers/modal.py +17 -12
  17. powerfunc-0.3.0/tests/powerfunc_tests/core/__init__.py +0 -0
  18. {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_basic.py +1 -1
  19. {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_cli.py +1 -1
  20. {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_conversions.py +1 -1
  21. {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/data}/generate_data_files.py +1 -1
  22. powerfunc-0.3.0/tests/powerfunc_tests/gcp/__init__.py +0 -0
  23. powerfunc-0.3.0/tests/powerfunc_tests/gcp/conftest.py +42 -0
  24. powerfunc-0.3.0/tests/powerfunc_tests/gcp/gcp_jobs.py +73 -0
  25. powerfunc-0.3.0/tests/powerfunc_tests/gcp/gcp_shared.py +27 -0
  26. powerfunc-0.3.0/tests/powerfunc_tests/gcp/test_gcp_batch.py +70 -0
  27. powerfunc-0.3.0/tests/powerfunc_tests/gcp/test_gcp_cloud_run.py +52 -0
  28. powerfunc-0.3.0/tests/powerfunc_tests/modal/__init__.py +0 -0
  29. powerfunc-0.3.0/tests/powerfunc_tests/modal/test_modal.py +44 -0
  30. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/__init__.py +0 -0
  31. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/functions.py +25 -0
  32. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/input.smk +8 -0
  33. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/input_params_and_output.smk +11 -0
  34. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/multiple_outputs_rejected.smk +8 -0
  35. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/named_outputs_rejected.smk +10 -0
  36. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/params.smk +8 -0
  37. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/positional_rejected.smk +9 -0
  38. powerfunc-0.3.0/tests/powerfunc_tests/snakemake/test_snakemake.py +95 -0
  39. powerfunc-0.3.0/tox.ini +43 -0
  40. {powerfunc-0.2.0 → powerfunc-0.3.0}/uv.lock +573 -931
  41. powerfunc-0.2.0/.claude/settings.local.json +0 -8
  42. powerfunc-0.2.0/PKG-INFO +0 -27
  43. powerfunc-0.2.0/documentation/providers/gcp.md +0 -121
  44. powerfunc-0.2.0/src/powerfunc/providers/gcp.py +0 -251
  45. {powerfunc-0.2.0 → powerfunc-0.3.0}/.gitignore +0 -0
  46. {powerfunc-0.2.0 → powerfunc-0.3.0}/.pre-commit-config.yaml +0 -0
  47. {powerfunc-0.2.0 → powerfunc-0.3.0}/LICENSE.txt +0 -0
  48. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/__init__.py +0 -0
  49. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/command_line.py +0 -0
  50. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/configuration.py +0 -0
  51. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/conversions.py +0 -0
  52. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/__init__.py +0 -0
  53. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/arrow.py +0 -0
  54. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/csv_reader.py +0 -0
  55. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/dask.py +0 -0
  56. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/pandas.py +0 -0
  57. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/formats/polars.py +0 -0
  58. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/providers/__init__.py +0 -0
  59. {powerfunc-0.2.0 → powerfunc-0.3.0}/src/powerfunc/py.typed +0 -0
  60. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/__init__.py +0 -0
  61. {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/cli_example.py +0 -0
  62. {powerfunc-0.2.0/tests/powerfunc_tests → powerfunc-0.3.0/tests/powerfunc_tests/core}/test_remote.py +0 -0
  63. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.arrow +0 -0
  64. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.csv +0 -0
  65. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.feather +0 -0
  66. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.pandas.json +0 -0
  67. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.parquet +0 -0
  68. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.polars.json +0 -0
  69. {powerfunc-0.2.0 → powerfunc-0.3.0}/tests/powerfunc_tests/data/data.xlsx +0 -0
@@ -0,0 +1,63 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ # One run per branch. Keying on the head branch (not github.ref) makes the
8
+ # push and pull_request events for the same branch share a group, so we get a
9
+ # single run per commit instead of two, and a new push cancels an in-progress
10
+ # run for the same branch so live remote tests can't pile up.
11
+ concurrency:
12
+ group: ci-${{ github.event.pull_request.head.ref || github.ref_name }}
13
+ cancel-in-progress: true
14
+
15
+ jobs:
16
+ lint:
17
+ runs-on: ubuntu-latest
18
+ timeout-minutes: 5
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - uses: astral-sh/setup-uv@v6
22
+ with:
23
+ enable-cache: true
24
+ - run: uvx pre-commit run --all-files --show-diff-on-failure
25
+
26
+ test-core:
27
+ runs-on: ubuntu-latest
28
+ timeout-minutes: 10
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+ - uses: astral-sh/setup-uv@v6
32
+ with:
33
+ enable-cache: true
34
+ - run: uvx --with tox-uv tox run -e core
35
+
36
+ test-gcp:
37
+ runs-on: ubuntu-latest
38
+ timeout-minutes: 15
39
+ steps:
40
+ - uses: actions/checkout@v4
41
+ - uses: astral-sh/setup-uv@v6
42
+ with:
43
+ enable-cache: true
44
+ - name: Run GCP tests
45
+ env:
46
+ GOOGLE_CLOUD_API_KEY: ${{ secrets.GOOGLE_CLOUD_API_KEY }}
47
+ GCP_BUCKET: gs://powerfunc-temporary-348293
48
+ GCP_REGION: europe-west4
49
+ run: uvx --with tox-uv tox run -e gcp
50
+
51
+ test-modal:
52
+ runs-on: ubuntu-latest
53
+ timeout-minutes: 10
54
+ steps:
55
+ - uses: actions/checkout@v4
56
+ - uses: astral-sh/setup-uv@v6
57
+ with:
58
+ enable-cache: true
59
+ - name: Run Modal tests
60
+ env:
61
+ MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
62
+ MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
63
+ run: uvx --with tox-uv tox run -e modal
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: powerfunc
3
+ Version: 0.3.0
4
+ Summary: Make functions run however you like — CLI, pipeline, or remote compute
5
+ Project-URL: Homepage, https://github.com/ddrakard/powerfunc
6
+ Project-URL: Repository, https://github.com/ddrakard/powerfunc
7
+ Project-URL: Documentation, https://github.com/ddrakard/powerfunc/blob/main/documentation/readme.md
8
+ License-File: LICENSE.txt
9
+ Requires-Python: >=3.9
10
+ Requires-Dist: cloudpathlib[all]
11
+ Requires-Dist: cloudpickle
12
+ Requires-Dist: fsspec[adl,gcs,s3]
13
+ Requires-Dist: jsonargparse>=4.49.0
14
+ Requires-Dist: pydantic>=2
15
+ Provides-Extra: dev
16
+ Requires-Dist: pre-commit; extra == 'dev'
17
+ Requires-Dist: ruff; extra == 'dev'
18
+ Requires-Dist: tox; extra == 'dev'
19
+ Requires-Dist: tox-uv; extra == 'dev'
20
+ Provides-Extra: gcp
21
+ Requires-Dist: google-auth; extra == 'gcp'
22
+ Requires-Dist: google-cloud-batch; extra == 'gcp'
23
+ Requires-Dist: google-cloud-compute; extra == 'gcp'
24
+ Requires-Dist: google-cloud-run; extra == 'gcp'
25
+ Provides-Extra: modal
26
+ Requires-Dist: modal; extra == 'modal'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # powerfunc
30
+
31
+ Add superpowers to your functions. Run them from the CLI, on cloud data, or on cloud compute.
32
+
33
+ ## Features
34
+
35
+ - Load and save function arguments and results to file paths, local or in cloud storage.
36
+ - Invoke functions on the command line.
37
+ - Remote execution on cloud compute.
38
+ - YAML configuration file support.
39
+ - Supports [pandas](https://pandas.pydata.org/), [polars](https://pola.rs/), [PyArrow](https://pypi.org/project/pyarrow/), [Dask](https://www.dask.org/), and the Python [`csv` module](https://docs.python.org/3/library/csv.html) out of the box
40
+ - Supports cloud providers [Google Cloud](https://cloud.google.com) and [Modal](https://modal.com/).
41
+ - Extensible for new data types and cloud providers.
42
+
43
+ ## Installation
44
+
45
+ ```bash
46
+ pip install powerfunc
47
+ ```
48
+
49
+ With [Google Cloud](https://cloud.google.com) remote execution:
50
+
51
+ ```bash
52
+ pip install 'powerfunc[gcp]'
53
+ ```
54
+
55
+ With [Modal](https://modal.com/) remote execution:
56
+
57
+ ```bash
58
+ pip install 'powerfunc[modal]'
59
+ ```
60
+
61
+ ## Example usage
62
+
63
+ Decorate a normal function with `@powerfunc`:
64
+
65
+ ```python
66
+ import pandas as pd
67
+ from powerfunc import powerfunc
68
+
69
+ @powerfunc
70
+ def sum_col(df: pd.DataFrame) -> float:
71
+ return float(df["value"].sum())
72
+
73
+ df = pd.DataFrame({"value": [1, 2, 3]})
74
+ print(sum_col(df))
75
+ ```
76
+
77
+ Then use it on files, local or in the cloud:
78
+
79
+ ```python
80
+ print(sum_col("data.csv"))
81
+ print(sum_col("https://storage.googleapis.com/powerfunc/data.csv"))
82
+ ```
83
+
84
+ Add one line at the end of your file to make it runnable on the command line:
85
+
86
+ ```python
87
+ from powerfunc import powerfunc
88
+
89
+ ...
90
+
91
+ powerfunc.enable_cli()
92
+ ```
93
+
94
+ ```sh
95
+ python myfile.py gs://powerfunc/data.csv
96
+ ```
97
+
98
+ ## Support
99
+
100
+ This project is in early development. Breaking changes may happen at any time.
101
+
102
+ ## Documentation
103
+
104
+ For complete information on using powerfunc, including executing on cloud compute, please see the [documentation](documentation/readme.md).
105
+
106
+ ## License
107
+
108
+ Released under the [MIT](LICENSE.txt) license.
@@ -21,13 +21,13 @@ pip install powerfunc
21
21
  With [Google Cloud](https://cloud.google.com) remote execution:
22
22
 
23
23
  ```bash
24
- pip install powerfunc[gcp]
24
+ pip install 'powerfunc[gcp]'
25
25
  ```
26
26
 
27
27
  With [Modal](https://modal.com/) remote execution:
28
28
 
29
29
  ```bash
30
- pip install powerfunc[modal]
30
+ pip install 'powerfunc[modal]'
31
31
  ```
32
32
 
33
33
  ## Example usage
@@ -12,10 +12,10 @@ Subclass `ComputeSpecification` to define reusable compute configurations:
12
12
  ```python
13
13
  from dataclasses import field
14
14
  from pydantic.dataclasses import dataclass
15
- from powerfunc.compute import ComputeSpecification, Provider
16
- from powerfunc.providers.gcp import GCPProvider
15
+ from powerfunc.compute import ComputeSpecification, CpuCount, DockerImageUri, GpuModel, MemorySize, Provider
16
+ from powerfunc.providers.gcp_cloud_run import GCPCloudRunProvider
17
17
 
18
- provider = GCPProvider(
18
+ provider = GCPCloudRunProvider(
19
19
  project="my-project",
20
20
  region="us-central1",
21
21
  temporary_bucket_path="gs://my-bucket/tmp",
@@ -23,10 +23,10 @@ provider = GCPProvider(
23
23
 
24
24
  @dataclass
25
25
  class MyGpuSpec(ComputeSpecification):
26
- cpu: float = 8.0
27
- memory: int = 32768
28
- image: str = "gcr.io/deeplearning-platform-release/base-cu121"
29
- gpu: str = "a100"
26
+ cpu: CpuCount = 8.0
27
+ memory: MemorySize = 32768
28
+ image: DockerImageUri = "nvidia/cuda:12.1.0-base-ubuntu22.04"
29
+ gpu: GpuModel = "a100"
30
30
  provider: Provider = field(default_factory=lambda: provider)
31
31
 
32
32
  MY_GPU = MyGpuSpec()
@@ -0,0 +1,44 @@
1
+ # Snakemake
2
+
3
+ powerfunc functions can be called directly from a [Snakemake](https://snakemake.readthedocs.io/)
4
+ rule. Call `.snakemake()` inside a `run:` block and the rule's inputs, params and outputs are
5
+ bound automatically.
6
+
7
+ ## Usage
8
+
9
+ ```python
10
+ # functions.py
11
+ import pandas as pd
12
+ from powerfunc import powerfunc
13
+
14
+ @powerfunc
15
+ def sum_col(df: pd.DataFrame) -> pd.DataFrame:
16
+ return df.sum().to_frame().T
17
+ ```
18
+
19
+ ```python
20
+ # Snakefile
21
+ from functions import sum_col
22
+
23
+ rule sum:
24
+ input: "data.csv"
25
+ output: "result.csv"
26
+ run:
27
+ sum_col.snakemake()
28
+ ```
29
+
30
+ ## Binding rules
31
+
32
+ Inputs (`input`) and params (`params`) are bound to the function's arguments:
33
+
34
+ - When **both** inputs and params are present, they must all be **named** (keyword).
35
+ Positional entries are only allowed when one of the two is absent.
36
+ - Inputs are passed as paths and are read through powerfunc's
37
+ [converters](../readme.md#automatic-reading-and-writing-data), so a `pd.DataFrame`
38
+ parameter receives the loaded file. Params are passed through unchanged.
39
+
40
+ Outputs (`output`) are bound as follows:
41
+
42
+ - A single **unnamed** output receives the function's **return value**, written through
43
+ powerfunc's converters.
44
+ - Named outputs and multiple outputs are not supported.
@@ -0,0 +1,171 @@
1
+ # Google Cloud Platform
2
+
3
+ powerfunc supports two GCP execution backends: [Cloud Run Jobs](https://cloud.google.com/run/docs/create-jobs) and [Batch](https://cloud.google.com/batch/docs). Both use the same authentication, bucket-based data transfer, and `ComputeSpecification` model.
4
+
5
+ ## Contents
6
+
7
+ - [Installation](#installation)
8
+ - [Setup and configuration](#setup)
9
+ - [Cloud Run](#cloud-run)
10
+ - [Batch](#batch)
11
+ - [Environment variables](#environment-variables)
12
+ - [Predefined configurations](#predefined-configurations)
13
+ - [Supported GPUs](#supported-gpus)
14
+ - [Limitations](#known-limitations)
15
+
16
+ ## Installation
17
+
18
+ Make sure to install with the `gcp` option.
19
+
20
+ ```sh
21
+ pip install 'powerfunc[gcp]'
22
+ ```
23
+
24
+ or
25
+
26
+ ```sh
27
+ uv add 'powerfunc[gcp]'
28
+ ```
29
+
30
+ ## Setup
31
+
32
+ You need to have a Google Cloud account with a project inside it where the remote execution can happen. You must authenticate with the [Google Cloud CLI](https://cloud.google.com/sdk/docs/install) using:
33
+
34
+ ```sh
35
+ gcloud auth application-default login
36
+ ```
37
+
38
+ powerfunc transfers data in and out of the compute job using a [Google Cloud Storage bucket](https://docs.cloud.google.com/storage/docs/buckets). Therefore, you need to have or create a bucket that powerfunc can use, and provide these details.
39
+
40
+ ### Command line usage
41
+
42
+ When running from the command line, [module-style invocation](https://docs.python.org/3/using/cmdline.html#cmdoption-m) `python -m my.module` must be used instead of direct script execution `python myscript.py`.
43
+
44
+ ## Cloud Run
45
+
46
+ Cloud Run Jobs is a serverless execution backend. It scales to zero, has fast cold starts for cached images, and supports GPUs.
47
+
48
+ Configure in `powerfunc.yaml`:
49
+
50
+ ```yaml
51
+ compute:
52
+ class_path: powerfunc.providers.gcp_cloud_run.GcpCloudRunCpuSmall
53
+ init_args:
54
+ timeout: 600
55
+ provider:
56
+ class_path: powerfunc.providers.gcp_cloud_run.GCPCloudRunProvider
57
+ init_args:
58
+ project: my-gcp-project
59
+ region: us-central1
60
+ temporary_bucket_path: gs://my-bucket/powerfunc-temp
61
+ ```
62
+
63
+ Or directly in Python:
64
+
65
+ ```python
66
+ from powerfunc.providers.gcp_cloud_run import GCPCloudRunProvider, GcpCloudRunCpuSmall
67
+
68
+ provider = GCPCloudRunProvider(
69
+ project="my-gcp-project",
70
+ region="us-central1",
71
+ temporary_bucket_path="gs://my-bucket/",
72
+ )
73
+ compute = GcpCloudRunCpuSmall(timeout=600, provider=provider)
74
+
75
+ result = sum_col("gs://bucket/data.csv", compute=compute)
76
+ ```
77
+
78
+ Cloud Run Jobs can require some time to start, particularly for a docker image that has not been used recently.
79
+
80
+ ## Batch
81
+
82
+ Batch provisions Compute Engine VMs. It supports GPUs, higher resource limits than Cloud Run, spot (preemptible) VMs, and explicit machine type selection.
83
+
84
+ Configure in `powerfunc.yaml`:
85
+
86
+ ```yaml
87
+ compute:
88
+ class_path: powerfunc.providers.gcp_batch.GcpBatchCpuSmall
89
+ init_args:
90
+ timeout: 600
91
+ provider:
92
+ class_path: powerfunc.providers.gcp_batch.GCPBatchProvider
93
+ init_args:
94
+ project: my-gcp-project
95
+ region: us-central1
96
+ temporary_bucket_path: gs://my-bucket/powerfunc-temp
97
+ ```
98
+
99
+ Or directly in Python:
100
+
101
+ ```python
102
+ from powerfunc.providers.gcp_batch import GCPBatchProvider, GcpBatchCpuSmall
103
+
104
+ provider = GCPBatchProvider(
105
+ project="my-gcp-project",
106
+ region="us-central1",
107
+ temporary_bucket_path="gs://my-bucket/",
108
+ spot=True, # optional: use spot (preemptible) VMs
109
+ machine_type="n1-standard-4", # optional: Batch auto-selects if omitted
110
+ )
111
+ compute = GcpBatchCpuSmall(timeout=600, provider=provider)
112
+
113
+ result = sum_col("gs://bucket/data.csv", compute=compute)
114
+ ```
115
+
116
+ `machine_type` is optional — Batch selects a machine from the requested CPU/memory when it
117
+ is not given. For GPUs, a compatible `machine_type` is usually required.
118
+
119
+ ## Environment variables
120
+
121
+ Both Cloud Run and Batch support passing environment variables to the remote container via the `environment_variables` provider option:
122
+
123
+ ```python
124
+ provider = GCPCloudRunProvider(
125
+ project="my-project",
126
+ region="us-central1",
127
+ temporary_bucket_path="gs://my-bucket/tmp",
128
+ environment_variables={"MY_API_KEY": "secret123"},
129
+ )
130
+ ```
131
+
132
+ Or in `powerfunc.yaml`:
133
+
134
+ ```yaml
135
+ provider:
136
+ init_args:
137
+ environment_variables:
138
+ MY_API_KEY: secret123
139
+ ```
140
+
141
+ ## Supported GPUs
142
+
143
+ Both Cloud Run and Batch support: `t4`, `a100`, `l4`, `v100`.
144
+
145
+ ## Predefined configurations
146
+
147
+ | Class | Provider | CPU | Memory | Image | GPU |
148
+ |---|---|---|---|---|---|
149
+ | `GcpCloudRunCpuSmall` | Cloud Run | 1 vCPU | 2GB | `python:3.12-slim` | — |
150
+ | `GcpCloudRunGpu` | Cloud Run | 4 vCPU | 16GB | `nvidia/cuda:12.1.0-base-ubuntu22.04` | L4 |
151
+ | `GcpBatchCpuSmall` | Batch | 1 vCPU | 2GB | `python:3.12-slim` | — |
152
+ | `GcpBatchGpu` | Batch | 4 vCPU | 16GB | `nvidia/cuda:12.1.0-base-ubuntu22.04` | L4 |
153
+
154
+ All presets require a `timeout` argument (in seconds), e.g. `GcpCloudRunCpuSmall(timeout=600)` or `GcpBatchCpuSmall(timeout=600)`.
155
+
156
+ ## Known limitations
157
+
158
+ ### Codebase synchronisation
159
+
160
+ A simple codebase synchronisation method is used. It is not robust enough to work with all codebases and scenarios. For complex setups, understanding of the underlying infrastructure technologies (such as Python packaging and [Docker](https://www.docker.com/)) will likely be needed. To enable rapid execution for complex codebases, additional setup is needed, such as preparing Docker images.
161
+
162
+ ### Anonymous usage
163
+
164
+ For public GCS buckets without credentials, configure fsspec before importing powerfunc:
165
+
166
+ ```python
167
+ import fsspec
168
+ fsspec.config.conf["gs"] = {"token": "anon"}
169
+ ```
170
+
171
+ **Parquet files on `gs://` hang without credentials** for pandas and polars — their parquet readers use pyarrow's C++ GCS filesystem which has no anonymous mode and no timeout. Use `gcloud auth application-default login` to resolve this, or access parquet via HTTPS instead.
@@ -14,13 +14,13 @@ This page describes how to execute powerfunc functions remotely on [Modal](https
14
14
  Make sure to install with the `modal` option.
15
15
 
16
16
  ```sh
17
- pip install powerfunc[modal]
17
+ pip install 'powerfunc[modal]'
18
18
  ```
19
19
 
20
20
  or
21
21
 
22
22
  ```sh
23
- uv add powerfunc[modal]
23
+ uv add 'powerfunc[modal]'
24
24
  ```
25
25
 
26
26
  You must have a Modal account. Then authenticate to Modal:
@@ -33,13 +33,13 @@ python -m modal setup
33
33
 
34
34
  ```python
35
35
  from powerfunc import powerfunc
36
- from powerfunc.providers.modal import MODAL_CPU_SMALL
36
+ from powerfunc.providers.modal import ModalCpuSmall
37
37
 
38
38
  @powerfunc
39
39
  def sum_col(df: pd.DataFrame) -> float:
40
40
  return float(df["value"].sum())
41
41
 
42
- result = sum_col("data.csv", compute=MODAL_CPU_SMALL)
42
+ result = sum_col("data.csv", compute=ModalCpuSmall(timeout=600))
43
43
  ```
44
44
 
45
45
  Or set a default in `powerfunc.yaml` so all calls run on Modal without passing `compute=`:
@@ -47,6 +47,8 @@ Or set a default in `powerfunc.yaml` so all calls run on Modal without passing `
47
47
  ```yaml
48
48
  compute:
49
49
  class_path: powerfunc.providers.modal.ModalCpuSmall
50
+ init_args:
51
+ timeout: 600
50
52
  ```
51
53
 
52
54
  ## Dependencies
@@ -56,7 +58,7 @@ By default the container uses Modal's `debian_slim` base image. If your function
56
58
  ```python
57
59
  from powerfunc.providers.modal import ModalProvider, ModalCpuSmall
58
60
 
59
- compute = ModalCpuSmall(provider=ModalProvider(pip_packages=("pandas", "pyarrow")))
61
+ compute = ModalCpuSmall(timeout=600, provider=ModalProvider(pip_packages=("pandas", "pyarrow")))
60
62
  result = sum_col("data.csv", compute=compute)
61
63
  ```
62
64
 
@@ -65,7 +67,7 @@ To use a different base image, set `image` on the compute specification:
65
67
  ```python
66
68
  from powerfunc.providers.modal import ModalCpuSmall
67
69
 
68
- compute = ModalCpuSmall(image="python:3.12-slim")
70
+ compute = ModalCpuSmall(timeout=600, image="python:3.12-slim")
69
71
  result = sum_col("data.csv", compute=compute)
70
72
  ```
71
73
 
@@ -75,6 +77,7 @@ Or in `powerfunc.yaml`:
75
77
  compute:
76
78
  class_path: powerfunc.providers.modal.ModalCpuSmall
77
79
  init_args:
80
+ timeout: 600
78
81
  image: "python:3.12-slim"
79
82
  provider:
80
83
  class_path: powerfunc.providers.modal.ModalProvider
@@ -89,4 +92,4 @@ compute:
89
92
  | `ModalCpuSmall` | 1 vCPU | 1GB | — |
90
93
  | `ModalGpuA100` | 8 vCPU | 80GB | A100 |
91
94
 
92
- Convenience instances `powerfunc.providers.modal.MODAL_CPU_SMALL` and `powerfunc.providers.modal.MODAL_GPU_A100` are available for direct use.
95
+ All presets require a `timeout` argument (in seconds), e.g. `ModalCpuSmall(timeout=600)`.
@@ -10,6 +10,7 @@
10
10
  - [Cloud providers and remote execution](#cloud-providers-and-remote-execution)
11
11
  - [Configuration](#configuration)
12
12
  - [Supported formats](#supported-formats)
13
+ - [Integrations](#integrations)
13
14
  - [Known limitations](#known-limitations)
14
15
  - [Advanced usage](#advanced-usage)
15
16
 
@@ -24,13 +25,13 @@ pip install powerfunc
24
25
  For GCP remote execution:
25
26
 
26
27
  ```sh
27
- pip install powerfunc[gcp]
28
+ pip install 'powerfunc[gcp]'
28
29
  ```
29
30
 
30
31
  For Modal remote execution:
31
32
 
32
33
  ```sh
33
- pip install powerfunc[modal]
34
+ pip install 'powerfunc[modal]'
34
35
  ```
35
36
 
36
37
  Or using [uv](https://github.com/astral-sh/uv):
@@ -40,11 +41,11 @@ uv add powerfunc
40
41
  ```
41
42
 
42
43
  ```sh
43
- uv add powerfunc[gcp]
44
+ uv add 'powerfunc[gcp]'
44
45
  ```
45
46
 
46
47
  ```sh
47
- uv add powerfunc[modal]
48
+ uv add 'powerfunc[modal]'
48
49
  ```
49
50
 
50
51
  ## Basic usage
@@ -151,10 +152,11 @@ For example:
151
152
 
152
153
  ```yaml
153
154
  compute:
154
- class_path: powerfunc.providers.gcp.GcpCpuSmall
155
+ class_path: powerfunc.providers.gcp_cloud_run.GcpCloudRunCpuSmall
155
156
  init_args:
157
+ timeout: 600
156
158
  provider:
157
- class_path: powerfunc.providers.gcp.GCPProvider
159
+ class_path: powerfunc.providers.gcp_cloud_run.GCPCloudRunProvider
158
160
  init_args:
159
161
  project: my-gcp-project
160
162
  region: us-central1
@@ -192,6 +194,12 @@ python my_script.py sum_col data.csv --config my_config.yaml
192
194
  | `dask.dataframe.DataFrame` | `.csv`, `.parquet` |
193
195
  | `csv.reader` | `.csv` |
194
196
 
197
+ ## Integrations
198
+
199
+ powerfunc functions can be driven by external workflow tools:
200
+
201
+ - [Snakemake](integrations/snakemake.md) — bind a rule's inputs, params and outputs with `function.snakemake()`.
202
+
195
203
  ## Known limitations
196
204
 
197
205
  ### Anonymous `gs://` access
@@ -4,12 +4,14 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "powerfunc"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "Make functions run however you like — CLI, pipeline, or remote compute"
9
+ readme = "README.md"
9
10
  requires-python = ">=3.9"
10
11
  dependencies = [
11
12
  "pydantic>=2",
12
13
  "cloudpathlib[all]",
14
+ "fsspec[gcs,s3,adl]",
13
15
  "jsonargparse>=4.49.0",
14
16
  "cloudpickle",
15
17
  ]
@@ -17,25 +19,25 @@ dependencies = [
17
19
  [project.optional-dependencies]
18
20
  gcp = [
19
21
  "google-cloud-run",
22
+ "google-cloud-batch",
23
+ "google-cloud-compute",
20
24
  "google-auth",
21
25
  ]
22
26
  modal = [
23
27
  "modal",
24
28
  ]
25
29
  dev = [
26
- "pytest",
27
- "pandas",
28
- "polars",
29
- "pyarrow",
30
- "openpyxl",
31
- "xlsxwriter",
32
- "fastexcel",
33
- "dask[dataframe]",
34
- "gcsfs",
30
+ "tox",
31
+ "tox-uv",
35
32
  "ruff",
36
33
  "pre-commit",
37
34
  ]
38
35
 
36
+ [project.urls]
37
+ Homepage = "https://github.com/ddrakard/powerfunc"
38
+ Repository = "https://github.com/ddrakard/powerfunc"
39
+ Documentation = "https://github.com/ddrakard/powerfunc/blob/main/documentation/readme.md"
40
+
39
41
  [tool.ruff]
40
42
  target-version = "py39"
41
43
  line-length = 100
@@ -43,6 +45,10 @@ line-length = 100
43
45
  [tool.ruff.lint]
44
46
  select = ["E", "F", "W", "I", "B", "C4", "UP", "RUF"]
45
47
 
48
+ [tool.pytest.ini_options]
49
+ pythonpath = ["tests"]
50
+ addopts = "--import-mode=importlib"
51
+
46
52
  [dependency-groups]
47
53
  dev = [
48
54
  "cloudpickle>=3.1.2",
@@ -1,5 +1,5 @@
1
1
  import dataclasses
2
- from typing import Annotated, Any, Callable, Optional
2
+ from typing import Annotated, Any, Callable, Optional, TypeAlias
3
3
 
4
4
  from pydantic import Field, GetCoreSchemaHandler
5
5
  from pydantic.dataclasses import dataclass as pydantic_dataclass
@@ -7,6 +7,12 @@ from pydantic_core import core_schema
7
7
 
8
8
  from powerfunc.command_line import ExpectedException
9
9
 
10
+ Timeout: TypeAlias = Annotated[float, Field(gt=0, description="Maximum job duration in seconds")]
11
+ CpuCount: TypeAlias = Annotated[float, Field(gt=0, description="Number of vCPUs")]
12
+ MemorySize: TypeAlias = Annotated[int, Field(gt=0, description="RAM in MB")]
13
+ DockerImageUri: TypeAlias = Annotated[str, Field(description="Container image URI")]
14
+ GpuModel: TypeAlias = Annotated[Optional[str], Field(description="GPU model name")]
15
+
10
16
 
11
17
  class Provider:
12
18
  """Base compute provider. Subclass and implement call()."""
@@ -34,20 +40,21 @@ class UndefinedProvider(Provider):
34
40
  class ComputeSpecification:
35
41
  """Specifies compute resources for remote execution."""
36
42
 
37
- cpu: Annotated[float, Field(gt=0, description="Number of vCPUs")]
38
- memory: Annotated[int, Field(gt=0, description="RAM in MB")]
39
- image: Annotated[str, Field(description="Container image URI")] = ""
43
+ timeout: Timeout
44
+ cpu: CpuCount
45
+ memory: MemorySize
46
+ image: DockerImageUri = ""
40
47
  provider: Provider = dataclasses.field(default_factory=UndefinedProvider)
41
- gpu: Annotated[Optional[str], Field(description="GPU model name")] = None
48
+ gpu: GpuModel = None
42
49
 
43
50
 
44
51
  @pydantic_dataclass
45
52
  class CpuSmall(ComputeSpecification):
46
53
  """1 vCPU, 2GB RAM, Python 3.12 slim."""
47
54
 
48
- cpu: float = 1.0
49
- memory: int = 2048
50
- image: str = "python:3.12-slim"
55
+ cpu: CpuCount = 1.0
56
+ memory: MemorySize = 2048
57
+ image: DockerImageUri = "python:3.12-slim"
51
58
 
52
59
 
53
60
  user_identifier: Optional[str] = None