rappel 0.5.1__py3-none-manylinux_2_39_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proto/ast_pb2.py +115 -0
- proto/ast_pb2.pyi +1522 -0
- proto/ast_pb2_grpc.py +24 -0
- proto/ast_pb2_grpc.pyi +22 -0
- proto/messages_pb2.py +106 -0
- proto/messages_pb2.pyi +1205 -0
- proto/messages_pb2_grpc.py +406 -0
- proto/messages_pb2_grpc.pyi +380 -0
- rappel/__init__.py +56 -0
- rappel/actions.py +108 -0
- rappel/bin/boot-rappel-singleton +0 -0
- rappel/bin/rappel-bridge +0 -0
- rappel/bin/start-workers +0 -0
- rappel/bridge.py +228 -0
- rappel/dependencies.py +149 -0
- rappel/exceptions.py +11 -0
- rappel/formatter.py +110 -0
- rappel/ir_builder.py +2966 -0
- rappel/logger.py +39 -0
- rappel/registry.py +106 -0
- rappel/schedule.py +347 -0
- rappel/serialization.py +253 -0
- rappel/worker.py +191 -0
- rappel/workflow.py +236 -0
- rappel/workflow_runtime.py +287 -0
- rappel-0.5.1.data/scripts/boot-rappel-singleton +0 -0
- rappel-0.5.1.data/scripts/rappel-bridge +0 -0
- rappel-0.5.1.data/scripts/start-workers +0 -0
- rappel-0.5.1.dist-info/METADATA +299 -0
- rappel-0.5.1.dist-info/RECORD +32 -0
- rappel-0.5.1.dist-info/WHEEL +4 -0
- rappel-0.5.1.dist-info/entry_points.txt +2 -0
rappel/workflow_runtime.py
@@ -0,0 +1,287 @@

"""Runtime helpers for executing actions inside the worker.

This module provides the execution layer for Python workers that receive
action dispatch commands from the Rust scheduler.
"""

import asyncio
import dataclasses
from base64 import b64decode
from dataclasses import dataclass
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from pathlib import Path, PurePath
from typing import Any, Dict, get_args, get_origin, get_type_hints
from uuid import UUID

from pydantic import BaseModel

from proto import messages_pb2 as pb2

from .dependencies import provide_dependencies
from .registry import registry
from .serialization import arguments_to_kwargs


class WorkflowNodeResult(BaseModel):
    """Result from a workflow node execution containing variable bindings."""

    variables: Dict[str, Any]


@dataclass
class ActionExecutionResult:
    """Result of an action execution."""

    result: Any
    exception: BaseException | None = None


def _is_pydantic_model(cls: type) -> bool:
    """Check if a class is a Pydantic BaseModel subclass."""
    try:
        return isinstance(cls, type) and issubclass(cls, BaseModel)
    except TypeError:
        return False


def _is_dataclass_type(cls: type) -> bool:
    """Check if a class is a dataclass."""
    return dataclasses.is_dataclass(cls) and isinstance(cls, type)


def _coerce_primitive(value: Any, target_type: type) -> Any:
    """Coerce a value to a primitive type based on target_type.

    Handles conversion of serialized values (strings, floats) back to their
    native Python types (UUID, datetime, etc.).
    """
    # Handle None
    if value is None:
        return None

    # UUID from string
    if target_type is UUID:
        if isinstance(value, UUID):
            return value
        if isinstance(value, str):
            return UUID(value)
        return value

    # datetime from ISO string
    if target_type is datetime:
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            return datetime.fromisoformat(value)
        return value

    # date from ISO string
    if target_type is date:
        if isinstance(value, date):
            return value
        if isinstance(value, str):
            return date.fromisoformat(value)
        return value

    # time from ISO string
    if target_type is time:
        if isinstance(value, time):
            return value
        if isinstance(value, str):
            return time.fromisoformat(value)
        return value

    # timedelta from total seconds
    if target_type is timedelta:
        if isinstance(value, timedelta):
            return value
        if isinstance(value, (int, float)):
            return timedelta(seconds=value)
        return value

    # Decimal from string
    if target_type is Decimal:
        if isinstance(value, Decimal):
            return value
        if isinstance(value, (str, int, float)):
            return Decimal(str(value))
        return value

    # bytes from base64 string
    if target_type is bytes:
        if isinstance(value, bytes):
            return value
        if isinstance(value, str):
            return b64decode(value)
        return value

    # Path from string
    if target_type is Path or target_type is PurePath:
        if isinstance(value, PurePath):
            return value
        if isinstance(value, str):
            return Path(value)
        return value

    return value


# Types that can be coerced from serialized form
COERCIBLE_TYPES = (UUID, datetime, date, time, timedelta, Decimal, bytes, Path, PurePath)


def _coerce_dict_to_model(value: Any, target_type: type) -> Any:
    """Convert a dict to a Pydantic model or dataclass if needed.

    If value is a dict and target_type is a Pydantic model or dataclass,
    instantiate the model with the dict values. Otherwise, return value unchanged.
    """
    if not isinstance(value, dict):
        return value

    if _is_pydantic_model(target_type):
        # Use model_validate for Pydantic v2, fall back to direct instantiation
        model_validate = getattr(target_type, "model_validate", None)
        if model_validate is not None:
            return model_validate(value)
        return target_type(**value)

    if _is_dataclass_type(target_type):
        return target_type(**value)

    return value


def _coerce_value(value: Any, target_type: type) -> Any:
    """Coerce a value to the target type.

    Handles:
    - Primitive types (UUID, datetime, etc.)
    - Pydantic models and dataclasses (from dicts)
    - Generic collections like list[UUID], set[datetime]
    """
    # Handle None
    if value is None:
        return None

    # Check for coercible primitive types
    if isinstance(target_type, type) and issubclass(target_type, COERCIBLE_TYPES):
        return _coerce_primitive(value, target_type)

    # Check for Pydantic models or dataclasses
    if isinstance(value, dict):
        coerced = _coerce_dict_to_model(value, target_type)
        if coerced is not value:
            return coerced

    # Handle generic types like list[UUID], set[datetime]
    origin = get_origin(target_type)
    if origin is not None:
        args = get_args(target_type)

        # Handle list[T]
        if origin is list and isinstance(value, list) and args:
            item_type = args[0]
            return [_coerce_value(item, item_type) for item in value]

        # Handle set[T] (serialized as list)
        if origin is set and isinstance(value, list) and args:
            item_type = args[0]
            return {_coerce_value(item, item_type) for item in value}

        # Handle frozenset[T] (serialized as list)
        if origin is frozenset and isinstance(value, list) and args:
            item_type = args[0]
            return frozenset(_coerce_value(item, item_type) for item in value)

        # Handle tuple[T, ...] (serialized as list)
        if origin is tuple and isinstance(value, (list, tuple)) and args:
            # Variable length tuple like tuple[int, ...]
            if len(args) == 2 and args[1] is ...:
                item_type = args[0]
                return tuple(_coerce_value(item, item_type) for item in value)
            # Fixed length tuple like tuple[int, str, UUID]
            return tuple(
                _coerce_value(item, item_type) for item, item_type in zip(value, args, strict=False)
            )

        # Handle dict[K, V]
        if origin is dict and isinstance(value, dict) and len(args) == 2:
            key_type, val_type = args
            return {
                _coerce_value(k, key_type): _coerce_value(v, val_type) for k, v in value.items()
            }

    return value


def _coerce_kwargs_to_type_hints(handler: Any, kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """Coerce kwargs to expected types based on handler's type hints.

    Handles:
    - Pydantic models and dataclasses (from dicts)
    - Primitive types like UUID, datetime, Decimal, etc.
    - Generic collections like list[UUID], dict[str, datetime]
    """
    try:
        type_hints = get_type_hints(handler)
    except Exception:
        # If we can't get type hints (e.g., forward references), return as-is
        return kwargs

    coerced = {}
    for key, value in kwargs.items():
        if key in type_hints:
            target_type = type_hints[key]
            coerced[key] = _coerce_value(value, target_type)
        else:
            coerced[key] = value

    return coerced


async def execute_action(dispatch: pb2.ActionDispatch) -> ActionExecutionResult:
    """Execute an action based on the dispatch command.

    Args:
        dispatch: The action dispatch command from the Rust scheduler.

    Returns:
        The result of executing the action.
    """
    action_name = dispatch.action_name
    module_name = dispatch.module_name

    # Import the module if specified (this registers actions via @action decorator)
    if module_name:
        import importlib

        importlib.import_module(module_name)

    # Get the action handler using both module and name
    handler = registry.get(module_name, action_name)
    if handler is None:
        return ActionExecutionResult(
            result=None,
            exception=KeyError(f"action '{module_name}:{action_name}' not registered"),
        )

    # Deserialize kwargs
    kwargs = arguments_to_kwargs(dispatch.kwargs)

    # Coerce dict arguments to Pydantic models or dataclasses based on type hints
    # This is needed because the IR converts model constructor calls to dicts
    kwargs = _coerce_kwargs_to_type_hints(handler, kwargs)

    try:
        async with provide_dependencies(handler, kwargs) as call_kwargs:
            value = handler(**call_kwargs)
            if asyncio.iscoroutine(value):
                value = await value
            return ActionExecutionResult(result=value)
    except Exception as e:
        return ActionExecutionResult(
            result=None,
            exception=e,
        )
Binary file

Binary file

Binary file
rappel-0.5.1.dist-info/METADATA
@@ -0,0 +1,299 @@

Metadata-Version: 2.4
Name: rappel
Version: 0.5.1
Summary: Distributed & durable background events in Python
Requires-Python: >=3.10
Requires-Dist: googleapis-common-protos>=1.72.0
Requires-Dist: grpcio<2,>=1.66
Requires-Dist: protobuf<6,>=5.29
Requires-Dist: pydantic<3,>=2
Provides-Extra: dev
Requires-Dist: pytest>=8; extra == 'dev'
Description-Content-Type: text/markdown

# rappel



rappel is a library for building durable background tasks that withstand server restarts, task crashes, and long-running jobs. It's built for Python and Postgres, with no additional deploy-time requirements.

## Usage

Let's say you need to send welcome emails to a batch of users, but only the active ones. You want to fetch them all, filter out inactive accounts, then fan out emails in parallel. This is how you write that workflow in rappel:

```python
import asyncio
from rappel import Workflow, action, workflow

@workflow
class WelcomeEmailWorkflow(Workflow):
    async def run(self, user_ids: list[str]) -> list[EmailResult]:
        users = await fetch_users(user_ids)
        active_users = [user for user in users if user.active]

        results = await asyncio.gather(*[
            send_email(to=user.email, subject="Welcome")
            for user in active_users
        ])

        return results
```

And here's how you define the actions distributed to your worker cluster:

```python
@action
async def fetch_users(
    user_ids: list[str],
    db: Annotated[Database, Depend(get_db)],
) -> list[User]:
    return await db.get_many(User, user_ids)

@action
async def send_email(
    to: str,
    subject: str,
    emailer: Annotated[EmailClient, Depend(get_email_client)],
) -> EmailResult:
    return await emailer.send(to=to, subject=subject)
```

To kick off a background job and wait for completion:

```python
async def welcome_users(user_ids: list[str]):
    workflow = WelcomeEmailWorkflow()
    await workflow.run(user_ids)
```

When you call `await workflow.run()`, we parse the AST of your `run()` method and compile it into the Rappel Runtime Language. The `for` loop becomes a filter node, and the `asyncio.gather` becomes a parallel fan-out. None of this executes inline in your webserver; instead, it's queued to Postgres and orchestrated by the Rust runtime across your worker cluster.

**Actions** are the distributed work: network calls, database queries, anything that can fail and should be retried independently.

**Workflows** are the control flow: loops, conditionals, parallel branches. They orchestrate actions but don't do heavy lifting themselves.

### Complex Workflows

Workflows can get much more complex than the example above:

1. Customizable retry policy

By default, your Python code executes like native logic would: any exception is raised and immediately fails the workflow. Actions are set to time out after ~5 minutes to keep the queues from backing up, although we continuously retry timed-out actions in case they were caused by a failed node in your cluster. If you want more robust behavior, you can set retry policies and backoff intervals so the action is attempted multiple times until it succeeds.

```python
from rappel import RetryPolicy, BackoffPolicy
from datetime import timedelta

async def run(self):
    await self.run_action(
        inconsistent_action(0.5),
        # control handling of failures
        retry=RetryPolicy(attempts=50),
        backoff=BackoffPolicy(base_delay=5),
        timeout=timedelta(minutes=10)
    )
```

1. Branching control flows

Use if statements, for loops, or any other Python primitives within the control logic. We automatically detect these branches and compile them into DAG nodes that are executed just like your other actions.

```python
async def run(self, user_id: str) -> Summary:
    # loop + non-action helper call
    top_spenders: list[float] = []
    for record in summary.transactions.records:
        if record.is_high_value:
            top_spenders.append(record.amount)
```

1. asyncio primitives

Use `asyncio.gather` to parallelize tasks. Use `asyncio.sleep` to pause for long periods of time.

```python
import asyncio

async def run(self, user_id: str) -> Summary:
    # parallelize independent actions with gather
    profile, settings, history = await asyncio.gather(
        fetch_profile(user_id=user_id),
        fetch_settings(user_id=user_id),
        fetch_purchase_history(user_id=user_id)
    )

    # wait before sending email
    await asyncio.sleep(24*60*60)
    recommendations = await email_ping(history)

    return Summary(profile=profile, settings=settings, recommendations=recommendations)
```

### Error handling

To build truly robust background tasks, you need to consider how things can go wrong. Actions can 'fail' in a couple of ways, and the `.run_action` syntax lets you pass additional parameters that modify the execution bounds on each action.

1. The action explicitly raises an error and we want to retry it. Intermittent database connectivity, overloaded webservers, or simply buggy code will all throw. This comes from a standard Python `raise Exception()`.
1. The action raises an error that is really a `RappelTimeout`. This indicates that we dequeued the task but weren't able to complete it in the time allocated. This could be because we dequeued the task, started work on it, and then the server crashed. Or it could still be running in the background but simply took too much time. Either way, we raise a synthetic error representative of this execution.

By default, we only try actions once if an explicit exception is raised. We retry them indefinitely in the case of a timeout, since timeouts are usually caused by cross-device coordination issues.
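
As a sketch of how those two failure modes map onto `run_action` parameters (the action names here are placeholders; the policies reuse the `RetryPolicy`/`BackoffPolicy`/`timeout` options shown above):

```python
from datetime import timedelta

from rappel import BackoffPolicy, RetryPolicy

async def run(self, user_id: str) -> None:
    # Failure mode 1: the action raises. Opt into multiple attempts with
    # backoff instead of the single default attempt.
    await self.run_action(
        charge_customer(user_id=user_id),  # placeholder action
        retry=RetryPolicy(attempts=5),
        backoff=BackoffPolicy(base_delay=2),
    )

    # Failure mode 2: the action overruns its allocation (RappelTimeout).
    # Tighten the window so a hung or crashed worker is detected and the
    # action is re-queued sooner.
    await self.run_action(
        sync_external_profile(user_id=user_id),  # placeholder action
        timeout=timedelta(minutes=1),
    )
```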

## Project Status

_NOTE: Right now you shouldn't use rappel in any production applications. The spec is changing too quickly and we don't guarantee backwards compatibility before 1.0.0. But we would love it if you tried it out in a side project and let us know how you find it._

Rappel is in an early alpha. Particular areas of focus include:

1. Finalizing the Rappel Runtime Language
1. Extending AST parsing logic to handle most core control flows
1. Performance tuning
1. Unit and integration tests

If you have a workflow that you think should work but isn't yet producing the correct DAG (you can visualize it via the CLI with `.visualize()`), please file an issue.

## Configuration

The main rappel configuration is done through env vars, which is what you'll typically use in production with a docker deployment pipeline. If we can't find an environment parameter, we fall back to looking for a `.env` file that specifies it on your local filesystem.

These are the primary environment parameters that you'll likely want to customize for your deployment:

| Environment Variable | Description | Default | Example |
|---------------------|-------------|---------|---------|
| `RAPPEL_DATABASE_URL` | PostgreSQL connection string for the rappel server | (required on bridge & workers) | `postgresql://user:pass@localhost:5433/rappel` |
| `RAPPEL_WORKER_COUNT` | Number of Python worker processes | `num_cpus` | `8` |
| `RAPPEL_CONCURRENT_PER_WORKER` | Max concurrent actions per worker | `10` | `20` |
| `RAPPEL_USER_MODULE` | Python module preloaded into each worker | none | `my_app.actions` |
| `RAPPEL_POLL_INTERVAL_MS` | Poll interval for the dispatch loop (ms) | `100` | `50` |
| `RAPPEL_WEBAPP_ENABLED` | Enable the web dashboard | `false` | `true` |
| `RAPPEL_WEBAPP_ADDR` | Web dashboard bind address | `0.0.0.0:24119` | `0.0.0.0:8080` |

We expect that you won't need to modify the following env parameters, but we provide them for convenience:

| Environment Variable | Description | Default | Example |
|---------------------|-------------|---------|---------|
| `RAPPEL_HTTP_ADDR` | HTTP bind address for `rappel-bridge` | `127.0.0.1:24117` | `0.0.0.0:24117` |
| `RAPPEL_GRPC_ADDR` | gRPC bind address for `rappel-bridge` | HTTP port + 1 | `0.0.0.0:24118` |
| `RAPPEL_BATCH_SIZE` | Max actions fetched per poll | `workers * concurrent_per_worker` | `200` |
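
For local development, the same parameters can come from a `.env` file, since rappel falls back to it when an environment variable is unset. A minimal sketch (values are illustrative):

```bash
# .env (read when the corresponding environment variables are not set)
RAPPEL_DATABASE_URL=postgresql://user:pass@localhost:5433/rappel
RAPPEL_WORKER_COUNT=8
RAPPEL_USER_MODULE=my_app.actions
RAPPEL_WEBAPP_ENABLED=true
```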

## Philosophy

Background jobs in webapps are so frequently used that they should really be a primitive of your fullstack library: database, backend, frontend, _and_ background jobs. Otherwise you're stuck in a situation where users either always make blocking requests to an API, or you spin up ephemeral tasks that get killed during re-deployments or an accidental docker crash.

After trying most of the ecosystem over the last 3 years, I believe background jobs should provide a few key features:

- Easy-to-write control flow in normal Python
- Very simple to test locally and very simple to deploy remotely
- Reasonable default configurations that scale to a reasonable request volume without performance tuning

On the point of control flow, we shouldn't be forced into a DAG definition (decorators, custom syntax). It should be regular control flow, distinguished only in that the flows are durable and that some portions of the parallelism can run across machines.

Nothing on the market provides this balance; `rappel` aims to. We don't expect to reach best-in-class load performance. Instead, we intend for rappel to scale _most_ applications well past product-market fit.

## How It Works

Rappel takes a different approach from replay-based workflow engines like Temporal or Vercel Workflow.

| Approach | How it works | Constraint on users |
|----------|-------------|-------------------|
| **Temporal/Vercel Workflows** | Replay-based. Your workflow code re-executes from the beginning on each step; completed activities return cached results. | Code must be deterministic. No `random()`, no `datetime.now()`, no side effects in workflow logic. |
| **Rappel** | Compile-once. Parse your Python AST → intermediate representation → DAG. Execute the DAG directly. Your code never re-runs. | Code must use supported patterns. But once parsed, each node knows where it lives in the computation graph. |

When you decorate a class with `@workflow`, Rappel parses the `run()` method's AST and compiles it to an intermediate representation (IR). This IR captures your control flow (loops, conditionals, parallel branches) as a static directed graph. The DAG is stored in Postgres and executed by the Rust runtime. Your original Python run definition is never re-executed during workflow recovery.

This is convenient in practice because it means that if your workflow compiles, your workflow will run as advertised. There's no need to hack around non-deterministic stdlib functions (like time/uuid/etc) because you'll get a compilation error telling you to move these into an explicit `@action`, where all non-determinism should live.
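
For example, instead of calling `datetime.now()` inside `run()`, the non-deterministic call moves into its own action (a small sketch; the action name is illustrative):

```python
from datetime import datetime, timezone

from rappel import action

@action
async def current_time() -> datetime:
    # Non-determinism lives inside an action, not in workflow control flow.
    return datetime.now(timezone.utc)

# Inside a workflow's run() method you would then write:
#     started_at = await current_time()
```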

## Other options

**When should you use Rappel?**

- You're already using Python & Postgres for the core of your stack, either with Mountaineer or FastAPI
- You have a lot of async-heavy logic that needs to be durable and can be retried if it fails (common with 3rd party API calls, db jobs, etc)
- You want something that works the same locally as when deployed remotely
- You want background job code to plug and play with your existing unit test & static analysis stack
- You are focused on getting to product-market fit versus scale

Performance is a top priority of rappel. That's why it's written with a Rust core, is light on your database connections by isolating them to roughly one pool per machine host, and runs continuous benchmarks on CI. But it's not the _only_ priority. After all, there's only so much we can do with Postgres as an ACID backing store. Once you start to tax Postgres's capabilities, you're probably at the scale where you should switch to a more complicated architecture.

**When shouldn't you?**

- You have particularly latency-sensitive background jobs, where you need <100ms acknowledgement and handling of each task.
- You have a huge scale of concurrent background jobs, on the order of >10k actions being coordinated concurrently.
- You have tried some existing task coordinators and need to scale your solution to the next 10x worth of traffic.

There is no shortage of robust background queues in Python, including ones like Temporal.io/RabbitMQ that scale to millions of requests a second.

Almost all of these require a dedicated task broker that you host alongside your app. This usually isn't a huge deal during POCs but can get complex as you performance tune it for production. Cloud hosting of most of these is billed per event and can get very expensive depending on how you orchestrate your jobs. They also typically force you to migrate your logic to fit the conventions of the framework.

Open source solutions like RabbitMQ have been battle-tested over decades, and large companies like Temporal are able to throw a lot of resources at optimization. Both of these solutions are great choices, just intended to solve for different scopes. Expect a correspondingly higher amount of setup and management complexity.

## Worker Pool

`start-workers` is the main invocation point to boot your worker cluster on a new node. It launches the gRPC bridge plus a polling dispatcher that streams queued actions from Postgres into the Python workers. You should use this as your docker entrypoint:

```bash
$ cargo run --bin start-workers
```
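
When deploying from the published wheel rather than the Cargo workspace, the same binary is installed as a console script, so a container entrypoint can be as simple as the sketch below (the connection string and module name are placeholders):

```bash
# docker entrypoint: boot the bridge + Python workers on this node
export RAPPEL_DATABASE_URL=postgresql://user:pass@db:5432/rappel
export RAPPEL_USER_MODULE=my_app.actions
exec start-workers
```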

## Development

### Packaging

Use the helper script to produce distributable wheels that bundle the Rust executables with the Python package:

```bash
$ uv run scripts/build_wheel.py --out-dir target/wheels
```

The script compiles every Rust binary (release profile), stages the required entrypoints (`rappel-bridge`, `boot-rappel-singleton`) inside the Python package, and invokes `uv build --wheel` to produce an artifact suitable for publishing to PyPI.

### Local Server Runtime

The Rust runtime exposes both HTTP and gRPC APIs via the `rappel-bridge` binary:

```bash
$ cargo run --bin rappel-bridge
```

Developers can either launch it directly or rely on the `boot-rappel-singleton` helper, which finds (or starts) a single shared instance on `127.0.0.1:24117`. The helper prints the active HTTP port to stdout so Python clients can connect without additional configuration:

```bash
$ cargo run --bin boot-rappel-singleton
24117
```

The Python bridge automatically shells out to the helper unless you provide `RAPPEL_SERVER_URL` (or `RAPPEL_GRPC_ADDR` for direct sockets) as an override. Once the ports are known, it opens a gRPC channel to the `WorkflowService`.
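
To skip the singleton helper and point the Python bridge at an already-running `rappel-bridge`, set the overrides before starting your app. The `http://` scheme below is an assumption; the ports mirror the defaults from the configuration tables:

```bash
export RAPPEL_SERVER_URL=http://127.0.0.1:24117   # bridge HTTP API
export RAPPEL_GRPC_ADDR=127.0.0.1:24118           # direct gRPC socket (HTTP port + 1)
```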

### Benchmarking

Stream benchmark output directly into our parser to summarize throughput and latency samples:

```bash
$ cargo run --bin bench -- \
    --messages 100000 \
    --payload 1024 \
    --concurrency 64 \
    --workers 4 \
    --log-interval 15 \
    | uv run python/tools/parse_bench_logs.py
```

The `bench` binary seeds raw actions to measure dequeue/execute/ack throughput. Use `bench_instances` for an end-to-end workflow run (queueing and executing full workflow instances via the scheduler) without installing a separate `rappel-worker` binary; the harness shells out to `uv run python -m rappel.worker` automatically:

```bash
$ cargo run --bin bench_instances -- \
    --instances 200 \
    --batch-size 4 \
    --payload-size 1024 \
    --concurrency 64 \
    --workers 4
```

Add `--json` to the parser if you prefer JSON output.
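
For example, assuming the same pipeline as above:

```bash
$ cargo run --bin bench -- --messages 100000 --workers 4 \
    | uv run python/tools/parse_bench_logs.py --json
```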

rappel-0.5.1.dist-info/RECORD
@@ -0,0 +1,32 @@

proto/ast_pb2.py,sha256=W3b0smmQe7cHotEj9eVWo5iFJLG8snzh1URa8_3oYis,12866
proto/ast_pb2.pyi,sha256=b0a2vy6QoeW1YCMOy4_VLxd2Pj_P-WbK1uwZ99BEkN4,47381
proto/ast_pb2_grpc.py,sha256=KJj-_ba1ZxWWYph-D7UShEDiZ2g3HrjffT-Ch9wtir0,868
proto/ast_pb2_grpc.pyi,sha256=ItuwUBKMrD0GfjDvwsarqytfyBpzDqQ_TMiJ0FNIoog,466
proto/messages_pb2.py,sha256=MdksC6bnrrsgCC-8FDMFsrpzbGAJvSuu7XM71C4epQw,12650
proto/messages_pb2.pyi,sha256=Wb65R4JmyWQYsoG2pgE4SrR7Sf7drWuGjDM2GpM1P_0,39759
proto/messages_pb2_grpc.py,sha256=6akfrXSv9w5oaWmMcnxUFilM5p_fIEIowqueUp3bAqk,15846
proto/messages_pb2_grpc.pyi,sha256=aN1cqB6D4Cn8npN_bh9Rz-v0VXmljsQYjf3zCM5FjrU,12322
rappel/__init__.py,sha256=LKQpsR-Pj6wbu02DYYLr6lUTgsWDznQ8GIzu_ezyeL0,1335
rappel/actions.py,sha256=ayfsBMU4rXR8Qnz6Sl7D01M7nGz42fr3Sf0c9yM2NPk,3847
rappel/bridge.py,sha256=wPBNsjK1iWqTZgerKbbMz3Z_UA5GPavFk_N4H9dniQc,8128
rappel/dependencies.py,sha256=W9GpaLFc1vfQ3TsIMjvebOICTpQ0-tNInURmPt7BjIY,5640
rappel/exceptions.py,sha256=MvX6HUxjMcHBq-pEnKRubhdMwA8irSiWvm3VVyyjn5Y,337
rappel/formatter.py,sha256=qLmnVa_hGjv6ydw4lDVq3n6FMs-Rpk9Zz9AqW23IGII,3113
rappel/ir_builder.py,sha256=W2KGHkB-8HxNQybFN_y4FxgH5FquLjTAkTfBtVOT8t4,118064
rappel/logger.py,sha256=NXK2GM4k_zPFG9yUJKjR9B3UZth2QWc2nts9IQU1s2Q,1118
rappel/registry.py,sha256=-bLrFPcLNYYKTjqPMWuerSuwkOpGqmzzEsXiuPPrcZA,3526
rappel/schedule.py,sha256=8YhhFT4iwagiZsCyHop7BsX0MmCpsbjFiMrjsokcYr0,10923
rappel/serialization.py,sha256=1p-Qp_5nxlRGF9-0c2ZlVnFxI5TVodw1HFv79RBO8MA,9891
rappel/worker.py,sha256=W-zTY25cR72jX-ubVmNipFiR52EDmcamndoFmAj4DOw,6836
rappel/workflow.py,sha256=oks205Lb8_A581v2DlawbYf98duQo-amgtcczItKvM8,8061
rappel/workflow_runtime.py,sha256=fdFQEh6uJ2wOfevS7x_ZtTqAXpufGchJnt950x5vpfQ,9038
rappel/bin/boot-rappel-singleton,sha256=i-PLDg_mq-2yE3jr_XbVIsEeyjBffTeyuU7-tq8a9qY,6431168
rappel/bin/rappel-bridge,sha256=b2SOggxeugwb-beshDm6EYZWQEXTrbBbY0Tpb8h2mng,11047152
rappel/bin/start-workers,sha256=vFr1ifMojZJv0KbtFp_8feUtH4F9kY1ppS36JeyVSzo,19272296
rappel-0.5.1.dist-info/METADATA,sha256=l8zTZgAgJZYQfUhaFcPVgdSI0w7ZFGAyen02d4oMe0s,15738
rappel-0.5.1.dist-info/entry_points.txt,sha256=h9D-AufOUWpdE7XjnyZyQCc-kER-ZIKj1Jryc1JNL_I,53
rappel-0.5.1.dist-info/RECORD,,
rappel-0.5.1.data/scripts/rappel-bridge,sha256=b2SOggxeugwb-beshDm6EYZWQEXTrbBbY0Tpb8h2mng,11047152
rappel-0.5.1.data/scripts/boot-rappel-singleton,sha256=i-PLDg_mq-2yE3jr_XbVIsEeyjBffTeyuU7-tq8a9qY,6431168
rappel-0.5.1.data/scripts/start-workers,sha256=vFr1ifMojZJv0KbtFp_8feUtH4F9kY1ppS36JeyVSzo,19272296
rappel-0.5.1.dist-info/WHEEL,sha256=74UJPlMKWRubwKveoWxggMGihpugjX6MX_YPU_nD8sg,107