edda-framework 0.9.1__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edda/app.py +419 -26
- edda/integrations/mirascope/__init__.py +78 -0
- edda/integrations/mirascope/agent.py +467 -0
- edda/integrations/mirascope/call.py +166 -0
- edda/integrations/mirascope/decorator.py +163 -0
- edda/integrations/mirascope/types.py +268 -0
- edda/outbox/relayer.py +21 -2
- edda/storage/__init__.py +8 -0
- edda/storage/notify_base.py +162 -0
- edda/storage/pg_notify.py +325 -0
- edda/storage/protocol.py +9 -1
- edda/storage/sqlalchemy_storage.py +193 -13
- edda/viewer_ui/app.py +26 -0
- edda/viewer_ui/data_service.py +4 -0
- {edda_framework-0.9.1.dist-info → edda_framework-0.11.0.dist-info}/METADATA +17 -1
- {edda_framework-0.9.1.dist-info → edda_framework-0.11.0.dist-info}/RECORD +19 -12
- {edda_framework-0.9.1.dist-info → edda_framework-0.11.0.dist-info}/WHEEL +0 -0
- {edda_framework-0.9.1.dist-info → edda_framework-0.11.0.dist-info}/entry_points.txt +0 -0
- {edda_framework-0.9.1.dist-info → edda_framework-0.11.0.dist-info}/licenses/LICENSE +0 -0
edda/app.py
CHANGED
|
@@ -8,10 +8,13 @@ application for handling CloudEvents and executing workflows.
|
|
|
8
8
|
import asyncio
|
|
9
9
|
import json
|
|
10
10
|
import logging
|
|
11
|
+
import math
|
|
11
12
|
import random
|
|
12
13
|
import sys
|
|
14
|
+
import time
|
|
13
15
|
from collections.abc import Callable
|
|
14
|
-
from
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import Any, Literal
|
|
15
18
|
|
|
16
19
|
import uvloop
|
|
17
20
|
from cloudevents.exceptions import GenericException as CloudEventsException
|
|
@@ -55,6 +58,11 @@ class EddaApp:
|
|
|
55
58
|
pool_timeout: int = 30,
|
|
56
59
|
pool_recycle: int = 3600,
|
|
57
60
|
pool_pre_ping: bool = True,
|
|
61
|
+
# PostgreSQL LISTEN/NOTIFY settings
|
|
62
|
+
use_listen_notify: bool | None = None,
|
|
63
|
+
notify_fallback_interval: int = 30,
|
|
64
|
+
# Batch processing settings
|
|
65
|
+
max_workflows_per_batch: int | Literal["auto", "auto:cpu"] = 10,
|
|
58
66
|
):
|
|
59
67
|
"""
|
|
60
68
|
Initialize Edda application.
|
|
@@ -81,6 +89,17 @@ class EddaApp:
|
|
|
81
89
|
Helps prevent stale connections. Ignored for SQLite.
|
|
82
90
|
pool_pre_ping: If True, test connections before use (default: True).
|
|
83
91
|
Helps detect disconnected connections. Ignored for SQLite.
|
|
92
|
+
use_listen_notify: Enable PostgreSQL LISTEN/NOTIFY for instant notifications.
|
|
93
|
+
None (default) = auto-detect (enabled for PostgreSQL, disabled for others).
|
|
94
|
+
True = force enable (raises error if not PostgreSQL).
|
|
95
|
+
False = force disable (use polling only).
|
|
96
|
+
notify_fallback_interval: Polling interval in seconds when NOTIFY is enabled.
|
|
97
|
+
Used as backup for missed notifications. Default: 30 seconds.
|
|
98
|
+
SQLite/MySQL always use their default polling intervals.
|
|
99
|
+
max_workflows_per_batch: Maximum workflows to process per resume cycle.
|
|
100
|
+
- int: Fixed batch size (default: 10)
|
|
101
|
+
- "auto": Scale 10-100 based on queue depth
|
|
102
|
+
- "auto:cpu": Scale 10-100 based on CPU utilization (requires psutil)
|
|
84
103
|
"""
|
|
85
104
|
self.db_url = db_url
|
|
86
105
|
self.service_name = service_name
|
|
@@ -99,6 +118,12 @@ class EddaApp:
|
|
|
99
118
|
self._pool_recycle = pool_recycle
|
|
100
119
|
self._pool_pre_ping = pool_pre_ping
|
|
101
120
|
|
|
121
|
+
# PostgreSQL LISTEN/NOTIFY settings
|
|
122
|
+
self._use_listen_notify = use_listen_notify
|
|
123
|
+
self._notify_fallback_interval = notify_fallback_interval
|
|
124
|
+
self._notify_listener: Any = None
|
|
125
|
+
self._notify_enabled = False
|
|
126
|
+
|
|
102
127
|
# Generate unique worker ID for this process
|
|
103
128
|
self.worker_id = generate_worker_id(service_name)
|
|
104
129
|
|
|
@@ -118,6 +143,31 @@ class EddaApp:
|
|
|
118
143
|
self._background_tasks: list[asyncio.Task[Any]] = []
|
|
119
144
|
self._initialized = False
|
|
120
145
|
|
|
146
|
+
# Wake event for notify-triggered background tasks
|
|
147
|
+
self._resume_wake_event: asyncio.Event | None = None
|
|
148
|
+
self._outbox_wake_event: asyncio.Event | None = None
|
|
149
|
+
|
|
150
|
+
# Rate limiting for NOTIFY handlers (to reduce thundering herd)
|
|
151
|
+
self._last_resume_notify_time: float = 0.0
|
|
152
|
+
self._last_outbox_notify_time: float = 0.0
|
|
153
|
+
self._notify_rate_limit: float = 0.1 # 100ms minimum interval
|
|
154
|
+
|
|
155
|
+
# Batch processing settings for load balancing
|
|
156
|
+
if isinstance(max_workflows_per_batch, int):
|
|
157
|
+
self._max_workflows_per_batch: int = max_workflows_per_batch
|
|
158
|
+
self._batch_size_strategy: str | None = None
|
|
159
|
+
elif max_workflows_per_batch == "auto":
|
|
160
|
+
self._max_workflows_per_batch = 10 # Initial value
|
|
161
|
+
self._batch_size_strategy = "queue" # Scale based on queue depth
|
|
162
|
+
elif max_workflows_per_batch == "auto:cpu":
|
|
163
|
+
self._max_workflows_per_batch = 10 # Initial value
|
|
164
|
+
self._batch_size_strategy = "cpu" # Scale based on CPU utilization
|
|
165
|
+
else:
|
|
166
|
+
raise ValueError(
|
|
167
|
+
f"Invalid max_workflows_per_batch: {max_workflows_per_batch}. "
|
|
168
|
+
"Must be int, 'auto', or 'auto:cpu'."
|
|
169
|
+
)
|
|
170
|
+
|
|
121
171
|
def _create_storage(self, db_url: str) -> SQLAlchemyStorage:
|
|
122
172
|
"""
|
|
123
173
|
Create storage backend from database URL.
|
|
@@ -166,6 +216,162 @@ class EddaApp:
|
|
|
166
216
|
|
|
167
217
|
return SQLAlchemyStorage(engine)
|
|
168
218
|
|
|
219
|
+
def _is_postgresql_url(self, db_url: str) -> bool:
|
|
220
|
+
"""Check if the database URL is for PostgreSQL."""
|
|
221
|
+
return db_url.startswith("postgresql")
|
|
222
|
+
|
|
223
|
+
async def _initialize_notify_listener(self) -> None:
    """Start the PostgreSQL LISTEN/NOTIFY listener when configuration allows it.

    ``self._use_listen_notify`` selects the mode:

    - ``None``: auto-detect; enabled only when ``self.db_url`` is a PostgreSQL URL.
    - ``True``: force enable; a non-PostgreSQL URL is rejected.
    - ``False``: force disable; polling only.

    On success the listener is started, handed to ``self.storage``, both wake
    events are created, the channels are subscribed and
    ``self._notify_enabled`` becomes True. Any failure on that path is logged
    and leaves ``self._notify_enabled`` False (polling-only mode).

    Raises:
        ValueError: If ``use_listen_notify`` is True but the database URL is
            not a PostgreSQL URL.
    """
    is_pg = self._is_postgresql_url(self.db_url)
    db_type = self.db_url.split(":")[0]

    # Determine if we should use NOTIFY
    if self._use_listen_notify is None:
        # Auto-detect: enable for PostgreSQL only
        should_use_notify = is_pg
    elif self._use_listen_notify:
        # Force enable: error if not PostgreSQL
        if not is_pg:
            raise ValueError(
                "use_listen_notify=True requires PostgreSQL database. "
                f"Current database URL starts with: {db_type}"
            )
        should_use_notify = True
    else:
        # Force disable
        should_use_notify = False

    if not should_use_notify:
        if is_pg:
            # Only reachable when the caller explicitly turned NOTIFY off;
            # saying "not available" for PostgreSQL would be misleading.
            logger.info(
                f"LISTEN/NOTIFY disabled by configuration for {db_type}, "
                "using polling-only mode (default intervals)"
            )
        else:
            logger.info(
                f"LISTEN/NOTIFY not available for {db_type}, "
                "using polling-only mode (default intervals)"
            )
        return

    try:
        from edda.storage.pg_notify import PostgresNotifyListener

        # Convert SQLAlchemy URL to asyncpg DSN format
        asyncpg_dsn = self._get_asyncpg_dsn(self.db_url)

        self._notify_listener = PostgresNotifyListener(dsn=asyncpg_dsn)
        await self._notify_listener.start()

        # Set listener on storage for NOTIFY calls
        self.storage.set_notify_listener(self._notify_listener)

        # Initialize wake events for background tasks
        self._resume_wake_event = asyncio.Event()
        self._outbox_wake_event = asyncio.Event()

        # Subscribe to notification channels
        await self._setup_notify_subscriptions()

        self._notify_enabled = True
        logger.info(
            "PostgreSQL LISTEN/NOTIFY enabled "
            f"(fallback polling interval: {self._notify_fallback_interval}s)"
        )

    except ImportError:
        # The install hint uses the distribution name (edda-framework), the
        # same name the psutil hint in this class uses.
        logger.warning(
            "asyncpg not installed, falling back to polling-only mode. "
            "Install with: pip install edda-framework[postgres-notify]"
        )
        self._notify_enabled = False
    except Exception as e:
        # NOTE(review): a listener object created before the failure stays in
        # self._notify_listener; it is not reset here.
        logger.warning(
            f"Failed to initialize NOTIFY listener: {e}. "
            "Falling back to polling-only mode."
        )
        self._notify_enabled = False
|
|
293
|
+
|
|
294
|
+
def _get_asyncpg_dsn(self, db_url: str) -> str:
|
|
295
|
+
"""Convert SQLAlchemy PostgreSQL URL to asyncpg DSN format.
|
|
296
|
+
|
|
297
|
+
SQLAlchemy format: postgresql+asyncpg://user:pass@host/db
|
|
298
|
+
asyncpg format: postgresql://user:pass@host/db
|
|
299
|
+
"""
|
|
300
|
+
# Remove +asyncpg driver suffix if present
|
|
301
|
+
if "+asyncpg" in db_url:
|
|
302
|
+
return db_url.replace("+asyncpg", "")
|
|
303
|
+
return db_url
|
|
304
|
+
|
|
305
|
+
async def _setup_notify_subscriptions(self) -> None:
    """Register this app's handlers on the three NOTIFY channels.

    Does nothing when no listener has been created. Otherwise subscribes, in
    order, to ``edda_workflow_resumable``, ``edda_outbox_pending`` and
    ``edda_timer_expired``.
    """
    listener = self._notify_listener
    if listener is None:
        return

    # (channel name, callback) pairs, in subscription order.
    channel_handlers = (
        ("edda_workflow_resumable", self._on_workflow_resumable_notify),
        ("edda_outbox_pending", self._on_outbox_pending_notify),
        ("edda_timer_expired", self._on_timer_expired_notify),
    )
    for channel, handler in channel_handlers:
        await listener.subscribe(channel, handler)

    logger.debug("Subscribed to NOTIFY channels")
|
|
329
|
+
|
|
330
|
+
async def _on_workflow_resumable_notify(self, _payload: str) -> None:
|
|
331
|
+
"""Handle workflow resumable notification with rate limiting."""
|
|
332
|
+
try:
|
|
333
|
+
# Rate limit to reduce thundering herd
|
|
334
|
+
now = time.monotonic()
|
|
335
|
+
if now - self._last_resume_notify_time < self._notify_rate_limit:
|
|
336
|
+
return # Skip if within rate limit window
|
|
337
|
+
self._last_resume_notify_time = now
|
|
338
|
+
|
|
339
|
+
# Wake up the resume polling loop
|
|
340
|
+
if self._resume_wake_event is not None:
|
|
341
|
+
self._resume_wake_event.set()
|
|
342
|
+
except Exception as e:
|
|
343
|
+
logger.warning(f"Error handling workflow resumable notify: {e}")
|
|
344
|
+
|
|
345
|
+
async def _on_outbox_pending_notify(self, _payload: str) -> None:
|
|
346
|
+
"""Handle outbox pending notification with rate limiting."""
|
|
347
|
+
try:
|
|
348
|
+
# Rate limit to reduce thundering herd
|
|
349
|
+
now = time.monotonic()
|
|
350
|
+
if now - self._last_outbox_notify_time < self._notify_rate_limit:
|
|
351
|
+
return # Skip if within rate limit window
|
|
352
|
+
self._last_outbox_notify_time = now
|
|
353
|
+
|
|
354
|
+
# Wake up the outbox polling loop
|
|
355
|
+
if self._outbox_wake_event is not None:
|
|
356
|
+
self._outbox_wake_event.set()
|
|
357
|
+
except Exception as e:
|
|
358
|
+
logger.warning(f"Error handling outbox pending notify: {e}")
|
|
359
|
+
|
|
360
|
+
async def _on_timer_expired_notify(self, _payload: str) -> None:
|
|
361
|
+
"""Handle timer expired notification with rate limiting."""
|
|
362
|
+
try:
|
|
363
|
+
# Rate limit (shares with workflow resumable since they use same event)
|
|
364
|
+
now = time.monotonic()
|
|
365
|
+
if now - self._last_resume_notify_time < self._notify_rate_limit:
|
|
366
|
+
return # Skip if within rate limit window
|
|
367
|
+
self._last_resume_notify_time = now
|
|
368
|
+
|
|
369
|
+
# Wake up the resume polling loop (timer expiry leads to workflow resume)
|
|
370
|
+
if self._resume_wake_event is not None:
|
|
371
|
+
self._resume_wake_event.set()
|
|
372
|
+
except Exception as e:
|
|
373
|
+
logger.warning(f"Error handling timer expired notify: {e}")
|
|
374
|
+
|
|
169
375
|
async def initialize(self) -> None:
|
|
170
376
|
"""
|
|
171
377
|
Initialize the application.
|
|
@@ -185,6 +391,9 @@ class EddaApp:
|
|
|
185
391
|
# Initialize storage
|
|
186
392
|
await self.storage.initialize()
|
|
187
393
|
|
|
394
|
+
# Initialize LISTEN/NOTIFY if enabled
|
|
395
|
+
await self._initialize_notify_listener()
|
|
396
|
+
|
|
188
397
|
# Initialize replay engine
|
|
189
398
|
self.replay_engine = ReplayEngine(
|
|
190
399
|
storage=self.storage,
|
|
@@ -200,12 +409,17 @@ class EddaApp:
|
|
|
200
409
|
# Initialize outbox relayer if enabled
|
|
201
410
|
if self.outbox_enabled:
|
|
202
411
|
assert self.broker_url is not None # Validated in __init__
|
|
412
|
+
# Use longer poll interval with NOTIFY fallback
|
|
413
|
+
outbox_poll_interval = (
|
|
414
|
+
float(self._notify_fallback_interval) if self._notify_enabled else 1.0
|
|
415
|
+
)
|
|
203
416
|
self.outbox_relayer = OutboxRelayer(
|
|
204
417
|
storage=self.storage,
|
|
205
418
|
broker_url=self.broker_url,
|
|
206
|
-
poll_interval=
|
|
419
|
+
poll_interval=outbox_poll_interval,
|
|
207
420
|
max_retries=3,
|
|
208
421
|
batch_size=10,
|
|
422
|
+
wake_event=self._outbox_wake_event,
|
|
209
423
|
)
|
|
210
424
|
await self.outbox_relayer.start()
|
|
211
425
|
|
|
@@ -227,6 +441,14 @@ class EddaApp:
|
|
|
227
441
|
if self.outbox_relayer:
|
|
228
442
|
await self.outbox_relayer.stop()
|
|
229
443
|
|
|
444
|
+
# Stop NOTIFY listener if enabled
|
|
445
|
+
if self._notify_listener is not None:
|
|
446
|
+
try:
|
|
447
|
+
await self._notify_listener.stop()
|
|
448
|
+
logger.info("NOTIFY listener stopped")
|
|
449
|
+
except Exception as e:
|
|
450
|
+
logger.warning(f"Error stopping NOTIFY listener: {e}")
|
|
451
|
+
|
|
230
452
|
# Cancel background tasks
|
|
231
453
|
for task in self._background_tasks:
|
|
232
454
|
task.cancel()
|
|
@@ -588,9 +810,18 @@ class EddaApp:
|
|
|
588
810
|
logger.warning("No activity_id in timer for %s, skipping", instance_id)
|
|
589
811
|
continue
|
|
590
812
|
|
|
591
|
-
#
|
|
592
|
-
#
|
|
593
|
-
|
|
813
|
+
# Check if workflow is registered in this worker BEFORE acquiring lock
|
|
814
|
+
# In multi-app environments, another worker may own this workflow
|
|
815
|
+
from edda.workflow import get_all_workflows
|
|
816
|
+
|
|
817
|
+
workflows = get_all_workflows()
|
|
818
|
+
if workflow_name not in workflows:
|
|
819
|
+
logger.debug(
|
|
820
|
+
"Skipping timer for unregistered workflow: " "instance_id=%s, workflow_name=%s",
|
|
821
|
+
instance_id,
|
|
822
|
+
workflow_name,
|
|
823
|
+
)
|
|
824
|
+
continue # Let another worker handle it
|
|
594
825
|
|
|
595
826
|
# Distributed Coroutines: Acquire lock FIRST to prevent race conditions
|
|
596
827
|
# This ensures only ONE pod processes this timer, even if multiple pods
|
|
@@ -709,6 +940,25 @@ class EddaApp:
|
|
|
709
940
|
instance_id = subscription["instance_id"]
|
|
710
941
|
channel = subscription["channel"]
|
|
711
942
|
timeout_at = subscription["timeout_at"]
|
|
943
|
+
workflow_name = subscription.get("workflow_name")
|
|
944
|
+
|
|
945
|
+
if not workflow_name:
|
|
946
|
+
logger.warning("No workflow_name in subscription for %s, skipping", instance_id)
|
|
947
|
+
continue
|
|
948
|
+
|
|
949
|
+
# Check if workflow is registered in this worker BEFORE acquiring lock
|
|
950
|
+
# In multi-app environments, another worker may own this workflow
|
|
951
|
+
from edda.workflow import get_all_workflows
|
|
952
|
+
|
|
953
|
+
workflows = get_all_workflows()
|
|
954
|
+
if workflow_name not in workflows:
|
|
955
|
+
logger.debug(
|
|
956
|
+
"Skipping message subscription for unregistered workflow: "
|
|
957
|
+
"instance_id=%s, workflow_name=%s",
|
|
958
|
+
instance_id,
|
|
959
|
+
workflow_name,
|
|
960
|
+
)
|
|
961
|
+
continue # Let another worker handle it
|
|
712
962
|
|
|
713
963
|
# Lock-First pattern: Try to acquire the lock before processing
|
|
714
964
|
# If we can't get the lock, another worker is processing this workflow
|
|
@@ -778,14 +1028,6 @@ class EddaApp:
|
|
|
778
1028
|
|
|
779
1029
|
# 3. Resume workflow (lock already held - distributed coroutine pattern)
|
|
780
1030
|
# The workflow will replay and receive() will raise TimeoutError from cached history
|
|
781
|
-
workflow_name = subscription.get("workflow_name")
|
|
782
|
-
if not workflow_name:
|
|
783
|
-
logger.warning(
|
|
784
|
-
"No workflow_name in subscription for %s, skipping",
|
|
785
|
-
instance_id,
|
|
786
|
-
)
|
|
787
|
-
continue
|
|
788
|
-
|
|
789
1031
|
if self.replay_engine is None:
|
|
790
1032
|
logger.error("Replay engine not initialized")
|
|
791
1033
|
continue
|
|
@@ -830,27 +1072,51 @@ class EddaApp:
|
|
|
830
1072
|
|
|
831
1073
|
This provides fast resumption after message delivery. When deliver_message()
|
|
832
1074
|
sets a workflow's status to 'running' and releases the lock, this task
|
|
833
|
-
will pick it up
|
|
1075
|
+
will pick it up and resume it.
|
|
834
1076
|
|
|
835
|
-
|
|
836
|
-
-
|
|
1077
|
+
When NOTIFY is enabled:
|
|
1078
|
+
- Wakes up immediately when notified via _resume_wake_event
|
|
1079
|
+
- Falls back to notify_fallback_interval (default 30s) if no notifications
|
|
1080
|
+
|
|
1081
|
+
When NOTIFY is disabled (SQLite/MySQL):
|
|
1082
|
+
- Uses adaptive backoff to reduce DB load when no workflows are ready
|
|
1083
|
+
- When workflows are processed, uses base interval (1s)
|
|
837
1084
|
- When no workflows found, exponentially backs off up to 60 seconds
|
|
838
1085
|
- Always adds jitter to prevent thundering herd in multi-pod deployments
|
|
839
1086
|
|
|
840
1087
|
Args:
|
|
841
|
-
interval:
|
|
1088
|
+
interval: Base check interval in seconds (default: 1)
|
|
842
1089
|
"""
|
|
843
1090
|
consecutive_empty = 0 # Track empty results for adaptive backoff
|
|
1091
|
+
|
|
1092
|
+
# Use longer fallback interval when NOTIFY is enabled
|
|
1093
|
+
effective_interval = self._notify_fallback_interval if self._notify_enabled else interval
|
|
1094
|
+
|
|
844
1095
|
while True:
|
|
845
1096
|
try:
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
1097
|
+
if self._notify_enabled and self._resume_wake_event is not None:
|
|
1098
|
+
# NOTIFY mode: wait for event or timeout
|
|
1099
|
+
jitter = random.uniform(0, effective_interval * 0.1)
|
|
1100
|
+
try:
|
|
1101
|
+
await asyncio.wait_for(
|
|
1102
|
+
self._resume_wake_event.wait(),
|
|
1103
|
+
timeout=effective_interval + jitter,
|
|
1104
|
+
)
|
|
1105
|
+
# Clear the event for next notification
|
|
1106
|
+
self._resume_wake_event.clear()
|
|
1107
|
+
logger.debug("Resume task woken by NOTIFY")
|
|
1108
|
+
except TimeoutError:
|
|
1109
|
+
# Fallback polling timeout reached
|
|
1110
|
+
pass
|
|
851
1111
|
else:
|
|
852
|
-
|
|
853
|
-
|
|
1112
|
+
# Polling mode: adaptive backoff
|
|
1113
|
+
jitter = random.uniform(0, interval * 0.3)
|
|
1114
|
+
if consecutive_empty > 0:
|
|
1115
|
+
# Exponential backoff: 2s, 4s, 8s, 16s, 32s, max 60s
|
|
1116
|
+
backoff = min(interval * (2 ** min(consecutive_empty, 5)), 60)
|
|
1117
|
+
else:
|
|
1118
|
+
backoff = interval
|
|
1119
|
+
await asyncio.sleep(backoff + jitter)
|
|
854
1120
|
|
|
855
1121
|
count = await self._resume_running_workflows()
|
|
856
1122
|
if count == 0:
|
|
@@ -861,6 +1127,58 @@ class EddaApp:
|
|
|
861
1127
|
consecutive_empty = 0 # Reset on error
|
|
862
1128
|
logger.error("Error in periodic resume check: %s", e, exc_info=True)
|
|
863
1129
|
|
|
1130
|
+
def _calculate_effective_batch_size(self, pending_count: int) -> int:
|
|
1131
|
+
"""
|
|
1132
|
+
Calculate the effective batch size based on the configured strategy.
|
|
1133
|
+
|
|
1134
|
+
Args:
|
|
1135
|
+
pending_count: Number of resumable workflows detected in the previous cycle.
|
|
1136
|
+
|
|
1137
|
+
Returns:
|
|
1138
|
+
Effective batch size to use for the next cycle.
|
|
1139
|
+
|
|
1140
|
+
Strategies:
|
|
1141
|
+
- None (static): Returns the configured _max_workflows_per_batch
|
|
1142
|
+
- "queue": Scales 10-100 based on queue depth
|
|
1143
|
+
- "cpu": Scales 10-100 based on CPU utilization (requires psutil)
|
|
1144
|
+
"""
|
|
1145
|
+
if self._batch_size_strategy is None:
|
|
1146
|
+
return self._max_workflows_per_batch
|
|
1147
|
+
|
|
1148
|
+
base_size = 10
|
|
1149
|
+
max_size = 100
|
|
1150
|
+
|
|
1151
|
+
if self._batch_size_strategy == "queue":
|
|
1152
|
+
# Queue-based scaling: scale up when more workflows are waiting
|
|
1153
|
+
if pending_count <= base_size:
|
|
1154
|
+
return base_size
|
|
1155
|
+
scale_factor = min(math.ceil(pending_count / base_size), max_size // base_size)
|
|
1156
|
+
return min(base_size * scale_factor, max_size)
|
|
1157
|
+
|
|
1158
|
+
elif self._batch_size_strategy == "cpu":
|
|
1159
|
+
# CPU-based scaling: scale up when CPU is idle, down when busy
|
|
1160
|
+
try:
|
|
1161
|
+
import psutil # type: ignore[import-untyped]
|
|
1162
|
+
|
|
1163
|
+
cpu_percent = psutil.cpu_percent(interval=None) # Non-blocking
|
|
1164
|
+
|
|
1165
|
+
if cpu_percent < 30:
|
|
1166
|
+
return max_size # Low load: process aggressively
|
|
1167
|
+
elif cpu_percent < 50:
|
|
1168
|
+
return 50 # Medium load
|
|
1169
|
+
elif cpu_percent < 70:
|
|
1170
|
+
return 20 # Higher load
|
|
1171
|
+
else:
|
|
1172
|
+
return base_size # High load: process conservatively
|
|
1173
|
+
except ImportError:
|
|
1174
|
+
logger.warning(
|
|
1175
|
+
"psutil not installed, falling back to default batch size. "
|
|
1176
|
+
"Install with: pip install edda-framework[cpu-monitor]"
|
|
1177
|
+
)
|
|
1178
|
+
return self._max_workflows_per_batch
|
|
1179
|
+
|
|
1180
|
+
return self._max_workflows_per_batch
|
|
1181
|
+
|
|
864
1182
|
async def _resume_running_workflows(self) -> int:
|
|
865
1183
|
"""
|
|
866
1184
|
Find and resume workflows that are ready to run.
|
|
@@ -868,13 +1186,21 @@ class EddaApp:
|
|
|
868
1186
|
Finds workflows with status='running' that don't have a lock,
|
|
869
1187
|
acquires a lock, and resumes them.
|
|
870
1188
|
|
|
1189
|
+
Uses batch limiting to ensure fair load distribution across workers.
|
|
1190
|
+
Supports static batch size and dynamic auto-scaling strategies.
|
|
1191
|
+
|
|
871
1192
|
Returns:
|
|
872
1193
|
Number of workflows successfully processed (lock acquired and resumed).
|
|
873
1194
|
"""
|
|
874
|
-
|
|
1195
|
+
effective_batch = self._max_workflows_per_batch
|
|
1196
|
+
resumable = await self.storage.find_resumable_workflows(limit=effective_batch)
|
|
875
1197
|
processed_count = 0
|
|
876
1198
|
|
|
877
1199
|
for workflow_info in resumable:
|
|
1200
|
+
# Batch limit for load balancing across workers
|
|
1201
|
+
if processed_count >= effective_batch:
|
|
1202
|
+
break
|
|
1203
|
+
|
|
878
1204
|
instance_id = workflow_info["instance_id"]
|
|
879
1205
|
workflow_name = workflow_info["workflow_name"]
|
|
880
1206
|
|
|
@@ -882,7 +1208,7 @@ class EddaApp:
|
|
|
882
1208
|
# Try to acquire lock (Lock-First pattern)
|
|
883
1209
|
lock_acquired = await self.storage.try_acquire_lock(instance_id, self.worker_id)
|
|
884
1210
|
if not lock_acquired:
|
|
885
|
-
# Another worker got it first, skip
|
|
1211
|
+
# Another worker got it first, skip (doesn't count toward limit)
|
|
886
1212
|
continue
|
|
887
1213
|
|
|
888
1214
|
try:
|
|
@@ -901,6 +1227,10 @@ class EddaApp:
|
|
|
901
1227
|
except Exception as e:
|
|
902
1228
|
logger.error("Error resuming %s: %s", instance_id, e, exc_info=True)
|
|
903
1229
|
|
|
1230
|
+
# Update batch size for next cycle (auto modes only)
|
|
1231
|
+
if self._batch_size_strategy is not None:
|
|
1232
|
+
self._max_workflows_per_batch = self._calculate_effective_batch_size(len(resumable))
|
|
1233
|
+
|
|
904
1234
|
return processed_count
|
|
905
1235
|
|
|
906
1236
|
async def _cleanup_old_messages_periodically(
|
|
@@ -948,6 +1278,69 @@ class EddaApp:
|
|
|
948
1278
|
except Exception as e:
|
|
949
1279
|
logger.error("Error cleaning up old messages: %s", e, exc_info=True)
|
|
950
1280
|
|
|
1281
|
+
# -------------------------------------------------------------------------
|
|
1282
|
+
# Query API Methods
|
|
1283
|
+
# -------------------------------------------------------------------------
|
|
1284
|
+
|
|
1285
|
+
async def find_instances(
|
|
1286
|
+
self,
|
|
1287
|
+
*,
|
|
1288
|
+
input_filters: dict[str, Any] | None = None,
|
|
1289
|
+
status: str | None = None,
|
|
1290
|
+
workflow_name: str | None = None,
|
|
1291
|
+
instance_id: str | None = None,
|
|
1292
|
+
started_after: datetime | None = None,
|
|
1293
|
+
started_before: datetime | None = None,
|
|
1294
|
+
limit: int = 50,
|
|
1295
|
+
page_token: str | None = None,
|
|
1296
|
+
) -> dict[str, Any]:
|
|
1297
|
+
"""
|
|
1298
|
+
Find workflow instances with filtering support.
|
|
1299
|
+
|
|
1300
|
+
This is a high-level API for querying workflow instances by various
|
|
1301
|
+
criteria, including input parameter values.
|
|
1302
|
+
|
|
1303
|
+
Args:
|
|
1304
|
+
input_filters: Filter by input data values. Keys are JSON paths,
|
|
1305
|
+
values are expected values (exact match).
|
|
1306
|
+
Example: {"order_id": "ORD-123"}
|
|
1307
|
+
status: Filter by workflow status (e.g., "running", "completed")
|
|
1308
|
+
workflow_name: Filter by workflow name (partial match, case-insensitive)
|
|
1309
|
+
instance_id: Filter by instance ID (partial match, case-insensitive)
|
|
1310
|
+
started_after: Filter instances started after this datetime (inclusive)
|
|
1311
|
+
started_before: Filter instances started before this datetime (inclusive)
|
|
1312
|
+
limit: Maximum number of instances to return per page (default: 50)
|
|
1313
|
+
page_token: Cursor for pagination (from previous response)
|
|
1314
|
+
|
|
1315
|
+
Returns:
|
|
1316
|
+
Dictionary containing:
|
|
1317
|
+
- instances: List of matching workflow instances
|
|
1318
|
+
- next_page_token: Cursor for the next page, or None if no more pages
|
|
1319
|
+
- has_more: Boolean indicating if there are more pages
|
|
1320
|
+
|
|
1321
|
+
Example:
|
|
1322
|
+
>>> # Find all instances with order_id = "ORD-123"
|
|
1323
|
+
>>> result = await app.find_instances(input_filters={"order_id": "ORD-123"})
|
|
1324
|
+
>>> for instance in result["instances"]:
|
|
1325
|
+
... print(f"{instance['instance_id']}: {instance['status']}")
|
|
1326
|
+
|
|
1327
|
+
>>> # Find running instances with specific customer
|
|
1328
|
+
>>> result = await app.find_instances(
|
|
1329
|
+
... input_filters={"customer_id": "CUST-456"},
|
|
1330
|
+
... status="running"
|
|
1331
|
+
... )
|
|
1332
|
+
"""
|
|
1333
|
+
return await self.storage.list_instances(
|
|
1334
|
+
limit=limit,
|
|
1335
|
+
page_token=page_token,
|
|
1336
|
+
status_filter=status,
|
|
1337
|
+
workflow_name_filter=workflow_name,
|
|
1338
|
+
instance_id_filter=instance_id,
|
|
1339
|
+
started_after=started_after,
|
|
1340
|
+
started_before=started_before,
|
|
1341
|
+
input_filters=input_filters,
|
|
1342
|
+
)
|
|
1343
|
+
|
|
951
1344
|
# -------------------------------------------------------------------------
|
|
952
1345
|
# ASGI Interface
|
|
953
1346
|
# -------------------------------------------------------------------------
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Edda + Mirascope V2 integration for durable LLM calls.
|
|
3
|
+
|
|
4
|
+
This module provides utilities to make LLM calls durable through
|
|
5
|
+
Edda's activity system, enabling automatic caching, retry, and
|
|
6
|
+
crash recovery for LLM operations.
|
|
7
|
+
|
|
8
|
+
Example:
|
|
9
|
+
Using the decorator::
|
|
10
|
+
|
|
11
|
+
from edda import workflow, WorkflowContext
|
|
12
|
+
from edda.integrations.mirascope import durable_call
|
|
13
|
+
|
|
14
|
+
@durable_call("anthropic/claude-sonnet-4-20250514")
|
|
15
|
+
async def summarize(text: str) -> str:
|
|
16
|
+
return f"Summarize: {text}"
|
|
17
|
+
|
|
18
|
+
@workflow
|
|
19
|
+
async def my_workflow(ctx: WorkflowContext, text: str) -> str:
|
|
20
|
+
response = await summarize(ctx, text)
|
|
21
|
+
return response["content"]
|
|
22
|
+
|
|
23
|
+
Using the call function::
|
|
24
|
+
|
|
25
|
+
from edda import workflow, WorkflowContext
|
|
26
|
+
from edda.integrations.mirascope import call
|
|
27
|
+
|
|
28
|
+
@workflow
|
|
29
|
+
async def my_workflow(ctx: WorkflowContext, question: str) -> str:
|
|
30
|
+
response = await call(
|
|
31
|
+
ctx,
|
|
32
|
+
model="anthropic/claude-sonnet-4-20250514",
|
|
33
|
+
prompt=question,
|
|
34
|
+
)
|
|
35
|
+
return response["content"]
|
|
36
|
+
|
|
37
|
+
Using DurableAgent for context-aware conversations::
|
|
38
|
+
|
|
39
|
+
from dataclasses import dataclass
|
|
40
|
+
from mirascope import llm
|
|
41
|
+
from edda import workflow, WorkflowContext
|
|
42
|
+
from edda.integrations.mirascope import DurableAgent, DurableDeps
|
|
43
|
+
|
|
44
|
+
@dataclass
|
|
45
|
+
class MyDeps:
|
|
46
|
+
documents: list[str]
|
|
47
|
+
|
|
48
|
+
class MyAgent(DurableAgent[MyDeps]):
|
|
49
|
+
model = "anthropic/claude-sonnet-4-20250514"
|
|
50
|
+
|
|
51
|
+
def build_prompt(self, ctx, message):
|
|
52
|
+
docs = "\\n".join(ctx.deps.documents)
|
|
53
|
+
return [
|
|
54
|
+
llm.messages.system(f"Documents:\\n{docs}"),
|
|
55
|
+
llm.messages.user(message),
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
@workflow
|
|
59
|
+
async def my_workflow(ctx: WorkflowContext, query: str) -> str:
|
|
60
|
+
deps = MyDeps(documents=["Doc 1", "Doc 2"])
|
|
61
|
+
agent = MyAgent(ctx)
|
|
62
|
+
response = await agent.chat(deps, query)
|
|
63
|
+
return response["content"]
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
from edda.integrations.mirascope.agent import DurableAgent, DurableDeps
|
|
67
|
+
from edda.integrations.mirascope.call import call, call_with_messages
|
|
68
|
+
from edda.integrations.mirascope.decorator import durable_call
|
|
69
|
+
from edda.integrations.mirascope.types import DurableResponse
|
|
70
|
+
|
|
71
|
+
__all__ = [
|
|
72
|
+
"durable_call",
|
|
73
|
+
"call",
|
|
74
|
+
"call_with_messages",
|
|
75
|
+
"DurableAgent",
|
|
76
|
+
"DurableDeps",
|
|
77
|
+
"DurableResponse",
|
|
78
|
+
]
|