flowforge-sdk 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowforge_sdk-0.4.2/.gitignore +68 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/PKG-INFO +1 -1
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/pyproject.toml +1 -1
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/__init__.py +4 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/config.py +51 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/decorators.py +16 -2
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/exceptions.py +55 -5
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/execution.py +22 -10
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/steps.py +148 -17
- flowforge_sdk-0.4.0/.gitignore +0 -57
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/README.md +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/agent.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/agent_def.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/ai/__init__.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/ai/providers.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/client.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/context.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/dev/__init__.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/dev/server.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/integrations/__init__.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/integrations/fastapi.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/network.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/router.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/streaming.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/tools.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/triggers.py +0 -0
- {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/worker.py +0 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Environment and secrets
|
|
2
|
+
.env
|
|
3
|
+
.env.*
|
|
4
|
+
!.env.example
|
|
5
|
+
|
|
6
|
+
# Claude Code — ignore local state (plans, sessions, caches) everywhere.
|
|
7
|
+
.claude/
|
|
8
|
+
# …but at the repo root, commit just the in-repo FlowForge skill so every
|
|
9
|
+
# contributor gets the same domain expertise. Any other .claude/ content
|
|
10
|
+
# (including other user-installed skills under .claude/skills/) stays
|
|
11
|
+
# ignored, and nested .claude/ dirs (e.g. dashboard/.claude/) stay fully
|
|
12
|
+
# ignored.
|
|
13
|
+
!/.claude/
|
|
14
|
+
/.claude/*
|
|
15
|
+
!/.claude/skills/
|
|
16
|
+
/.claude/skills/*
|
|
17
|
+
!/.claude/skills/flowforge/
|
|
18
|
+
!/.claude/skills/flowforge/**
|
|
19
|
+
|
|
20
|
+
# Python
|
|
21
|
+
__pycache__/
|
|
22
|
+
*.py[cod]
|
|
23
|
+
*$py.class
|
|
24
|
+
*.so
|
|
25
|
+
.Python
|
|
26
|
+
.venv/
|
|
27
|
+
venv/
|
|
28
|
+
ENV/
|
|
29
|
+
*.egg-info/
|
|
30
|
+
*.egg
|
|
31
|
+
dist/
|
|
32
|
+
build/
|
|
33
|
+
.pytest_cache/
|
|
34
|
+
.mypy_cache/
|
|
35
|
+
.ruff_cache/
|
|
36
|
+
*.pyo
|
|
37
|
+
*.pyd
|
|
38
|
+
|
|
39
|
+
# Node.js
|
|
40
|
+
node_modules/
|
|
41
|
+
.next/
|
|
42
|
+
out/
|
|
43
|
+
.turbo/
|
|
44
|
+
*.tsbuildinfo
|
|
45
|
+
|
|
46
|
+
# IDE
|
|
47
|
+
.idea/
|
|
48
|
+
.vscode/
|
|
49
|
+
*.swp
|
|
50
|
+
*.swo
|
|
51
|
+
*~
|
|
52
|
+
|
|
53
|
+
# OS
|
|
54
|
+
.DS_Store
|
|
55
|
+
Thumbs.db
|
|
56
|
+
|
|
57
|
+
# Logs
|
|
58
|
+
*.log
|
|
59
|
+
logs/
|
|
60
|
+
|
|
61
|
+
# Testing
|
|
62
|
+
coverage/
|
|
63
|
+
.coverage
|
|
64
|
+
htmlcov/
|
|
65
|
+
|
|
66
|
+
# Docker
|
|
67
|
+
*.pid
|
|
68
|
+
/.openclaude-profile.json
|
|
@@ -9,6 +9,7 @@ from flowforge.config import (
|
|
|
9
9
|
FunctionConfig,
|
|
10
10
|
RateLimit,
|
|
11
11
|
Throttle,
|
|
12
|
+
TokenRateLimit,
|
|
12
13
|
concurrency,
|
|
13
14
|
rate_limit,
|
|
14
15
|
throttle,
|
|
@@ -18,6 +19,7 @@ from flowforge.decorators import function
|
|
|
18
19
|
from flowforge.exceptions import (
|
|
19
20
|
FlowForgeError,
|
|
20
21
|
NonRetryableError,
|
|
22
|
+
RateLimited,
|
|
21
23
|
RetryableError,
|
|
22
24
|
StepCompleted,
|
|
23
25
|
StepError,
|
|
@@ -66,6 +68,7 @@ __all__ = [
|
|
|
66
68
|
# Configuration
|
|
67
69
|
"Concurrency",
|
|
68
70
|
"RateLimit",
|
|
71
|
+
"TokenRateLimit",
|
|
69
72
|
"Throttle",
|
|
70
73
|
"FunctionConfig",
|
|
71
74
|
"concurrency",
|
|
@@ -81,5 +84,6 @@ __all__ = [
|
|
|
81
84
|
"StepFailed",
|
|
82
85
|
"StepTimeout",
|
|
83
86
|
"RetryableError",
|
|
87
|
+
"RateLimited",
|
|
84
88
|
"NonRetryableError",
|
|
85
89
|
]
|
|
@@ -75,6 +75,52 @@ class Throttle:
|
|
|
75
75
|
}
|
|
76
76
|
|
|
77
77
|
|
|
78
|
+
@dataclass
|
|
79
|
+
class TokenRateLimit:
|
|
80
|
+
"""
|
|
81
|
+
Per-model token-budget rate limit for LLM calls inside a function.
|
|
82
|
+
|
|
83
|
+
Caps the tokens consumed per minute for a specific model. Enforced as a
|
|
84
|
+
pre-flight token-bucket check inside the server's AI service: requests
|
|
85
|
+
that would exceed the bucket wait durably (via step.sleep inside the SDK
|
|
86
|
+
retry loop) instead of hitting the provider and getting a 429.
|
|
87
|
+
|
|
88
|
+
Example:
|
|
89
|
+
@flowforge.function(
|
|
90
|
+
id="research",
|
|
91
|
+
rate_limits=[
|
|
92
|
+
TokenRateLimit("claude-sonnet-4-6", tokens_per_minute=25_000),
|
|
93
|
+
],
|
|
94
|
+
)
|
|
95
|
+
"""
|
|
96
|
+
|
|
97
|
+
model: str
|
|
98
|
+
"""Model name the limit applies to (e.g. "claude-sonnet-4-6")."""
|
|
99
|
+
|
|
100
|
+
tokens_per_minute: int
|
|
101
|
+
"""Maximum tokens consumed per minute."""
|
|
102
|
+
|
|
103
|
+
key: str | None = None
|
|
104
|
+
"""
|
|
105
|
+
Optional *literal* grouping key used as a suffix on the Redis bucket so
|
|
106
|
+
distinct values get distinct buckets. When None, the bucket is scoped
|
|
107
|
+
per (tenant, function, model).
|
|
108
|
+
|
|
109
|
+
Note: this is currently a static string, not an expression. The server
|
|
110
|
+
uses it verbatim as the Redis-key suffix; per-event evaluation (e.g.
|
|
111
|
+
``event.data.tenant_id``) is a follow-up. To isolate buckets by caller
|
|
112
|
+
today, pass a concrete value like ``"premium"`` or ``"free"`` from the
|
|
113
|
+
code that constructs the decorator.
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
def to_dict(self) -> dict[str, Any]:
|
|
117
|
+
return {
|
|
118
|
+
"model": self.model,
|
|
119
|
+
"tokens_per_minute": self.tokens_per_minute,
|
|
120
|
+
"key": self.key,
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
|
|
78
124
|
@dataclass
|
|
79
125
|
class Debounce:
|
|
80
126
|
"""
|
|
@@ -130,6 +176,9 @@ class FunctionConfig:
|
|
|
130
176
|
rate_limit: RateLimit | None = None
|
|
131
177
|
"""Rate limiting configuration."""
|
|
132
178
|
|
|
179
|
+
rate_limits: list["TokenRateLimit"] = field(default_factory=list)
|
|
180
|
+
"""Per-model token-budget limits (AC9)."""
|
|
181
|
+
|
|
133
182
|
throttle: Throttle | None = None
|
|
134
183
|
"""Throttle configuration."""
|
|
135
184
|
|
|
@@ -157,6 +206,8 @@ class FunctionConfig:
|
|
|
157
206
|
config["concurrency"] = self.concurrency.to_dict()
|
|
158
207
|
if self.rate_limit:
|
|
159
208
|
config["rate_limit"] = self.rate_limit.to_dict()
|
|
209
|
+
if self.rate_limits:
|
|
210
|
+
config["rate_limits"] = [rl.to_dict() for rl in self.rate_limits]
|
|
160
211
|
if self.throttle:
|
|
161
212
|
config["throttle"] = self.throttle.to_dict()
|
|
162
213
|
if self.debounce:
|
|
@@ -5,7 +5,14 @@ import inspect
|
|
|
5
5
|
from collections.abc import Awaitable, Callable
|
|
6
6
|
from typing import Any, ParamSpec, TypeVar
|
|
7
7
|
|
|
8
|
-
from flowforge.config import
|
|
8
|
+
from flowforge.config import (
|
|
9
|
+
Concurrency,
|
|
10
|
+
Debounce,
|
|
11
|
+
FunctionConfig,
|
|
12
|
+
RateLimit,
|
|
13
|
+
Throttle,
|
|
14
|
+
TokenRateLimit,
|
|
15
|
+
)
|
|
9
16
|
from flowforge.context import Context
|
|
10
17
|
from flowforge.triggers import Trigger
|
|
11
18
|
|
|
@@ -31,6 +38,7 @@ class FlowForgeFunction:
|
|
|
31
38
|
timeout: str = "5m",
|
|
32
39
|
concurrency: Concurrency | None = None,
|
|
33
40
|
rate_limit: RateLimit | None = None,
|
|
41
|
+
rate_limits: list[TokenRateLimit] | None = None,
|
|
34
42
|
throttle: Throttle | None = None,
|
|
35
43
|
debounce: Debounce | None = None,
|
|
36
44
|
cancel_on: list[str] | None = None,
|
|
@@ -47,6 +55,7 @@ class FlowForgeFunction:
|
|
|
47
55
|
timeout=timeout,
|
|
48
56
|
concurrency=concurrency,
|
|
49
57
|
rate_limit=rate_limit,
|
|
58
|
+
rate_limits=rate_limits or [],
|
|
50
59
|
throttle=throttle,
|
|
51
60
|
debounce=debounce,
|
|
52
61
|
cancel_on=cancel_on or [],
|
|
@@ -79,6 +88,7 @@ def function(
|
|
|
79
88
|
timeout: str = "5m",
|
|
80
89
|
concurrency: Concurrency | None = None,
|
|
81
90
|
rate_limit: RateLimit | None = None,
|
|
91
|
+
rate_limits: list[TokenRateLimit] | None = None,
|
|
82
92
|
throttle: Throttle | None = None,
|
|
83
93
|
debounce: Debounce | None = None,
|
|
84
94
|
cancel_on: list[str] | None = None,
|
|
@@ -97,7 +107,10 @@ def function(
|
|
|
97
107
|
retries: Number of retry attempts on failure (default: 3).
|
|
98
108
|
timeout: Maximum execution time (default: "5m").
|
|
99
109
|
concurrency: Concurrency limiting configuration.
|
|
100
|
-
rate_limit: Rate limiting configuration.
|
|
110
|
+
rate_limit: Rate limiting configuration (invocations per period).
|
|
111
|
+
rate_limits: Per-model token-budget limits enforced pre-flight on
|
|
112
|
+
LLM calls (see TokenRateLimit). Prevents 429s by absorbing
|
|
113
|
+
back-pressure into durable step.sleep waits.
|
|
101
114
|
throttle: Throttle configuration.
|
|
102
115
|
debounce: Debounce configuration.
|
|
103
116
|
cancel_on: List of events that cancel running instances.
|
|
@@ -156,6 +169,7 @@ def function(
|
|
|
156
169
|
timeout=timeout,
|
|
157
170
|
concurrency=concurrency,
|
|
158
171
|
rate_limit=rate_limit,
|
|
172
|
+
rate_limits=rate_limits,
|
|
159
173
|
throttle=throttle,
|
|
160
174
|
debounce=debounce,
|
|
161
175
|
cancel_on=cancel_on,
|
|
@@ -58,17 +58,67 @@ class StepTimeout(StepError):
|
|
|
58
58
|
super().__init__(step_id, f"timed out after {timeout_seconds}s")
|
|
59
59
|
|
|
60
60
|
|
|
61
|
-
class RetryableError(
|
|
61
|
+
class RetryableError(StepFailed):
|
|
62
62
|
"""
|
|
63
63
|
Raised to indicate an error that should trigger a retry.
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
|
|
65
|
+
Used for transient failures (rate limits, brief network issues). Subclasses
|
|
66
|
+
StepFailed so existing `except StepFailed` catchers still catch it; callers
|
|
67
|
+
that want retry-specific behaviour can catch RetryableError directly.
|
|
67
68
|
"""
|
|
68
69
|
|
|
69
|
-
def __init__(
|
|
70
|
+
def __init__(
|
|
71
|
+
self,
|
|
72
|
+
message: str = "",
|
|
73
|
+
*,
|
|
74
|
+
step_id: str = "",
|
|
75
|
+
retry_after: float | None = None,
|
|
76
|
+
attempt: int = 1,
|
|
77
|
+
max_attempts: int = 1,
|
|
78
|
+
) -> None:
|
|
70
79
|
self.retry_after = retry_after
|
|
71
|
-
super().__init__(message)
|
|
80
|
+
super().__init__(step_id, message, attempt=attempt, max_attempts=max_attempts)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class RateLimited(RetryableError):
|
|
84
|
+
"""
|
|
85
|
+
Raised when an LLM provider rate-limited the request and retries exhausted.
|
|
86
|
+
|
|
87
|
+
Carries enough context for callers to decide follow-up behaviour (switch
|
|
88
|
+
providers, surface to the user, park the run).
|
|
89
|
+
|
|
90
|
+
Aliases:
|
|
91
|
+
- ``self.original`` / ``self.original_error`` — both point at the
|
|
92
|
+
underlying provider exception (or its string form). Error payloads
|
|
93
|
+
serialised via ``str(e.original_error)`` therefore surface the real
|
|
94
|
+
root cause, not the synthesised "rate limited by …" banner.
|
|
95
|
+
"""
|
|
96
|
+
|
|
97
|
+
def __init__(
|
|
98
|
+
self,
|
|
99
|
+
*,
|
|
100
|
+
step_id: str = "",
|
|
101
|
+
retry_after: float | None = None,
|
|
102
|
+
provider: str = "",
|
|
103
|
+
model: str = "",
|
|
104
|
+
original: Exception | str = "",
|
|
105
|
+
attempt: int = 1,
|
|
106
|
+
max_attempts: int = 1,
|
|
107
|
+
) -> None:
|
|
108
|
+
self.provider = provider
|
|
109
|
+
self.model = model
|
|
110
|
+
self.original = original
|
|
111
|
+
# Pass `original` through StepFailed so `e.original_error` reflects
|
|
112
|
+
# the underlying provider failure. The exception's __str__ still
|
|
113
|
+
# includes a readable banner via the StepError base message.
|
|
114
|
+
banner = f"rate limited by {provider or 'provider'} on {model or 'model'}"
|
|
115
|
+
super().__init__(
|
|
116
|
+
original if original else banner,
|
|
117
|
+
step_id=step_id,
|
|
118
|
+
retry_after=retry_after,
|
|
119
|
+
attempt=attempt,
|
|
120
|
+
max_attempts=max_attempts,
|
|
121
|
+
)
|
|
72
122
|
|
|
73
123
|
|
|
74
124
|
class NonRetryableError(FlowForgeError):
|
|
@@ -137,19 +137,31 @@ class ExecutionEngine:
|
|
|
137
137
|
)
|
|
138
138
|
|
|
139
139
|
except StepFailed as e:
|
|
140
|
-
# Step failed, let server handle retry
|
|
140
|
+
# Step failed, let server handle retry.
|
|
141
|
+
# Preserve the actual exception class (StepFailed | RetryableError |
|
|
142
|
+
# RateLimited) so the server / dashboard can distinguish them.
|
|
143
|
+
error_payload: dict[str, Any] = {
|
|
144
|
+
"type": type(e).__name__,
|
|
145
|
+
"message": str(e.original_error),
|
|
146
|
+
"step_id": e.step_id,
|
|
147
|
+
"attempt": e.attempt,
|
|
148
|
+
"max_attempts": e.max_attempts,
|
|
149
|
+
"retryable": True,
|
|
150
|
+
"traceback": traceback.format_exc(),
|
|
151
|
+
}
|
|
152
|
+
retry_after = getattr(e, "retry_after", None)
|
|
153
|
+
if retry_after is not None:
|
|
154
|
+
error_payload["retry_after"] = retry_after
|
|
155
|
+
provider = getattr(e, "provider", None)
|
|
156
|
+
if provider:
|
|
157
|
+
error_payload["provider"] = provider
|
|
158
|
+
model = getattr(e, "model", None)
|
|
159
|
+
if model:
|
|
160
|
+
error_payload["model"] = model
|
|
141
161
|
return ExecutionResult(
|
|
142
162
|
status="error",
|
|
143
163
|
step_id=e.step_id,
|
|
144
|
-
error=
|
|
145
|
-
"type": "StepFailed",
|
|
146
|
-
"message": str(e.original_error),
|
|
147
|
-
"step_id": e.step_id,
|
|
148
|
-
"attempt": e.attempt,
|
|
149
|
-
"max_attempts": e.max_attempts,
|
|
150
|
-
"retryable": True,
|
|
151
|
-
"traceback": traceback.format_exc(),
|
|
152
|
-
},
|
|
164
|
+
error=error_payload,
|
|
153
165
|
)
|
|
154
166
|
|
|
155
167
|
except NonRetryableError as e:
|
|
@@ -2,19 +2,60 @@
|
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import json
|
|
5
|
+
import os
|
|
6
|
+
import random
|
|
5
7
|
from collections.abc import Awaitable, Callable
|
|
6
8
|
from datetime import UTC, datetime, timedelta
|
|
7
9
|
from typing import Any, TypeVar
|
|
8
10
|
|
|
9
11
|
from flowforge.agent import AgentResult, AgentState
|
|
10
12
|
from flowforge.agent_def import AgentDefinition
|
|
11
|
-
from flowforge.exceptions import StepCompleted, StepFailed
|
|
13
|
+
from flowforge.exceptions import RateLimited, StepCompleted, StepFailed
|
|
12
14
|
from flowforge.network import Network, NetworkResult, NetworkState, RouterContext
|
|
13
15
|
from flowforge.router import LLMRouter
|
|
14
16
|
from flowforge.tools import SubAgentConfig, Tool
|
|
15
17
|
|
|
16
18
|
T = TypeVar("T")
|
|
17
19
|
|
|
20
|
+
# Defaults for LLM-call retry on rate-limit. See _resolve_num_retries /
|
|
21
|
+
# _retry_sleep below.
|
|
22
|
+
_DEFAULT_LLM_NUM_RETRIES = 5
|
|
23
|
+
_DEFAULT_LLM_MAX_RETRY_DELAY = 120.0
|
|
24
|
+
_RETRY_JITTER_RANGE = (0.8, 1.2)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _resolve_num_retries(explicit: int | None) -> int:
|
|
28
|
+
"""
|
|
29
|
+
Determine the retry budget for an LLM call.
|
|
30
|
+
|
|
31
|
+
Precedence: explicit kwarg > FLOWFORGE_LLM_NUM_RETRIES env >
|
|
32
|
+
LITELLM_NUM_RETRIES env (back-compat) > default 5. Clamped to >= 0.
|
|
33
|
+
"""
|
|
34
|
+
if explicit is not None:
|
|
35
|
+
return max(0, int(explicit))
|
|
36
|
+
for var in ("FLOWFORGE_LLM_NUM_RETRIES", "LITELLM_NUM_RETRIES"):
|
|
37
|
+
raw = os.environ.get(var)
|
|
38
|
+
if raw is None:
|
|
39
|
+
continue
|
|
40
|
+
try:
|
|
41
|
+
return max(0, int(raw))
|
|
42
|
+
except ValueError:
|
|
43
|
+
continue
|
|
44
|
+
return _DEFAULT_LLM_NUM_RETRIES
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _retry_sleep(retry_after: float) -> float:
|
|
48
|
+
"""Apply ±20% jitter and clamp to [1s, FLOWFORGE_LLM_MAX_RETRY_DELAY]."""
|
|
49
|
+
try:
|
|
50
|
+
max_delay = float(
|
|
51
|
+
os.environ.get("FLOWFORGE_LLM_MAX_RETRY_DELAY", _DEFAULT_LLM_MAX_RETRY_DELAY)
|
|
52
|
+
)
|
|
53
|
+
except ValueError:
|
|
54
|
+
max_delay = _DEFAULT_LLM_MAX_RETRY_DELAY
|
|
55
|
+
base = max(0.0, float(retry_after))
|
|
56
|
+
jittered = base * random.uniform(*_RETRY_JITTER_RANGE)
|
|
57
|
+
return max(1.0, min(jittered, max_delay))
|
|
58
|
+
|
|
18
59
|
|
|
19
60
|
def _parse_duration(duration: str | timedelta) -> float:
|
|
20
61
|
"""Parse a duration string or timedelta to seconds."""
|
|
@@ -183,14 +224,25 @@ class StepManager:
|
|
|
183
224
|
tools: list[Any] | None = None,
|
|
184
225
|
tool_choice: str | dict[str, Any] = "auto",
|
|
185
226
|
max_tool_calls: int = 10,
|
|
227
|
+
num_retries: int | None = None,
|
|
186
228
|
**kwargs: Any,
|
|
187
229
|
) -> dict[str, Any]:
|
|
188
230
|
"""
|
|
189
|
-
Execute an LLM call with
|
|
190
|
-
|
|
191
|
-
Supports multiple providers (OpenAI, Anthropic, etc.) with
|
|
192
|
-
|
|
193
|
-
|
|
231
|
+
Execute an LLM call with durable rate-limit retry and cost tracking.
|
|
232
|
+
|
|
233
|
+
Supports multiple providers (OpenAI, Anthropic, etc.) with a unified
|
|
234
|
+
interface. On provider 429 (or pre-flight token-bucket exhaustion),
|
|
235
|
+
expands into a durable chain of ``{step_id}`` (first attempt,
|
|
236
|
+
rate-limited) → ``{step_id}/retry-sleep-1`` → ``{step_id}/attempt-2``
|
|
237
|
+
→ … driven by Retry-After with ±20% jitter. The worker is freed
|
|
238
|
+
during each sleep. When retries exhaust, raises
|
|
239
|
+
:class:`flowforge.RateLimited`.
|
|
240
|
+
|
|
241
|
+
**Narrower than you might expect:** only provider 429 responses are
|
|
242
|
+
retried here. Non-rate-limit transient errors (timeouts, 5xx,
|
|
243
|
+
connection failures) propagate immediately as ``StepFailed`` and
|
|
244
|
+
are subject to the function-level retry policy set on
|
|
245
|
+
``@flowforge.function(retries=N)``.
|
|
194
246
|
|
|
195
247
|
Args:
|
|
196
248
|
step_id: Unique identifier for this AI step.
|
|
@@ -204,6 +256,10 @@ class StepManager:
|
|
|
204
256
|
tools: List of Tool objects that the LLM can call.
|
|
205
257
|
tool_choice: How the LLM should choose tools ("auto", "required", "none", or specific tool).
|
|
206
258
|
max_tool_calls: Maximum number of tool calls allowed in this step.
|
|
259
|
+
num_retries: Override the rate-limit retry budget for this call.
|
|
260
|
+
``None`` (default) reads ``FLOWFORGE_LLM_NUM_RETRIES`` →
|
|
261
|
+
``LITELLM_NUM_RETRIES`` → 5. ``0`` disables retry and
|
|
262
|
+
raises ``RateLimited`` on the first 429.
|
|
207
263
|
**kwargs: Additional provider-specific parameters.
|
|
208
264
|
|
|
209
265
|
Returns:
|
|
@@ -256,12 +312,9 @@ class StepManager:
|
|
|
256
312
|
if result.get("tool_calls"):
|
|
257
313
|
print(f"Tools called: {result['tool_calls']}")
|
|
258
314
|
"""
|
|
259
|
-
|
|
260
|
-
is_memoized, result = self._get_memoized_result(step_id)
|
|
261
|
-
if is_memoized:
|
|
262
|
-
return result # type: ignore
|
|
315
|
+
num_retries = _resolve_num_retries(num_retries)
|
|
263
316
|
|
|
264
|
-
# Build messages
|
|
317
|
+
# Build messages once (shared across attempts).
|
|
265
318
|
if prompt is not None and messages is None:
|
|
266
319
|
if isinstance(prompt, str):
|
|
267
320
|
messages = [{"role": "user", "content": prompt}]
|
|
@@ -271,7 +324,6 @@ class StepManager:
|
|
|
271
324
|
if messages is None:
|
|
272
325
|
raise ValueError("Either 'prompt' or 'messages' must be provided")
|
|
273
326
|
|
|
274
|
-
# Convert Tool objects to JSON-serializable OpenAI schema dicts
|
|
275
327
|
tools_schema = None
|
|
276
328
|
if tools:
|
|
277
329
|
tools_schema = [
|
|
@@ -279,8 +331,79 @@ class StepManager:
|
|
|
279
331
|
for t in tools
|
|
280
332
|
]
|
|
281
333
|
|
|
282
|
-
#
|
|
283
|
-
|
|
334
|
+
# Durable retry loop. Each attempt is its own memoised sub-step, with
|
|
335
|
+
# a step.sleep between attempts so the worker is freed during the
|
|
336
|
+
# wait. First attempt keeps the caller's step_id (back-compat); only
|
|
337
|
+
# subsequent attempts get an /attempt-N suffix.
|
|
338
|
+
last_signal: dict[str, Any] | None = None
|
|
339
|
+
for attempt in range(num_retries + 1):
|
|
340
|
+
attempt_id = step_id if attempt == 0 else f"{step_id}/attempt-{attempt + 1}"
|
|
341
|
+
result = await self._ai_attempt(
|
|
342
|
+
attempt_id,
|
|
343
|
+
model=model,
|
|
344
|
+
messages=messages,
|
|
345
|
+
max_tokens=max_tokens,
|
|
346
|
+
temperature=temperature,
|
|
347
|
+
provider=provider,
|
|
348
|
+
use_cache=use_cache,
|
|
349
|
+
tools_schema=tools_schema,
|
|
350
|
+
tool_choice=tool_choice,
|
|
351
|
+
max_tool_calls=max_tool_calls,
|
|
352
|
+
extra_kwargs=kwargs,
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
if not (isinstance(result, dict) and result.get("__rate_limited")):
|
|
356
|
+
return result # type: ignore[return-value]
|
|
357
|
+
|
|
358
|
+
last_signal = result
|
|
359
|
+
if attempt >= num_retries:
|
|
360
|
+
break
|
|
361
|
+
|
|
362
|
+
retry_after = float(result.get("__retry_after") or 1.0)
|
|
363
|
+
sleep_s = _retry_sleep(retry_after)
|
|
364
|
+
await self.sleep(
|
|
365
|
+
f"{step_id}/retry-sleep-{attempt + 1}",
|
|
366
|
+
duration=f"{sleep_s:.3f}s",
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
assert last_signal is not None # loop always sets it before breaking
|
|
370
|
+
raise RateLimited(
|
|
371
|
+
step_id=step_id,
|
|
372
|
+
retry_after=last_signal.get("__retry_after"),
|
|
373
|
+
provider=str(last_signal.get("__provider") or ""),
|
|
374
|
+
model=str(last_signal.get("__model") or model),
|
|
375
|
+
original=str(last_signal.get("__error") or ""),
|
|
376
|
+
attempt=num_retries + 1,
|
|
377
|
+
max_attempts=num_retries + 1,
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
async def _ai_attempt(
|
|
381
|
+
self,
|
|
382
|
+
attempt_id: str,
|
|
383
|
+
*,
|
|
384
|
+
model: str,
|
|
385
|
+
messages: list[dict[str, str]],
|
|
386
|
+
max_tokens: int,
|
|
387
|
+
temperature: float,
|
|
388
|
+
provider: str | None,
|
|
389
|
+
use_cache: bool,
|
|
390
|
+
tools_schema: list[dict[str, Any]] | None,
|
|
391
|
+
tool_choice: str | dict[str, Any],
|
|
392
|
+
max_tool_calls: int,
|
|
393
|
+
extra_kwargs: dict[str, Any],
|
|
394
|
+
) -> dict[str, Any]:
|
|
395
|
+
"""
|
|
396
|
+
Execute a single LLM attempt.
|
|
397
|
+
|
|
398
|
+
On first call yields to the server (raises StepCompleted). On replay,
|
|
399
|
+
returns the memoised server response — either a normal AI response dict
|
|
400
|
+
or a rate-limit marker ({"__rate_limited": True, "__retry_after": ...}).
|
|
401
|
+
"""
|
|
402
|
+
is_memoized, result = self._get_memoized_result(attempt_id)
|
|
403
|
+
if is_memoized:
|
|
404
|
+
return result # type: ignore[return-value]
|
|
405
|
+
|
|
406
|
+
ai_request: dict[str, Any] = {
|
|
284
407
|
"type": "ai",
|
|
285
408
|
"model": model,
|
|
286
409
|
"messages": messages,
|
|
@@ -291,10 +414,10 @@ class StepManager:
|
|
|
291
414
|
"tools": tools_schema,
|
|
292
415
|
"tool_choice": tool_choice,
|
|
293
416
|
"max_tool_calls": max_tool_calls,
|
|
294
|
-
**
|
|
417
|
+
**extra_kwargs,
|
|
295
418
|
}
|
|
296
419
|
|
|
297
|
-
raise StepCompleted(step_id=
|
|
420
|
+
raise StepCompleted(step_id=attempt_id, result=ai_request)
|
|
298
421
|
|
|
299
422
|
async def wait_for_event(
|
|
300
423
|
self,
|
|
@@ -459,6 +582,7 @@ class StepManager:
|
|
|
459
582
|
checkpoint_strategy: str = "per_tool",
|
|
460
583
|
max_tool_calls: int = 50,
|
|
461
584
|
temperature: float = 0.7,
|
|
585
|
+
num_retries: int | None = None,
|
|
462
586
|
_depth: int = 0,
|
|
463
587
|
_max_depth: int = 3,
|
|
464
588
|
**kwargs: Any,
|
|
@@ -550,7 +674,9 @@ class StepManager:
|
|
|
550
674
|
state.status = "max_tool_calls"
|
|
551
675
|
break
|
|
552
676
|
|
|
553
|
-
# Call LLM with current messages
|
|
677
|
+
# Call LLM with current messages. num_retries propagates per
|
|
678
|
+
# iteration — a 429 on iter-N/think only retries iter-N/think;
|
|
679
|
+
# earlier iterations stay memoised.
|
|
554
680
|
think_step_id = f"{step_id}/iter-{state.iteration}/think"
|
|
555
681
|
ai_response = await self.ai(
|
|
556
682
|
think_step_id,
|
|
@@ -560,6 +686,7 @@ class StepManager:
|
|
|
560
686
|
tools=tools,
|
|
561
687
|
tool_choice="auto",
|
|
562
688
|
max_tool_calls=max_tool_calls - state.tool_calls_count,
|
|
689
|
+
num_retries=num_retries,
|
|
563
690
|
**kwargs,
|
|
564
691
|
)
|
|
565
692
|
|
|
@@ -1156,6 +1283,7 @@ class _StepProxy:
|
|
|
1156
1283
|
tools: list[Any] | None = None,
|
|
1157
1284
|
tool_choice: str | dict[str, Any] = "auto",
|
|
1158
1285
|
max_tool_calls: int = 10,
|
|
1286
|
+
num_retries: int | None = None,
|
|
1159
1287
|
**kwargs: Any,
|
|
1160
1288
|
) -> dict[str, Any]:
|
|
1161
1289
|
return await self._get_manager().ai(
|
|
@@ -1170,6 +1298,7 @@ class _StepProxy:
|
|
|
1170
1298
|
tools=tools,
|
|
1171
1299
|
tool_choice=tool_choice,
|
|
1172
1300
|
max_tool_calls=max_tool_calls,
|
|
1301
|
+
num_retries=num_retries,
|
|
1173
1302
|
**kwargs,
|
|
1174
1303
|
)
|
|
1175
1304
|
|
|
@@ -1219,6 +1348,7 @@ class _StepProxy:
|
|
|
1219
1348
|
checkpoint_strategy: str = "per_tool",
|
|
1220
1349
|
max_tool_calls: int = 50,
|
|
1221
1350
|
temperature: float = 0.7,
|
|
1351
|
+
num_retries: int | None = None,
|
|
1222
1352
|
_depth: int = 0,
|
|
1223
1353
|
_max_depth: int = 3,
|
|
1224
1354
|
**kwargs: Any,
|
|
@@ -1233,6 +1363,7 @@ class _StepProxy:
|
|
|
1233
1363
|
checkpoint_strategy=checkpoint_strategy,
|
|
1234
1364
|
max_tool_calls=max_tool_calls,
|
|
1235
1365
|
temperature=temperature,
|
|
1366
|
+
num_retries=num_retries,
|
|
1236
1367
|
_depth=_depth,
|
|
1237
1368
|
_max_depth=_max_depth,
|
|
1238
1369
|
**kwargs,
|
flowforge_sdk-0.4.0/.gitignore
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
# Environment and secrets
|
|
2
|
-
.env
|
|
3
|
-
.env.*
|
|
4
|
-
!.env.example
|
|
5
|
-
|
|
6
|
-
# Claude Code
|
|
7
|
-
.claude/
|
|
8
|
-
|
|
9
|
-
# Python
|
|
10
|
-
__pycache__/
|
|
11
|
-
*.py[cod]
|
|
12
|
-
*$py.class
|
|
13
|
-
*.so
|
|
14
|
-
.Python
|
|
15
|
-
.venv/
|
|
16
|
-
venv/
|
|
17
|
-
ENV/
|
|
18
|
-
*.egg-info/
|
|
19
|
-
*.egg
|
|
20
|
-
dist/
|
|
21
|
-
build/
|
|
22
|
-
.pytest_cache/
|
|
23
|
-
.mypy_cache/
|
|
24
|
-
.ruff_cache/
|
|
25
|
-
*.pyo
|
|
26
|
-
*.pyd
|
|
27
|
-
|
|
28
|
-
# Node.js
|
|
29
|
-
node_modules/
|
|
30
|
-
.next/
|
|
31
|
-
out/
|
|
32
|
-
.turbo/
|
|
33
|
-
*.tsbuildinfo
|
|
34
|
-
|
|
35
|
-
# IDE
|
|
36
|
-
.idea/
|
|
37
|
-
.vscode/
|
|
38
|
-
*.swp
|
|
39
|
-
*.swo
|
|
40
|
-
*~
|
|
41
|
-
|
|
42
|
-
# OS
|
|
43
|
-
.DS_Store
|
|
44
|
-
Thumbs.db
|
|
45
|
-
|
|
46
|
-
# Logs
|
|
47
|
-
*.log
|
|
48
|
-
logs/
|
|
49
|
-
|
|
50
|
-
# Testing
|
|
51
|
-
coverage/
|
|
52
|
-
.coverage
|
|
53
|
-
htmlcov/
|
|
54
|
-
|
|
55
|
-
# Docker
|
|
56
|
-
*.pid
|
|
57
|
-
/.openclaude-profile.json
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|