flowforge-sdk 0.4.0__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. flowforge_sdk-0.4.2/.gitignore +68 -0
  2. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/PKG-INFO +1 -1
  3. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/pyproject.toml +1 -1
  4. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/__init__.py +4 -0
  5. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/config.py +51 -0
  6. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/decorators.py +16 -2
  7. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/exceptions.py +55 -5
  8. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/execution.py +22 -10
  9. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/steps.py +148 -17
  10. flowforge_sdk-0.4.0/.gitignore +0 -57
  11. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/README.md +0 -0
  12. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/agent.py +0 -0
  13. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/agent_def.py +0 -0
  14. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/ai/__init__.py +0 -0
  15. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/ai/providers.py +0 -0
  16. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/client.py +0 -0
  17. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/context.py +0 -0
  18. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/dev/__init__.py +0 -0
  19. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/dev/server.py +0 -0
  20. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/integrations/__init__.py +0 -0
  21. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/integrations/fastapi.py +0 -0
  22. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/network.py +0 -0
  23. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/router.py +0 -0
  24. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/streaming.py +0 -0
  25. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/tools.py +0 -0
  26. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/triggers.py +0 -0
  27. {flowforge_sdk-0.4.0 → flowforge_sdk-0.4.2}/src/flowforge/worker.py +0 -0
@@ -0,0 +1,68 @@
1
+ # Environment and secrets
2
+ .env
3
+ .env.*
4
+ !.env.example
5
+
6
+ # Claude Code — ignore local state (plans, sessions, caches) everywhere.
7
+ .claude/
8
+ # …but at the repo root, commit just the in-repo FlowForge skill so every
9
+ # contributor gets the same domain expertise. Any other .claude/ content
10
+ # (including other user-installed skills under .claude/skills/) stays
11
+ # ignored, and nested .claude/ dirs (e.g. dashboard/.claude/) stay fully
12
+ # ignored.
13
+ !/.claude/
14
+ /.claude/*
15
+ !/.claude/skills/
16
+ /.claude/skills/*
17
+ !/.claude/skills/flowforge/
18
+ !/.claude/skills/flowforge/**
19
+
20
+ # Python
21
+ __pycache__/
22
+ *.py[cod]
23
+ *$py.class
24
+ *.so
25
+ .Python
26
+ .venv/
27
+ venv/
28
+ ENV/
29
+ *.egg-info/
30
+ *.egg
31
+ dist/
32
+ build/
33
+ .pytest_cache/
34
+ .mypy_cache/
35
+ .ruff_cache/
36
+ *.pyo
37
+ *.pyd
38
+
39
+ # Node.js
40
+ node_modules/
41
+ .next/
42
+ out/
43
+ .turbo/
44
+ *.tsbuildinfo
45
+
46
+ # IDE
47
+ .idea/
48
+ .vscode/
49
+ *.swp
50
+ *.swo
51
+ *~
52
+
53
+ # OS
54
+ .DS_Store
55
+ Thumbs.db
56
+
57
+ # Logs
58
+ *.log
59
+ logs/
60
+
61
+ # Testing
62
+ coverage/
63
+ .coverage
64
+ htmlcov/
65
+
66
+ # Docker
67
+ *.pid
68
+ /.openclaude-profile.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flowforge-sdk
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: Python SDK for FlowForge - AI workflow orchestration
5
5
  Project-URL: Homepage, https://github.com/flowforge/flowforge
6
6
  Project-URL: Documentation, https://flowforge.dev/docs
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "flowforge-sdk"
3
- version = "0.4.0"
3
+ version = "0.4.2"
4
4
  description = "Python SDK for FlowForge - AI workflow orchestration"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -9,6 +9,7 @@ from flowforge.config import (
9
9
  FunctionConfig,
10
10
  RateLimit,
11
11
  Throttle,
12
+ TokenRateLimit,
12
13
  concurrency,
13
14
  rate_limit,
14
15
  throttle,
@@ -18,6 +19,7 @@ from flowforge.decorators import function
18
19
  from flowforge.exceptions import (
19
20
  FlowForgeError,
20
21
  NonRetryableError,
22
+ RateLimited,
21
23
  RetryableError,
22
24
  StepCompleted,
23
25
  StepError,
@@ -66,6 +68,7 @@ __all__ = [
66
68
  # Configuration
67
69
  "Concurrency",
68
70
  "RateLimit",
71
+ "TokenRateLimit",
69
72
  "Throttle",
70
73
  "FunctionConfig",
71
74
  "concurrency",
@@ -81,5 +84,6 @@ __all__ = [
81
84
  "StepFailed",
82
85
  "StepTimeout",
83
86
  "RetryableError",
87
+ "RateLimited",
84
88
  "NonRetryableError",
85
89
  ]
@@ -75,6 +75,52 @@ class Throttle:
75
75
  }
76
76
 
77
77
 
78
+ @dataclass
79
+ class TokenRateLimit:
80
+ """
81
+ Per-model token-budget rate limit for LLM calls inside a function.
82
+
83
+ Caps the tokens consumed per minute for a specific model. Enforced as a
84
+ pre-flight token-bucket check inside the server's AI service: requests
85
+ that would exceed the bucket wait durably (via step.sleep inside the SDK
86
+ retry loop) instead of hitting the provider and getting a 429.
87
+
88
+ Example:
89
+ @flowforge.function(
90
+ id="research",
91
+ rate_limits=[
92
+ TokenRateLimit("claude-sonnet-4-6", tokens_per_minute=25_000),
93
+ ],
94
+ )
95
+ """
96
+
97
+ model: str
98
+ """Model name the limit applies to (e.g. "claude-sonnet-4-6")."""
99
+
100
+ tokens_per_minute: int
101
+ """Maximum tokens consumed per minute."""
102
+
103
+ key: str | None = None
104
+ """
105
+ Optional *literal* grouping key used as a suffix on the Redis bucket so
106
+ distinct values get distinct buckets. When None, the bucket is scoped
107
+ per (tenant, function, model).
108
+
109
+ Note: this is currently a static string, not an expression. The server
110
+ uses it verbatim as the Redis-key suffix; per-event evaluation (e.g.
111
+ ``event.data.tenant_id``) is a follow-up. To isolate buckets by caller
112
+ today, pass a concrete value like ``"premium"`` or ``"free"`` from the
113
+ code that constructs the decorator.
114
+ """
115
+
116
+ def to_dict(self) -> dict[str, Any]:
117
+ return {
118
+ "model": self.model,
119
+ "tokens_per_minute": self.tokens_per_minute,
120
+ "key": self.key,
121
+ }
122
+
123
+
78
124
  @dataclass
79
125
  class Debounce:
80
126
  """
@@ -130,6 +176,9 @@ class FunctionConfig:
130
176
  rate_limit: RateLimit | None = None
131
177
  """Rate limiting configuration."""
132
178
 
179
+ rate_limits: list["TokenRateLimit"] = field(default_factory=list)
180
+ """Per-model token-budget limits (AC9)."""
181
+
133
182
  throttle: Throttle | None = None
134
183
  """Throttle configuration."""
135
184
 
@@ -157,6 +206,8 @@ class FunctionConfig:
157
206
  config["concurrency"] = self.concurrency.to_dict()
158
207
  if self.rate_limit:
159
208
  config["rate_limit"] = self.rate_limit.to_dict()
209
+ if self.rate_limits:
210
+ config["rate_limits"] = [rl.to_dict() for rl in self.rate_limits]
160
211
  if self.throttle:
161
212
  config["throttle"] = self.throttle.to_dict()
162
213
  if self.debounce:
@@ -5,7 +5,14 @@ import inspect
5
5
  from collections.abc import Awaitable, Callable
6
6
  from typing import Any, ParamSpec, TypeVar
7
7
 
8
- from flowforge.config import Concurrency, Debounce, FunctionConfig, RateLimit, Throttle
8
+ from flowforge.config import (
9
+ Concurrency,
10
+ Debounce,
11
+ FunctionConfig,
12
+ RateLimit,
13
+ Throttle,
14
+ TokenRateLimit,
15
+ )
9
16
  from flowforge.context import Context
10
17
  from flowforge.triggers import Trigger
11
18
 
@@ -31,6 +38,7 @@ class FlowForgeFunction:
31
38
  timeout: str = "5m",
32
39
  concurrency: Concurrency | None = None,
33
40
  rate_limit: RateLimit | None = None,
41
+ rate_limits: list[TokenRateLimit] | None = None,
34
42
  throttle: Throttle | None = None,
35
43
  debounce: Debounce | None = None,
36
44
  cancel_on: list[str] | None = None,
@@ -47,6 +55,7 @@ class FlowForgeFunction:
47
55
  timeout=timeout,
48
56
  concurrency=concurrency,
49
57
  rate_limit=rate_limit,
58
+ rate_limits=rate_limits or [],
50
59
  throttle=throttle,
51
60
  debounce=debounce,
52
61
  cancel_on=cancel_on or [],
@@ -79,6 +88,7 @@ def function(
79
88
  timeout: str = "5m",
80
89
  concurrency: Concurrency | None = None,
81
90
  rate_limit: RateLimit | None = None,
91
+ rate_limits: list[TokenRateLimit] | None = None,
82
92
  throttle: Throttle | None = None,
83
93
  debounce: Debounce | None = None,
84
94
  cancel_on: list[str] | None = None,
@@ -97,7 +107,10 @@ def function(
97
107
  retries: Number of retry attempts on failure (default: 3).
98
108
  timeout: Maximum execution time (default: "5m").
99
109
  concurrency: Concurrency limiting configuration.
100
- rate_limit: Rate limiting configuration.
110
+ rate_limit: Rate limiting configuration (invocations per period).
111
+ rate_limits: Per-model token-budget limits enforced pre-flight on
112
+ LLM calls (see TokenRateLimit). Prevents 429s by absorbing
113
+ back-pressure into durable step.sleep waits.
101
114
  throttle: Throttle configuration.
102
115
  debounce: Debounce configuration.
103
116
  cancel_on: List of events that cancel running instances.
@@ -156,6 +169,7 @@ def function(
156
169
  timeout=timeout,
157
170
  concurrency=concurrency,
158
171
  rate_limit=rate_limit,
172
+ rate_limits=rate_limits,
159
173
  throttle=throttle,
160
174
  debounce=debounce,
161
175
  cancel_on=cancel_on,
@@ -58,17 +58,67 @@ class StepTimeout(StepError):
58
58
  super().__init__(step_id, f"timed out after {timeout_seconds}s")
59
59
 
60
60
 
61
- class RetryableError(FlowForgeError):
61
+ class RetryableError(StepFailed):
62
62
  """
63
63
  Raised to indicate an error that should trigger a retry.
64
64
 
65
- Use this to signal that the current step should be retried,
66
- e.g., for transient network errors or rate limits.
65
+ Used for transient failures (rate limits, brief network issues). Subclasses
66
+ StepFailed so existing `except StepFailed` catchers still catch it; callers
67
+ that want retry-specific behaviour can catch RetryableError directly.
67
68
  """
68
69
 
69
- def __init__(self, message: str, retry_after: float | None = None) -> None:
70
+ def __init__(
71
+ self,
72
+ message: str = "",
73
+ *,
74
+ step_id: str = "",
75
+ retry_after: float | None = None,
76
+ attempt: int = 1,
77
+ max_attempts: int = 1,
78
+ ) -> None:
70
79
  self.retry_after = retry_after
71
- super().__init__(message)
80
+ super().__init__(step_id, message, attempt=attempt, max_attempts=max_attempts)
81
+
82
+
83
+ class RateLimited(RetryableError):
84
+ """
85
+ Raised when an LLM provider rate-limited the request and retries were exhausted.
86
+
87
+ Carries enough context for callers to decide follow-up behaviour (switch
88
+ providers, surface to the user, park the run).
89
+
90
+ Aliases:
91
+ - ``self.original`` / ``self.original_error`` — both point at the
92
+ underlying provider exception (or its string form). Error payloads
93
+ serialised via ``str(e.original_error)`` therefore surface the real
94
+ root cause, not the synthesised "rate limited by …" banner.
95
+ """
96
+
97
+ def __init__(
98
+ self,
99
+ *,
100
+ step_id: str = "",
101
+ retry_after: float | None = None,
102
+ provider: str = "",
103
+ model: str = "",
104
+ original: Exception | str = "",
105
+ attempt: int = 1,
106
+ max_attempts: int = 1,
107
+ ) -> None:
108
+ self.provider = provider
109
+ self.model = model
110
+ self.original = original
111
+ # Pass `original` through StepFailed so `e.original_error` reflects
112
+ # the underlying provider failure. The synthesised banner is only used
113
+ # as the message when no `original` was supplied.
114
+ banner = f"rate limited by {provider or 'provider'} on {model or 'model'}"
115
+ super().__init__(
116
+ original if original else banner,
117
+ step_id=step_id,
118
+ retry_after=retry_after,
119
+ attempt=attempt,
120
+ max_attempts=max_attempts,
121
+ )
72
122
 
73
123
 
74
124
  class NonRetryableError(FlowForgeError):
@@ -137,19 +137,31 @@ class ExecutionEngine:
137
137
  )
138
138
 
139
139
  except StepFailed as e:
140
- # Step failed, let server handle retry
140
+ # Step failed, let server handle retry.
141
+ # Preserve the actual exception class (StepFailed | RetryableError |
142
+ # RateLimited) so the server / dashboard can distinguish them.
143
+ error_payload: dict[str, Any] = {
144
+ "type": type(e).__name__,
145
+ "message": str(e.original_error),
146
+ "step_id": e.step_id,
147
+ "attempt": e.attempt,
148
+ "max_attempts": e.max_attempts,
149
+ "retryable": True,
150
+ "traceback": traceback.format_exc(),
151
+ }
152
+ retry_after = getattr(e, "retry_after", None)
153
+ if retry_after is not None:
154
+ error_payload["retry_after"] = retry_after
155
+ provider = getattr(e, "provider", None)
156
+ if provider:
157
+ error_payload["provider"] = provider
158
+ model = getattr(e, "model", None)
159
+ if model:
160
+ error_payload["model"] = model
141
161
  return ExecutionResult(
142
162
  status="error",
143
163
  step_id=e.step_id,
144
- error={
145
- "type": "StepFailed",
146
- "message": str(e.original_error),
147
- "step_id": e.step_id,
148
- "attempt": e.attempt,
149
- "max_attempts": e.max_attempts,
150
- "retryable": True,
151
- "traceback": traceback.format_exc(),
152
- },
164
+ error=error_payload,
153
165
  )
154
166
 
155
167
  except NonRetryableError as e:
@@ -2,19 +2,60 @@
2
2
 
3
3
  import hashlib
4
4
  import json
5
+ import os
6
+ import random
5
7
  from collections.abc import Awaitable, Callable
6
8
  from datetime import UTC, datetime, timedelta
7
9
  from typing import Any, TypeVar
8
10
 
9
11
  from flowforge.agent import AgentResult, AgentState
10
12
  from flowforge.agent_def import AgentDefinition
11
- from flowforge.exceptions import StepCompleted, StepFailed
13
+ from flowforge.exceptions import RateLimited, StepCompleted, StepFailed
12
14
  from flowforge.network import Network, NetworkResult, NetworkState, RouterContext
13
15
  from flowforge.router import LLMRouter
14
16
  from flowforge.tools import SubAgentConfig, Tool
15
17
 
16
18
  T = TypeVar("T")
17
19
 
20
+ # Defaults for LLM-call retry on rate-limit. See _resolve_num_retries /
21
+ # _retry_sleep below.
22
+ _DEFAULT_LLM_NUM_RETRIES = 5
23
+ _DEFAULT_LLM_MAX_RETRY_DELAY = 120.0
24
+ _RETRY_JITTER_RANGE = (0.8, 1.2)
25
+
26
+
27
+ def _resolve_num_retries(explicit: int | None) -> int:
28
+ """
29
+ Determine the retry budget for an LLM call.
30
+
31
+ Precedence: explicit kwarg > FLOWFORGE_LLM_NUM_RETRIES env >
32
+ LITELLM_NUM_RETRIES env (back-compat) > default 5. Clamped to >= 0.
33
+ """
34
+ if explicit is not None:
35
+ return max(0, int(explicit))
36
+ for var in ("FLOWFORGE_LLM_NUM_RETRIES", "LITELLM_NUM_RETRIES"):
37
+ raw = os.environ.get(var)
38
+ if raw is None:
39
+ continue
40
+ try:
41
+ return max(0, int(raw))
42
+ except ValueError:
43
+ continue
44
+ return _DEFAULT_LLM_NUM_RETRIES
45
+
46
+
47
+ def _retry_sleep(retry_after: float) -> float:
48
+ """Apply ±20% jitter and clamp to [1s, FLOWFORGE_LLM_MAX_RETRY_DELAY]."""
49
+ try:
50
+ max_delay = float(
51
+ os.environ.get("FLOWFORGE_LLM_MAX_RETRY_DELAY", _DEFAULT_LLM_MAX_RETRY_DELAY)
52
+ )
53
+ except ValueError:
54
+ max_delay = _DEFAULT_LLM_MAX_RETRY_DELAY
55
+ base = max(0.0, float(retry_after))
56
+ jittered = base * random.uniform(*_RETRY_JITTER_RANGE)
57
+ return max(1.0, min(jittered, max_delay))
58
+
18
59
 
19
60
  def _parse_duration(duration: str | timedelta) -> float:
20
61
  """Parse a duration string or timedelta to seconds."""
@@ -183,14 +224,25 @@ class StepManager:
183
224
  tools: list[Any] | None = None,
184
225
  tool_choice: str | dict[str, Any] = "auto",
185
226
  max_tool_calls: int = 10,
227
+ num_retries: int | None = None,
186
228
  **kwargs: Any,
187
229
  ) -> dict[str, Any]:
188
230
  """
189
- Execute an LLM call with automatic retry and cost tracking.
190
-
191
- Supports multiple providers (OpenAI, Anthropic, etc.) with
192
- unified interface. Automatically retries on rate limits
193
- and transient errors.
231
+ Execute an LLM call with durable rate-limit retry and cost tracking.
232
+
233
+ Supports multiple providers (OpenAI, Anthropic, etc.) with a unified
234
+ interface. On provider 429 (or pre-flight token-bucket exhaustion),
235
+ expands into a durable chain of ``{step_id}`` (first attempt,
236
+ rate-limited) → ``{step_id}/retry-sleep-1`` → ``{step_id}/attempt-2``
237
+ → … driven by Retry-After with ±20% jitter. The worker is freed
238
+ during each sleep. When retries exhaust, raises
239
+ :class:`flowforge.RateLimited`.
240
+
241
+ **Narrower than you might expect:** only provider 429 responses are
242
+ retried here. Non-rate-limit transient errors (timeouts, 5xx,
243
+ connection failures) propagate immediately as ``StepFailed`` and
244
+ are subject to the function-level retry policy set on
245
+ ``@flowforge.function(retries=N)``.
194
246
 
195
247
  Args:
196
248
  step_id: Unique identifier for this AI step.
@@ -204,6 +256,10 @@ class StepManager:
204
256
  tools: List of Tool objects that the LLM can call.
205
257
  tool_choice: How the LLM should choose tools ("auto", "required", "none", or specific tool).
206
258
  max_tool_calls: Maximum number of tool calls allowed in this step.
259
+ num_retries: Override the rate-limit retry budget for this call.
260
+ ``None`` (default) reads ``FLOWFORGE_LLM_NUM_RETRIES`` →
261
+ ``LITELLM_NUM_RETRIES`` → 5. ``0`` disables retry and
262
+ raises ``RateLimited`` on the first 429.
207
263
  **kwargs: Additional provider-specific parameters.
208
264
 
209
265
  Returns:
@@ -256,12 +312,9 @@ class StepManager:
256
312
  if result.get("tool_calls"):
257
313
  print(f"Tools called: {result['tool_calls']}")
258
314
  """
259
- # Check for memoized result
260
- is_memoized, result = self._get_memoized_result(step_id)
261
- if is_memoized:
262
- return result # type: ignore
315
+ num_retries = _resolve_num_retries(num_retries)
263
316
 
264
- # Build messages if prompt provided
317
+ # Build messages once (shared across attempts).
265
318
  if prompt is not None and messages is None:
266
319
  if isinstance(prompt, str):
267
320
  messages = [{"role": "user", "content": prompt}]
@@ -271,7 +324,6 @@ class StepManager:
271
324
  if messages is None:
272
325
  raise ValueError("Either 'prompt' or 'messages' must be provided")
273
326
 
274
- # Convert Tool objects to JSON-serializable OpenAI schema dicts
275
327
  tools_schema = None
276
328
  if tools:
277
329
  tools_schema = [
@@ -279,8 +331,79 @@ class StepManager:
279
331
  for t in tools
280
332
  ]
281
333
 
282
- # This will be executed by the server/executor with LLM client
283
- ai_request = {
334
+ # Durable retry loop. Each attempt is its own memoised sub-step, with
335
+ # a step.sleep between attempts so the worker is freed during the
336
+ # wait. First attempt keeps the caller's step_id (back-compat); only
337
+ # subsequent attempts get an /attempt-N suffix.
338
+ last_signal: dict[str, Any] | None = None
339
+ for attempt in range(num_retries + 1):
340
+ attempt_id = step_id if attempt == 0 else f"{step_id}/attempt-{attempt + 1}"
341
+ result = await self._ai_attempt(
342
+ attempt_id,
343
+ model=model,
344
+ messages=messages,
345
+ max_tokens=max_tokens,
346
+ temperature=temperature,
347
+ provider=provider,
348
+ use_cache=use_cache,
349
+ tools_schema=tools_schema,
350
+ tool_choice=tool_choice,
351
+ max_tool_calls=max_tool_calls,
352
+ extra_kwargs=kwargs,
353
+ )
354
+
355
+ if not (isinstance(result, dict) and result.get("__rate_limited")):
356
+ return result # type: ignore[return-value]
357
+
358
+ last_signal = result
359
+ if attempt >= num_retries:
360
+ break
361
+
362
+ retry_after = float(result.get("__retry_after") or 1.0)
363
+ sleep_s = _retry_sleep(retry_after)
364
+ await self.sleep(
365
+ f"{step_id}/retry-sleep-{attempt + 1}",
366
+ duration=f"{sleep_s:.3f}s",
367
+ )
368
+
369
+ assert last_signal is not None # loop always sets it before breaking
370
+ raise RateLimited(
371
+ step_id=step_id,
372
+ retry_after=last_signal.get("__retry_after"),
373
+ provider=str(last_signal.get("__provider") or ""),
374
+ model=str(last_signal.get("__model") or model),
375
+ original=str(last_signal.get("__error") or ""),
376
+ attempt=num_retries + 1,
377
+ max_attempts=num_retries + 1,
378
+ )
379
+
380
+ async def _ai_attempt(
381
+ self,
382
+ attempt_id: str,
383
+ *,
384
+ model: str,
385
+ messages: list[dict[str, str]],
386
+ max_tokens: int,
387
+ temperature: float,
388
+ provider: str | None,
389
+ use_cache: bool,
390
+ tools_schema: list[dict[str, Any]] | None,
391
+ tool_choice: str | dict[str, Any],
392
+ max_tool_calls: int,
393
+ extra_kwargs: dict[str, Any],
394
+ ) -> dict[str, Any]:
395
+ """
396
+ Execute a single LLM attempt.
397
+
398
+ On first call yields to the server (raises StepCompleted). On replay,
399
+ returns the memoised server response — either a normal AI response dict
400
+ or a rate-limit marker ({"__rate_limited": True, "__retry_after": ...}).
401
+ """
402
+ is_memoized, result = self._get_memoized_result(attempt_id)
403
+ if is_memoized:
404
+ return result # type: ignore[return-value]
405
+
406
+ ai_request: dict[str, Any] = {
284
407
  "type": "ai",
285
408
  "model": model,
286
409
  "messages": messages,
@@ -291,10 +414,10 @@ class StepManager:
291
414
  "tools": tools_schema,
292
415
  "tool_choice": tool_choice,
293
416
  "max_tool_calls": max_tool_calls,
294
- **kwargs,
417
+ **extra_kwargs,
295
418
  }
296
419
 
297
- raise StepCompleted(step_id=step_id, result=ai_request)
420
+ raise StepCompleted(step_id=attempt_id, result=ai_request)
298
421
 
299
422
  async def wait_for_event(
300
423
  self,
@@ -459,6 +582,7 @@ class StepManager:
459
582
  checkpoint_strategy: str = "per_tool",
460
583
  max_tool_calls: int = 50,
461
584
  temperature: float = 0.7,
585
+ num_retries: int | None = None,
462
586
  _depth: int = 0,
463
587
  _max_depth: int = 3,
464
588
  **kwargs: Any,
@@ -550,7 +674,9 @@ class StepManager:
550
674
  state.status = "max_tool_calls"
551
675
  break
552
676
 
553
- # Call LLM with current messages
677
+ # Call LLM with current messages. num_retries propagates per
678
+ # iteration — a 429 on iter-N/think only retries iter-N/think;
679
+ # earlier iterations stay memoised.
554
680
  think_step_id = f"{step_id}/iter-{state.iteration}/think"
555
681
  ai_response = await self.ai(
556
682
  think_step_id,
@@ -560,6 +686,7 @@ class StepManager:
560
686
  tools=tools,
561
687
  tool_choice="auto",
562
688
  max_tool_calls=max_tool_calls - state.tool_calls_count,
689
+ num_retries=num_retries,
563
690
  **kwargs,
564
691
  )
565
692
 
@@ -1156,6 +1283,7 @@ class _StepProxy:
1156
1283
  tools: list[Any] | None = None,
1157
1284
  tool_choice: str | dict[str, Any] = "auto",
1158
1285
  max_tool_calls: int = 10,
1286
+ num_retries: int | None = None,
1159
1287
  **kwargs: Any,
1160
1288
  ) -> dict[str, Any]:
1161
1289
  return await self._get_manager().ai(
@@ -1170,6 +1298,7 @@ class _StepProxy:
1170
1298
  tools=tools,
1171
1299
  tool_choice=tool_choice,
1172
1300
  max_tool_calls=max_tool_calls,
1301
+ num_retries=num_retries,
1173
1302
  **kwargs,
1174
1303
  )
1175
1304
 
@@ -1219,6 +1348,7 @@ class _StepProxy:
1219
1348
  checkpoint_strategy: str = "per_tool",
1220
1349
  max_tool_calls: int = 50,
1221
1350
  temperature: float = 0.7,
1351
+ num_retries: int | None = None,
1222
1352
  _depth: int = 0,
1223
1353
  _max_depth: int = 3,
1224
1354
  **kwargs: Any,
@@ -1233,6 +1363,7 @@ class _StepProxy:
1233
1363
  checkpoint_strategy=checkpoint_strategy,
1234
1364
  max_tool_calls=max_tool_calls,
1235
1365
  temperature=temperature,
1366
+ num_retries=num_retries,
1236
1367
  _depth=_depth,
1237
1368
  _max_depth=_max_depth,
1238
1369
  **kwargs,
@@ -1,57 +0,0 @@
1
- # Environment and secrets
2
- .env
3
- .env.*
4
- !.env.example
5
-
6
- # Claude Code
7
- .claude/
8
-
9
- # Python
10
- __pycache__/
11
- *.py[cod]
12
- *$py.class
13
- *.so
14
- .Python
15
- .venv/
16
- venv/
17
- ENV/
18
- *.egg-info/
19
- *.egg
20
- dist/
21
- build/
22
- .pytest_cache/
23
- .mypy_cache/
24
- .ruff_cache/
25
- *.pyo
26
- *.pyd
27
-
28
- # Node.js
29
- node_modules/
30
- .next/
31
- out/
32
- .turbo/
33
- *.tsbuildinfo
34
-
35
- # IDE
36
- .idea/
37
- .vscode/
38
- *.swp
39
- *.swo
40
- *~
41
-
42
- # OS
43
- .DS_Store
44
- Thumbs.db
45
-
46
- # Logs
47
- *.log
48
- logs/
49
-
50
- # Testing
51
- coverage/
52
- .coverage
53
- htmlcov/
54
-
55
- # Docker
56
- *.pid
57
- /.openclaude-profile.json
File without changes