arcwright-ai 0.1.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arcwright_ai/__init__.py +12 -0
- arcwright_ai/agent/__init__.py +16 -0
- arcwright_ai/agent/invoker.py +567 -0
- arcwright_ai/agent/prompt.py +59 -0
- arcwright_ai/agent/sandbox.py +181 -0
- arcwright_ai/cli/__init__.py +7 -0
- arcwright_ai/cli/app.py +29 -0
- arcwright_ai/cli/clean.py +226 -0
- arcwright_ai/cli/dispatch.py +876 -0
- arcwright_ai/cli/halt.py +700 -0
- arcwright_ai/cli/resume.py +171 -0
- arcwright_ai/cli/status.py +904 -0
- arcwright_ai/context/__init__.py +21 -0
- arcwright_ai/context/answerer.py +400 -0
- arcwright_ai/context/injector.py +517 -0
- arcwright_ai/core/__init__.py +129 -0
- arcwright_ai/core/config.py +777 -0
- arcwright_ai/core/constants.py +183 -0
- arcwright_ai/core/events.py +43 -0
- arcwright_ai/core/exceptions.py +112 -0
- arcwright_ai/core/io.py +94 -0
- arcwright_ai/core/lifecycle.py +60 -0
- arcwright_ai/core/types.py +212 -0
- arcwright_ai/engine/__init__.py +30 -0
- arcwright_ai/engine/graph.py +72 -0
- arcwright_ai/engine/nodes.py +1791 -0
- arcwright_ai/engine/state.py +91 -0
- arcwright_ai/output/__init__.py +41 -0
- arcwright_ai/output/provenance.py +250 -0
- arcwright_ai/output/run_manager.py +564 -0
- arcwright_ai/output/summary.py +939 -0
- arcwright_ai/py.typed +0 -0
- arcwright_ai/scm/__init__.py +32 -0
- arcwright_ai/scm/branch.py +793 -0
- arcwright_ai/scm/git.py +281 -0
- arcwright_ai/scm/pr.py +941 -0
- arcwright_ai/scm/worktree.py +367 -0
- arcwright_ai/validation/__init__.py +37 -0
- arcwright_ai/validation/pipeline.py +231 -0
- arcwright_ai/validation/v3_reflexion.py +426 -0
- arcwright_ai/validation/v6_invariant.py +507 -0
- arcwright_ai-0.1.1.dev0.dist-info/METADATA +325 -0
- arcwright_ai-0.1.1.dev0.dist-info/RECORD +46 -0
- arcwright_ai-0.1.1.dev0.dist-info/WHEEL +4 -0
- arcwright_ai-0.1.1.dev0.dist-info/entry_points.txt +2 -0
- arcwright_ai-0.1.1.dev0.dist-info/licenses/LICENSE +21 -0
arcwright_ai/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Arcwright AI — Deterministic orchestration shell for autonomous AI agent execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
6
|
+
|
|
7
|
+
try:
    # Resolve the version from the installed distribution's metadata.
    __version__ = version("arcwright-ai")
except PackageNotFoundError:  # pragma: no cover
    # No installed distribution metadata was found; fall back to a placeholder version.
    __version__ = "0.0.0.dev0"
|
|
11
|
+
|
|
12
|
+
__all__ = ["__version__"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Agent package — Claude Code SDK integration for AI agent invocation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from arcwright_ai.agent.invoker import InvocationResult, invoke_agent
|
|
6
|
+
from arcwright_ai.agent.prompt import build_prompt
|
|
7
|
+
from arcwright_ai.agent.sandbox import PathValidator, validate_path, validate_temp_path
|
|
8
|
+
|
|
9
|
+
__all__: list[str] = [
|
|
10
|
+
"InvocationResult",
|
|
11
|
+
"PathValidator",
|
|
12
|
+
"build_prompt",
|
|
13
|
+
"invoke_agent",
|
|
14
|
+
"validate_path",
|
|
15
|
+
"validate_temp_path",
|
|
16
|
+
]
|
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
"""Agent invoker — Claude Code SDK integration for dispatching agent work."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import random
|
|
9
|
+
import re
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from decimal import Decimal
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
from arcwright_ai.core.constants import DIR_ARCWRIGHT, DIR_TMP
|
|
15
|
+
from arcwright_ai.core.exceptions import AgentError, AgentTimeoutError, SandboxViolation
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import AsyncGenerator, Awaitable, Callable
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from claude_code_sdk.types import PermissionResultAllow, PermissionResultDeny
|
|
22
|
+
|
|
23
|
+
from arcwright_ai.agent.sandbox import PathValidator
|
|
24
|
+
|
|
25
|
+
__all__: list[str] = ["InvocationResult", "invoke_agent"]
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
# Module-level constants
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
# Base delay in seconds for the retry backoff; it is multiplied by 2**attempt before sleeping.
_BACKOFF_BASE: float = 2.0
# Upper bound in seconds applied to every computed backoff delay.
_BACKOFF_CAP: float = 120.0
# Number of SDK query attempts made before the invoker gives up.
_BACKOFF_MAX_RETRIES: int = 7
# Searched (case-insensitively) in SDK error text to recognise rate-limit failures.
_RATE_LIMIT_RE: re.Pattern[str] = re.compile(r"rate.?limit|429|too many requests", re.IGNORECASE)
# Tool names whose target path is passed through the sandbox validator.
_FILE_WRITE_TOOLS: frozenset[str] = frozenset({"CreateFile", "Edit", "MultiEdit", "Write"})

# Flag prevents double-patching across multiple invoke_agent calls.
_SDK_PARSER_PATCHED: bool = False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class _SkippedMessage:
|
|
44
|
+
"""Sentinel returned by the patched SDK parser for unrecognised message types.
|
|
45
|
+
|
|
46
|
+
Carrying the original ``type`` field lets the streaming loop detect whether
|
|
47
|
+
a ``rate_limit_event`` was silently dropped just before the claude CLI
|
|
48
|
+
exited with code 1, so we can treat that as a retryable condition.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
__slots__ = ("msg_type",)
|
|
52
|
+
|
|
53
|
+
def __init__(self, msg_type: str) -> None:
|
|
54
|
+
self.msg_type = msg_type
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Flag prevents registering the asyncio exception handler more than once.
|
|
58
|
+
_BG_HANDLER_INSTALLED: bool = False
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _claude_meta_dir() -> Path:
|
|
62
|
+
"""Return the resolved ``~/.claude`` directory path (lazy, no import-time side-effects)."""
|
|
63
|
+
from pathlib import Path as _Path
|
|
64
|
+
|
|
65
|
+
return (_Path.home() / ".claude").resolve()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _suppress_bg_cancel_scope_errors() -> None:
|
|
69
|
+
"""Install a one-shot asyncio exception handler to silence Python 3.14 / anyio
|
|
70
|
+
``RuntimeError: Attempted to exit cancel scope in a different task`` noise.
|
|
71
|
+
|
|
72
|
+
Python 3.14 tightened ``asyncio`` so that ``anyio`` cancel scopes cannot be
|
|
73
|
+
exited from a different task than they were entered in. The
|
|
74
|
+
``claude_code_sdk`` internal async generator cleanup path hits this edge
|
|
75
|
+
case when the iterator is abandoned (e.g. after a denied tool-use). The
|
|
76
|
+
resulting ``RuntimeError`` is surfaced only as a background
|
|
77
|
+
"Task exception was never retrieved" warning and has no effect on
|
|
78
|
+
correctness, so we suppress it here.
|
|
79
|
+
"""
|
|
80
|
+
global _BG_HANDLER_INSTALLED
|
|
81
|
+
if _BG_HANDLER_INSTALLED:
|
|
82
|
+
return
|
|
83
|
+
|
|
84
|
+
import asyncio
|
|
85
|
+
|
|
86
|
+
loop = asyncio.get_running_loop()
|
|
87
|
+
original_handler = loop.get_exception_handler()
|
|
88
|
+
|
|
89
|
+
def _handler(lp: asyncio.AbstractEventLoop, context: dict) -> None: # type: ignore[type-arg]
|
|
90
|
+
exc = context.get("exception")
|
|
91
|
+
if isinstance(exc, RuntimeError) and "cancel scope" in str(exc).lower():
|
|
92
|
+
logger.debug(
|
|
93
|
+
"agent.bg_cancel_scope_suppressed",
|
|
94
|
+
extra={"data": {"error": str(exc)}},
|
|
95
|
+
)
|
|
96
|
+
return
|
|
97
|
+
if original_handler is not None:
|
|
98
|
+
original_handler(lp, context)
|
|
99
|
+
else:
|
|
100
|
+
lp.default_exception_handler(context)
|
|
101
|
+
|
|
102
|
+
loop.set_exception_handler(_handler)
|
|
103
|
+
_BG_HANDLER_INSTALLED = True
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _patch_sdk_parser() -> None:
    """Monkeypatch the SDK message parser so unparseable messages are skipped.

    Claude Code SDK v0.0.25 raises ``MessageParseError`` for unrecognised
    streaming message types (e.g. ``rate_limit_event``).  This patch wraps
    ``parse_message`` so that, instead of raising, it returns a
    ``_SkippedMessage`` sentinel carrying the raw message's ``type`` field
    (``"<unknown>"`` when the dict has no ``type``, ``"<invalid>"`` when the
    payload is not a dict).  The streaming loop in ``_invoke_with_backoff``
    recognises the sentinel and filters it out.

    The wrapper is installed on the name ``parse_message`` inside the SDK's
    client module, and only once per process (guarded by
    ``_SDK_PARSER_PATCHED``).
    """
    global _SDK_PARSER_PATCHED
    if _SDK_PARSER_PATCHED:
        return

    import claude_code_sdk._internal.client as _client_mod
    import claude_code_sdk._internal.message_parser as _parser_mod

    _original = _parser_mod.parse_message

    def _safe_parse_message(data: Any) -> Any:
        try:
            return _original(data)
        except Exception:
            # Every parser failure is treated as "skip this message"; keep the
            # traceback in the debug log so a genuine parsing bug stays diagnosable.
            msg_type = data.get("type", "<unknown>") if isinstance(data, dict) else "<invalid>"
            logger.debug("Skipping unrecognised SDK message type: %s", msg_type, exc_info=True)
            return _SkippedMessage(msg_type)

    # Patch the name *in the client module* (where it was imported).
    _client_mod.parse_message = _safe_parse_message  # type: ignore[attr-defined]
    _SDK_PARSER_PATCHED = True
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# InvocationResult dataclass
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass(frozen=True)
class InvocationResult:
    """Immutable record describing one completed Claude Code SDK invocation.

    Bundles the text the agent produced with the usage, cost and session
    metadata reported for the run, so callers can do budget tracking and
    provenance without touching SDK types.

    Attributes:
        output_text: The agent's full text output (concatenated TextBlocks).
        tokens_input: Input tokens consumed (from SDK usage report).
        tokens_output: Output tokens consumed (from SDK usage report).
        total_cost: Estimated cost in USD (Decimal for exact arithmetic).
        duration_ms: Wall-clock duration of the invocation in milliseconds.
        session_id: SDK session identifier for debugging.
        num_turns: Number of conversational turns in the session.
        is_error: Whether the SDK reported an error condition.
    """

    # Agent output
    output_text: str

    # Usage and cost accounting
    tokens_input: int
    tokens_output: int
    total_cost: Decimal

    # Session metadata
    duration_ms: int
    session_id: str
    num_turns: int
    is_error: bool
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Internal helpers
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _wrap_sdk_error(error: Exception) -> AgentError:
    """Wrap an SDK or generic exception into the appropriate AgentError subclass.

    Timeouts are reported as ``AgentTimeoutError`` in two cases: the exception
    is a builtin/asyncio ``TimeoutError``, or it is a ``ClaudeSDKError`` whose
    message mentions a timeout ("timeout", "timed out", "time-out", ...).
    Other SDK errors become ``AgentError("SDK error: ...")`` and everything
    else becomes a generic "Unexpected error" ``AgentError``.

    Args:
        error: The original exception to wrap.

    Returns:
        An ``AgentError`` (or appropriate subclass) preserving the original
        message in ``details["original_error"]``.
    """
    from claude_code_sdk._errors import ClaudeSDKError

    message = str(error)
    details: dict[str, Any] = {"original_error": message}

    # A real timeout exception is a timeout regardless of its (often empty) message.
    if isinstance(error, (TimeoutError, asyncio.TimeoutError)):
        return AgentTimeoutError(f"Agent session timed out: {message}", details=details)

    if isinstance(error, ClaudeSDKError):
        # Accept the common spellings, not only the literal word "timeout".
        if re.search(r"time(?:d)?[\s_-]?out", message, re.IGNORECASE):
            return AgentTimeoutError(f"Agent session timed out: {message}", details=details)
        return AgentError(f"SDK error: {message}", details=details)

    return AgentError(f"Unexpected error during agent invocation: {message}", details=details)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _validate_tool_use(block: Any, sandbox: PathValidator, cwd: Path) -> None:
    """Validate a ToolUseBlock file path through the sandbox (defense-in-depth).

    Called for every ToolUseBlock in the SDK stream after the primary
    ``can_use_tool`` callback.  Raises ``SandboxViolation`` to abort the
    invocation if a file-writing tool targets a path outside the boundary.

    Blocks whose tool is not in ``_FILE_WRITE_TOOLS``, or that carry neither a
    ``file_path`` nor a ``path`` input, are ignored.

    Args:
        block: A ``ToolUseBlock`` from the SDK message stream.
        sandbox: The injected path validator function.
        cwd: The working directory (sandbox boundary).

    Raises:
        SandboxViolation: If the tool targets a path outside the sandbox
            boundary, or a path written as ``<DIR_ARCWRIGHT>/<DIR_TMP>/...``
            that does not actually resolve inside the temp directory.
    """
    from pathlib import Path as _Path

    if block.name not in _FILE_WRITE_TOOLS:
        return

    file_path_str: str | None = block.input.get("file_path") or block.input.get("path")
    if not file_path_str:
        return

    file_path = _Path(file_path_str)
    temp_dir = (cwd / DIR_ARCWRIGHT / DIR_TMP).resolve()
    candidate_path = file_path if file_path.is_absolute() else cwd.resolve() / file_path
    # Resolve once; every check below is about this same location.
    resolved_path = candidate_path.resolve()

    # Silently deny writes to ~/.claude/ (Claude's session-resume
    # scratchpad).  Arcwright never uses CLI resume, so the checkpoint
    # is worthless and we don't want files accumulating in $HOME.
    # Returning without raising lets the session continue normally.
    if resolved_path.is_relative_to(_claude_meta_dir()):
        logger.debug(
            "agent.sandbox.deny_claude_meta",
            extra={"data": {"tool": block.name, "path": str(file_path)}},
        )
        return

    inside_temp_dir = resolved_path.is_relative_to(temp_dir)
    if inside_temp_dir:
        # Make sure the temp directory exists before the write is attempted.
        temp_dir.mkdir(parents=True, exist_ok=True)

    try:
        sandbox(file_path, cwd, block.name)
    except SandboxViolation:
        logger.info(
            "agent.sandbox.deny",
            extra={
                "data": {
                    "tool": block.name,
                    "path": str(file_path),
                    "cwd": str(cwd),
                }
            },
        )
        raise

    # A path *spelled* as <DIR_ARCWRIGHT>/<DIR_TMP>/... must really land in the
    # temp directory.  ``normpath`` already strips a leading "./", so only the
    # two-part prefix needs checking, and an absolute path can never match it.
    normalized_parts = os.path.normpath(file_path_str).split(os.sep)
    if normalized_parts[:2] == [DIR_ARCWRIGHT, DIR_TMP] and not inside_temp_dir:
        raise SandboxViolation(
            f"Temp files must target {temp_dir}, got: {resolved_path}",
            details={
                "path": file_path_str,
                "resolved": str(resolved_path),
                "expected_tmp": str(temp_dir),
            },
        )
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _make_tool_validator(
    sandbox: PathValidator,
    cwd: Path,
) -> Callable[[str, dict[str, Any], Any], Awaitable[PermissionResultAllow | PermissionResultDeny]]:
    """Create a ``can_use_tool`` callback that enforces sandbox rules at the SDK level.

    Returns an async callback compatible with ``ClaudeCodeOptions.can_use_tool``
    that passes file-writing tool calls through the injected ``PathValidator``,
    returning ``PermissionResultDeny`` for sandbox violations.  Tools outside
    ``_FILE_WRITE_TOOLS``, and calls without a ``file_path``/``path`` input,
    are always allowed.

    Args:
        sandbox: The injected path validator.
        cwd: The working directory (sandbox boundary).

    Returns:
        An async callback that returns ``PermissionResultAllow`` for safe paths
        and ``PermissionResultDeny`` for sandbox violations.
    """
    from pathlib import Path as _Path

    from claude_code_sdk.types import PermissionResultAllow, PermissionResultDeny

    async def can_use_tool(
        tool_name: str,
        tool_input: dict[str, Any],
        context: Any,
    ) -> PermissionResultAllow | PermissionResultDeny:
        if tool_name not in _FILE_WRITE_TOOLS:
            return PermissionResultAllow()

        file_path_str: str | None = tool_input.get("file_path") or tool_input.get("path")
        if not file_path_str:
            return PermissionResultAllow()

        file_path = _Path(file_path_str)
        temp_dir = (cwd / DIR_ARCWRIGHT / DIR_TMP).resolve()
        candidate_path = file_path if file_path.is_absolute() else cwd.resolve() / file_path
        # Resolve once; every check below is about this same location.
        resolved_path = candidate_path.resolve()

        # Silently deny writes to ~/.claude/ (Claude's session-resume
        # scratchpad).  Arcwright never uses CLI resume, so the plan
        # checkpoint is worthless and we don't want files accumulating
        # outside the project.  Deny is silent — the session continues.
        if resolved_path.is_relative_to(_claude_meta_dir()):
            logger.debug(
                "agent.sandbox.deny_claude_meta",
                extra={"data": {"tool": tool_name, "path": file_path_str}},
            )
            return PermissionResultDeny(
                message="~/.claude/ writes are not permitted; Arcwright does not use Claude session-resume."
            )

        inside_temp_dir = resolved_path.is_relative_to(temp_dir)
        if inside_temp_dir:
            # Make sure the temp directory exists before the write is attempted.
            temp_dir.mkdir(parents=True, exist_ok=True)

        try:
            sandbox(file_path, cwd, tool_name)
        except SandboxViolation as exc:
            logger.info(
                "agent.sandbox.deny",
                extra={
                    "data": {
                        "tool": tool_name,
                        "path": file_path_str,
                        "cwd": str(cwd),
                        "reason": str(exc),
                    }
                },
            )
            return PermissionResultDeny(message=str(exc))

        # A path *spelled* as <DIR_ARCWRIGHT>/<DIR_TMP>/... must really land in
        # the temp directory.  ``normpath`` already strips a leading "./", so
        # only the two-part prefix needs checking, and an absolute path can
        # never match it.
        normalized_parts = os.path.normpath(file_path_str).split(os.sep)
        if normalized_parts[:2] == [DIR_ARCWRIGHT, DIR_TMP] and not inside_temp_dir:
            return PermissionResultDeny(message=f"Temp files must target {temp_dir}, got: {resolved_path}")

        return PermissionResultAllow()

    return can_use_tool
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
async def _invoke_with_backoff(
    prompt: str,
    options: Any,
) -> AsyncGenerator[Any, None]:
    """Invoke the SDK with exponential backoff on rate limit errors.

    Calls ``claude_code_sdk.query()`` and re-yields all messages.  On rate
    limit errors (detected via regex on the error message, or a skipped
    ``rate_limit_event`` followed by exit code 1) and on SDK message parse
    errors, it sleeps with exponential backoff and jitter before retrying, up
    to ``_BACKOFF_MAX_RETRIES`` attempts.  No sleep is performed after the
    final attempt, since nothing would follow it.

    When ``can_use_tool`` is set on options, the SDK requires the prompt
    to be an ``AsyncIterable`` (streaming mode).  We wrap the plain string
    into a single-message async iterable to satisfy this contract.

    Args:
        prompt: The prompt string to pass to the SDK.
        options: A ``ClaudeCodeOptions`` instance.

    Yields:
        Typed SDK message objects as yielded by ``claude_code_sdk.query()``;
        ``_SkippedMessage`` sentinels are filtered out.

    Raises:
        AgentError: On non-rate-limit SDK errors or when max retries is
            exhausted (chained to the last underlying SDK error).
    """
    from claude_code_sdk import query as sdk_query
    from claude_code_sdk._errors import ClaudeSDKError, MessageParseError

    # Ensure the SDK parser tolerates unknown message types (e.g.
    # rate_limit_event in v0.0.25) before we start streaming.
    _patch_sdk_parser()

    # SDK requires AsyncIterable prompt when can_use_tool is configured
    needs_streaming = getattr(options, "can_use_tool", None) is not None

    async def _prompt_stream() -> AsyncGenerator[dict[str, Any], None]:
        yield {
            "type": "user",
            "message": {"role": "user", "content": prompt},
        }

    def _backoff_delay(attempt_index: int) -> float:
        # Exponential growth plus a little jitter, clamped to the configured cap.
        return min(
            _BACKOFF_BASE * (2**attempt_index) + random.uniform(0, 0.5),
            _BACKOFF_CAP,
        )

    last_error: Exception | None = None

    # NOTE(review): messages yielded before a retryable failure are yielded
    # again by the next attempt — confirm the consumer tolerates duplicates.
    for attempt in range(_BACKOFF_MAX_RETRIES):
        saw_rate_limit_event: bool = False
        is_last_attempt = attempt + 1 >= _BACKOFF_MAX_RETRIES
        try:
            # A generator is single-use, so build a fresh prompt stream per attempt.
            sdk_prompt: str | AsyncGenerator[dict[str, Any], None] = _prompt_stream() if needs_streaming else prompt
            async for message in sdk_query(prompt=sdk_prompt, options=options):
                if isinstance(message, _SkippedMessage):
                    # Patched parse_message returned a sentinel for an unknown
                    # message type.  Track rate_limit_event specifically so we
                    # can retry if the process then exits with code 1.
                    if message.msg_type == "rate_limit_event":
                        saw_rate_limit_event = True
                    continue
                yield message
            return
        except MessageParseError as exc:
            # SDK v0.0.25 doesn't handle some streaming message types
            # (e.g. rate_limit_event).  These are informational — log and
            # retry so the agent can continue on the next attempt.
            last_error = exc
            logger.info(
                "agent.sdk_parse_error",
                extra={
                    "data": {
                        "attempt": attempt + 1,
                        "error": str(exc),
                    }
                },
            )
            wait = _backoff_delay(attempt)
        except ClaudeSDKError as exc:
            sdk_error_detail: str = str(exc)
            stderr: str | None = getattr(exc, "stderr", None)
            exit_code: int | None = getattr(exc, "exit_code", None)
            if stderr:
                sdk_error_detail = f"{sdk_error_detail} | stderr={stderr}"
            is_rate_limit = bool(_RATE_LIMIT_RE.search(sdk_error_detail)) or (saw_rate_limit_event and exit_code == 1)
            if not is_rate_limit:
                raise _wrap_sdk_error(exc) from exc
            last_error = exc
            wait = _backoff_delay(attempt)
            logger.info(
                "agent.rate_limit",
                extra={
                    "data": {
                        "attempt": attempt + 1,
                        "wait_seconds": round(wait, 2),
                        "error": sdk_error_detail,
                        "exit_code": exit_code,
                        "triggered_by": "rate_limit_event" if saw_rate_limit_event else "error_pattern",
                    }
                },
            )

        # Sleeping after the final attempt would only delay the failure below.
        if not is_last_attempt:
            await asyncio.sleep(wait)

    raise AgentError(
        "Rate limit: max retries exhausted",
        details={"attempts": _BACKOFF_MAX_RETRIES},
    ) from last_error
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
# ---------------------------------------------------------------------------
|
|
459
|
+
# Public API
|
|
460
|
+
# ---------------------------------------------------------------------------
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
async def invoke_agent(
    prompt: str,
    *,
    model: str,
    cwd: Path,
    sandbox: PathValidator,
    max_turns: int | None = None,
) -> InvocationResult:
    """Invoke Claude Code SDK to execute a story implementation.

    Calls the SDK's ``query()`` async iterator, processes streaming messages,
    validates file operations through the injected sandbox, and captures
    token usage for budget tracking.  Each invocation is stateless — no
    persistent agent state is shared between calls.

    Args:
        prompt: The assembled prompt string from ``build_prompt()``.
        model: Claude model version identifier
            (e.g., ``"claude-sonnet-4-20250514"``).
        cwd: Working directory for agent file operations (typically the
            worktree path).  Also serves as the sandbox boundary.
        sandbox: Path validator function (``PathValidator`` protocol) for
            sandbox enforcement via dependency injection.
        max_turns: Optional maximum number of conversational turns.

    Returns:
        ``InvocationResult`` containing agent output, token usage, cost,
        and session metadata.

    Raises:
        AgentError: On SDK invocation failure (network, process crash,
            malformed response), when rate limit max retries is exhausted,
            or when the stream ends without a ``ResultMessage``.
        AgentTimeoutError: On SDK timeout.
        SandboxViolation: If the agent attempts a file operation outside
            the sandbox boundary.
    """
    from claude_code_sdk import ClaudeCodeOptions, query  # noqa: F401
    from claude_code_sdk.types import AssistantMessage, ResultMessage, TextBlock, ToolUseBlock

    # Suppress Python 3.14 / anyio cancel-scope RuntimeErrors emitted as
    # unhandled background-task warnings during async generator cleanup.
    _suppress_bg_cancel_scope_errors()

    options = ClaudeCodeOptions(
        model=model,
        cwd=str(cwd),
        permission_mode="bypassPermissions",
        max_turns=max_turns,
        can_use_tool=_make_tool_validator(sandbox, cwd),
    )

    output_parts: list[str] = []
    result_message: ResultMessage | None = None

    stream = _invoke_with_backoff(prompt, options)
    try:
        async for message in stream:
            if isinstance(message, AssistantMessage):
                for block in message.content:
                    if isinstance(block, TextBlock):
                        output_parts.append(block.text)
                    elif isinstance(block, ToolUseBlock):
                        # Second line of defense behind the can_use_tool callback.
                        _validate_tool_use(block, sandbox, cwd)
            elif isinstance(message, ResultMessage):
                result_message = message
    except (AgentError, SandboxViolation):
        # Already domain errors: propagate unchanged.  SandboxViolation is
        # listed explicitly so the documented contract holds even if it is
        # not an AgentError subclass.
        raise
    except Exception as exc:
        raise _wrap_sdk_error(exc) from exc
    finally:
        # Always close the generator so the SDK session is torn down promptly.
        await stream.aclose()

    if result_message is None:
        raise AgentError(
            "SDK stream ended without ResultMessage",
            details={"prompt_length": len(prompt)},
        )

    usage: dict[str, Any] = result_message.usage or {}
    # ``or 0`` also covers a key that is present but set to None.
    tokens_input: int = int(usage.get("input_tokens") or 0)
    tokens_output: int = int(usage.get("output_tokens") or 0)
    cost_float: float = result_message.total_cost_usd or 0.0

    logger.info(
        "agent.response",
        extra={
            "data": {
                "tokens_input": tokens_input,
                "tokens_output": tokens_output,
                "cost_usd": str(round(cost_float, 6)),
                "session_id": result_message.session_id,
            }
        },
    )

    return InvocationResult(
        output_text="".join(output_parts),
        tokens_input=tokens_input,
        tokens_output=tokens_output,
        # Go through ``str`` so the Decimal carries the float's short repr, not its binary expansion.
        total_cost=Decimal(str(cost_float)),
        duration_ms=result_message.duration_ms,
        session_id=result_message.session_id,
        num_turns=result_message.num_turns,
        is_error=result_message.is_error,
    )
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Agent prompt — Prompt construction and context assembly for agent invocation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from arcwright_ai.core.types import ContextBundle
|
|
9
|
+
from arcwright_ai.validation.v3_reflexion import ReflexionFeedback
|
|
10
|
+
|
|
11
|
+
__all__: list[str] = ["build_prompt"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def build_prompt(bundle: ContextBundle, *, feedback: ReflexionFeedback | None = None) -> str:
    """Render a ContextBundle as the markdown prompt handed to the SDK.

    The prompt always opens with a ``## Story`` section.  ``## Requirements``,
    ``## Architecture`` and ``## Project Conventions`` follow, in that order,
    each only when its bundle field is non-empty.  If ``feedback`` is given
    and reports a failed attempt, a ``## Previous Validation Feedback``
    section listing every unmet criterion and its detail is appended last.
    Sections are separated by one blank line.

    Args:
        bundle: The assembled context payload from the preflight node.
        feedback: Optional reflexion feedback from a previous validation
            attempt.  Ignored when ``None`` or when ``feedback.passed`` is true.

    Returns:
        A formatted prompt string ready for ``claude_code_sdk.query()``.
    """
    optional_sections = (
        ("Requirements", bundle.domain_requirements),
        ("Architecture", bundle.architecture_sections),
        ("Project Conventions", bundle.answerer_rules),
    )

    sections: list[str] = [f"## Story\n\n{bundle.story_content}"]
    sections.extend(f"## {title}\n\n{body}" for title, body in optional_sections if body)

    # No feedback, or the previous attempt passed: nothing more to add.
    if feedback is None or feedback.passed:
        return "\n\n".join(sections)

    report: list[str] = [
        "## Previous Validation Feedback",
        "",
        f"**Attempt {feedback.attempt_number} failed.** The following acceptance criteria were NOT met:",
        "",
    ]
    for criterion in feedback.unmet_criteria:
        explanation = feedback.feedback_per_criterion.get(criterion, "No details provided")
        report += [f"### AC {criterion}", explanation, ""]
    report.append("**Fix all unmet criteria above before completing this story.**")

    sections.append("\n".join(report))
    return "\n\n".join(sections)
|