arcwright-ai 0.1.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. arcwright_ai/__init__.py +12 -0
  2. arcwright_ai/agent/__init__.py +16 -0
  3. arcwright_ai/agent/invoker.py +567 -0
  4. arcwright_ai/agent/prompt.py +59 -0
  5. arcwright_ai/agent/sandbox.py +181 -0
  6. arcwright_ai/cli/__init__.py +7 -0
  7. arcwright_ai/cli/app.py +29 -0
  8. arcwright_ai/cli/clean.py +226 -0
  9. arcwright_ai/cli/dispatch.py +876 -0
  10. arcwright_ai/cli/halt.py +700 -0
  11. arcwright_ai/cli/resume.py +171 -0
  12. arcwright_ai/cli/status.py +904 -0
  13. arcwright_ai/context/__init__.py +21 -0
  14. arcwright_ai/context/answerer.py +400 -0
  15. arcwright_ai/context/injector.py +517 -0
  16. arcwright_ai/core/__init__.py +129 -0
  17. arcwright_ai/core/config.py +777 -0
  18. arcwright_ai/core/constants.py +183 -0
  19. arcwright_ai/core/events.py +43 -0
  20. arcwright_ai/core/exceptions.py +112 -0
  21. arcwright_ai/core/io.py +94 -0
  22. arcwright_ai/core/lifecycle.py +60 -0
  23. arcwright_ai/core/types.py +212 -0
  24. arcwright_ai/engine/__init__.py +30 -0
  25. arcwright_ai/engine/graph.py +72 -0
  26. arcwright_ai/engine/nodes.py +1791 -0
  27. arcwright_ai/engine/state.py +91 -0
  28. arcwright_ai/output/__init__.py +41 -0
  29. arcwright_ai/output/provenance.py +250 -0
  30. arcwright_ai/output/run_manager.py +564 -0
  31. arcwright_ai/output/summary.py +939 -0
  32. arcwright_ai/py.typed +0 -0
  33. arcwright_ai/scm/__init__.py +32 -0
  34. arcwright_ai/scm/branch.py +793 -0
  35. arcwright_ai/scm/git.py +281 -0
  36. arcwright_ai/scm/pr.py +941 -0
  37. arcwright_ai/scm/worktree.py +367 -0
  38. arcwright_ai/validation/__init__.py +37 -0
  39. arcwright_ai/validation/pipeline.py +231 -0
  40. arcwright_ai/validation/v3_reflexion.py +426 -0
  41. arcwright_ai/validation/v6_invariant.py +507 -0
  42. arcwright_ai-0.1.1.dev0.dist-info/METADATA +325 -0
  43. arcwright_ai-0.1.1.dev0.dist-info/RECORD +46 -0
  44. arcwright_ai-0.1.1.dev0.dist-info/WHEEL +4 -0
  45. arcwright_ai-0.1.1.dev0.dist-info/entry_points.txt +2 -0
  46. arcwright_ai-0.1.1.dev0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,12 @@
1
+ """Arcwright AI — Deterministic orchestration shell for autonomous AI agent execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib.metadata import PackageNotFoundError, version
6
+
7
__all__ = ["__version__"]

# Look the version up in the installed distribution's metadata instead of
# hard-coding it in the source.
try:
    __version__ = version("arcwright-ai")
except PackageNotFoundError:  # pragma: no cover
    # No installed distribution named "arcwright-ai" was found; expose a
    # placeholder so ``arcwright_ai.__version__`` is always a string.
    __version__ = "0.0.0.dev0"
@@ -0,0 +1,16 @@
1
+ """Agent package — Claude Code SDK integration for AI agent invocation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from arcwright_ai.agent.invoker import InvocationResult, invoke_agent
6
+ from arcwright_ai.agent.prompt import build_prompt
7
+ from arcwright_ai.agent.sandbox import PathValidator, validate_path, validate_temp_path
8
+
9
+ __all__: list[str] = [
10
+ "InvocationResult",
11
+ "PathValidator",
12
+ "build_prompt",
13
+ "invoke_agent",
14
+ "validate_path",
15
+ "validate_temp_path",
16
+ ]
@@ -0,0 +1,567 @@
1
+ """Agent invoker — Claude Code SDK integration for dispatching agent work."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import os
8
+ import random
9
+ import re
10
+ from dataclasses import dataclass
11
+ from decimal import Decimal
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ from arcwright_ai.core.constants import DIR_ARCWRIGHT, DIR_TMP
15
+ from arcwright_ai.core.exceptions import AgentError, AgentTimeoutError, SandboxViolation
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import AsyncGenerator, Awaitable, Callable
19
+ from pathlib import Path
20
+
21
+ from claude_code_sdk.types import PermissionResultAllow, PermissionResultDeny
22
+
23
+ from arcwright_ai.agent.sandbox import PathValidator
24
+
25
+ __all__: list[str] = ["InvocationResult", "invoke_agent"]
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Module-level constants
31
+ # ---------------------------------------------------------------------------
32
+
33
+ _BACKOFF_BASE: float = 2.0
34
+ _BACKOFF_CAP: float = 120.0
35
+ _BACKOFF_MAX_RETRIES: int = 7
36
+ _RATE_LIMIT_RE: re.Pattern[str] = re.compile(r"rate.?limit|429|too many requests", re.IGNORECASE)
37
+ _FILE_WRITE_TOOLS: frozenset[str] = frozenset({"CreateFile", "Edit", "MultiEdit", "Write"})
38
+
39
+ # Flag prevents double-patching across multiple invoke_agent calls.
40
+ _SDK_PARSER_PATCHED: bool = False
41
+
42
+
43
+ class _SkippedMessage:
44
+ """Sentinel returned by the patched SDK parser for unrecognised message types.
45
+
46
+ Carrying the original ``type`` field lets the streaming loop detect whether
47
+ a ``rate_limit_event`` was silently dropped just before the claude CLI
48
+ exited with code 1, so we can treat that as a retryable condition.
49
+ """
50
+
51
+ __slots__ = ("msg_type",)
52
+
53
+ def __init__(self, msg_type: str) -> None:
54
+ self.msg_type = msg_type
55
+
56
+
57
+ # Flag prevents registering the asyncio exception handler more than once.
58
+ _BG_HANDLER_INSTALLED: bool = False
59
+
60
+
61
+ def _claude_meta_dir() -> Path:
62
+ """Return the resolved ``~/.claude`` directory path (lazy, no import-time side-effects)."""
63
+ from pathlib import Path as _Path
64
+
65
+ return (_Path.home() / ".claude").resolve()
66
+
67
+
68
+ def _suppress_bg_cancel_scope_errors() -> None:
69
+ """Install a one-shot asyncio exception handler to silence Python 3.14 / anyio
70
+ ``RuntimeError: Attempted to exit cancel scope in a different task`` noise.
71
+
72
+ Python 3.14 tightened ``asyncio`` so that ``anyio`` cancel scopes cannot be
73
+ exited from a different task than they were entered in. The
74
+ ``claude_code_sdk`` internal async generator cleanup path hits this edge
75
+ case when the iterator is abandoned (e.g. after a denied tool-use). The
76
+ resulting ``RuntimeError`` is surfaced only as a background
77
+ "Task exception was never retrieved" warning and has no effect on
78
+ correctness, so we suppress it here.
79
+ """
80
+ global _BG_HANDLER_INSTALLED
81
+ if _BG_HANDLER_INSTALLED:
82
+ return
83
+
84
+ import asyncio
85
+
86
+ loop = asyncio.get_running_loop()
87
+ original_handler = loop.get_exception_handler()
88
+
89
+ def _handler(lp: asyncio.AbstractEventLoop, context: dict) -> None: # type: ignore[type-arg]
90
+ exc = context.get("exception")
91
+ if isinstance(exc, RuntimeError) and "cancel scope" in str(exc).lower():
92
+ logger.debug(
93
+ "agent.bg_cancel_scope_suppressed",
94
+ extra={"data": {"error": str(exc)}},
95
+ )
96
+ return
97
+ if original_handler is not None:
98
+ original_handler(lp, context)
99
+ else:
100
+ lp.default_exception_handler(context)
101
+
102
+ loop.set_exception_handler(_handler)
103
+ _BG_HANDLER_INSTALLED = True
104
+
105
+
106
def _patch_sdk_parser() -> None:
    """Monkeypatch the SDK message parser so unparseable messages are skipped.

    Claude Code SDK v0.0.25 raises ``MessageParseError`` for unrecognised
    streaming message types (e.g. ``rate_limit_event``).  This patch rebinds
    ``parse_message`` in the SDK client module to a wrapper that, instead of
    raising, returns a ``_SkippedMessage`` sentinel carrying the message's
    ``type``.  The streaming loop in this module filters those sentinels out.

    The patch is applied at most once per process, guarded by
    ``_SDK_PARSER_PATCHED``.
    """
    global _SDK_PARSER_PATCHED
    if _SDK_PARSER_PATCHED:
        return

    import claude_code_sdk._internal.client as _client_mod
    import claude_code_sdk._internal.message_parser as _parser_mod

    _original = _parser_mod.parse_message

    def _safe_parse_message(data: Any) -> Any:
        try:
            return _original(data)
        except Exception:
            # Deliberately broad: any parse failure becomes a sentinel so the
            # stream keeps flowing.  The traceback is attached to the debug
            # record so a malformed message of a *known* type is still
            # diagnosable rather than silently lost.
            msg_type = data.get("type", "<unknown>") if isinstance(data, dict) else "<invalid>"
            logger.debug("Skipping unrecognised SDK message type: %s", msg_type, exc_info=True)
            return _SkippedMessage(msg_type)

    # Patch the name *in the client module* (where it was imported); rebinding
    # it on the parser module would not affect the client's own reference.
    _client_mod.parse_message = _safe_parse_message  # type: ignore[attr-defined]
    _SDK_PARSER_PATCHED = True
135
+
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # InvocationResult dataclass
139
+ # ---------------------------------------------------------------------------
140
+
141
+
142
@dataclass(frozen=True)
class InvocationResult:
    """Immutable record describing one completed Claude Code SDK invocation.

    Bundles the agent's text output with the usage, cost and session metadata
    reported by the SDK, for budget tracking and provenance.

    Attributes:
        output_text: Concatenation of every text block the agent produced.
        tokens_input: Input token count taken from the SDK usage report.
        tokens_output: Output token count taken from the SDK usage report.
        total_cost: Cost in USD reported by the SDK, held as ``Decimal`` so
            later arithmetic is exact.
        duration_ms: Invocation duration in milliseconds as reported by the SDK.
        session_id: SDK session identifier, useful when debugging.
        num_turns: Number of conversational turns in the session.
        is_error: ``True`` when the SDK flagged the result as an error.
    """

    # Agent output
    output_text: str

    # Usage and cost
    tokens_input: int
    tokens_output: int
    total_cost: Decimal

    # Session metadata
    duration_ms: int
    session_id: str
    num_turns: int
    is_error: bool
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # Internal helpers
172
+ # ---------------------------------------------------------------------------
173
+
174
+
175
def _wrap_sdk_error(error: Exception) -> AgentError:
    """Wrap an SDK or generic exception into the appropriate AgentError subclass.

    Timeouts are recognised in two ways: the exception is a built-in or
    asyncio ``TimeoutError``, or it is a ``ClaudeSDKError`` whose message
    mentions a timeout ("timeout", "timed out", "time-out", ...).

    Args:
        error: The original exception to wrap.

    Returns:
        An ``AgentTimeoutError`` for timeouts, otherwise an ``AgentError``.
        The original message is preserved under ``details["original_error"]``.
    """
    from claude_code_sdk._errors import ClaudeSDKError

    message = str(error)
    details: dict[str, Any] = {"original_error": message}

    # asyncio.TimeoutError only became an alias of the built-in in Python 3.11,
    # so both are checked explicitly.
    if isinstance(error, (TimeoutError, asyncio.TimeoutError)):
        return AgentTimeoutError(f"Agent session timed out: {message}", details=details)

    if isinstance(error, ClaudeSDKError):
        # Accept the common spellings, not just the single word "timeout".
        if re.search(r"timed?[\s-]?out", message, re.IGNORECASE):
            return AgentTimeoutError(f"Agent session timed out: {message}", details=details)
        return AgentError(f"SDK error: {message}", details=details)

    return AgentError(f"Unexpected error during agent invocation: {message}", details=details)
194
+
195
+
196
def _validate_tool_use(block: Any, sandbox: PathValidator, cwd: Path) -> None:
    """Re-check a streamed ToolUseBlock against the sandbox (defense-in-depth).

    Runs for tool-use blocks seen in the SDK message stream, in addition to
    the ``can_use_tool`` callback.  Only tools listed in ``_FILE_WRITE_TOOLS``
    that carry a ``file_path`` or ``path`` input are inspected; everything
    else returns immediately.

    Args:
        block: A ``ToolUseBlock`` from the SDK message stream.
        sandbox: The injected path validator function.
        cwd: The working directory (sandbox boundary).

    Raises:
        SandboxViolation: If the injected validator rejects the path, or if a
            path spelled as a project temp path resolves outside the temp
            directory.
    """
    from pathlib import Path as _Path

    if block.name not in _FILE_WRITE_TOOLS:
        return
    raw_target: str | None = block.input.get("file_path") or block.input.get("path")
    if not raw_target:
        return

    target = _Path(raw_target)
    tmp_root = (cwd / DIR_ARCWRIGHT / DIR_TMP).resolve()
    # Relative targets are interpreted against the working directory.
    resolved_target = (target if target.is_absolute() else cwd.resolve() / target).resolve()

    # Writes into ~/.claude/ (Claude's session-resume scratchpad) are ignored
    # here rather than treated as violations: Arcwright never uses CLI resume,
    # and returning without raising lets the session continue normally.
    if resolved_target.is_relative_to(_claude_meta_dir()):
        logger.debug(
            "agent.sandbox.deny_claude_meta",
            extra={"data": {"tool": block.name, "path": str(target)}},
        )
        return

    targets_tmp = resolved_target.is_relative_to(tmp_root)
    if targets_tmp:
        # Make sure the temp directory exists before the validator runs.
        tmp_root.mkdir(parents=True, exist_ok=True)

    try:
        sandbox(target, cwd, block.name)
    except SandboxViolation:
        logger.info(
            "agent.sandbox.deny",
            extra={
                "data": {
                    "tool": block.name,
                    "path": str(target),
                    "cwd": str(cwd),
                }
            },
        )
        raise

    if targets_tmp:
        # Validator accepted it and it really lives in the temp directory.
        return

    # The path was *written* as a temp path but resolves somewhere else
    # (e.g. through a symlink or ".." segments): reject it.
    parts = os.path.normpath(raw_target).split(os.sep)
    spelled_as_tmp = parts[:2] == [DIR_ARCWRIGHT, DIR_TMP] or parts[:3] == [".", DIR_ARCWRIGHT, DIR_TMP]
    if spelled_as_tmp:
        raise SandboxViolation(
            f"Temp files must target {tmp_root}, got: {resolved_target}",
            details={
                "path": raw_target,
                "resolved": str(resolved_target),
                "expected_tmp": str(tmp_root),
            },
        )
265
+
266
+
267
def _make_tool_validator(
    sandbox: PathValidator,
    cwd: Path,
) -> Callable[[str, dict[str, Any], Any], Awaitable[PermissionResultAllow | PermissionResultDeny]]:
    """Build the ``can_use_tool`` callback that applies sandbox rules at the SDK level.

    The returned coroutine function is meant for ``ClaudeCodeOptions.can_use_tool``.
    Tools outside ``_FILE_WRITE_TOOLS``, and file tools without a ``file_path``
    or ``path`` input, are always allowed.  For the rest, the target path is
    run through the injected validator and sandbox violations are turned into
    ``PermissionResultDeny`` results instead of exceptions.

    Args:
        sandbox: The injected path validator.
        cwd: The working directory (sandbox boundary).

    Returns:
        An async callback returning ``PermissionResultAllow`` for permitted
        calls and ``PermissionResultDeny`` (with a reason) for rejected ones.
    """
    from pathlib import Path as _Path

    from claude_code_sdk.types import PermissionResultAllow, PermissionResultDeny

    async def can_use_tool(
        tool_name: str,
        tool_input: dict[str, Any],
        context: Any,
    ) -> PermissionResultAllow | PermissionResultDeny:
        if tool_name not in _FILE_WRITE_TOOLS:
            return PermissionResultAllow()
        raw_target: str | None = tool_input.get("file_path") or tool_input.get("path")
        if not raw_target:
            return PermissionResultAllow()

        target = _Path(raw_target)
        tmp_root = (cwd / DIR_ARCWRIGHT / DIR_TMP).resolve()
        # Relative targets are interpreted against the working directory.
        resolved_target = (target if target.is_absolute() else cwd.resolve() / target).resolve()

        # Writes into ~/.claude/ (Claude's session-resume scratchpad) are
        # refused: Arcwright never uses CLI resume and does not want files
        # accumulating outside the project.  The denial does not end the session.
        if resolved_target.is_relative_to(_claude_meta_dir()):
            logger.debug(
                "agent.sandbox.deny_claude_meta",
                extra={"data": {"tool": tool_name, "path": raw_target}},
            )
            return PermissionResultDeny(
                message="~/.claude/ writes are not permitted; Arcwright does not use Claude session-resume."
            )

        targets_tmp = resolved_target.is_relative_to(tmp_root)
        if targets_tmp:
            # Make sure the temp directory exists before the validator runs.
            tmp_root.mkdir(parents=True, exist_ok=True)

        try:
            sandbox(target, cwd, tool_name)
        except SandboxViolation as exc:
            logger.info(
                "agent.sandbox.deny",
                extra={
                    "data": {
                        "tool": tool_name,
                        "path": raw_target,
                        "cwd": str(cwd),
                        "reason": str(exc),
                    }
                },
            )
            return PermissionResultDeny(message=str(exc))

        if targets_tmp:
            # Validator accepted it and it really lives in the temp directory.
            return PermissionResultAllow()

        # The path was *written* as a temp path but resolves somewhere else
        # (e.g. through a symlink or ".." segments): refuse it.
        parts = os.path.normpath(raw_target).split(os.sep)
        spelled_as_tmp = parts[:2] == [DIR_ARCWRIGHT, DIR_TMP] or parts[:3] == [".", DIR_ARCWRIGHT, DIR_TMP]
        if spelled_as_tmp:
            return PermissionResultDeny(message=f"Temp files must target {tmp_root}, got: {resolved_target}")
        return PermissionResultAllow()

    return can_use_tool
347
+
348
+
349
async def _invoke_with_backoff(
    prompt: str,
    options: Any,
) -> AsyncGenerator[Any, None]:
    """Invoke the SDK, retrying with exponential backoff on rate-limit failures.

    Calls ``claude_code_sdk.query()`` and re-yields its messages, dropping the
    ``_SkippedMessage`` sentinels produced by the patched parser.  A failure is
    treated as a rate limit when the error text (including stderr) matches
    ``_RATE_LIMIT_RE``, or when a ``rate_limit_event`` was skipped during the
    attempt and the process then exited with code 1.  ``MessageParseError`` is
    retried as well.  Between attempts the generator sleeps
    ``_BACKOFF_BASE * 2**attempt`` seconds plus up to 0.5 s of jitter, capped
    at ``_BACKOFF_CAP``; there are at most ``_BACKOFF_MAX_RETRIES`` attempts.

    Each retry starts a fresh ``query()`` call, so messages yielded by a
    failed attempt are not withdrawn.

    When ``can_use_tool`` is set on options, the SDK requires the prompt
    to be an ``AsyncIterable`` (streaming mode), so the plain string is wrapped
    in a single-message async generator.

    Args:
        prompt: The prompt string to pass to the SDK.
        options: A ``ClaudeCodeOptions`` instance.

    Yields:
        Typed SDK message objects as yielded by ``claude_code_sdk.query()``.

    Raises:
        AgentError: On non-rate-limit SDK errors, or when every attempt failed
            with a retryable error (the last such error is chained as the
            cause and recorded in ``details["last_error"]``).
    """
    from claude_code_sdk import query as sdk_query
    from claude_code_sdk._errors import ClaudeSDKError, MessageParseError

    # Ensure the SDK parser tolerates unknown message types (e.g.
    # rate_limit_event in v0.0.25) before we start streaming.
    _patch_sdk_parser()

    # SDK requires AsyncIterable prompt when can_use_tool is configured
    needs_streaming = getattr(options, "can_use_tool", None) is not None

    async def _prompt_stream() -> AsyncGenerator[dict[str, Any], None]:
        yield {
            "type": "user",
            "message": {"role": "user", "content": prompt},
        }

    def _backoff_delay(attempt_index: int) -> float:
        # Exponential growth with a little jitter, never above the cap.
        return min(
            _BACKOFF_BASE * (2**attempt_index) + random.uniform(0, 0.5),
            _BACKOFF_CAP,
        )

    last_error: Exception | None = None

    for attempt in range(_BACKOFF_MAX_RETRIES):
        # Sleeping after the final attempt would only delay the failure.
        is_last_attempt = attempt == _BACKOFF_MAX_RETRIES - 1
        saw_rate_limit_event: bool = False
        try:
            # A generator can be consumed once, so build a new one per attempt.
            sdk_prompt: str | AsyncGenerator[dict[str, Any], None] = _prompt_stream() if needs_streaming else prompt
            async for message in sdk_query(prompt=sdk_prompt, options=options):
                if isinstance(message, _SkippedMessage):
                    # Patched parse_message returned a sentinel for an unknown
                    # message type.  Track rate_limit_event specifically so we
                    # can retry if the process then exits with code 1.
                    if message.msg_type == "rate_limit_event":
                        saw_rate_limit_event = True
                    continue
                yield message
            return
        except MessageParseError as exc:
            # Must be handled before ClaudeSDKError.  SDK v0.0.25 doesn't
            # handle some streaming message types (e.g. rate_limit_event);
            # log and retry on the next attempt.
            last_error = exc
            logger.info(
                "agent.sdk_parse_error",
                extra={
                    "data": {
                        "attempt": attempt + 1,
                        "error": str(exc),
                    }
                },
            )
            if not is_last_attempt:
                await asyncio.sleep(_backoff_delay(attempt))
        except ClaudeSDKError as exc:
            sdk_error_detail: str = str(exc)
            stderr: str | None = getattr(exc, "stderr", None)
            exit_code: int | None = getattr(exc, "exit_code", None)
            if stderr:
                sdk_error_detail = f"{sdk_error_detail} | stderr={stderr}"
            is_rate_limit = _RATE_LIMIT_RE.search(sdk_error_detail) or (saw_rate_limit_event and exit_code == 1)
            if not is_rate_limit:
                raise _wrap_sdk_error(exc) from exc

            last_error = exc
            wait = 0.0 if is_last_attempt else _backoff_delay(attempt)
            logger.info(
                "agent.rate_limit",
                extra={
                    "data": {
                        "attempt": attempt + 1,
                        "wait_seconds": round(wait, 2),
                        "error": sdk_error_detail,
                        "exit_code": exit_code,
                        "triggered_by": "rate_limit_event" if saw_rate_limit_event else "error_pattern",
                    }
                },
            )
            if not is_last_attempt:
                await asyncio.sleep(wait)

    exhausted_details: dict[str, Any] = {"attempts": _BACKOFF_MAX_RETRIES}
    if last_error is not None:
        exhausted_details["last_error"] = str(last_error)
    raise AgentError(
        "Rate limit: max retries exhausted",
        details=exhausted_details,
    ) from last_error
456
+
457
+
458
+ # ---------------------------------------------------------------------------
459
+ # Public API
460
+ # ---------------------------------------------------------------------------
461
+
462
+
463
async def invoke_agent(
    prompt: str,
    *,
    model: str,
    cwd: Path,
    sandbox: PathValidator,
    max_turns: int | None = None,
) -> InvocationResult:
    """Invoke Claude Code SDK to execute a story implementation.

    Streams messages from the SDK (with rate-limit backoff), collects the
    agent's text output, re-validates every streamed tool use through the
    injected sandbox, and captures token usage and cost from the final
    ``ResultMessage``.  No state is kept between calls apart from the one-time
    module-level SDK patches.

    Args:
        prompt: The assembled prompt string from ``build_prompt()``.
        model: Claude model version identifier
            (e.g., ``"claude-sonnet-4-20250514"``).
        cwd: Working directory for agent file operations (typically the
            worktree path).  Also serves as the sandbox boundary.
        sandbox: Path validator function (``PathValidator`` protocol) for
            sandbox enforcement via dependency injection.
        max_turns: Optional maximum number of conversational turns.

    Returns:
        ``InvocationResult`` containing agent output, token usage, cost,
        and session metadata.

    Raises:
        AgentError: On SDK invocation failure, when the stream ends without a
            ``ResultMessage``, or when rate limit max retries is exhausted.
        AgentTimeoutError: On SDK timeout.
        SandboxViolation: If the agent attempts a file operation outside
            the sandbox boundary.
    """
    # NOTE(review): ``query`` is unused in this function; the import is
    # presumably kept on purpose (e.g. as a patch point) — confirm before removing.
    from claude_code_sdk import ClaudeCodeOptions, query  # noqa: F401
    from claude_code_sdk.types import AssistantMessage, ResultMessage, TextBlock, ToolUseBlock

    # Suppress Python 3.14 / anyio cancel-scope RuntimeErrors emitted as
    # unhandled background-task warnings during async generator cleanup.
    _suppress_bg_cancel_scope_errors()

    options = ClaudeCodeOptions(
        model=model,
        cwd=str(cwd),
        permission_mode="bypassPermissions",
        max_turns=max_turns,
        can_use_tool=_make_tool_validator(sandbox, cwd),
    )

    output_parts: list[str] = []
    result_message: ResultMessage | None = None

    stream = _invoke_with_backoff(prompt, options)
    try:
        async for message in stream:
            if isinstance(message, AssistantMessage):
                for block in message.content:
                    if isinstance(block, TextBlock):
                        output_parts.append(block.text)
                    elif isinstance(block, ToolUseBlock):
                        _validate_tool_use(block, sandbox, cwd)
            elif isinstance(message, ResultMessage):
                result_message = message
    except (AgentError, SandboxViolation):
        # Already domain errors.  SandboxViolation is named explicitly so it
        # reaches the caller unchanged regardless of how the exception
        # hierarchy is arranged, as the Raises section promises.
        raise
    except Exception as exc:
        raise _wrap_sdk_error(exc) from exc
    finally:
        # Close the generator from this task rather than leaving it to the GC.
        await stream.aclose()

    if result_message is None:
        raise AgentError(
            "SDK stream ended without ResultMessage",
            details={"prompt_length": len(prompt)},
        )

    usage: dict[str, Any] = result_message.usage or {}
    # ``or 0`` also covers a key that is present but set to None.
    tokens_input: int = int(usage.get("input_tokens") or 0)
    tokens_output: int = int(usage.get("output_tokens") or 0)
    cost_float: float = result_message.total_cost_usd or 0.0

    logger.info(
        "agent.response",
        extra={
            "data": {
                "tokens_input": tokens_input,
                "tokens_output": tokens_output,
                "cost_usd": str(round(cost_float, 6)),
                "session_id": result_message.session_id,
            }
        },
    )

    return InvocationResult(
        output_text="".join(output_parts),
        tokens_input=tokens_input,
        tokens_output=tokens_output,
        # Go through str() so the Decimal holds the float's short repr rather
        # than its full binary expansion.
        total_cost=Decimal(str(cost_float)),
        duration_ms=result_message.duration_ms,
        session_id=result_message.session_id,
        num_turns=result_message.num_turns,
        is_error=result_message.is_error,
    )
@@ -0,0 +1,59 @@
1
+ """Agent prompt — Prompt construction and context assembly for agent invocation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from arcwright_ai.core.types import ContextBundle
9
+ from arcwright_ai.validation.v3_reflexion import ReflexionFeedback
10
+
11
+ __all__: list[str] = ["build_prompt"]
12
+
13
+
14
def build_prompt(bundle: ContextBundle, *, feedback: ReflexionFeedback | None = None) -> str:
    """Render a ContextBundle (and optional retry feedback) as a markdown prompt.

    The prompt always starts with a ``## Story`` section.  ``## Requirements``,
    ``## Architecture`` and ``## Project Conventions`` follow, each only when
    the matching bundle field is non-empty.  If ``feedback`` is given and did
    not pass, a ``## Previous Validation Feedback`` section listing every unmet
    acceptance criterion is appended.  Sections are separated by blank lines.

    Args:
        bundle: The assembled context payload from the preflight node.
        feedback: Optional reflexion feedback from a previous validation
            attempt.  Ignored when ``None`` or when ``feedback.passed`` is true.

    Returns:
        A formatted prompt string ready for ``claude_code_sdk.query()``.
    """
    sections: list[str] = [f"## Story\n\n{bundle.story_content}"]

    # (heading, body) pairs in output order; empty bodies are left out.
    optional_sections = (
        ("## Requirements", bundle.domain_requirements),
        ("## Architecture", bundle.architecture_sections),
        ("## Project Conventions", bundle.answerer_rules),
    )
    sections.extend(f"{heading}\n\n{body}" for heading, body in optional_sections if body)

    if feedback is None or feedback.passed:
        return "\n\n".join(sections)

    report: list[str] = [
        "## Previous Validation Feedback",
        "",
        f"**Attempt {feedback.attempt_number} failed.** The following acceptance criteria were NOT met:",
        "",
    ]
    for criterion_id in feedback.unmet_criteria:
        explanation = feedback.feedback_per_criterion.get(criterion_id, "No details provided")
        report += [f"### AC {criterion_id}", explanation, ""]
    report.append("**Fix all unmet criteria above before completing this story.**")

    sections.append("\n".join(report))
    return "\n\n".join(sections)