@hegemonart/get-design-done 1.20.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/.claude-plugin/marketplace.json +9 -12
  2. package/.claude-plugin/plugin.json +8 -31
  3. package/CHANGELOG.md +200 -0
  4. package/README.md +48 -7
  5. package/bin/gdd-sdk +55 -0
  6. package/hooks/_hook-emit.js +81 -0
  7. package/hooks/gdd-bash-guard.js +8 -0
  8. package/hooks/gdd-decision-injector.js +2 -0
  9. package/hooks/gdd-protected-paths.js +8 -0
  10. package/hooks/gdd-trajectory-capture.js +64 -0
  11. package/hooks/hooks.json +9 -0
  12. package/package.json +19 -47
  13. package/reference/codex-tools.md +53 -0
  14. package/reference/gemini-tools.md +53 -0
  15. package/reference/registry.json +14 -0
  16. package/scripts/cli/gdd-events.mjs +283 -0
  17. package/scripts/e2e/run-headless.ts +514 -0
  18. package/scripts/lib/cli/commands/audit.ts +382 -0
  19. package/scripts/lib/cli/commands/init.ts +217 -0
  20. package/scripts/lib/cli/commands/query.ts +329 -0
  21. package/scripts/lib/cli/commands/run.ts +656 -0
  22. package/scripts/lib/cli/commands/stage.ts +468 -0
  23. package/scripts/lib/cli/index.ts +167 -0
  24. package/scripts/lib/cli/parse-args.ts +336 -0
  25. package/scripts/lib/connection-probe/index.cjs +263 -0
  26. package/scripts/lib/context-engine/index.ts +116 -0
  27. package/scripts/lib/context-engine/manifest.ts +69 -0
  28. package/scripts/lib/context-engine/truncate.ts +282 -0
  29. package/scripts/lib/context-engine/types.ts +59 -0
  30. package/scripts/lib/discuss-parallel-runner/aggregator.ts +448 -0
  31. package/scripts/lib/discuss-parallel-runner/discussants.ts +430 -0
  32. package/scripts/lib/discuss-parallel-runner/index.ts +223 -0
  33. package/scripts/lib/discuss-parallel-runner/types.ts +184 -0
  34. package/scripts/lib/event-chain.cjs +177 -0
  35. package/scripts/lib/event-stream/index.ts +31 -1
  36. package/scripts/lib/event-stream/reader.ts +139 -0
  37. package/scripts/lib/event-stream/types.ts +155 -1
  38. package/scripts/lib/event-stream/writer.ts +65 -8
  39. package/scripts/lib/explore-parallel-runner/index.ts +294 -0
  40. package/scripts/lib/explore-parallel-runner/mappers.ts +290 -0
  41. package/scripts/lib/explore-parallel-runner/synthesizer.ts +295 -0
  42. package/scripts/lib/explore-parallel-runner/types.ts +139 -0
  43. package/scripts/lib/harness/detect.ts +90 -0
  44. package/scripts/lib/harness/index.ts +64 -0
  45. package/scripts/lib/harness/tool-map.ts +142 -0
  46. package/scripts/lib/init-runner/index.ts +396 -0
  47. package/scripts/lib/init-runner/researchers.ts +245 -0
  48. package/scripts/lib/init-runner/scaffold.ts +224 -0
  49. package/scripts/lib/init-runner/synthesizer.ts +224 -0
  50. package/scripts/lib/init-runner/types.ts +143 -0
  51. package/scripts/lib/logger/index.ts +251 -0
  52. package/scripts/lib/logger/sinks.ts +269 -0
  53. package/scripts/lib/logger/types.ts +110 -0
  54. package/scripts/lib/pipeline-runner/human-gate.ts +134 -0
  55. package/scripts/lib/pipeline-runner/index.ts +527 -0
  56. package/scripts/lib/pipeline-runner/stage-handlers.ts +339 -0
  57. package/scripts/lib/pipeline-runner/state-machine.ts +144 -0
  58. package/scripts/lib/pipeline-runner/types.ts +183 -0
  59. package/scripts/lib/redact.cjs +122 -0
  60. package/scripts/lib/session-runner/errors.ts +406 -0
  61. package/scripts/lib/session-runner/index.ts +715 -0
  62. package/scripts/lib/session-runner/transcript.ts +189 -0
  63. package/scripts/lib/session-runner/types.ts +144 -0
  64. package/scripts/lib/tool-scoping/index.ts +219 -0
  65. package/scripts/lib/tool-scoping/parse-agent-tools.ts +207 -0
  66. package/scripts/lib/tool-scoping/stage-scopes.ts +139 -0
  67. package/scripts/lib/tool-scoping/types.ts +77 -0
  68. package/scripts/lib/trajectory/index.cjs +126 -0
  69. package/scripts/lib/transports/ws.cjs +179 -0
@@ -0,0 +1,715 @@
1
+ // scripts/lib/session-runner/index.ts — Phase 21 headless Agent SDK
2
+ // wrapper (Plan 21-01, SDK-13).
3
+ //
4
+ // Public surface:
5
+ //
6
+ // run(opts: SessionRunnerOptions): Promise<SessionResult>
7
+ //
8
+ // This is the ONLY point at which the repo should import
9
+ // `@anthropic-ai/claude-agent-sdk`. Every other Phase-21 runner
10
+ // (pipeline, explore, discuss, init) spawns sessions via `run()` so
11
+ // policy (budget, turn-cap, sanitizer, rate-guard, retry-once) is
12
+ // enforced in exactly one place.
13
+ //
14
+ // Contract highlights:
15
+ //
16
+ // * NEVER throws. Every failure mode becomes `SessionResult.status !==
17
+ // 'completed'` with `SessionResult.error` populated.
18
+ // * Prompt sanitizer runs BEFORE every SDK invocation (including
19
+ // retries). Sanitizer diagnostics ride on `SessionResult.sanitizer`.
20
+ // * Budget caps (USD + both token dims) are SESSION-TOTAL; retries
21
+ // share the envelope.
22
+ // * Retry-once fires only when `mapSdkError(err).retryable === true`.
23
+ // * Rate-guard is consulted pre-flight; response headers on chunks are
24
+ // ingested mid-session for cross-session cooperation.
25
+ // * Two events: `session.started` (always) + `session.completed`
26
+ // (always; payload status mirrors SessionResult.status). Optional
27
+ // `session.budget_exceeded` emitted when the budget trips.
28
+
29
+ import { appendEvent } from '../event-stream/index.ts';
30
+ import type { BaseEvent } from '../event-stream/index.ts';
31
+ import { sanitize as defaultSanitize } from '../prompt-sanitizer/index.ts';
32
+
33
+ import { mapSdkError } from './errors.ts';
34
+ import { TranscriptWriter, type TranscriptChunk } from './transcript.ts';
35
+ import type {
36
+ BudgetCap,
37
+ SessionResult,
38
+ SessionRunnerOptions,
39
+ TurnCap,
40
+ } from './types.ts';
41
+
42
+ // Re-exports — consumers import only from this file.
43
+ export type { BudgetCap, SessionRunnerOptions, SessionResult, TurnCap } from './types.ts';
44
+ export { mapSdkError } from './errors.ts';
45
+ export { TranscriptWriter } from './transcript.ts';
46
+
47
+ // CommonJS primitives — `.cjs` files loaded via createRequire. See
48
+ // errors.ts for the full rationale; same pattern here. We resolve paths
49
+ // against a repo-root anchor discovered at module load time so the
50
+ // session-runner survives tests that chdir into sandboxes.
51
+ import { createRequire } from 'node:module';
52
+ import { existsSync } from 'node:fs';
53
+ import { dirname as _dirname, join as _join, resolve as _resolve } from 'node:path';
54
/**
 * Locate the directory used as the anchor for the path-based `.cjs` requires
 * in this module.
 *
 * Walks upward from `process.cwd()` (at most 8 levels). A directory that has
 * a `package.json` AND the two `.cjs` primitives this module loads wins
 * immediately. This keeps the anchor correct when the walk starts inside a
 * nested directory that carries its own `package.json` (a sandbox, a
 * sub-package) but not the scripts.
 *
 * If no such directory is found, the nearest directory with a `package.json`
 * is returned (the previous behaviour), and failing that the starting cwd.
 *
 * @returns Absolute path of the chosen anchor directory.
 */
function _findRepoRoot(): string {
  const start = process.cwd();
  // Files that must exist under the anchor for the requires below to succeed.
  const requiredScripts = ['scripts/lib/jittered-backoff.cjs', 'scripts/lib/rate-guard.cjs'];
  let nearestManifestDir: string | null = null;
  let dir = start;
  for (let depth = 0; depth < 8; depth++) {
    if (existsSync(_join(dir, 'package.json'))) {
      const here = dir;
      if (requiredScripts.every((rel) => existsSync(_join(here, rel)))) return here;
      // Remember the first manifest we saw as the backward-compatible fallback.
      if (nearestManifestDir === null) nearestManifestDir = here;
    }
    const parent = _dirname(dir);
    if (parent === dir) break; // reached the filesystem root
    dir = parent;
  }
  return nearestManifestDir ?? start;
}
64
// Anchor directory for every path-based require below. Computed once, when
// this module is first evaluated, so later `process.chdir()` calls do not
// affect it.
const _REPO_ROOT = _findRepoRoot();
// CommonJS `require` whose resolution base is `<anchor>/package.json`; also
// used by `loadSdkQuery()` for the bare-specifier SDK lookup.
const _nodeRequire = createRequire(_join(_REPO_ROOT, 'package.json'));
// Backoff primitive. Only `delayMs` is typed here because it is the only
// member this file calls. Loaded eagerly: a missing file fails module import.
const jitteredBackoff = _nodeRequire(
  _resolve(_REPO_ROOT, 'scripts/lib/jittered-backoff.cjs'),
) as {
  delayMs: (attempt: number, opts?: { baseMs?: number; maxMs?: number; factor?: number; jitter?: number }) => number;
};
// Rate-guard primitive. `remaining()` is consulted before a session starts;
// `ingestHeaders()` is fed any header-like data seen on streamed chunks.
const rateGuard = _nodeRequire(
  _resolve(_REPO_ROOT, 'scripts/lib/rate-guard.cjs'),
) as {
  remaining: (provider: string) => {
    provider: string;
    remaining: number;
    resetAt: string;
    updatedAt: string;
  } | null;
  ingestHeaders: (provider: string, headers: unknown) => Promise<unknown>;
};
82
+
83
/** Rate-guard provider key for the Anthropic Agent SDK. */
const RATE_GUARD_PROVIDER = 'anthropic';

/**
 * Default attempt ceiling used by `run()` when `opts.maxRetries` is unset or
 * not positive. The value counts ATTEMPTS, not extra retries: 2 means the
 * first attempt plus one retry.
 */
const DEFAULT_MAX_RETRIES = 2;

/** Baseline retry backoff parameters passed to `jitteredBackoff.delayMs`
 *  (1s base → 30s cap). */
const RETRY_BACKOFF = { baseMs: 1000, maxMs: 30_000 } as const;

/**
 * Per-million-token USD rates keyed by model name.
 *
 * Unknown models fall back to `DEFAULT_MODEL_RATE`, which carries the same
 * numbers as the Sonnet entry. NOTE(review): that fallback is LOWER than the
 * Opus entry below, so an unrecognised Opus-class model would be
 * under-estimated rather than over-estimated — confirm this is intended.
 * NOTE(review): the figures are hard-coded; verify against current pricing.
 */
const MODEL_RATES: Readonly<Record<string, { input: number; output: number }>> = Object.freeze({
  'claude-opus-4-7': { input: 15, output: 75 },
  'claude-sonnet-4-5': { input: 3, output: 15 },
  'claude-haiku-4-5': { input: 0.8, output: 4 },
});
/** Fallback rate used by `rateFor` when no table entry matches. */
const DEFAULT_MODEL_RATE = Object.freeze({ input: 3, output: 15 });
103
+
104
/**
 * Look up the per-million-token rate for a model.
 *
 * Resolution order: exact key in `MODEL_RATES`, then the first table key that
 * is a prefix of `modelName` (e.g. a dated suffix such as
 * "claude-opus-4-7-20250101"), then `DEFAULT_MODEL_RATE`.
 *
 * @param modelName Model identifier, or `null`/empty when not yet known.
 * @returns The `{ input, output }` USD-per-million-token pair.
 */
function rateFor(modelName: string | null): { input: number; output: number } {
  if (modelName === null || modelName === '') {
    return DEFAULT_MODEL_RATE;
  }

  const exact = MODEL_RATES[modelName];
  if (exact !== undefined) {
    return exact;
  }

  for (const [knownModel, rate] of Object.entries(MODEL_RATES)) {
    if (modelName.startsWith(knownModel)) return rate;
  }

  return DEFAULT_MODEL_RATE;
}
119
+
120
/**
 * Convert accumulated token counts into a USD figure using the rate that
 * `rateFor(modelName)` resolves.
 *
 * @param inputTokens  Total input tokens.
 * @param outputTokens Total output tokens.
 * @param modelName    Model used for rate lookup (`null` → default rate).
 * @returns Input cost plus output cost, in USD.
 */
function usdCost(inputTokens: number, outputTokens: number, modelName: string | null): number {
  const { input: inputRate, output: outputRate } = rateFor(modelName);
  const inputUsd = (inputTokens / 1_000_000) * inputRate;
  const outputUsd = (outputTokens / 1_000_000) * outputRate;
  return inputUsd + outputUsd;
}
125
+
126
/** Per-process counter that disambiguates ids minted in the same millisecond. */
let _sessionSeq = 0;

/**
 * Build a session identifier that is unique within this process.
 *
 * The id keeps the historical `gdd-session-<ISO timestamp>-<pid>-<stage>`
 * text as its prefix and appends a monotonically increasing sequence number.
 * Without the suffix, two sessions started for the same stage within the
 * same millisecond would receive identical ids.
 *
 * @param stage Stage label embedded in the id.
 * @returns The session identifier string.
 */
function buildSessionId(stage: string): string {
  _sessionSeq += 1;
  return `gdd-session-${new Date().toISOString()}-${process.pid}-${stage}-${_sessionSeq}`;
}
130
+
131
/** Token counters as they appear on a chunk or on its nested message. */
interface ChunkUsage {
  input_tokens?: number;
  output_tokens?: number;
}

/** Content block fields read from `message.content[]`. */
interface ChunkMessageBlock {
  type?: string;
  text?: string;
  name?: string;
  input?: unknown;
}

/** Content block fields read from top-level `content[]`. */
interface ChunkTopLevelBlock extends ChunkMessageBlock {
  tool_use_id?: string;
  is_error?: boolean;
}

/**
 * Structural view of an SDK stream chunk, limited to the fields the run-loop
 * reads. Every field is optional; anything else on the chunk is ignored.
 */
interface ChunkShape {
  type?: string;
  subtype?: string;
  stop_reason?: string | null;
  model?: string;
  usage?: ChunkUsage;
  content?: Array<ChunkTopLevelBlock>;
  message?: {
    stop_reason?: string | null;
    model?: string;
    usage?: ChunkUsage;
    content?: Array<ChunkMessageBlock>;
  };
  // Rate-limit carriers consumed by extractHeaders().
  headers?: unknown;
  rate_limit?: unknown;
  // Tool event fields (SDK may emit tool_use / tool_result at top level).
  name?: string;
  input?: unknown;
  tool_use_id?: string;
  is_error?: boolean;
  result?: unknown;
  error?: unknown;
}
157
+
158
/**
 * View an arbitrary streamed value as a `ChunkShape`.
 *
 * Non-null objects are returned as-is (same reference, cast only); every
 * other value — `null`, `undefined`, primitives — yields a fresh empty object.
 */
function asChunk(raw: unknown): ChunkShape {
  const isObjectLike = typeof raw === 'object' && raw !== null;
  return isObjectLike ? (raw as ChunkShape) : {};
}
163
+
164
/**
 * Map a chunk's `type` to the transcript entry kind.
 *
 * The kinds `user`, `assistant`, `tool_use`, `tool_result`, `system` and
 * `usage` pass through unchanged. Anything else, including a missing `type`
 * and the SDK's "result" / control frames, is recorded as `system`.
 */
function chunkKind(ch: ChunkShape): TranscriptChunk['type'] {
  const kind = ch.type;
  if (
    kind === 'user' ||
    kind === 'assistant' ||
    kind === 'tool_use' ||
    kind === 'tool_result' ||
    kind === 'usage'
  ) {
    return kind;
  }
  return 'system';
}
185
+
186
/**
 * Pull rate-limit data off a chunk.
 *
 * Prefers `chunk.headers`, then `chunk.rate_limit`; a field counts as present
 * when it is neither `undefined` nor `null`.
 *
 * @returns The first present value, or `null` when neither field is set.
 */
function extractHeaders(ch: ChunkShape): unknown {
  return ch.headers ?? ch.rate_limit ?? null;
}
196
+
197
/**
 * Add a chunk's token usage onto a running total, and latch the model name
 * the first time one is seen.
 *
 * Both the top-level `usage` and the nested `message.usage` are folded in. A
 * chunk carrying both contributes both.
 * NOTE(review): if the SDK mirrors the same numbers in both places (or sends a
 * cumulative total on a final frame), this double counts — confirm against
 * the SDK's message shapes.
 *
 * Chunks come from an untyped stream, so inputs are validated defensively:
 * a `null` or non-object usage is skipped instead of throwing, only finite
 * numbers are added, and only a non-empty string is accepted as the model.
 *
 * @param acc Running totals; mutated in place.
 * @param ch  Chunk to read from.
 */
function foldUsage(
  acc: { input: number; output: number; model: string | null },
  ch: ChunkShape,
): void {
  const sources: unknown[] = [ch.usage, ch.message?.usage];
  for (const source of sources) {
    // `usage: null` is a plausible runtime value even though the type omits it.
    if (source === null || typeof source !== 'object') continue;
    const { input_tokens: inTok, output_tokens: outTok } = source as {
      input_tokens?: unknown;
      output_tokens?: unknown;
    };
    if (typeof inTok === 'number' && Number.isFinite(inTok)) acc.input += inTok;
    if (typeof outTok === 'number' && Number.isFinite(outTok)) acc.output += outTok;
  }

  if (acc.model === null) {
    // Same precedence as before: top-level model first, then message.model.
    const candidate: unknown = ch.model ?? ch.message?.model ?? null;
    if (typeof candidate === 'string' && candidate !== '') acc.model = candidate;
  }
}
228
+
229
/**
 * Report whether a chunk marks the end of a turn.
 *
 * A chunk is a turn stop when either its top-level `stop_reason` or its
 * nested `message.stop_reason` is a non-empty value (not `undefined`, `null`
 * or the empty string).
 */
function isTurnStop(ch: ChunkShape): boolean {
  const isSet = (reason: string | null | undefined): boolean =>
    !(reason === undefined || reason === null || reason === '');
  return isSet(ch.stop_reason) || isSet(ch.message?.stop_reason);
}
238
+
239
/**
 * Append any tool invocations found on a chunk to `toolCalls`.
 *
 * A chunk whose own `type` is `tool_use` contributes exactly one entry and
 * its content arrays are not inspected. Otherwise every `tool_use` block in
 * the top-level `content` array, then in `message.content`, is appended in
 * order. A missing name becomes `''` and a missing input becomes `null`.
 *
 * @param ch        Chunk to scan.
 * @param toolCalls Destination array; mutated in place.
 */
function collectToolUse(
  ch: ChunkShape,
  toolCalls: SessionResult['tool_calls'],
): void {
  if (ch.type === 'tool_use') {
    toolCalls.push({ name: ch.name ?? '', input: ch.input ?? null });
    return;
  }

  // tool_use blocks may sit in `ch.content` or one level deeper in
  // `ch.message.content`, depending on chunk subtype; scan both, top first.
  for (const blocks of [ch.content, ch.message?.content]) {
    if (!Array.isArray(blocks)) continue;
    for (const entry of blocks) {
      const isToolUse = entry !== null && typeof entry === 'object' && entry.type === 'tool_use';
      if (isToolUse) {
        toolCalls.push({ name: entry.name ?? '', input: entry.input ?? null });
      }
    }
  }
}
268
+
269
/**
 * Track the most recent non-empty text block seen on the stream.
 *
 * Scans the chunk's top-level `content` array and then `message.content`;
 * every block with `type === 'text'` and a non-empty string `text` replaces
 * the running value, so the last such block wins.
 *
 * @param ch           Chunk to scan.
 * @param currentFinal Value carried over from earlier chunks.
 * @returns The updated value, or `currentFinal` unchanged when the chunk
 *          holds no qualifying text.
 */
function updateFinalText(ch: ChunkShape, currentFinal: string | undefined): string | undefined {
  let latest = currentFinal;
  for (const blocks of [ch.content, ch.message?.content]) {
    if (!Array.isArray(blocks)) continue;
    for (const entry of blocks) {
      if (entry === null || typeof entry !== 'object') continue;
      if (entry.type !== 'text') continue;
      if (typeof entry.text === 'string' && entry.text.length > 0) {
        latest = entry.text;
      }
    }
  }
  return latest;
}
296
+
297
/**
 * Publish one session lifecycle event through `appendEvent()`.
 *
 * The event carries the current ISO timestamp, the session id and the given
 * payload. `stage` is stamped on the event only when it is neither `'init'`
 * nor `'custom'`, because the event-stream `Stage` union does not include
 * those two values. Any exception raised by `appendEvent()` is swallowed so
 * that a failing observer cannot fail the session.
 */
function emit(
  type: 'session.started' | 'session.completed' | 'session.budget_exceeded',
  stage: SessionRunnerOptions['stage'],
  sessionId: string,
  payload: Record<string, unknown>,
): void {
  const timestamp = new Date().toISOString();
  const ev: BaseEvent = { type, timestamp, sessionId, payload };

  if (!(stage === 'init' || stage === 'custom')) {
    ev.stage = stage;
  }

  try {
    appendEvent(ev);
  } catch {
    // Deliberately ignored: appendEvent persists first and broadcasts second,
    // so a throw here is expected to come from a bus subscriber, and a broken
    // subscriber must not take the session down.
  }
}
323
+
324
/** Run-loop result for a single attempt, as returned by `runOneAttempt()`. */
interface AttemptOutcome {
  /** `null` when the attempt completed naturally; populated on error / cap. */
  terminal: SessionResult['status'] | null;
  /** Structured error for `terminal === 'error'`; `undefined` otherwise. */
  error: SessionResult['error'];
  /** Minimum wait before a retry; `run()` takes the max of this and its own jittered backoff. */
  backoff_hint_ms: number;
  /** Whether `run()` may retry after this outcome (only consulted for errors). */
  retryable: boolean;
}
332
+
333
/**
 * Spawn one headless Agent SDK session and drive it to a terminal status.
 *
 * Flow:
 *   1. Sanitize the prompt (`opts.sanitizeOverride` or the default sanitizer).
 *   2. Open the transcript writer.
 *   3. Emit `session.started`.
 *   4. Rate-guard pre-flight: if the guard reports nothing remaining, return
 *      `status: 'error'` / `code: 'RATE_LIMITED'` without calling the SDK.
 *   5. Wire `opts.signal` to an internal AbortController.
 *   6. `turnCap.maxTurns <= 0` short-circuits to `turn_cap_exceeded`.
 *   7. Attempt loop: up to `opts.maxRetries` attempts (default
 *      `DEFAULT_MAX_RETRIES`). A retry happens only after a retryable error,
 *      after waiting max(jittered backoff, the error's backoff hint). Usage,
 *      turn count, tool calls and final text are shared across attempts.
 *   8. Emit `session.completed` (plus `session.budget_exceeded` when the
 *      budget tripped) and return the result.
 *
 * An external abort is honoured before every attempt, so a signal that is
 * already aborted, or that fires during the retry backoff, ends the session
 * as `'aborted'` without starting another SDK call.
 *
 * Failures inside an attempt are reported through `SessionResult.status` /
 * `SessionResult.error` rather than thrown. NOTE(review): the sanitizer call
 * and transcript setup are not wrapped, so an exception from either of them
 * still propagates to the caller.
 *
 * @param opts Session configuration (prompt, stage, budget, turn cap, …).
 * @returns The session result; inspect `status` to distinguish outcomes.
 */
export async function run(opts: SessionRunnerOptions): Promise<SessionResult> {
  // -- 1. Sanitize prompt first. ------------------------------------------
  const sanitizer = opts.sanitizeOverride ?? defaultSanitize;
  const sanResult = sanitizer(opts.prompt);
  const sanitizedPrompt = sanResult.sanitized;

  // -- 2. Resolve transcript path + open writer. --------------------------
  const transcriptPath = TranscriptWriter.pathFor(opts.stage, opts.transcriptDir);
  const transcript = new TranscriptWriter(transcriptPath);

  // -- 3. Seed result accumulator. ----------------------------------------
  const sessionId = buildSessionId(opts.stage);
  const toolCalls: SessionResult['tool_calls'] = [];
  const usage = { input: 0, output: 0, model: null as string | null };
  let turns = 0;
  let finalText: string | undefined;

  // Builds the SessionResult from the accumulators as they stand right now
  // and emits the matching `session.completed` event. Shared by every exit.
  const finish = (
    status: SessionResult['status'],
    error?: SessionResult['error'],
  ): SessionResult => {
    const result = buildResult({
      status,
      transcriptPath,
      turns,
      usage,
      toolCalls,
      finalText,
      sanitizer: sanResult,
      error,
    });
    emit('session.completed', opts.stage, sessionId, {
      stage: opts.stage,
      sessionId,
      status: result.status,
      turns: result.turns,
      usage: result.usage,
      transcript_path: transcriptPath,
      sanitizer: { applied: [...sanResult.applied], removedSections: [...sanResult.removedSections] },
    });
    return result;
  };

  // -- 4. Emit session.started. -------------------------------------------
  emit('session.started', opts.stage, sessionId, {
    stage: opts.stage,
    sessionId,
    allowedTools: opts.allowedTools ?? [],
    budget: { ...opts.budget },
    turnCap: { ...opts.turnCap },
    transcript_path: transcriptPath,
  });

  // -- 5. Rate-guard pre-flight. ------------------------------------------
  const preflight = rateGuard.remaining(RATE_GUARD_PROVIDER);
  if (preflight !== null && preflight.remaining <= 0) {
    const result = finish('error', {
      code: 'RATE_LIMITED',
      message: `rate-guard reports 0 remaining for ${RATE_GUARD_PROVIDER} until ${preflight.resetAt}`,
      kind: 'state_conflict',
      context: { provider: RATE_GUARD_PROVIDER, resetAt: preflight.resetAt },
    });
    transcript.close();
    return result;
  }

  // -- 6. External abort propagation. -------------------------------------
  const abortController = new AbortController();
  let externalAbortHit = false;
  const onExternalAbort = () => {
    externalAbortHit = true;
    abortController.abort();
  };
  if (opts.signal !== undefined) {
    if (opts.signal.aborted) {
      onExternalAbort();
    } else {
      opts.signal.addEventListener('abort', onExternalAbort, { once: true });
    }
  }

  // -- 7. Retry-once loop. ------------------------------------------------
  // NOTE: `opts.maxRetries` is used as the total attempt count.
  const maxAttempts = opts.maxRetries !== undefined && opts.maxRetries > 0
    ? opts.maxRetries
    : DEFAULT_MAX_RETRIES;

  // `maxTurns: 0` is a legal config meaning "abort before first turn".
  if (opts.turnCap.maxTurns <= 0) {
    const result = finish('turn_cap_exceeded');
    transcript.close();
    if (opts.signal !== undefined) opts.signal.removeEventListener('abort', onExternalAbort);
    return result;
  }

  let attempt = 0;
  let terminalStatus: SessionResult['status'] = 'completed';
  let terminalError: SessionResult['error'] | undefined;

  try {
    while (attempt < maxAttempts) {
      // Honour an abort that arrived before the first attempt or while we
      // were sleeping between attempts — do not start another SDK call.
      if (externalAbortHit) {
        terminalStatus = 'aborted';
        terminalError = undefined;
        break;
      }

      const outcome = await runOneAttempt({
        attempt,
        sanitizedPrompt,
        opts,
        abortController,
        transcript,
        toolCalls,
        usage,
        turnsRef: (v: number) => {
          turns = v;
        },
        turnsGet: () => turns,
        finalTextRef: (v: string | undefined) => {
          finalText = v;
        },
        finalTextGet: () => finalText,
      });

      // An external abort wins over whatever the attempt reported.
      if (externalAbortHit) {
        terminalStatus = 'aborted';
        terminalError = undefined;
        break;
      }

      if (outcome.terminal === null) {
        // Clean completion.
        terminalStatus = 'completed';
        terminalError = undefined;
        break;
      }

      if (outcome.terminal === 'budget_exceeded' || outcome.terminal === 'turn_cap_exceeded') {
        terminalStatus = outcome.terminal;
        terminalError = undefined;
        if (outcome.terminal === 'budget_exceeded') {
          emit('session.budget_exceeded', opts.stage, sessionId, {
            stage: opts.stage,
            sessionId,
            usage: { input_tokens: usage.input, output_tokens: usage.output, usd_cost: usdCost(usage.input, usage.output, usage.model) },
            budget: { ...opts.budget },
            transcript_path: transcriptPath,
          });
        }
        break;
      }

      // outcome.terminal === 'error' — decide retry.
      terminalStatus = 'error';
      terminalError = outcome.error;

      if (outcome.retryable && attempt + 1 < maxAttempts) {
        const baseBackoff = jitteredBackoff.delayMs(attempt, RETRY_BACKOFF);
        const wait = Math.max(baseBackoff, outcome.backoff_hint_ms);
        await sleep(wait);
        attempt += 1;
        continue;
      }

      break;
    }
  } finally {
    // Always release the listener and the transcript, even if an attempt
    // rejected unexpectedly.
    if (opts.signal !== undefined) opts.signal.removeEventListener('abort', onExternalAbort);
    transcript.close();
  }

  return finish(terminalStatus, terminalError);
}
534
+
535
+ // ---------------------------------------------------------------------------
536
+ // Run-loop internals
537
+ // ---------------------------------------------------------------------------
538
+
539
/**
 * Everything `runOneAttempt()` needs from `run()`. The mutable session state
 * (`toolCalls`, `usage`, and the turn / final-text accessors) is shared by
 * reference or closure so it carries over between attempts.
 */
interface AttemptContext {
  /** Zero-based attempt index. */
  attempt: number;
  /** Prompt after sanitization; sent to the SDK as-is. */
  sanitizedPrompt: string;
  /** Original caller options (budget, turn cap, overrides, …). */
  opts: SessionRunnerOptions;
  /** Controller aborted on external abort or when a cap trips. */
  abortController: AbortController;
  /** Destination for every streamed chunk. */
  transcript: TranscriptWriter;
  /** Accumulated tool calls; appended to in place. */
  toolCalls: SessionResult['tool_calls'];
  /** Accumulated token usage and latched model name; mutated in place. */
  usage: { input: number; output: number; model: string | null };
  /** Setter for the session-wide turn counter. */
  turnsRef: (v: number) => void;
  /** Getter for the session-wide turn counter. */
  turnsGet: () => number;
  /** Setter for the session-wide final text. */
  finalTextRef: (v: string | undefined) => void;
  /** Getter for the session-wide final text. */
  finalTextGet: () => string | undefined;
}
552
+
553
/**
 * One attempt at the SDK.
 *
 * Returns `{ terminal: null }` on clean completion,
 * `{ terminal: 'budget_exceeded' | 'turn_cap_exceeded' }` on a cap trip, or
 * `{ terminal: 'error', error, retryable, backoff_hint_ms }` when anything
 * throws — including a failure to load the SDK itself, which is resolved
 * inside the guarded region so it cannot escape as a rejection.
 *
 * For every streamed chunk the loop appends it to the transcript, folds its
 * usage, collects tool calls and final text, and forwards any rate-limit
 * headers to the rate-guard (fire-and-forget). Caps are evaluated only at
 * turn boundaries (chunks carrying a `stop_reason`), and the turn cap is
 * checked before the budget. A cap trips when the counter reaches its limit
 * (`>=`).
 *
 * Budget + turn-cap accounting is mutated on the caller-supplied `usage`
 * + `turns` refs so they survive retry boundaries.
 *
 * @param ctx Shared session state and options for this attempt.
 */
async function runOneAttempt(ctx: AttemptContext): Promise<AttemptOutcome> {
  let stream: AsyncIterable<unknown>;
  try {
    // Resolved inside the try: a missing/broken SDK becomes an error outcome.
    const queryImpl = ctx.opts.queryOverride ?? (await loadSdkQuery());

    const invokeOpts: Record<string, unknown> = {
      // The Agent SDK documents cancellation via `abortController`;
      // `abortSignal` is kept for existing `queryOverride` implementations.
      abortController: ctx.abortController,
      abortSignal: ctx.abortController.signal,
    };
    if (ctx.opts.systemPrompt !== undefined) invokeOpts['systemPrompt'] = ctx.opts.systemPrompt;
    if (ctx.opts.allowedTools !== undefined) invokeOpts['allowedTools'] = ctx.opts.allowedTools;

    stream = queryImpl({ prompt: ctx.sanitizedPrompt, options: invokeOpts });
  } catch (err) {
    return asErrorOutcome(err);
  }

  try {
    for await (const raw of stream) {
      const ch = asChunk(raw);

      // Write the chunk to the transcript regardless of kind.
      ctx.transcript.append({
        ts: new Date().toISOString(),
        type: chunkKind(ch),
        turn: ctx.turnsGet(),
        payload: raw,
      });

      // Fold usage.
      foldUsage(ctx.usage, ch);

      // Collect tool-use + final text.
      collectToolUse(ch, ctx.toolCalls);
      const nextFinal = updateFinalText(ch, ctx.finalTextGet());
      if (nextFinal !== undefined) ctx.finalTextRef(nextFinal);

      // Ingest rate-limit headers if the chunk carried any.
      const h = extractHeaders(ch);
      if (h !== null) {
        // Fire and forget — rate-guard persists under its own lock.
        void rateGuard.ingestHeaders(RATE_GUARD_PROVIDER, h).catch(() => {
          // Rate-guard write failed; tolerated — fresh headers next time.
        });
      }

      // Caps are only evaluated at turn boundaries.
      if (!isTurnStop(ch)) continue;

      ctx.turnsRef(ctx.turnsGet() + 1);

      // Turn cap?
      if (ctx.turnsGet() >= ctx.opts.turnCap.maxTurns) {
        ctx.abortController.abort();
        return { terminal: 'turn_cap_exceeded', error: undefined, backoff_hint_ms: 0, retryable: false };
      }

      // Budget (USD + both token dims)?
      const { usdLimit, inputTokensLimit, outputTokensLimit } = ctx.opts.budget;
      const costSoFar = usdCost(ctx.usage.input, ctx.usage.output, ctx.usage.model);
      const overBudget =
        costSoFar >= usdLimit ||
        ctx.usage.input >= inputTokensLimit ||
        ctx.usage.output >= outputTokensLimit;
      if (overBudget) {
        ctx.abortController.abort();
        return { terminal: 'budget_exceeded', error: undefined, backoff_hint_ms: 0, retryable: false };
      }
    }
  } catch (err) {
    return asErrorOutcome(err);
  }

  // Stream ended without error.
  return { terminal: null, error: undefined, backoff_hint_ms: 0, retryable: false };
}
640
+
641
/**
 * Translate a thrown value into an `'error'` AttemptOutcome.
 *
 * The value is classified by `mapSdkError()`. Its `gddError` supplies the
 * code / message / kind / context, with fallbacks of `'SDK_UNKNOWN'`,
 * `'unknown SDK error'`, `'operation_failed'` and `{}` for missing fields.
 * `retryable` and `backoff_hint_ms` are passed through unchanged.
 */
function asErrorOutcome(err: unknown): AttemptOutcome {
  const { gddError, retryable, backoff_hint_ms } = mapSdkError(err);
  const fields = gddError as {
    code?: string;
    message?: string;
    kind?: string;
    context?: unknown;
  };
  const error = {
    code: fields.code ?? 'SDK_UNKNOWN',
    message: fields.message ?? 'unknown SDK error',
    kind: fields.kind ?? 'operation_failed',
    context: fields.context ?? {},
  };
  return { terminal: 'error', error, retryable, backoff_hint_ms };
}
657
+
658
/**
 * Lazily load the real SDK's `query` function.
 *
 * Kept separate so callers that pass `queryOverride` never pull the SDK into
 * the process. The package is loaded through the module-level
 * `createRequire` loader, so resolution does not depend on the current cwd.
 * NOTE(review): this is a synchronous CommonJS `require`; confirm the SDK
 * package is loadable that way on the supported Node versions.
 *
 * @returns The SDK's `query` export.
 */
async function loadSdkQuery(): Promise<(args: { prompt: unknown; options?: unknown }) => AsyncIterable<unknown>> {
  const { query } = _nodeRequire('@anthropic-ai/claude-agent-sdk') as {
    query: (args: { prompt: unknown; options?: unknown }) => AsyncIterable<unknown>;
  };
  return query;
}
669
+
670
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms Delay in milliseconds; negative values are clamped to 0.
 */
function sleep(ms: number): Promise<void> {
  const delay = Math.max(0, ms);
  return new Promise<void>((done) => {
    setTimeout(done, delay);
  });
}
674
+
675
+ // ---------------------------------------------------------------------------
676
+ // Result construction
677
+ // ---------------------------------------------------------------------------
678
+
679
/** Inputs to `buildResult()`: the run-loop accumulators plus the terminal status. */
interface BuildResultArgs {
  /** Terminal status to report. */
  status: SessionResult['status'];
  /** Path of the transcript file for this session. */
  transcriptPath: string;
  /** Number of completed turns. */
  turns: number;
  /** Accumulated tokens and the model used for cost calculation. */
  usage: { input: number; output: number; model: string | null };
  /** Collected tool calls; stored on the result by reference. */
  toolCalls: SessionResult['tool_calls'];
  /** Last assistant text seen, if any. */
  finalText: string | undefined;
  /** Sanitizer output; only `applied` and `removedSections` are copied to the result. */
  sanitizer: { sanitized: string; applied: readonly string[]; removedSections: readonly string[] };
  /** Structured error; omitted from the result when `undefined`. */
  error?: SessionResult['error'];
}
689
+
690
/**
 * Assemble the public `SessionResult` from the run-loop accumulators.
 *
 * USD cost is derived from the token totals via `usdCost()`. The sanitizer
 * diagnostics are copied into fresh arrays, while `tool_calls` is the same
 * array instance that was passed in. `final_text` and `error` are only
 * present on the result when they are defined.
 */
function buildResult(args: BuildResultArgs): SessionResult {
  const { input, output, model } = args.usage;
  const { applied, removedSections } = args.sanitizer;

  const res: SessionResult = {
    status: args.status,
    transcript_path: args.transcriptPath,
    turns: args.turns,
    usage: {
      input_tokens: input,
      output_tokens: output,
      usd_cost: usdCost(input, output, model),
    },
    tool_calls: args.toolCalls,
    sanitizer: {
      applied: Array.from(applied),
      removedSections: Array.from(removedSections),
    },
  };

  if (args.finalText !== undefined) {
    res.final_text = args.finalText;
  }
  if (args.error !== undefined) {
    res.error = args.error;
  }
  return res;
}
711
+
712
+ // Re-export types and primitives specifically for plan-level budget hint
713
+ // invariant: session-runner consumers can rely on these constants being
714
+ // stable across minor releases.
715
+ export { MODEL_RATES, DEFAULT_MODEL_RATE, RATE_GUARD_PROVIDER };