@zhixuan92/multi-model-agent-core 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +0 -6
  2. package/dist/config/schema.d.ts +27 -0
  3. package/dist/config/schema.d.ts.map +1 -1
  4. package/dist/config/schema.js +13 -0
  5. package/dist/config/schema.js.map +1 -1
  6. package/dist/context/context-block-store.d.ts +75 -0
  7. package/dist/context/context-block-store.d.ts.map +1 -0
  8. package/dist/context/context-block-store.js +82 -0
  9. package/dist/context/context-block-store.js.map +1 -0
  10. package/dist/context/expand-context-blocks.d.ts +20 -0
  11. package/dist/context/expand-context-blocks.d.ts.map +1 -0
  12. package/dist/context/expand-context-blocks.js +46 -0
  13. package/dist/context/expand-context-blocks.js.map +1 -0
  14. package/dist/delegate-with-escalation.d.ts +34 -0
  15. package/dist/delegate-with-escalation.d.ts.map +1 -0
  16. package/dist/delegate-with-escalation.js +168 -0
  17. package/dist/delegate-with-escalation.js.map +1 -0
  18. package/dist/index.d.ts +4 -1
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +3 -0
  21. package/dist/index.js.map +1 -1
  22. package/dist/model-profiles.json +8 -4
  23. package/dist/provider.d.ts.map +1 -1
  24. package/dist/provider.js +7 -1
  25. package/dist/provider.js.map +1 -1
  26. package/dist/routing/model-profiles.d.ts +1 -0
  27. package/dist/routing/model-profiles.d.ts.map +1 -1
  28. package/dist/routing/model-profiles.js +4 -0
  29. package/dist/routing/model-profiles.js.map +1 -1
  30. package/dist/run-tasks.d.ts +26 -2
  31. package/dist/run-tasks.d.ts.map +1 -1
  32. package/dist/run-tasks.js +61 -19
  33. package/dist/run-tasks.js.map +1 -1
  34. package/dist/runners/claude-runner.d.ts.map +1 -1
  35. package/dist/runners/claude-runner.js +643 -32
  36. package/dist/runners/claude-runner.js.map +1 -1
  37. package/dist/runners/codex-runner.d.ts.map +1 -1
  38. package/dist/runners/codex-runner.js +473 -48
  39. package/dist/runners/codex-runner.js.map +1 -1
  40. package/dist/runners/error-classification.d.ts +30 -0
  41. package/dist/runners/error-classification.d.ts.map +1 -0
  42. package/dist/runners/error-classification.js +72 -0
  43. package/dist/runners/error-classification.js.map +1 -0
  44. package/dist/runners/injection-type.d.ts +17 -0
  45. package/dist/runners/injection-type.d.ts.map +1 -0
  46. package/dist/runners/injection-type.js +27 -0
  47. package/dist/runners/injection-type.js.map +1 -0
  48. package/dist/runners/openai-runner.d.ts +5 -0
  49. package/dist/runners/openai-runner.d.ts.map +1 -1
  50. package/dist/runners/openai-runner.js +508 -36
  51. package/dist/runners/openai-runner.js.map +1 -1
  52. package/dist/runners/prevention.d.ts +41 -0
  53. package/dist/runners/prevention.d.ts.map +1 -0
  54. package/dist/runners/prevention.js +68 -0
  55. package/dist/runners/prevention.js.map +1 -0
  56. package/dist/runners/supervision.d.ts +130 -0
  57. package/dist/runners/supervision.d.ts.map +1 -0
  58. package/dist/runners/supervision.js +238 -0
  59. package/dist/runners/supervision.js.map +1 -0
  60. package/dist/tools/claude-adapter.d.ts.map +1 -1
  61. package/dist/tools/claude-adapter.js +6 -3
  62. package/dist/tools/claude-adapter.js.map +1 -1
  63. package/dist/tools/definitions.d.ts +3 -1
  64. package/dist/tools/definitions.d.ts.map +1 -1
  65. package/dist/tools/definitions.js +56 -5
  66. package/dist/tools/definitions.js.map +1 -1
  67. package/dist/tools/openai-adapter.d.ts.map +1 -1
  68. package/dist/tools/openai-adapter.js +6 -3
  69. package/dist/tools/openai-adapter.js.map +1 -1
  70. package/dist/tools/scratchpad.d.ts +28 -0
  71. package/dist/tools/scratchpad.d.ts.map +1 -0
  72. package/dist/tools/scratchpad.js +49 -0
  73. package/dist/tools/scratchpad.js.map +1 -0
  74. package/dist/tools/tracker.d.ts +38 -2
  75. package/dist/tools/tracker.d.ts.map +1 -1
  76. package/dist/tools/tracker.js +54 -5
  77. package/dist/tools/tracker.js.map +1 -1
  78. package/dist/types.d.ts +184 -2
  79. package/dist/types.d.ts.map +1 -1
  80. package/dist/types.js +17 -1
  81. package/dist/types.js.map +1 -1
  82. package/package.json +9 -15
@@ -1,8 +1,89 @@
1
1
  import { query } from '@anthropic-ai/claude-agent-sdk';
2
- import { withTimeout } from '../types.js';
2
+ import { createHash } from 'node:crypto';
3
+ import { withTimeout, computeCostUSD, } from '../types.js';
3
4
  import { FileTracker } from '../tools/tracker.js';
4
5
  import { createToolImplementations } from '../tools/definitions.js';
5
6
  import { createClaudeToolServer } from '../tools/claude-adapter.js';
7
+ import { TextScratchpad } from '../tools/scratchpad.js';
8
+ import { buildSystemPrompt, buildBudgetHint, buildReGroundingMessage, buildBudgetPressureNudge, RE_GROUNDING_INTERVAL_TURNS, } from './prevention.js';
9
+ import { validateCompletion, buildRePrompt, sameDegenerateOutput, resolveInputTokenSoftLimit, checkWatchdogThreshold, logWatchdogEvent, } from './supervision.js';
10
+ import { injectionTypeFor } from './injection-type.js';
11
+ import { classifyError } from './error-classification.js';
12
+ import { findModelProfile } from '../routing/model-profiles.js';
13
+ /**
14
+ * Hard cap on supervision re-prompts before we give up and salvage. Same as
15
+ * openai-runner; see spec A.2.2.
16
+ */
17
+ const MAX_SUPERVISION_RETRIES = 3;
/**
 * Minimal pushable async-iterable queue of user messages.
 *
 * An instance is handed to the claude-agent-sdk `query()` call as its
 * `prompt`, which lets the runner push follow-up user messages into a
 * running query instead of supplying a single prompt string.
 *
 * Contract:
 *  - `push(msg)` hands the message to the oldest waiting `next()` call, or
 *    buffers it when nobody is waiting. It is a no-op once the queue is closed.
 *  - `close()` marks end-of-stream and resolves every waiting `next()` call
 *    with `{ done: true }`. Messages that are already buffered are still
 *    delivered before `done` is reported.
 *  - The iterator's `return()` closes the queue. The `for await` protocol
 *    invokes `return()` when a consumer leaves the loop early (break/throw);
 *    without it the queue stayed open and later pushes were buffered for a
 *    consumer that no longer exists.
 */
class PushableUserMessageQueue {
    /** Messages pushed while no `next()` call was waiting. */
    buffer = [];
    /** Resolve callbacks of `next()` calls waiting for a message. */
    resolvers = [];
    /** True once `close()` (or the iterator's `return()`) has run. */
    closed = false;
    /**
     * Deliver a message to a waiting consumer, or buffer it.
     * @param {*} msg - message to enqueue; silently dropped after close.
     */
    push(msg) {
        if (this.closed)
            return;
        const resolver = this.resolvers.shift();
        if (resolver) {
            resolver({ value: msg, done: false });
        }
        else {
            this.buffer.push(msg);
        }
    }
    /** Signal end-of-stream and release every waiting `next()` call. Idempotent. */
    close() {
        if (this.closed)
            return;
        this.closed = true;
        while (this.resolvers.length > 0) {
            const resolver = this.resolvers.shift();
            resolver({ value: undefined, done: true });
        }
    }
    /**
     * @returns an async iterator that yields buffered messages first, then
     * waits for the next `push()`, and reports `done` once the queue is
     * closed and drained.
     */
    [Symbol.asyncIterator]() {
        return {
            next: () => {
                if (this.buffer.length > 0) {
                    return Promise.resolve({ value: this.buffer.shift(), done: false });
                }
                if (this.closed) {
                    return Promise.resolve({ value: undefined, done: true });
                }
                return new Promise((resolve) => {
                    this.resolvers.push(resolve);
                });
            },
            // Called by `for await` on early exit: close the queue so later
            // pushes are dropped and any other waiter is released.
            return: () => {
                this.close();
                return Promise.resolve({ value: undefined, done: true });
            },
        };
    }
}
/**
 * Build the user-message envelope for a plain string so call sites that
 * push onto the streaming-input queue stay short.
 *
 * @param {string} text - message body, used verbatim as `message.content`.
 * @returns {{type: 'user', message: {role: 'user', content: string}, parent_tool_use_id: null}}
 */
function userMessage(text) {
    const message = { role: 'user', content: text };
    return { type: 'user', message, parent_tool_use_id: null };
}
6
87
  export async function runClaude(prompt, options, providerConfig, defaults) {
7
88
  const maxTurns = options.maxTurns ?? providerConfig.maxTurns ?? defaults.maxTurns;
8
89
  const timeoutMs = options.timeoutMs ?? providerConfig.timeoutMs ?? defaults.timeoutMs;
@@ -11,8 +92,76 @@ export async function runClaude(prompt, options, providerConfig, defaults) {
11
92
  const effort = options.effort ?? providerConfig.effort;
12
93
  const sandboxPolicy = options.sandboxPolicy ?? providerConfig.sandboxPolicy ?? 'cwd-only';
13
94
  const abortController = new AbortController();
14
- const tracker = new FileTracker();
95
+ // --- Progress event emission (Task 10) ----------------------------------
96
+ //
97
+ // `onProgress` is already wrapped in `safeSink` by the orchestrator
98
+ // (Task 8), so any throw from the consumer callback is swallowed
99
+ // upstream and cannot corrupt this loop. We do not need to wrap it
100
+ // again here.
101
+ const onProgress = options.onProgress;
102
+ const emit = (event) => {
103
+ if (onProgress)
104
+ onProgress(event);
105
+ };
106
+ // Hoisted so the FileTracker callback (closed over below) can read the
107
+ // running turn count at callback firing time. Unlike openai-runner — where
108
+ // the turn counter comes from `currentResult?.state.usage.requests + 1`
109
+ // because the SDK only bumps the counter after the call completes — the
110
+ // claude-runner increments `turns` at the top of every `msg.type ===
111
+ // 'assistant'` branch, which is PROCESSED BEFORE the SDK fires any tool
112
+ // calls for that turn. That means `turns` already holds the current
113
+ // turn number when the tracker callback fires mid-tool-loop, so we
114
+ // attribute tool calls to `turns` directly (no +1 offset).
115
+ let inputTokens = 0;
116
+ let outputTokens = 0;
117
+ let costUSD = null;
118
+ let turns = 0;
119
+ const tracker = new FileTracker((summary) => {
120
+ emit({ kind: 'tool_call', turn: turns, toolSummary: summary });
121
+ });
15
122
  const toolImpls = createToolImplementations(tracker, cwd, sandboxPolicy, abortController.signal);
123
+ // --- Prevention layer: system prompt + budget hint ---
124
+ //
125
+ // buildSystemPrompt() is deliberately static and parameter-free (same
126
+ // decision as openai-runner: Task 1 review rejected provider/maxTurns
127
+ // options). We append our discipline rules onto the `claude_code` preset
128
+ // rather than REPLACING the default system prompt, because replacing it
129
+ // strips the SDK's tool-usage guidance. See
130
+ // node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts L1460-1465 for the
131
+ // systemPrompt union type — `{ type: 'preset', preset: 'claude_code',
132
+ // append: string }` is the intended "add to defaults" shape.
133
+ const systemPrompt = buildSystemPrompt();
134
+ const budgetHint = buildBudgetHint({ maxTurns });
135
+ const promptWithBudgetHint = `${budgetHint}\n\n${prompt}`;
136
+ // --- onInitialRequest (Task 12) ----------------------------------------
137
+ //
138
+ // Fire once per attempt with the canonical orchestrator-side initial
139
+ // brief: `${systemPrompt}\n\n${promptWithBudgetHint}`. This is NOT the
140
+ // literal bytes the Anthropic SDK will send — the SDK wraps our
141
+ // systemPrompt in `{ type: 'preset', preset: 'claude_code', append: ... }`
142
+ // (see queryOptions.systemPrompt below), so the wire-level system prompt
143
+ // includes the claude_code preset bytes that precede ours. We hash the
144
+ // canonical form anyway for two reasons:
145
+ // 1. It matches openai-runner and codex-runner, which also don't hash
146
+ // literal wire bytes (they hash the same canonical form before the
147
+ // SDK wraps it in its own `messages` / Responses API structures).
148
+ // Cross-runner stability is the Task 12 design requirement.
149
+ // 2. It answers the "did the orchestrator send the same brief across
150
+ // retries?" question, which is the actual debugging use case — NOT
151
+ // "were the literal wire bytes identical?".
152
+ // See `AttemptRecord.initialPromptHash` in types.ts for the full caveat.
153
+ if (options.onInitialRequest) {
154
+ const canonicalInitialBrief = `${systemPrompt}\n\n${promptWithBudgetHint}`;
155
+ try {
156
+ options.onInitialRequest({
157
+ lengthChars: canonicalInitialBrief.length,
158
+ sha256: createHash('sha256').update(canonicalInitialBrief).digest('hex'),
159
+ });
160
+ }
161
+ catch {
162
+ // Swallow — a broken callback must not affect dispatch.
163
+ }
164
+ }
16
165
  // Permission bypass is intentional for sub-agent use. File-system confinement
17
166
  // is enforced by assertWithinCwd in tool definitions when sandboxPolicy is 'cwd-only'.
18
167
  const queryOptions = {
@@ -23,6 +172,11 @@ export async function runClaude(prompt, options, providerConfig, defaults) {
23
172
  allowDangerouslySkipPermissions: true,
24
173
  persistSession: false,
25
174
  abortController,
175
+ systemPrompt: {
176
+ type: 'preset',
177
+ preset: 'claude_code',
178
+ append: systemPrompt,
179
+ },
26
180
  };
27
181
  if (toolMode === 'full') {
28
182
  const toolServer = createClaudeToolServer(toolImpls, sandboxPolicy);
@@ -46,73 +200,530 @@ export async function runClaude(prompt, options, providerConfig, defaults) {
46
200
  // effort is typed as EffortLevel in Options; cast from string
47
201
  queryOptions.effort = effort;
48
202
  }
49
- // Hoisted so the timeout callback can read partial progress
50
- let inputTokens = 0;
51
- let outputTokens = 0;
52
- let costUSD = null;
53
- let turns = 0;
203
+ // --- Scratchpad: buffers every assistant text block we see streaming
204
+ // through the iterator. On any termination path (ok/incomplete/max_turns/
205
+ // error/timeout/force_salvage) we salvage `scratchpad.latest()` when the
206
+ // final `result.result` is empty or degenerate. ---
207
+ const scratchpad = new TextScratchpad();
208
+ // --- Watchdog: resolve the input-token soft limit once per run ---
209
+ const profile = findModelProfile(providerConfig.model);
210
+ const softLimit = resolveInputTokenSoftLimit(providerConfig, profile);
54
211
  const run = async () => {
55
212
  let output = '';
56
- let hitMaxTurns = false;
213
+ // --- Supervision / watchdog bookkeeping ---
214
+ let supervisionRetries = 0;
215
+ // Initialised to `null` (NOT ''): on the first turn there is no
216
+ // previous degenerate output to compare against, so the same-output
217
+ // early-out must be skipped. See openai-runner regression #5.
218
+ let lastDegenerateOutput = null;
219
+ // High-watermark guard for the watchdog warning nudge — fire at most
220
+ // once per distinct input-token level. Mirrors openai-runner.
221
+ let lastWarnedInputTokens = -1;
222
+ // --- Completed-result sentinel. Every exit from the supervision
223
+ // state machine inside the `for await` iterator sets this to a fully-
224
+ // built RunResult and then `break`s. After the loop, the one explicit
225
+ // return on the happy path is `completedResult`. This gives every
226
+ // exit (ok / incomplete / force_salvage / max_turns) a single
227
+ // explicit owner, mirroring openai-runner's `while (true) + return`
228
+ // shape but compatible with the for-await iterator contract. ---
229
+ let completedResult = null;
230
+ // --- Streaming input queue. See PushableUserMessageQueue docstring:
231
+ // using an async iterable as the `prompt` enables mid-run user-message
232
+ // injection (supervision re-prompts, re-grounding, budget-pressure
233
+ // nudges) without restarting the CLI subprocess. ---
234
+ const messageQueue = new PushableUserMessageQueue();
235
+ messageQueue.push(userMessage(promptWithBudgetHint));
57
236
  try {
58
- for await (const msg of query({ prompt, options: queryOptions })) {
237
+ for await (const msg of query({ prompt: messageQueue, options: queryOptions })) {
59
238
  if (msg.type === 'assistant') {
60
239
  turns++;
240
+ emit({ kind: 'turn_start', turn: turns, provider: 'claude' });
241
+ // Capture every assistant text block as scratchpad fodder. The
242
+ // claude-agent-sdk's BetaMessage.content is an array of blocks:
243
+ // `{ type: 'text', text } | { type: 'tool_use', ... } |
244
+ // { type: 'thinking', ... } | ...`. We only want plain text;
245
+ // tool_use blocks have no salvage value (they're side-effects)
246
+ // and thinking blocks are stripped before the caller sees them.
247
+ if ('message' in msg && msg.message && 'content' in msg.message) {
248
+ // The claude-agent-sdk's BetaMessage.content is typed as an
249
+ // array of content blocks — but historically the API sometimes
250
+ // delivers a bare string, so we defensively handle both. The
251
+ // string branch is narrow-typed to `never` by the SDK, so we
252
+ // cast through `unknown` to keep runtime safety without fighting
253
+ // the compiler.
254
+ const content = msg.message.content;
255
+ if (typeof content === 'string') {
256
+ scratchpad.append(turns, content);
257
+ if (content.length > 0) {
258
+ emit({
259
+ kind: 'text_emission',
260
+ turn: turns,
261
+ chars: content.length,
262
+ preview: content.slice(0, 200),
263
+ });
264
+ }
265
+ }
266
+ else if (Array.isArray(content)) {
267
+ const texts = content
268
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
269
+ .filter((c) => c && c.type === 'text' && typeof c.text === 'string')
270
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
271
+ .map((c) => c.text);
272
+ if (texts.length > 0) {
273
+ const joined = texts.join('\n');
274
+ scratchpad.append(turns, joined);
275
+ if (joined.length > 0) {
276
+ emit({
277
+ kind: 'text_emission',
278
+ turn: turns,
279
+ chars: joined.length,
280
+ preview: joined.slice(0, 200),
281
+ });
282
+ }
283
+ }
284
+ }
285
+ }
286
+ // --- Watchdog check (assistant-message cadence). We check
287
+ // `inputTokens` as accumulated from prior `result` messages.
288
+ // On the very first assistant message inputTokens is 0 and no
289
+ // threshold can fire; that's correct. This is also the ONLY
290
+ // site that handles `warning` — it logs AND pushes the nudge
291
+ // as one action. The post-result site only handles
292
+ // force_salvage. ---
293
+ const watchdogStatus = checkWatchdogThreshold(inputTokens, softLimit);
294
+ if (watchdogStatus !== 'ok') {
295
+ logWatchdogEvent(watchdogStatus, {
296
+ provider: 'claude',
297
+ model: providerConfig.model,
298
+ turn: turns,
299
+ inputTokens,
300
+ softLimit,
301
+ scratchpadChars: scratchpad.toString().length,
302
+ });
303
+ }
304
+ if (watchdogStatus === 'force_salvage') {
305
+ // `watchdog_force_salvage` is not an injected message — no
306
+ // re-prompt is sent — but observers still want to see why the
307
+ // run is being killed. We emit the event with
308
+ // `contentLengthChars: 0` to reflect the "nothing was injected,
309
+ // we just terminated" semantics (mirrors openai-runner).
310
+ emit({
311
+ kind: 'injection',
312
+ injectionType: 'watchdog_force_salvage',
313
+ turn: turns,
314
+ contentLengthChars: 0,
315
+ });
316
+ completedResult = buildClaudeForceSalvageResult({
317
+ tracker,
318
+ scratchpad,
319
+ providerConfig,
320
+ sdkCostUSD: costUSD,
321
+ inputTokens,
322
+ outputTokens,
323
+ turns,
324
+ softLimit,
325
+ });
326
+ messageQueue.close();
327
+ abortController.abort();
328
+ break;
329
+ }
330
+ // Fire the warning nudge at most once per distinct input-token
331
+ // high-watermark. We push a user message into the queue so the
332
+ // next turn of the conversation will address the budget-pressure
333
+ // prompt. If the nudge response is itself a valid final answer,
334
+ // the supervision loop on the NEXT `result` message will return
335
+ // `ok`. High-watermark guard prevents re-nudging if inputTokens
336
+ // stays the same across two assistant messages.
337
+ if (watchdogStatus === 'warning' && inputTokens > lastWarnedInputTokens) {
338
+ lastWarnedInputTokens = inputTokens;
339
+ const warning = buildBudgetPressureNudge({ inputTokens, softLimit });
340
+ emit({
341
+ kind: 'injection',
342
+ injectionType: 'watchdog_warning',
343
+ turn: turns,
344
+ contentLengthChars: warning.length,
345
+ });
346
+ messageQueue.push(userMessage(warning));
347
+ }
348
+ // --- Periodic re-grounding (best-effort in streaming-input
349
+ // mode): inject a reminder every RE_GROUNDING_INTERVAL_TURNS
350
+ // turns via the same queue. The iterator keeps reading until
351
+ // the CLI subprocess decides to emit a final result after it
352
+ // processes the new user message. ---
353
+ if (turns > 0 && turns % RE_GROUNDING_INTERVAL_TURNS === 0) {
354
+ const reground = buildReGroundingMessage({
355
+ originalPromptExcerpt: prompt,
356
+ currentTurn: turns,
357
+ maxTurns,
358
+ toolCallsSoFar: tracker.getToolCalls().length,
359
+ filesReadSoFar: tracker.getReads().length,
360
+ });
361
+ emit({
362
+ kind: 'injection',
363
+ injectionType: 'reground',
364
+ turn: turns,
365
+ contentLengthChars: reground.length,
366
+ });
367
+ messageQueue.push(userMessage(reground));
368
+ }
61
369
  }
62
370
  if (msg.type === 'result') {
63
371
  if ('result' in msg) {
64
372
  output = msg.result;
65
373
  }
66
- if ('subtype' in msg && msg.subtype === 'error_max_turns') {
67
- hitMaxTurns = true;
68
- }
69
- // Extract usage from modelUsage or usage
374
+ const hitMaxTurns = 'subtype' in msg && msg.subtype === 'error_max_turns';
375
+ // Extract usage from modelUsage or usage, then ACCUMULATE into
376
+ // the running inputTokens/outputTokens. Supervision retries in
377
+ // streaming-input mode push a new user message into the queue
378
+ // and the SDK emits a fresh `result` message per top-level user
379
+ // turn — we want the cumulative usage across every result we
380
+ // see, not just the last one. Accumulation keeps the watchdog
381
+ // soft-limit check honest across retries and produces correct
382
+ // totals on any termination path.
383
+ let turnInputTokens = 0;
384
+ let turnOutputTokens = 0;
70
385
  if ('modelUsage' in msg && msg.modelUsage) {
71
386
  for (const model of Object.values(msg.modelUsage)) {
72
- inputTokens += model.inputTokens ?? 0;
73
- outputTokens += model.outputTokens ?? 0;
387
+ turnInputTokens += model.inputTokens ?? 0;
388
+ turnOutputTokens += model.outputTokens ?? 0;
74
389
  }
75
390
  }
76
391
  else if ('usage' in msg && msg.usage) {
77
392
  const u = msg.usage;
78
- inputTokens = u['input_tokens'] ?? 0;
79
- outputTokens = u['output_tokens'] ?? 0;
393
+ turnInputTokens = u['input_tokens'] ?? 0;
394
+ turnOutputTokens = u['output_tokens'] ?? 0;
80
395
  }
396
+ inputTokens += turnInputTokens;
397
+ outputTokens += turnOutputTokens;
81
398
  if ('total_cost_usd' in msg && typeof msg.total_cost_usd === 'number') {
82
399
  costUSD = msg.total_cost_usd;
83
400
  }
401
+ // --- turn_complete: one event per result message (which
402
+ // corresponds to one top-level assistant turn from the SDK's
403
+ // perspective). Fires after usage aggregation so the cumulative
404
+ // counters are up-to-date.
405
+ emit({
406
+ kind: 'turn_complete',
407
+ turn: turns,
408
+ cumulativeInputTokens: inputTokens,
409
+ cumulativeOutputTokens: outputTokens,
410
+ });
411
+ // --- Watchdog check on the result message as well: input tokens
412
+ // have just jumped and we may now be in force_salvage territory.
413
+ // The post-result site ONLY handles force_salvage. `warning` is
414
+ // intentionally ignored here — the assistant-message-cadence site
415
+ // above is the single place that logs warnings AND pushes the
416
+ // nudge into the queue. Logging `warning` here without pushing a
417
+ // nudge would be misleading (suggests action that didn't happen).
418
+ const postResultWatchdog = checkWatchdogThreshold(inputTokens, softLimit);
419
+ if (postResultWatchdog === 'force_salvage') {
420
+ logWatchdogEvent(postResultWatchdog, {
421
+ provider: 'claude',
422
+ model: providerConfig.model,
423
+ turn: turns,
424
+ inputTokens,
425
+ softLimit,
426
+ scratchpadChars: scratchpad.toString().length,
427
+ });
428
+ emit({
429
+ kind: 'injection',
430
+ injectionType: 'watchdog_force_salvage',
431
+ turn: turns,
432
+ contentLengthChars: 0,
433
+ });
434
+ completedResult = buildClaudeForceSalvageResult({
435
+ tracker,
436
+ scratchpad,
437
+ providerConfig,
438
+ sdkCostUSD: costUSD,
439
+ inputTokens,
440
+ outputTokens,
441
+ turns,
442
+ softLimit,
443
+ });
444
+ messageQueue.close();
445
+ abortController.abort();
446
+ break;
447
+ }
448
+ // --- Max-turns: don't supervise a max-turns termination,
449
+ // build the max_turns result directly and exit. ---
450
+ if (hitMaxTurns) {
451
+ completedResult = buildClaudeMaxTurnsResult({
452
+ tracker,
453
+ scratchpad,
454
+ providerConfig,
455
+ sdkCostUSD: costUSD,
456
+ inputTokens,
457
+ outputTokens,
458
+ turns,
459
+ maxTurns,
460
+ lastOutput: output,
461
+ });
462
+ messageQueue.close();
463
+ break;
464
+ }
465
+ // --- Supervision: validate the captured output. Valid output
466
+ // is an immediate ok-exit. Degenerate output either re-prompts
467
+ // (and keeps reading the iterator) or — if the retry budget is
468
+ // spent / same-output early-out fires — exits as incomplete. ---
469
+ const validation = validateCompletion(output);
470
+ if (validation.valid) {
471
+ completedResult = buildClaudeOkResult({
472
+ tracker,
473
+ scratchpad,
474
+ providerConfig,
475
+ sdkCostUSD: costUSD,
476
+ inputTokens,
477
+ outputTokens,
478
+ turns,
479
+ output,
480
+ });
481
+ messageQueue.close();
482
+ break;
483
+ }
484
+ // Same-output early-out: don't burn another retry on identical
485
+ // garbage. Compare only when we have a previous degenerate.
486
+ if (lastDegenerateOutput !== null &&
487
+ sameDegenerateOutput(output, lastDegenerateOutput)) {
488
+ completedResult = buildClaudeIncompleteResult({
489
+ tracker,
490
+ scratchpad,
491
+ providerConfig,
492
+ sdkCostUSD: costUSD,
493
+ inputTokens,
494
+ outputTokens,
495
+ turns,
496
+ });
497
+ messageQueue.close();
498
+ break;
499
+ }
500
+ lastDegenerateOutput = output;
501
+ supervisionRetries++;
502
+ if (supervisionRetries >= MAX_SUPERVISION_RETRIES) {
503
+ completedResult = buildClaudeIncompleteResult({
504
+ tracker,
505
+ scratchpad,
506
+ providerConfig,
507
+ sdkCostUSD: costUSD,
508
+ inputTokens,
509
+ outputTokens,
510
+ turns,
511
+ });
512
+ messageQueue.close();
513
+ break;
514
+ }
515
+ // Push the re-prompt and continue reading the iterator.
516
+ const rePrompt = buildRePrompt(validation);
517
+ emit({
518
+ kind: 'injection',
519
+ injectionType: injectionTypeFor(validation.kind),
520
+ turn: turns,
521
+ contentLengthChars: rePrompt.length,
522
+ });
523
+ messageQueue.push(userMessage(rePrompt));
84
524
  }
85
525
  }
86
526
  }
87
527
  catch (err) {
528
+ // Preserve partial usage — the scratchpad may have buffered text
529
+ // from turns that ran before the throw. Route the thrown error
530
+ // through the shared classifier so the escalation orchestrator can
531
+ // distinguish abort / network / HTTP-error / generic failure modes.
532
+ const { status, reason } = classifyError(err);
533
+ const msg = err instanceof Error ? err.message : String(err);
534
+ emit({ kind: 'done', status });
535
+ const hasSalvage = !scratchpad.isEmpty();
88
536
  return {
89
- output: `Sub-agent error: ${err instanceof Error ? err.message : String(err)}`,
90
- status: 'error',
91
- usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens, costUSD },
537
+ output: hasSalvage ? scratchpad.latest() : `Sub-agent error: ${msg}`,
538
+ status,
539
+ usage: {
540
+ inputTokens,
541
+ outputTokens,
542
+ totalTokens: inputTokens + outputTokens,
543
+ costUSD: effectiveClaudeCost(providerConfig, inputTokens, outputTokens, costUSD),
544
+ },
92
545
  turns,
93
- files: tracker.getFiles(),
94
- error: err instanceof Error ? err.message : String(err),
546
+ filesRead: tracker.getReads(),
547
+ filesWritten: tracker.getWrites(),
548
+ toolCalls: tracker.getToolCalls(),
549
+ outputIsDiagnostic: !hasSalvage,
550
+ escalationLog: [],
551
+ error: msg || reason,
95
552
  };
96
553
  }
554
+ // Every `break` inside the iterator above assigned `completedResult`
555
+ // before exiting. If the iterator drained without any break (e.g. the
556
+ // SDK closed the stream cleanly without ever emitting a final
557
+ // `result`), synthesize an incomplete result so the caller always
558
+ // gets a meaningful diagnostic instead of undefined.
559
+ if (completedResult) {
560
+ emit({ kind: 'done', status: completedResult.status });
561
+ return completedResult;
562
+ }
563
+ const drained = buildClaudeIncompleteResult({
564
+ tracker,
565
+ scratchpad,
566
+ providerConfig,
567
+ sdkCostUSD: costUSD,
568
+ inputTokens,
569
+ outputTokens,
570
+ turns,
571
+ });
572
+ emit({ kind: 'done', status: drained.status });
573
+ return drained;
574
+ };
575
+ return withTimeout(run(), timeoutMs, () => {
576
+ emit({ kind: 'done', status: 'timeout' });
577
+ const hasSalvage = !scratchpad.isEmpty();
97
578
  return {
98
- output: hitMaxTurns ? (output || `Agent exceeded max turns (${maxTurns}).`) : output,
99
- status: hitMaxTurns ? 'max_turns' : 'ok',
579
+ output: hasSalvage ? scratchpad.latest() : `Agent timed out after ${timeoutMs}ms.`,
580
+ status: 'timeout',
581
+ filesRead: tracker.getReads(),
582
+ filesWritten: tracker.getWrites(),
583
+ toolCalls: tracker.getToolCalls(),
100
584
  usage: {
101
585
  inputTokens,
102
586
  outputTokens,
103
587
  totalTokens: inputTokens + outputTokens,
104
- costUSD,
588
+ costUSD: effectiveClaudeCost(providerConfig, inputTokens, outputTokens, costUSD),
105
589
  },
106
590
  turns,
107
- files: tracker.getFiles(),
591
+ outputIsDiagnostic: !hasSalvage,
592
+ escalationLog: [],
108
593
  };
594
+ }, abortController);
595
+ }
/**
 * Pick the cost figure to report for a run.
 *
 * The value returned by `computeCostUSD` for the token counts and provider
 * config wins; the SDK-reported cost is used only when that value is
 * `null` or `undefined`.
 *
 * @param providerConfig - provider configuration passed through to `computeCostUSD`.
 * @param {number} inputTokens - cumulative input tokens.
 * @param {number} outputTokens - cumulative output tokens.
 * @param {number|null} sdkCost - cost reported by the SDK, if any.
 * @returns the computed cost, or `sdkCost` as the fallback.
 */
function effectiveClaudeCost(providerConfig, inputTokens, outputTokens, sdkCost) {
    const fromTokens = computeCostUSD(inputTokens, outputTokens, providerConfig);
    if (fromTokens !== null && fromTokens !== undefined) {
        return fromTokens;
    }
    return sdkCost;
}
/**
 * Assemble the run result for a validated final answer (`status: 'ok'`).
 *
 * @param args - `{ tracker, providerConfig, sdkCostUSD, inputTokens,
 *   outputTokens, turns, output }`; `output` is returned unchanged.
 * @returns the result object with usage totals and the tracker's read /
 *   write / tool-call lists.
 */
function buildClaudeOkResult(args) {
    const { tracker, providerConfig, sdkCostUSD, inputTokens, outputTokens, turns, output } = args;
    const usage = {
        inputTokens,
        outputTokens,
        totalTokens: inputTokens + outputTokens,
        costUSD: effectiveClaudeCost(providerConfig, inputTokens, outputTokens, sdkCostUSD),
    };
    const result = { output, status: 'ok', usage, turns };
    result.filesRead = tracker.getReads();
    result.filesWritten = tracker.getWrites();
    result.toolCalls = tracker.getToolCalls();
    // An `ok` result always carries the model's own answer, never a diagnostic.
    result.outputIsDiagnostic = false;
    result.escalationLog = [];
    return result;
}
/**
 * Assemble an `incomplete` run result.
 *
 * The output is the scratchpad's latest text when the scratchpad is
 * non-empty; otherwise it is the diagnostic produced by
 * `buildClaudeIncompleteDiagnostic`, and `outputIsDiagnostic` is set.
 *
 * @param args - `{ tracker, scratchpad, providerConfig, sdkCostUSD,
 *   inputTokens, outputTokens, turns }`.
 * @returns the result object with `status: 'incomplete'`.
 */
function buildClaudeIncompleteResult(args) {
    const { tracker, scratchpad, providerConfig, sdkCostUSD, inputTokens, outputTokens, turns } = args;
    const filesRead = tracker.getReads();
    const filesWritten = tracker.getWrites();
    const salvageable = !scratchpad.isEmpty();
    let output;
    if (salvageable) {
        output = scratchpad.latest();
    }
    else {
        output = buildClaudeIncompleteDiagnostic({
            turns,
            inputTokens,
            outputTokens,
            filesRead,
            filesWritten,
        });
    }
    const costUSD = effectiveClaudeCost(providerConfig, inputTokens, outputTokens, sdkCostUSD);
    const toolCalls = tracker.getToolCalls();
    return {
        output,
        status: 'incomplete',
        usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens, costUSD },
        turns,
        filesRead,
        filesWritten,
        toolCalls,
        outputIsDiagnostic: !salvageable,
        escalationLog: [],
    };
}
/**
 * Assemble the `incomplete` result used when the run is forcibly terminated
 * because of the input-token soft limit.
 *
 * The output is the scratchpad's latest text when available; otherwise a
 * bracketed notice naming the token count and soft limit, flagged as a
 * diagnostic.
 *
 * @param args - `{ tracker, scratchpad, providerConfig, sdkCostUSD,
 *   inputTokens, outputTokens, turns, softLimit }`.
 * @returns the result object with `status: 'incomplete'`.
 */
function buildClaudeForceSalvageResult(args) {
    const { tracker, scratchpad, providerConfig, sdkCostUSD, inputTokens, outputTokens, turns, softLimit } = args;
    const salvageable = !scratchpad.isEmpty();
    let output;
    if (salvageable) {
        output = scratchpad.latest();
    }
    else {
        output = `[claude sub-agent forcibly terminated at ${inputTokens} input tokens (soft limit ${softLimit}). No usable text was buffered.]`;
    }
    const costUSD = effectiveClaudeCost(providerConfig, inputTokens, outputTokens, sdkCostUSD);
    const filesRead = tracker.getReads();
    const filesWritten = tracker.getWrites();
    const toolCalls = tracker.getToolCalls();
    return {
        output,
        status: 'incomplete',
        usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens, costUSD },
        turns,
        filesRead,
        filesWritten,
        toolCalls,
        outputIsDiagnostic: !salvageable,
        escalationLog: [],
    };
}
/**
 * Assemble the `max_turns` run result.
 *
 * Output precedence: scratchpad's latest text, then `lastOutput`, then the
 * "Agent exceeded max turns" fallback. Only that final fallback counts as a
 * diagnostic — a non-empty `lastOutput` is still model-produced content.
 *
 * @param args - `{ tracker, scratchpad, providerConfig, sdkCostUSD,
 *   inputTokens, outputTokens, turns, maxTurns, lastOutput }`.
 * @returns the result object with `status: 'max_turns'`.
 */
function buildClaudeMaxTurnsResult(args) {
    const { tracker, scratchpad, providerConfig, sdkCostUSD, inputTokens, outputTokens, turns, maxTurns, lastOutput } = args;
    const salvageable = !scratchpad.isEmpty();
    let output;
    if (salvageable) {
        output = scratchpad.latest();
    }
    else if (lastOutput) {
        output = lastOutput;
    }
    else {
        output = `Agent exceeded max turns (${maxTurns}).`;
    }
    const outputIsDiagnostic = !(salvageable || lastOutput);
    const costUSD = effectiveClaudeCost(providerConfig, inputTokens, outputTokens, sdkCostUSD);
    const filesRead = tracker.getReads();
    const filesWritten = tracker.getWrites();
    const toolCalls = tracker.getToolCalls();
    return {
        output,
        status: 'max_turns',
        usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens, costUSD },
        turns,
        filesRead,
        filesWritten,
        toolCalls,
        outputIsDiagnostic,
        escalationLog: [],
    };
}
/**
 * Render the plain-text diagnostic used as output when a run ends without a
 * usable answer and nothing could be salvaged.
 *
 * @param opts - `{ turns, inputTokens, outputTokens, filesRead, filesWritten }`,
 *   where the two file fields are arrays of strings.
 * @returns {string} newline-joined report; each file list shows at most ten
 *   entries followed by a "… N more" suffix.
 */
function buildClaudeIncompleteDiagnostic(opts) {
    const MAX_SHOWN = 10;
    // Parenthesised, comma-separated suffix for a file list ('' when empty).
    const describe = (files) => {
        const count = files.length;
        if (count === 0) {
            return '';
        }
        const shown = files.slice(0, MAX_SHOWN).join(', ');
        return count > MAX_SHOWN
            ? ` (${shown}, … ${count - MAX_SHOWN} more)`
            : ` (${shown})`;
    };
    const explanation = 'The query stream ended without ever emitting a result message. This usually means ' +
        'the agent loop exited prematurely or the SDK lost the final message.';
    const stats = [
        `Turns used: ${opts.turns}`,
        `Input tokens: ${opts.inputTokens}`,
        `Output tokens: ${opts.outputTokens}`,
        `Files read: ${opts.filesRead.length}${describe(opts.filesRead)}`,
        `Files written: ${opts.filesWritten.length}${describe(opts.filesWritten)}`,
    ];
    const report = ['[claude sub-agent terminated without producing a final answer]', '', explanation, ''];
    report.push(...stats);
    report.push('', 'Recommended action: re-dispatch with a tighter brief, or check Claude Agent SDK logs.');
    return report.join('\n');
}
118
729
  //# sourceMappingURL=claude-runner.js.map