@poncho-ai/harness 0.50.3 → 0.50.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/harness.ts CHANGED
@@ -146,6 +146,14 @@ export interface HarnessOptions {
146
146
  * should also be browsable in the VFS. Empty by default.
147
147
  */
148
148
  systemSkillPaths?: string[];
149
+ /**
150
+ * Override the per-run hard wall-clock timeout, in seconds, taking
151
+ * precedence over the agent definition's `limits.timeout`. Platforms use
152
+ * this to give background subagents a longer budget than the foreground
153
+ * agent without forking the agent definition (e.g. a 1h research subagent
154
+ * vs. a 5-min foreground turn). `0` disables the hard timeout.
155
+ */
156
+ runTimeoutSecOverride?: number;
149
157
  }
150
158
 
151
159
  export interface HarnessRunOutput {
@@ -159,6 +167,16 @@ const now = (): number => Date.now();
159
167
  const FIRST_CHUNK_TIMEOUT_MS = 90_000; // 90s to receive the first chunk from the model
160
168
  const MAX_TRANSIENT_STEP_RETRIES = 1;
161
169
  const COMPACTION_CHECK_INTERVAL_STEPS = 3;
170
+ // Injected as a trailing user turn on the final allowed step, with tools
171
+ // disabled, so a step-exhausted run produces a text summary instead of
172
+ // terminating on a dangling tool call (which surfaces to a parent agent as
173
+ // an empty "(no response)" subagent result). See the `isFinalStep` branch in
174
+ // the run loop.
175
+ const FINAL_STEP_SUMMARY_PROMPT =
176
+ "You have reached the maximum number of steps for this run and cannot call " +
177
+ "any more tools. Do NOT attempt any tool calls. Using only the work you have " +
178
+ "already done, write your final response now: summarize what you found or " +
179
+ "accomplished, include any concrete results, and flag anything left unfinished.";
162
180
  const TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
163
181
  const TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
164
182
  const TOOL_RESULT_PREVIEW_CHARS = 700;
@@ -838,6 +856,7 @@ function extractMediaFromToolOutput(output: unknown): {
838
856
  export class AgentHarness {
839
857
  private readonly workingDir: string;
840
858
  private readonly environment: HarnessOptions["environment"];
859
+ private readonly runTimeoutSecOverride?: number;
841
860
  private modelProvider: ModelProviderFactory;
842
861
  private readonly modelProviderInjected: boolean;
843
862
  private readonly dispatcher = new ToolDispatcher();
@@ -1074,6 +1093,7 @@ export class AgentHarness {
1074
1093
  constructor(options: HarnessOptions = {}) {
1075
1094
  this.workingDir = options.workingDir ?? process.cwd();
1076
1095
  this.environment = options.environment ?? "development";
1096
+ this.runTimeoutSecOverride = options.runTimeoutSecOverride;
1077
1097
  this.modelProviderInjected = !!options.modelProvider;
1078
1098
  this.modelProvider = options.modelProvider ?? createModelProvider("anthropic");
1079
1099
  this.uploadStore = options.uploadStore;
@@ -2116,7 +2136,9 @@ export class AgentHarness {
2116
2136
  const runId = `run_${randomUUID()}`;
2117
2137
  const start = now();
2118
2138
  const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
2119
- const configuredTimeout = agent.frontmatter.limits?.timeout;
2139
+ // A constructor-level override (e.g. a longer budget for background
2140
+ // subagents) takes precedence over the agent definition's limits.timeout.
2141
+ const configuredTimeout = this.runTimeoutSecOverride ?? agent.frontmatter.limits?.timeout;
2120
2142
  const timeoutMs = this.environment === "development" && configuredTimeout == null
2121
2143
  ? 0 // no hard timeout in development unless explicitly configured
2122
2144
  : (configuredTimeout ?? 300) * 1000;
@@ -2297,14 +2319,61 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2297
2319
  };
2298
2320
  const isCancelled = (): boolean => input.abortSignal?.aborted === true;
2299
2321
  let cancellationEmitted = false;
2322
+ // The assistant turn for the current step, captured as it streams. The
2323
+ // assistant message + its tool results are only pushed to `messages`
2324
+ // *together*, after the tool batch finishes — so between "model streamed
2325
+ // a tool call" and "tools done" the turn lives only in these locals. If a
2326
+ // cancellation lands in that window we'd otherwise drop the whole turn
2327
+ // from the canonical history, leaving the next request with back-to-back
2328
+ // user messages and a model with no record of what it just said (the user
2329
+ // still sees it, since the display history is built separately). Cleared
2330
+ // once the turn is committed, and reset at the top of every step.
2331
+ let inflightTurn: {
2332
+ text: string;
2333
+ toolCalls: Array<{ id: string; name: string; input: Record<string, unknown> }>;
2334
+ } | null = null;
2300
2335
  const emitCancellation = (): AgentEvent => {
2301
2336
  cancellationEmitted = true;
2302
2337
  // Snapshot the in-flight messages so the orchestrator can persist them
2303
- // as the canonical history. Drop a trailing assistant tool_use message
2304
- // that has no matching tool result — sending that to the API on the next
2305
- // turn would be rejected.
2306
- const snapshot = trimToValidPrefix([...messages]);
2307
- return pushEvent({ type: "run:cancelled", runId, messages: snapshot });
2338
+ // as the canonical history.
2339
+ const snapshot: Message[] = [...messages];
2340
+ // Re-attach the in-flight assistant turn (if any). Synthesize a
2341
+ // tool_result for every pending tool_use so the turn is a valid prefix —
2342
+ // an assistant tool_use with no following tool result is rejected by the
2343
+ // API on the next turn, which is exactly why a naive snapshot drops it.
2344
+ if (inflightTurn && (inflightTurn.text.length > 0 || inflightTurn.toolCalls.length > 0)) {
2345
+ const hasToolCalls = inflightTurn.toolCalls.length > 0;
2346
+ const assistantContent = hasToolCalls
2347
+ ? JSON.stringify({
2348
+ text: inflightTurn.text,
2349
+ tool_calls: inflightTurn.toolCalls.map((tc) => ({
2350
+ id: tc.id,
2351
+ name: tc.name,
2352
+ input: tc.input,
2353
+ })),
2354
+ })
2355
+ : inflightTurn.text;
2356
+ snapshot.push({
2357
+ role: "assistant",
2358
+ content: assistantContent,
2359
+ metadata: { timestamp: now(), id: randomUUID(), runId },
2360
+ });
2361
+ if (hasToolCalls) {
2362
+ const cancelledResults = inflightTurn.toolCalls.map((tc) => ({
2363
+ type: "tool_result" as const,
2364
+ tool_use_id: tc.id,
2365
+ tool_name: tc.name,
2366
+ content: "Tool execution cancelled by user.",
2367
+ }));
2368
+ snapshot.push({
2369
+ role: "tool",
2370
+ content: JSON.stringify(cancelledResults),
2371
+ metadata: { timestamp: now(), id: randomUUID(), runId },
2372
+ });
2373
+ }
2374
+ }
2375
+ // Defensive: drop any trailing dangling tool_use we didn't pair above.
2376
+ return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
2308
2377
  };
2309
2378
 
2310
2379
  const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
@@ -2424,6 +2493,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2424
2493
  let convertedUpTo = 0;
2425
2494
 
2426
2495
  for (let step = 1; step <= maxSteps; step += 1) {
2496
+ inflightTurn = null;
2427
2497
  try {
2428
2498
  yield* drainBrowserEvents();
2429
2499
  if (isCancelled()) {
@@ -2883,12 +2953,24 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2883
2953
 
2884
2954
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2885
2955
 
2956
+ // On the last permitted step, force a closing text turn: strip the
2957
+ // tools so the model cannot start another tool call it has no step
2958
+ // left to resolve, and append a one-shot nudge instructing it to
2959
+ // summarize. This is what keeps a step-exhausted run (very common in
2960
+ // subagents) from ending on a dangling tool call that a parent would
2961
+ // see as an empty result. The nudge is appended only to this model
2962
+ // request — it is never written into `messages`/history.
2963
+ const isFinalStep = step === maxSteps;
2964
+ const toolsForStep = isFinalStep ? {} : tools;
2965
+ const messagesForStep: ModelMessage[] = isFinalStep
2966
+ ? [...finalMessages, { role: "user", content: FINAL_STEP_SUMMARY_PROMPT }]
2967
+ : finalMessages;
2886
2968
 
2887
2969
  const result = await streamText({
2888
2970
  model: modelInstance,
2889
2971
  ...(useStaticCache ? {} : { system: systemPrompt }),
2890
- messages: finalMessages,
2891
- tools,
2972
+ messages: messagesForStep,
2973
+ tools: toolsForStep,
2892
2974
  temperature,
2893
2975
  abortSignal: input.abortSignal,
2894
2976
  ...(typeof maxTokens === "number" ? { maxTokens } : {}),
@@ -3026,6 +3108,11 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3026
3108
  return;
3027
3109
  }
3028
3110
 
3111
+ // The model finished streaming this step's text. Capture it so a
3112
+ // cancellation from here on persists what the user already saw; the
3113
+ // tool calls are attached once they're parsed below.
3114
+ inflightTurn = { text: fullText, toolCalls: [] };
3115
+
3029
3116
  if (isCancelled()) {
3030
3117
  yield emitCancellation();
3031
3118
  return;
@@ -3135,6 +3222,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3135
3222
  name: tc.toolName,
3136
3223
  input: (tc as any).input as Record<string, unknown>,
3137
3224
  }));
3225
+ if (inflightTurn) inflightTurn.toolCalls = toolCalls;
3138
3226
 
3139
3227
  if (toolCalls.length === 0) {
3140
3228
  // Detect silent empty responses — likely an SDK or model
@@ -3593,6 +3681,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3593
3681
  content: JSON.stringify(toolResultsForModel),
3594
3682
  metadata: toolMsgMeta as Message["metadata"],
3595
3683
  });
3684
+ // Turn is now committed to `messages`; a later cancellation must not
3685
+ // re-append it from the in-flight holder.
3686
+ inflightTurn = null;
3596
3687
 
3597
3688
  // Post-tool-execution soft deadline: long-running tool batches (e.g.
3598
3689
  // multiple web_search calls) can push past the deadline. Checkpoint
@@ -610,50 +610,79 @@ const POLYFILL_FETCH_STUB = `
610
610
 
611
611
  const POLYFILL_TIMERS = `
612
612
  // --- Timers polyfill ---
613
+ //
614
+ // The isolate has no host event loop, so real wall-clock delays can't be
615
+ // honoured. What we *can* do is drain pending timers on the microtask queue
616
+ // (which isolated-vm does pump while resolving the run's promise), firing
617
+ // them in order of their requested delay against a virtual clock. This makes
618
+ // the overwhelmingly common pattern — \`await new Promise(r => setTimeout(r, n))\`
619
+ // as a sleep — actually resolve instead of hanging the whole run forever.
620
+ // Delays collapse to "as soon as possible, in delay order"; that's the right
621
+ // trade for a sandbox with no real time. A runaway setInterval is bounded by
622
+ // __MAX_FIRES here and, ultimately, by the host-side wall-clock timeout.
613
623
  (function() {
614
624
  let __timerId = 0;
615
- const __timers = new Map();
625
+ const __timers = new Map(); // id -> { fn, due, type }
626
+ const __intervals = new Set(); // ids that should reschedule
627
+ let __vclock = 0; // virtual clock (ms)
628
+ let __draining = false;
629
+ let __fired = 0;
630
+ const __MAX_FIRES = 1000000; // backstop against a runaway interval
631
+
632
+ function __schedule(fn, delayMs, type, id) {
633
+ __timers.set(id, { fn, due: __vclock + delayMs, type });
634
+ if (!__draining) __drain();
635
+ return id;
636
+ }
637
+
638
+ function __drain() {
639
+ __draining = true;
640
+ const step = function() {
641
+ if (__timers.size === 0) { __draining = false; return; }
642
+ // Pick the earliest-due timer (ties broken by insertion id for FIFO).
643
+ let pick = null;
644
+ for (const [id, t] of __timers) {
645
+ if (pick === null || t.due < pick.t.due || (t.due === pick.t.due && id < pick.id)) {
646
+ pick = { id, t };
647
+ }
648
+ }
649
+ __timers.delete(pick.id);
650
+ if (pick.t.due > __vclock) __vclock = pick.t.due;
651
+ __fired++;
652
+ try { pick.t.fn(); } catch (e) { /* host timers swallow callback throws */ }
653
+ if (__fired > __MAX_FIRES) { __draining = false; return; }
654
+ Promise.resolve().then(step);
655
+ };
656
+ Promise.resolve().then(step);
657
+ }
616
658
 
617
659
  globalThis.setTimeout = function(fn, delay) {
618
660
  const id = ++__timerId;
619
661
  const ms = Math.max(0, Number(delay) || 0);
620
- const start = Date.now();
621
- __timers.set(id, { fn, ms, start, type: "timeout" });
622
- // In the isolate, setTimeout returns the id but the callback is
623
- // executed via a polling mechanism in the async wrapper.
624
- // For simple cases (delay=0), we can use a microtask.
625
- if (ms === 0) {
626
- Promise.resolve().then(() => {
627
- if (__timers.has(id)) {
628
- __timers.delete(id);
629
- fn();
630
- }
631
- });
632
- }
633
- return id;
662
+ return __schedule(typeof fn === "function" ? fn : function() {}, ms, "timeout", id);
634
663
  };
635
664
 
636
665
  globalThis.clearTimeout = function(id) {
637
666
  __timers.delete(id);
667
+ __intervals.delete(id);
638
668
  };
639
669
 
640
670
  globalThis.setInterval = function(fn, delay) {
641
671
  const id = ++__timerId;
642
672
  const ms = Math.max(1, Number(delay) || 1);
643
- const wrapper = () => {
644
- if (!__timers.has(id)) return;
645
- fn();
646
- if (__timers.has(id)) {
647
- globalThis.setTimeout(wrapper, ms);
673
+ __intervals.add(id);
674
+ const tick = function() {
675
+ if (!__intervals.has(id)) return;
676
+ try { fn(); } finally {
677
+ if (__intervals.has(id)) __schedule(tick, ms, "interval", id);
648
678
  }
649
679
  };
650
- __timers.set(id, { fn: wrapper, ms, type: "interval" });
651
- globalThis.setTimeout(wrapper, ms);
652
- return id;
680
+ return __schedule(tick, ms, "interval", id);
653
681
  };
654
682
 
655
683
  globalThis.clearInterval = function(id) {
656
684
  __timers.delete(id);
685
+ __intervals.delete(id);
657
686
  };
658
687
 
659
688
  // queueMicrotask if not available
@@ -153,6 +153,14 @@ export function createIsolateRuntime(config: {
153
153
  const t0 = performance.now();
154
154
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
155
155
  let context: any;
156
+ // Wall-clock guard. isolated-vm's `timeout` option only bounds the
157
+ // *synchronous* portion of an eval; when the script returns a promise
158
+ // (which ours always does — it's an async IIFE) a never-settling promise
159
+ // would hang here forever (e.g. `await new Promise(() => {})`, or a
160
+ // bound host call that never resolves). Race the eval against a host
161
+ // timer that disposes the isolate, so `timeLimit` bounds total execution.
162
+ let timedOut = false;
163
+ let wallTimer: ReturnType<typeof setTimeout> | undefined;
156
164
  try {
157
165
  context = await isolate.createContext();
158
166
  const jail = context.global;
@@ -197,12 +205,35 @@ export function createIsolateRuntime(config: {
197
205
  // (context.eval + promise option handles Reference.apply resolution
198
206
  // correctly, unlike compileScript().run())
199
207
  const wrapped = `(async () => {\n${code}\n})()`;
200
- const rawResult = await context.eval(wrapped, {
208
+ const evalPromise = context.eval(wrapped, {
201
209
  filename: "<user-code>",
202
210
  promise: true,
203
211
  copy: true,
204
212
  timeout: config.timeout,
205
213
  });
214
+ const rawResult =
215
+ config.timeout > 0
216
+ ? await Promise.race([
217
+ evalPromise,
218
+ new Promise((_resolve, reject) => {
219
+ wallTimer = setTimeout(() => {
220
+ timedOut = true;
221
+ // Disposing rejects the pending eval; this reject is the
222
+ // one that wins the race when the promise never settles.
223
+ try {
224
+ isolate.dispose();
225
+ } catch {
226
+ /* already disposed */
227
+ }
228
+ reject(new Error("Execution timed out"));
229
+ }, config.timeout);
230
+ }),
231
+ ])
232
+ : await evalPromise;
233
+ if (wallTimer) {
234
+ clearTimeout(wallTimer);
235
+ wallTimer = undefined;
236
+ }
206
237
 
207
238
  // Read captured stdout/stderr from isolate
208
239
  const stdout = (await context.eval("__stdout.join('\\n')", { copy: true })) as string;
@@ -237,6 +268,18 @@ export function createIsolateRuntime(config: {
237
268
  };
238
269
  }
239
270
 
271
+ if (timedOut) {
272
+ return {
273
+ stdout: "",
274
+ stderr: "",
275
+ error: {
276
+ message: `Execution timed out after ${config.timeout}ms`,
277
+ name: "TimeoutError",
278
+ },
279
+ executionTimeMs: elapsed,
280
+ };
281
+ }
282
+
240
283
  // Try to recover stdout/stderr captured before the error
241
284
  let stdout = "";
242
285
  let stderr = "";
@@ -258,6 +301,7 @@ export function createIsolateRuntime(config: {
258
301
  executionTimeMs: elapsed,
259
302
  };
260
303
  } finally {
304
+ if (wallTimer) clearTimeout(wallTimer);
261
305
  if (abortHandler && signal) {
262
306
  signal.removeEventListener("abort", abortHandler);
263
307
  }
@@ -46,6 +46,9 @@ export {
46
46
 
47
47
  export {
48
48
  AgentOrchestrator,
49
+ lastAssistantText,
50
+ realResponseText,
51
+ abnormalEndResponse,
49
52
  type ActiveConversationRun,
50
53
  type EventSink,
51
54
  type OrchestratorHooks,
@@ -1,4 +1,4 @@
1
- import type { AgentEvent, Message } from "@poncho-ai/sdk";
1
+ import { getTextContent, type AgentEvent, type Message } from "@poncho-ai/sdk";
2
2
  import type { Conversation, ConversationStore, PendingSubagentResult } from "../state.js";
3
3
  import type { AgentHarness } from "../harness.js";
4
4
  import type { TelemetryEmitter } from "../telemetry.js";
@@ -28,6 +28,78 @@ import {
28
28
  STALE_SUBAGENT_THRESHOLD_MS,
29
29
  } from "./subagents.js";
30
30
 
31
+ // ── Subagent result extraction ──
32
+
33
/**
 * Pull the human-readable text out of a single assistant message.
 *
 * `getTextContent` covers the plain `string | ContentPart[]` shapes. On top of
 * that, the harness serializes an assistant turn that ALSO made tool calls as
 * a JSON string `{"text":"...","tool_calls":[...]}`. Returning that raw JSON
 * blob as the "response" would be wrong, so the envelope is unwrapped to its
 * `.text`.
 *
 * The envelope is only unwrapped when it really has that shape: a top-level
 * object with a `tool_calls` array and a string `text`. A genuine model answer
 * that merely happens to be JSON mentioning "tool_calls" is returned as-is
 * instead of being truncated to one of its fields.
 *
 * @param message - The assistant message to read.
 * @returns The trimmed prose of the message; may be empty.
 */
const assistantMessageText = (message: Message): string => {
  const raw = getTextContent(message).trim();
  // Cheap pre-filter so ordinary prose never pays for a JSON.parse.
  if (!raw.startsWith("{") || !raw.includes("\"tool_calls\"")) return raw;

  try {
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed === "object" && parsed !== null) {
      const envelope = parsed as { text?: unknown; tool_calls?: unknown };
      if (Array.isArray(envelope.tool_calls) && typeof envelope.text === "string") {
        return envelope.text.trim();
      }
    }
  } catch {
    // Not valid JSON, so not the envelope — fall through to the raw string.
  }
  return raw;
};
54
+
55
/**
 * Return the most recent non-empty assistant prose in a subagent transcript.
 *
 * The transcript is scanned from newest to oldest, skipping non-assistant
 * messages and assistant messages whose extracted text is empty. This way a
 * subagent that finished on a tool-call turn still yields the prose it wrote
 * just before, rather than an empty result.
 *
 * @param messages - Transcript in chronological order.
 * @returns The latest non-empty assistant text, or `""` when there is none.
 */
export const lastAssistantText = (messages: Message[]): string => {
  let index = messages.length;
  while (index > 0) {
    index -= 1;
    const candidate = messages[index];
    if (candidate.role === "assistant") {
      const prose = assistantMessageText(candidate);
      if (prose) return prose;
    }
  }
  return "";
};
69
+
70
+ /**
71
+ * The run loop stuffs a synthetic `[Error: ...]` placeholder into the draft /
72
+ * persisted assistant text when a run ends on `run:error` (e.g. a timeout).
73
+ * That placeholder is not real model output — strip it so we don't surface it
74
+ * to the parent as the subagent's "response".
75
+ */
76
+ export const realResponseText = (text: string | undefined): string => {
77
+ const t = (text ?? "").trim();
78
+ return t.startsWith("[Error:") ? "" : t;
79
+ };
80
+
81
/**
 * Compose the result text handed to the parent when a subagent ended
 * abnormally (timeout / error) and produced no RunResult.
 *
 * The text always opens with a bracketed notice saying the subagent did not
 * finish: a fixed time-limit notice when the error code is `"TIMEOUT"`,
 * otherwise a generic notice that includes the error message if there is one.
 * A recovery hint follows. With gathered work, the hint tells the parent to
 * use it and the work is appended after a blank line. Without it, the hint
 * points at `read_subagent` for this subagent's id.
 *
 * @param opts.subagentId - Id used in the `read_subagent` pointer.
 * @param opts.gathered - Partial output recovered from the subagent; may be empty.
 * @param opts.runError - Error captured from the run, if any.
 * @returns The notice, the recovery hint and, when present, the gathered work.
 */
export const abnormalEndResponse = (opts: {
  subagentId: string;
  gathered: string;
  runError?: { code?: string; message?: string };
}): string => {
  const { subagentId, gathered, runError } = opts;

  let notice: string;
  if (runError?.code === "TIMEOUT") {
    notice = "[Subagent hit its time limit before finishing — it may not have written its output files.]";
  } else {
    const reason = runError?.message ? `: ${runError.message}` : "";
    notice = `[Subagent ended before finishing${reason}.]`;
  }

  // Nothing recovered: point the parent at the transcript instead.
  if (!gathered) {
    return `${notice} Use read_subagent("${subagentId}", mode:"full") to recover what it gathered.`;
  }

  const hint =
    "Partial work it gathered is below — write the files yourself from it, or send a tight write-only follow-up with message_subagent.";
  return `${notice} ${hint}\n\n${gathered}`;
};
102
+
31
103
  // ── Types ──
32
104
 
33
105
  export type ActiveConversationRun = {
@@ -723,6 +795,7 @@ export class AgentOrchestrator {
723
795
  const draft = createTurnDraftState();
724
796
  let latestRunId = "";
725
797
  let runResult: { status: "completed" | "error" | "cancelled"; response?: string; steps: number; duration: number; continuation?: boolean; continuationMessages?: Message[] } | undefined;
798
+ let runError: { code?: string; message?: string } | undefined;
726
799
 
727
800
  try {
728
801
  const conversation = await this.conversationStore.getWithArchive(childConversationId);
@@ -872,6 +945,7 @@ export class AgentOrchestrator {
872
945
  }
873
946
  }
874
947
  if (event.type === "run:error") {
948
+ runError = { code: event.error.code, message: event.error.message };
875
949
  draft.assistantResponse = draft.assistantResponse || `[Error: ${event.error.message}]`;
876
950
  }
877
951
  await this.eventSink(childConversationId, event);
@@ -922,7 +996,17 @@ export class AgentOrchestrator {
922
996
  return;
923
997
  }
924
998
 
925
- conv.subagentMeta = { ...conv.subagentMeta!, status: "completed" };
999
+ // No runResult means the run ended on run:error (timeout / model
1000
+ // error) rather than run:completed — flag the subagent accordingly
1001
+ // instead of faking "completed".
1002
+ const abnormalEnd = !runResult;
1003
+ conv.subagentMeta = {
1004
+ ...conv.subagentMeta!,
1005
+ status: abnormalEnd ? "error" : "completed",
1006
+ ...(abnormalEnd
1007
+ ? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
1008
+ : {}),
1009
+ };
926
1010
  await this.conversationStore.update(conv);
927
1011
  }
928
1012
 
@@ -933,21 +1017,36 @@ export class AgentOrchestrator {
933
1017
  conversationId: childConversationId,
934
1018
  });
935
1019
 
936
- let subagentResponse = runResult?.response ?? draft.assistantResponse;
937
- if (!subagentResponse) {
1020
+ // Recover the subagent's real output: prefer the run response, then the
1021
+ // streamed draft, then walk the transcript — discarding the synthetic
1022
+ // "[Error: ...]" placeholder at each step.
1023
+ let gathered = realResponseText(runResult?.response) || realResponseText(draft.assistantResponse);
1024
+ if (!gathered) {
938
1025
  const freshSubConv = await this.conversationStore.get(childConversationId);
939
- if (freshSubConv) {
940
- const lastAssistant = [...freshSubConv.messages].reverse().find(m => m.role === "assistant");
941
- if (lastAssistant && typeof lastAssistant.content === "string") {
942
- subagentResponse = lastAssistant.content;
943
- }
944
- }
1026
+ if (freshSubConv) gathered = realResponseText(lastAssistantText(freshSubConv.messages));
945
1027
  }
1028
+
1029
+ // On an abnormal end (timeout / error) there is no runResult; don't drop
1030
+ // the work — deliver what it gathered, tagged so the parent knows it
1031
+ // didn't finish, and build a result so it never renders as "(no result)".
1032
+ const abnormal = !runResult;
1033
+ const subagentResponse = abnormal
1034
+ ? abnormalEndResponse({ subagentId: childConversationId, gathered, runError })
1035
+ : gathered;
946
1036
  const pendingResult: PendingSubagentResult = {
947
1037
  subagentId: childConversationId,
948
1038
  task,
949
- status: "completed",
950
- result: runResult ? { status: runResult.status, response: subagentResponse, steps: runResult.steps, tokens: { input: 0, output: 0, cached: 0 }, duration: runResult.duration } : undefined,
1039
+ status: abnormal ? "error" : "completed",
1040
+ result: {
1041
+ status: runResult?.status ?? "error",
1042
+ response: subagentResponse,
1043
+ steps: runResult?.steps ?? 0,
1044
+ tokens: { input: 0, output: 0, cached: 0 },
1045
+ duration: runResult?.duration ?? 0,
1046
+ },
1047
+ ...(abnormal
1048
+ ? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
1049
+ : {}),
951
1050
  timestamp: Date.now(),
952
1051
  };
953
1052
  await this.conversationStore.appendSubagentResult(parentConversationId, pendingResult);
@@ -1040,8 +1139,16 @@ export class AgentOrchestrator {
1040
1139
  conversation.subagentCallbackCount = callbackCount;
1041
1140
 
1042
1141
  for (const pr of pendingResults) {
1142
+ // An empty response is recoverable, not a dead end: the subagent's work
1143
+ // lives in its transcript even when it produced no closing summary (e.g.
1144
+ // it ran out of steps mid-task). Hand the parent an actionable pointer
1145
+ // instead of a silent "(no response)" it can't act on.
1146
+ const responseText = (pr.result?.response ?? "").trim();
1147
+ const responseLine = responseText
1148
+ || `(subagent produced no final summary after ${pr.result?.steps ?? 0} step(s); its work may be incomplete. `
1149
+ + `Call read_subagent with subagent_id "${pr.subagentId}" and mode "assistant" to retrieve what it did.)`;
1043
1150
  const resultBody = pr.result
1044
- ? `Status: ${pr.result.status}\nResponse: ${pr.result.response ?? "(no response)"}\nSteps: ${pr.result.steps}, Duration: ${pr.result.duration}ms`
1151
+ ? `Status: ${pr.result.status}\nResponse: ${responseLine}\nSteps: ${pr.result.steps}, Duration: ${pr.result.duration}ms`
1045
1152
  : pr.error
1046
1153
  ? `Error: ${pr.error.message}`
1047
1154
  : "(no result)";
@@ -1227,7 +1334,8 @@ export class AgentOrchestrator {
1227
1334
  this.activeSubagentRuns.set(conversationId, { abortController: childAbortController, harness: childHarness, parentConversationId });
1228
1335
 
1229
1336
  const draft = createTurnDraftState();
1230
- let runResult: { status: string; response?: string; steps: number; duration: number; continuation?: boolean; continuationMessages?: Message[] } | undefined;
1337
+ let runResult: { status: "completed" | "error" | "cancelled"; response?: string; steps: number; duration: number; continuation?: boolean; continuationMessages?: Message[] } | undefined;
1338
+ let runError: { code?: string; message?: string } | undefined;
1231
1339
 
1232
1340
  try {
1233
1341
  const recallParams = this.hooks?.buildRecallParams?.({ ownerId, tenantId: conversation.tenantId, excludeConversationId: conversationId }) ?? {};
@@ -1262,6 +1370,7 @@ export class AgentOrchestrator {
1262
1370
  }
1263
1371
  }
1264
1372
  if (event.type === "run:error") {
1373
+ runError = { code: event.error.code, message: event.error.message };
1265
1374
  draft.assistantResponse = draft.assistantResponse || `[Error: ${event.error.message}]`;
1266
1375
  }
1267
1376
  await this.eventSink(conversationId, event);
@@ -1311,7 +1420,14 @@ export class AgentOrchestrator {
1311
1420
  return;
1312
1421
  }
1313
1422
 
1314
- conv.subagentMeta = { ...conv.subagentMeta!, status: "completed" };
1423
+ const abnormalEnd = !runResult;
1424
+ conv.subagentMeta = {
1425
+ ...conv.subagentMeta!,
1426
+ status: abnormalEnd ? "error" : "completed",
1427
+ ...(abnormalEnd
1428
+ ? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
1429
+ : {}),
1430
+ };
1315
1431
  await this.conversationStore.update(conv);
1316
1432
  }
1317
1433
 
@@ -1322,24 +1438,26 @@ export class AgentOrchestrator {
1322
1438
  conversationId,
1323
1439
  });
1324
1440
 
1325
- let subagentResponse = runResult?.response ?? draft.assistantResponse;
1326
- if (!subagentResponse) {
1441
+ let gathered = realResponseText(runResult?.response) || realResponseText(draft.assistantResponse);
1442
+ if (!gathered) {
1327
1443
  const freshSubConv = await this.conversationStore.get(conversationId);
1328
- if (freshSubConv) {
1329
- const lastAssistant = [...freshSubConv.messages].reverse().find(m => m.role === "assistant");
1330
- if (lastAssistant) {
1331
- subagentResponse = typeof lastAssistant.content === "string" ? lastAssistant.content : "";
1332
- }
1333
- }
1444
+ if (freshSubConv) gathered = realResponseText(lastAssistantText(freshSubConv.messages));
1334
1445
  }
1446
+ const abnormal = !runResult;
1447
+ const subagentResponse = abnormal
1448
+ ? abnormalEndResponse({ subagentId: conversationId, gathered, runError })
1449
+ : gathered;
1335
1450
 
1336
1451
  const parentConv = await this.conversationStore.get(parentConversationId);
1337
1452
  if (parentConv) {
1338
1453
  const result: PendingSubagentResult = {
1339
1454
  subagentId: conversationId,
1340
1455
  task,
1341
- status: "completed",
1342
- result: { status: "completed", response: subagentResponse, steps: runResult?.steps ?? 0, tokens: { input: 0, output: 0, cached: 0 }, duration: runResult?.duration ?? 0 },
1456
+ status: abnormal ? "error" : "completed",
1457
+ result: { status: runResult?.status ?? "error", response: subagentResponse, steps: runResult?.steps ?? 0, tokens: { input: 0, output: 0, cached: 0 }, duration: runResult?.duration ?? 0 },
1458
+ ...(abnormal
1459
+ ? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
1460
+ : {}),
1343
1461
  timestamp: Date.now(),
1344
1462
  };
1345
1463
  await this.conversationStore.appendSubagentResult(parentConversationId, result);