@poncho-ai/harness 0.50.3 → 0.50.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +6 -6
- package/CHANGELOG.md +24 -0
- package/dist/index.d.ts +39 -1
- package/dist/index.js +123 -30
- package/dist/{isolate-BNQ6P3HI.js → isolate-F2PPSUL6.js} +84 -24
- package/package.json +1 -1
- package/src/harness.ts +99 -8
- package/src/isolate/polyfills.ts +52 -23
- package/src/isolate/runtime.ts +45 -1
- package/src/orchestrator/index.ts +3 -0
- package/src/orchestrator/orchestrator.ts +143 -25
- package/test/isolate.test.ts +75 -0
- package/test/orchestrator.test.ts +112 -0
package/src/harness.ts
CHANGED
|
@@ -146,6 +146,14 @@ export interface HarnessOptions {
|
|
|
146
146
|
* should also be browsable in the VFS. Empty by default.
|
|
147
147
|
*/
|
|
148
148
|
systemSkillPaths?: string[];
|
|
149
|
+
/**
|
|
150
|
+
* Override the per-run hard wall-clock timeout, in seconds, taking
|
|
151
|
+
* precedence over the agent definition's `limits.timeout`. Platforms use
|
|
152
|
+
* this to give background subagents a longer budget than the foreground
|
|
153
|
+
* agent without forking the agent definition (e.g. a 1h research subagent
|
|
154
|
+
* vs. a 5-min foreground turn). `0` disables the hard timeout.
|
|
155
|
+
*/
|
|
156
|
+
runTimeoutSecOverride?: number;
|
|
149
157
|
}
|
|
150
158
|
|
|
151
159
|
export interface HarnessRunOutput {
|
|
@@ -159,6 +167,16 @@ const now = (): number => Date.now();
|
|
|
159
167
|
const FIRST_CHUNK_TIMEOUT_MS = 90_000; // 90s to receive the first chunk from the model
|
|
160
168
|
const MAX_TRANSIENT_STEP_RETRIES = 1;
|
|
161
169
|
const COMPACTION_CHECK_INTERVAL_STEPS = 3;
|
|
170
|
+
// Injected as a trailing user turn on the final allowed step, with tools
|
|
171
|
+
// disabled, so a step-exhausted run produces a text summary instead of
|
|
172
|
+
// terminating on a dangling tool call (which surfaces to a parent agent as
|
|
173
|
+
// an empty "(no response)" subagent result). See the `isFinalStep` branch in
|
|
174
|
+
// the run loop.
|
|
175
|
+
const FINAL_STEP_SUMMARY_PROMPT =
|
|
176
|
+
"You have reached the maximum number of steps for this run and cannot call " +
|
|
177
|
+
"any more tools. Do NOT attempt any tool calls. Using only the work you have " +
|
|
178
|
+
"already done, write your final response now: summarize what you found or " +
|
|
179
|
+
"accomplished, include any concrete results, and flag anything left unfinished.";
|
|
162
180
|
const TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
|
|
163
181
|
const TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
|
|
164
182
|
const TOOL_RESULT_PREVIEW_CHARS = 700;
|
|
@@ -838,6 +856,7 @@ function extractMediaFromToolOutput(output: unknown): {
|
|
|
838
856
|
export class AgentHarness {
|
|
839
857
|
private readonly workingDir: string;
|
|
840
858
|
private readonly environment: HarnessOptions["environment"];
|
|
859
|
+
private readonly runTimeoutSecOverride?: number;
|
|
841
860
|
private modelProvider: ModelProviderFactory;
|
|
842
861
|
private readonly modelProviderInjected: boolean;
|
|
843
862
|
private readonly dispatcher = new ToolDispatcher();
|
|
@@ -1074,6 +1093,7 @@ export class AgentHarness {
|
|
|
1074
1093
|
constructor(options: HarnessOptions = {}) {
|
|
1075
1094
|
this.workingDir = options.workingDir ?? process.cwd();
|
|
1076
1095
|
this.environment = options.environment ?? "development";
|
|
1096
|
+
this.runTimeoutSecOverride = options.runTimeoutSecOverride;
|
|
1077
1097
|
this.modelProviderInjected = !!options.modelProvider;
|
|
1078
1098
|
this.modelProvider = options.modelProvider ?? createModelProvider("anthropic");
|
|
1079
1099
|
this.uploadStore = options.uploadStore;
|
|
@@ -2116,7 +2136,9 @@ export class AgentHarness {
|
|
|
2116
2136
|
const runId = `run_${randomUUID()}`;
|
|
2117
2137
|
const start = now();
|
|
2118
2138
|
const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
|
|
2119
|
-
|
|
2139
|
+
// A constructor-level override (e.g. a longer budget for background
|
|
2140
|
+
// subagents) takes precedence over the agent definition's limits.timeout.
|
|
2141
|
+
const configuredTimeout = this.runTimeoutSecOverride ?? agent.frontmatter.limits?.timeout;
|
|
2120
2142
|
const timeoutMs = this.environment === "development" && configuredTimeout == null
|
|
2121
2143
|
? 0 // no hard timeout in development unless explicitly configured
|
|
2122
2144
|
: (configuredTimeout ?? 300) * 1000;
|
|
@@ -2297,14 +2319,61 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2297
2319
|
};
|
|
2298
2320
|
const isCancelled = (): boolean => input.abortSignal?.aborted === true;
|
|
2299
2321
|
let cancellationEmitted = false;
|
|
2322
|
+
// The assistant turn for the current step, captured as it streams. The
|
|
2323
|
+
// assistant message + its tool results are only pushed to `messages`
|
|
2324
|
+
// *together*, after the tool batch finishes — so between "model streamed
|
|
2325
|
+
// a tool call" and "tools done" the turn lives only in these locals. If a
|
|
2326
|
+
// cancellation lands in that window we'd otherwise drop the whole turn
|
|
2327
|
+
// from the canonical history, leaving the next request with back-to-back
|
|
2328
|
+
// user messages and a model with no record of what it just said (the user
|
|
2329
|
+
// still sees it, since the display history is built separately). Cleared
|
|
2330
|
+
// once the turn is committed, and reset at the top of every step.
|
|
2331
|
+
let inflightTurn: {
|
|
2332
|
+
text: string;
|
|
2333
|
+
toolCalls: Array<{ id: string; name: string; input: Record<string, unknown> }>;
|
|
2334
|
+
} | null = null;
|
|
2300
2335
|
const emitCancellation = (): AgentEvent => {
|
|
2301
2336
|
cancellationEmitted = true;
|
|
2302
2337
|
// Snapshot the in-flight messages so the orchestrator can persist them
|
|
2303
|
-
// as the canonical history.
|
|
2304
|
-
|
|
2305
|
-
// turn
|
|
2306
|
-
|
|
2307
|
-
|
|
2338
|
+
// as the canonical history.
|
|
2339
|
+
const snapshot: Message[] = [...messages];
|
|
2340
|
+
// Re-attach the in-flight assistant turn (if any). Synthesize a
|
|
2341
|
+
// tool_result for every pending tool_use so the turn is a valid prefix —
|
|
2342
|
+
// an assistant tool_use with no following tool result is rejected by the
|
|
2343
|
+
// API on the next turn, which is exactly why a naive snapshot drops it.
|
|
2344
|
+
if (inflightTurn && (inflightTurn.text.length > 0 || inflightTurn.toolCalls.length > 0)) {
|
|
2345
|
+
const hasToolCalls = inflightTurn.toolCalls.length > 0;
|
|
2346
|
+
const assistantContent = hasToolCalls
|
|
2347
|
+
? JSON.stringify({
|
|
2348
|
+
text: inflightTurn.text,
|
|
2349
|
+
tool_calls: inflightTurn.toolCalls.map((tc) => ({
|
|
2350
|
+
id: tc.id,
|
|
2351
|
+
name: tc.name,
|
|
2352
|
+
input: tc.input,
|
|
2353
|
+
})),
|
|
2354
|
+
})
|
|
2355
|
+
: inflightTurn.text;
|
|
2356
|
+
snapshot.push({
|
|
2357
|
+
role: "assistant",
|
|
2358
|
+
content: assistantContent,
|
|
2359
|
+
metadata: { timestamp: now(), id: randomUUID(), runId },
|
|
2360
|
+
});
|
|
2361
|
+
if (hasToolCalls) {
|
|
2362
|
+
const cancelledResults = inflightTurn.toolCalls.map((tc) => ({
|
|
2363
|
+
type: "tool_result" as const,
|
|
2364
|
+
tool_use_id: tc.id,
|
|
2365
|
+
tool_name: tc.name,
|
|
2366
|
+
content: "Tool execution cancelled by user.",
|
|
2367
|
+
}));
|
|
2368
|
+
snapshot.push({
|
|
2369
|
+
role: "tool",
|
|
2370
|
+
content: JSON.stringify(cancelledResults),
|
|
2371
|
+
metadata: { timestamp: now(), id: randomUUID(), runId },
|
|
2372
|
+
});
|
|
2373
|
+
}
|
|
2374
|
+
}
|
|
2375
|
+
// Defensive: drop any trailing dangling tool_use we didn't pair above.
|
|
2376
|
+
return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
|
|
2308
2377
|
};
|
|
2309
2378
|
|
|
2310
2379
|
const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
|
|
@@ -2424,6 +2493,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2424
2493
|
let convertedUpTo = 0;
|
|
2425
2494
|
|
|
2426
2495
|
for (let step = 1; step <= maxSteps; step += 1) {
|
|
2496
|
+
inflightTurn = null;
|
|
2427
2497
|
try {
|
|
2428
2498
|
yield* drainBrowserEvents();
|
|
2429
2499
|
if (isCancelled()) {
|
|
@@ -2883,12 +2953,24 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
2883
2953
|
|
|
2884
2954
|
const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
|
|
2885
2955
|
|
|
2956
|
+
// On the last permitted step, force a closing text turn: strip the
|
|
2957
|
+
// tools so the model cannot start another tool call it has no step
|
|
2958
|
+
// left to resolve, and append a one-shot nudge instructing it to
|
|
2959
|
+
// summarize. This is what keeps a step-exhausted run (very common in
|
|
2960
|
+
// subagents) from ending on a dangling tool call that a parent would
|
|
2961
|
+
// see as an empty result. The nudge is appended only to this model
|
|
2962
|
+
// request — it is never written into `messages`/history.
|
|
2963
|
+
const isFinalStep = step === maxSteps;
|
|
2964
|
+
const toolsForStep = isFinalStep ? {} : tools;
|
|
2965
|
+
const messagesForStep: ModelMessage[] = isFinalStep
|
|
2966
|
+
? [...finalMessages, { role: "user", content: FINAL_STEP_SUMMARY_PROMPT }]
|
|
2967
|
+
: finalMessages;
|
|
2886
2968
|
|
|
2887
2969
|
const result = await streamText({
|
|
2888
2970
|
model: modelInstance,
|
|
2889
2971
|
...(useStaticCache ? {} : { system: systemPrompt }),
|
|
2890
|
-
messages:
|
|
2891
|
-
tools,
|
|
2972
|
+
messages: messagesForStep,
|
|
2973
|
+
tools: toolsForStep,
|
|
2892
2974
|
temperature,
|
|
2893
2975
|
abortSignal: input.abortSignal,
|
|
2894
2976
|
...(typeof maxTokens === "number" ? { maxTokens } : {}),
|
|
@@ -3026,6 +3108,11 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
3026
3108
|
return;
|
|
3027
3109
|
}
|
|
3028
3110
|
|
|
3111
|
+
// The model finished streaming this step's text. Capture it so a
|
|
3112
|
+
// cancellation from here on persists what the user already saw; the
|
|
3113
|
+
// tool calls are attached once they're parsed below.
|
|
3114
|
+
inflightTurn = { text: fullText, toolCalls: [] };
|
|
3115
|
+
|
|
3029
3116
|
if (isCancelled()) {
|
|
3030
3117
|
yield emitCancellation();
|
|
3031
3118
|
return;
|
|
@@ -3135,6 +3222,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
3135
3222
|
name: tc.toolName,
|
|
3136
3223
|
input: (tc as any).input as Record<string, unknown>,
|
|
3137
3224
|
}));
|
|
3225
|
+
if (inflightTurn) inflightTurn.toolCalls = toolCalls;
|
|
3138
3226
|
|
|
3139
3227
|
if (toolCalls.length === 0) {
|
|
3140
3228
|
// Detect silent empty responses — likely an SDK or model
|
|
@@ -3593,6 +3681,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
|
|
|
3593
3681
|
content: JSON.stringify(toolResultsForModel),
|
|
3594
3682
|
metadata: toolMsgMeta as Message["metadata"],
|
|
3595
3683
|
});
|
|
3684
|
+
// Turn is now committed to `messages`; a later cancellation must not
|
|
3685
|
+
// re-append it from the in-flight holder.
|
|
3686
|
+
inflightTurn = null;
|
|
3596
3687
|
|
|
3597
3688
|
// Post-tool-execution soft deadline: long-running tool batches (e.g.
|
|
3598
3689
|
// multiple web_search calls) can push past the deadline. Checkpoint
|
package/src/isolate/polyfills.ts
CHANGED
|
@@ -610,50 +610,79 @@ const POLYFILL_FETCH_STUB = `
|
|
|
610
610
|
|
|
611
611
|
const POLYFILL_TIMERS = `
|
|
612
612
|
// --- Timers polyfill ---
|
|
613
|
+
//
|
|
614
|
+
// The isolate has no host event loop, so real wall-clock delays can't be
|
|
615
|
+
// honoured. What we *can* do is drain pending timers on the microtask queue
|
|
616
|
+
// (which isolated-vm does pump while resolving the run's promise), firing
|
|
617
|
+
// them in order of their requested delay against a virtual clock. This makes
|
|
618
|
+
// the overwhelmingly common pattern — \`await new Promise(r => setTimeout(r, n))\`
|
|
619
|
+
// as a sleep — actually resolve instead of hanging the whole run forever.
|
|
620
|
+
// Delays collapse to "as soon as possible, in delay order"; that's the right
|
|
621
|
+
// trade for a sandbox with no real time. A runaway setInterval is bounded by
|
|
622
|
+
// __MAX_FIRES here and, ultimately, by the host-side wall-clock timeout.
|
|
613
623
|
(function() {
|
|
614
624
|
let __timerId = 0;
|
|
615
|
-
const __timers = new Map();
|
|
625
|
+
const __timers = new Map(); // id -> { fn, due, type }
|
|
626
|
+
const __intervals = new Set(); // ids that should reschedule
|
|
627
|
+
let __vclock = 0; // virtual clock (ms)
|
|
628
|
+
let __draining = false;
|
|
629
|
+
let __fired = 0;
|
|
630
|
+
const __MAX_FIRES = 1000000; // backstop against a runaway interval
|
|
631
|
+
|
|
632
|
+
function __schedule(fn, delayMs, type, id) {
|
|
633
|
+
__timers.set(id, { fn, due: __vclock + delayMs, type });
|
|
634
|
+
if (!__draining) __drain();
|
|
635
|
+
return id;
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
function __drain() {
|
|
639
|
+
__draining = true;
|
|
640
|
+
const step = function() {
|
|
641
|
+
if (__timers.size === 0) { __draining = false; return; }
|
|
642
|
+
// Pick the earliest-due timer (ties broken by insertion id for FIFO).
|
|
643
|
+
let pick = null;
|
|
644
|
+
for (const [id, t] of __timers) {
|
|
645
|
+
if (pick === null || t.due < pick.t.due || (t.due === pick.t.due && id < pick.id)) {
|
|
646
|
+
pick = { id, t };
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
__timers.delete(pick.id);
|
|
650
|
+
if (pick.t.due > __vclock) __vclock = pick.t.due;
|
|
651
|
+
__fired++;
|
|
652
|
+
try { pick.t.fn(); } catch (e) { /* host timers swallow callback throws */ }
|
|
653
|
+
if (__fired > __MAX_FIRES) { __draining = false; return; }
|
|
654
|
+
Promise.resolve().then(step);
|
|
655
|
+
};
|
|
656
|
+
Promise.resolve().then(step);
|
|
657
|
+
}
|
|
616
658
|
|
|
617
659
|
globalThis.setTimeout = function(fn, delay) {
|
|
618
660
|
const id = ++__timerId;
|
|
619
661
|
const ms = Math.max(0, Number(delay) || 0);
|
|
620
|
-
|
|
621
|
-
__timers.set(id, { fn, ms, start, type: "timeout" });
|
|
622
|
-
// In the isolate, setTimeout returns the id but the callback is
|
|
623
|
-
// executed via a polling mechanism in the async wrapper.
|
|
624
|
-
// For simple cases (delay=0), we can use a microtask.
|
|
625
|
-
if (ms === 0) {
|
|
626
|
-
Promise.resolve().then(() => {
|
|
627
|
-
if (__timers.has(id)) {
|
|
628
|
-
__timers.delete(id);
|
|
629
|
-
fn();
|
|
630
|
-
}
|
|
631
|
-
});
|
|
632
|
-
}
|
|
633
|
-
return id;
|
|
662
|
+
return __schedule(typeof fn === "function" ? fn : function() {}, ms, "timeout", id);
|
|
634
663
|
};
|
|
635
664
|
|
|
636
665
|
globalThis.clearTimeout = function(id) {
|
|
637
666
|
__timers.delete(id);
|
|
667
|
+
__intervals.delete(id);
|
|
638
668
|
};
|
|
639
669
|
|
|
640
670
|
globalThis.setInterval = function(fn, delay) {
|
|
641
671
|
const id = ++__timerId;
|
|
642
672
|
const ms = Math.max(1, Number(delay) || 1);
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
673
|
+
__intervals.add(id);
|
|
674
|
+
const tick = function() {
|
|
675
|
+
if (!__intervals.has(id)) return;
|
|
676
|
+
try { fn(); } finally {
|
|
677
|
+
if (__intervals.has(id)) __schedule(tick, ms, "interval", id);
|
|
648
678
|
}
|
|
649
679
|
};
|
|
650
|
-
|
|
651
|
-
globalThis.setTimeout(wrapper, ms);
|
|
652
|
-
return id;
|
|
680
|
+
return __schedule(tick, ms, "interval", id);
|
|
653
681
|
};
|
|
654
682
|
|
|
655
683
|
globalThis.clearInterval = function(id) {
|
|
656
684
|
__timers.delete(id);
|
|
685
|
+
__intervals.delete(id);
|
|
657
686
|
};
|
|
658
687
|
|
|
659
688
|
// queueMicrotask if not available
|
package/src/isolate/runtime.ts
CHANGED
|
@@ -153,6 +153,14 @@ export function createIsolateRuntime(config: {
|
|
|
153
153
|
const t0 = performance.now();
|
|
154
154
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
155
155
|
let context: any;
|
|
156
|
+
// Wall-clock guard. isolated-vm's `timeout` option only bounds the
|
|
157
|
+
// *synchronous* portion of an eval; when the script returns a promise
|
|
158
|
+
// (which ours always does — it's an async IIFE) a never-settling promise
|
|
159
|
+
// would hang here forever (e.g. `await new Promise(() => {})`, or a
|
|
160
|
+
// bound host call that never resolves). Race the eval against a host
|
|
161
|
+
// timer that disposes the isolate, so `timeLimit` bounds total execution.
|
|
162
|
+
let timedOut = false;
|
|
163
|
+
let wallTimer: ReturnType<typeof setTimeout> | undefined;
|
|
156
164
|
try {
|
|
157
165
|
context = await isolate.createContext();
|
|
158
166
|
const jail = context.global;
|
|
@@ -197,12 +205,35 @@ export function createIsolateRuntime(config: {
|
|
|
197
205
|
// (context.eval + promise option handles Reference.apply resolution
|
|
198
206
|
// correctly, unlike compileScript().run())
|
|
199
207
|
const wrapped = `(async () => {\n${code}\n})()`;
|
|
200
|
-
const
|
|
208
|
+
const evalPromise = context.eval(wrapped, {
|
|
201
209
|
filename: "<user-code>",
|
|
202
210
|
promise: true,
|
|
203
211
|
copy: true,
|
|
204
212
|
timeout: config.timeout,
|
|
205
213
|
});
|
|
214
|
+
const rawResult =
|
|
215
|
+
config.timeout > 0
|
|
216
|
+
? await Promise.race([
|
|
217
|
+
evalPromise,
|
|
218
|
+
new Promise((_resolve, reject) => {
|
|
219
|
+
wallTimer = setTimeout(() => {
|
|
220
|
+
timedOut = true;
|
|
221
|
+
// Disposing rejects the pending eval; this reject is the
|
|
222
|
+
// one that wins the race when the promise never settles.
|
|
223
|
+
try {
|
|
224
|
+
isolate.dispose();
|
|
225
|
+
} catch {
|
|
226
|
+
/* already disposed */
|
|
227
|
+
}
|
|
228
|
+
reject(new Error("Execution timed out"));
|
|
229
|
+
}, config.timeout);
|
|
230
|
+
}),
|
|
231
|
+
])
|
|
232
|
+
: await evalPromise;
|
|
233
|
+
if (wallTimer) {
|
|
234
|
+
clearTimeout(wallTimer);
|
|
235
|
+
wallTimer = undefined;
|
|
236
|
+
}
|
|
206
237
|
|
|
207
238
|
// Read captured stdout/stderr from isolate
|
|
208
239
|
const stdout = (await context.eval("__stdout.join('\\n')", { copy: true })) as string;
|
|
@@ -237,6 +268,18 @@ export function createIsolateRuntime(config: {
|
|
|
237
268
|
};
|
|
238
269
|
}
|
|
239
270
|
|
|
271
|
+
if (timedOut) {
|
|
272
|
+
return {
|
|
273
|
+
stdout: "",
|
|
274
|
+
stderr: "",
|
|
275
|
+
error: {
|
|
276
|
+
message: `Execution timed out after ${config.timeout}ms`,
|
|
277
|
+
name: "TimeoutError",
|
|
278
|
+
},
|
|
279
|
+
executionTimeMs: elapsed,
|
|
280
|
+
};
|
|
281
|
+
}
|
|
282
|
+
|
|
240
283
|
// Try to recover stdout/stderr captured before the error
|
|
241
284
|
let stdout = "";
|
|
242
285
|
let stderr = "";
|
|
@@ -258,6 +301,7 @@ export function createIsolateRuntime(config: {
|
|
|
258
301
|
executionTimeMs: elapsed,
|
|
259
302
|
};
|
|
260
303
|
} finally {
|
|
304
|
+
if (wallTimer) clearTimeout(wallTimer);
|
|
261
305
|
if (abortHandler && signal) {
|
|
262
306
|
signal.removeEventListener("abort", abortHandler);
|
|
263
307
|
}
|
package/src/orchestrator/orchestrator.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { getTextContent, type AgentEvent, type Message } from "@poncho-ai/sdk";
|
|
2
2
|
import type { Conversation, ConversationStore, PendingSubagentResult } from "../state.js";
|
|
3
3
|
import type { AgentHarness } from "../harness.js";
|
|
4
4
|
import type { TelemetryEmitter } from "../telemetry.js";
|
|
@@ -28,6 +28,78 @@ import {
|
|
|
28
28
|
STALE_SUBAGENT_THRESHOLD_MS,
|
|
29
29
|
} from "./subagents.js";
|
|
30
30
|
|
|
31
|
+
// ── Subagent result extraction ──
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Pull the human-readable text out of a single assistant message.
|
|
35
|
+
*
|
|
36
|
+
* Beyond the `string | ContentPart[]` shapes `getTextContent` handles, the
|
|
37
|
+
* harness serializes an assistant turn that ALSO made tool calls as a JSON
|
|
38
|
+
* string `{"text":"...","tool_calls":[...]}` (see the run loop's
|
|
39
|
+
* `assistantContent`). A naive `typeof content === "string"` read would hand
|
|
40
|
+
* that raw JSON blob back as the "response"; here we unwrap it to its `.text`.
|
|
41
|
+
*/
|
|
42
|
+
const assistantMessageText = (message: Message): string => {
|
|
43
|
+
const raw = getTextContent(message).trim();
|
|
44
|
+
if (raw.startsWith("{") && raw.includes("\"tool_calls\"")) {
|
|
45
|
+
try {
|
|
46
|
+
const parsed = JSON.parse(raw) as { text?: unknown };
|
|
47
|
+
if (typeof parsed.text === "string") return parsed.text.trim();
|
|
48
|
+
} catch {
|
|
49
|
+
// Not the envelope we expected — fall through to the raw string.
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
return raw;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Find the last non-empty assistant text in a subagent transcript. Walking
|
|
57
|
+
* backwards (rather than reading only the final message) means a subagent
|
|
58
|
+
* that ended on a tool-call turn still yields the prose it produced just
|
|
59
|
+
* before — instead of surfacing to the parent as an empty result.
|
|
60
|
+
*/
|
|
61
|
+
export const lastAssistantText = (messages: Message[]): string => {
|
|
62
|
+
for (let i = messages.length - 1; i >= 0; i -= 1) {
|
|
63
|
+
if (messages[i].role !== "assistant") continue;
|
|
64
|
+
const text = assistantMessageText(messages[i]);
|
|
65
|
+
if (text) return text;
|
|
66
|
+
}
|
|
67
|
+
return "";
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* The run loop stuffs a synthetic `[Error: ...]` placeholder into the draft /
|
|
72
|
+
* persisted assistant text when a run ends on `run:error` (e.g. a timeout).
|
|
73
|
+
* That placeholder is not real model output — strip it so we don't surface it
|
|
74
|
+
* to the parent as the subagent's "response".
|
|
75
|
+
*/
|
|
76
|
+
export const realResponseText = (text: string | undefined): string => {
|
|
77
|
+
const t = (text ?? "").trim();
|
|
78
|
+
return t.startsWith("[Error:") ? "" : t;
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Build the result text delivered to the parent when a subagent ended
|
|
83
|
+
* abnormally (timeout / error) with no RunResult. We never drop the work it
|
|
84
|
+
* gathered, and the parent is told it didn't finish — e.g. it may not have
|
|
85
|
+
* written its output files — plus how to recover (use what's here, send a
|
|
86
|
+
* write-only follow-up, or read the full transcript).
|
|
87
|
+
*/
|
|
88
|
+
export const abnormalEndResponse = (opts: {
|
|
89
|
+
subagentId: string;
|
|
90
|
+
gathered: string;
|
|
91
|
+
runError?: { code?: string; message?: string };
|
|
92
|
+
}): string => {
|
|
93
|
+
const timedOut = opts.runError?.code === "TIMEOUT";
|
|
94
|
+
const head = timedOut
|
|
95
|
+
? "[Subagent hit its time limit before finishing — it may not have written its output files.]"
|
|
96
|
+
: `[Subagent ended before finishing${opts.runError?.message ? `: ${opts.runError.message}` : ""}.]`;
|
|
97
|
+
const recover = opts.gathered
|
|
98
|
+
? "Partial work it gathered is below — write the files yourself from it, or send a tight write-only follow-up with message_subagent."
|
|
99
|
+
: `Use read_subagent("${opts.subagentId}", mode:"full") to recover what it gathered.`;
|
|
100
|
+
return opts.gathered ? `${head} ${recover}\n\n${opts.gathered}` : `${head} ${recover}`;
|
|
101
|
+
};
|
|
102
|
+
|
|
31
103
|
// ── Types ──
|
|
32
104
|
|
|
33
105
|
export type ActiveConversationRun = {
|
|
@@ -723,6 +795,7 @@ export class AgentOrchestrator {
|
|
|
723
795
|
const draft = createTurnDraftState();
|
|
724
796
|
let latestRunId = "";
|
|
725
797
|
let runResult: { status: "completed" | "error" | "cancelled"; response?: string; steps: number; duration: number; continuation?: boolean; continuationMessages?: Message[] } | undefined;
|
|
798
|
+
let runError: { code?: string; message?: string } | undefined;
|
|
726
799
|
|
|
727
800
|
try {
|
|
728
801
|
const conversation = await this.conversationStore.getWithArchive(childConversationId);
|
|
@@ -872,6 +945,7 @@ export class AgentOrchestrator {
|
|
|
872
945
|
}
|
|
873
946
|
}
|
|
874
947
|
if (event.type === "run:error") {
|
|
948
|
+
runError = { code: event.error.code, message: event.error.message };
|
|
875
949
|
draft.assistantResponse = draft.assistantResponse || `[Error: ${event.error.message}]`;
|
|
876
950
|
}
|
|
877
951
|
await this.eventSink(childConversationId, event);
|
|
@@ -922,7 +996,17 @@ export class AgentOrchestrator {
|
|
|
922
996
|
return;
|
|
923
997
|
}
|
|
924
998
|
|
|
925
|
-
|
|
999
|
+
// No runResult means the run ended on run:error (timeout / model
|
|
1000
|
+
// error) rather than run:completed — flag the subagent accordingly
|
|
1001
|
+
// instead of faking "completed".
|
|
1002
|
+
const abnormalEnd = !runResult;
|
|
1003
|
+
conv.subagentMeta = {
|
|
1004
|
+
...conv.subagentMeta!,
|
|
1005
|
+
status: abnormalEnd ? "error" : "completed",
|
|
1006
|
+
...(abnormalEnd
|
|
1007
|
+
? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
|
|
1008
|
+
: {}),
|
|
1009
|
+
};
|
|
926
1010
|
await this.conversationStore.update(conv);
|
|
927
1011
|
}
|
|
928
1012
|
|
|
@@ -933,21 +1017,36 @@ export class AgentOrchestrator {
|
|
|
933
1017
|
conversationId: childConversationId,
|
|
934
1018
|
});
|
|
935
1019
|
|
|
936
|
-
|
|
937
|
-
|
|
1020
|
+
// Recover the subagent's real output: prefer the run response, then the
|
|
1021
|
+
// streamed draft, then walk the transcript — discarding the synthetic
|
|
1022
|
+
// "[Error: ...]" placeholder at each step.
|
|
1023
|
+
let gathered = realResponseText(runResult?.response) || realResponseText(draft.assistantResponse);
|
|
1024
|
+
if (!gathered) {
|
|
938
1025
|
const freshSubConv = await this.conversationStore.get(childConversationId);
|
|
939
|
-
if (freshSubConv)
|
|
940
|
-
const lastAssistant = [...freshSubConv.messages].reverse().find(m => m.role === "assistant");
|
|
941
|
-
if (lastAssistant && typeof lastAssistant.content === "string") {
|
|
942
|
-
subagentResponse = lastAssistant.content;
|
|
943
|
-
}
|
|
944
|
-
}
|
|
1026
|
+
if (freshSubConv) gathered = realResponseText(lastAssistantText(freshSubConv.messages));
|
|
945
1027
|
}
|
|
1028
|
+
|
|
1029
|
+
// On an abnormal end (timeout / error) there is no runResult; don't drop
|
|
1030
|
+
// the work — deliver what it gathered, tagged so the parent knows it
|
|
1031
|
+
// didn't finish, and build a result so it never renders as "(no result)".
|
|
1032
|
+
const abnormal = !runResult;
|
|
1033
|
+
const subagentResponse = abnormal
|
|
1034
|
+
? abnormalEndResponse({ subagentId: childConversationId, gathered, runError })
|
|
1035
|
+
: gathered;
|
|
946
1036
|
const pendingResult: PendingSubagentResult = {
|
|
947
1037
|
subagentId: childConversationId,
|
|
948
1038
|
task,
|
|
949
|
-
status: "completed",
|
|
950
|
-
result:
|
|
1039
|
+
status: abnormal ? "error" : "completed",
|
|
1040
|
+
result: {
|
|
1041
|
+
status: runResult?.status ?? "error",
|
|
1042
|
+
response: subagentResponse,
|
|
1043
|
+
steps: runResult?.steps ?? 0,
|
|
1044
|
+
tokens: { input: 0, output: 0, cached: 0 },
|
|
1045
|
+
duration: runResult?.duration ?? 0,
|
|
1046
|
+
},
|
|
1047
|
+
...(abnormal
|
|
1048
|
+
? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
|
|
1049
|
+
: {}),
|
|
951
1050
|
timestamp: Date.now(),
|
|
952
1051
|
};
|
|
953
1052
|
await this.conversationStore.appendSubagentResult(parentConversationId, pendingResult);
|
|
@@ -1040,8 +1139,16 @@ export class AgentOrchestrator {
|
|
|
1040
1139
|
conversation.subagentCallbackCount = callbackCount;
|
|
1041
1140
|
|
|
1042
1141
|
for (const pr of pendingResults) {
|
|
1142
|
+
// An empty response is recoverable, not a dead end: the subagent's work
|
|
1143
|
+
// lives in its transcript even when it produced no closing summary (e.g.
|
|
1144
|
+
// it ran out of steps mid-task). Hand the parent an actionable pointer
|
|
1145
|
+
// instead of a silent "(no response)" it can't act on.
|
|
1146
|
+
const responseText = (pr.result?.response ?? "").trim();
|
|
1147
|
+
const responseLine = responseText
|
|
1148
|
+
|| `(subagent produced no final summary after ${pr.result?.steps ?? 0} step(s); its work may be incomplete. `
|
|
1149
|
+
+ `Call read_subagent with subagent_id "${pr.subagentId}" and mode "assistant" to retrieve what it did.)`;
|
|
1043
1150
|
const resultBody = pr.result
|
|
1044
|
-
? `Status: ${pr.result.status}\nResponse: ${
|
|
1151
|
+
? `Status: ${pr.result.status}\nResponse: ${responseLine}\nSteps: ${pr.result.steps}, Duration: ${pr.result.duration}ms`
|
|
1045
1152
|
: pr.error
|
|
1046
1153
|
? `Error: ${pr.error.message}`
|
|
1047
1154
|
: "(no result)";
|
|
@@ -1227,7 +1334,8 @@ export class AgentOrchestrator {
|
|
|
1227
1334
|
this.activeSubagentRuns.set(conversationId, { abortController: childAbortController, harness: childHarness, parentConversationId });
|
|
1228
1335
|
|
|
1229
1336
|
const draft = createTurnDraftState();
|
|
1230
|
-
let runResult: { status:
|
|
1337
|
+
let runResult: { status: "completed" | "error" | "cancelled"; response?: string; steps: number; duration: number; continuation?: boolean; continuationMessages?: Message[] } | undefined;
|
|
1338
|
+
let runError: { code?: string; message?: string } | undefined;
|
|
1231
1339
|
|
|
1232
1340
|
try {
|
|
1233
1341
|
const recallParams = this.hooks?.buildRecallParams?.({ ownerId, tenantId: conversation.tenantId, excludeConversationId: conversationId }) ?? {};
|
|
@@ -1262,6 +1370,7 @@ export class AgentOrchestrator {
|
|
|
1262
1370
|
}
|
|
1263
1371
|
}
|
|
1264
1372
|
if (event.type === "run:error") {
|
|
1373
|
+
runError = { code: event.error.code, message: event.error.message };
|
|
1265
1374
|
draft.assistantResponse = draft.assistantResponse || `[Error: ${event.error.message}]`;
|
|
1266
1375
|
}
|
|
1267
1376
|
await this.eventSink(conversationId, event);
|
|
@@ -1311,7 +1420,14 @@ export class AgentOrchestrator {
|
|
|
1311
1420
|
return;
|
|
1312
1421
|
}
|
|
1313
1422
|
|
|
1314
|
-
|
|
1423
|
+
const abnormalEnd = !runResult;
|
|
1424
|
+
conv.subagentMeta = {
|
|
1425
|
+
...conv.subagentMeta!,
|
|
1426
|
+
status: abnormalEnd ? "error" : "completed",
|
|
1427
|
+
...(abnormalEnd
|
|
1428
|
+
? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
|
|
1429
|
+
: {}),
|
|
1430
|
+
};
|
|
1315
1431
|
await this.conversationStore.update(conv);
|
|
1316
1432
|
}
|
|
1317
1433
|
|
|
@@ -1322,24 +1438,26 @@ export class AgentOrchestrator {
|
|
|
1322
1438
|
conversationId,
|
|
1323
1439
|
});
|
|
1324
1440
|
|
|
1325
|
-
let
|
|
1326
|
-
if (!
|
|
1441
|
+
let gathered = realResponseText(runResult?.response) || realResponseText(draft.assistantResponse);
|
|
1442
|
+
if (!gathered) {
|
|
1327
1443
|
const freshSubConv = await this.conversationStore.get(conversationId);
|
|
1328
|
-
if (freshSubConv)
|
|
1329
|
-
const lastAssistant = [...freshSubConv.messages].reverse().find(m => m.role === "assistant");
|
|
1330
|
-
if (lastAssistant) {
|
|
1331
|
-
subagentResponse = typeof lastAssistant.content === "string" ? lastAssistant.content : "";
|
|
1332
|
-
}
|
|
1333
|
-
}
|
|
1444
|
+
if (freshSubConv) gathered = realResponseText(lastAssistantText(freshSubConv.messages));
|
|
1334
1445
|
}
|
|
1446
|
+
const abnormal = !runResult;
|
|
1447
|
+
const subagentResponse = abnormal
|
|
1448
|
+
? abnormalEndResponse({ subagentId: conversationId, gathered, runError })
|
|
1449
|
+
: gathered;
|
|
1335
1450
|
|
|
1336
1451
|
const parentConv = await this.conversationStore.get(parentConversationId);
|
|
1337
1452
|
if (parentConv) {
|
|
1338
1453
|
const result: PendingSubagentResult = {
|
|
1339
1454
|
subagentId: conversationId,
|
|
1340
1455
|
task,
|
|
1341
|
-
status: "completed",
|
|
1342
|
-
result: { status: "
|
|
1456
|
+
status: abnormal ? "error" : "completed",
|
|
1457
|
+
result: { status: runResult?.status ?? "error", response: subagentResponse, steps: runResult?.steps ?? 0, tokens: { input: 0, output: 0, cached: 0 }, duration: runResult?.duration ?? 0 },
|
|
1458
|
+
...(abnormal
|
|
1459
|
+
? { error: { code: runError?.code ?? "SUBAGENT_INCOMPLETE", message: runError?.message ?? "subagent ended without a result" } }
|
|
1460
|
+
: {}),
|
|
1343
1461
|
timestamp: Date.now(),
|
|
1344
1462
|
};
|
|
1345
1463
|
await this.conversationStore.appendSubagentResult(parentConversationId, result);
|