@oh-my-pi/pi-coding-agent 15.9.5 → 15.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +98 -1
- package/dist/types/cli/args.d.ts +1 -1
- package/dist/types/cli/gallery-cli.d.ts +43 -0
- package/dist/types/cli/gallery-fixtures/agentic.d.ts +2 -0
- package/dist/types/cli/gallery-fixtures/codeintel.d.ts +3 -0
- package/dist/types/cli/gallery-fixtures/edit.d.ts +3 -0
- package/dist/types/cli/gallery-fixtures/fs.d.ts +2 -0
- package/dist/types/cli/gallery-fixtures/index.d.ts +4 -0
- package/dist/types/cli/gallery-fixtures/interaction.d.ts +3 -0
- package/dist/types/cli/gallery-fixtures/memory.d.ts +2 -0
- package/dist/types/cli/gallery-fixtures/misc.d.ts +3 -0
- package/dist/types/cli/gallery-fixtures/search.d.ts +3 -0
- package/dist/types/cli/gallery-fixtures/shell.d.ts +3 -0
- package/dist/types/cli/gallery-fixtures/types.d.ts +44 -0
- package/dist/types/cli/gallery-fixtures/web.d.ts +2 -0
- package/dist/types/cli/gallery-screenshot.d.ts +35 -0
- package/dist/types/commands/gallery.d.ts +47 -0
- package/dist/types/config/keybindings.d.ts +10 -2
- package/dist/types/config/model-id-affixes.d.ts +2 -0
- package/dist/types/config/model-registry.d.ts +8 -1
- package/dist/types/config/settings-schema.d.ts +43 -7
- package/dist/types/edit/file-snapshot-store.d.ts +1 -1
- package/dist/types/eval/backend.d.ts +6 -6
- package/dist/types/eval/bridge-timeout.d.ts +27 -0
- package/dist/types/eval/idle-timeout.d.ts +16 -14
- package/dist/types/eval/js/executor.d.ts +3 -3
- package/dist/types/eval/py/executor.d.ts +2 -2
- package/dist/types/eval/py/spawn-options.d.ts +58 -0
- package/dist/types/extensibility/plugins/marketplace-auto-update.d.ts +8 -0
- package/dist/types/lsp/types.d.ts +10 -0
- package/dist/types/main.d.ts +3 -2
- package/dist/types/memory-backend/index.d.ts +2 -1
- package/dist/types/memory-backend/resolve.d.ts +1 -1
- package/dist/types/memory-backend/types.d.ts +1 -1
- package/dist/types/modes/components/assistant-message.d.ts +5 -0
- package/dist/types/modes/components/copy-selector.d.ts +22 -0
- package/dist/types/modes/components/custom-editor.d.ts +2 -1
- package/dist/types/modes/components/model-selector.d.ts +1 -0
- package/dist/types/modes/components/tool-execution.d.ts +18 -0
- package/dist/types/modes/controllers/command-controller.d.ts +0 -1
- package/dist/types/modes/controllers/selector-controller.d.ts +2 -1
- package/dist/types/modes/index.d.ts +5 -4
- package/dist/types/modes/interactive-mode.d.ts +2 -2
- package/dist/types/modes/setup-version.d.ts +11 -0
- package/dist/types/modes/setup-wizard/index.d.ts +2 -1
- package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +2 -1
- package/dist/types/modes/types.d.ts +2 -2
- package/dist/types/modes/utils/copy-targets.d.ts +53 -0
- package/dist/types/sdk.d.ts +1 -1
- package/dist/types/task/executor.d.ts +7 -0
- package/dist/types/telemetry-export.d.ts +1 -1
- package/dist/types/tools/eval-render.d.ts +1 -0
- package/dist/types/tools/fetch.d.ts +15 -7
- package/dist/types/tools/render-utils.d.ts +33 -0
- package/dist/types/tools/renderers.d.ts +16 -2
- package/dist/types/tools/search.d.ts +1 -1
- package/dist/types/tools/write.d.ts +2 -0
- package/dist/types/tui/code-cell.d.ts +6 -0
- package/dist/types/tui/output-block.d.ts +11 -0
- package/dist/types/web/scrapers/github.d.ts +22 -0
- package/dist/types/web/search/providers/perplexity.d.ts +8 -1
- package/dist/types/web/search/types.d.ts +1 -1
- package/package.json +9 -9
- package/scripts/dev-launch +42 -0
- package/scripts/dev-launch-preload.ts +19 -0
- package/src/autoresearch/dashboard.ts +11 -21
- package/src/cli/args.ts +2 -2
- package/src/cli/claude-trace-cli.ts +13 -1
- package/src/cli/gallery-cli.ts +223 -0
- package/src/cli/gallery-fixtures/agentic.ts +292 -0
- package/src/cli/gallery-fixtures/codeintel.ts +188 -0
- package/src/cli/gallery-fixtures/edit.ts +194 -0
- package/src/cli/gallery-fixtures/fs.ts +153 -0
- package/src/cli/gallery-fixtures/index.ts +40 -0
- package/src/cli/gallery-fixtures/interaction.ts +49 -0
- package/src/cli/gallery-fixtures/memory.ts +81 -0
- package/src/cli/gallery-fixtures/misc.ts +221 -0
- package/src/cli/gallery-fixtures/search.ts +213 -0
- package/src/cli/gallery-fixtures/shell.ts +167 -0
- package/src/cli/gallery-fixtures/types.ts +41 -0
- package/src/cli/gallery-fixtures/web.ts +158 -0
- package/src/cli/gallery-screenshot.ts +279 -0
- package/src/cli-commands.ts +1 -0
- package/src/commands/gallery.ts +52 -0
- package/src/commands/launch.ts +1 -1
- package/src/config/keybindings.ts +68 -2
- package/src/config/model-equivalence.ts +35 -12
- package/src/config/model-id-affixes.ts +39 -22
- package/src/config/model-registry.ts +16 -16
- package/src/config/settings-schema.ts +29 -6
- package/src/config/settings.ts +11 -0
- package/src/dap/client.ts +14 -16
- package/src/debug/raw-sse.ts +18 -4
- package/src/edit/file-snapshot-store.ts +1 -1
- package/src/edit/index.ts +1 -1
- package/src/edit/renderer.ts +43 -55
- package/src/edit/streaming.ts +1 -1
- package/src/eval/__tests__/agent-bridge.test.ts +102 -58
- package/src/eval/__tests__/bridge-timeout.test.ts +64 -0
- package/src/eval/__tests__/idle-timeout.test.ts +26 -12
- package/src/eval/__tests__/kernel-spawn.test.ts +103 -0
- package/src/eval/__tests__/llm-bridge.test.ts +10 -10
- package/src/eval/agent-bridge.ts +38 -12
- package/src/eval/backend.ts +6 -6
- package/src/eval/bridge-timeout.ts +44 -0
- package/src/eval/idle-timeout.ts +33 -15
- package/src/eval/js/executor.ts +10 -10
- package/src/eval/llm-bridge.ts +4 -5
- package/src/eval/py/executor.ts +6 -6
- package/src/eval/py/kernel.ts +11 -1
- package/src/eval/py/spawn-options.ts +126 -0
- package/src/export/ttsr.ts +9 -0
- package/src/extensibility/extensions/runner.ts +3 -0
- package/src/extensibility/plugins/doctor.ts +0 -1
- package/src/extensibility/plugins/marketplace-auto-update.ts +49 -0
- package/src/goals/tools/goal-tool.ts +2 -2
- package/src/internal-urls/docs-index.generated.ts +7 -6
- package/src/lsp/client.ts +179 -52
- package/src/lsp/index.ts +38 -4
- package/src/lsp/render.ts +3 -3
- package/src/lsp/types.ts +10 -0
- package/src/main.ts +47 -52
- package/src/memory-backend/index.ts +13 -1
- package/src/memory-backend/resolve.ts +3 -5
- package/src/memory-backend/types.ts +1 -1
- package/src/modes/components/agent-dashboard.ts +13 -4
- package/src/modes/components/assistant-message.ts +22 -1
- package/src/modes/components/copy-selector.ts +249 -0
- package/src/modes/components/custom-editor.ts +10 -1
- package/src/modes/components/extensions/extension-list.ts +17 -8
- package/src/modes/components/history-search.ts +19 -11
- package/src/modes/components/model-selector.ts +125 -29
- package/src/modes/components/oauth-selector.ts +28 -12
- package/src/modes/components/session-observer-overlay.ts +13 -15
- package/src/modes/components/session-selector.ts +24 -13
- package/src/modes/components/status-line.ts +3 -5
- package/src/modes/components/tool-execution.ts +83 -24
- package/src/modes/components/tree-selector.ts +19 -7
- package/src/modes/components/user-message-selector.ts +25 -14
- package/src/modes/controllers/command-controller.ts +13 -118
- package/src/modes/controllers/event-controller.ts +26 -10
- package/src/modes/controllers/input-controller.ts +11 -3
- package/src/modes/controllers/selector-controller.ts +40 -3
- package/src/modes/index.ts +5 -4
- package/src/modes/interactive-mode.ts +21 -7
- package/src/modes/setup-version.ts +11 -0
- package/src/modes/setup-wizard/index.ts +3 -2
- package/src/modes/setup-wizard/scenes/web-search.ts +3 -2
- package/src/modes/theme/theme.ts +46 -10
- package/src/modes/types.ts +2 -2
- package/src/modes/utils/context-usage.ts +10 -6
- package/src/modes/utils/copy-targets.ts +254 -0
- package/src/modes/utils/hotkeys-markdown.ts +1 -0
- package/src/prompts/tools/ast-edit.md +1 -1
- package/src/prompts/tools/ast-grep.md +1 -1
- package/src/prompts/tools/read.md +1 -1
- package/src/prompts/tools/search.md +1 -1
- package/src/sdk.ts +21 -23
- package/src/session/agent-session.ts +13 -9
- package/src/slash-commands/builtin-registry.ts +4 -12
- package/src/slash-commands/helpers/usage-report.ts +2 -0
- package/src/task/executor.ts +20 -2
- package/src/task/render.ts +37 -11
- package/src/telemetry-export.ts +25 -7
- package/src/tools/bash.ts +18 -8
- package/src/tools/browser/render.ts +5 -4
- package/src/tools/debug.ts +3 -3
- package/src/tools/eval-backends.ts +6 -17
- package/src/tools/eval-render.ts +28 -10
- package/src/tools/eval.ts +19 -23
- package/src/tools/fetch.ts +99 -89
- package/src/tools/read.ts +7 -7
- package/src/tools/render-utils.ts +63 -3
- package/src/tools/renderers.ts +16 -1
- package/src/tools/report-tool-issue.ts +1 -1
- package/src/tools/search.ts +173 -81
- package/src/tools/ssh.ts +21 -8
- package/src/tools/todo.ts +20 -7
- package/src/tools/write.ts +39 -9
- package/src/tui/code-cell.ts +19 -4
- package/src/tui/output-block.ts +14 -0
- package/src/web/scrapers/github.ts +255 -3
- package/src/web/scrapers/youtube.ts +3 -2
- package/src/web/search/providers/perplexity.ts +199 -51
- package/src/web/search/render.ts +42 -57
- package/src/web/search/types.ts +5 -1
- package/dist/types/eval/heartbeat.d.ts +0 -45
- package/src/eval/__tests__/heartbeat.test.ts +0 -84
- package/src/eval/__tests__/shared-executors.test.ts +0 -609
- package/src/eval/heartbeat.ts +0 -74
- /package/dist/types/eval/__tests__/{heartbeat.test.d.ts → bridge-timeout.test.d.ts} +0 -0
- /package/dist/types/eval/__tests__/{shared-executors.test.d.ts → kernel-spawn.test.d.ts} +0 -0
|
@@ -10,7 +10,7 @@ import { AgentOutputManager } from "../../task/output-manager";
|
|
|
10
10
|
import type { AgentDefinition, AgentProgress, SingleResult } from "../../task/types";
|
|
11
11
|
import type { ToolSession } from "../../tools";
|
|
12
12
|
import { EVAL_AGENT_MAX_DEPTH, runEvalAgent } from "../agent-bridge";
|
|
13
|
-
import {
|
|
13
|
+
import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
|
|
14
14
|
import { IdleTimeout } from "../idle-timeout";
|
|
15
15
|
import { disposeAllVmContexts } from "../js/context-manager";
|
|
16
16
|
import { executeJs } from "../js/executor";
|
|
@@ -231,12 +231,62 @@ describe("runEvalAgent", () => {
|
|
|
231
231
|
});
|
|
232
232
|
await expect(runEvalAgent({ prompt: "fail" }, { session: makeSession() })).rejects.toThrow("boom");
|
|
233
233
|
});
|
|
234
|
+
|
|
235
|
+
// Regression: a runtime-limit abort returns exitCode=1, stderr="", error=undefined,
|
|
236
|
+
// aborted=true, abortReason="Subagent runtime limit exceeded (...)". The previous
|
|
237
|
+
// failure-message coalesce stopped at the empty `stderr` (since `??` only skips
|
|
238
|
+
// nullish values) and shipped an empty error through the bridge — Python then
|
|
239
|
+
// surfaced the generic `bridge call '__agent__' failed`. See #2006.
|
|
240
|
+
it("surfaces abortReason for aborts that leave stderr empty", async () => {
|
|
241
|
+
mockAgents();
|
|
242
|
+
const runSpy = vi.spyOn(taskExecutor, "runSubprocess");
|
|
243
|
+
runSpy.mockImplementationOnce(async options =>
|
|
244
|
+
singleResult(options, {
|
|
245
|
+
exitCode: 1,
|
|
246
|
+
output: "",
|
|
247
|
+
stderr: "",
|
|
248
|
+
error: undefined,
|
|
249
|
+
aborted: true,
|
|
250
|
+
abortReason: "Subagent runtime limit exceeded (task.maxRuntimeMs=900000)",
|
|
251
|
+
}),
|
|
252
|
+
);
|
|
253
|
+
runSpy.mockImplementationOnce(async options =>
|
|
254
|
+
singleResult(options, {
|
|
255
|
+
exitCode: 1,
|
|
256
|
+
output: "",
|
|
257
|
+
stderr: " ",
|
|
258
|
+
error: " ",
|
|
259
|
+
aborted: true,
|
|
260
|
+
abortReason: "Cancelled by caller",
|
|
261
|
+
}),
|
|
262
|
+
);
|
|
263
|
+
runSpy.mockImplementationOnce(async options =>
|
|
264
|
+
singleResult(options, {
|
|
265
|
+
exitCode: 1,
|
|
266
|
+
output: "",
|
|
267
|
+
stderr: "",
|
|
268
|
+
error: undefined,
|
|
269
|
+
}),
|
|
270
|
+
);
|
|
271
|
+
|
|
272
|
+
await expect(runEvalAgent({ prompt: "slow" }, { session: makeSession() })).rejects.toThrow(
|
|
273
|
+
"Subagent runtime limit exceeded (task.maxRuntimeMs=900000)",
|
|
274
|
+
);
|
|
275
|
+
// Whitespace-only stderr/error must not mask abortReason either.
|
|
276
|
+
await expect(runEvalAgent({ prompt: "cancelled" }, { session: makeSession() })).rejects.toThrow(
|
|
277
|
+
"Cancelled by caller",
|
|
278
|
+
);
|
|
279
|
+
// Last resort: still produce a non-empty message even when nothing useful is set,
|
|
280
|
+
// so Python never falls back to `bridge call '__agent__' failed`.
|
|
281
|
+
await expect(runEvalAgent({ prompt: "blank" }, { session: makeSession() })).rejects.toThrow(
|
|
282
|
+
"agent() subagent 'task' failed.",
|
|
283
|
+
);
|
|
284
|
+
});
|
|
234
285
|
});
|
|
235
286
|
|
|
236
287
|
describe("agent() through eval runtimes", () => {
|
|
237
288
|
afterEach(() => {
|
|
238
289
|
vi.restoreAllMocks();
|
|
239
|
-
setBridgeHeartbeatIntervalMs();
|
|
240
290
|
});
|
|
241
291
|
|
|
242
292
|
afterAll(async () => {
|
|
@@ -327,18 +377,6 @@ describe("agent() through eval runtimes", () => {
|
|
|
327
377
|
singleResult(options, { output: "hello from python" }),
|
|
328
378
|
);
|
|
329
379
|
|
|
330
|
-
const probe = await executePython('print("probe")', {
|
|
331
|
-
cwd: tempDir.path(),
|
|
332
|
-
sessionId: `${sessionId}:probe`,
|
|
333
|
-
sessionFile,
|
|
334
|
-
kernelMode: "per-call",
|
|
335
|
-
});
|
|
336
|
-
if (probe.exitCode === undefined && probe.cancelled) {
|
|
337
|
-
expect(probe.output).toBe("");
|
|
338
|
-
return;
|
|
339
|
-
}
|
|
340
|
-
expect(probe.exitCode).toBe(0);
|
|
341
|
-
|
|
342
380
|
const result = await executePython('print(agent("hi"))', {
|
|
343
381
|
cwd: tempDir.path(),
|
|
344
382
|
sessionId,
|
|
@@ -346,6 +384,10 @@ describe("agent() through eval runtimes", () => {
|
|
|
346
384
|
kernelMode: "per-call",
|
|
347
385
|
toolSession: session,
|
|
348
386
|
});
|
|
387
|
+
if (result.exitCode === undefined && result.cancelled) {
|
|
388
|
+
expect(result.output).toBe("");
|
|
389
|
+
return; // kernel unavailable in this environment
|
|
390
|
+
}
|
|
349
391
|
|
|
350
392
|
expect(result.exitCode).toBe(0);
|
|
351
393
|
expect(result.output.trim()).toBe("hello from python");
|
|
@@ -374,22 +416,14 @@ describe("agent() through eval runtimes", () => {
|
|
|
374
416
|
}
|
|
375
417
|
});
|
|
376
418
|
|
|
377
|
-
const probe = await executePython('print("probe")', {
|
|
378
|
-
cwd: tempDir.path(),
|
|
379
|
-
sessionId: `${sessionId}:probe`,
|
|
380
|
-
sessionFile,
|
|
381
|
-
kernelMode: "per-call",
|
|
382
|
-
});
|
|
383
|
-
if (probe.exitCode === undefined && probe.cancelled) {
|
|
384
|
-
expect(probe.output).toBe("");
|
|
385
|
-
return;
|
|
386
|
-
}
|
|
387
|
-
expect(probe.exitCode).toBe(0);
|
|
388
|
-
|
|
389
419
|
const result = await executePython(
|
|
390
420
|
'import json\nprint(json.dumps(parallel([lambda n=n: agent(n) for n in ["a", "b", "c", "d"]])))',
|
|
391
421
|
{ cwd: tempDir.path(), sessionId, sessionFile, kernelMode: "per-call", toolSession: session },
|
|
392
422
|
);
|
|
423
|
+
if (result.exitCode === undefined && result.cancelled) {
|
|
424
|
+
expect(result.output).toBe("");
|
|
425
|
+
return; // kernel unavailable in this environment
|
|
426
|
+
}
|
|
393
427
|
|
|
394
428
|
expect(result.exitCode).toBe(0);
|
|
395
429
|
expect(JSON.parse(result.output.trim())).toEqual(["a", "b", "c", "d"]);
|
|
@@ -413,7 +447,14 @@ describe("agent() through eval runtimes", () => {
|
|
|
413
447
|
// The host must respond the instant the cell aborts so the kernel can
|
|
414
448
|
// unwind via KeyboardInterrupt instead of being hard-killed (which used to
|
|
415
449
|
// surface "[kernel] Python kernel shutdown" and lose all session state).
|
|
450
|
+
let inFlight = 0;
|
|
451
|
+
let markSaturated: (() => void) | undefined;
|
|
452
|
+
const saturated = new Promise<void>(resolve => {
|
|
453
|
+
markSaturated = resolve;
|
|
454
|
+
});
|
|
416
455
|
vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
|
|
456
|
+
// task.maxConcurrency=6 → six bridge calls block at once; signal then.
|
|
457
|
+
if (++inFlight >= 6) markSaturated?.();
|
|
417
458
|
await Bun.sleep(9000); // deliberately ignores options.signal
|
|
418
459
|
return singleResult(options, { output: options.assignment ?? "" });
|
|
419
460
|
});
|
|
@@ -433,8 +474,9 @@ describe("agent() through eval runtimes", () => {
|
|
|
433
474
|
expect(seed.exitCode).toBe(0);
|
|
434
475
|
|
|
435
476
|
const ac = new AbortController();
|
|
436
|
-
// Abort
|
|
437
|
-
|
|
477
|
+
// Abort the instant all six worker threads are confirmed blocked in their
|
|
478
|
+
// bridge calls (condition-driven) instead of waiting a fixed wall second.
|
|
479
|
+
void saturated.then(() => ac.abort(new Error("external interrupt")));
|
|
438
480
|
|
|
439
481
|
const start = Date.now();
|
|
440
482
|
const result = await executePython(
|
|
@@ -560,52 +602,52 @@ describe("agent() through eval runtimes", () => {
|
|
|
560
602
|
expect(displayAgentEvents.length).toBe(2);
|
|
561
603
|
});
|
|
562
604
|
|
|
563
|
-
it("
|
|
564
|
-
using tempDir = TempDir.createSync("@omp-eval-agent-
|
|
565
|
-
const { session } = makeEvalSession(tempDir, "js-agent-
|
|
605
|
+
it("pauses the idle watchdog while a quiet agent() runs past the budget", async () => {
|
|
606
|
+
using tempDir = TempDir.createSync("@omp-eval-agent-timeout-pause-");
|
|
607
|
+
const { session } = makeEvalSession(tempDir, "js-agent-timeout-pause");
|
|
566
608
|
mockAgents();
|
|
567
|
-
// Heartbeat cadence well under the idle budget so a working-but-silent
|
|
568
|
-
// subagent re-arms the watchdog several times before it could expire.
|
|
569
|
-
setBridgeHeartbeatIntervalMs(15);
|
|
570
609
|
|
|
571
|
-
// runSubprocess runs far past the budget and emits NO progress
|
|
572
|
-
//
|
|
573
|
-
//
|
|
610
|
+
// runSubprocess runs far past the eval timeout budget and emits NO progress
|
|
611
|
+
// of its own. The bridge pause must make that delegated time invisible to
|
|
612
|
+
// the watchdog.
|
|
574
613
|
vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
|
|
575
|
-
await Bun.sleep(
|
|
614
|
+
await Bun.sleep(40);
|
|
576
615
|
return singleResult(options, { output: "done" });
|
|
577
616
|
});
|
|
578
617
|
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
using idle = new IdleTimeout(60);
|
|
618
|
+
const ops: string[] = [];
|
|
619
|
+
using idle = new IdleTimeout(20);
|
|
582
620
|
const result = await runEvalAgent(
|
|
583
621
|
{ prompt: "investigate" },
|
|
584
622
|
{
|
|
585
623
|
session,
|
|
586
624
|
signal: idle.signal,
|
|
587
625
|
emitStatus: event => {
|
|
588
|
-
|
|
626
|
+
ops.push(event.op);
|
|
627
|
+
if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
|
|
628
|
+
if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
|
|
589
629
|
},
|
|
590
630
|
},
|
|
591
631
|
);
|
|
592
632
|
|
|
593
|
-
expect(idle.signal.aborted).toBe(false);
|
|
594
633
|
expect(result.text).toBe("done");
|
|
634
|
+
expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
|
|
635
|
+
expect(idle.signal.aborted).toBe(false);
|
|
636
|
+
|
|
637
|
+
await Bun.sleep(60);
|
|
638
|
+
expect(idle.signal.aborted).toBe(true);
|
|
595
639
|
});
|
|
596
640
|
|
|
597
|
-
it("
|
|
598
|
-
using tempDir = TempDir.createSync("@omp-eval-agent-progress-
|
|
599
|
-
const { session } = makeEvalSession(tempDir, "js-agent-progress-
|
|
641
|
+
it("keeps timeout paused despite agent() progress snapshots", async () => {
|
|
642
|
+
using tempDir = TempDir.createSync("@omp-eval-agent-progress-timeout-pause-");
|
|
643
|
+
const { session } = makeEvalSession(tempDir, "js-agent-progress-timeout-pause");
|
|
600
644
|
mockAgents();
|
|
601
|
-
// Heartbeat slower than the budget: only the immediate beat at call start
|
|
602
|
-
// fires, so after the budget elapses nothing re-arms the watchdog.
|
|
603
|
-
setBridgeHeartbeatIntervalMs(10_000);
|
|
604
645
|
|
|
605
646
|
// Stream frequent progress snapshots (op:"agent") for well past the budget.
|
|
606
|
-
//
|
|
647
|
+
// They render as status, but timeout accounting is controlled only by the
|
|
648
|
+
// bridge pause/resume events.
|
|
607
649
|
vi.spyOn(taskExecutor, "runSubprocess").mockImplementation(async options => {
|
|
608
|
-
for (let i = 0; i <
|
|
650
|
+
for (let i = 0; i < 20; i++) {
|
|
609
651
|
options.onProgress?.({
|
|
610
652
|
index: options.index,
|
|
611
653
|
id: options.id,
|
|
@@ -622,28 +664,30 @@ describe("agent() through eval runtimes", () => {
|
|
|
622
664
|
cost: 0,
|
|
623
665
|
durationMs: i * 10,
|
|
624
666
|
});
|
|
625
|
-
await Bun.sleep(
|
|
667
|
+
await Bun.sleep(5);
|
|
626
668
|
}
|
|
627
669
|
return singleResult(options, { output: "done" });
|
|
628
670
|
});
|
|
629
671
|
|
|
630
672
|
const ops: string[] = [];
|
|
631
|
-
using idle = new IdleTimeout(
|
|
632
|
-
await runEvalAgent(
|
|
673
|
+
using idle = new IdleTimeout(40);
|
|
674
|
+
const result = await runEvalAgent(
|
|
633
675
|
{ prompt: "investigate" },
|
|
634
676
|
{
|
|
635
677
|
session,
|
|
636
678
|
signal: idle.signal,
|
|
637
679
|
emitStatus: event => {
|
|
638
680
|
ops.push(event.op);
|
|
639
|
-
if (event.op ===
|
|
681
|
+
if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
|
|
682
|
+
if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
|
|
640
683
|
},
|
|
641
684
|
},
|
|
642
685
|
);
|
|
643
686
|
|
|
644
|
-
|
|
645
|
-
|
|
687
|
+
expect(result.text).toBe("done");
|
|
688
|
+
expect(ops[0]).toBe(EVAL_TIMEOUT_PAUSE_OP);
|
|
646
689
|
expect(ops).toContain("agent");
|
|
647
|
-
expect(
|
|
690
|
+
expect(ops.at(-1)).toBe(EVAL_TIMEOUT_RESUME_OP);
|
|
691
|
+
expect(idle.signal.aborted).toBe(false);
|
|
648
692
|
});
|
|
649
693
|
});
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
EVAL_TIMEOUT_PAUSE_OP,
|
|
4
|
+
EVAL_TIMEOUT_RESUME_OP,
|
|
5
|
+
isEvalTimeoutControlEvent,
|
|
6
|
+
withBridgeTimeoutPause,
|
|
7
|
+
} from "../bridge-timeout";
|
|
8
|
+
import type { JsStatusEvent } from "../js/shared/types";
|
|
9
|
+
|
|
10
|
+
describe("withBridgeTimeoutPause", () => {
|
|
11
|
+
it("emits one pause before the operation and one resume after it settles", async () => {
|
|
12
|
+
const events: JsStatusEvent[] = [];
|
|
13
|
+
|
|
14
|
+
const value = await withBridgeTimeoutPause(
|
|
15
|
+
event => events.push(event),
|
|
16
|
+
async () => {
|
|
17
|
+
await Bun.sleep(80);
|
|
18
|
+
return "done";
|
|
19
|
+
},
|
|
20
|
+
);
|
|
21
|
+
|
|
22
|
+
expect(value).toBe("done");
|
|
23
|
+
expect(events.map(event => event.op)).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
|
|
24
|
+
|
|
25
|
+
const settledCount = events.length;
|
|
26
|
+
await Bun.sleep(40);
|
|
27
|
+
expect(events.length).toBe(settledCount);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("resumes timeout accounting even when the operation throws", async () => {
|
|
31
|
+
const events: JsStatusEvent[] = [];
|
|
32
|
+
|
|
33
|
+
await expect(
|
|
34
|
+
withBridgeTimeoutPause(
|
|
35
|
+
event => events.push(event),
|
|
36
|
+
async () => {
|
|
37
|
+
await Bun.sleep(20);
|
|
38
|
+
throw new Error("boom");
|
|
39
|
+
},
|
|
40
|
+
),
|
|
41
|
+
).rejects.toThrow("boom");
|
|
42
|
+
|
|
43
|
+
expect(events.map(event => event.op)).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP]);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
it("runs the operation without emitting when no status sink is wired", async () => {
|
|
47
|
+
let ran = 0;
|
|
48
|
+
|
|
49
|
+
const value = await withBridgeTimeoutPause(undefined, async () => {
|
|
50
|
+
ran++;
|
|
51
|
+
await Bun.sleep(20);
|
|
52
|
+
return 42;
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
expect(value).toBe(42);
|
|
56
|
+
expect(ran).toBe(1);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it("identifies timeout-control events as non-renderable status", () => {
|
|
60
|
+
expect(isEvalTimeoutControlEvent({ op: EVAL_TIMEOUT_PAUSE_OP })).toBe(true);
|
|
61
|
+
expect(isEvalTimeoutControlEvent({ op: EVAL_TIMEOUT_RESUME_OP })).toBe(true);
|
|
62
|
+
expect(isEvalTimeoutControlEvent({ op: "agent", id: "subagent-1" })).toBe(false);
|
|
63
|
+
});
|
|
64
|
+
});
|
|
@@ -32,21 +32,34 @@ describe("IdleTimeout", () => {
|
|
|
32
32
|
expect((idle.signal.reason as DOMException).name).toBe("TimeoutError");
|
|
33
33
|
});
|
|
34
34
|
|
|
35
|
-
it("
|
|
36
|
-
using idle = new IdleTimeout(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
for (let i = 0; i < 6; i++) {
|
|
40
|
-
await Bun.sleep(40);
|
|
41
|
-
idle.bump();
|
|
42
|
-
}
|
|
35
|
+
it("ignores elapsed time while paused and resumes with a fresh window", async () => {
|
|
36
|
+
using idle = new IdleTimeout(80);
|
|
37
|
+
idle.pause();
|
|
38
|
+
await Bun.sleep(160);
|
|
43
39
|
expect(idle.signal.aborted).toBe(false);
|
|
44
40
|
|
|
45
|
-
|
|
46
|
-
const
|
|
41
|
+
idle.resume();
|
|
42
|
+
const firedEarly = await abortedWithin(idle.signal, 30);
|
|
43
|
+
expect(firedEarly).toBe(false);
|
|
44
|
+
const fired = await abortedWithin(idle.signal, 500);
|
|
47
45
|
expect(fired).toBe(true);
|
|
48
46
|
});
|
|
49
47
|
|
|
48
|
+
it("reference-counts overlapping pauses", async () => {
|
|
49
|
+
using idle = new IdleTimeout(60);
|
|
50
|
+
idle.pause();
|
|
51
|
+
idle.pause();
|
|
52
|
+
await Bun.sleep(120);
|
|
53
|
+
expect(idle.signal.aborted).toBe(false);
|
|
54
|
+
|
|
55
|
+
idle.resume();
|
|
56
|
+
await Bun.sleep(90);
|
|
57
|
+
expect(idle.signal.aborted).toBe(false);
|
|
58
|
+
|
|
59
|
+
idle.resume();
|
|
60
|
+
const fired = await abortedWithin(idle.signal, 500);
|
|
61
|
+
expect(fired).toBe(true);
|
|
62
|
+
});
|
|
50
63
|
it("never fires after dispose()", async () => {
|
|
51
64
|
const idle = new IdleTimeout(30);
|
|
52
65
|
idle.dispose();
|
|
@@ -55,12 +68,13 @@ describe("IdleTimeout", () => {
|
|
|
55
68
|
expect(idle.signal.aborted).toBe(false);
|
|
56
69
|
});
|
|
57
70
|
|
|
58
|
-
it("ignores
|
|
71
|
+
it("ignores pause/resume after the watchdog has already fired", async () => {
|
|
59
72
|
using idle = new IdleTimeout(30);
|
|
60
73
|
await abortedWithin(idle.signal, 500);
|
|
61
74
|
expect(idle.signal.aborted).toBe(true);
|
|
62
75
|
// Late activity must not un-abort or rearm a settled watchdog.
|
|
63
|
-
idle.
|
|
76
|
+
idle.pause();
|
|
77
|
+
idle.resume();
|
|
64
78
|
expect(idle.signal.aborted).toBe(true);
|
|
65
79
|
});
|
|
66
80
|
});
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { afterEach, describe, expect, it } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
__resetWindowsConsoleProbeCache,
|
|
4
|
+
consoleAttachedViaTTY,
|
|
5
|
+
hostHasInheritableConsole,
|
|
6
|
+
shouldHideKernelWindow,
|
|
7
|
+
} from "../py/spawn-options";
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* `shouldHideKernelWindow` decides whether the long-lived Python kernel
|
|
11
|
+
* subprocess is spawned with `windowsHide: true`. On Windows, Bun maps that
|
|
12
|
+
* option to `CREATE_NO_WINDOW`, which detaches the child from any inherited
|
|
13
|
+
* console — breaking both (a) `LoadLibraryExW` for NumPy/pandas native
|
|
14
|
+
* extensions and (b) SIGINT delivery via `GenerateConsoleCtrlEvent`. See
|
|
15
|
+
* issue #1960. The tests below pin the three layered concerns the PR review
|
|
16
|
+
* surfaced:
|
|
17
|
+
*
|
|
18
|
+
* 1. `shouldHideKernelWindow` — pure predicate over a single boolean.
|
|
19
|
+
* 2. `consoleAttachedViaTTY` — the TTY-OR fallback used when the Win32 FFI
|
|
20
|
+
* probe is unavailable; covers the partial-redirection cases.
|
|
21
|
+
* 3. `hostHasInheritableConsole` — the integration boundary. Off-Windows it
|
|
22
|
+
* short-circuits to the TTY fallback; on Windows it is expected to
|
|
23
|
+
* consult `kernel32!GetConsoleWindow()` first, which is the authoritative
|
|
24
|
+
* signal even for the all-stdio-redirected case.
|
|
25
|
+
*/
|
|
26
|
+
describe("shouldHideKernelWindow", () => {
|
|
27
|
+
it("inherits the host console on Windows when one is attached", () => {
|
|
28
|
+
// Reporter's repro: omp launched in Windows Terminal, host has a
|
|
29
|
+
// console, kernel must inherit so `import pandas` doesn't deadlock in
|
|
30
|
+
// `_multiarray_umath` and SIGINT can recover the cell.
|
|
31
|
+
expect(shouldHideKernelWindow({ platform: "win32", hostHasInheritableConsole: true })).toBe(false);
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("hides on Windows only when the host has no console at all (true service / daemon)", () => {
|
|
35
|
+
// CREATE_NO_WINDOW here suppresses the console window Windows would
|
|
36
|
+
// otherwise auto-allocate for the console-app Python kernel.
|
|
37
|
+
expect(shouldHideKernelWindow({ platform: "win32", hostHasInheritableConsole: false })).toBe(true);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
it("never sets windowsHide off-Windows (the option is a Win32-only flag)", () => {
|
|
41
|
+
// On POSIX `windowsHide` is a no-op; the predicate must return false
|
|
42
|
+
// everywhere off-Windows so the spawn site matches pre-fix behavior.
|
|
43
|
+
expect(shouldHideKernelWindow({ platform: "linux", hostHasInheritableConsole: true })).toBe(false);
|
|
44
|
+
expect(shouldHideKernelWindow({ platform: "linux", hostHasInheritableConsole: false })).toBe(false);
|
|
45
|
+
expect(shouldHideKernelWindow({ platform: "darwin", hostHasInheritableConsole: true })).toBe(false);
|
|
46
|
+
expect(shouldHideKernelWindow({ platform: "darwin", hostHasInheritableConsole: false })).toBe(false);
|
|
47
|
+
});
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
describe("consoleAttachedViaTTY (FFI fallback heuristic)", () => {
|
|
51
|
+
// The OR of three TTY signals correctly classifies the realistic shell
|
|
52
|
+
// redirection scenarios that motivated widening the check beyond stdout
|
|
53
|
+
// in the first review pass (PR #1961). The all-three-redirected case
|
|
54
|
+
// (false here) is the gap that the Win32 FFI probe in
|
|
55
|
+
// `hostHasInheritableConsole` is meant to close — this fallback is best-
|
|
56
|
+
// effort.
|
|
57
|
+
|
|
58
|
+
it("treats a fully interactive launch as console-attached", () => {
|
|
59
|
+
expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: true, stderrIsTTY: true })).toBe(true);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it("treats `omp -p '...' > out.txt` (stdout-only redirect) as console-attached", () => {
|
|
63
|
+
// The reviewer's first-pass repro: stdout off the terminal, stdin
|
|
64
|
+
// and stderr still attached. OR keeps the console.
|
|
65
|
+
expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: false, stderrIsTTY: true })).toBe(true);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
it("treats stdin-only redirects (`< in.txt`) as console-attached", () => {
|
|
69
|
+
expect(consoleAttachedViaTTY({ stdinIsTTY: false, stdoutIsTTY: true, stderrIsTTY: true })).toBe(true);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
it("treats stderr-only redirects (`2> err.log`) as console-attached", () => {
|
|
73
|
+
expect(consoleAttachedViaTTY({ stdinIsTTY: true, stdoutIsTTY: true, stderrIsTTY: false })).toBe(true);
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
it("returns false only when none of stdin/stdout/stderr is a TTY", () => {
|
|
77
|
+
// This is the gap: a real Windows Terminal session with all three
|
|
78
|
+
// streams redirected (`omp ... < in > out 2> err`) lands here.
|
|
79
|
+
// `hostHasInheritableConsole` uses the Win32 FFI probe to recover
|
|
80
|
+
// the right answer in that scenario; this helper is the fallback.
|
|
81
|
+
expect(consoleAttachedViaTTY({ stdinIsTTY: false, stdoutIsTTY: false, stderrIsTTY: false })).toBe(false);
|
|
82
|
+
});
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
describe("hostHasInheritableConsole", () => {
|
|
86
|
+
afterEach(() => {
|
|
87
|
+
__resetWindowsConsoleProbeCache();
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
if (process.platform !== "win32") {
|
|
91
|
+
it("matches the TTY-OR fallback off-Windows", () => {
|
|
92
|
+
// Off-Windows, `windowsHide` is a no-op anyway, but we still
|
|
93
|
+
// expose `hostHasInheritableConsole` symmetrically. Confirm it
|
|
94
|
+
// degrades to the same OR the call site would compute by hand.
|
|
95
|
+
const tty = consoleAttachedViaTTY({
|
|
96
|
+
stdinIsTTY: !!process.stdin.isTTY,
|
|
97
|
+
stdoutIsTTY: !!process.stdout.isTTY,
|
|
98
|
+
stderrIsTTY: !!process.stderr.isTTY,
|
|
99
|
+
});
|
|
100
|
+
expect(hostHasInheritableConsole()).toBe(tty);
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
});
|
|
@@ -8,7 +8,7 @@ import type { ModelRegistry } from "../../config/model-registry";
|
|
|
8
8
|
import { Settings } from "../../config/settings";
|
|
9
9
|
import type { ToolSession } from "../../tools";
|
|
10
10
|
import { ToolError } from "../../tools/tool-errors";
|
|
11
|
-
import {
|
|
11
|
+
import { EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP } from "../bridge-timeout";
|
|
12
12
|
import { IdleTimeout } from "../idle-timeout";
|
|
13
13
|
import { disposeAllVmContexts } from "../js/context-manager";
|
|
14
14
|
import { executeJs } from "../js/executor";
|
|
@@ -99,7 +99,6 @@ function assistant(opts: {
|
|
|
99
99
|
describe("runEvalLlm", () => {
|
|
100
100
|
afterEach(() => {
|
|
101
101
|
vi.restoreAllMocks();
|
|
102
|
-
setBridgeHeartbeatIntervalMs();
|
|
103
102
|
});
|
|
104
103
|
|
|
105
104
|
it("resolves each tier to its expected model", async () => {
|
|
@@ -217,31 +216,32 @@ describe("runEvalLlm", () => {
|
|
|
217
216
|
);
|
|
218
217
|
});
|
|
219
218
|
|
|
220
|
-
it("
|
|
221
|
-
// A oneshot completion emits no status until it returns;
|
|
222
|
-
// must
|
|
223
|
-
// awaits, re-arming the watchdog through emitStatus.
|
|
224
|
-
setBridgeHeartbeatIntervalMs(15);
|
|
219
|
+
it("pauses the idle watchdog while a slow llm() request is in flight", async () => {
|
|
220
|
+
// A oneshot completion emits no status until it returns; delegated model
|
|
221
|
+
// time must be invisible to the eval timeout budget.
|
|
225
222
|
vi.spyOn(ai, "completeSimple").mockImplementation(async () => {
|
|
226
223
|
await Bun.sleep(200);
|
|
227
224
|
return assistant({ text: "the answer" });
|
|
228
225
|
});
|
|
229
226
|
|
|
227
|
+
const ops: string[] = [];
|
|
230
228
|
using idle = new IdleTimeout(60);
|
|
231
229
|
const result = await runEvalLlm(
|
|
232
230
|
{ prompt: "q", model: "smol" },
|
|
233
231
|
{
|
|
234
232
|
session: makeSession(),
|
|
235
233
|
signal: idle.signal,
|
|
236
|
-
// Mirror the eval tool: only a bridge heartbeat re-arms the watchdog.
|
|
237
234
|
emitStatus: event => {
|
|
238
|
-
|
|
235
|
+
ops.push(event.op);
|
|
236
|
+
if (event.op === EVAL_TIMEOUT_PAUSE_OP) idle.pause();
|
|
237
|
+
if (event.op === EVAL_TIMEOUT_RESUME_OP) idle.resume();
|
|
239
238
|
},
|
|
240
239
|
},
|
|
241
240
|
);
|
|
242
241
|
|
|
243
|
-
expect(idle.signal.aborted).toBe(false);
|
|
244
242
|
expect(result.text).toBe("the answer");
|
|
243
|
+
expect(ops).toEqual([EVAL_TIMEOUT_PAUSE_OP, EVAL_TIMEOUT_RESUME_OP, "llm"]);
|
|
244
|
+
expect(idle.signal.aborted).toBe(false);
|
|
245
245
|
});
|
|
246
246
|
});
|
|
247
247
|
|
package/src/eval/agent-bridge.ts
CHANGED
|
@@ -13,10 +13,10 @@ import subagentUserPromptTemplate from "../prompts/system/subagent-user-prompt.m
|
|
|
13
13
|
import * as taskDiscovery from "../task/discovery";
|
|
14
14
|
import * as taskExecutor from "../task/executor";
|
|
15
15
|
import { AgentOutputManager } from "../task/output-manager";
|
|
16
|
-
import type { AgentDefinition, AgentProgress } from "../task/types";
|
|
16
|
+
import type { AgentDefinition, AgentProgress, SingleResult } from "../task/types";
|
|
17
17
|
import type { ToolSession } from "../tools";
|
|
18
18
|
import { ToolError } from "../tools/tool-errors";
|
|
19
|
-
import {
|
|
19
|
+
import { withBridgeTimeoutPause } from "./bridge-timeout";
|
|
20
20
|
import type { JsStatusEvent } from "./js/shared/types";
|
|
21
21
|
// Import review tools for side effects (registers subagent tool handlers).
|
|
22
22
|
import "../tools/review";
|
|
@@ -173,6 +173,26 @@ function emitProgressStatus(emitStatus: ((event: JsStatusEvent) => void) | undef
|
|
|
173
173
|
});
|
|
174
174
|
}
|
|
175
175
|
|
|
176
|
+
/**
|
|
177
|
+
* Coalesce a subagent failure into a non-empty, human-meaningful error message.
|
|
178
|
+
*
|
|
179
|
+
* When the executor aborts a subagent (runtime limit, parent cancellation, …)
|
|
180
|
+
* the actionable explanation lives on `abortReason`, while `error`/`stderr`
|
|
181
|
+
* are routinely empty strings. Plain `??` coalescing stops at the empty string
|
|
182
|
+
* and ships an empty error through the bridge — Python then surfaces only the
|
|
183
|
+
* generic `bridge call '__agent__' failed`. See #2006.
|
|
184
|
+
*/
|
|
185
|
+
function buildSubagentFailureMessage(agentName: string, result: SingleResult): string {
|
|
186
|
+
const abortReason = trimToUndefined(result.abortReason);
|
|
187
|
+
if (result.aborted && abortReason) return abortReason;
|
|
188
|
+
return (
|
|
189
|
+
trimToUndefined(result.error) ??
|
|
190
|
+
trimToUndefined(result.stderr) ??
|
|
191
|
+
abortReason ??
|
|
192
|
+
`agent() subagent '${agentName}' failed.`
|
|
193
|
+
);
|
|
194
|
+
}
|
|
195
|
+
|
|
176
196
|
/**
|
|
177
197
|
* Run a single subagent on behalf of an eval cell's `agent()` call.
|
|
178
198
|
*/
|
|
@@ -225,17 +245,15 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
|
|
|
225
245
|
getSessionId: options.session.getSessionId ?? (() => null),
|
|
226
246
|
};
|
|
227
247
|
const parentArtifactManager = options.session.getArtifactManager?.() ?? undefined;
|
|
228
|
-
const parentEvalSessionId = options.session.getEvalSessionId?.() ?? undefined;
|
|
229
248
|
const mcpManager = options.session.mcpManager ?? MCPManager.instance();
|
|
230
249
|
const { sessionFile, artifactsDir, contextFile } = await getArtifacts(options.session);
|
|
231
250
|
const outputManager = getOutputManager(options.session);
|
|
232
251
|
const id = await outputManager.allocate(outputIdBase(parsed.label, agentName));
|
|
233
252
|
const assignment = parsed.prompt.trim();
|
|
234
253
|
const context = trimToUndefined(parsed.context);
|
|
235
|
-
//
|
|
236
|
-
//
|
|
237
|
-
|
|
238
|
-
const result = await withBridgeHeartbeat(options.emitStatus, () =>
|
|
254
|
+
// Suspend eval timeout accounting while the subagent owns control. The
|
|
255
|
+
// timeout clock restarts once the bridge returns to the cell runtime.
|
|
256
|
+
const result = await withBridgeTimeoutPause(options.emitStatus, () =>
|
|
239
257
|
taskExecutor.runSubprocess({
|
|
240
258
|
cwd: options.session.cwd,
|
|
241
259
|
agent: effectiveAgent,
|
|
@@ -261,6 +279,12 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
|
|
|
261
279
|
authStorage: options.session.authStorage,
|
|
262
280
|
modelRegistry: options.session.modelRegistry,
|
|
263
281
|
settings: options.session.settings,
|
|
282
|
+
// Eval `agent()` subagents are never wall-clock capped: the parent
|
|
283
|
+
// cell's idle watchdog is suspended for the whole bridge call
|
|
284
|
+
// (withBridgeTimeoutPause), so a long-running phase/recovery workflow
|
|
285
|
+
// must not be killed by `task.maxRuntimeMs`. Force the limit off
|
|
286
|
+
// regardless of the inherited session setting.
|
|
287
|
+
maxRuntimeMs: 0,
|
|
264
288
|
mcpManager,
|
|
265
289
|
contextFiles,
|
|
266
290
|
skills: availableSkills,
|
|
@@ -272,14 +296,16 @@ export async function runEvalAgent(args: unknown, options: EvalAgentBridgeOption
|
|
|
272
296
|
parentHindsightSessionState: options.session.getHindsightSessionState?.(),
|
|
273
297
|
parentMnemopiSessionState: options.session.getMnemopiSessionState?.(),
|
|
274
298
|
parentTelemetry: options.session.getTelemetry?.(),
|
|
275
|
-
parentEvalSessionId
|
|
299
|
+
// Deliberately omit parentEvalSessionId: the parent's Python kernel is
|
|
300
|
+
// blocked on this bridge call, so sharing the eval session would deadlock
|
|
301
|
+
// (subagent queues behind the parent's in-flight execution, parent waits
|
|
302
|
+
// for subagent → circular). Each bridge-spawned subagent gets its own
|
|
303
|
+
// eval session with an independent kernel.
|
|
276
304
|
}),
|
|
277
305
|
);
|
|
278
306
|
|
|
279
|
-
if (result.exitCode !== 0 || result.error) {
|
|
280
|
-
|
|
281
|
-
result.error ?? result.stderr ?? result.abortReason ?? `agent() subagent '${agentName}' failed.`;
|
|
282
|
-
throw new ToolError(failureMessage);
|
|
307
|
+
if (result.exitCode !== 0 || result.error || result.aborted) {
|
|
308
|
+
throw new ToolError(buildSubagentFailureMessage(agentName, result));
|
|
283
309
|
}
|
|
284
310
|
|
|
285
311
|
options.session.recordEvalSubagentUsage?.(result.usage?.output ?? 0);
|
package/src/eval/backend.ts
CHANGED
|
@@ -10,12 +10,12 @@ export interface ExecutorBackendExecOptions {
|
|
|
10
10
|
signal?: AbortSignal;
|
|
11
11
|
session: ToolSession;
|
|
12
12
|
/**
|
|
13
|
-
*
|
|
14
|
-
* driven entirely by `signal`, which the eval tool arms as
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* and as cold-start headroom; they MUST
|
|
18
|
-
* timer from it.
|
|
13
|
+
* Runtime-work budget in milliseconds (the cell's `timeout`). Cancellation is
|
|
14
|
+
* driven entirely by `signal`, which the eval tool arms as a watchdog that
|
|
15
|
+
* pauses on bridge timeout-control status events and fires a `TimeoutError`
|
|
16
|
+
* reason only while the Python/JS runtime owns control. Backends use this
|
|
17
|
+
* value only for timeout-annotation text and as cold-start headroom; they MUST
|
|
18
|
+
* NOT derive a competing wall-clock timer from it.
|
|
19
19
|
*/
|
|
20
20
|
idleTimeoutMs: number;
|
|
21
21
|
reset: boolean;
|