@poncho-ai/harness 0.50.3 → 0.50.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.50.3 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.50.4 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,9 +8,9 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 530.79 KB
12
- ESM dist/isolate-BNQ6P3HI.js 51.41 KB
13
- ESM ⚡️ Build success in 229ms
11
+ ESM dist/index.js 533.24 KB
12
+ ESM dist/isolate-F2PPSUL6.js 53.82 KB
13
+ ESM ⚡️ Build success in 250ms
14
14
  DTS Build start
15
- DTS ⚡️ Build success in 7430ms
16
- DTS dist/index.d.ts 89.60 KB
15
+ DTS ⚡️ Build success in 9179ms
16
+ DTS dist/index.d.ts 89.97 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.50.4
4
+
5
+ ### Patch Changes
6
+
7
+ - [`9a39327`](https://github.com/cesr/poncho-ai/commit/9a393274d8a8061371d268fa81db3501cb0a8308) Thanks [@cesr](https://github.com/cesr)! - harness: fix three `run_code` / cancellation bugs.
8
+ - **Timers polyfill never fired delayed callbacks.** `setTimeout(fn, ms)` only ran the callback when `ms === 0`; any non-zero delay was stored and never invoked, so `await new Promise(r => setTimeout(r, 50))` (the standard sleep) hung forever. The polyfill now drains pending timers on the microtask queue in delay order against a virtual clock, so sleeps resolve and `setInterval`/`clearInterval` work.
9
+ - **No wall-clock bound on `run_code`.** isolated-vm's `timeout` only bounds synchronous execution; a script that returns a never-settling promise hung the whole turn indefinitely. `runtime.execute` now races the eval against a host timer that disposes the isolate, so `isolate.timeLimit` bounds total execution and returns a `TimeoutError`.
10
+ - **Stopping a turn mid-tool-call dropped the assistant turn from canonical history.** On cancellation the in-flight assistant message (its text + tool calls) lives only in step-local state — it's pushed to `messages` together with the tool results, which never arrive when stopped. The cancellation snapshot now re-attaches that turn with a synthesized "cancelled by user" tool result for each pending tool call, so the next request keeps a valid record instead of showing the model back-to-back user messages.
11
+
12
+ - [`c604fd6`](https://github.com/cesr/poncho-ai/commit/c604fd6b41dfd06600af85daa892ab4fd3852bad) Thanks [@cesr](https://github.com/cesr)! - harness: harden subagent → parent result delivery so a step-exhausted subagent stops surfacing as `(no response)`.
13
+ - **Force a closing text turn on the final step.** On the last permitted step (`step === maxSteps`) the run loop now strips the tools and appends a one-shot "summarize now, no tools" nudge to that model request, so a run that hits its step ceiling produces a real text summary instead of terminating on a dangling tool call. Previously such a run ended on a tool-call turn with no final text — common in subagents doing many tool calls — and the parent received an empty result. `maxSteps` itself is unchanged; the nudge is request-only and never written to history.
14
+ - **Content-shape-robust result extraction.** Pulling a subagent's response no longer requires the last assistant message to be a plain `string`. The new `lastAssistantText` helper handles `string`, `ContentPart[]`, and the run loop's `{"text":...,"tool_calls":[...]}` envelope, and walks backwards to the last non-empty assistant text — so a transcript that ends on a text-less tool turn still yields the prose produced just before it.
15
+ - **Actionable empty-result sentinel.** When a subagent genuinely produced no summary, the injected parent message now says how many steps ran and points at `read_subagent(<id>, mode:"assistant")` to recover the work, instead of a dead-end `(no response)`.
16
+
3
17
  ## 0.50.3
4
18
 
5
19
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -1981,6 +1981,13 @@ declare const MAX_SUBAGENT_CALLBACK_COUNT = 20;
1981
1981
  declare const CALLBACK_LOCK_STALE_MS: number;
1982
1982
  declare const STALE_SUBAGENT_THRESHOLD_MS: number;
1983
1983
 
1984
+ /**
1985
+ * Find the last non-empty assistant text in a subagent transcript. Walking
1986
+ * backwards (rather than reading only the final message) means a subagent
1987
+ * that ended on a tool-call turn still yields the prose it produced just
1988
+ * before — instead of surfacing to the parent as an empty result.
1989
+ */
1990
+ declare const lastAssistantText: (messages: Message[]) => string;
1984
1991
  type ActiveConversationRun = {
1985
1992
  ownerId: string;
1986
1993
  abortController: AbortController;
@@ -2143,4 +2150,4 @@ interface RunConversationTurnResult {
2143
2150
  }
2144
2151
  declare const runConversationTurn: (opts: RunConversationTurnOpts) => Promise<RunConversationTurnResult>;
2145
2152
 
2146
- export { type ActiveConversationRun, type ActiveSubagentRun, type AgentFrontmatter, AgentHarness, type AgentIdentity, type AgentLimitsConfig, type AgentModelConfig, AgentOrchestrator, type ApprovalEventItem, type ArchivedToolResult$1 as ArchivedToolResult, type BashConfig, BashEnvironmentManager, type BashExecutionLimits, type BuiltInToolToggles, CALLBACK_LOCK_STALE_MS, type CompactMessagesOptions, type CompactResult, type CompactionConfig, type ContinuationHooks, type Conversation, type ConversationCreateInit, type ConversationState, type ConversationStatusSnapshot, type ConversationStore, type ConversationSummary, type CreateSkillToolsOptions, type CronJobConfig, DEFAULT_AGENT_DESCRIPTION, DEFAULT_AGENT_NAME, DEFAULT_MAX_STEPS, DEFAULT_MODEL_NAME, DEFAULT_MODEL_PROVIDER, DEFAULT_TEMPERATURE, DEFAULT_TIMEOUT, type DefaultAgentDefinitionOptions, type EventSink, type ExecuteTurnResult, type HarnessOptions, type HarnessRunOutput, type HistorySource, InMemoryConversationStore, InMemoryEngine, InMemoryStateStore, type IsolateBinding, type IsolateConfig, LocalMcpBridge, LocalUploadStore, MAX_CONCURRENT_SUBAGENTS, MAX_CONTINUATION_COUNT, MAX_SUBAGENT_CALLBACK_COUNT, MAX_SUBAGENT_NESTING, type MainMemory, type McpConfig, type MemoryConfig, type MemoryStore, type MessagingChannelConfig, type ModelProviderFactory, type MountProvider, type NetworkConfig, OPENAI_CODEX_CLIENT_ID, type OpenAICodexAuthConfig, type OpenAICodexDeviceAuthRequest, type OpenAICodexSession, type OrchestratorHooks, type OrchestratorOptions, type OtlpConfig, type OtlpOption, PONCHO_UPLOAD_SCHEME, type ParsedAgent, type PendingSubagentApproval, type PendingSubagentResult, type PendingToolCall, type PonchoConfig, PonchoFsAdapter, PostgresEngine, type ProviderConfig, type Recurrence, type RecurrenceType, type Reminder, type ReminderCreateInput, type ReminderStatus, type ReminderStore, type RemoteMcpServerConfig, type RunConversationTurnOpts, type RunConversationTurnResult, type RunOutcome, type RunRequest, type RuntimeRenderContext, S3UploadStore, STALE_SUBAGENT_THRESHOLD_MS, STORAGE_SCHEMA_VERSION, type SecretsStore, type SkillContextEntry, type SkillMetadata, type SkillSource, SqliteEngine, type StateConfig, type StateProviderName, type StateStore, type StorageConfig, type StorageEngine, type StorageFactoryOptions, type StorageProvider, type StoredApproval, type SubagentManager, type SubagentResult, type SubagentSpawnResult, type SubagentSummary, type SubagentTranscript, type SubagentTranscriptMode, TOOL_RESULT_ARCHIVE_PARAM, type TelemetryConfig, TelemetryEmitter, type TenantTokenPayload, type ToolAccess, type ToolCall, ToolDispatcher, type ToolExecutionResult, type TurnDraftState, type TurnResultMetadata, type TurnSection, type UploadStore, type UploadsConfig, VFS_SCHEME, VercelBlobUploadStore, type VfsDirEntry, type VfsStat, type VirtualMount, applyTurnMetadata, buildAgentDirectoryName, buildApprovalCheckpoints, buildAssistantMetadata, buildSkillContextWindow, buildToolCompletedText, cloneSections, compactMessages, completeOpenAICodexDeviceAuth, computeNextOccurrence, createBashTool, createConversationStore, createConversationStoreFromEngine, createDefaultTools, createDeleteDirectoryTool, createDeleteTool, createEditTool, createMemoryStore, createMemoryStoreFromEngine, createMemoryTools, createModelProvider, createReminderStore, createReminderStoreFromEngine, createReminderTools, createSearchTools, createSecretsStore, createSkillTools, createStateStore, createStorageEngine, createSubagentTools, createTodoStoreFromEngine, createTurnDraftState, createUploadStore, createWriteTool, decodeFileInputData, defaultAgentDefinition, deleteOpenAICodexSession, deriveUploadKey, ensureAgentIdentity, estimateTokens, estimateTotalTokens, executeConversationTurn, findSafeSplitPoint, flushTurnDraft, generateAgentId, getAgentStoreDirectory, getModelContextWindow, getOpenAICodexAccessToken, getOpenAICodexAuthFilePath, getOpenAICodexRequiredScopes, getPonchoStoreRoot, isMessageArray, jsonSchemaToZod, loadCanonicalHistory, loadPonchoConfig, loadRunHistory, loadSkillContext, loadSkillInstructions, loadSkillMetadata, loadSkillMetadataFromDirs, loadVfsSkillMetadata, mergeSkills, normalizeApprovalCheckpoint, normalizeOtlp, normalizeScriptPolicyPath, normalizeToolAccess, parseAgentFile, parseAgentMarkdown, parseSkillFrontmatter, ponchoDocsTool, readOpenAICodexSession, readSkillResource, recordStandardTurnEvent, renderAgentPrompt, resolveAgentIdentity, resolveCompactionConfig, resolveEnv, resolveMemoryConfig, resolveRunRequest, resolveSkillDirs, resolveStateConfig, runConversationTurn, slugifyStorageComponent, startOpenAICodexDeviceAuth, verifyTenantToken, withToolResultArchiveParam, writeOpenAICodexSession };
2153
+ export { type ActiveConversationRun, type ActiveSubagentRun, type AgentFrontmatter, AgentHarness, type AgentIdentity, type AgentLimitsConfig, type AgentModelConfig, AgentOrchestrator, type ApprovalEventItem, type ArchivedToolResult$1 as ArchivedToolResult, type BashConfig, BashEnvironmentManager, type BashExecutionLimits, type BuiltInToolToggles, CALLBACK_LOCK_STALE_MS, type CompactMessagesOptions, type CompactResult, type CompactionConfig, type ContinuationHooks, type Conversation, type ConversationCreateInit, type ConversationState, type ConversationStatusSnapshot, type ConversationStore, type ConversationSummary, type CreateSkillToolsOptions, type CronJobConfig, DEFAULT_AGENT_DESCRIPTION, DEFAULT_AGENT_NAME, DEFAULT_MAX_STEPS, DEFAULT_MODEL_NAME, DEFAULT_MODEL_PROVIDER, DEFAULT_TEMPERATURE, DEFAULT_TIMEOUT, type DefaultAgentDefinitionOptions, type EventSink, type ExecuteTurnResult, type HarnessOptions, type HarnessRunOutput, type HistorySource, InMemoryConversationStore, InMemoryEngine, InMemoryStateStore, type IsolateBinding, type IsolateConfig, LocalMcpBridge, LocalUploadStore, MAX_CONCURRENT_SUBAGENTS, MAX_CONTINUATION_COUNT, MAX_SUBAGENT_CALLBACK_COUNT, MAX_SUBAGENT_NESTING, type MainMemory, type McpConfig, type MemoryConfig, type MemoryStore, type MessagingChannelConfig, type ModelProviderFactory, type MountProvider, type NetworkConfig, OPENAI_CODEX_CLIENT_ID, type OpenAICodexAuthConfig, type OpenAICodexDeviceAuthRequest, type OpenAICodexSession, type OrchestratorHooks, type OrchestratorOptions, type OtlpConfig, type OtlpOption, PONCHO_UPLOAD_SCHEME, type ParsedAgent, type PendingSubagentApproval, type PendingSubagentResult, type PendingToolCall, type PonchoConfig, PonchoFsAdapter, PostgresEngine, type ProviderConfig, type Recurrence, type RecurrenceType, type Reminder, type ReminderCreateInput, type ReminderStatus, type ReminderStore, type RemoteMcpServerConfig, type RunConversationTurnOpts, type RunConversationTurnResult, type RunOutcome, type RunRequest, type RuntimeRenderContext, S3UploadStore, STALE_SUBAGENT_THRESHOLD_MS, STORAGE_SCHEMA_VERSION, type SecretsStore, type SkillContextEntry, type SkillMetadata, type SkillSource, SqliteEngine, type StateConfig, type StateProviderName, type StateStore, type StorageConfig, type StorageEngine, type StorageFactoryOptions, type StorageProvider, type StoredApproval, type SubagentManager, type SubagentResult, type SubagentSpawnResult, type SubagentSummary, type SubagentTranscript, type SubagentTranscriptMode, TOOL_RESULT_ARCHIVE_PARAM, type TelemetryConfig, TelemetryEmitter, type TenantTokenPayload, type ToolAccess, type ToolCall, ToolDispatcher, type ToolExecutionResult, type TurnDraftState, type TurnResultMetadata, type TurnSection, type UploadStore, type UploadsConfig, VFS_SCHEME, VercelBlobUploadStore, type VfsDirEntry, type VfsStat, type VirtualMount, applyTurnMetadata, buildAgentDirectoryName, buildApprovalCheckpoints, buildAssistantMetadata, buildSkillContextWindow, buildToolCompletedText, cloneSections, compactMessages, completeOpenAICodexDeviceAuth, computeNextOccurrence, createBashTool, createConversationStore, createConversationStoreFromEngine, createDefaultTools, createDeleteDirectoryTool, createDeleteTool, createEditTool, createMemoryStore, createMemoryStoreFromEngine, createMemoryTools, createModelProvider, createReminderStore, createReminderStoreFromEngine, createReminderTools, createSearchTools, createSecretsStore, createSkillTools, createStateStore, createStorageEngine, createSubagentTools, createTodoStoreFromEngine, createTurnDraftState, createUploadStore, createWriteTool, decodeFileInputData, defaultAgentDefinition, deleteOpenAICodexSession, deriveUploadKey, ensureAgentIdentity, estimateTokens, estimateTotalTokens, executeConversationTurn, findSafeSplitPoint, flushTurnDraft, generateAgentId, getAgentStoreDirectory, getModelContextWindow, getOpenAICodexAccessToken, getOpenAICodexAuthFilePath, getOpenAICodexRequiredScopes, getPonchoStoreRoot, isMessageArray, jsonSchemaToZod, lastAssistantText, loadCanonicalHistory, loadPonchoConfig, loadRunHistory, loadSkillContext, loadSkillInstructions, loadSkillMetadata, loadSkillMetadataFromDirs, loadVfsSkillMetadata, mergeSkills, normalizeApprovalCheckpoint, normalizeOtlp, normalizeScriptPolicyPath, normalizeToolAccess, parseAgentFile, parseAgentMarkdown, parseSkillFrontmatter, ponchoDocsTool, readOpenAICodexSession, readSkillResource, recordStandardTurnEvent, renderAgentPrompt, resolveAgentIdentity, resolveCompactionConfig, resolveEnv, resolveMemoryConfig, resolveRunRequest, resolveSkillDirs, resolveStateConfig, runConversationTurn, slugifyStorageComponent, startOpenAICodexDeviceAuth, verifyTenantToken, withToolResultArchiveParam, writeOpenAICodexSession };
package/dist/index.js CHANGED
@@ -8626,6 +8626,7 @@ var now = () => Date.now();
8626
8626
  var FIRST_CHUNK_TIMEOUT_MS = 9e4;
8627
8627
  var MAX_TRANSIENT_STEP_RETRIES = 1;
8628
8628
  var COMPACTION_CHECK_INTERVAL_STEPS = 3;
8629
+ var FINAL_STEP_SUMMARY_PROMPT = "You have reached the maximum number of steps for this run and cannot call any more tools. Do NOT attempt any tool calls. Using only the work you have already done, write your final response now: summarize what you found or accomplished, include any concrete results, and flag anything left unfinished.";
8629
8630
  var TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
8630
8631
  var TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
8631
8632
  var TOOL_RESULT_PREVIEW_CHARS = 700;
@@ -9951,7 +9952,7 @@ var AgentHarness = class _AgentHarness {
9951
9952
  this.registerIfMissing(createEditFileTool(getFs));
9952
9953
  this.registerIfMissing(createWriteFileTool(getFs));
9953
9954
  if (config?.isolate) {
9954
- const { createRunCodeTool, buildRunCodeDescription, bundleLibraries } = await import("./isolate-BNQ6P3HI.js");
9955
+ const { createRunCodeTool, buildRunCodeDescription, bundleLibraries } = await import("./isolate-F2PPSUL6.js");
9955
9956
  let libraryPreamble = null;
9956
9957
  if (config.isolate.libraries?.length) {
9957
9958
  libraryPreamble = await bundleLibraries(config.isolate.libraries, this.workingDir);
@@ -10327,7 +10328,7 @@ Examples:${this.environment !== "production" ? `
10327
10328
  Files in the VFS are accessible to the user via \`/api/vfs/{path}\`. For example, a file at \`/downloads/report.pdf\` can be linked as \`/api/vfs/downloads/report.pdf\`. Use this to share downloadable files with the user.` : "";
10328
10329
  let isolateContext = "";
10329
10330
  if (this.loadedConfig?.isolate && this.dispatcher.get("run_code")) {
10330
- const { generateIsolateTypeStubs } = await import("./isolate-BNQ6P3HI.js");
10331
+ const { generateIsolateTypeStubs } = await import("./isolate-F2PPSUL6.js");
10331
10332
  const typeStubs = generateIsolateTypeStubs(this.loadedConfig.isolate);
10332
10333
  isolateContext = `
10333
10334
 
@@ -10374,10 +10375,40 @@ ${this.skillFingerprint}`;
10374
10375
  };
10375
10376
  const isCancelled = () => input.abortSignal?.aborted === true;
10376
10377
  let cancellationEmitted = false;
10378
+ let inflightTurn = null;
10377
10379
  const emitCancellation = () => {
10378
10380
  cancellationEmitted = true;
10379
- const snapshot = trimToValidPrefix([...messages]);
10380
- return pushEvent({ type: "run:cancelled", runId, messages: snapshot });
10381
+ const snapshot = [...messages];
10382
+ if (inflightTurn && (inflightTurn.text.length > 0 || inflightTurn.toolCalls.length > 0)) {
10383
+ const hasToolCalls = inflightTurn.toolCalls.length > 0;
10384
+ const assistantContent = hasToolCalls ? JSON.stringify({
10385
+ text: inflightTurn.text,
10386
+ tool_calls: inflightTurn.toolCalls.map((tc) => ({
10387
+ id: tc.id,
10388
+ name: tc.name,
10389
+ input: tc.input
10390
+ }))
10391
+ }) : inflightTurn.text;
10392
+ snapshot.push({
10393
+ role: "assistant",
10394
+ content: assistantContent,
10395
+ metadata: { timestamp: now(), id: randomUUID5(), runId }
10396
+ });
10397
+ if (hasToolCalls) {
10398
+ const cancelledResults = inflightTurn.toolCalls.map((tc) => ({
10399
+ type: "tool_result",
10400
+ tool_use_id: tc.id,
10401
+ tool_name: tc.name,
10402
+ content: "Tool execution cancelled by user."
10403
+ }));
10404
+ snapshot.push({
10405
+ role: "tool",
10406
+ content: JSON.stringify(cancelledResults),
10407
+ metadata: { timestamp: now(), id: randomUUID5(), runId }
10408
+ });
10409
+ }
10410
+ }
10411
+ return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
10381
10412
  };
10382
10413
  const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
10383
10414
  const contextWindow = agent.frontmatter.model?.contextWindow ?? getModelContextWindow(resolvedModelName);
@@ -10460,6 +10491,7 @@ ${this.skillFingerprint}`;
10460
10491
  let cachedCoreMessages = [];
10461
10492
  let convertedUpTo = 0;
10462
10493
  for (let step = 1; step <= maxSteps; step += 1) {
10494
+ inflightTurn = null;
10463
10495
  try {
10464
10496
  yield* drainBrowserEvents();
10465
10497
  if (isCancelled()) {
@@ -10817,11 +10849,14 @@ ${textContent}` };
10817
10849
  ...cachedMessages
10818
10850
  ] : cachedMessages;
10819
10851
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
10852
+ const isFinalStep = step === maxSteps;
10853
+ const toolsForStep = isFinalStep ? {} : tools;
10854
+ const messagesForStep = isFinalStep ? [...finalMessages, { role: "user", content: FINAL_STEP_SUMMARY_PROMPT }] : finalMessages;
10820
10855
  const result = await streamText({
10821
10856
  model: modelInstance,
10822
10857
  ...useStaticCache ? {} : { system: systemPrompt },
10823
- messages: finalMessages,
10824
- tools,
10858
+ messages: messagesForStep,
10859
+ tools: toolsForStep,
10825
10860
  temperature,
10826
10861
  abortSignal: input.abortSignal,
10827
10862
  ...typeof maxTokens === "number" ? { maxTokens } : {},
@@ -10950,6 +10985,7 @@ ${textContent}` };
10950
10985
  yield pushEvent({ type: "run:completed", runId, result: result_ });
10951
10986
  return;
10952
10987
  }
10988
+ inflightTurn = { text: fullText, toolCalls: [] };
10953
10989
  if (isCancelled()) {
10954
10990
  yield emitCancellation();
10955
10991
  return;
@@ -11036,6 +11072,7 @@ ${textContent}` };
11036
11072
  name: tc.toolName,
11037
11073
  input: tc.input
11038
11074
  }));
11075
+ if (inflightTurn) inflightTurn.toolCalls = toolCalls;
11039
11076
  if (toolCalls.length === 0) {
11040
11077
  if (fullText.length === 0) {
11041
11078
  const isExpectedEmpty = finishReason === "stop";
@@ -11416,6 +11453,7 @@ ${textContent}` };
11416
11453
  content: JSON.stringify(toolResultsForModel),
11417
11454
  metadata: toolMsgMeta
11418
11455
  });
11456
+ inflightTurn = null;
11419
11457
  if (softDeadlineMs > 0 && now() - start > softDeadlineMs) {
11420
11458
  const result_ = {
11421
11459
  status: "completed",
@@ -12282,6 +12320,26 @@ var CALLBACK_LOCK_STALE_MS = 5 * 60 * 1e3;
12282
12320
  var STALE_SUBAGENT_THRESHOLD_MS = 5 * 60 * 1e3;
12283
12321
 
12284
12322
  // src/orchestrator/orchestrator.ts
12323
+ import { getTextContent as getTextContent3 } from "@poncho-ai/sdk";
12324
+ var assistantMessageText = (message) => {
12325
+ const raw = getTextContent3(message).trim();
12326
+ if (raw.startsWith("{") && raw.includes('"tool_calls"')) {
12327
+ try {
12328
+ const parsed = JSON.parse(raw);
12329
+ if (typeof parsed.text === "string") return parsed.text.trim();
12330
+ } catch {
12331
+ }
12332
+ }
12333
+ return raw;
12334
+ };
12335
+ var lastAssistantText = (messages) => {
12336
+ for (let i = messages.length - 1; i >= 0; i -= 1) {
12337
+ if (messages[i].role !== "assistant") continue;
12338
+ const text = assistantMessageText(messages[i]);
12339
+ if (text) return text;
12340
+ }
12341
+ return "";
12342
+ };
12285
12343
  var AgentOrchestrator = class {
12286
12344
  harness;
12287
12345
  conversationStore;
@@ -12999,14 +13057,11 @@ var AgentOrchestrator = class {
12999
13057
  subagentId: childConversationId,
13000
13058
  conversationId: childConversationId
13001
13059
  });
13002
- let subagentResponse = runResult?.response ?? draft.assistantResponse;
13060
+ let subagentResponse = (runResult?.response ?? draft.assistantResponse ?? "").trim();
13003
13061
  if (!subagentResponse) {
13004
13062
  const freshSubConv = await this.conversationStore.get(childConversationId);
13005
13063
  if (freshSubConv) {
13006
- const lastAssistant = [...freshSubConv.messages].reverse().find((m) => m.role === "assistant");
13007
- if (lastAssistant && typeof lastAssistant.content === "string") {
13008
- subagentResponse = lastAssistant.content;
13009
- }
13064
+ subagentResponse = lastAssistantText(freshSubConv.messages);
13010
13065
  }
13011
13066
  }
13012
13067
  const pendingResult = {
@@ -13095,8 +13150,10 @@ var AgentOrchestrator = class {
13095
13150
  const callbackCount = (conversation.subagentCallbackCount ?? 0) + 1;
13096
13151
  conversation.subagentCallbackCount = callbackCount;
13097
13152
  for (const pr of pendingResults) {
13153
+ const responseText = (pr.result?.response ?? "").trim();
13154
+ const responseLine = responseText || `(subagent produced no final summary after ${pr.result?.steps ?? 0} step(s); its work may be incomplete. Call read_subagent with subagent_id "${pr.subagentId}" and mode "assistant" to retrieve what it did.)`;
13098
13155
  const resultBody = pr.result ? `Status: ${pr.result.status}
13099
- Response: ${pr.result.response ?? "(no response)"}
13156
+ Response: ${responseLine}
13100
13157
  Steps: ${pr.result.steps}, Duration: ${pr.result.duration}ms` : pr.error ? `Error: ${pr.error.message}` : "(no result)";
13101
13158
  conversation.messages.push({
13102
13159
  role: "user",
@@ -13348,14 +13405,11 @@ ${resultBody}`,
13348
13405
  subagentId: conversationId,
13349
13406
  conversationId
13350
13407
  });
13351
- let subagentResponse = runResult?.response ?? draft.assistantResponse;
13408
+ let subagentResponse = (runResult?.response ?? draft.assistantResponse ?? "").trim();
13352
13409
  if (!subagentResponse) {
13353
13410
  const freshSubConv = await this.conversationStore.get(conversationId);
13354
13411
  if (freshSubConv) {
13355
- const lastAssistant = [...freshSubConv.messages].reverse().find((m) => m.role === "assistant");
13356
- if (lastAssistant) {
13357
- subagentResponse = typeof lastAssistant.content === "string" ? lastAssistant.content : "";
13358
- }
13412
+ subagentResponse = lastAssistantText(freshSubConv.messages);
13359
13413
  }
13360
13414
  }
13361
13415
  const parentConv = await this.conversationStore.get(parentConversationId);
@@ -14048,6 +14102,7 @@ export {
14048
14102
  getPonchoStoreRoot,
14049
14103
  isMessageArray,
14050
14104
  jsonSchemaToZod,
14105
+ lastAssistantText,
14051
14106
  loadCanonicalHistory,
14052
14107
  loadPonchoConfig,
14053
14108
  loadRunHistory,
@@ -89,6 +89,8 @@ function createIsolateRuntime(config) {
89
89
  }
90
90
  const t0 = performance.now();
91
91
  let context;
92
+ let timedOut = false;
93
+ let wallTimer;
92
94
  try {
93
95
  context = await isolate.createContext();
94
96
  const jail = context.global;
@@ -121,12 +123,29 @@ function createIsolateRuntime(config) {
121
123
  const wrapped = `(async () => {
122
124
  ${code}
123
125
  })()`;
124
- const rawResult = await context.eval(wrapped, {
126
+ const evalPromise = context.eval(wrapped, {
125
127
  filename: "<user-code>",
126
128
  promise: true,
127
129
  copy: true,
128
130
  timeout: config.timeout
129
131
  });
132
+ const rawResult = config.timeout > 0 ? await Promise.race([
133
+ evalPromise,
134
+ new Promise((_resolve, reject) => {
135
+ wallTimer = setTimeout(() => {
136
+ timedOut = true;
137
+ try {
138
+ isolate.dispose();
139
+ } catch {
140
+ }
141
+ reject(new Error("Execution timed out"));
142
+ }, config.timeout);
143
+ })
144
+ ]) : await evalPromise;
145
+ if (wallTimer) {
146
+ clearTimeout(wallTimer);
147
+ wallTimer = void 0;
148
+ }
130
149
  const stdout = await context.eval("__stdout.join('\\n')", { copy: true });
131
150
  const stderr = await context.eval("__stderr.join('\\n')", { copy: true });
132
151
  let result;
@@ -151,6 +170,17 @@ ${code}
151
170
  executionTimeMs: elapsed
152
171
  };
153
172
  }
173
+ if (timedOut) {
174
+ return {
175
+ stdout: "",
176
+ stderr: "",
177
+ error: {
178
+ message: `Execution timed out after ${config.timeout}ms`,
179
+ name: "TimeoutError"
180
+ },
181
+ executionTimeMs: elapsed
182
+ };
183
+ }
154
184
  let stdout = "";
155
185
  let stderr = "";
156
186
  if (context) {
@@ -169,6 +199,7 @@ ${code}
169
199
  executionTimeMs: elapsed
170
200
  };
171
201
  } finally {
202
+ if (wallTimer) clearTimeout(wallTimer);
172
203
  if (abortHandler && signal) {
173
204
  signal.removeEventListener("abort", abortHandler);
174
205
  }
@@ -927,50 +958,79 @@ var POLYFILL_FETCH_STUB = `
927
958
  `;
928
959
  var POLYFILL_TIMERS = `
929
960
  // --- Timers polyfill ---
961
+ //
962
+ // The isolate has no host event loop, so real wall-clock delays can't be
963
+ // honoured. What we *can* do is drain pending timers on the microtask queue
964
+ // (which isolated-vm does pump while resolving the run's promise), firing
965
+ // them in order of their requested delay against a virtual clock. This makes
966
+ // the overwhelmingly common pattern \u2014 \`await new Promise(r => setTimeout(r, n))\`
967
+ // as a sleep \u2014 actually resolve instead of hanging the whole run forever.
968
+ // Delays collapse to "as soon as possible, in delay order"; that's the right
969
+ // trade for a sandbox with no real time. A runaway setInterval is bounded by
970
+ // __MAX_FIRES here and, ultimately, by the host-side wall-clock timeout.
930
971
  (function() {
931
972
  let __timerId = 0;
932
- const __timers = new Map();
973
+ const __timers = new Map(); // id -> { fn, due, type }
974
+ const __intervals = new Set(); // ids that should reschedule
975
+ let __vclock = 0; // virtual clock (ms)
976
+ let __draining = false;
977
+ let __fired = 0;
978
+ const __MAX_FIRES = 1000000; // backstop against a runaway interval
979
+
980
+ function __schedule(fn, delayMs, type, id) {
981
+ __timers.set(id, { fn, due: __vclock + delayMs, type });
982
+ if (!__draining) __drain();
983
+ return id;
984
+ }
985
+
986
+ function __drain() {
987
+ __draining = true;
988
+ const step = function() {
989
+ if (__timers.size === 0) { __draining = false; return; }
990
+ // Pick the earliest-due timer (ties broken by insertion id for FIFO).
991
+ let pick = null;
992
+ for (const [id, t] of __timers) {
993
+ if (pick === null || t.due < pick.t.due || (t.due === pick.t.due && id < pick.id)) {
994
+ pick = { id, t };
995
+ }
996
+ }
997
+ __timers.delete(pick.id);
998
+ if (pick.t.due > __vclock) __vclock = pick.t.due;
999
+ __fired++;
1000
+ try { pick.t.fn(); } catch (e) { /* host timers swallow callback throws */ }
1001
+ if (__fired > __MAX_FIRES) { __draining = false; return; }
1002
+ Promise.resolve().then(step);
1003
+ };
1004
+ Promise.resolve().then(step);
1005
+ }
933
1006
 
934
1007
  globalThis.setTimeout = function(fn, delay) {
935
1008
  const id = ++__timerId;
936
1009
  const ms = Math.max(0, Number(delay) || 0);
937
- const start = Date.now();
938
- __timers.set(id, { fn, ms, start, type: "timeout" });
939
- // In the isolate, setTimeout returns the id but the callback is
940
- // executed via a polling mechanism in the async wrapper.
941
- // For simple cases (delay=0), we can use a microtask.
942
- if (ms === 0) {
943
- Promise.resolve().then(() => {
944
- if (__timers.has(id)) {
945
- __timers.delete(id);
946
- fn();
947
- }
948
- });
949
- }
950
- return id;
1010
+ return __schedule(typeof fn === "function" ? fn : function() {}, ms, "timeout", id);
951
1011
  };
952
1012
 
953
1013
  globalThis.clearTimeout = function(id) {
954
1014
  __timers.delete(id);
1015
+ __intervals.delete(id);
955
1016
  };
956
1017
 
957
1018
  globalThis.setInterval = function(fn, delay) {
958
1019
  const id = ++__timerId;
959
1020
  const ms = Math.max(1, Number(delay) || 1);
960
- const wrapper = () => {
961
- if (!__timers.has(id)) return;
962
- fn();
963
- if (__timers.has(id)) {
964
- globalThis.setTimeout(wrapper, ms);
1021
+ __intervals.add(id);
1022
+ const tick = function() {
1023
+ if (!__intervals.has(id)) return;
1024
+ try { fn(); } finally {
1025
+ if (__intervals.has(id)) __schedule(tick, ms, "interval", id);
965
1026
  }
966
1027
  };
967
- __timers.set(id, { fn: wrapper, ms, type: "interval" });
968
- globalThis.setTimeout(wrapper, ms);
969
- return id;
1028
+ return __schedule(tick, ms, "interval", id);
970
1029
  };
971
1030
 
972
1031
  globalThis.clearInterval = function(id) {
973
1032
  __timers.delete(id);
1033
+ __intervals.delete(id);
974
1034
  };
975
1035
 
976
1036
  // queueMicrotask if not available
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.50.3",
3
+ "version": "0.50.4",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
package/src/harness.ts CHANGED
@@ -159,6 +159,16 @@ const now = (): number => Date.now();
159
159
  const FIRST_CHUNK_TIMEOUT_MS = 90_000; // 90s to receive the first chunk from the model
160
160
  const MAX_TRANSIENT_STEP_RETRIES = 1;
161
161
  const COMPACTION_CHECK_INTERVAL_STEPS = 3;
162
+ // Injected as a trailing user turn on the final allowed step, with tools
163
+ // disabled, so a step-exhausted run produces a text summary instead of
164
+ // terminating on a dangling tool call (which surfaces to a parent agent as
165
+ // an empty "(no response)" subagent result). See the `isFinalStep` branch in
166
+ // the run loop.
167
+ const FINAL_STEP_SUMMARY_PROMPT =
168
+ "You have reached the maximum number of steps for this run and cannot call " +
169
+ "any more tools. Do NOT attempt any tool calls. Using only the work you have " +
170
+ "already done, write your final response now: summarize what you found or " +
171
+ "accomplished, include any concrete results, and flag anything left unfinished.";
162
172
  const TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
163
173
  const TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
164
174
  const TOOL_RESULT_PREVIEW_CHARS = 700;
@@ -2297,14 +2307,61 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2297
2307
  };
2298
2308
  const isCancelled = (): boolean => input.abortSignal?.aborted === true;
2299
2309
  let cancellationEmitted = false;
2310
+ // The assistant turn for the current step, captured as it streams. The
2311
+ // assistant message + its tool results are only pushed to `messages`
2312
+ // *together*, after the tool batch finishes — so between "model streamed
2313
+ // a tool call" and "tools done" the turn lives only in these locals. If a
2314
+ // cancellation lands in that window we'd otherwise drop the whole turn
2315
+ // from the canonical history, leaving the next request with back-to-back
2316
+ // user messages and a model with no record of what it just said (the user
2317
+ // still sees it, since the display history is built separately). Cleared
2318
+ // once the turn is committed, and reset at the top of every step.
2319
+ let inflightTurn: {
2320
+ text: string;
2321
+ toolCalls: Array<{ id: string; name: string; input: Record<string, unknown> }>;
2322
+ } | null = null;
2300
2323
  const emitCancellation = (): AgentEvent => {
2301
2324
  cancellationEmitted = true;
2302
2325
  // Snapshot the in-flight messages so the orchestrator can persist them
2303
- // as the canonical history. Drop a trailing assistant tool_use message
2304
- // that has no matching tool result — sending that to the API on the next
2305
- // turn would be rejected.
2306
- const snapshot = trimToValidPrefix([...messages]);
2307
- return pushEvent({ type: "run:cancelled", runId, messages: snapshot });
2326
+ // as the canonical history.
2327
+ const snapshot: Message[] = [...messages];
2328
+ // Re-attach the in-flight assistant turn (if any). Synthesize a
2329
+ // tool_result for every pending tool_use so the turn is a valid prefix —
2330
+ // an assistant tool_use with no following tool result is rejected by the
2331
+ // API on the next turn, which is exactly why a naive snapshot drops it.
2332
+ if (inflightTurn && (inflightTurn.text.length > 0 || inflightTurn.toolCalls.length > 0)) {
2333
+ const hasToolCalls = inflightTurn.toolCalls.length > 0;
2334
+ const assistantContent = hasToolCalls
2335
+ ? JSON.stringify({
2336
+ text: inflightTurn.text,
2337
+ tool_calls: inflightTurn.toolCalls.map((tc) => ({
2338
+ id: tc.id,
2339
+ name: tc.name,
2340
+ input: tc.input,
2341
+ })),
2342
+ })
2343
+ : inflightTurn.text;
2344
+ snapshot.push({
2345
+ role: "assistant",
2346
+ content: assistantContent,
2347
+ metadata: { timestamp: now(), id: randomUUID(), runId },
2348
+ });
2349
+ if (hasToolCalls) {
2350
+ const cancelledResults = inflightTurn.toolCalls.map((tc) => ({
2351
+ type: "tool_result" as const,
2352
+ tool_use_id: tc.id,
2353
+ tool_name: tc.name,
2354
+ content: "Tool execution cancelled by user.",
2355
+ }));
2356
+ snapshot.push({
2357
+ role: "tool",
2358
+ content: JSON.stringify(cancelledResults),
2359
+ metadata: { timestamp: now(), id: randomUUID(), runId },
2360
+ });
2361
+ }
2362
+ }
2363
+ // Defensive: drop any trailing dangling tool_use we didn't pair above.
2364
+ return pushEvent({ type: "run:cancelled", runId, messages: trimToValidPrefix(snapshot) });
2308
2365
  };
2309
2366
 
2310
2367
  const resolvedModelName = agent.frontmatter.model?.name ?? "claude-opus-4-5";
@@ -2424,6 +2481,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2424
2481
  let convertedUpTo = 0;
2425
2482
 
2426
2483
  for (let step = 1; step <= maxSteps; step += 1) {
2484
+ inflightTurn = null;
2427
2485
  try {
2428
2486
  yield* drainBrowserEvents();
2429
2487
  if (isCancelled()) {
@@ -2883,12 +2941,24 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
2883
2941
 
2884
2942
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2885
2943
 
2944
+ // On the last permitted step, force a closing text turn: strip the
2945
+ // tools so the model cannot start another tool call it has no step
2946
+ // left to resolve, and append a one-shot nudge instructing it to
2947
+ // summarize. This is what keeps a step-exhausted run (very common in
2948
+ // subagents) from ending on a dangling tool call that a parent would
2949
+ // see as an empty result. The nudge is appended only to this model
2950
+ // request — it is never written into `messages`/history.
2951
+ const isFinalStep = step === maxSteps;
2952
+ const toolsForStep = isFinalStep ? {} : tools;
2953
+ const messagesForStep: ModelMessage[] = isFinalStep
2954
+ ? [...finalMessages, { role: "user", content: FINAL_STEP_SUMMARY_PROMPT }]
2955
+ : finalMessages;
2886
2956
 
2887
2957
  const result = await streamText({
2888
2958
  model: modelInstance,
2889
2959
  ...(useStaticCache ? {} : { system: systemPrompt }),
2890
- messages: finalMessages,
2891
- tools,
2960
+ messages: messagesForStep,
2961
+ tools: toolsForStep,
2892
2962
  temperature,
2893
2963
  abortSignal: input.abortSignal,
2894
2964
  ...(typeof maxTokens === "number" ? { maxTokens } : {}),
@@ -3026,6 +3096,11 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3026
3096
  return;
3027
3097
  }
3028
3098
 
3099
+ // The model finished streaming this step's text. Capture it so a
3100
+ // cancellation from here on persists what the user already saw; the
3101
+ // tool calls are attached once they're parsed below.
3102
+ inflightTurn = { text: fullText, toolCalls: [] };
3103
+
3029
3104
  if (isCancelled()) {
3030
3105
  yield emitCancellation();
3031
3106
  return;
@@ -3135,6 +3210,7 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3135
3210
  name: tc.toolName,
3136
3211
  input: (tc as any).input as Record<string, unknown>,
3137
3212
  }));
3213
+ if (inflightTurn) inflightTurn.toolCalls = toolCalls;
3138
3214
 
3139
3215
  if (toolCalls.length === 0) {
3140
3216
  // Detect silent empty responses — likely an SDK or model
@@ -3593,6 +3669,9 @@ Code is wrapped in an async IIFE — use \`return\` to return a value to the too
3593
3669
  content: JSON.stringify(toolResultsForModel),
3594
3670
  metadata: toolMsgMeta as Message["metadata"],
3595
3671
  });
3672
+ // Turn is now committed to `messages`; a later cancellation must not
3673
+ // re-append it from the in-flight holder.
3674
+ inflightTurn = null;
3596
3675
 
3597
3676
  // Post-tool-execution soft deadline: long-running tool batches (e.g.
3598
3677
  // multiple web_search calls) can push past the deadline. Checkpoint
@@ -610,50 +610,79 @@ const POLYFILL_FETCH_STUB = `
610
610
 
611
611
  const POLYFILL_TIMERS = `
612
612
  // --- Timers polyfill ---
613
+ //
614
+ // The isolate has no host event loop, so real wall-clock delays can't be
615
+ // honoured. What we *can* do is drain pending timers on the microtask queue
616
+ // (which isolated-vm does pump while resolving the run's promise), firing
617
+ // them in order of their requested delay against a virtual clock. This makes
618
+ // the overwhelmingly common pattern — \`await new Promise(r => setTimeout(r, n))\`
619
+ // as a sleep — actually resolve instead of hanging the whole run forever.
620
+ // Delays collapse to "as soon as possible, in delay order"; that's the right
621
+ // trade for a sandbox with no real time. A runaway setInterval is bounded by
622
+ // __MAX_FIRES here and, ultimately, by the host-side wall-clock timeout.
613
623
  (function() {
614
624
  let __timerId = 0;
615
- const __timers = new Map();
625
+ const __timers = new Map(); // id -> { fn, due, type }
626
+ const __intervals = new Set(); // ids that should reschedule
627
+ let __vclock = 0; // virtual clock (ms)
628
+ let __draining = false;
629
+ let __fired = 0;
630
+ const __MAX_FIRES = 1000000; // backstop against a runaway interval
631
+
632
+ function __schedule(fn, delayMs, type, id) {
633
+ __timers.set(id, { fn, due: __vclock + delayMs, type });
634
+ if (!__draining) __drain();
635
+ return id;
636
+ }
637
+
638
+ function __drain() {
639
+ __draining = true;
640
+ const step = function() {
641
+ if (__timers.size === 0) { __draining = false; return; }
642
+ // Pick the earliest-due timer (ties broken by insertion id for FIFO).
643
+ let pick = null;
644
+ for (const [id, t] of __timers) {
645
+ if (pick === null || t.due < pick.t.due || (t.due === pick.t.due && id < pick.id)) {
646
+ pick = { id, t };
647
+ }
648
+ }
649
+ __timers.delete(pick.id);
650
+ if (pick.t.due > __vclock) __vclock = pick.t.due;
651
+ __fired++;
652
+ try { pick.t.fn(); } catch (e) { /* host timers swallow callback throws */ }
653
+ if (__fired > __MAX_FIRES) { __draining = false; return; }
654
+ Promise.resolve().then(step);
655
+ };
656
+ Promise.resolve().then(step);
657
+ }
616
658
 
617
659
  globalThis.setTimeout = function(fn, delay) {
618
660
  const id = ++__timerId;
619
661
  const ms = Math.max(0, Number(delay) || 0);
620
- const start = Date.now();
621
- __timers.set(id, { fn, ms, start, type: "timeout" });
622
- // In the isolate, setTimeout returns the id but the callback is
623
- // executed via a polling mechanism in the async wrapper.
624
- // For simple cases (delay=0), we can use a microtask.
625
- if (ms === 0) {
626
- Promise.resolve().then(() => {
627
- if (__timers.has(id)) {
628
- __timers.delete(id);
629
- fn();
630
- }
631
- });
632
- }
633
- return id;
662
+ return __schedule(typeof fn === "function" ? fn : function() {}, ms, "timeout", id);
634
663
  };
635
664
 
636
665
  globalThis.clearTimeout = function(id) {
637
666
  __timers.delete(id);
667
+ __intervals.delete(id);
638
668
  };
639
669
 
640
670
  globalThis.setInterval = function(fn, delay) {
641
671
  const id = ++__timerId;
642
672
  const ms = Math.max(1, Number(delay) || 1);
643
- const wrapper = () => {
644
- if (!__timers.has(id)) return;
645
- fn();
646
- if (__timers.has(id)) {
647
- globalThis.setTimeout(wrapper, ms);
673
+ __intervals.add(id);
674
+ const tick = function() {
675
+ if (!__intervals.has(id)) return;
676
+ try { fn(); } finally {
677
+ if (__intervals.has(id)) __schedule(tick, ms, "interval", id);
648
678
  }
649
679
  };
650
- __timers.set(id, { fn: wrapper, ms, type: "interval" });
651
- globalThis.setTimeout(wrapper, ms);
652
- return id;
680
+ return __schedule(tick, ms, "interval", id);
653
681
  };
654
682
 
655
683
  globalThis.clearInterval = function(id) {
656
684
  __timers.delete(id);
685
+ __intervals.delete(id);
657
686
  };
658
687
 
659
688
  // queueMicrotask if not available
@@ -153,6 +153,14 @@ export function createIsolateRuntime(config: {
153
153
  const t0 = performance.now();
154
154
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
155
155
  let context: any;
156
+ // Wall-clock guard. isolated-vm's `timeout` option only bounds the
157
+ // *synchronous* portion of an eval; when the script returns a promise
158
+ // (which ours always does — it's an async IIFE) a never-settling promise
159
+ // would hang here forever (e.g. `await new Promise(() => {})`, or a
160
+ // bound host call that never resolves). Race the eval against a host
161
+ // timer that disposes the isolate, so `timeLimit` bounds total execution.
162
+ let timedOut = false;
163
+ let wallTimer: ReturnType<typeof setTimeout> | undefined;
156
164
  try {
157
165
  context = await isolate.createContext();
158
166
  const jail = context.global;
@@ -197,12 +205,35 @@ export function createIsolateRuntime(config: {
197
205
  // (context.eval + promise option handles Reference.apply resolution
198
206
  // correctly, unlike compileScript().run())
199
207
  const wrapped = `(async () => {\n${code}\n})()`;
200
- const rawResult = await context.eval(wrapped, {
208
+ const evalPromise = context.eval(wrapped, {
201
209
  filename: "<user-code>",
202
210
  promise: true,
203
211
  copy: true,
204
212
  timeout: config.timeout,
205
213
  });
214
+ const rawResult =
215
+ config.timeout > 0
216
+ ? await Promise.race([
217
+ evalPromise,
218
+ new Promise((_resolve, reject) => {
219
+ wallTimer = setTimeout(() => {
220
+ timedOut = true;
221
+ // Disposing rejects the pending eval; this reject is the
222
+ // one that wins the race when the promise never settles.
223
+ try {
224
+ isolate.dispose();
225
+ } catch {
226
+ /* already disposed */
227
+ }
228
+ reject(new Error("Execution timed out"));
229
+ }, config.timeout);
230
+ }),
231
+ ])
232
+ : await evalPromise;
233
+ if (wallTimer) {
234
+ clearTimeout(wallTimer);
235
+ wallTimer = undefined;
236
+ }
206
237
 
207
238
  // Read captured stdout/stderr from isolate
208
239
  const stdout = (await context.eval("__stdout.join('\\n')", { copy: true })) as string;
@@ -237,6 +268,18 @@ export function createIsolateRuntime(config: {
237
268
  };
238
269
  }
239
270
 
271
+ if (timedOut) {
272
+ return {
273
+ stdout: "",
274
+ stderr: "",
275
+ error: {
276
+ message: `Execution timed out after ${config.timeout}ms`,
277
+ name: "TimeoutError",
278
+ },
279
+ executionTimeMs: elapsed,
280
+ };
281
+ }
282
+
240
283
  // Try to recover stdout/stderr captured before the error
241
284
  let stdout = "";
242
285
  let stderr = "";
@@ -258,6 +301,7 @@ export function createIsolateRuntime(config: {
258
301
  executionTimeMs: elapsed,
259
302
  };
260
303
  } finally {
304
+ if (wallTimer) clearTimeout(wallTimer);
261
305
  if (abortHandler && signal) {
262
306
  signal.removeEventListener("abort", abortHandler);
263
307
  }
@@ -46,6 +46,7 @@ export {
46
46
 
47
47
  export {
48
48
  AgentOrchestrator,
49
+ lastAssistantText,
49
50
  type ActiveConversationRun,
50
51
  type EventSink,
51
52
  type OrchestratorHooks,
@@ -1,4 +1,4 @@
1
- import type { AgentEvent, Message } from "@poncho-ai/sdk";
1
+ import { getTextContent, type AgentEvent, type Message } from "@poncho-ai/sdk";
2
2
  import type { Conversation, ConversationStore, PendingSubagentResult } from "../state.js";
3
3
  import type { AgentHarness } from "../harness.js";
4
4
  import type { TelemetryEmitter } from "../telemetry.js";
@@ -28,6 +28,45 @@ import {
28
28
  STALE_SUBAGENT_THRESHOLD_MS,
29
29
  } from "./subagents.js";
30
30
 
31
+ // ── Subagent result extraction ──
32
+
33
+ /**
34
+ * Pull the human-readable text out of a single assistant message.
35
+ *
36
+ * Beyond the `string | ContentPart[]` shapes `getTextContent` handles, the
37
+ * harness serializes an assistant turn that ALSO made tool calls as a JSON
38
+ * string `{"text":"...","tool_calls":[...]}` (see the run loop's
39
+ * `assistantContent`). A naive `typeof content === "string"` read would hand
40
+ * that raw JSON blob back as the "response"; here we unwrap it to its `.text`.
41
+ */
42
+ const assistantMessageText = (message: Message): string => {
43
+ const raw = getTextContent(message).trim();
44
+ if (raw.startsWith("{") && raw.includes("\"tool_calls\"")) {
45
+ try {
46
+ const parsed = JSON.parse(raw) as { text?: unknown };
47
+ if (typeof parsed.text === "string") return parsed.text.trim();
48
+ } catch {
49
+ // Not the envelope we expected — fall through to the raw string.
50
+ }
51
+ }
52
+ return raw;
53
+ };
54
+
55
+ /**
56
+ * Find the last non-empty assistant text in a subagent transcript. Walking
57
+ * backwards (rather than reading only the final message) means a subagent
58
+ * that ended on a tool-call turn still yields the prose it produced just
59
+ * before — instead of surfacing to the parent as an empty result.
60
+ */
61
+ export const lastAssistantText = (messages: Message[]): string => {
62
+ for (let i = messages.length - 1; i >= 0; i -= 1) {
63
+ if (messages[i].role !== "assistant") continue;
64
+ const text = assistantMessageText(messages[i]);
65
+ if (text) return text;
66
+ }
67
+ return "";
68
+ };
69
+
31
70
  // ── Types ──
32
71
 
33
72
  export type ActiveConversationRun = {
@@ -933,14 +972,11 @@ export class AgentOrchestrator {
933
972
  conversationId: childConversationId,
934
973
  });
935
974
 
936
- let subagentResponse = runResult?.response ?? draft.assistantResponse;
975
+ let subagentResponse = (runResult?.response ?? draft.assistantResponse ?? "").trim();
937
976
  if (!subagentResponse) {
938
977
  const freshSubConv = await this.conversationStore.get(childConversationId);
939
978
  if (freshSubConv) {
940
- const lastAssistant = [...freshSubConv.messages].reverse().find(m => m.role === "assistant");
941
- if (lastAssistant && typeof lastAssistant.content === "string") {
942
- subagentResponse = lastAssistant.content;
943
- }
979
+ subagentResponse = lastAssistantText(freshSubConv.messages);
944
980
  }
945
981
  }
946
982
  const pendingResult: PendingSubagentResult = {
@@ -1040,8 +1076,16 @@ export class AgentOrchestrator {
1040
1076
  conversation.subagentCallbackCount = callbackCount;
1041
1077
 
1042
1078
  for (const pr of pendingResults) {
1079
+ // An empty response is recoverable, not a dead end: the subagent's work
1080
+ // lives in its transcript even when it produced no closing summary (e.g.
1081
+ // it ran out of steps mid-task). Hand the parent an actionable pointer
1082
+ // instead of a silent "(no response)" it can't act on.
1083
+ const responseText = (pr.result?.response ?? "").trim();
1084
+ const responseLine = responseText
1085
+ || `(subagent produced no final summary after ${pr.result?.steps ?? 0} step(s); its work may be incomplete. `
1086
+ + `Call read_subagent with subagent_id "${pr.subagentId}" and mode "assistant" to retrieve what it did.)`;
1043
1087
  const resultBody = pr.result
1044
- ? `Status: ${pr.result.status}\nResponse: ${pr.result.response ?? "(no response)"}\nSteps: ${pr.result.steps}, Duration: ${pr.result.duration}ms`
1088
+ ? `Status: ${pr.result.status}\nResponse: ${responseLine}\nSteps: ${pr.result.steps}, Duration: ${pr.result.duration}ms`
1045
1089
  : pr.error
1046
1090
  ? `Error: ${pr.error.message}`
1047
1091
  : "(no result)";
@@ -1322,14 +1366,11 @@ export class AgentOrchestrator {
1322
1366
  conversationId,
1323
1367
  });
1324
1368
 
1325
- let subagentResponse = runResult?.response ?? draft.assistantResponse;
1369
+ let subagentResponse = (runResult?.response ?? draft.assistantResponse ?? "").trim();
1326
1370
  if (!subagentResponse) {
1327
1371
  const freshSubConv = await this.conversationStore.get(conversationId);
1328
1372
  if (freshSubConv) {
1329
- const lastAssistant = [...freshSubConv.messages].reverse().find(m => m.role === "assistant");
1330
- if (lastAssistant) {
1331
- subagentResponse = typeof lastAssistant.content === "string" ? lastAssistant.content : "";
1332
- }
1373
+ subagentResponse = lastAssistantText(freshSubConv.messages);
1333
1374
  }
1334
1375
  }
1335
1376
 
@@ -1,8 +1,10 @@
1
1
  import { describe, expect, it } from "vitest";
2
2
  import { createIsolateRuntime } from "../src/isolate/runtime.js";
3
+ import { buildPolyfillPreamble } from "../src/isolate/polyfills.js";
3
4
  import type { IsolateBinding } from "../src/config.js";
4
5
 
5
6
  const DEFAULT_CONFIG = { memoryLimit: 64, timeout: 5000, outputLimit: 65536 };
7
+ const POLYFILLS = buildPolyfillPreamble(false);
6
8
 
7
9
  describe("IsolateRuntime", () => {
8
10
  it("executes basic JavaScript and returns a result", async () => {
@@ -136,6 +138,79 @@ describe("IsolateRuntime", () => {
136
138
  });
137
139
  });
138
140
 
141
+ describe("IsolateRuntime timers + wall-clock", () => {
142
+ it("resolves a non-zero setTimeout sleep instead of hanging", async () => {
143
+ const runtime = createIsolateRuntime(DEFAULT_CONFIG);
144
+ const res = await runtime.execute(
145
+ `await new Promise(r => setTimeout(r, 50)); return "slept";`,
146
+ {},
147
+ null,
148
+ undefined,
149
+ POLYFILLS,
150
+ );
151
+
152
+ expect(res.error).toBeUndefined();
153
+ expect(res.result).toBe("slept");
154
+ });
155
+
156
+ it("runs awaited timers in delay order against the virtual clock", async () => {
157
+ const runtime = createIsolateRuntime(DEFAULT_CONFIG);
158
+ const res = await runtime.execute(
159
+ `const order = [];
160
+ async function at(ms, label) {
161
+ await new Promise(r => setTimeout(r, ms));
162
+ order.push(label);
163
+ }
164
+ await Promise.all([at(100, "a"), at(10, "b"), at(50, "c")]);
165
+ return order;`,
166
+ {},
167
+ null,
168
+ undefined,
169
+ POLYFILLS,
170
+ );
171
+
172
+ expect(res.error).toBeUndefined();
173
+ expect(res.result).toEqual(["b", "c", "a"]);
174
+ });
175
+
176
+ it("supports setInterval + clearInterval", async () => {
177
+ const runtime = createIsolateRuntime(DEFAULT_CONFIG);
178
+ const res = await runtime.execute(
179
+ `let n = 0;
180
+ await new Promise(resolve => {
181
+ const id = setInterval(() => {
182
+ n += 1;
183
+ if (n >= 3) { clearInterval(id); resolve(); }
184
+ }, 10);
185
+ });
186
+ return n;`,
187
+ {},
188
+ null,
189
+ undefined,
190
+ POLYFILLS,
191
+ );
192
+
193
+ expect(res.error).toBeUndefined();
194
+ expect(res.result).toBe(3);
195
+ });
196
+
197
+ it("times out a never-resolving promise via the wall-clock guard", async () => {
198
+ const runtime = createIsolateRuntime({ ...DEFAULT_CONFIG, timeout: 200 });
199
+ const start = performance.now();
200
+ const res = await runtime.execute(
201
+ `await new Promise(() => {}); return "never";`,
202
+ {},
203
+ null,
204
+ );
205
+
206
+ expect(res.error).toBeDefined();
207
+ expect(res.error!.message).toMatch(/timed out/i);
208
+ expect(res.error!.name).toBe("TimeoutError");
209
+ // Bounded by the wall clock, not hanging forever.
210
+ expect(performance.now() - start).toBeLessThan(2000);
211
+ });
212
+ });
213
+
139
214
  describe("IsolateRuntime bindings", () => {
140
215
  it("calls async bindings and returns results", async () => {
141
216
  const runtime = createIsolateRuntime(DEFAULT_CONFIG);
@@ -8,6 +8,7 @@ import {
8
8
  createTurnDraftState,
9
9
  recordStandardTurnEvent,
10
10
  executeConversationTurn,
11
+ lastAssistantText,
11
12
  } from "../src/orchestrator/index.js";
12
13
  import type { Conversation } from "../src/state.js";
13
14
 
@@ -174,3 +175,65 @@ describe("orchestrator helpers", () => {
174
175
  expect(seenTypes).toEqual(["run:started", "tool:started", "model:chunk", "run:completed"]);
175
176
  });
176
177
  });
178
+
179
+ describe("lastAssistantText (subagent result extraction)", () => {
180
+ it("returns a plain-string assistant message", () => {
181
+ const messages: Message[] = [
182
+ { role: "user", content: "find me 3 creators" },
183
+ { role: "assistant", content: "Here are 3 creators: ..." },
184
+ ];
185
+ expect(lastAssistantText(messages)).toBe("Here are 3 creators: ...");
186
+ });
187
+
188
+ it("unwraps the {text,tool_calls} envelope to its text", () => {
189
+ // How the run loop serializes an assistant turn that also called tools.
190
+ const envelope = JSON.stringify({
191
+ text: "Searching for candidates now.",
192
+ tool_calls: [{ id: "t1", name: "web_search", input: { q: "creators" } }],
193
+ });
194
+ const messages: Message[] = [{ role: "assistant", content: envelope }];
195
+ expect(lastAssistantText(messages)).toBe("Searching for candidates now.");
196
+ });
197
+
198
+ it("walks back past a trailing tool-call turn with no text", () => {
199
+ // The reported bug: subagent ends on a pure tool call (empty text), but it
200
+ // produced a real summary the turn before. We must surface that summary,
201
+ // not an empty string.
202
+ const toolOnly = JSON.stringify({
203
+ text: "",
204
+ tool_calls: [{ id: "t9", name: "web_search", input: { q: "x" } }],
205
+ });
206
+ const messages: Message[] = [
207
+ { role: "user", content: "go" },
208
+ { role: "assistant", content: "Found 12 candidates, here they are: ..." },
209
+ { role: "tool", content: "[]" },
210
+ { role: "assistant", content: toolOnly },
211
+ ];
212
+ expect(lastAssistantText(messages)).toBe("Found 12 candidates, here they are: ...");
213
+ });
214
+
215
+ it("extracts text from ContentPart[] content", () => {
216
+ const messages: Message[] = [
217
+ {
218
+ role: "assistant",
219
+ content: [
220
+ { type: "text", text: "part one" },
221
+ { type: "file", data: "Zm9v", mediaType: "image/png" },
222
+ { type: "text", text: " part two" },
223
+ ],
224
+ },
225
+ ];
226
+ expect(lastAssistantText(messages)).toBe("part one part two");
227
+ });
228
+
229
+ it("returns empty string when there is genuinely no assistant text", () => {
230
+ const messages: Message[] = [
231
+ { role: "user", content: "hi" },
232
+ {
233
+ role: "assistant",
234
+ content: JSON.stringify({ text: "", tool_calls: [{ id: "t1", name: "x", input: {} }] }),
235
+ },
236
+ ];
237
+ expect(lastAssistantText(messages)).toBe("");
238
+ });
239
+ });