npm - @poncho-ai/cli - Versions diffs - 0.30.0 → 0.30.2 - Mend

@poncho-ai/cli 0.30.0 → 0.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/.turbo/turbo-build.log +6 -6
package/CHANGELOG.md +28 -0
package/dist/{chunk-5OLH7U3C.js → chunk-FA546WPW.js} +703 -208
package/dist/cli.js +1 -1
package/dist/index.d.ts +10 -0
package/dist/index.js +1 -1
package/dist/{run-interactive-ink-EMTC7MK7.js → run-interactive-ink-FUMHN6DS.js} +1 -1
package/package.json +4 -4
package/src/index.ts +368 -110
package/src/web-ui-client.ts +241 -9
package/src/web-ui-styles.ts +50 -0

package/src/index.ts (changed)

@@ -721,12 +721,16 @@ Set environment variables on your deployment platform:
 ANTHROPIC_API_KEY=sk-ant-...   # Required
 PONCHO_AUTH_TOKEN=your-secret  # Optional: protect your endpoint
 PONCHO_MAX_DURATION=55         # Optional: serverless timeout in seconds (enables auto-continuation)
+PONCHO_INTERNAL_SECRET=...     # Recommended on serverless: shared secret for internal callback auth
 \`\`\`
 When \`PONCHO_MAX_DURATION\` is set, the agent automatically checkpoints and resumes across
 request cycles when it approaches the platform timeout. The web UI and client SDK handle
 this transparently.
+For serverless deployments with subagents or background callbacks, use a shared state backend
+(\`upstash\`, \`redis\`, or \`dynamodb\`) instead of \`state.provider: 'local'\` / \`'memory'\`.
 ## Troubleshooting
 ### Vercel deploy issues
@@ -734,6 +738,7 @@ this transparently.
 - After upgrading \`@poncho-ai/cli\`, re-run \`poncho build vercel --force\` to refresh generated deploy files.
 - If Vercel fails during \`pnpm install\` due to a lockfile mismatch, run \`pnpm install --no-frozen-lockfile\` locally and commit \`pnpm-lock.yaml\`.
 - Deploy from the project root: \`vercel deploy --prod\`.
+- For subagents/background callbacks, set \`PONCHO_INTERNAL_SECRET\` and use non-local state storage.
 For full reference:
 https://github.com/cesr/poncho-ai
@@ -1391,6 +1396,11 @@ export type RequestHandler = ((
   _cronJobs?: Record<string, CronJobConfig>;
   _conversationStore?: ConversationStore;
   _messagingAdapters?: Map<string, MessagingAdapter>;
+  _activeConversationRuns?: Map<string, { ownerId: string; abortController: AbortController; runId: string | null }>;
+  _pendingCallbackNeeded?: Set<string>;
+  _processSubagentCallback?: (conversationId: string, skipLockCheck?: boolean) => Promise<void>;
+  _broadcastEvent?: (conversationId: string, event: AgentEvent) => void;
+  _finishConversationStream?: (conversationId: string) => void;
 };
 export const createRequestHandler = async (options?: {
@@ -1549,7 +1559,8 @@ export const createRequestHandler = async (options?: {
   await harness.initialize();
   const telemetry = new TelemetryEmitter(config?.telemetry);
   const identity = await ensureAgentIdentity(workingDir);
-  const conversationStore = createConversationStore(resolveStateConfig(config), {
+  const stateConfig = resolveStateConfig(config);
+  const conversationStore = createConversationStore(stateConfig, {
     workingDir,
     agentId: identity.id,
   });
@@ -1573,6 +1584,11 @@ export const createRequestHandler = async (options?: {
   };
   const pendingSubagentApprovals = new Map<string, PendingSubagentApproval>();
+  // Tracks approval decisions in memory so parallel batch requests don't
+  // race against the conversation store (each file-store read returns a
+  // separate copy, causing last-writer-wins when decisions overlap).
+  const approvalDecisionTracker = new Map<string, Map<string, boolean>>();
   const getSubagentDepth = async (conversationId: string): Promise<number> => {
     let depth = 0;
     let current = await conversationStore.get(conversationId);
@@ -1946,9 +1962,10 @@ export const createRequestHandler = async (options?: {
           try { await childHarness.shutdown(); } catch {}
           if (isServerless) {
-            selfFetchWithRetry(`/api/internal/subagent/${encodeURIComponent(childConversationId)}/run`, { continuation: true }).catch(err =>
+            const work = selfFetchWithRetry(`/api/internal/subagent/${encodeURIComponent(childConversationId)}/run`, { continuation: true }).catch(err =>
               console.error(`[poncho][subagent] Continuation self-fetch failed:`, err instanceof Error ? err.message : err),
             );
+            doWaitUntil(work);
           } else {
             runSubagent(childConversationId, parentConversationId, task, ownerId, true).catch(err =>
               console.error(`[poncho][subagent] Continuation failed:`, err instanceof Error ? err.message : err),
@@ -2042,8 +2059,15 @@ export const createRequestHandler = async (options?: {
   // ---------------------------------------------------------------------------
   const MAX_SUBAGENT_CALLBACK_COUNT = 20;
+  // Track conversations that received subagent results while a run was active.
+  // processSubagentCallback's finally block checks this to reliably re-trigger
+  // even if the store-level pendingSubagentResults was clobbered by a concurrent
+  // read-modify-write.
+  const pendingCallbackNeeded = new Set<string>();
   const triggerParentCallback = async (parentConversationId: string): Promise<void> => {
     if (activeConversationRuns.has(parentConversationId)) {
+      pendingCallbackNeeded.add(parentConversationId);
       return;
     }
     if (isServerless) {
@@ -2057,15 +2081,21 @@ export const createRequestHandler = async (options?: {
   const CALLBACK_LOCK_STALE_MS = 5 * 60 * 1000;
-  const processSubagentCallback = async (conversationId: string): Promise<void> => {
+  const processSubagentCallback = async (conversationId: string, skipLockCheck = false): Promise<void> => {
     const conversation = await conversationStore.get(conversationId);
     if (!conversation) return;
     const pendingResults = conversation.pendingSubagentResults ?? [];
-    if (pendingResults.length === 0) return;
-    // Store-based lock for serverless: skip if another invocation is processing
-    if (conversation.runningCallbackSince) {
+    const hasOrphanedContinuation = pendingResults.length === 0
+      && Array.isArray(conversation._continuationMessages)
+      && conversation._continuationMessages.length > 0
+      && !activeConversationRuns.has(conversationId);
+    if (pendingResults.length === 0 && !hasOrphanedContinuation) return;
+    // Store-based lock for serverless: skip if another invocation is processing.
+    // When re-triggered from a previous callback's finally block, skipLockCheck
+    // is true because we know the previous callback has finished.
+    if (!skipLockCheck && conversation.runningCallbackSince) {
       const elapsed = Date.now() - conversation.runningCallbackSince;
       if (elapsed < CALLBACK_LOCK_STALE_MS) {
         return;
@@ -2076,6 +2106,7 @@ export const createRequestHandler = async (options?: {
     // Acquire lock and clear pending
     conversation.pendingSubagentResults = [];
     conversation.runningCallbackSince = Date.now();
+    conversation.runStatus = "running";
     const callbackCount = (conversation.subagentCallbackCount ?? 0) + 1;
     conversation.subagentCallbackCount = callbackCount;
@@ -2097,11 +2128,13 @@ export const createRequestHandler = async (options?: {
     if (callbackCount > MAX_SUBAGENT_CALLBACK_COUNT) {
       console.warn(`[poncho][subagent-callback] Circuit breaker: ${callbackCount} callbacks for ${conversationId}, skipping re-run`);
       conversation.runningCallbackSince = undefined;
+      conversation.runStatus = "idle";
       await conversationStore.update(conversation);
       return;
     }
-    console.log(`[poncho][subagent-callback] Processing ${pendingResults.length} result(s) for ${conversationId} (callback #${callbackCount})`);
+    const isContinuationResume = hasOrphanedContinuation && pendingResults.length === 0;
+    console.log(`[poncho][subagent-callback] Processing ${pendingResults.length} result(s) for ${conversationId} (callback #${callbackCount})${isContinuationResume ? " (continuation resume)" : ""}`);
     const abortController = new AbortController();
     activeConversationRuns.set(conversationId, {
@@ -2109,12 +2142,29 @@ export const createRequestHandler = async (options?: {
       abortController,
       runId: null,
     });
+    // Reopen/reset the parent stream for this callback run so clients that stay
+    // on the main conversation can subscribe to live callback events.
+    const prevStream = conversationEventStreams.get(conversationId);
+    if (prevStream) {
+      prevStream.finished = false;
+      prevStream.buffer = [];
+    } else {
+      conversationEventStreams.set(conversationId, {
+        buffer: [],
+        subscribers: new Set(),
+        finished: false,
+      });
+    }
-    const historyMessages = [...conversation.messages];
+    const historyMessages = isContinuationResume && conversation._continuationMessages?.length
+      ? [...conversation._continuationMessages]
+      : [...conversation.messages];
     let assistantResponse = "";
     let latestRunId = "";
     let runContinuation = false;
     let runContinuationMessages: Message[] | undefined;
+    let runContextTokens = conversation.contextTokens ?? 0;
+    let runContextWindow = conversation.contextWindow ?? 0;
     const toolTimeline: string[] = [];
     const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
     let currentTools: string[] = [];
@@ -2170,6 +2220,8 @@ export const createRequestHandler = async (options?: {
           if (assistantResponse.length === 0 && event.result.response) {
             assistantResponse = event.result.response;
           }
+          runContextTokens = event.result.contextTokens ?? runContextTokens;
+          runContextWindow = event.result.contextWindow ?? runContextWindow;
           if (event.result.continuation) {
             runContinuation = true;
             if (event.result.continuationMessages) {
@@ -2200,6 +2252,9 @@ export const createRequestHandler = async (options?: {
           }
           freshConv.runtimeRunId = latestRunId || freshConv.runtimeRunId;
           freshConv.runningCallbackSince = undefined;
+          freshConv.runStatus = "idle";
+          if (runContextTokens > 0) freshConv.contextTokens = runContextTokens;
+          if (runContextWindow > 0) freshConv.contextWindow = runContextWindow;
           freshConv.updatedAt = Date.now();
           await conversationStore.update(freshConv);
@@ -2226,9 +2281,14 @@ export const createRequestHandler = async (options?: {
       // Handle continuation for the callback run itself
       if (runContinuation) {
         if (isServerless) {
-          selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
+          const work = selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
             console.error(`[poncho][subagent-callback] Continuation self-fetch failed:`, err instanceof Error ? err.message : err),
           );
+          doWaitUntil(work);
+        } else {
+          processSubagentCallback(conversationId, true).catch(err =>
+            console.error(`[poncho][subagent-callback] Continuation failed:`, err instanceof Error ? err.message : err),
+          );
         }
       }
     } catch (err) {
@@ -2236,30 +2296,50 @@ export const createRequestHandler = async (options?: {
       const errConv = await conversationStore.get(conversationId);
       if (errConv) {
         errConv.runningCallbackSince = undefined;
+        errConv.runStatus = "idle";
         await conversationStore.update(errConv);
       }
     } finally {
       activeConversationRuns.delete(conversationId);
       finishConversationStream(conversationId);
+      // Check both the in-memory flag (always reliable) and the store.
+      // We drain the flag first so a concurrent triggerParentCallback that
+      // sets it right after our delete above is still caught on the next
+      // iteration.
+      const hadDeferredTrigger = pendingCallbackNeeded.delete(conversationId);
       const freshConv = await conversationStore.get(conversationId);
-      if (freshConv) {
-        if (freshConv.runningCallbackSince) {
-          freshConv.runningCallbackSince = undefined;
-          await conversationStore.update(freshConv);
-        }
-      }
+      const hasPendingInStore = !!freshConv?.pendingSubagentResults?.length;
-      if (freshConv?.pendingSubagentResults?.length) {
+      if (hadDeferredTrigger || hasPendingInStore) {
+        // Re-trigger immediately. Skip the runningCallbackSince lock check
+        // because we know this callback just finished. The re-triggered
+        // callback will overwrite runningCallbackSince with its own timestamp.
         if (isServerless) {
           selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
             console.error(`[poncho][subagent-callback] Recursive callback self-fetch failed:`, err instanceof Error ? err.message : err),
           );
         } else {
-          processSubagentCallback(conversationId).catch(err =>
+          processSubagentCallback(conversationId, true).catch(err =>
             console.error(`[poncho][subagent-callback] Recursive callback failed:`, err instanceof Error ? err.message : err),
           );
         }
+      } else if (freshConv?.runningCallbackSince) {
+        // No re-trigger needed. Use the atomic clearCallbackLock to avoid
+        // clobbering concurrent appendSubagentResult writes.
+        const afterClear = await conversationStore.clearCallbackLock(conversationId);
+        // Double-check: an append may have raced even the atomic clear
+        if (afterClear?.pendingSubagentResults?.length) {
+          if (isServerless) {
+            selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
+              console.error(`[poncho][subagent-callback] Post-clear callback self-fetch failed:`, err instanceof Error ? err.message : err),
+            );
+          } else {
+            processSubagentCallback(conversationId, true).catch(err =>
+              console.error(`[poncho][subagent-callback] Post-clear callback failed:`, err instanceof Error ? err.message : err),
+            );
+          }
+        }
       }
     }
   };
@@ -2461,14 +2541,6 @@ export const createRequestHandler = async (options?: {
           if (active && active.abortController === abortController) {
             active.runId = event.runId;
           }
-          if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
-            runContextWindow = event.contextWindow;
-          }
-        }
-        if (event.type === "model:response") {
-          if (typeof event.usage?.input === "number") {
-            runContextTokens = event.usage.input;
-          }
         }
         if (event.type === "model:chunk") {
           if (currentTools.length > 0) {
@@ -2533,12 +2605,12 @@ export const createRequestHandler = async (options?: {
           }
           checkpointedRun = true;
         }
-        if (
-          event.type === "run:completed" &&
-          assistantResponse.length === 0 &&
-          event.result.response
-        ) {
-          assistantResponse = event.result.response;
+        if (event.type === "run:completed") {
+          if (assistantResponse.length === 0 && event.result.response) {
+            assistantResponse = event.result.response;
+          }
+          runContextTokens = event.result.contextTokens ?? runContextTokens;
+          runContextWindow = event.result.contextWindow ?? runContextWindow;
         }
         if (event.type === "run:error") {
           assistantResponse = assistantResponse || `[Error: ${event.error.message}]`;
@@ -2627,6 +2699,15 @@ export const createRequestHandler = async (options?: {
       runConversations.delete(latestRunId);
     }
     console.log("[resume-run] complete for", conversationId);
+    // Check for pending subagent results that arrived during the run
+    const hadDeferred = pendingCallbackNeeded.delete(conversationId);
+    const postConv = await conversationStore.get(conversationId);
+    if (hadDeferred || postConv?.pendingSubagentResults?.length) {
+      processSubagentCallback(conversationId, true).catch(err =>
+        console.error(`[poncho][subagent-callback] Post-resume callback failed:`, err instanceof Error ? err.message : err),
+      );
+    }
   };
   // ---------------------------------------------------------------------------
@@ -2783,14 +2864,6 @@ export const createRequestHandler = async (options?: {
             latestRunId = event.runId;
             runOwners.set(event.runId, "local-owner");
             runConversations.set(event.runId, conversationId);
-            if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
-              runContextWindow = event.contextWindow;
-            }
-          }
-          if (event.type === "model:response") {
-            if (typeof event.usage?.input === "number") {
-              runContextTokens = event.usage.input;
-            }
           }
           if (event.type === "model:chunk") {
             if (currentTools.length > 0) {
@@ -2892,6 +2965,8 @@ export const createRequestHandler = async (options?: {
             }
             runSteps = event.result.steps;
             if (typeof event.result.maxSteps === "number") runMaxSteps = event.result.maxSteps;
+            runContextTokens = event.result.contextTokens ?? runContextTokens;
+            runContextWindow = event.result.contextWindow ?? runContextWindow;
           }
           if (event.type === "run:error") {
             assistantResponse = assistantResponse || `[Error: ${event.error.message}]`;
@@ -2962,41 +3037,102 @@ export const createRequestHandler = async (options?: {
   }
   const isServerless = !!waitUntilHook;
-  const internalSecret = globalThis.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`;
+  const configuredInternalSecret = process.env.PONCHO_INTERNAL_SECRET?.trim();
+  const vercelDeploymentSecret = process.env.VERCEL_DEPLOYMENT_ID?.trim();
+  const fallbackInternalSecret = globalThis.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`;
+  const internalSecret = configuredInternalSecret || vercelDeploymentSecret || fallbackInternalSecret;
+  const isUsingEphemeralInternalSecret = !configuredInternalSecret && !vercelDeploymentSecret;
   let selfBaseUrl: string | null = process.env.VERCEL_URL
     ? `https://${process.env.VERCEL_URL}`
     : null;
+  if (!selfBaseUrl && process.env.VERCEL_PROJECT_PRODUCTION_URL) {
+    selfBaseUrl = `https://${process.env.VERCEL_PROJECT_PRODUCTION_URL}`;
+  }
+  if (!selfBaseUrl && process.env.PONCHO_SELF_BASE_URL) {
+    selfBaseUrl = process.env.PONCHO_SELF_BASE_URL.replace(/\/+$/, "");
+  }
+  if (isServerless && isUsingEphemeralInternalSecret) {
+    console.warn(
+      "[poncho][serverless] No stable internal secret found. Set PONCHO_INTERNAL_SECRET to avoid intermittent internal callback failures across instances.",
+    );
+  }
+  if (isServerless && !selfBaseUrl) {
+    console.warn(
+      "[poncho][serverless] No self base URL available. Set PONCHO_SELF_BASE_URL if internal background callbacks fail.",
+    );
+  }
+  const stateProvider = stateConfig?.provider ?? "local";
+  if (isServerless && (stateProvider === "local" || stateProvider === "memory")) {
+    console.warn(
+      `[poncho][serverless] state.provider="${stateProvider}" may lose cross-invocation state. Prefer "upstash", "redis", or "dynamodb" for subagents/reliability.`,
+    );
+  }
   const doWaitUntil = (promise: Promise<unknown>): void => {
     if (waitUntilHook) waitUntilHook(promise);
   };
-  const selfFetch = (path: string, body?: Record<string, unknown>): Promise<Response | void> => {
-    if (!selfBaseUrl) return Promise.resolve();
-    return fetch(`${selfBaseUrl}${path}`, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        "x-poncho-internal": internalSecret,
-      },
-      body: body ? JSON.stringify(body) : undefined,
-    }).catch(err => {
-      console.error(`[poncho][self-fetch] Failed ${path}:`, err instanceof Error ? err.message : err);
-    }) as Promise<Response | void>;
-  };
   const selfFetchWithRetry = async (path: string, body?: Record<string, unknown>, retries = 3): Promise<Response | void> => {
+    if (!selfBaseUrl) {
+      console.error(`[poncho][self-fetch] Missing self base URL for ${path}`);
+      return;
+    }
+    let lastError: unknown;
     for (let attempt = 0; attempt < retries; attempt++) {
       try {
-        const result = await selfFetch(path, body);
-        return result;
+        const result = await fetch(`${selfBaseUrl}${path}`, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "x-poncho-internal": internalSecret,
+          },
+          body: body ? JSON.stringify(body) : undefined,
+        });
+        if (result.ok) {
+          return result;
+        }
+        const responseText = await result.text().catch(() => "");
+        lastError = new Error(
+          `HTTP ${result.status}${responseText ? `: ${responseText.slice(0, 200)}` : ""}`,
+        );
       } catch (err) {
-        if (attempt === retries - 1) throw err;
-        await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
+        lastError = err;
+      }
+      if (attempt === retries - 1) {
+        break;
       }
+      await new Promise((resolveSleep) => setTimeout(resolveSleep, 1000 * (attempt + 1)));
+    }
+    if (lastError) {
+      console.error(
+        `[poncho][self-fetch] Failed ${path} after ${retries} attempt(s):`,
+        lastError instanceof Error ? lastError.message : String(lastError),
+      );
+      if (
+        lastError instanceof Error
+        && (lastError.message.includes("HTTP 403") || lastError.message.includes("HTTP 401"))
+      ) {
+        console.error(
+          "[poncho][self-fetch] Internal auth failed. Ensure all serverless instances share PONCHO_INTERNAL_SECRET.",
+        );
+      }
+    } else {
+      console.error(`[poncho][self-fetch] Failed ${path} after ${retries} attempt(s).`);
     }
   };
+  const getInternalRequestHeader = (headers: IncomingMessage["headers"]): string | undefined => {
+    const value = headers["x-poncho-internal"];
+    return Array.isArray(value) ? value[0] : value;
+  };
+  const isValidInternalRequest = (headers: IncomingMessage["headers"]): boolean => {
+    const headerValue = getInternalRequestHeader(headers);
+    return typeof headerValue === "string" && headerValue === internalSecret;
+  };
   const messagingAdapters = new Map<string, MessagingAdapter>();
   const messagingBridges: AgentBridge[] = [];
   if (config?.messaging && config.messaging.length > 0) {
@@ -3365,7 +3501,7 @@ export const createRequestHandler = async (options?: {
     // ── Internal endpoints (self-fetch only, secured by startup secret) ──
     if (pathname?.startsWith("/api/internal/") && request.method === "POST") {
-      if (request.headers["x-poncho-internal"] !== internalSecret) {
+      if (!isValidInternalRequest(request.headers)) {
         writeJson(response, 403, { code: "FORBIDDEN", message: "Internal endpoint" });
         return;
       }
@@ -3813,7 +3949,15 @@ export const createRequestHandler = async (options?: {
         return;
       }
-      // Record the decision on this approval entry
+      // Track decision in memory so parallel batch requests see a consistent
+      // view (file-store reads return independent copies, causing lost updates).
+      let batchDecisions = approvalDecisionTracker.get(conversationId);
+      if (!batchDecisions) {
+        batchDecisions = new Map();
+        approvalDecisionTracker.set(conversationId, batchDecisions);
+      }
+      batchDecisions.set(approvalId, approved);
       foundApproval.decision = approved ? "approved" : "denied";
       broadcastEvent(conversationId,
@@ -3823,16 +3967,26 @@ export const createRequestHandler = async (options?: {
       );
       const allApprovals = foundConversation.pendingApprovals ?? [];
-      const allDecided = allApprovals.length > 0 && allApprovals.every(a => a.decision != null);
+      const allDecided = allApprovals.length > 0 &&
+        allApprovals.every(a => batchDecisions!.has(a.approvalId));
       if (!allDecided) {
-        // Still waiting for more decisions — persist and respond
+        // Still waiting for more decisions — persist best-effort and respond.
+        // The write may be overwritten by a concurrent request, but that's
+        // fine: the in-memory tracker is the source of truth for completion.
         await conversationStore.update(foundConversation);
         writeJson(response, 200, { ok: true, approvalId, approved, batchComplete: false });
         return;
       }
-      // All approvals in the batch are decided — execute and resume
+      // All approvals in the batch are decided — apply tracked decisions,
+      // execute approved tools, and resume the run.
+      for (const a of allApprovals) {
+        const d = batchDecisions.get(a.approvalId);
+        if (d != null) a.decision = d ? "approved" : "denied";
+      }
+      approvalDecisionTracker.delete(conversationId);
       foundConversation.pendingApprovals = [];
       foundConversation.runStatus = "running";
       await conversationStore.update(foundConversation);
@@ -4101,14 +4255,21 @@ export const createRequestHandler = async (options?: {
             }
           }
         }
+        const hasPendingCallbackResults = Array.isArray(conversation.pendingSubagentResults)
+          && conversation.pendingSubagentResults.length > 0;
+        const needsContinuation = !hasActiveRun
+          && Array.isArray(conversation._continuationMessages)
+          && conversation._continuationMessages.length > 0;
         writeJson(response, 200, {
           conversation: {
             ...conversation,
             pendingApprovals: storedPending,
+            _continuationMessages: undefined,
           },
           subagentPendingApprovals: subagentPending,
-          hasActiveRun,
+          hasActiveRun: hasActiveRun || hasPendingCallbackResults,
           hasRunningSubagents,
+          needsContinuation,
         });
         return;
       }
@@ -4517,14 +4678,6 @@ export const createRequestHandler = async (options?: {
             if (active && active.abortController === abortController) {
               active.runId = event.runId;
             }
-            if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
-              runContextWindow = event.contextWindow;
-            }
-          }
-          if (event.type === "model:response") {
-            if (typeof event.usage?.input === "number") {
-              runContextTokens = event.usage.input;
-            }
           }
           if (event.type === "run:cancelled") {
             runCancelled = true;
@@ -4623,6 +4776,8 @@ export const createRequestHandler = async (options?: {
             if (assistantResponse.length === 0 && event.result.response) {
               assistantResponse = event.result.response;
             }
+            runContextTokens = event.result.contextTokens ?? runContextTokens;
+            runContextWindow = event.result.contextWindow ?? runContextWindow;
             if (event.result.continuation && event.result.continuationMessages) {
               runContinuationMessages = event.result.continuationMessages;
               conversation._continuationMessages = runContinuationMessages;
@@ -4783,9 +4938,10 @@ export const createRequestHandler = async (options?: {
           // Already closed.
         }
         // Check for pending subagent results that arrived during the run
+        const hadDeferred = pendingCallbackNeeded.delete(conversationId);
         const freshConv = await conversationStore.get(conversationId);
-        if (freshConv?.pendingSubagentResults?.length) {
-          processSubagentCallback(conversationId).catch(err =>
+        if (hadDeferred || freshConv?.pendingSubagentResults?.length) {
+          processSubagentCallback(conversationId, true).catch(err =>
             console.error(`[poncho][subagent-callback] Post-run callback failed:`, err instanceof Error ? err.message : err),
           );
         }
@@ -4948,6 +5104,14 @@ export const createRequestHandler = async (options?: {
           );
         }
+        const convId = conversation.conversationId;
+        activeConversationRuns.set(convId, {
+          ownerId: conversation.ownerId,
+          abortController: new AbortController(),
+          runId: null,
+        });
+        try {
         const abortController = new AbortController();
         let assistantResponse = "";
         let latestRunId = "";
@@ -4955,7 +5119,7 @@ export const createRequestHandler = async (options?: {
         const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
         let currentTools: string[] = [];
         let currentText = "";
-        let runResult: { status: string; steps: number; continuation?: boolean } = {
+        let runResult: { status: string; steps: number; continuation?: boolean; contextTokens?: number; contextWindow?: number } = {
           status: "completed",
           steps: 0,
         };
@@ -4967,8 +5131,8 @@ export const createRequestHandler = async (options?: {
         for await (const event of harness.runWithTelemetry({
           task: cronJob.task,
-          conversationId: conversation.conversationId,
-          parameters: { __activeConversationId: conversation.conversationId },
+          conversationId: convId,
+          parameters: { __activeConversationId: convId },
           messages: historyMessages,
           abortSignal: abortController.signal,
         })) {
@@ -5010,14 +5174,19 @@ export const createRequestHandler = async (options?: {
               status: event.result.status,
               steps: event.result.steps,
               continuation: event.result.continuation,
+              contextTokens: event.result.contextTokens,
+              contextWindow: event.result.contextWindow,
             };
             if (!assistantResponse && event.result.response) {
               assistantResponse = event.result.response;
             }
           }
+          broadcastEvent(convId, event);
           await telemetry.emit(event);
         }
+        finishConversationStream(convId);
         if (currentTools.length > 0) {
           sections.push({ type: "tools", content: currentTools });
         }
@@ -5026,7 +5195,8 @@ export const createRequestHandler = async (options?: {
           currentText = "";
         }
-        // Persist the conversation
+        // Persist the conversation — read fresh state to avoid clobbering
+        // pendingSubagentResults appended during the run.
         const hasContent = assistantResponse.length > 0 || toolTimeline.length > 0;
         const assistantMetadata =
           toolTimeline.length > 0 || sections.length > 0
@@ -5044,14 +5214,19 @@ export const createRequestHandler = async (options?: {
             ? [{ role: "assistant" as const, content: assistantResponse, metadata: assistantMetadata }]
             : []),
         ];
-        conversation.messages = messages;
-        conversation.runtimeRunId = latestRunId || conversation.runtimeRunId;
-        conversation.updatedAt = Date.now();
-        await conversationStore.update(conversation);
+        const freshConv = await conversationStore.get(convId);
+        if (freshConv) {
+          freshConv.messages = messages;
+          freshConv.runtimeRunId = latestRunId || freshConv.runtimeRunId;
+          if (runResult.contextTokens) freshConv.contextTokens = runResult.contextTokens;
+          if (runResult.contextWindow) freshConv.contextWindow = runResult.contextWindow;
+          freshConv.updatedAt = Date.now();
+          await conversationStore.update(freshConv);
+        }
         // Self-continuation for serverless timeouts
         if (runResult.continuation && softDeadlineMs > 0) {
-          const selfUrl = `http://${request.headers.host ?? "localhost"}${pathname}?continue=${encodeURIComponent(conversation.conversationId)}&continuation=${continuationCount + 1}`;
+          const selfUrl = `http://${request.headers.host ?? "localhost"}${pathname}?continue=${encodeURIComponent(convId)}&continuation=${continuationCount + 1}`;
           try {
             const selfRes = await fetch(selfUrl, {
               method: "GET",
@@ -5061,7 +5236,7 @@ export const createRequestHandler = async (options?: {
             });
             const selfBody = await selfRes.json() as Record<string, unknown>;
             writeJson(response, 200, {
-              conversationId: conversation.conversationId,
+              conversationId: convId,
               status: "continued",
               continuations: continuationCount + 1,
               finalResult: selfBody,
@@ -5069,7 +5244,7 @@ export const createRequestHandler = async (options?: {
             });
           } catch (continueError) {
             writeJson(response, 200, {
-              conversationId: conversation.conversationId,
+              conversationId: convId,
               status: "continuation_failed",
               error: continueError instanceof Error ? continueError.message : "Unknown error",
               duration: Date.now() - start,
@@ -5080,12 +5255,28 @@ export const createRequestHandler = async (options?: {
         }
         writeJson(response, 200, {
-          conversationId: conversation.conversationId,
+          conversationId: convId,
           status: runResult.status,
           response: assistantResponse.slice(0, 500),
           duration: Date.now() - start,
           steps: runResult.steps,
         });
+        } finally {
+          activeConversationRuns.delete(convId);
+          const hadDeferred = pendingCallbackNeeded.delete(convId);
+          const checkConv = await conversationStore.get(convId);
+          if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
+            if (isServerless) {
+              selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(convId)}/subagent-callback`).catch(err =>
+                console.error(`[cron] subagent callback self-fetch failed:`, err instanceof Error ? err.message : err),
+              );
+            } else {
+              processSubagentCallback(convId, true).catch(err =>
+                console.error(`[cron] subagent callback failed:`, err instanceof Error ? err.message : err),
+              );
+            }
+          }
+        }
       } catch (error) {
         writeJson(response, 500, {
           code: "CRON_RUN_ERROR",
@@ -5101,6 +5292,11 @@ export const createRequestHandler = async (options?: {
   handler._cronJobs = cronJobs;
   handler._conversationStore = conversationStore;
   handler._messagingAdapters = messagingAdapters;
+  handler._activeConversationRuns = activeConversationRuns;
+  handler._pendingCallbackNeeded = pendingCallbackNeeded;
+  handler._processSubagentCallback = processSubagentCallback;
+  handler._broadcastEvent = broadcastEvent;
+  handler._finishConversationStream = finishConversationStream;
   // Recover stale subagent runs that were "running" when the server last stopped
   // or that have been inactive longer than the staleness threshold.
@@ -5169,6 +5365,8 @@ export const startDevServer = async (
     steps: number;
     assistantMetadata?: Message["metadata"];
     hasContent: boolean;
+    contextTokens: number;
+    contextWindow: number;
   };
   const runCronAgent = async (
@@ -5176,9 +5374,12 @@ export const startDevServer = async (
     task: string,
     conversationId: string,
     historyMessages: Message[],
+    onEvent?: (event: AgentEvent) => void,
   ): Promise<CronRunResult> => {
     let assistantResponse = "";
     let steps = 0;
+    let contextTokens = 0;
+    let contextWindow = 0;
     const toolTimeline: string[] = [];
     const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
     let currentTools: string[] = [];
@@ -5189,6 +5390,7 @@ export const startDevServer = async (
       parameters: { __activeConversationId: conversationId },
       messages: historyMessages,
     })) {
+      onEvent?.(event);
       if (event.type === "model:chunk") {
         if (currentTools.length > 0) {
           sections.push({ type: "tools", content: currentTools });
@@ -5221,6 +5423,8 @@ export const startDevServer = async (
       }
       if (event.type === "run:completed") {
         steps = event.result.steps;
+        contextTokens = event.result.contextTokens ?? 0;
+        contextWindow = event.result.contextWindow ?? 0;
         if (!assistantResponse && event.result.response) {
           assistantResponse = event.result.response;
         }
@@ -5240,7 +5444,7 @@ export const startDevServer = async (
             sections: sections.length > 0 ? sections : undefined,
           } as Message["metadata"])
         : undefined;
-    return { response: assistantResponse, steps, assistantMetadata, hasContent };
+    return { response: assistantResponse, steps, assistantMetadata, hasContent, contextTokens, contextWindow };
   };
   const buildCronMessages = (
@@ -5267,6 +5471,9 @@ export const startDevServer = async (
     const harnessRef = handler._harness;
     const store = handler._conversationStore;
     const adapters = handler._messagingAdapters;
+    const activeRuns = handler._activeConversationRuns;
+    const deferredCallbacks = handler._pendingCallbackNeeded;
+    const runCallback = handler._processSubagentCallback;
     if (!harnessRef || !store) return;
     for (const [jobName, config] of entries) {
@@ -5308,32 +5515,56 @@ export const startDevServer = async (
                 const task = `[Scheduled: ${jobName}]\n${config.task}`;
                 const historyMessages = [...conversation.messages];
+                const convId = conversation.conversationId;
+                activeRuns?.set(convId, {
+                  ownerId: "local-owner",
+                  abortController: new AbortController(),
+                  runId: null,
+                });
                 try {
-                  const result = await runCronAgent(harnessRef, task, conversation.conversationId, historyMessages);
-                  conversation.messages = buildCronMessages(task, historyMessages, result);
-                  conversation.updatedAt = Date.now();
-                  await store.update(conversation);
-                  if (result.response) {
-                    try {
-                      await adapter.sendReply(
-                        {
-                          channelId: chatId,
-                          platformThreadId: conversation.channelMeta?.platformThreadId ?? chatId,
-                        },
-                        result.response,
-                      );
-                    } catch (sendError) {
-                      const sendMsg = sendError instanceof Error ? sendError.message : String(sendError);
-                      process.stderr.write(`[cron] ${jobName}: send to ${chatId} failed: ${sendMsg}\n`);
+                  const broadcastCh = handler._broadcastEvent;
+                  const result = await runCronAgent(harnessRef, task, convId, historyMessages,
+                    broadcastCh ? (ev) => broadcastCh(convId, ev) : undefined,
+                  );
+                  handler._finishConversationStream?.(convId);
+                  const freshConv = await store.get(convId);
+                  if (freshConv) {
+                    freshConv.messages = buildCronMessages(task, historyMessages, result);
+                    if (result.contextTokens > 0) freshConv.contextTokens = result.contextTokens;
+                    if (result.contextWindow > 0) freshConv.contextWindow = result.contextWindow;
+                    freshConv.updatedAt = Date.now();
+                    await store.update(freshConv);
+                    if (result.response) {
+                      try {
+                        await adapter.sendReply(
+                          {
+                            channelId: chatId,
+                            platformThreadId: freshConv.channelMeta?.platformThreadId ?? chatId,
+                          },
+                          result.response,
+                        );
+                      } catch (sendError) {
+                        const sendMsg = sendError instanceof Error ? sendError.message : String(sendError);
+                        process.stderr.write(`[cron] ${jobName}: send to ${chatId} failed: ${sendMsg}\n`);
+                      }
                     }
                   }
                   totalChats++;
                 } catch (runError) {
                   const runMsg = runError instanceof Error ? runError.message : String(runError);
                   process.stderr.write(`[cron] ${jobName}: run for chat ${chatId} failed: ${runMsg}\n`);
+                } finally {
+                  activeRuns?.delete(convId);
+                  const hadDeferred = deferredCallbacks?.delete(convId) ?? false;
+                  const checkConv = await store.get(convId);
+                  if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
+                    runCallback?.(convId, true).catch((err: unknown) =>
+                      console.error(`[cron] ${jobName}: subagent callback for ${chatId} failed:`, err instanceof Error ? err.message : err),
+                    );
+                  }
                 }
               }
@@ -5347,15 +5578,31 @@ export const startDevServer = async (
             return;
           }
+          let cronConvId: string | undefined;
           try {
             const conversation = await store.create(
               "local-owner",
               `[cron] ${jobName} ${timestamp}`,
             );
-            const result = await runCronAgent(harnessRef, config.task, conversation.conversationId, []);
-            conversation.messages = buildCronMessages(config.task, [], result);
-            conversation.updatedAt = Date.now();
-            await store.update(conversation);
+            cronConvId = conversation.conversationId;
+            activeRuns?.set(cronConvId, {
+              ownerId: "local-owner",
+              abortController: new AbortController(),
+              runId: null,
+            });
+            const broadcast = handler._broadcastEvent;
+            const result = await runCronAgent(harnessRef, config.task, cronConvId, [],
+              broadcast ? (ev) => broadcast(cronConvId!, ev) : undefined,
+            );
+            handler._finishConversationStream?.(cronConvId);
+            const freshConv = await store.get(cronConvId);
+            if (freshConv) {
+              freshConv.messages = buildCronMessages(config.task, [], result);
+              if (result.contextTokens > 0) freshConv.contextTokens = result.contextTokens;
+              if (result.contextWindow > 0) freshConv.contextWindow = result.contextWindow;
+              freshConv.updatedAt = Date.now();
+              await store.update(freshConv);
+            }
             const elapsed = ((Date.now() - start) / 1000).toFixed(1);
             process.stdout.write(
               `[cron] ${jobName} completed in ${elapsed}s (${result.steps} steps)\n`,
@@ -5366,6 +5613,17 @@ export const startDevServer = async (
             process.stderr.write(
               `[cron] ${jobName} failed after ${elapsed}s: ${msg}\n`,
             );
+          } finally {
+            if (cronConvId) {
+              activeRuns?.delete(cronConvId);
+              const hadDeferred = deferredCallbacks?.delete(cronConvId) ?? false;
+              const checkConv = await store.get(cronConvId);
+              if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
+                runCallback?.(cronConvId, true).catch((err: unknown) =>
+                  console.error(`[cron] ${jobName}: subagent callback failed:`, err instanceof Error ? err.message : err),
+                );
+              }
+            }
           }
         },
       );