@poncho-ai/cli 0.30.0 → 0.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -721,12 +721,16 @@ Set environment variables on your deployment platform:
721
721
  ANTHROPIC_API_KEY=sk-ant-... # Required
722
722
  PONCHO_AUTH_TOKEN=your-secret # Optional: protect your endpoint
723
723
  PONCHO_MAX_DURATION=55 # Optional: serverless timeout in seconds (enables auto-continuation)
724
+ PONCHO_INTERNAL_SECRET=... # Recommended on serverless: shared secret for internal callback auth
724
725
  \`\`\`
725
726
 
726
727
  When \`PONCHO_MAX_DURATION\` is set, the agent automatically checkpoints and resumes across
727
728
  request cycles when it approaches the platform timeout. The web UI and client SDK handle
728
729
  this transparently.
729
730
 
731
+ For serverless deployments with subagents or background callbacks, use a shared state backend
732
+ (\`upstash\`, \`redis\`, or \`dynamodb\`) instead of \`state.provider: 'local'\` / \`'memory'\`.
733
+
730
734
  ## Troubleshooting
731
735
 
732
736
  ### Vercel deploy issues
@@ -734,6 +738,7 @@ this transparently.
734
738
  - After upgrading \`@poncho-ai/cli\`, re-run \`poncho build vercel --force\` to refresh generated deploy files.
735
739
  - If Vercel fails during \`pnpm install\` due to a lockfile mismatch, run \`pnpm install --no-frozen-lockfile\` locally and commit \`pnpm-lock.yaml\`.
736
740
  - Deploy from the project root: \`vercel deploy --prod\`.
741
+ - For subagents/background callbacks, set \`PONCHO_INTERNAL_SECRET\` and use non-local state storage.
737
742
 
738
743
  For full reference:
739
744
  https://github.com/cesr/poncho-ai
@@ -1391,6 +1396,11 @@ export type RequestHandler = ((
1391
1396
  _cronJobs?: Record<string, CronJobConfig>;
1392
1397
  _conversationStore?: ConversationStore;
1393
1398
  _messagingAdapters?: Map<string, MessagingAdapter>;
1399
+ _activeConversationRuns?: Map<string, { ownerId: string; abortController: AbortController; runId: string | null }>;
1400
+ _pendingCallbackNeeded?: Set<string>;
1401
+ _processSubagentCallback?: (conversationId: string, skipLockCheck?: boolean) => Promise<void>;
1402
+ _broadcastEvent?: (conversationId: string, event: AgentEvent) => void;
1403
+ _finishConversationStream?: (conversationId: string) => void;
1394
1404
  };
1395
1405
 
1396
1406
  export const createRequestHandler = async (options?: {
@@ -1549,7 +1559,8 @@ export const createRequestHandler = async (options?: {
1549
1559
  await harness.initialize();
1550
1560
  const telemetry = new TelemetryEmitter(config?.telemetry);
1551
1561
  const identity = await ensureAgentIdentity(workingDir);
1552
- const conversationStore = createConversationStore(resolveStateConfig(config), {
1562
+ const stateConfig = resolveStateConfig(config);
1563
+ const conversationStore = createConversationStore(stateConfig, {
1553
1564
  workingDir,
1554
1565
  agentId: identity.id,
1555
1566
  });
@@ -1573,6 +1584,11 @@ export const createRequestHandler = async (options?: {
1573
1584
  };
1574
1585
  const pendingSubagentApprovals = new Map<string, PendingSubagentApproval>();
1575
1586
 
1587
+ // Tracks approval decisions in memory so parallel batch requests don't
1588
+ // race against the conversation store (each file-store read returns a
1589
+ // separate copy, causing last-writer-wins when decisions overlap).
1590
+ const approvalDecisionTracker = new Map<string, Map<string, boolean>>();
1591
+
1576
1592
  const getSubagentDepth = async (conversationId: string): Promise<number> => {
1577
1593
  let depth = 0;
1578
1594
  let current = await conversationStore.get(conversationId);
@@ -1946,9 +1962,10 @@ export const createRequestHandler = async (options?: {
1946
1962
  try { await childHarness.shutdown(); } catch {}
1947
1963
 
1948
1964
  if (isServerless) {
1949
- selfFetchWithRetry(`/api/internal/subagent/${encodeURIComponent(childConversationId)}/run`, { continuation: true }).catch(err =>
1965
+ const work = selfFetchWithRetry(`/api/internal/subagent/${encodeURIComponent(childConversationId)}/run`, { continuation: true }).catch(err =>
1950
1966
  console.error(`[poncho][subagent] Continuation self-fetch failed:`, err instanceof Error ? err.message : err),
1951
1967
  );
1968
+ doWaitUntil(work);
1952
1969
  } else {
1953
1970
  runSubagent(childConversationId, parentConversationId, task, ownerId, true).catch(err =>
1954
1971
  console.error(`[poncho][subagent] Continuation failed:`, err instanceof Error ? err.message : err),
@@ -2042,8 +2059,15 @@ export const createRequestHandler = async (options?: {
2042
2059
  // ---------------------------------------------------------------------------
2043
2060
  const MAX_SUBAGENT_CALLBACK_COUNT = 20;
2044
2061
 
2062
+ // Track conversations that received subagent results while a run was active.
2063
+ // processSubagentCallback's finally block checks this to reliably re-trigger
2064
+ // even if the store-level pendingSubagentResults was clobbered by a concurrent
2065
+ // read-modify-write.
2066
+ const pendingCallbackNeeded = new Set<string>();
2067
+
2045
2068
  const triggerParentCallback = async (parentConversationId: string): Promise<void> => {
2046
2069
  if (activeConversationRuns.has(parentConversationId)) {
2070
+ pendingCallbackNeeded.add(parentConversationId);
2047
2071
  return;
2048
2072
  }
2049
2073
  if (isServerless) {
@@ -2057,15 +2081,21 @@ export const createRequestHandler = async (options?: {
2057
2081
 
2058
2082
  const CALLBACK_LOCK_STALE_MS = 5 * 60 * 1000;
2059
2083
 
2060
- const processSubagentCallback = async (conversationId: string): Promise<void> => {
2084
+ const processSubagentCallback = async (conversationId: string, skipLockCheck = false): Promise<void> => {
2061
2085
  const conversation = await conversationStore.get(conversationId);
2062
2086
  if (!conversation) return;
2063
2087
 
2064
2088
  const pendingResults = conversation.pendingSubagentResults ?? [];
2065
- if (pendingResults.length === 0) return;
2066
-
2067
- // Store-based lock for serverless: skip if another invocation is processing
2068
- if (conversation.runningCallbackSince) {
2089
+ const hasOrphanedContinuation = pendingResults.length === 0
2090
+ && Array.isArray(conversation._continuationMessages)
2091
+ && conversation._continuationMessages.length > 0
2092
+ && !activeConversationRuns.has(conversationId);
2093
+ if (pendingResults.length === 0 && !hasOrphanedContinuation) return;
2094
+
2095
+ // Store-based lock for serverless: skip if another invocation is processing.
2096
+ // When re-triggered from a previous callback's finally block, skipLockCheck
2097
+ // is true because we know the previous callback has finished.
2098
+ if (!skipLockCheck && conversation.runningCallbackSince) {
2069
2099
  const elapsed = Date.now() - conversation.runningCallbackSince;
2070
2100
  if (elapsed < CALLBACK_LOCK_STALE_MS) {
2071
2101
  return;
@@ -2076,6 +2106,7 @@ export const createRequestHandler = async (options?: {
2076
2106
  // Acquire lock and clear pending
2077
2107
  conversation.pendingSubagentResults = [];
2078
2108
  conversation.runningCallbackSince = Date.now();
2109
+ conversation.runStatus = "running";
2079
2110
  const callbackCount = (conversation.subagentCallbackCount ?? 0) + 1;
2080
2111
  conversation.subagentCallbackCount = callbackCount;
2081
2112
 
@@ -2097,11 +2128,13 @@ export const createRequestHandler = async (options?: {
2097
2128
  if (callbackCount > MAX_SUBAGENT_CALLBACK_COUNT) {
2098
2129
  console.warn(`[poncho][subagent-callback] Circuit breaker: ${callbackCount} callbacks for ${conversationId}, skipping re-run`);
2099
2130
  conversation.runningCallbackSince = undefined;
2131
+ conversation.runStatus = "idle";
2100
2132
  await conversationStore.update(conversation);
2101
2133
  return;
2102
2134
  }
2103
2135
 
2104
- console.log(`[poncho][subagent-callback] Processing ${pendingResults.length} result(s) for ${conversationId} (callback #${callbackCount})`);
2136
+ const isContinuationResume = hasOrphanedContinuation && pendingResults.length === 0;
2137
+ console.log(`[poncho][subagent-callback] Processing ${pendingResults.length} result(s) for ${conversationId} (callback #${callbackCount})${isContinuationResume ? " (continuation resume)" : ""}`);
2105
2138
 
2106
2139
  const abortController = new AbortController();
2107
2140
  activeConversationRuns.set(conversationId, {
@@ -2109,12 +2142,29 @@ export const createRequestHandler = async (options?: {
2109
2142
  abortController,
2110
2143
  runId: null,
2111
2144
  });
2145
+ // Reopen/reset the parent stream for this callback run so clients that stay
2146
+ // on the main conversation can subscribe to live callback events.
2147
+ const prevStream = conversationEventStreams.get(conversationId);
2148
+ if (prevStream) {
2149
+ prevStream.finished = false;
2150
+ prevStream.buffer = [];
2151
+ } else {
2152
+ conversationEventStreams.set(conversationId, {
2153
+ buffer: [],
2154
+ subscribers: new Set(),
2155
+ finished: false,
2156
+ });
2157
+ }
2112
2158
 
2113
- const historyMessages = [...conversation.messages];
2159
+ const historyMessages = isContinuationResume && conversation._continuationMessages?.length
2160
+ ? [...conversation._continuationMessages]
2161
+ : [...conversation.messages];
2114
2162
  let assistantResponse = "";
2115
2163
  let latestRunId = "";
2116
2164
  let runContinuation = false;
2117
2165
  let runContinuationMessages: Message[] | undefined;
2166
+ let runContextTokens = conversation.contextTokens ?? 0;
2167
+ let runContextWindow = conversation.contextWindow ?? 0;
2118
2168
  const toolTimeline: string[] = [];
2119
2169
  const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
2120
2170
  let currentTools: string[] = [];
@@ -2170,6 +2220,8 @@ export const createRequestHandler = async (options?: {
2170
2220
  if (assistantResponse.length === 0 && event.result.response) {
2171
2221
  assistantResponse = event.result.response;
2172
2222
  }
2223
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
2224
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
2173
2225
  if (event.result.continuation) {
2174
2226
  runContinuation = true;
2175
2227
  if (event.result.continuationMessages) {
@@ -2200,6 +2252,9 @@ export const createRequestHandler = async (options?: {
2200
2252
  }
2201
2253
  freshConv.runtimeRunId = latestRunId || freshConv.runtimeRunId;
2202
2254
  freshConv.runningCallbackSince = undefined;
2255
+ freshConv.runStatus = "idle";
2256
+ if (runContextTokens > 0) freshConv.contextTokens = runContextTokens;
2257
+ if (runContextWindow > 0) freshConv.contextWindow = runContextWindow;
2203
2258
  freshConv.updatedAt = Date.now();
2204
2259
  await conversationStore.update(freshConv);
2205
2260
 
@@ -2226,9 +2281,14 @@ export const createRequestHandler = async (options?: {
2226
2281
  // Handle continuation for the callback run itself
2227
2282
  if (runContinuation) {
2228
2283
  if (isServerless) {
2229
- selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
2284
+ const work = selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
2230
2285
  console.error(`[poncho][subagent-callback] Continuation self-fetch failed:`, err instanceof Error ? err.message : err),
2231
2286
  );
2287
+ doWaitUntil(work);
2288
+ } else {
2289
+ processSubagentCallback(conversationId, true).catch(err =>
2290
+ console.error(`[poncho][subagent-callback] Continuation failed:`, err instanceof Error ? err.message : err),
2291
+ );
2232
2292
  }
2233
2293
  }
2234
2294
  } catch (err) {
@@ -2236,30 +2296,50 @@ export const createRequestHandler = async (options?: {
2236
2296
  const errConv = await conversationStore.get(conversationId);
2237
2297
  if (errConv) {
2238
2298
  errConv.runningCallbackSince = undefined;
2299
+ errConv.runStatus = "idle";
2239
2300
  await conversationStore.update(errConv);
2240
2301
  }
2241
2302
  } finally {
2242
2303
  activeConversationRuns.delete(conversationId);
2243
2304
  finishConversationStream(conversationId);
2244
2305
 
2306
+ // Check both the in-memory flag (always reliable) and the store.
2307
+ // We drain the flag first so a concurrent triggerParentCallback that
2308
+ // sets it right after our delete above is still caught on the next
2309
+ // iteration.
2310
+ const hadDeferredTrigger = pendingCallbackNeeded.delete(conversationId);
2245
2311
  const freshConv = await conversationStore.get(conversationId);
2246
- if (freshConv) {
2247
- if (freshConv.runningCallbackSince) {
2248
- freshConv.runningCallbackSince = undefined;
2249
- await conversationStore.update(freshConv);
2250
- }
2251
- }
2312
+ const hasPendingInStore = !!freshConv?.pendingSubagentResults?.length;
2252
2313
 
2253
- if (freshConv?.pendingSubagentResults?.length) {
2314
+ if (hadDeferredTrigger || hasPendingInStore) {
2315
+ // Re-trigger immediately. Skip the runningCallbackSince lock check
2316
+ // because we know this callback just finished. The re-triggered
2317
+ // callback will overwrite runningCallbackSince with its own timestamp.
2254
2318
  if (isServerless) {
2255
2319
  selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
2256
2320
  console.error(`[poncho][subagent-callback] Recursive callback self-fetch failed:`, err instanceof Error ? err.message : err),
2257
2321
  );
2258
2322
  } else {
2259
- processSubagentCallback(conversationId).catch(err =>
2323
+ processSubagentCallback(conversationId, true).catch(err =>
2260
2324
  console.error(`[poncho][subagent-callback] Recursive callback failed:`, err instanceof Error ? err.message : err),
2261
2325
  );
2262
2326
  }
2327
+ } else if (freshConv?.runningCallbackSince) {
2328
+ // No re-trigger needed. Use the atomic clearCallbackLock to avoid
2329
+ // clobbering concurrent appendSubagentResult writes.
2330
+ const afterClear = await conversationStore.clearCallbackLock(conversationId);
2331
+ // Double-check: an append may have raced even the atomic clear
2332
+ if (afterClear?.pendingSubagentResults?.length) {
2333
+ if (isServerless) {
2334
+ selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
2335
+ console.error(`[poncho][subagent-callback] Post-clear callback self-fetch failed:`, err instanceof Error ? err.message : err),
2336
+ );
2337
+ } else {
2338
+ processSubagentCallback(conversationId, true).catch(err =>
2339
+ console.error(`[poncho][subagent-callback] Post-clear callback failed:`, err instanceof Error ? err.message : err),
2340
+ );
2341
+ }
2342
+ }
2263
2343
  }
2264
2344
  }
2265
2345
  };
@@ -2461,14 +2541,6 @@ export const createRequestHandler = async (options?: {
2461
2541
  if (active && active.abortController === abortController) {
2462
2542
  active.runId = event.runId;
2463
2543
  }
2464
- if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
2465
- runContextWindow = event.contextWindow;
2466
- }
2467
- }
2468
- if (event.type === "model:response") {
2469
- if (typeof event.usage?.input === "number") {
2470
- runContextTokens = event.usage.input;
2471
- }
2472
2544
  }
2473
2545
  if (event.type === "model:chunk") {
2474
2546
  if (currentTools.length > 0) {
@@ -2533,12 +2605,12 @@ export const createRequestHandler = async (options?: {
2533
2605
  }
2534
2606
  checkpointedRun = true;
2535
2607
  }
2536
- if (
2537
- event.type === "run:completed" &&
2538
- assistantResponse.length === 0 &&
2539
- event.result.response
2540
- ) {
2541
- assistantResponse = event.result.response;
2608
+ if (event.type === "run:completed") {
2609
+ if (assistantResponse.length === 0 && event.result.response) {
2610
+ assistantResponse = event.result.response;
2611
+ }
2612
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
2613
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
2542
2614
  }
2543
2615
  if (event.type === "run:error") {
2544
2616
  assistantResponse = assistantResponse || `[Error: ${event.error.message}]`;
@@ -2627,6 +2699,15 @@ export const createRequestHandler = async (options?: {
2627
2699
  runConversations.delete(latestRunId);
2628
2700
  }
2629
2701
  console.log("[resume-run] complete for", conversationId);
2702
+
2703
+ // Check for pending subagent results that arrived during the run
2704
+ const hadDeferred = pendingCallbackNeeded.delete(conversationId);
2705
+ const postConv = await conversationStore.get(conversationId);
2706
+ if (hadDeferred || postConv?.pendingSubagentResults?.length) {
2707
+ processSubagentCallback(conversationId, true).catch(err =>
2708
+ console.error(`[poncho][subagent-callback] Post-resume callback failed:`, err instanceof Error ? err.message : err),
2709
+ );
2710
+ }
2630
2711
  };
2631
2712
 
2632
2713
  // ---------------------------------------------------------------------------
@@ -2783,14 +2864,6 @@ export const createRequestHandler = async (options?: {
2783
2864
  latestRunId = event.runId;
2784
2865
  runOwners.set(event.runId, "local-owner");
2785
2866
  runConversations.set(event.runId, conversationId);
2786
- if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
2787
- runContextWindow = event.contextWindow;
2788
- }
2789
- }
2790
- if (event.type === "model:response") {
2791
- if (typeof event.usage?.input === "number") {
2792
- runContextTokens = event.usage.input;
2793
- }
2794
2867
  }
2795
2868
  if (event.type === "model:chunk") {
2796
2869
  if (currentTools.length > 0) {
@@ -2892,6 +2965,8 @@ export const createRequestHandler = async (options?: {
2892
2965
  }
2893
2966
  runSteps = event.result.steps;
2894
2967
  if (typeof event.result.maxSteps === "number") runMaxSteps = event.result.maxSteps;
2968
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
2969
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
2895
2970
  }
2896
2971
  if (event.type === "run:error") {
2897
2972
  assistantResponse = assistantResponse || `[Error: ${event.error.message}]`;
@@ -2962,41 +3037,102 @@ export const createRequestHandler = async (options?: {
2962
3037
  }
2963
3038
 
2964
3039
  const isServerless = !!waitUntilHook;
2965
- const internalSecret = globalThis.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`;
3040
+ const configuredInternalSecret = process.env.PONCHO_INTERNAL_SECRET?.trim();
3041
+ const vercelDeploymentSecret = process.env.VERCEL_DEPLOYMENT_ID?.trim();
3042
+ const fallbackInternalSecret = globalThis.crypto?.randomUUID?.() ?? `${Date.now()}-${Math.random()}`;
3043
+ const internalSecret = configuredInternalSecret || vercelDeploymentSecret || fallbackInternalSecret;
3044
+ const isUsingEphemeralInternalSecret = !configuredInternalSecret && !vercelDeploymentSecret;
2966
3045
  let selfBaseUrl: string | null = process.env.VERCEL_URL
2967
3046
  ? `https://${process.env.VERCEL_URL}`
2968
3047
  : null;
2969
3048
 
3049
+ if (!selfBaseUrl && process.env.VERCEL_PROJECT_PRODUCTION_URL) {
3050
+ selfBaseUrl = `https://${process.env.VERCEL_PROJECT_PRODUCTION_URL}`;
3051
+ }
3052
+ if (!selfBaseUrl && process.env.PONCHO_SELF_BASE_URL) {
3053
+ selfBaseUrl = process.env.PONCHO_SELF_BASE_URL.replace(/\/+$/, "");
3054
+ }
3055
+
3056
+ if (isServerless && isUsingEphemeralInternalSecret) {
3057
+ console.warn(
3058
+ "[poncho][serverless] No stable internal secret found. Set PONCHO_INTERNAL_SECRET to avoid intermittent internal callback failures across instances.",
3059
+ );
3060
+ }
3061
+ if (isServerless && !selfBaseUrl) {
3062
+ console.warn(
3063
+ "[poncho][serverless] No self base URL available. Set PONCHO_SELF_BASE_URL if internal background callbacks fail.",
3064
+ );
3065
+ }
3066
+ const stateProvider = stateConfig?.provider ?? "local";
3067
+ if (isServerless && (stateProvider === "local" || stateProvider === "memory")) {
3068
+ console.warn(
3069
+ `[poncho][serverless] state.provider="${stateProvider}" may lose cross-invocation state. Prefer "upstash", "redis", or "dynamodb" for subagents/reliability.`,
3070
+ );
3071
+ }
3072
+
2970
3073
  const doWaitUntil = (promise: Promise<unknown>): void => {
2971
3074
  if (waitUntilHook) waitUntilHook(promise);
2972
3075
  };
2973
3076
 
2974
- const selfFetch = (path: string, body?: Record<string, unknown>): Promise<Response | void> => {
2975
- if (!selfBaseUrl) return Promise.resolve();
2976
- return fetch(`${selfBaseUrl}${path}`, {
2977
- method: "POST",
2978
- headers: {
2979
- "Content-Type": "application/json",
2980
- "x-poncho-internal": internalSecret,
2981
- },
2982
- body: body ? JSON.stringify(body) : undefined,
2983
- }).catch(err => {
2984
- console.error(`[poncho][self-fetch] Failed ${path}:`, err instanceof Error ? err.message : err);
2985
- }) as Promise<Response | void>;
2986
- };
2987
-
2988
3077
  const selfFetchWithRetry = async (path: string, body?: Record<string, unknown>, retries = 3): Promise<Response | void> => {
3078
+ if (!selfBaseUrl) {
3079
+ console.error(`[poncho][self-fetch] Missing self base URL for ${path}`);
3080
+ return;
3081
+ }
3082
+ let lastError: unknown;
2989
3083
  for (let attempt = 0; attempt < retries; attempt++) {
2990
3084
  try {
2991
- const result = await selfFetch(path, body);
2992
- return result;
3085
+ const result = await fetch(`${selfBaseUrl}${path}`, {
3086
+ method: "POST",
3087
+ headers: {
3088
+ "Content-Type": "application/json",
3089
+ "x-poncho-internal": internalSecret,
3090
+ },
3091
+ body: body ? JSON.stringify(body) : undefined,
3092
+ });
3093
+ if (result.ok) {
3094
+ return result;
3095
+ }
3096
+ const responseText = await result.text().catch(() => "");
3097
+ lastError = new Error(
3098
+ `HTTP ${result.status}${responseText ? `: ${responseText.slice(0, 200)}` : ""}`,
3099
+ );
2993
3100
  } catch (err) {
2994
- if (attempt === retries - 1) throw err;
2995
- await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
3101
+ lastError = err;
3102
+ }
3103
+ if (attempt === retries - 1) {
3104
+ break;
2996
3105
  }
3106
+ await new Promise((resolveSleep) => setTimeout(resolveSleep, 1000 * (attempt + 1)));
3107
+ }
3108
+ if (lastError) {
3109
+ console.error(
3110
+ `[poncho][self-fetch] Failed ${path} after ${retries} attempt(s):`,
3111
+ lastError instanceof Error ? lastError.message : String(lastError),
3112
+ );
3113
+ if (
3114
+ lastError instanceof Error
3115
+ && (lastError.message.includes("HTTP 403") || lastError.message.includes("HTTP 401"))
3116
+ ) {
3117
+ console.error(
3118
+ "[poncho][self-fetch] Internal auth failed. Ensure all serverless instances share PONCHO_INTERNAL_SECRET.",
3119
+ );
3120
+ }
3121
+ } else {
3122
+ console.error(`[poncho][self-fetch] Failed ${path} after ${retries} attempt(s).`);
2997
3123
  }
2998
3124
  };
2999
3125
 
3126
+ const getInternalRequestHeader = (headers: IncomingMessage["headers"]): string | undefined => {
3127
+ const value = headers["x-poncho-internal"];
3128
+ return Array.isArray(value) ? value[0] : value;
3129
+ };
3130
+
3131
+ const isValidInternalRequest = (headers: IncomingMessage["headers"]): boolean => {
3132
+ const headerValue = getInternalRequestHeader(headers);
3133
+ return typeof headerValue === "string" && headerValue === internalSecret;
3134
+ };
3135
+
3000
3136
  const messagingAdapters = new Map<string, MessagingAdapter>();
3001
3137
  const messagingBridges: AgentBridge[] = [];
3002
3138
  if (config?.messaging && config.messaging.length > 0) {
@@ -3365,7 +3501,7 @@ export const createRequestHandler = async (options?: {
3365
3501
 
3366
3502
  // ── Internal endpoints (self-fetch only, secured by startup secret) ──
3367
3503
  if (pathname?.startsWith("/api/internal/") && request.method === "POST") {
3368
- if (request.headers["x-poncho-internal"] !== internalSecret) {
3504
+ if (!isValidInternalRequest(request.headers)) {
3369
3505
  writeJson(response, 403, { code: "FORBIDDEN", message: "Internal endpoint" });
3370
3506
  return;
3371
3507
  }
@@ -3813,7 +3949,15 @@ export const createRequestHandler = async (options?: {
3813
3949
  return;
3814
3950
  }
3815
3951
 
3816
- // Record the decision on this approval entry
3952
+ // Track decision in memory so parallel batch requests see a consistent
3953
+ // view (file-store reads return independent copies, causing lost updates).
3954
+ let batchDecisions = approvalDecisionTracker.get(conversationId);
3955
+ if (!batchDecisions) {
3956
+ batchDecisions = new Map();
3957
+ approvalDecisionTracker.set(conversationId, batchDecisions);
3958
+ }
3959
+ batchDecisions.set(approvalId, approved);
3960
+
3817
3961
  foundApproval.decision = approved ? "approved" : "denied";
3818
3962
 
3819
3963
  broadcastEvent(conversationId,
@@ -3823,16 +3967,26 @@ export const createRequestHandler = async (options?: {
3823
3967
  );
3824
3968
 
3825
3969
  const allApprovals = foundConversation.pendingApprovals ?? [];
3826
- const allDecided = allApprovals.length > 0 && allApprovals.every(a => a.decision != null);
3970
+ const allDecided = allApprovals.length > 0 &&
3971
+ allApprovals.every(a => batchDecisions!.has(a.approvalId));
3827
3972
 
3828
3973
  if (!allDecided) {
3829
- // Still waiting for more decisions — persist and respond
3974
+ // Still waiting for more decisions — persist best-effort and respond.
3975
+ // The write may be overwritten by a concurrent request, but that's
3976
+ // fine: the in-memory tracker is the source of truth for completion.
3830
3977
  await conversationStore.update(foundConversation);
3831
3978
  writeJson(response, 200, { ok: true, approvalId, approved, batchComplete: false });
3832
3979
  return;
3833
3980
  }
3834
3981
 
3835
- // All approvals in the batch are decided — execute and resume
3982
+ // All approvals in the batch are decided — apply tracked decisions,
3983
+ // execute approved tools, and resume the run.
3984
+ for (const a of allApprovals) {
3985
+ const d = batchDecisions.get(a.approvalId);
3986
+ if (d != null) a.decision = d ? "approved" : "denied";
3987
+ }
3988
+ approvalDecisionTracker.delete(conversationId);
3989
+
3836
3990
  foundConversation.pendingApprovals = [];
3837
3991
  foundConversation.runStatus = "running";
3838
3992
  await conversationStore.update(foundConversation);
@@ -4101,14 +4255,21 @@ export const createRequestHandler = async (options?: {
4101
4255
  }
4102
4256
  }
4103
4257
  }
4258
+ const hasPendingCallbackResults = Array.isArray(conversation.pendingSubagentResults)
4259
+ && conversation.pendingSubagentResults.length > 0;
4260
+ const needsContinuation = !hasActiveRun
4261
+ && Array.isArray(conversation._continuationMessages)
4262
+ && conversation._continuationMessages.length > 0;
4104
4263
  writeJson(response, 200, {
4105
4264
  conversation: {
4106
4265
  ...conversation,
4107
4266
  pendingApprovals: storedPending,
4267
+ _continuationMessages: undefined,
4108
4268
  },
4109
4269
  subagentPendingApprovals: subagentPending,
4110
- hasActiveRun,
4270
+ hasActiveRun: hasActiveRun || hasPendingCallbackResults,
4111
4271
  hasRunningSubagents,
4272
+ needsContinuation,
4112
4273
  });
4113
4274
  return;
4114
4275
  }
@@ -4517,14 +4678,6 @@ export const createRequestHandler = async (options?: {
4517
4678
  if (active && active.abortController === abortController) {
4518
4679
  active.runId = event.runId;
4519
4680
  }
4520
- if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
4521
- runContextWindow = event.contextWindow;
4522
- }
4523
- }
4524
- if (event.type === "model:response") {
4525
- if (typeof event.usage?.input === "number") {
4526
- runContextTokens = event.usage.input;
4527
- }
4528
4681
  }
4529
4682
  if (event.type === "run:cancelled") {
4530
4683
  runCancelled = true;
@@ -4623,6 +4776,8 @@ export const createRequestHandler = async (options?: {
4623
4776
  if (assistantResponse.length === 0 && event.result.response) {
4624
4777
  assistantResponse = event.result.response;
4625
4778
  }
4779
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
4780
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
4626
4781
  if (event.result.continuation && event.result.continuationMessages) {
4627
4782
  runContinuationMessages = event.result.continuationMessages;
4628
4783
  conversation._continuationMessages = runContinuationMessages;
@@ -4783,9 +4938,10 @@ export const createRequestHandler = async (options?: {
4783
4938
  // Already closed.
4784
4939
  }
4785
4940
  // Check for pending subagent results that arrived during the run
4941
+ const hadDeferred = pendingCallbackNeeded.delete(conversationId);
4786
4942
  const freshConv = await conversationStore.get(conversationId);
4787
- if (freshConv?.pendingSubagentResults?.length) {
4788
- processSubagentCallback(conversationId).catch(err =>
4943
+ if (hadDeferred || freshConv?.pendingSubagentResults?.length) {
4944
+ processSubagentCallback(conversationId, true).catch(err =>
4789
4945
  console.error(`[poncho][subagent-callback] Post-run callback failed:`, err instanceof Error ? err.message : err),
4790
4946
  );
4791
4947
  }
@@ -4948,6 +5104,14 @@ export const createRequestHandler = async (options?: {
4948
5104
  );
4949
5105
  }
4950
5106
 
5107
+ const convId = conversation.conversationId;
5108
+ activeConversationRuns.set(convId, {
5109
+ ownerId: conversation.ownerId,
5110
+ abortController: new AbortController(),
5111
+ runId: null,
5112
+ });
5113
+
5114
+ try {
4951
5115
  const abortController = new AbortController();
4952
5116
  let assistantResponse = "";
4953
5117
  let latestRunId = "";
@@ -4955,7 +5119,7 @@ export const createRequestHandler = async (options?: {
4955
5119
  const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
4956
5120
  let currentTools: string[] = [];
4957
5121
  let currentText = "";
4958
- let runResult: { status: string; steps: number; continuation?: boolean } = {
5122
+ let runResult: { status: string; steps: number; continuation?: boolean; contextTokens?: number; contextWindow?: number } = {
4959
5123
  status: "completed",
4960
5124
  steps: 0,
4961
5125
  };
@@ -4967,8 +5131,8 @@ export const createRequestHandler = async (options?: {
4967
5131
 
4968
5132
  for await (const event of harness.runWithTelemetry({
4969
5133
  task: cronJob.task,
4970
- conversationId: conversation.conversationId,
4971
- parameters: { __activeConversationId: conversation.conversationId },
5134
+ conversationId: convId,
5135
+ parameters: { __activeConversationId: convId },
4972
5136
  messages: historyMessages,
4973
5137
  abortSignal: abortController.signal,
4974
5138
  })) {
@@ -5010,14 +5174,19 @@ export const createRequestHandler = async (options?: {
5010
5174
  status: event.result.status,
5011
5175
  steps: event.result.steps,
5012
5176
  continuation: event.result.continuation,
5177
+ contextTokens: event.result.contextTokens,
5178
+ contextWindow: event.result.contextWindow,
5013
5179
  };
5014
5180
  if (!assistantResponse && event.result.response) {
5015
5181
  assistantResponse = event.result.response;
5016
5182
  }
5017
5183
  }
5184
+ broadcastEvent(convId, event);
5018
5185
  await telemetry.emit(event);
5019
5186
  }
5020
5187
 
5188
+ finishConversationStream(convId);
5189
+
5021
5190
  if (currentTools.length > 0) {
5022
5191
  sections.push({ type: "tools", content: currentTools });
5023
5192
  }
@@ -5026,7 +5195,8 @@ export const createRequestHandler = async (options?: {
5026
5195
  currentText = "";
5027
5196
  }
5028
5197
 
5029
- // Persist the conversation
5198
+ // Persist the conversation — read fresh state to avoid clobbering
5199
+ // pendingSubagentResults appended during the run.
5030
5200
  const hasContent = assistantResponse.length > 0 || toolTimeline.length > 0;
5031
5201
  const assistantMetadata =
5032
5202
  toolTimeline.length > 0 || sections.length > 0
@@ -5044,14 +5214,19 @@ export const createRequestHandler = async (options?: {
5044
5214
  ? [{ role: "assistant" as const, content: assistantResponse, metadata: assistantMetadata }]
5045
5215
  : []),
5046
5216
  ];
5047
- conversation.messages = messages;
5048
- conversation.runtimeRunId = latestRunId || conversation.runtimeRunId;
5049
- conversation.updatedAt = Date.now();
5050
- await conversationStore.update(conversation);
5217
+ const freshConv = await conversationStore.get(convId);
5218
+ if (freshConv) {
5219
+ freshConv.messages = messages;
5220
+ freshConv.runtimeRunId = latestRunId || freshConv.runtimeRunId;
5221
+ if (runResult.contextTokens) freshConv.contextTokens = runResult.contextTokens;
5222
+ if (runResult.contextWindow) freshConv.contextWindow = runResult.contextWindow;
5223
+ freshConv.updatedAt = Date.now();
5224
+ await conversationStore.update(freshConv);
5225
+ }
5051
5226
 
5052
5227
  // Self-continuation for serverless timeouts
5053
5228
  if (runResult.continuation && softDeadlineMs > 0) {
5054
- const selfUrl = `http://${request.headers.host ?? "localhost"}${pathname}?continue=${encodeURIComponent(conversation.conversationId)}&continuation=${continuationCount + 1}`;
5229
+ const selfUrl = `http://${request.headers.host ?? "localhost"}${pathname}?continue=${encodeURIComponent(convId)}&continuation=${continuationCount + 1}`;
5055
5230
  try {
5056
5231
  const selfRes = await fetch(selfUrl, {
5057
5232
  method: "GET",
@@ -5061,7 +5236,7 @@ export const createRequestHandler = async (options?: {
5061
5236
  });
5062
5237
  const selfBody = await selfRes.json() as Record<string, unknown>;
5063
5238
  writeJson(response, 200, {
5064
- conversationId: conversation.conversationId,
5239
+ conversationId: convId,
5065
5240
  status: "continued",
5066
5241
  continuations: continuationCount + 1,
5067
5242
  finalResult: selfBody,
@@ -5069,7 +5244,7 @@ export const createRequestHandler = async (options?: {
5069
5244
  });
5070
5245
  } catch (continueError) {
5071
5246
  writeJson(response, 200, {
5072
- conversationId: conversation.conversationId,
5247
+ conversationId: convId,
5073
5248
  status: "continuation_failed",
5074
5249
  error: continueError instanceof Error ? continueError.message : "Unknown error",
5075
5250
  duration: Date.now() - start,
@@ -5080,12 +5255,28 @@ export const createRequestHandler = async (options?: {
5080
5255
  }
5081
5256
 
5082
5257
  writeJson(response, 200, {
5083
- conversationId: conversation.conversationId,
5258
+ conversationId: convId,
5084
5259
  status: runResult.status,
5085
5260
  response: assistantResponse.slice(0, 500),
5086
5261
  duration: Date.now() - start,
5087
5262
  steps: runResult.steps,
5088
5263
  });
5264
+ } finally {
5265
+ activeConversationRuns.delete(convId);
5266
+ const hadDeferred = pendingCallbackNeeded.delete(convId);
5267
+ const checkConv = await conversationStore.get(convId);
5268
+ if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
5269
+ if (isServerless) {
5270
+ selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(convId)}/subagent-callback`).catch(err =>
5271
+ console.error(`[cron] subagent callback self-fetch failed:`, err instanceof Error ? err.message : err),
5272
+ );
5273
+ } else {
5274
+ processSubagentCallback(convId, true).catch(err =>
5275
+ console.error(`[cron] subagent callback failed:`, err instanceof Error ? err.message : err),
5276
+ );
5277
+ }
5278
+ }
5279
+ }
5089
5280
  } catch (error) {
5090
5281
  writeJson(response, 500, {
5091
5282
  code: "CRON_RUN_ERROR",
@@ -5101,6 +5292,11 @@ export const createRequestHandler = async (options?: {
5101
5292
  handler._cronJobs = cronJobs;
5102
5293
  handler._conversationStore = conversationStore;
5103
5294
  handler._messagingAdapters = messagingAdapters;
5295
+ handler._activeConversationRuns = activeConversationRuns;
5296
+ handler._pendingCallbackNeeded = pendingCallbackNeeded;
5297
+ handler._processSubagentCallback = processSubagentCallback;
5298
+ handler._broadcastEvent = broadcastEvent;
5299
+ handler._finishConversationStream = finishConversationStream;
5104
5300
 
5105
5301
  // Recover stale subagent runs that were "running" when the server last stopped
5106
5302
  // or that have been inactive longer than the staleness threshold.
@@ -5169,6 +5365,8 @@ export const startDevServer = async (
5169
5365
  steps: number;
5170
5366
  assistantMetadata?: Message["metadata"];
5171
5367
  hasContent: boolean;
5368
+ contextTokens: number;
5369
+ contextWindow: number;
5172
5370
  };
5173
5371
 
5174
5372
  const runCronAgent = async (
@@ -5176,9 +5374,12 @@ export const startDevServer = async (
5176
5374
  task: string,
5177
5375
  conversationId: string,
5178
5376
  historyMessages: Message[],
5377
+ onEvent?: (event: AgentEvent) => void,
5179
5378
  ): Promise<CronRunResult> => {
5180
5379
  let assistantResponse = "";
5181
5380
  let steps = 0;
5381
+ let contextTokens = 0;
5382
+ let contextWindow = 0;
5182
5383
  const toolTimeline: string[] = [];
5183
5384
  const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
5184
5385
  let currentTools: string[] = [];
@@ -5189,6 +5390,7 @@ export const startDevServer = async (
5189
5390
  parameters: { __activeConversationId: conversationId },
5190
5391
  messages: historyMessages,
5191
5392
  })) {
5393
+ onEvent?.(event);
5192
5394
  if (event.type === "model:chunk") {
5193
5395
  if (currentTools.length > 0) {
5194
5396
  sections.push({ type: "tools", content: currentTools });
@@ -5221,6 +5423,8 @@ export const startDevServer = async (
5221
5423
  }
5222
5424
  if (event.type === "run:completed") {
5223
5425
  steps = event.result.steps;
5426
+ contextTokens = event.result.contextTokens ?? 0;
5427
+ contextWindow = event.result.contextWindow ?? 0;
5224
5428
  if (!assistantResponse && event.result.response) {
5225
5429
  assistantResponse = event.result.response;
5226
5430
  }
@@ -5240,7 +5444,7 @@ export const startDevServer = async (
5240
5444
  sections: sections.length > 0 ? sections : undefined,
5241
5445
  } as Message["metadata"])
5242
5446
  : undefined;
5243
- return { response: assistantResponse, steps, assistantMetadata, hasContent };
5447
+ return { response: assistantResponse, steps, assistantMetadata, hasContent, contextTokens, contextWindow };
5244
5448
  };
5245
5449
 
5246
5450
  const buildCronMessages = (
@@ -5267,6 +5471,9 @@ export const startDevServer = async (
5267
5471
  const harnessRef = handler._harness;
5268
5472
  const store = handler._conversationStore;
5269
5473
  const adapters = handler._messagingAdapters;
5474
+ const activeRuns = handler._activeConversationRuns;
5475
+ const deferredCallbacks = handler._pendingCallbackNeeded;
5476
+ const runCallback = handler._processSubagentCallback;
5270
5477
  if (!harnessRef || !store) return;
5271
5478
 
5272
5479
  for (const [jobName, config] of entries) {
@@ -5308,32 +5515,56 @@ export const startDevServer = async (
5308
5515
 
5309
5516
  const task = `[Scheduled: ${jobName}]\n${config.task}`;
5310
5517
  const historyMessages = [...conversation.messages];
5518
+ const convId = conversation.conversationId;
5311
5519
 
5520
+ activeRuns?.set(convId, {
5521
+ ownerId: "local-owner",
5522
+ abortController: new AbortController(),
5523
+ runId: null,
5524
+ });
5312
5525
  try {
5313
- const result = await runCronAgent(harnessRef, task, conversation.conversationId, historyMessages);
5314
-
5315
- conversation.messages = buildCronMessages(task, historyMessages, result);
5316
- conversation.updatedAt = Date.now();
5317
- await store.update(conversation);
5318
-
5319
- if (result.response) {
5320
- try {
5321
- await adapter.sendReply(
5322
- {
5323
- channelId: chatId,
5324
- platformThreadId: conversation.channelMeta?.platformThreadId ?? chatId,
5325
- },
5326
- result.response,
5327
- );
5328
- } catch (sendError) {
5329
- const sendMsg = sendError instanceof Error ? sendError.message : String(sendError);
5330
- process.stderr.write(`[cron] ${jobName}: send to ${chatId} failed: ${sendMsg}\n`);
5526
+ const broadcastCh = handler._broadcastEvent;
5527
+ const result = await runCronAgent(harnessRef, task, convId, historyMessages,
5528
+ broadcastCh ? (ev) => broadcastCh(convId, ev) : undefined,
5529
+ );
5530
+ handler._finishConversationStream?.(convId);
5531
+
5532
+ const freshConv = await store.get(convId);
5533
+ if (freshConv) {
5534
+ freshConv.messages = buildCronMessages(task, historyMessages, result);
5535
+ if (result.contextTokens > 0) freshConv.contextTokens = result.contextTokens;
5536
+ if (result.contextWindow > 0) freshConv.contextWindow = result.contextWindow;
5537
+ freshConv.updatedAt = Date.now();
5538
+ await store.update(freshConv);
5539
+
5540
+ if (result.response) {
5541
+ try {
5542
+ await adapter.sendReply(
5543
+ {
5544
+ channelId: chatId,
5545
+ platformThreadId: freshConv.channelMeta?.platformThreadId ?? chatId,
5546
+ },
5547
+ result.response,
5548
+ );
5549
+ } catch (sendError) {
5550
+ const sendMsg = sendError instanceof Error ? sendError.message : String(sendError);
5551
+ process.stderr.write(`[cron] ${jobName}: send to ${chatId} failed: ${sendMsg}\n`);
5552
+ }
5331
5553
  }
5332
5554
  }
5333
5555
  totalChats++;
5334
5556
  } catch (runError) {
5335
5557
  const runMsg = runError instanceof Error ? runError.message : String(runError);
5336
5558
  process.stderr.write(`[cron] ${jobName}: run for chat ${chatId} failed: ${runMsg}\n`);
5559
+ } finally {
5560
+ activeRuns?.delete(convId);
5561
+ const hadDeferred = deferredCallbacks?.delete(convId) ?? false;
5562
+ const checkConv = await store.get(convId);
5563
+ if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
5564
+ runCallback?.(convId, true).catch((err: unknown) =>
5565
+ console.error(`[cron] ${jobName}: subagent callback for ${chatId} failed:`, err instanceof Error ? err.message : err),
5566
+ );
5567
+ }
5337
5568
  }
5338
5569
  }
5339
5570
 
@@ -5347,15 +5578,31 @@ export const startDevServer = async (
5347
5578
  return;
5348
5579
  }
5349
5580
 
5581
+ let cronConvId: string | undefined;
5350
5582
  try {
5351
5583
  const conversation = await store.create(
5352
5584
  "local-owner",
5353
5585
  `[cron] ${jobName} ${timestamp}`,
5354
5586
  );
5355
- const result = await runCronAgent(harnessRef, config.task, conversation.conversationId, []);
5356
- conversation.messages = buildCronMessages(config.task, [], result);
5357
- conversation.updatedAt = Date.now();
5358
- await store.update(conversation);
5587
+ cronConvId = conversation.conversationId;
5588
+ activeRuns?.set(cronConvId, {
5589
+ ownerId: "local-owner",
5590
+ abortController: new AbortController(),
5591
+ runId: null,
5592
+ });
5593
+ const broadcast = handler._broadcastEvent;
5594
+ const result = await runCronAgent(harnessRef, config.task, cronConvId, [],
5595
+ broadcast ? (ev) => broadcast(cronConvId!, ev) : undefined,
5596
+ );
5597
+ handler._finishConversationStream?.(cronConvId);
5598
+ const freshConv = await store.get(cronConvId);
5599
+ if (freshConv) {
5600
+ freshConv.messages = buildCronMessages(config.task, [], result);
5601
+ if (result.contextTokens > 0) freshConv.contextTokens = result.contextTokens;
5602
+ if (result.contextWindow > 0) freshConv.contextWindow = result.contextWindow;
5603
+ freshConv.updatedAt = Date.now();
5604
+ await store.update(freshConv);
5605
+ }
5359
5606
  const elapsed = ((Date.now() - start) / 1000).toFixed(1);
5360
5607
  process.stdout.write(
5361
5608
  `[cron] ${jobName} completed in ${elapsed}s (${result.steps} steps)\n`,
@@ -5366,6 +5613,17 @@ export const startDevServer = async (
5366
5613
  process.stderr.write(
5367
5614
  `[cron] ${jobName} failed after ${elapsed}s: ${msg}\n`,
5368
5615
  );
5616
+ } finally {
5617
+ if (cronConvId) {
5618
+ activeRuns?.delete(cronConvId);
5619
+ const hadDeferred = deferredCallbacks?.delete(cronConvId) ?? false;
5620
+ const checkConv = await store.get(cronConvId);
5621
+ if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
5622
+ runCallback?.(cronConvId, true).catch((err: unknown) =>
5623
+ console.error(`[cron] ${jobName}: subagent callback failed:`, err instanceof Error ? err.message : err),
5624
+ );
5625
+ }
5626
+ }
5369
5627
  }
5370
5628
  },
5371
5629
  );