@poncho-ai/cli 0.29.0 → 0.30.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -586,14 +586,10 @@ ${name}/
586
586
  ├── tests/
587
587
  │ └── basic.yaml # Test suite
588
588
  └── skills/
589
- ├── starter/
590
- │ ├── SKILL.md
591
- │ └── scripts/
592
- │ └── starter-echo.ts
593
- └── fetch-page/
589
+ └── starter/
594
590
  ├── SKILL.md
595
591
  └── scripts/
596
- └── fetch-page.ts
592
+ └── starter-echo.ts
597
593
  \`\`\`
598
594
 
599
595
  ## Cron Jobs
@@ -776,69 +772,6 @@ const SKILL_TOOL_TEMPLATE = `export default async function run(input) {
776
772
  }
777
773
  `;
778
774
 
779
- const FETCH_PAGE_SKILL_TEMPLATE = `---
780
- name: fetch-page
781
- description: Fetch a web page and return its text content
782
- allowed-tools:
783
- - ./scripts/fetch-page.ts
784
- ---
785
-
786
- # Fetch Page
787
-
788
- Fetches a URL and returns the page body as plain text (HTML tags stripped).
789
-
790
- ## Usage
791
-
792
- Call \`run_skill_script\` with:
793
- - **skill**: \`fetch-page\`
794
- - **script**: \`./scripts/fetch-page.ts\`
795
- - **input**: \`{ "url": "https://example.com" }\`
796
-
797
- The script returns \`{ url, status, content }\` where \`content\` is the
798
- text-only body (capped at ~32 000 chars to stay context-friendly).
799
- `;
800
-
801
- const FETCH_PAGE_SCRIPT_TEMPLATE = `export default async function run(input) {
802
- const url = typeof input?.url === "string" ? input.url.trim() : "";
803
- if (!url) {
804
- return { error: "A \\"url\\" string is required." };
805
- }
806
-
807
- const MAX_LENGTH = 32_000;
808
-
809
- const response = await fetch(url, {
810
- headers: { "User-Agent": "poncho-fetch-page/1.0" },
811
- redirect: "follow",
812
- });
813
-
814
- if (!response.ok) {
815
- return { url, status: response.status, error: response.statusText };
816
- }
817
-
818
- const html = await response.text();
819
-
820
- // Lightweight HTML-to-text: strip tags, collapse whitespace.
821
- const text = html
822
- .replace(/<script[\\s\\S]*?<\\/script>/gi, "")
823
- .replace(/<style[\\s\\S]*?<\\/style>/gi, "")
824
- .replace(/<[^>]+>/g, " ")
825
- .replace(/&nbsp;/gi, " ")
826
- .replace(/&amp;/gi, "&")
827
- .replace(/&lt;/gi, "<")
828
- .replace(/&gt;/gi, ">")
829
- .replace(/&quot;/gi, '"')
830
- .replace(/&#39;/gi, "'")
831
- .replace(/\\s+/g, " ")
832
- .trim();
833
-
834
- const content = text.length > MAX_LENGTH
835
- ? text.slice(0, MAX_LENGTH) + "… (truncated)"
836
- : text;
837
-
838
- return { url, status: response.status, content };
839
- }
840
- `;
841
-
842
775
  const ensureFile = async (path: string, content: string): Promise<void> => {
843
776
  await mkdir(dirname(path), { recursive: true });
844
777
  await writeFile(path, content, { encoding: "utf8", flag: "wx" });
@@ -1375,8 +1308,6 @@ export const initProject = async (
1375
1308
  { path: "tests/basic.yaml", content: TEST_TEMPLATE },
1376
1309
  { path: "skills/starter/SKILL.md", content: SKILL_TEMPLATE },
1377
1310
  { path: "skills/starter/scripts/starter-echo.ts", content: SKILL_TOOL_TEMPLATE },
1378
- { path: "skills/fetch-page/SKILL.md", content: FETCH_PAGE_SKILL_TEMPLATE },
1379
- { path: "skills/fetch-page/scripts/fetch-page.ts", content: FETCH_PAGE_SCRIPT_TEMPLATE },
1380
1311
  ];
1381
1312
  if (onboarding.envFile) {
1382
1313
  scaffoldFiles.push({ path: ".env", content: onboarding.envFile });
@@ -1460,6 +1391,11 @@ export type RequestHandler = ((
1460
1391
  _cronJobs?: Record<string, CronJobConfig>;
1461
1392
  _conversationStore?: ConversationStore;
1462
1393
  _messagingAdapters?: Map<string, MessagingAdapter>;
1394
+ _activeConversationRuns?: Map<string, { ownerId: string; abortController: AbortController; runId: string | null }>;
1395
+ _pendingCallbackNeeded?: Set<string>;
1396
+ _processSubagentCallback?: (conversationId: string, skipLockCheck?: boolean) => Promise<void>;
1397
+ _broadcastEvent?: (conversationId: string, event: AgentEvent) => void;
1398
+ _finishConversationStream?: (conversationId: string) => void;
1463
1399
  };
1464
1400
 
1465
1401
  export const createRequestHandler = async (options?: {
@@ -1642,6 +1578,11 @@ export const createRequestHandler = async (options?: {
1642
1578
  };
1643
1579
  const pendingSubagentApprovals = new Map<string, PendingSubagentApproval>();
1644
1580
 
1581
+ // Tracks approval decisions in memory so parallel batch requests don't
1582
+ // race against the conversation store (each file-store read returns a
1583
+ // separate copy, causing last-writer-wins when decisions overlap).
1584
+ const approvalDecisionTracker = new Map<string, Map<string, boolean>>();
1585
+
1645
1586
  const getSubagentDepth = async (conversationId: string): Promise<number> => {
1646
1587
  let depth = 0;
1647
1588
  let current = await conversationStore.get(conversationId);
@@ -1808,6 +1749,9 @@ export const createRequestHandler = async (options?: {
1808
1749
  if (currentTools.length > 0) {
1809
1750
  sections.push({ type: "tools", content: currentTools });
1810
1751
  currentTools = [];
1752
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
1753
+ assistantResponse += " ";
1754
+ }
1811
1755
  }
1812
1756
  assistantResponse += event.content;
1813
1757
  currentText += event.content;
@@ -1930,6 +1874,9 @@ export const createRequestHandler = async (options?: {
1930
1874
  if (currentTools.length > 0) {
1931
1875
  sections.push({ type: "tools", content: currentTools });
1932
1876
  currentTools = [];
1877
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
1878
+ assistantResponse += " ";
1879
+ }
1933
1880
  }
1934
1881
  assistantResponse += resumeEvent.content;
1935
1882
  currentText += resumeEvent.content;
@@ -2105,8 +2052,15 @@ export const createRequestHandler = async (options?: {
2105
2052
  // ---------------------------------------------------------------------------
2106
2053
  const MAX_SUBAGENT_CALLBACK_COUNT = 20;
2107
2054
 
2055
+ // Track conversations that received subagent results while a run was active.
2056
+ // processSubagentCallback's finally block checks this to reliably re-trigger
2057
+ // even if the store-level pendingSubagentResults was clobbered by a concurrent
2058
+ // read-modify-write.
2059
+ const pendingCallbackNeeded = new Set<string>();
2060
+
2108
2061
  const triggerParentCallback = async (parentConversationId: string): Promise<void> => {
2109
2062
  if (activeConversationRuns.has(parentConversationId)) {
2063
+ pendingCallbackNeeded.add(parentConversationId);
2110
2064
  return;
2111
2065
  }
2112
2066
  if (isServerless) {
@@ -2120,15 +2074,17 @@ export const createRequestHandler = async (options?: {
2120
2074
 
2121
2075
  const CALLBACK_LOCK_STALE_MS = 5 * 60 * 1000;
2122
2076
 
2123
- const processSubagentCallback = async (conversationId: string): Promise<void> => {
2077
+ const processSubagentCallback = async (conversationId: string, skipLockCheck = false): Promise<void> => {
2124
2078
  const conversation = await conversationStore.get(conversationId);
2125
2079
  if (!conversation) return;
2126
2080
 
2127
2081
  const pendingResults = conversation.pendingSubagentResults ?? [];
2128
2082
  if (pendingResults.length === 0) return;
2129
2083
 
2130
- // Store-based lock for serverless: skip if another invocation is processing
2131
- if (conversation.runningCallbackSince) {
2084
+ // Store-based lock for serverless: skip if another invocation is processing.
2085
+ // Callers that re-trigger right after their own run or callback has finished
2086
+ // (callback finally block, post-run, post-resume, cron) pass skipLockCheck = true to bypass it.
2087
+ if (!skipLockCheck && conversation.runningCallbackSince) {
2132
2088
  const elapsed = Date.now() - conversation.runningCallbackSince;
2133
2089
  if (elapsed < CALLBACK_LOCK_STALE_MS) {
2134
2090
  return;
@@ -2172,12 +2128,27 @@ export const createRequestHandler = async (options?: {
2172
2128
  abortController,
2173
2129
  runId: null,
2174
2130
  });
2131
+ // Reopen/reset the parent stream for this callback run so clients that stay
2132
+ // on the main conversation can subscribe to live callback events.
2133
+ const prevStream = conversationEventStreams.get(conversationId);
2134
+ if (prevStream) {
2135
+ prevStream.finished = false;
2136
+ prevStream.buffer = [];
2137
+ } else {
2138
+ conversationEventStreams.set(conversationId, {
2139
+ buffer: [],
2140
+ subscribers: new Set(),
2141
+ finished: false,
2142
+ });
2143
+ }
2175
2144
 
2176
2145
  const historyMessages = [...conversation.messages];
2177
2146
  let assistantResponse = "";
2178
2147
  let latestRunId = "";
2179
2148
  let runContinuation = false;
2180
2149
  let runContinuationMessages: Message[] | undefined;
2150
+ let runContextTokens = conversation.contextTokens ?? 0;
2151
+ let runContextWindow = conversation.contextWindow ?? 0;
2181
2152
  const toolTimeline: string[] = [];
2182
2153
  const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
2183
2154
  let currentTools: string[] = [];
@@ -2203,6 +2174,9 @@ export const createRequestHandler = async (options?: {
2203
2174
  if (currentTools.length > 0) {
2204
2175
  sections.push({ type: "tools", content: currentTools });
2205
2176
  currentTools = [];
2177
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
2178
+ assistantResponse += " ";
2179
+ }
2206
2180
  }
2207
2181
  assistantResponse += event.content;
2208
2182
  currentText += event.content;
@@ -2230,6 +2204,8 @@ export const createRequestHandler = async (options?: {
2230
2204
  if (assistantResponse.length === 0 && event.result.response) {
2231
2205
  assistantResponse = event.result.response;
2232
2206
  }
2207
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
2208
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
2233
2209
  if (event.result.continuation) {
2234
2210
  runContinuation = true;
2235
2211
  if (event.result.continuationMessages) {
@@ -2260,6 +2236,8 @@ export const createRequestHandler = async (options?: {
2260
2236
  }
2261
2237
  freshConv.runtimeRunId = latestRunId || freshConv.runtimeRunId;
2262
2238
  freshConv.runningCallbackSince = undefined;
2239
+ if (runContextTokens > 0) freshConv.contextTokens = runContextTokens;
2240
+ if (runContextWindow > 0) freshConv.contextWindow = runContextWindow;
2263
2241
  freshConv.updatedAt = Date.now();
2264
2242
  await conversationStore.update(freshConv);
2265
2243
 
@@ -2302,24 +2280,43 @@ export const createRequestHandler = async (options?: {
2302
2280
  activeConversationRuns.delete(conversationId);
2303
2281
  finishConversationStream(conversationId);
2304
2282
 
2283
+ // Check both the in-memory flag (always reliable) and the store.
2284
+ // We drain the flag first so a concurrent triggerParentCallback that
2285
+ // sets it right after our delete above is still caught on the next
2286
+ // iteration.
2287
+ const hadDeferredTrigger = pendingCallbackNeeded.delete(conversationId);
2305
2288
  const freshConv = await conversationStore.get(conversationId);
2306
- if (freshConv) {
2307
- if (freshConv.runningCallbackSince) {
2308
- freshConv.runningCallbackSince = undefined;
2309
- await conversationStore.update(freshConv);
2310
- }
2311
- }
2289
+ const hasPendingInStore = !!freshConv?.pendingSubagentResults?.length;
2312
2290
 
2313
- if (freshConv?.pendingSubagentResults?.length) {
2291
+ if (hadDeferredTrigger || hasPendingInStore) {
2292
+ // Re-trigger immediately. Skip the runningCallbackSince lock check
2293
+ // because we know this callback just finished. The re-triggered
2294
+ // callback will overwrite runningCallbackSince with its own timestamp.
2314
2295
  if (isServerless) {
2315
2296
  selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
2316
2297
  console.error(`[poncho][subagent-callback] Recursive callback self-fetch failed:`, err instanceof Error ? err.message : err),
2317
2298
  );
2318
2299
  } else {
2319
- processSubagentCallback(conversationId).catch(err =>
2300
+ processSubagentCallback(conversationId, true).catch(err =>
2320
2301
  console.error(`[poncho][subagent-callback] Recursive callback failed:`, err instanceof Error ? err.message : err),
2321
2302
  );
2322
2303
  }
2304
+ } else if (freshConv?.runningCallbackSince) {
2305
+ // No re-trigger needed. Use the atomic clearCallbackLock to avoid
2306
+ // clobbering concurrent appendSubagentResult writes.
2307
+ const afterClear = await conversationStore.clearCallbackLock(conversationId);
2308
+ // Double-check: an append may have raced even the atomic clear
2309
+ if (afterClear?.pendingSubagentResults?.length) {
2310
+ if (isServerless) {
2311
+ selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(conversationId)}/subagent-callback`).catch(err =>
2312
+ console.error(`[poncho][subagent-callback] Post-clear callback self-fetch failed:`, err instanceof Error ? err.message : err),
2313
+ );
2314
+ } else {
2315
+ processSubagentCallback(conversationId, true).catch(err =>
2316
+ console.error(`[poncho][subagent-callback] Post-clear callback failed:`, err instanceof Error ? err.message : err),
2317
+ );
2318
+ }
2319
+ }
2323
2320
  }
2324
2321
  }
2325
2322
  };
@@ -2521,19 +2518,14 @@ export const createRequestHandler = async (options?: {
2521
2518
  if (active && active.abortController === abortController) {
2522
2519
  active.runId = event.runId;
2523
2520
  }
2524
- if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
2525
- runContextWindow = event.contextWindow;
2526
- }
2527
- }
2528
- if (event.type === "model:response") {
2529
- if (typeof event.usage?.input === "number") {
2530
- runContextTokens = event.usage.input;
2531
- }
2532
2521
  }
2533
2522
  if (event.type === "model:chunk") {
2534
2523
  if (currentTools.length > 0) {
2535
2524
  sections.push({ type: "tools", content: currentTools });
2536
2525
  currentTools = [];
2526
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
2527
+ assistantResponse += " ";
2528
+ }
2537
2529
  }
2538
2530
  assistantResponse += event.content;
2539
2531
  currentText += event.content;
@@ -2590,12 +2582,12 @@ export const createRequestHandler = async (options?: {
2590
2582
  }
2591
2583
  checkpointedRun = true;
2592
2584
  }
2593
- if (
2594
- event.type === "run:completed" &&
2595
- assistantResponse.length === 0 &&
2596
- event.result.response
2597
- ) {
2598
- assistantResponse = event.result.response;
2585
+ if (event.type === "run:completed") {
2586
+ if (assistantResponse.length === 0 && event.result.response) {
2587
+ assistantResponse = event.result.response;
2588
+ }
2589
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
2590
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
2599
2591
  }
2600
2592
  if (event.type === "run:error") {
2601
2593
  assistantResponse = assistantResponse || `[Error: ${event.error.message}]`;
@@ -2684,6 +2676,15 @@ export const createRequestHandler = async (options?: {
2684
2676
  runConversations.delete(latestRunId);
2685
2677
  }
2686
2678
  console.log("[resume-run] complete for", conversationId);
2679
+
2680
+ // Check for pending subagent results that arrived during the run
2681
+ const hadDeferred = pendingCallbackNeeded.delete(conversationId);
2682
+ const postConv = await conversationStore.get(conversationId);
2683
+ if (hadDeferred || postConv?.pendingSubagentResults?.length) {
2684
+ processSubagentCallback(conversationId, true).catch(err =>
2685
+ console.error(`[poncho][subagent-callback] Post-resume callback failed:`, err instanceof Error ? err.message : err),
2686
+ );
2687
+ }
2687
2688
  };
2688
2689
 
2689
2690
  // ---------------------------------------------------------------------------
@@ -2840,19 +2841,14 @@ export const createRequestHandler = async (options?: {
2840
2841
  latestRunId = event.runId;
2841
2842
  runOwners.set(event.runId, "local-owner");
2842
2843
  runConversations.set(event.runId, conversationId);
2843
- if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
2844
- runContextWindow = event.contextWindow;
2845
- }
2846
- }
2847
- if (event.type === "model:response") {
2848
- if (typeof event.usage?.input === "number") {
2849
- runContextTokens = event.usage.input;
2850
- }
2851
2844
  }
2852
2845
  if (event.type === "model:chunk") {
2853
2846
  if (currentTools.length > 0) {
2854
2847
  sections.push({ type: "tools", content: currentTools });
2855
2848
  currentTools = [];
2849
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
2850
+ assistantResponse += " ";
2851
+ }
2856
2852
  }
2857
2853
  assistantResponse += event.content;
2858
2854
  currentText += event.content;
@@ -2946,6 +2942,8 @@ export const createRequestHandler = async (options?: {
2946
2942
  }
2947
2943
  runSteps = event.result.steps;
2948
2944
  if (typeof event.result.maxSteps === "number") runMaxSteps = event.result.maxSteps;
2945
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
2946
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
2949
2947
  }
2950
2948
  if (event.type === "run:error") {
2951
2949
  assistantResponse = assistantResponse || `[Error: ${event.error.message}]`;
@@ -3867,7 +3865,15 @@ export const createRequestHandler = async (options?: {
3867
3865
  return;
3868
3866
  }
3869
3867
 
3870
- // Record the decision on this approval entry
3868
+ // Track decision in memory so parallel batch requests see a consistent
3869
+ // view (file-store reads return independent copies, causing lost updates).
3870
+ let batchDecisions = approvalDecisionTracker.get(conversationId);
3871
+ if (!batchDecisions) {
3872
+ batchDecisions = new Map();
3873
+ approvalDecisionTracker.set(conversationId, batchDecisions);
3874
+ }
3875
+ batchDecisions.set(approvalId, approved);
3876
+
3871
3877
  foundApproval.decision = approved ? "approved" : "denied";
3872
3878
 
3873
3879
  broadcastEvent(conversationId,
@@ -3877,16 +3883,26 @@ export const createRequestHandler = async (options?: {
3877
3883
  );
3878
3884
 
3879
3885
  const allApprovals = foundConversation.pendingApprovals ?? [];
3880
- const allDecided = allApprovals.length > 0 && allApprovals.every(a => a.decision != null);
3886
+ const allDecided = allApprovals.length > 0 &&
3887
+ allApprovals.every(a => batchDecisions!.has(a.approvalId));
3881
3888
 
3882
3889
  if (!allDecided) {
3883
- // Still waiting for more decisions — persist and respond
3890
+ // Still waiting for more decisions — persist best-effort and respond.
3891
+ // The write may be overwritten by a concurrent request, but that's
3892
+ // fine: the in-memory tracker is the source of truth for completion.
3884
3893
  await conversationStore.update(foundConversation);
3885
3894
  writeJson(response, 200, { ok: true, approvalId, approved, batchComplete: false });
3886
3895
  return;
3887
3896
  }
3888
3897
 
3889
- // All approvals in the batch are decided — execute and resume
3898
+ // All approvals in the batch are decided — apply tracked decisions,
3899
+ // execute approved tools, and resume the run.
3900
+ for (const a of allApprovals) {
3901
+ const d = batchDecisions.get(a.approvalId);
3902
+ if (d != null) a.decision = d ? "approved" : "denied";
3903
+ }
3904
+ approvalDecisionTracker.delete(conversationId);
3905
+
3890
3906
  foundConversation.pendingApprovals = [];
3891
3907
  foundConversation.runStatus = "running";
3892
3908
  await conversationStore.update(foundConversation);
@@ -4571,14 +4587,6 @@ export const createRequestHandler = async (options?: {
4571
4587
  if (active && active.abortController === abortController) {
4572
4588
  active.runId = event.runId;
4573
4589
  }
4574
- if (typeof event.contextWindow === "number" && event.contextWindow > 0) {
4575
- runContextWindow = event.contextWindow;
4576
- }
4577
- }
4578
- if (event.type === "model:response") {
4579
- if (typeof event.usage?.input === "number") {
4580
- runContextTokens = event.usage.input;
4581
- }
4582
4590
  }
4583
4591
  if (event.type === "run:cancelled") {
4584
4592
  runCancelled = true;
@@ -4587,6 +4595,9 @@ export const createRequestHandler = async (options?: {
4587
4595
  if (currentTools.length > 0) {
4588
4596
  sections.push({ type: "tools", content: currentTools });
4589
4597
  currentTools = [];
4598
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
4599
+ assistantResponse += " ";
4600
+ }
4590
4601
  }
4591
4602
  assistantResponse += event.content;
4592
4603
  currentText += event.content;
@@ -4674,6 +4685,8 @@ export const createRequestHandler = async (options?: {
4674
4685
  if (assistantResponse.length === 0 && event.result.response) {
4675
4686
  assistantResponse = event.result.response;
4676
4687
  }
4688
+ runContextTokens = event.result.contextTokens ?? runContextTokens;
4689
+ runContextWindow = event.result.contextWindow ?? runContextWindow;
4677
4690
  if (event.result.continuation && event.result.continuationMessages) {
4678
4691
  runContinuationMessages = event.result.continuationMessages;
4679
4692
  conversation._continuationMessages = runContinuationMessages;
@@ -4834,9 +4847,10 @@ export const createRequestHandler = async (options?: {
4834
4847
  // Already closed.
4835
4848
  }
4836
4849
  // Check for pending subagent results that arrived during the run
4850
+ const hadDeferred = pendingCallbackNeeded.delete(conversationId);
4837
4851
  const freshConv = await conversationStore.get(conversationId);
4838
- if (freshConv?.pendingSubagentResults?.length) {
4839
- processSubagentCallback(conversationId).catch(err =>
4852
+ if (hadDeferred || freshConv?.pendingSubagentResults?.length) {
4853
+ processSubagentCallback(conversationId, true).catch(err =>
4840
4854
  console.error(`[poncho][subagent-callback] Post-run callback failed:`, err instanceof Error ? err.message : err),
4841
4855
  );
4842
4856
  }
@@ -4999,6 +5013,14 @@ export const createRequestHandler = async (options?: {
4999
5013
  );
5000
5014
  }
5001
5015
 
5016
+ const convId = conversation.conversationId;
5017
+ activeConversationRuns.set(convId, {
5018
+ ownerId: conversation.ownerId,
5019
+ abortController: new AbortController(),
5020
+ runId: null,
5021
+ });
5022
+
5023
+ try {
5002
5024
  const abortController = new AbortController();
5003
5025
  let assistantResponse = "";
5004
5026
  let latestRunId = "";
@@ -5006,7 +5028,7 @@ export const createRequestHandler = async (options?: {
5006
5028
  const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
5007
5029
  let currentTools: string[] = [];
5008
5030
  let currentText = "";
5009
- let runResult: { status: string; steps: number; continuation?: boolean } = {
5031
+ let runResult: { status: string; steps: number; continuation?: boolean; contextTokens?: number; contextWindow?: number } = {
5010
5032
  status: "completed",
5011
5033
  steps: 0,
5012
5034
  };
@@ -5018,8 +5040,8 @@ export const createRequestHandler = async (options?: {
5018
5040
 
5019
5041
  for await (const event of harness.runWithTelemetry({
5020
5042
  task: cronJob.task,
5021
- conversationId: conversation.conversationId,
5022
- parameters: { __activeConversationId: conversation.conversationId },
5043
+ conversationId: convId,
5044
+ parameters: { __activeConversationId: convId },
5023
5045
  messages: historyMessages,
5024
5046
  abortSignal: abortController.signal,
5025
5047
  })) {
@@ -5030,6 +5052,9 @@ export const createRequestHandler = async (options?: {
5030
5052
  if (currentTools.length > 0) {
5031
5053
  sections.push({ type: "tools", content: currentTools });
5032
5054
  currentTools = [];
5055
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
5056
+ assistantResponse += " ";
5057
+ }
5033
5058
  }
5034
5059
  assistantResponse += event.content;
5035
5060
  currentText += event.content;
@@ -5058,14 +5083,19 @@ export const createRequestHandler = async (options?: {
5058
5083
  status: event.result.status,
5059
5084
  steps: event.result.steps,
5060
5085
  continuation: event.result.continuation,
5086
+ contextTokens: event.result.contextTokens,
5087
+ contextWindow: event.result.contextWindow,
5061
5088
  };
5062
5089
  if (!assistantResponse && event.result.response) {
5063
5090
  assistantResponse = event.result.response;
5064
5091
  }
5065
5092
  }
5093
+ broadcastEvent(convId, event);
5066
5094
  await telemetry.emit(event);
5067
5095
  }
5068
5096
 
5097
+ finishConversationStream(convId);
5098
+
5069
5099
  if (currentTools.length > 0) {
5070
5100
  sections.push({ type: "tools", content: currentTools });
5071
5101
  }
@@ -5074,7 +5104,8 @@ export const createRequestHandler = async (options?: {
5074
5104
  currentText = "";
5075
5105
  }
5076
5106
 
5077
- // Persist the conversation
5107
+ // Persist the conversation — read fresh state to avoid clobbering
5108
+ // pendingSubagentResults appended during the run.
5078
5109
  const hasContent = assistantResponse.length > 0 || toolTimeline.length > 0;
5079
5110
  const assistantMetadata =
5080
5111
  toolTimeline.length > 0 || sections.length > 0
@@ -5092,14 +5123,19 @@ export const createRequestHandler = async (options?: {
5092
5123
  ? [{ role: "assistant" as const, content: assistantResponse, metadata: assistantMetadata }]
5093
5124
  : []),
5094
5125
  ];
5095
- conversation.messages = messages;
5096
- conversation.runtimeRunId = latestRunId || conversation.runtimeRunId;
5097
- conversation.updatedAt = Date.now();
5098
- await conversationStore.update(conversation);
5126
+ const freshConv = await conversationStore.get(convId);
5127
+ if (freshConv) {
5128
+ freshConv.messages = messages;
5129
+ freshConv.runtimeRunId = latestRunId || freshConv.runtimeRunId;
5130
+ if (runResult.contextTokens) freshConv.contextTokens = runResult.contextTokens;
5131
+ if (runResult.contextWindow) freshConv.contextWindow = runResult.contextWindow;
5132
+ freshConv.updatedAt = Date.now();
5133
+ await conversationStore.update(freshConv);
5134
+ }
5099
5135
 
5100
5136
  // Self-continuation for serverless timeouts
5101
5137
  if (runResult.continuation && softDeadlineMs > 0) {
5102
- const selfUrl = `http://${request.headers.host ?? "localhost"}${pathname}?continue=${encodeURIComponent(conversation.conversationId)}&continuation=${continuationCount + 1}`;
5138
+ const selfUrl = `http://${request.headers.host ?? "localhost"}${pathname}?continue=${encodeURIComponent(convId)}&continuation=${continuationCount + 1}`;
5103
5139
  try {
5104
5140
  const selfRes = await fetch(selfUrl, {
5105
5141
  method: "GET",
@@ -5109,7 +5145,7 @@ export const createRequestHandler = async (options?: {
5109
5145
  });
5110
5146
  const selfBody = await selfRes.json() as Record<string, unknown>;
5111
5147
  writeJson(response, 200, {
5112
- conversationId: conversation.conversationId,
5148
+ conversationId: convId,
5113
5149
  status: "continued",
5114
5150
  continuations: continuationCount + 1,
5115
5151
  finalResult: selfBody,
@@ -5117,7 +5153,7 @@ export const createRequestHandler = async (options?: {
5117
5153
  });
5118
5154
  } catch (continueError) {
5119
5155
  writeJson(response, 200, {
5120
- conversationId: conversation.conversationId,
5156
+ conversationId: convId,
5121
5157
  status: "continuation_failed",
5122
5158
  error: continueError instanceof Error ? continueError.message : "Unknown error",
5123
5159
  duration: Date.now() - start,
@@ -5128,12 +5164,28 @@ export const createRequestHandler = async (options?: {
5128
5164
  }
5129
5165
 
5130
5166
  writeJson(response, 200, {
5131
- conversationId: conversation.conversationId,
5167
+ conversationId: convId,
5132
5168
  status: runResult.status,
5133
5169
  response: assistantResponse.slice(0, 500),
5134
5170
  duration: Date.now() - start,
5135
5171
  steps: runResult.steps,
5136
5172
  });
5173
+ } finally {
5174
+ activeConversationRuns.delete(convId);
5175
+ const hadDeferred = pendingCallbackNeeded.delete(convId);
5176
+ const checkConv = await conversationStore.get(convId);
5177
+ if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
5178
+ if (isServerless) {
5179
+ selfFetchWithRetry(`/api/internal/conversations/${encodeURIComponent(convId)}/subagent-callback`).catch(err =>
5180
+ console.error(`[cron] subagent callback self-fetch failed:`, err instanceof Error ? err.message : err),
5181
+ );
5182
+ } else {
5183
+ processSubagentCallback(convId, true).catch(err =>
5184
+ console.error(`[cron] subagent callback failed:`, err instanceof Error ? err.message : err),
5185
+ );
5186
+ }
5187
+ }
5188
+ }
5137
5189
  } catch (error) {
5138
5190
  writeJson(response, 500, {
5139
5191
  code: "CRON_RUN_ERROR",
@@ -5149,6 +5201,11 @@ export const createRequestHandler = async (options?: {
5149
5201
  handler._cronJobs = cronJobs;
5150
5202
  handler._conversationStore = conversationStore;
5151
5203
  handler._messagingAdapters = messagingAdapters;
5204
+ handler._activeConversationRuns = activeConversationRuns;
5205
+ handler._pendingCallbackNeeded = pendingCallbackNeeded;
5206
+ handler._processSubagentCallback = processSubagentCallback;
5207
+ handler._broadcastEvent = broadcastEvent;
5208
+ handler._finishConversationStream = finishConversationStream;
5152
5209
 
5153
5210
  // Recover stale subagent runs that were "running" when the server last stopped
5154
5211
  // or that have been inactive longer than the staleness threshold.
@@ -5217,6 +5274,8 @@ export const startDevServer = async (
5217
5274
  steps: number;
5218
5275
  assistantMetadata?: Message["metadata"];
5219
5276
  hasContent: boolean;
5277
+ contextTokens: number;
5278
+ contextWindow: number;
5220
5279
  };
5221
5280
 
5222
5281
  const runCronAgent = async (
@@ -5224,9 +5283,12 @@ export const startDevServer = async (
5224
5283
  task: string,
5225
5284
  conversationId: string,
5226
5285
  historyMessages: Message[],
5286
+ onEvent?: (event: AgentEvent) => void,
5227
5287
  ): Promise<CronRunResult> => {
5228
5288
  let assistantResponse = "";
5229
5289
  let steps = 0;
5290
+ let contextTokens = 0;
5291
+ let contextWindow = 0;
5230
5292
  const toolTimeline: string[] = [];
5231
5293
  const sections: Array<{ type: "text" | "tools"; content: string | string[] }> = [];
5232
5294
  let currentTools: string[] = [];
@@ -5237,10 +5299,14 @@ export const startDevServer = async (
5237
5299
  parameters: { __activeConversationId: conversationId },
5238
5300
  messages: historyMessages,
5239
5301
  })) {
5302
+ onEvent?.(event);
5240
5303
  if (event.type === "model:chunk") {
5241
5304
  if (currentTools.length > 0) {
5242
5305
  sections.push({ type: "tools", content: currentTools });
5243
5306
  currentTools = [];
5307
+ if (assistantResponse.length > 0 && !/\s$/.test(assistantResponse)) {
5308
+ assistantResponse += " ";
5309
+ }
5244
5310
  }
5245
5311
  assistantResponse += event.content;
5246
5312
  currentText += event.content;
@@ -5266,6 +5332,8 @@ export const startDevServer = async (
5266
5332
  }
5267
5333
  if (event.type === "run:completed") {
5268
5334
  steps = event.result.steps;
5335
+ contextTokens = event.result.contextTokens ?? 0;
5336
+ contextWindow = event.result.contextWindow ?? 0;
5269
5337
  if (!assistantResponse && event.result.response) {
5270
5338
  assistantResponse = event.result.response;
5271
5339
  }
@@ -5285,7 +5353,7 @@ export const startDevServer = async (
5285
5353
  sections: sections.length > 0 ? sections : undefined,
5286
5354
  } as Message["metadata"])
5287
5355
  : undefined;
5288
- return { response: assistantResponse, steps, assistantMetadata, hasContent };
5356
+ return { response: assistantResponse, steps, assistantMetadata, hasContent, contextTokens, contextWindow };
5289
5357
  };
5290
5358
 
5291
5359
  const buildCronMessages = (
@@ -5312,6 +5380,9 @@ export const startDevServer = async (
5312
5380
  const harnessRef = handler._harness;
5313
5381
  const store = handler._conversationStore;
5314
5382
  const adapters = handler._messagingAdapters;
5383
+ const activeRuns = handler._activeConversationRuns;
5384
+ const deferredCallbacks = handler._pendingCallbackNeeded;
5385
+ const runCallback = handler._processSubagentCallback;
5315
5386
  if (!harnessRef || !store) return;
5316
5387
 
5317
5388
  for (const [jobName, config] of entries) {
@@ -5353,32 +5424,56 @@ export const startDevServer = async (
5353
5424
 
5354
5425
  const task = `[Scheduled: ${jobName}]\n${config.task}`;
5355
5426
  const historyMessages = [...conversation.messages];
5427
+ const convId = conversation.conversationId;
5356
5428
 
5429
+ activeRuns?.set(convId, {
5430
+ ownerId: "local-owner",
5431
+ abortController: new AbortController(),
5432
+ runId: null,
5433
+ });
5357
5434
  try {
5358
- const result = await runCronAgent(harnessRef, task, conversation.conversationId, historyMessages);
5359
-
5360
- conversation.messages = buildCronMessages(task, historyMessages, result);
5361
- conversation.updatedAt = Date.now();
5362
- await store.update(conversation);
5363
-
5364
- if (result.response) {
5365
- try {
5366
- await adapter.sendReply(
5367
- {
5368
- channelId: chatId,
5369
- platformThreadId: conversation.channelMeta?.platformThreadId ?? chatId,
5370
- },
5371
- result.response,
5372
- );
5373
- } catch (sendError) {
5374
- const sendMsg = sendError instanceof Error ? sendError.message : String(sendError);
5375
- process.stderr.write(`[cron] ${jobName}: send to ${chatId} failed: ${sendMsg}\n`);
5435
+ const broadcastCh = handler._broadcastEvent;
5436
+ const result = await runCronAgent(harnessRef, task, convId, historyMessages,
5437
+ broadcastCh ? (ev) => broadcastCh(convId, ev) : undefined,
5438
+ );
5439
+ handler._finishConversationStream?.(convId);
5440
+
5441
+ const freshConv = await store.get(convId);
5442
+ if (freshConv) {
5443
+ freshConv.messages = buildCronMessages(task, historyMessages, result);
5444
+ if (result.contextTokens > 0) freshConv.contextTokens = result.contextTokens;
5445
+ if (result.contextWindow > 0) freshConv.contextWindow = result.contextWindow;
5446
+ freshConv.updatedAt = Date.now();
5447
+ await store.update(freshConv);
5448
+
5449
+ if (result.response) {
5450
+ try {
5451
+ await adapter.sendReply(
5452
+ {
5453
+ channelId: chatId,
5454
+ platformThreadId: freshConv.channelMeta?.platformThreadId ?? chatId,
5455
+ },
5456
+ result.response,
5457
+ );
5458
+ } catch (sendError) {
5459
+ const sendMsg = sendError instanceof Error ? sendError.message : String(sendError);
5460
+ process.stderr.write(`[cron] ${jobName}: send to ${chatId} failed: ${sendMsg}\n`);
5461
+ }
5376
5462
  }
5377
5463
  }
5378
5464
  totalChats++;
5379
5465
  } catch (runError) {
5380
5466
  const runMsg = runError instanceof Error ? runError.message : String(runError);
5381
5467
  process.stderr.write(`[cron] ${jobName}: run for chat ${chatId} failed: ${runMsg}\n`);
5468
+ } finally {
5469
+ activeRuns?.delete(convId);
5470
+ const hadDeferred = deferredCallbacks?.delete(convId) ?? false;
5471
+ const checkConv = await store.get(convId);
5472
+ if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
5473
+ runCallback?.(convId, true).catch((err: unknown) =>
5474
+ console.error(`[cron] ${jobName}: subagent callback for ${chatId} failed:`, err instanceof Error ? err.message : err),
5475
+ );
5476
+ }
5382
5477
  }
5383
5478
  }
5384
5479
 
@@ -5392,15 +5487,31 @@ export const startDevServer = async (
5392
5487
  return;
5393
5488
  }
5394
5489
 
5490
+ let cronConvId: string | undefined;
5395
5491
  try {
5396
5492
  const conversation = await store.create(
5397
5493
  "local-owner",
5398
5494
  `[cron] ${jobName} ${timestamp}`,
5399
5495
  );
5400
- const result = await runCronAgent(harnessRef, config.task, conversation.conversationId, []);
5401
- conversation.messages = buildCronMessages(config.task, [], result);
5402
- conversation.updatedAt = Date.now();
5403
- await store.update(conversation);
5496
+ cronConvId = conversation.conversationId;
5497
+ activeRuns?.set(cronConvId, {
5498
+ ownerId: "local-owner",
5499
+ abortController: new AbortController(),
5500
+ runId: null,
5501
+ });
5502
+ const broadcast = handler._broadcastEvent;
5503
+ const result = await runCronAgent(harnessRef, config.task, cronConvId, [],
5504
+ broadcast ? (ev) => broadcast(cronConvId!, ev) : undefined,
5505
+ );
5506
+ handler._finishConversationStream?.(cronConvId);
5507
+ const freshConv = await store.get(cronConvId);
5508
+ if (freshConv) {
5509
+ freshConv.messages = buildCronMessages(config.task, [], result);
5510
+ if (result.contextTokens > 0) freshConv.contextTokens = result.contextTokens;
5511
+ if (result.contextWindow > 0) freshConv.contextWindow = result.contextWindow;
5512
+ freshConv.updatedAt = Date.now();
5513
+ await store.update(freshConv);
5514
+ }
5404
5515
  const elapsed = ((Date.now() - start) / 1000).toFixed(1);
5405
5516
  process.stdout.write(
5406
5517
  `[cron] ${jobName} completed in ${elapsed}s (${result.steps} steps)\n`,
@@ -5411,6 +5522,17 @@ export const startDevServer = async (
5411
5522
  process.stderr.write(
5412
5523
  `[cron] ${jobName} failed after ${elapsed}s: ${msg}\n`,
5413
5524
  );
5525
+ } finally {
5526
+ if (cronConvId) {
5527
+ activeRuns?.delete(cronConvId);
5528
+ const hadDeferred = deferredCallbacks?.delete(cronConvId) ?? false;
5529
+ const checkConv = await store.get(cronConvId);
5530
+ if (hadDeferred || checkConv?.pendingSubagentResults?.length) {
5531
+ runCallback?.(cronConvId, true).catch((err: unknown) =>
5532
+ console.error(`[cron] ${jobName}: subagent callback failed:`, err instanceof Error ? err.message : err),
5533
+ );
5534
+ }
5535
+ }
5414
5536
  }
5415
5537
  },
5416
5538
  );