@poncho-ai/harness 0.28.0 → 0.28.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/harness@0.28.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
2
+ > @poncho-ai/harness@0.28.2 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
3
3
  > node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
4
4
 
5
5
  [embed-docs] Generated poncho-docs.ts with 4 topics
@@ -8,8 +8,8 @@
8
8
  CLI tsup v8.5.1
9
9
  CLI Target: es2022
10
10
  ESM Build start
11
- ESM dist/index.js 284.05 KB
12
- ESM ⚡️ Build success in 147ms
11
+ ESM dist/index.js 289.62 KB
12
+ ESM ⚡️ Build success in 213ms
13
13
  DTS Build start
14
- DTS ⚡️ Build success in 7926ms
15
- DTS dist/index.d.ts 29.26 KB
14
+ DTS ⚡️ Build success in 7196ms
15
+ DTS dist/index.d.ts 29.62 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # @poncho-ai/harness
2
2
 
3
+ ## 0.28.2
4
+
5
+ ### Patch Changes
6
+
7
+ - [`98df42f`](https://github.com/cesr/poncho-ai/commit/98df42f79e0a376d0a864598557758bfa644039d) Thanks [@cesr](https://github.com/cesr)! - Fix serverless subagent and continuation reliability
8
+ - Use stable internal secret across serverless instances for callback auth
9
+ - Wrap continuation self-fetches in waitUntil to survive function shutdown
10
+ - Set runStatus during callback re-runs so clients detect active processing
11
+ - Add post-streaming soft deadline check to catch long model responses
12
+ - Client auto-recovers from abrupt stream termination and orphaned continuations
13
+ - Fix callback continuation losing \_continuationMessages when no pending results
14
+
15
+ ## 0.28.1
16
+
17
+ ### Patch Changes
18
+
19
+ - [`4d50ad9`](https://github.com/cesr/poncho-ai/commit/4d50ad970886c9d3635ec36a407514c91ce6a71a) Thanks [@cesr](https://github.com/cesr)! - Improve callback-run reliability and streaming across subagent workflows, including safer concurrent approval handling and parent callback retriggers.
20
+
21
+ Add context window/token reporting through run completion events, improve cron/web UI rendering and approval streaming behavior, and harden built-in web search retry/throttle behavior.
22
+
23
+ - Updated dependencies [[`4d50ad9`](https://github.com/cesr/poncho-ai/commit/4d50ad970886c9d3635ec36a407514c91ce6a71a)]:
24
+ - @poncho-ai/sdk@1.6.1
25
+
3
26
  ## 0.28.0
4
27
 
5
28
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -188,6 +188,12 @@ interface ConversationStore {
188
188
  rename(conversationId: string, title: string): Promise<Conversation | undefined>;
189
189
  delete(conversationId: string): Promise<boolean>;
190
190
  appendSubagentResult(conversationId: string, result: PendingSubagentResult): Promise<void>;
191
+ /**
192
+ * Atomically clear `runningCallbackSince` without clobbering other fields.
193
+ * Returns the conversation as it exists after the clear (with current
194
+ * `pendingSubagentResults`).
195
+ */
196
+ clearCallbackLock(conversationId: string): Promise<Conversation | undefined>;
191
197
  }
192
198
  type StateProviderName = "local" | "memory" | "redis" | "upstash" | "dynamodb";
193
199
  interface StateConfig {
@@ -221,6 +227,7 @@ declare class InMemoryConversationStore implements ConversationStore {
221
227
  rename(conversationId: string, title: string): Promise<Conversation | undefined>;
222
228
  delete(conversationId: string): Promise<boolean>;
223
229
  appendSubagentResult(conversationId: string, result: PendingSubagentResult): Promise<void>;
230
+ clearCallbackLock(conversationId: string): Promise<Conversation | undefined>;
224
231
  }
225
232
  type ConversationSummary = {
226
233
  conversationId: string;
package/dist/index.js CHANGED
@@ -1604,6 +1604,8 @@ Remote storage keys are namespaced and versioned, for example \`poncho:v1:<agent
1604
1604
  | \`ANTHROPIC_API_KEY\` | Yes* | Claude API key |
1605
1605
  | \`OPENAI_API_KEY\` | No | OpenAI API key (if using OpenAI) |
1606
1606
  | \`PONCHO_AUTH_TOKEN\` | No | Unified auth token (Web UI passphrase + API Bearer token) |
1607
+ | \`PONCHO_INTERNAL_SECRET\` | No | Shared secret used by internal serverless callbacks (recommended for Vercel/Lambda) |
1608
+ | \`PONCHO_SELF_BASE_URL\` | No | Explicit base URL for internal self-callbacks when auto-detection is unavailable |
1607
1609
  | \`OTEL_EXPORTER_OTLP_ENDPOINT\` | No | Telemetry destination |
1608
1610
  | \`LATITUDE_API_KEY\` | No | Latitude dashboard integration |
1609
1611
  | \`LATITUDE_PROJECT_ID\` | No | Latitude project identifier for capture traces |
@@ -4211,15 +4213,16 @@ var loadRunnableScriptFunction = async (scriptPath) => {
4211
4213
  };
4212
4214
  var loadScriptModule = async (scriptPath) => {
4213
4215
  const extension = extname(scriptPath).toLowerCase();
4216
+ const cacheBust = `?t=${Date.now()}`;
4214
4217
  if (extension === ".ts" || extension === ".mts" || extension === ".cts") {
4215
- const jiti = createJiti2(import.meta.url, { interopDefault: true });
4216
- return await jiti.import(scriptPath);
4218
+ const jiti = createJiti2(import.meta.url, { interopDefault: true, moduleCache: false });
4219
+ return await jiti.import(scriptPath + cacheBust);
4217
4220
  }
4218
4221
  try {
4219
- return await import(pathToFileURL(scriptPath).href);
4222
+ return await import(pathToFileURL(scriptPath).href + cacheBust);
4220
4223
  } catch {
4221
- const jiti = createJiti2(import.meta.url, { interopDefault: true });
4222
- return await jiti.import(scriptPath);
4224
+ const jiti = createJiti2(import.meta.url, { interopDefault: true, moduleCache: false });
4225
+ return await jiti.import(scriptPath + cacheBust);
4223
4226
  }
4224
4227
  };
4225
4228
  var extractRunnableFunction = (value) => {
@@ -4263,21 +4266,93 @@ import { load as cheerioLoad } from "cheerio";
4263
4266
  import { defineTool as defineTool5 } from "@poncho-ai/sdk";
4264
4267
  var SEARCH_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
4265
4268
  var FETCH_TIMEOUT_MS = 15e3;
4269
+ var SEARCH_MAX_RETRIES = 4;
4270
+ var SEARCH_INITIAL_DELAY_MS = 2e3;
4271
+ var SEARCH_MIN_INTERVAL_MS = 4e3;
4272
+ var SEARCH_FALLBACK_COOLDOWN_MS = 12e3;
4273
+ var sleep = (ms) => new Promise((r) => setTimeout(r, ms));
4274
+ var searchQueue = Promise.resolve();
4275
+ var nextSearchAllowedAt = 0;
4276
+ function parseRetryAfterMs(retryAfterHeader) {
4277
+ if (!retryAfterHeader) return SEARCH_FALLBACK_COOLDOWN_MS;
4278
+ const asSeconds = Number(retryAfterHeader);
4279
+ if (Number.isFinite(asSeconds) && asSeconds >= 0) {
4280
+ return Math.max(Math.floor(asSeconds * 1e3), SEARCH_MIN_INTERVAL_MS);
4281
+ }
4282
+ const asDate = new Date(retryAfterHeader).getTime();
4283
+ if (Number.isFinite(asDate)) {
4284
+ return Math.max(asDate - Date.now(), SEARCH_MIN_INTERVAL_MS);
4285
+ }
4286
+ return SEARCH_FALLBACK_COOLDOWN_MS;
4287
+ }
4288
+ function applyRateLimitCooldown(retryAfterHeader) {
4289
+ const cooldownMs = parseRetryAfterMs(retryAfterHeader);
4290
+ nextSearchAllowedAt = Math.max(nextSearchAllowedAt, Date.now() + cooldownMs);
4291
+ }
4292
+ async function runWithSearchThrottle(fn) {
4293
+ const previous = searchQueue;
4294
+ let release;
4295
+ searchQueue = new Promise((resolve12) => {
4296
+ release = resolve12;
4297
+ });
4298
+ await previous.catch(() => {
4299
+ });
4300
+ try {
4301
+ const waitMs = nextSearchAllowedAt - Date.now();
4302
+ if (waitMs > 0) {
4303
+ await sleep(waitMs);
4304
+ }
4305
+ const result = await fn();
4306
+ nextSearchAllowedAt = Math.max(nextSearchAllowedAt, Date.now() + SEARCH_MIN_INTERVAL_MS);
4307
+ return result;
4308
+ } finally {
4309
+ release?.();
4310
+ }
4311
+ }
4312
+ function isRetryableStatus(status) {
4313
+ return status === 429 || status === 503 || status >= 500;
4314
+ }
4266
4315
  async function braveSearch(query, maxResults) {
4267
4316
  const url = `https://search.brave.com/search?q=${encodeURIComponent(query)}`;
4268
- const res = await fetch(url, {
4269
- headers: {
4270
- "User-Agent": SEARCH_UA,
4271
- Accept: "text/html,application/xhtml+xml",
4272
- "Accept-Language": "en-US,en;q=0.9"
4273
- },
4274
- signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
4275
- });
4276
- if (!res.ok) {
4277
- throw new Error(`Search request failed (${res.status} ${res.statusText})`);
4317
+ let lastError;
4318
+ for (let attempt = 0; attempt < SEARCH_MAX_RETRIES; attempt++) {
4319
+ if (attempt > 0) {
4320
+ const delay = SEARCH_INITIAL_DELAY_MS * 2 ** (attempt - 1) + Math.floor(Math.random() * 500);
4321
+ await sleep(delay);
4322
+ }
4323
+ try {
4324
+ const html = await runWithSearchThrottle(async () => {
4325
+ const res = await fetch(url, {
4326
+ headers: {
4327
+ "User-Agent": SEARCH_UA,
4328
+ Accept: "text/html,application/xhtml+xml",
4329
+ "Accept-Language": "en-US,en;q=0.9"
4330
+ },
4331
+ signal: AbortSignal.timeout(FETCH_TIMEOUT_MS)
4332
+ });
4333
+ if (!res.ok) {
4334
+ if (res.status === 429) {
4335
+ applyRateLimitCooldown(res.headers.get("retry-after"));
4336
+ }
4337
+ const error = new Error(`Search request failed (${res.status} ${res.statusText})`);
4338
+ if (isRetryableStatus(res.status)) {
4339
+ throw error;
4340
+ }
4341
+ error.retryable = false;
4342
+ throw error;
4343
+ }
4344
+ return await res.text();
4345
+ });
4346
+ return parseBraveResults(html, maxResults);
4347
+ } catch (err) {
4348
+ lastError = err instanceof Error ? err : new Error(String(err));
4349
+ if (lastError.retryable === false) {
4350
+ throw lastError;
4351
+ }
4352
+ if (attempt < SEARCH_MAX_RETRIES - 1) continue;
4353
+ }
4278
4354
  }
4279
- const html = await res.text();
4280
- return parseBraveResults(html, maxResults);
4355
+ throw lastError ?? new Error("Search failed after retries");
4281
4356
  }
4282
4357
  function parseBraveResults(html, max) {
4283
4358
  const $ = cheerioLoad(html);
@@ -5878,6 +5953,8 @@ ${this.skillFingerprint}`;
5878
5953
  let totalOutputTokens = 0;
5879
5954
  let totalCachedTokens = 0;
5880
5955
  let transientStepRetryCount = 0;
5956
+ let latestContextTokens = 0;
5957
+ let toolOutputEstimateSinceModel = 0;
5881
5958
  let cachedCoreMessages = [];
5882
5959
  let convertedUpTo = 0;
5883
5960
  for (let step = 1; step <= maxSteps; step += 1) {
@@ -5907,7 +5984,9 @@ ${this.skillFingerprint}`;
5907
5984
  duration: now() - start,
5908
5985
  continuation: true,
5909
5986
  continuationMessages: [...messages],
5910
- maxSteps
5987
+ maxSteps,
5988
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
5989
+ contextWindow
5911
5990
  };
5912
5991
  yield pushEvent({ type: "run:completed", runId, result: result2 });
5913
5992
  return;
@@ -6147,10 +6226,13 @@ ${textContent}` };
6147
6226
  if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
6148
6227
  emittedMessages.pop();
6149
6228
  }
6229
+ const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
6230
+ latestContextTokens = tokensAfterCompaction;
6231
+ toolOutputEstimateSinceModel = 0;
6150
6232
  yield pushEvent({
6151
6233
  type: "compaction:completed",
6152
6234
  tokensBefore: effectiveTokens,
6153
- tokensAfter: estimateTotalTokens(integrityPrompt, messages, toolDefsJson),
6235
+ tokensAfter: tokensAfterCompaction,
6154
6236
  messagesBefore: compactResult.messagesBefore,
6155
6237
  compactedMessages: emittedMessages,
6156
6238
  messagesAfter: compactResult.messagesAfter
@@ -6267,6 +6349,22 @@ ${textContent}` };
6267
6349
  yield emitCancellation();
6268
6350
  return;
6269
6351
  }
6352
+ if (softDeadlineMs > 0 && now() - start > softDeadlineMs) {
6353
+ const result_ = {
6354
+ status: "completed",
6355
+ response: responseText + fullText,
6356
+ steps: step,
6357
+ tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
6358
+ duration: now() - start,
6359
+ continuation: true,
6360
+ continuationMessages: [...messages],
6361
+ maxSteps,
6362
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
6363
+ contextWindow
6364
+ };
6365
+ yield pushEvent({ type: "run:completed", runId, result: result_ });
6366
+ return;
6367
+ }
6270
6368
  const finishReason = await result.finishReason;
6271
6369
  if (finishReason === "error") {
6272
6370
  yield pushEvent({
@@ -6297,13 +6395,16 @@ ${textContent}` };
6297
6395
  const usage = await result.usage;
6298
6396
  const toolCallsResult = await result.toolCalls;
6299
6397
  const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
6300
- totalInputTokens += usage.inputTokens ?? 0;
6398
+ const stepInputTokens = usage.inputTokens ?? 0;
6399
+ totalInputTokens += stepInputTokens;
6301
6400
  totalOutputTokens += usage.outputTokens ?? 0;
6302
6401
  totalCachedTokens += stepCachedTokens;
6402
+ latestContextTokens = stepInputTokens;
6403
+ toolOutputEstimateSinceModel = 0;
6303
6404
  yield pushEvent({
6304
6405
  type: "model:response",
6305
6406
  usage: {
6306
- input: usage.inputTokens ?? 0,
6407
+ input: stepInputTokens,
6307
6408
  output: usage.outputTokens ?? 0,
6308
6409
  cached: stepCachedTokens
6309
6410
  }
@@ -6349,7 +6450,9 @@ ${textContent}` };
6349
6450
  output: totalOutputTokens,
6350
6451
  cached: totalCachedTokens
6351
6452
  },
6352
- duration: now() - start
6453
+ duration: now() - start,
6454
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
6455
+ contextWindow
6353
6456
  };
6354
6457
  yield pushEvent({ type: "run:completed", runId, result: result2 });
6355
6458
  return;
@@ -6477,6 +6580,7 @@ ${textContent}` };
6477
6580
  span?.end({ result: { value: result2.output ?? null, isError: false } });
6478
6581
  const serialized = JSON.stringify(result2.output ?? null);
6479
6582
  const outputTokenEstimate = Math.ceil(serialized.length / 4);
6583
+ toolOutputEstimateSinceModel += outputTokenEstimate;
6480
6584
  yield pushEvent({
6481
6585
  type: "tool:completed",
6482
6586
  tool: result2.tool,
@@ -6587,7 +6691,9 @@ ${this.skillFingerprint}`;
6587
6691
  duration: now() - start,
6588
6692
  continuation: true,
6589
6693
  continuationMessages: [...messages],
6590
- maxSteps
6694
+ maxSteps,
6695
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
6696
+ contextWindow
6591
6697
  };
6592
6698
  yield pushEvent({ type: "run:completed", runId, result });
6593
6699
  } else {
@@ -6915,6 +7021,13 @@ var InMemoryConversationStore = class {
6915
7021
  conversation.pendingSubagentResults.push(result);
6916
7022
  conversation.updatedAt = Date.now();
6917
7023
  }
7024
+ async clearCallbackLock(conversationId) {
7025
+ const conversation = this.conversations.get(conversationId);
7026
+ if (!conversation) return void 0;
7027
+ conversation.runningCallbackSince = void 0;
7028
+ conversation.updatedAt = Date.now();
7029
+ return conversation;
7030
+ }
6918
7031
  };
6919
7032
  var FileConversationStore = class {
6920
7033
  workingDir;
@@ -7137,6 +7250,28 @@ var FileConversationStore = class {
7137
7250
  conversation.updatedAt = Date.now();
7138
7251
  await this.update(conversation);
7139
7252
  }
7253
+ async clearCallbackLock(conversationId) {
7254
+ await this.ensureLoaded();
7255
+ const summary = this.conversations.get(conversationId);
7256
+ if (!summary) return void 0;
7257
+ const { conversationsDir } = await this.resolvePaths();
7258
+ const filePath = resolve11(conversationsDir, summary.fileName);
7259
+ let result;
7260
+ this.writing = this.writing.then(async () => {
7261
+ const conv = await this.readConversationFile(summary.fileName);
7262
+ if (!conv) return;
7263
+ conv.runningCallbackSince = void 0;
7264
+ conv.updatedAt = Date.now();
7265
+ await writeJsonAtomic3(filePath, conv);
7266
+ this.conversations.set(conversationId, {
7267
+ ...summary,
7268
+ updatedAt: conv.updatedAt
7269
+ });
7270
+ result = conv;
7271
+ });
7272
+ await this.writing;
7273
+ return result;
7274
+ }
7140
7275
  };
7141
7276
  var FileStateStore = class {
7142
7277
  workingDir;
@@ -7477,6 +7612,18 @@ var KeyValueConversationStoreBase = class {
7477
7612
  await this.update(conversation);
7478
7613
  });
7479
7614
  }
7615
+ async clearCallbackLock(conversationId) {
7616
+ let result;
7617
+ await this.withAppendLock(conversationId, async () => {
7618
+ const conversation = await this.get(conversationId);
7619
+ if (!conversation) return;
7620
+ conversation.runningCallbackSince = void 0;
7621
+ conversation.updatedAt = Date.now();
7622
+ await this.update(conversation);
7623
+ result = conversation;
7624
+ });
7625
+ return result;
7626
+ }
7480
7627
  };
7481
7628
  var UpstashConversationStore = class extends KeyValueConversationStoreBase {
7482
7629
  baseUrl;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.28.0",
3
+ "version": "0.28.2",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -32,7 +32,7 @@
32
32
  "redis": "^5.10.0",
33
33
  "yaml": "^2.4.0",
34
34
  "zod": "^3.22.0",
35
- "@poncho-ai/sdk": "1.6.0"
35
+ "@poncho-ai/sdk": "1.6.1"
36
36
  },
37
37
  "devDependencies": {
38
38
  "@types/mustache": "^4.2.6",
package/src/harness.ts CHANGED
@@ -1554,6 +1554,8 @@ ${boundedMainMemory.trim()}`
1554
1554
  let totalOutputTokens = 0;
1555
1555
  let totalCachedTokens = 0;
1556
1556
  let transientStepRetryCount = 0;
1557
+ let latestContextTokens = 0;
1558
+ let toolOutputEstimateSinceModel = 0;
1557
1559
  let cachedCoreMessages: ModelMessage[] = [];
1558
1560
  let convertedUpTo = 0;
1559
1561
 
@@ -1585,6 +1587,8 @@ ${boundedMainMemory.trim()}`
1585
1587
  continuation: true,
1586
1588
  continuationMessages: [...messages],
1587
1589
  maxSteps,
1590
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
1591
+ contextWindow,
1588
1592
  };
1589
1593
  yield pushEvent({ type: "run:completed", runId, result });
1590
1594
  return;
@@ -1885,10 +1889,13 @@ ${boundedMainMemory.trim()}`
1885
1889
  if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
1886
1890
  emittedMessages.pop();
1887
1891
  }
1892
+ const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
1893
+ latestContextTokens = tokensAfterCompaction;
1894
+ toolOutputEstimateSinceModel = 0;
1888
1895
  yield pushEvent({
1889
1896
  type: "compaction:completed",
1890
1897
  tokensBefore: effectiveTokens,
1891
- tokensAfter: estimateTotalTokens(integrityPrompt, messages, toolDefsJson),
1898
+ tokensAfter: tokensAfterCompaction,
1892
1899
  messagesBefore: compactResult.messagesBefore!,
1893
1900
  compactedMessages: emittedMessages,
1894
1901
  messagesAfter: compactResult.messagesAfter!,
@@ -2023,6 +2030,25 @@ ${boundedMainMemory.trim()}`
2023
2030
  return;
2024
2031
  }
2025
2032
 
2033
+ // Post-streaming soft deadline: if the model stream took long enough to
2034
+ // push past the soft deadline, checkpoint now before tool execution.
2035
+ if (softDeadlineMs > 0 && now() - start > softDeadlineMs) {
2036
+ const result_: RunResult = {
2037
+ status: "completed",
2038
+ response: responseText + fullText,
2039
+ steps: step,
2040
+ tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2041
+ duration: now() - start,
2042
+ continuation: true,
2043
+ continuationMessages: [...messages],
2044
+ maxSteps,
2045
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
2046
+ contextWindow,
2047
+ };
2048
+ yield pushEvent({ type: "run:completed", runId, result: result_ });
2049
+ return;
2050
+ }
2051
+
2026
2052
  // Check finish reason for error / abnormal completions.
2027
2053
  const finishReason = await result.finishReason;
2028
2054
 
@@ -2060,14 +2086,17 @@ ${boundedMainMemory.trim()}`
2060
2086
 
2061
2087
  // Update token usage
2062
2088
  const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
2063
- totalInputTokens += usage.inputTokens ?? 0;
2089
+ const stepInputTokens = usage.inputTokens ?? 0;
2090
+ totalInputTokens += stepInputTokens;
2064
2091
  totalOutputTokens += usage.outputTokens ?? 0;
2065
2092
  totalCachedTokens += stepCachedTokens;
2093
+ latestContextTokens = stepInputTokens;
2094
+ toolOutputEstimateSinceModel = 0;
2066
2095
 
2067
2096
  yield pushEvent({
2068
2097
  type: "model:response",
2069
2098
  usage: {
2070
- input: usage.inputTokens ?? 0,
2099
+ input: stepInputTokens,
2071
2100
  output: usage.outputTokens ?? 0,
2072
2101
  cached: stepCachedTokens,
2073
2102
  },
@@ -2120,6 +2149,8 @@ ${boundedMainMemory.trim()}`
2120
2149
  cached: totalCachedTokens,
2121
2150
  },
2122
2151
  duration: now() - start,
2152
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
2153
+ contextWindow,
2123
2154
  };
2124
2155
  yield pushEvent({ type: "run:completed", runId, result });
2125
2156
  return;
@@ -2290,6 +2321,7 @@ ${boundedMainMemory.trim()}`
2290
2321
  span?.end({ result: { value: result.output ?? null, isError: false } });
2291
2322
  const serialized = JSON.stringify(result.output ?? null);
2292
2323
  const outputTokenEstimate = Math.ceil(serialized.length / 4);
2324
+ toolOutputEstimateSinceModel += outputTokenEstimate;
2293
2325
  yield pushEvent({
2294
2326
  type: "tool:completed",
2295
2327
  tool: result.tool,
@@ -2415,6 +2447,8 @@ ${boundedMainMemory.trim()}`
2415
2447
  continuation: true,
2416
2448
  continuationMessages: [...messages],
2417
2449
  maxSteps,
2450
+ contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
2451
+ contextWindow,
2418
2452
  };
2419
2453
  yield pushEvent({ type: "run:completed", runId, result });
2420
2454
  } else {
@@ -6,6 +6,13 @@ const SEARCH_UA =
6
6
 
7
7
  const FETCH_TIMEOUT_MS = 15_000;
8
8
 
9
+ const SEARCH_MAX_RETRIES = 4;
10
+ const SEARCH_INITIAL_DELAY_MS = 2_000;
11
+ const SEARCH_MIN_INTERVAL_MS = 4_000;
12
+ const SEARCH_FALLBACK_COOLDOWN_MS = 12_000;
13
+
14
+ const sleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));
15
+
9
16
  // ---------------------------------------------------------------------------
10
17
  // web_search — Brave Search HTML scraping (no API key)
11
18
  // ---------------------------------------------------------------------------
@@ -16,21 +23,100 @@ interface SearchResult {
16
23
  snippet: string;
17
24
  }
18
25
 
26
+ let searchQueue: Promise<void> = Promise.resolve();
27
+ let nextSearchAllowedAt = 0;
28
+
29
+ function parseRetryAfterMs(retryAfterHeader: string | null): number {
30
+ if (!retryAfterHeader) return SEARCH_FALLBACK_COOLDOWN_MS;
31
+ const asSeconds = Number(retryAfterHeader);
32
+ if (Number.isFinite(asSeconds) && asSeconds >= 0) {
33
+ return Math.max(Math.floor(asSeconds * 1000), SEARCH_MIN_INTERVAL_MS);
34
+ }
35
+ const asDate = new Date(retryAfterHeader).getTime();
36
+ if (Number.isFinite(asDate)) {
37
+ return Math.max(asDate - Date.now(), SEARCH_MIN_INTERVAL_MS);
38
+ }
39
+ return SEARCH_FALLBACK_COOLDOWN_MS;
40
+ }
41
+
42
+ function applyRateLimitCooldown(retryAfterHeader: string | null): void {
43
+ const cooldownMs = parseRetryAfterMs(retryAfterHeader);
44
+ nextSearchAllowedAt = Math.max(nextSearchAllowedAt, Date.now() + cooldownMs);
45
+ }
46
+
47
+ async function runWithSearchThrottle<T>(fn: () => Promise<T>): Promise<T> {
48
+ const previous = searchQueue;
49
+ let release: (() => void) | undefined;
50
+ searchQueue = new Promise<void>((resolve) => {
51
+ release = resolve;
52
+ });
53
+
54
+ await previous.catch(() => {});
55
+ try {
56
+ const waitMs = nextSearchAllowedAt - Date.now();
57
+ if (waitMs > 0) {
58
+ await sleep(waitMs);
59
+ }
60
+ const result = await fn();
61
+ nextSearchAllowedAt = Math.max(nextSearchAllowedAt, Date.now() + SEARCH_MIN_INTERVAL_MS);
62
+ return result;
63
+ } finally {
64
+ release?.();
65
+ }
66
+ }
67
+
68
+ function isRetryableStatus(status: number): boolean {
69
+ return status === 429 || status === 503 || status >= 500;
70
+ }
71
+
19
72
  async function braveSearch(query: string, maxResults: number): Promise<SearchResult[]> {
20
73
  const url = `https://search.brave.com/search?q=${encodeURIComponent(query)}`;
21
- const res = await fetch(url, {
22
- headers: {
23
- "User-Agent": SEARCH_UA,
24
- Accept: "text/html,application/xhtml+xml",
25
- "Accept-Language": "en-US,en;q=0.9",
26
- },
27
- signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
28
- });
29
- if (!res.ok) {
30
- throw new Error(`Search request failed (${res.status} ${res.statusText})`);
74
+
75
+ let lastError: Error | undefined;
76
+ for (let attempt = 0; attempt < SEARCH_MAX_RETRIES; attempt++) {
77
+ if (attempt > 0) {
78
+ const delay = SEARCH_INITIAL_DELAY_MS * 2 ** (attempt - 1) + Math.floor(Math.random() * 500);
79
+ await sleep(delay);
80
+ }
81
+
82
+ try {
83
+ const html = await runWithSearchThrottle(async () => {
84
+ const res = await fetch(url, {
85
+ headers: {
86
+ "User-Agent": SEARCH_UA,
87
+ Accept: "text/html,application/xhtml+xml",
88
+ "Accept-Language": "en-US,en;q=0.9",
89
+ },
90
+ signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
91
+ });
92
+
93
+ if (!res.ok) {
94
+ if (res.status === 429) {
95
+ applyRateLimitCooldown(res.headers.get("retry-after"));
96
+ }
97
+ const error = new Error(`Search request failed (${res.status} ${res.statusText})`);
98
+ if (isRetryableStatus(res.status)) {
99
+ throw error;
100
+ }
101
+ // Non-retryable status: surface immediately.
102
+ (error as Error & { retryable?: boolean }).retryable = false;
103
+ throw error;
104
+ }
105
+
106
+ return await res.text();
107
+ });
108
+
109
+ return parseBraveResults(html, maxResults);
110
+ } catch (err) {
111
+ lastError = err instanceof Error ? err : new Error(String(err));
112
+ if ((lastError as Error & { retryable?: boolean }).retryable === false) {
113
+ throw lastError;
114
+ }
115
+ if (attempt < SEARCH_MAX_RETRIES - 1) continue;
116
+ }
31
117
  }
32
- const html = await res.text();
33
- return parseBraveResults(html, maxResults);
118
+
119
+ throw lastError ?? new Error("Search failed after retries");
34
120
  }
35
121
 
36
122
  function parseBraveResults(html: string, max: number): SearchResult[] {
@@ -412,17 +412,19 @@ const loadRunnableScriptFunction = async (
412
412
 
413
413
  const loadScriptModule = async (scriptPath: string): Promise<unknown> => {
414
414
  const extension = extname(scriptPath).toLowerCase();
415
- // Node emits noisy warnings when attempting native ESM import on TypeScript
416
- // files in serverless environments. Use jiti first for TS entrypoints.
415
+ // Both Node's native import() and jiti cache modules by URL/path.
416
+ // Append a cache-busting query string so edits made by the agent are
417
+ // picked up on the next run_skill_script call.
418
+ const cacheBust = `?t=${Date.now()}`;
417
419
  if (extension === ".ts" || extension === ".mts" || extension === ".cts") {
418
- const jiti = createJiti(import.meta.url, { interopDefault: true });
419
- return await jiti.import(scriptPath);
420
+ const jiti = createJiti(import.meta.url, { interopDefault: true, moduleCache: false });
421
+ return await jiti.import(scriptPath + cacheBust);
420
422
  }
421
423
  try {
422
- return await import(pathToFileURL(scriptPath).href);
424
+ return await import(pathToFileURL(scriptPath).href + cacheBust);
423
425
  } catch {
424
- const jiti = createJiti(import.meta.url, { interopDefault: true });
425
- return await jiti.import(scriptPath);
426
+ const jiti = createJiti(import.meta.url, { interopDefault: true, moduleCache: false });
427
+ return await jiti.import(scriptPath + cacheBust);
426
428
  }
427
429
  };
428
430
 
package/src/state.ts CHANGED
@@ -84,6 +84,12 @@ export interface ConversationStore {
84
84
  rename(conversationId: string, title: string): Promise<Conversation | undefined>;
85
85
  delete(conversationId: string): Promise<boolean>;
86
86
  appendSubagentResult(conversationId: string, result: PendingSubagentResult): Promise<void>;
87
+ /**
88
+ * Atomically clear `runningCallbackSince` without clobbering other fields.
89
+ * Returns the conversation as it exists after the clear (with current
90
+ * `pendingSubagentResults`).
91
+ */
92
+ clearCallbackLock(conversationId: string): Promise<Conversation | undefined>;
87
93
  }
88
94
 
89
95
  export type StateProviderName =
@@ -325,6 +331,14 @@ export class InMemoryConversationStore implements ConversationStore {
325
331
  conversation.pendingSubagentResults.push(result);
326
332
  conversation.updatedAt = Date.now();
327
333
  }
334
+
335
+ async clearCallbackLock(conversationId: string): Promise<Conversation | undefined> {
336
+ const conversation = this.conversations.get(conversationId);
337
+ if (!conversation) return undefined;
338
+ conversation.runningCallbackSince = undefined;
339
+ conversation.updatedAt = Date.now();
340
+ return conversation;
341
+ }
328
342
  }
329
343
 
330
344
  export type ConversationSummary = {
@@ -607,6 +621,31 @@ class FileConversationStore implements ConversationStore {
607
621
  conversation.updatedAt = Date.now();
608
622
  await this.update(conversation);
609
623
  }
624
+
625
+ async clearCallbackLock(conversationId: string): Promise<Conversation | undefined> {
626
+ await this.ensureLoaded();
627
+ const summary = this.conversations.get(conversationId);
628
+ if (!summary) return undefined;
629
+ const { conversationsDir } = await this.resolvePaths();
630
+ const filePath = resolve(conversationsDir, summary.fileName);
631
+ let result: Conversation | undefined;
632
+ // Read inside the writing chain so we see the latest state after any
633
+ // pending appendSubagentResult writes have flushed.
634
+ this.writing = this.writing.then(async () => {
635
+ const conv = await this.readConversationFile(summary.fileName);
636
+ if (!conv) return;
637
+ conv.runningCallbackSince = undefined;
638
+ conv.updatedAt = Date.now();
639
+ await writeJsonAtomic(filePath, conv);
640
+ this.conversations.set(conversationId, {
641
+ ...summary,
642
+ updatedAt: conv.updatedAt,
643
+ });
644
+ result = conv;
645
+ });
646
+ await this.writing;
647
+ return result;
648
+ }
610
649
  }
611
650
 
612
651
  type LocalStateFile = {
@@ -1005,6 +1044,19 @@ abstract class KeyValueConversationStoreBase implements ConversationStore {
1005
1044
  await this.update(conversation);
1006
1045
  });
1007
1046
  }
1047
+
1048
+ async clearCallbackLock(conversationId: string): Promise<Conversation | undefined> {
1049
+ let result: Conversation | undefined;
1050
+ await this.withAppendLock(conversationId, async () => {
1051
+ const conversation = await this.get(conversationId);
1052
+ if (!conversation) return;
1053
+ conversation.runningCallbackSince = undefined;
1054
+ conversation.updatedAt = Date.now();
1055
+ await this.update(conversation);
1056
+ result = conversation;
1057
+ });
1058
+ return result;
1059
+ }
1008
1060
  }
1009
1061
 
1010
1062
  class UpstashConversationStore extends KeyValueConversationStoreBase {