@poncho-ai/harness 0.31.1 → 0.31.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/harness.ts CHANGED
@@ -12,7 +12,7 @@ import type {
12
12
  ToolContext,
13
13
  ToolDefinition,
14
14
  } from "@poncho-ai/sdk";
15
- import { getTextContent } from "@poncho-ai/sdk";
15
+ import { defineTool, getTextContent } from "@poncho-ai/sdk";
16
16
  import type { UploadStore } from "./upload-store.js";
17
17
  import { PONCHO_UPLOAD_SCHEME, deriveUploadKey } from "./upload-store.js";
18
18
  import { parseAgentFile, parseAgentMarkdown, renderAgentPrompt, type ParsedAgent, type AgentFrontmatter } from "./agent-parser.js";
@@ -72,8 +72,22 @@ export interface HarnessRunOutput {
72
72
  }
73
73
 
74
74
  const now = (): number => Date.now();
75
- const FIRST_CHUNK_TIMEOUT_MS = 300_000; // 300s to receive the first chunk from the model
76
- const MAX_TRANSIENT_STEP_RETRIES = 2;
75
+ const FIRST_CHUNK_TIMEOUT_MS = 90_000; // 90s to receive the first chunk from the model
76
+ const MAX_TRANSIENT_STEP_RETRIES = 1;
77
+ const COMPACTION_CHECK_INTERVAL_STEPS = 3;
78
+ const TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
79
+ const TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
80
+ const TOOL_RESULT_PREVIEW_CHARS = 700;
81
+
82
/**
 * Full, untruncated copy of a single tool result, kept outside the prompt
 * history so a shortened history entry can be looked up again by id.
 */
interface ArchivedToolResult {
  /** Id under which the result is stored and later retrieved. */
  toolResultId: string;
  /** Conversation the result belongs to. */
  conversationId: string;
  /** Name of the tool that produced the result. */
  toolName: string;
  /** Id of the tool call that produced the result. */
  toolCallId: string;
  /** Timestamp recorded when the entry was archived. */
  createdAt: number;
  /** Size of `payload` in bytes. */
  sizeBytes: number;
  /** The complete tool result text. */
  payload: string;
}
77
91
 
78
92
  class FirstChunkTimeoutError extends Error {
79
93
  constructor(modelName: string, timeoutMs: number) {
@@ -140,23 +154,11 @@ const isRetryableModelError = (error: unknown): boolean => {
140
154
  if (error instanceof FirstChunkTimeoutError) {
141
155
  return true;
142
156
  }
143
- if (isNoOutputGeneratedError(error)) {
144
- return true;
145
- }
146
157
  const statusCode = getErrorStatusCode(error);
147
158
  if (typeof statusCode === "number") {
148
159
  return statusCode === 429 || statusCode >= 500;
149
160
  }
150
- if (!error || typeof error !== "object") {
151
- return false;
152
- }
153
- const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
154
- return (
155
- maybeMessage.includes("internal server error") ||
156
- maybeMessage.includes("service unavailable") ||
157
- maybeMessage.includes("gateway timeout") ||
158
- maybeMessage.includes("rate limit")
159
- );
161
+ return false;
160
162
  };
161
163
 
162
164
  const toRunError = (error: unknown): { code: string; message: string; details?: Record<string, unknown> } => {
@@ -225,6 +227,83 @@ const toProviderSafeToolName = (
225
227
  return candidate;
226
228
  };
227
229
 
230
/**
 * Type guard for one serialized tool-result row: a non-null object whose
 * `tool_use_id`, `tool_name` and `content` properties are all strings.
 * Extra properties are allowed.
 */
const isToolResultRow = (
  value: unknown,
): value is { tool_use_id: string; tool_name: string; content: string } => {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  const requiredStringFields = ["tool_use_id", "tool_name", "content"];
  return requiredStringFields.every((field) => typeof candidate[field] === "string");
};
243
+
244
/**
 * Extracts a previously persisted tool-result archive from run parameters.
 *
 * The archive is read from `parameters[TOOL_RESULT_ARCHIVE_PARAM]`. Anything
 * that is not a non-null object yields an empty archive, and individual
 * entries are dropped unless every field has the expected primitive type.
 * Accepted entries are copied field by field, so unknown extra properties on
 * the stored rows are not carried over.
 *
 * @param parameters Run parameters, possibly undefined.
 * @returns A fresh map from the stored key to the validated entry.
 */
const readArchiveFromParameters = (
  parameters: Record<string, unknown> | undefined,
): Record<string, ArchivedToolResult> => {
  const raw = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
  if (typeof raw !== "object" || raw === null) return {};
  const entries: Array<[string, ArchivedToolResult]> = [];
  for (const [key, value] of Object.entries(raw)) {
    if (typeof value !== "object" || value === null) continue;
    const row = value as Record<string, unknown>;
    if (
      typeof row.toolResultId !== "string" ||
      typeof row.conversationId !== "string" ||
      typeof row.toolName !== "string" ||
      typeof row.toolCallId !== "string" ||
      typeof row.createdAt !== "number" ||
      typeof row.sizeBytes !== "number" ||
      typeof row.payload !== "string"
    ) {
      continue;
    }
    entries.push([
      key,
      {
        toolResultId: row.toolResultId,
        conversationId: row.conversationId,
        toolName: row.toolName,
        toolCallId: row.toolCallId,
        createdAt: row.createdAt,
        sizeBytes: row.sizeBytes,
        payload: row.payload,
      },
    ]);
  }
  // Object.fromEntries defines own data properties. A plain `out[key] = …`
  // with a persisted key of "__proto__" would instead hit the prototype
  // setter, silently dropping the entry and replacing the result's prototype.
  return Object.fromEntries(entries);
};
276
+
277
/**
 * Builds the placeholder text that replaces an archived tool result in the
 * prompt history.
 *
 * The notice starts with `TOOL_RESULT_TRUNCATED_PREFIX`, names the archive id
 * and tool, reports how many characters were left out, and then shows a
 * preview of at most `TOOL_RESULT_PREVIEW_CHARS` UTF-16 code units. A
 * trailing "...[truncated]" marker is appended only when something was
 * actually omitted.
 *
 * @param toolResultId Id under which the full payload can be retrieved.
 * @param toolName Name of the tool that produced the payload.
 * @param payload Full tool result text.
 * @returns The notice string.
 */
const makeTruncatedToolResultNotice = (
  toolResultId: string,
  toolName: string,
  payload: string,
): string => {
  let cut = Math.min(payload.length, TOOL_RESULT_PREVIEW_CHARS);
  if (cut > 0 && cut < payload.length) {
    const before = payload.charCodeAt(cut - 1);
    const after = payload.charCodeAt(cut);
    const splitsSurrogatePair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    // Never end the preview on a lone high surrogate: that would put an
    // unpaired code unit into the text sent to the model.
    if (splitsSurrogatePair) cut -= 1;
  }
  const preview = payload.slice(0, cut);
  const omittedChars = payload.length - preview.length;
  return `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${omittedChars}\n${preview}${omittedChars > 0 ? "\n...[truncated]" : ""}`;
};
286
+
287
/**
 * Reports whether any tool message still carries at least one full
 * (not yet truncated) tool-result row.
 *
 * Only messages with role "tool" and string content are inspected. Content
 * that is not valid JSON, or does not parse to an array, is ignored, as are
 * array elements that are not tool-result rows.
 */
const hasUntruncatedToolResults = (messages: Message[]): boolean =>
  messages.some((message) => {
    const rawContent = message.content;
    if (message.role !== "tool" || typeof rawContent !== "string") {
      return false;
    }
    let rows: unknown;
    try {
      rows = JSON.parse(rawContent);
    } catch {
      return false;
    }
    return (
      Array.isArray(rows) &&
      rows.some(
        (row) => isToolResultRow(row) && !row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX),
      )
    );
  });
306
+
228
307
  const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
229
308
 
230
309
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -580,6 +659,7 @@ export class AgentHarness {
580
659
  private agentFileFingerprint = "";
581
660
  private mcpBridge?: LocalMcpBridge;
582
661
  private subagentManager?: SubagentManager;
662
+ private readonly archivedToolResultsByConversation = new Map<string, Record<string, ArchivedToolResult>>();
583
663
 
584
664
  private resolveToolAccess(toolName: string): ToolAccess {
585
665
  const tools = this.loadedConfig?.tools;
@@ -662,6 +742,60 @@ export class AgentHarness {
662
742
  if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
663
743
  this.registerIfMissing(ponchoDocsTool);
664
744
  }
745
+ if (this.isToolEnabled("get_tool_result_by_id")) {
746
+ this.registerIfMissing(this.createGetToolResultByIdTool());
747
+ }
748
+ }
749
+
750
/**
 * Builds the `get_tool_result_by_id` tool, which returns a slice of an
 * archived tool result for the calling conversation.
 *
 * The handler looks the id up in the in-memory archive of the conversation
 * named by `context.conversationId` (falling back to "__default__") and
 * returns either an `error` object on a miss or one page of the payload
 * together with paging metadata.
 *
 * Paging inputs are normalized to whole numbers: `offset` is clamped to
 * `[0, payload.length]` and `limit` to `[1, 20000]` (default 6000), so the
 * reported `offset`, `returnedChars` and `hasMore` always agree with the
 * slice that was actually taken.
 */
private createGetToolResultByIdTool(): ToolDefinition {
  return defineTool({
    name: "get_tool_result_by_id",
    description:
      "Retrieve a previously archived full tool result by id for the current conversation. " +
      "Use this when older tool outputs were truncated in prompt history.",
    inputSchema: {
      type: "object",
      properties: {
        toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
        offset: { type: "number", description: "Optional character offset for paging large payloads" },
        limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" },
      },
      required: ["toolResultId"],
      additionalProperties: false,
    },
    handler: async (input, context) => {
      const conversationId = context.conversationId ?? "__default__";
      const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
      const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
      // Own-property lookup only: ids such as "constructor" or "__proto__"
      // would otherwise resolve to inherited Object.prototype members and
      // crash on `record.payload.length` instead of reporting a miss.
      const record = Object.prototype.hasOwnProperty.call(archive, toolResultId)
        ? archive[toolResultId]
        : undefined;
      if (!record) {
        console.info(
          `[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`,
        );
        return {
          error: `No archived tool result found for id "${toolResultId}" in this conversation.`,
        };
      }
      const totalChars = record.payload.length;
      const requestedOffset = Number(input.offset);
      // NaN (missing or non-numeric) means "from the start"; everything else
      // is truncated to an integer and clamped so it stays JSON-serializable.
      const offset = Number.isNaN(requestedOffset)
        ? 0
        : Math.min(totalChars, Math.max(0, Math.trunc(requestedOffset)));
      // Missing, zero or non-numeric limits fall back to the default.
      const requestedLimit = Number(input.limit) || 6000;
      const limit = Math.min(Math.max(Math.trunc(requestedLimit), 1), 20_000);
      const end = Math.min(totalChars, offset + limit);
      const chunk = record.payload.slice(offset, end);
      console.info(
        `[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" ` +
          `offset=${offset} returned=${chunk.length} total=${totalChars}`,
      );
      return {
        toolResultId: record.toolResultId,
        toolName: record.toolName,
        toolCallId: record.toolCallId,
        totalChars,
        offset,
        returnedChars: chunk.length,
        hasMore: end < totalChars,
        payload: chunk,
      };
    },
  });
}
666
800
 
667
801
  private shouldEnableWriteTool(): boolean {
@@ -691,6 +825,150 @@ export class AgentHarness {
691
825
  return this.parsedAgent?.frontmatter;
692
826
  }
693
827
 
828
/**
 * Returns a shallow copy of the tool-result archive held for a conversation,
 * or an empty object when nothing has been archived for it. The entries
 * themselves are shared with the internal archive.
 */
getToolResultArchive(conversationId: string): Record<string, ArchivedToolResult> {
  const stored = this.archivedToolResultsByConversation.get(conversationId);
  if (!stored) {
    return {};
  }
  return { ...stored };
}
832
+
833
/**
 * Merges an archive carried in the run parameters into the in-memory archive
 * for the conversation and stores the result.
 *
 * Entries from `parameters` win over existing in-memory entries with the
 * same key. The merged map is a new object that replaces the stored one.
 *
 * @returns The merged archive now stored for the conversation.
 */
private seedToolResultArchive(
  conversationId: string,
  parameters: Record<string, unknown> | undefined,
): Record<string, ArchivedToolResult> {
  const fromParameters = readArchiveFromParameters(parameters);
  const inMemory = this.archivedToolResultsByConversation.get(conversationId) ?? {};
  const combined: Record<string, ArchivedToolResult> = { ...inMemory, ...fromParameters };
  this.archivedToolResultsByConversation.set(conversationId, combined);
  return combined;
}
843
+
844
/**
 * Replaces full tool results from earlier turns with short notices and
 * archives the full payloads for later lookup.
 *
 * Which tool messages are left intact:
 * - If any message carries a string `metadata.runId`, the one found first when
 *   scanning from the end identifies the latest run, and tool messages with
 *   that run id are skipped.
 * - Otherwise only the last tool message with string content is skipped.
 *
 * Every other tool message whose content parses to a JSON array has each
 * tool-result row rewritten, except rows that are already truncated or that
 * `shouldPreserveSkillToolResult` keeps. Rewritten messages are mutated in
 * place: `content` is re-serialized and `metadata._richToolResults` is
 * removed.
 *
 * @param messages Conversation messages; affected entries are modified.
 * @param conversationId Conversation whose archive receives the payloads.
 * @returns Whether anything changed, plus counts of rows truncated, rows
 *   newly archived, and characters omitted beyond the preview length.
 */
private truncateHistoricalToolResults(
  messages: Message[],
  conversationId: string,
): { changed: boolean; truncatedCount: number; archivedCount: number; omittedChars: number } {
  const readRunId = (message: Message): string | undefined => {
    const metadata = message.metadata as Record<string, unknown> | undefined;
    return typeof metadata?.runId === "string" ? metadata.runId : undefined;
  };
  const summary = { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };

  // Scan backwards until a message with a run id is found, remembering the
  // newest string-content tool message seen on the way.
  let currentRunId: string | undefined;
  let newestToolIndex = -1;
  let cursor = messages.length;
  while (cursor > 0 && !currentRunId) {
    cursor -= 1;
    const message = messages[cursor]!;
    if (newestToolIndex === -1 && message.role === "tool" && typeof message.content === "string") {
      newestToolIndex = cursor;
    }
    const candidate = readRunId(message);
    if (candidate) {
      currentRunId = candidate;
    }
  }
  if (!currentRunId && newestToolIndex === -1) {
    return summary;
  }

  const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
  this.archivedToolResultsByConversation.set(conversationId, archive);

  messages.forEach((message, position) => {
    const rawContent = message.content;
    if (message.role !== "tool" || typeof rawContent !== "string") return;

    // With a known run id, protect that run's tool messages; without one
    // (legacy conversations), protect only the newest tool message.
    const belongsToLatestTurn = currentRunId
      ? readRunId(message) === currentRunId
      : position === newestToolIndex;
    if (belongsToLatestTurn) return;

    let rows: unknown;
    try {
      rows = JSON.parse(rawContent);
    } catch {
      return;
    }
    if (!Array.isArray(rows)) return;

    let touched = false;
    const rewritten = rows.map((row) => {
      const keepAsIs =
        !isToolResultRow(row) ||
        row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX) ||
        this.shouldPreserveSkillToolResult(row);
      if (keepAsIs) return row;

      const toolResultId: string = row.tool_use_id;
      if (!archive[toolResultId]) {
        archive[toolResultId] = {
          toolResultId,
          conversationId,
          toolName: row.tool_name,
          toolCallId: row.tool_use_id,
          createdAt: now(),
          sizeBytes: Buffer.byteLength(row.content, "utf8"),
          payload: row.content,
        };
        summary.archivedCount += 1;
      }
      summary.omittedChars += Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
      summary.truncatedCount += 1;
      touched = true;
      return {
        ...row,
        content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content),
      };
    });
    if (!touched) return;

    message.content = JSON.stringify(rewritten);
    // Historical messages may still hold full-fidelity `_richToolResults`.
    // Left in place, convertMessage would prefer that path over the
    // truncated `content`, so the token savings would be lost.
    const metadata = message.metadata as Record<string, unknown> | undefined;
    if (metadata && typeof metadata === "object" && "_richToolResults" in metadata) {
      delete metadata._richToolResults;
    }
    summary.changed = true;
  });

  return summary;
}
933
+
934
/**
 * Decides whether a historical tool-result row must stay untruncated.
 *
 * - Rows from tools whose name starts with "todo_" are always kept.
 * - Rows from `activate_skill` / `deactivate_skill` are kept only when their
 *   JSON content names a skill (via `skill` or `activeSkills`) that is
 *   currently in `activeSkillNames`.
 * - Everything else, including "Tool error:" content and content that is not
 *   usable JSON, may be truncated.
 */
private shouldPreserveSkillToolResult(row: {
  tool_use_id: string;
  tool_name: string;
  content: string;
}): boolean {
  const toolName = row.tool_name;
  if (toolName.startsWith("todo_")) {
    return true;
  }
  const isSkillToggle = toolName === "activate_skill" || toolName === "deactivate_skill";
  if (!isSkillToggle) {
    return false;
  }
  const body = row.content.trim();
  if (body.startsWith("Tool error:")) {
    return false;
  }
  try {
    const decoded = JSON.parse(body) as Record<string, unknown>;
    // Gather every skill name the result mentions: the single `skill`
    // (when non-empty) first, then any string members of `activeSkills`.
    const mentioned: string[] = [];
    if (typeof decoded.skill === "string" && decoded.skill) {
      mentioned.push(decoded.skill);
    }
    if (Array.isArray(decoded.activeSkills)) {
      for (const entry of decoded.activeSkills) {
        if (typeof entry === "string") {
          mentioned.push(entry);
        }
      }
    }
    return mentioned.some((name) => this.activeSkillNames.has(name));
  } catch {
    // Non-JSON tool content should not block truncation.
    return false;
  }
}
971
+
694
972
  async getTodos(conversationId: string): Promise<TodoItem[]> {
695
973
  if (!this.todoStore) return [];
696
974
  return this.todoStore.get(conversationId);
@@ -1475,7 +1753,7 @@ export class AgentHarness {
1475
1753
  let agent = this.parsedAgent as ParsedAgent;
1476
1754
  const runId = `run_${randomUUID()}`;
1477
1755
  const start = now();
1478
- const maxSteps = agent.frontmatter.limits?.maxSteps ?? 50;
1756
+ const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
1479
1757
  const configuredTimeout = agent.frontmatter.limits?.timeout;
1480
1758
  const timeoutMs = this.environment === "development" && configuredTimeout == null
1481
1759
  ? 0 // no hard timeout in development unless explicitly configured
@@ -1485,6 +1763,29 @@ export class AgentHarness {
1485
1763
  ? 0
1486
1764
  : platformMaxDurationSec * 800;
1487
1765
  const messages: Message[] = [...(input.messages ?? [])];
1766
+ const conversationId = input.conversationId ?? "__default__";
1767
+ this.seedToolResultArchive(conversationId, input.parameters);
1768
+ const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
1769
+ if (truncationSummary.changed) {
1770
+ console.info(
1771
+ `[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) ` +
1772
+ `(archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) ` +
1773
+ `for conversation="${conversationId}"`,
1774
+ );
1775
+ }
1776
+ const hasFullToolResults = hasUntruncatedToolResults(messages);
1777
+ const enablePromptCache = !hasFullToolResults;
1778
+ if (!enablePromptCache) {
1779
+ console.info(
1780
+ `[poncho][cost] Prompt cache write disabled for run "${runId}" ` +
1781
+ `(untruncated tool results present in history).`,
1782
+ );
1783
+ } else {
1784
+ console.info(
1785
+ `[poncho][cost] Prompt cache write enabled for run "${runId}" ` +
1786
+ `(history has no untruncated tool results).`,
1787
+ );
1788
+ }
1488
1789
  const inputMessageCount = messages.length;
1489
1790
  const events: AgentEvent[] = [];
1490
1791
 
@@ -1583,7 +1884,6 @@ ${boundedMainMemory.trim()}`
1583
1884
  profileDir: string;
1584
1885
  isLaunched: boolean }
1585
1886
  | undefined;
1586
- const conversationId = input.conversationId ?? "__default__";
1587
1887
  if (browserSession) {
1588
1888
  browserCleanups.push(
1589
1889
  browserSession.onFrame(conversationId, (frame) => {
@@ -1655,6 +1955,7 @@ ${boundedMainMemory.trim()}`
1655
1955
  let totalInputTokens = 0;
1656
1956
  let totalOutputTokens = 0;
1657
1957
  let totalCachedTokens = 0;
1958
+ let totalCacheWriteTokens = 0;
1658
1959
  let transientStepRetryCount = 0;
1659
1960
  let latestContextTokens = 0;
1660
1961
  let toolOutputEstimateSinceModel = 0;
@@ -1684,7 +1985,12 @@ ${boundedMainMemory.trim()}`
1684
1985
  status: "completed",
1685
1986
  response: responseText,
1686
1987
  steps: step - 1,
1687
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
1988
+ tokens: {
1989
+ input: totalInputTokens,
1990
+ output: totalOutputTokens,
1991
+ cached: totalCachedTokens,
1992
+ cacheWrite: totalCacheWriteTokens,
1993
+ },
1688
1994
  duration: now() - start,
1689
1995
  continuation: true,
1690
1996
  continuationMessages: [...messages],
@@ -1698,7 +2004,6 @@ ${boundedMainMemory.trim()}`
1698
2004
 
1699
2005
  const stepStart = now();
1700
2006
  yield pushEvent({ type: "step:started", step });
1701
- yield pushEvent({ type: "model:request", tokens: 0 });
1702
2007
 
1703
2008
  const dispatcherTools = this.dispatcher.list();
1704
2009
  const exposedToolNames = new Map<string, string>();
@@ -1720,6 +2025,15 @@ ${boundedMainMemory.trim()}`
1720
2025
  inputSchema: jsonSchemaToZod(tool.inputSchema),
1721
2026
  };
1722
2027
  }
2028
+ const toolDefsJsonForEstimate = JSON.stringify(
2029
+ dispatcherTools.map((t) => ({
2030
+ name: t.name,
2031
+ description: t.description,
2032
+ inputSchema: t.inputSchema,
2033
+ })),
2034
+ );
2035
+ const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
2036
+ yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
1723
2037
 
1724
2038
  // Convert messages to ModelMessage format
1725
2039
  const convertMessage = async (msg: Message): Promise<ModelMessage[]> => {
@@ -1956,20 +2270,11 @@ ${boundedMainMemory.trim()}`
1956
2270
  }
1957
2271
  const modelInstance = this.modelProvider(modelName);
1958
2272
 
1959
- // --- Auto-compaction (step 1 only) ---
1960
- // On step 2+ the messages array contains harness-internal formats
1961
- // (JSON-stringified tool_calls / tool results) that must not leak
1962
- // into the conversation store via compactedMessages.
2273
+ // --- Auto-compaction ---
2274
+ // Re-check every N steps to curb runaway context growth in longer runs.
1963
2275
  const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
1964
- if (compactionConfig.enabled && step === 1) {
1965
- const toolDefsJson = JSON.stringify(
1966
- dispatcherTools.map((t) => ({
1967
- name: t.name,
1968
- description: t.description,
1969
- inputSchema: t.inputSchema,
1970
- })),
1971
- );
1972
- const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
2276
+ if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
2277
+ const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
1973
2278
  const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
1974
2279
  const effectiveTokens = Math.max(estimated, lastReportedInput);
1975
2280
 
@@ -1984,14 +2289,17 @@ ${boundedMainMemory.trim()}`
1984
2289
  if (compactResult.compacted) {
1985
2290
  messages.length = 0;
1986
2291
  messages.push(...compactResult.messages);
1987
- // Strip the trailing user task message so runners can use
1988
- // compactedMessages directly as historyMessages without
1989
- // duplicating the user turn they append themselves.
1990
- const emittedMessages = [...compactResult.messages];
1991
- if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
1992
- emittedMessages.pop();
2292
+ let emittedMessages: Message[] | undefined;
2293
+ if (step === 1) {
2294
+ // Strip the trailing user task message so runners can use
2295
+ // compactedMessages directly as historyMessages without
2296
+ // duplicating the user turn they append themselves.
2297
+ emittedMessages = [...compactResult.messages];
2298
+ if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
2299
+ emittedMessages.pop();
2300
+ }
1993
2301
  }
1994
- const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
2302
+ const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
1995
2303
  latestContextTokens = tokensAfterCompaction;
1996
2304
  toolOutputEstimateSinceModel = 0;
1997
2305
  yield pushEvent({
@@ -2024,7 +2332,9 @@ ${boundedMainMemory.trim()}`
2024
2332
 
2025
2333
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2026
2334
  const maxTokens = agent.frontmatter.model?.maxTokens;
2027
- const cachedMessages = addPromptCacheBreakpoints(coreMessages, modelInstance);
2335
+ const cachedMessages = enablePromptCache
2336
+ ? addPromptCacheBreakpoints(coreMessages, modelInstance)
2337
+ : coreMessages;
2028
2338
 
2029
2339
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2030
2340
 
@@ -2146,14 +2456,19 @@ ${boundedMainMemory.trim()}`
2146
2456
  messages.push({
2147
2457
  role: "assistant",
2148
2458
  content: fullText,
2149
- metadata: { timestamp: now(), id: randomUUID(), step },
2459
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
2150
2460
  });
2151
2461
  }
2152
2462
  const result_: RunResult = {
2153
2463
  status: "completed",
2154
2464
  response: responseText + fullText,
2155
2465
  steps: step,
2156
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2466
+ tokens: {
2467
+ input: totalInputTokens,
2468
+ output: totalOutputTokens,
2469
+ cached: totalCachedTokens,
2470
+ cacheWrite: totalCacheWriteTokens,
2471
+ },
2157
2472
  duration: now() - start,
2158
2473
  continuation: true,
2159
2474
  continuationMessages: [...messages],
@@ -2178,14 +2493,19 @@ ${boundedMainMemory.trim()}`
2178
2493
  messages.push({
2179
2494
  role: "assistant",
2180
2495
  content: fullText,
2181
- metadata: { timestamp: now(), id: randomUUID(), step },
2496
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
2182
2497
  });
2183
2498
  }
2184
2499
  const result_: RunResult = {
2185
2500
  status: "completed",
2186
2501
  response: responseText + fullText,
2187
2502
  steps: step,
2188
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2503
+ tokens: {
2504
+ input: totalInputTokens,
2505
+ output: totalOutputTokens,
2506
+ cached: totalCachedTokens,
2507
+ cacheWrite: totalCacheWriteTokens,
2508
+ },
2189
2509
  duration: now() - start,
2190
2510
  continuation: true,
2191
2511
  continuationMessages: [...messages],
@@ -2233,11 +2553,21 @@ ${boundedMainMemory.trim()}`
2233
2553
  const toolCallsResult = await result.toolCalls;
2234
2554
 
2235
2555
  // Update token usage
2236
- const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
2556
+ const details = (usage.inputTokenDetails ?? {}) as Record<string, unknown>;
2557
+ const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
2558
+ const stepCacheWriteTokens =
2559
+ typeof details.cacheWriteTokens === "number"
2560
+ ? details.cacheWriteTokens
2561
+ : typeof details.cacheCreationTokens === "number"
2562
+ ? details.cacheCreationTokens
2563
+ : typeof details.cacheCreationInputTokens === "number"
2564
+ ? details.cacheCreationInputTokens
2565
+ : 0;
2237
2566
  const stepInputTokens = usage.inputTokens ?? 0;
2238
2567
  totalInputTokens += stepInputTokens;
2239
2568
  totalOutputTokens += usage.outputTokens ?? 0;
2240
2569
  totalCachedTokens += stepCachedTokens;
2570
+ totalCacheWriteTokens += stepCacheWriteTokens;
2241
2571
  latestContextTokens = stepInputTokens;
2242
2572
  toolOutputEstimateSinceModel = 0;
2243
2573
 
@@ -2247,8 +2577,15 @@ ${boundedMainMemory.trim()}`
2247
2577
  input: stepInputTokens,
2248
2578
  output: usage.outputTokens ?? 0,
2249
2579
  cached: stepCachedTokens,
2580
+ cacheWrite: stepCacheWriteTokens,
2250
2581
  },
2251
2582
  });
2583
+ console.info(
2584
+ `[poncho][cost] model="${modelName}" step=${step} ` +
2585
+ `input=${stepInputTokens} output=${usage.outputTokens ?? 0} ` +
2586
+ `cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} ` +
2587
+ `totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`,
2588
+ );
2252
2589
 
2253
2590
  // Extract tool calls
2254
2591
  const toolCalls = toolCallsResult.map((tc) => ({
@@ -2285,7 +2622,7 @@ ${boundedMainMemory.trim()}`
2285
2622
  messages.push({
2286
2623
  role: "assistant",
2287
2624
  content: fullText,
2288
- metadata: { timestamp: now(), id: randomUUID(), step },
2625
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
2289
2626
  });
2290
2627
  }
2291
2628
  responseText = fullText;
@@ -2302,6 +2639,7 @@ ${boundedMainMemory.trim()}`
2302
2639
  input: totalInputTokens,
2303
2640
  output: totalOutputTokens,
2304
2641
  cached: totalCachedTokens,
2642
+ cacheWrite: totalCacheWriteTokens,
2305
2643
  },
2306
2644
  duration: now() - start,
2307
2645
  contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
@@ -2398,7 +2736,7 @@ ${boundedMainMemory.trim()}`
2398
2736
  const assistantMsg: Message = {
2399
2737
  role: "assistant",
2400
2738
  content: assistantContent,
2401
- metadata: { timestamp: now(), id: randomUUID(), step },
2739
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
2402
2740
  };
2403
2741
  const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
2404
2742
  yield pushEvent({
@@ -2498,14 +2836,19 @@ ${boundedMainMemory.trim()}`
2498
2836
  messages.push({
2499
2837
  role: "assistant",
2500
2838
  content: fullText,
2501
- metadata: { timestamp: now(), id: randomUUID(), step },
2839
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
2502
2840
  });
2503
2841
  }
2504
2842
  const result_: RunResult = {
2505
2843
  status: "completed",
2506
2844
  response: responseText + fullText,
2507
2845
  steps: step,
2508
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2846
+ tokens: {
2847
+ input: totalInputTokens,
2848
+ output: totalOutputTokens,
2849
+ cached: totalCachedTokens,
2850
+ cacheWrite: totalCacheWriteTokens,
2851
+ },
2509
2852
  duration: now() - start,
2510
2853
  continuation: true,
2511
2854
  continuationMessages: [...messages],
@@ -2538,6 +2881,20 @@ ${boundedMainMemory.trim()}`
2538
2881
  tool_name: result.tool,
2539
2882
  content: `Tool error: ${result.error}`,
2540
2883
  });
2884
+ {
2885
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
2886
+ if (archive) {
2887
+ archive[result.callId] = {
2888
+ toolResultId: result.callId,
2889
+ conversationId,
2890
+ toolName: result.tool,
2891
+ toolCallId: result.callId,
2892
+ createdAt: now(),
2893
+ sizeBytes: Buffer.byteLength(`Tool error: ${result.error}`, "utf8"),
2894
+ payload: `Tool error: ${result.error}`,
2895
+ };
2896
+ }
2897
+ }
2541
2898
  richToolResults.push({
2542
2899
  type: "tool-result",
2543
2900
  toolCallId: result.callId,
@@ -2564,6 +2921,21 @@ ${boundedMainMemory.trim()}`
2564
2921
  tool_name: result.tool,
2565
2922
  content: JSON.stringify(strippedOutput ?? null),
2566
2923
  });
2924
+ {
2925
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
2926
+ if (archive) {
2927
+ const payload = JSON.stringify(result.output ?? null);
2928
+ archive[result.callId] = {
2929
+ toolResultId: result.callId,
2930
+ conversationId,
2931
+ toolName: result.tool,
2932
+ toolCallId: result.callId,
2933
+ createdAt: now(),
2934
+ sizeBytes: Buffer.byteLength(payload, "utf8"),
2935
+ payload,
2936
+ };
2937
+ }
2938
+ }
2567
2939
 
2568
2940
  if (mediaItems.length > 0) {
2569
2941
  richToolResults.push({
@@ -2604,9 +2976,15 @@ ${boundedMainMemory.trim()}`
2604
2976
  messages.push({
2605
2977
  role: "assistant",
2606
2978
  content: assistantContent,
2607
- metadata: { timestamp: now(), id: randomUUID(), step },
2979
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
2608
2980
  });
2609
- const toolMsgMeta: Record<string, unknown> = { timestamp: now(), id: randomUUID(), step, _richToolResults: richToolResults };
2981
+ const toolMsgMeta: Record<string, unknown> = {
2982
+ timestamp: now(),
2983
+ id: randomUUID(),
2984
+ step,
2985
+ runId,
2986
+ _richToolResults: richToolResults,
2987
+ };
2610
2988
  messages.push({
2611
2989
  role: "tool",
2612
2990
  content: JSON.stringify(toolResultsForModel),
@@ -2621,7 +2999,12 @@ ${boundedMainMemory.trim()}`
2621
2999
  status: "completed",
2622
3000
  response: responseText + fullText,
2623
3001
  steps: step,
2624
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
3002
+ tokens: {
3003
+ input: totalInputTokens,
3004
+ output: totalOutputTokens,
3005
+ cached: totalCachedTokens,
3006
+ cacheWrite: totalCacheWriteTokens,
3007
+ },
2625
3008
  duration: now() - start,
2626
3009
  continuation: true,
2627
3010
  continuationMessages: [...messages],
@@ -2689,7 +3072,12 @@ ${boundedMainMemory.trim()}`
2689
3072
  status: "completed",
2690
3073
  response: responseText,
2691
3074
  steps: maxSteps,
2692
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
3075
+ tokens: {
3076
+ input: totalInputTokens,
3077
+ output: totalOutputTokens,
3078
+ cached: totalCachedTokens,
3079
+ cacheWrite: totalCacheWriteTokens,
3080
+ },
2693
3081
  duration: now() - start,
2694
3082
  continuation: true,
2695
3083
  continuationMessages: [...messages],