@poncho-ai/harness 0.31.0 → 0.31.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/harness.ts CHANGED
@@ -12,7 +12,7 @@ import type {
12
12
  ToolContext,
13
13
  ToolDefinition,
14
14
  } from "@poncho-ai/sdk";
15
- import { getTextContent } from "@poncho-ai/sdk";
15
+ import { defineTool, getTextContent } from "@poncho-ai/sdk";
16
16
  import type { UploadStore } from "./upload-store.js";
17
17
  import { PONCHO_UPLOAD_SCHEME, deriveUploadKey } from "./upload-store.js";
18
18
  import { parseAgentFile, parseAgentMarkdown, renderAgentPrompt, type ParsedAgent, type AgentFrontmatter } from "./agent-parser.js";
@@ -72,8 +72,22 @@ export interface HarnessRunOutput {
72
72
  }
73
73
 
74
74
  const now = (): number => Date.now();
75
- const FIRST_CHUNK_TIMEOUT_MS = 300_000; // 300s to receive the first chunk from the model
76
- const MAX_TRANSIENT_STEP_RETRIES = 2;
75
/** How long to wait for the first chunk from the model, in milliseconds (90s). */
const FIRST_CHUNK_TIMEOUT_MS = 90 * 1_000;

/** Upper bound on transient step retries (NOTE(review): usage site is outside this view — confirm). */
const MAX_TRANSIENT_STEP_RETRIES = 1;

/** Auto-compaction is re-checked on step 1 and on every step divisible by this value. */
const COMPACTION_CHECK_INTERVAL_STEPS = 3;

/** Key under which a previously saved tool-result archive is read from run parameters. */
const TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";

/** Marker placed at the start of a tool result whose content was replaced by a truncated notice. */
const TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";

/** Number of leading characters of the original payload kept as a preview in a truncated notice. */
const TOOL_RESULT_PREVIEW_CHARS = 700;
81
+
82
/**
 * A full tool result kept outside the prompt history so that the history
 * itself can carry only a short truncated notice.
 */
interface ArchivedToolResult {
  /** Identifier used to look the record up again (the tool call id in this file). */
  toolResultId: string;
  /** Conversation the result belongs to. */
  conversationId: string;
  /** Name of the tool that produced the result. */
  toolName: string;
  /** Id of the tool call that produced the result. */
  toolCallId: string;
  /** Archive time as a millisecond timestamp (`Date.now()`). */
  createdAt: number;
  /** UTF-8 byte length of `payload`. */
  sizeBytes: number;
  /** The complete, untruncated tool result text. */
  payload: string;
}
77
91
 
78
92
  class FirstChunkTimeoutError extends Error {
79
93
  constructor(modelName: string, timeoutMs: number) {
@@ -140,23 +154,11 @@ const isRetryableModelError = (error: unknown): boolean => {
140
154
  if (error instanceof FirstChunkTimeoutError) {
141
155
  return true;
142
156
  }
143
- if (isNoOutputGeneratedError(error)) {
144
- return true;
145
- }
146
157
  const statusCode = getErrorStatusCode(error);
147
158
  if (typeof statusCode === "number") {
148
159
  return statusCode === 429 || statusCode >= 500;
149
160
  }
150
- if (!error || typeof error !== "object") {
151
- return false;
152
- }
153
- const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
154
- return (
155
- maybeMessage.includes("internal server error") ||
156
- maybeMessage.includes("service unavailable") ||
157
- maybeMessage.includes("gateway timeout") ||
158
- maybeMessage.includes("rate limit")
159
- );
161
+ return false;
160
162
  };
161
163
 
162
164
  const toRunError = (error: unknown): { code: string; message: string; details?: Record<string, unknown> } => {
@@ -225,6 +227,83 @@ const toProviderSafeToolName = (
225
227
  return candidate;
226
228
  };
227
229
 
230
/**
 * Type guard for a single serialized tool-result row.
 *
 * @param value - Any value, typically one element of a JSON-parsed tool message.
 * @returns `true` when `value` is a non-null object whose `tool_use_id`,
 *   `tool_name` and `content` properties are all strings.
 */
const isToolResultRow = (
  value: unknown,
): value is { tool_use_id: string; tool_name: string; content: string } => {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  const requiredStringFields = ["tool_use_id", "tool_name", "content"];
  return requiredStringFields.every((field) => typeof candidate[field] === "string");
};
243
+
244
/**
 * Extracts a tool-result archive from run parameters.
 *
 * Reads `parameters[TOOL_RESULT_ARCHIVE_PARAM]` and keeps only the entries
 * that carry every `ArchivedToolResult` field with the expected primitive
 * type. Malformed entries are skipped rather than reported.
 *
 * @param parameters - Run parameters, possibly undefined.
 * @returns A fresh map from archive key to a validated copy of the record;
 *   empty when the parameter is missing or is not an object.
 */
const readArchiveFromParameters = (
  parameters: Record<string, unknown> | undefined,
): Record<string, ArchivedToolResult> => {
  const raw = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
  if (typeof raw !== "object" || raw === null) return {};
  const entries: Array<[string, ArchivedToolResult]> = [];
  for (const [key, value] of Object.entries(raw)) {
    if (typeof value !== "object" || value === null) continue;
    const row = value as Record<string, unknown>;
    if (
      typeof row.toolResultId !== "string" ||
      typeof row.conversationId !== "string" ||
      typeof row.toolName !== "string" ||
      typeof row.toolCallId !== "string" ||
      typeof row.createdAt !== "number" ||
      typeof row.sizeBytes !== "number" ||
      typeof row.payload !== "string"
    ) {
      continue;
    }
    // Copy field by field so unknown extra properties are not carried along.
    entries.push([
      key,
      {
        toolResultId: row.toolResultId,
        conversationId: row.conversationId,
        toolName: row.toolName,
        toolCallId: row.toolCallId,
        createdAt: row.createdAt,
        sizeBytes: row.sizeBytes,
        payload: row.payload,
      },
    ]);
  }
  // Object.fromEntries defines own data properties, so a key such as
  // "__proto__" (possible in JSON-parsed input) becomes a normal entry
  // instead of replacing the result object's prototype.
  return Object.fromEntries(entries);
};
276
+
277
/**
 * Builds the replacement text for an archived tool result.
 *
 * The notice starts with `TOOL_RESULT_TRUNCATED_PREFIX`, names the archive id
 * and tool, states how many characters were left out, and is followed by a
 * preview of at most `TOOL_RESULT_PREVIEW_CHARS` leading characters.
 *
 * @param toolResultId - Archive id to embed in the notice.
 * @param toolName - Name of the tool that produced the payload.
 * @param payload - The full original tool result text.
 * @returns The notice string; a trailing "...[truncated]" line is appended
 *   only when part of the payload was actually omitted.
 */
const makeTruncatedToolResultNotice = (
  toolResultId: string,
  toolName: string,
  payload: string,
): string => {
  let cut = Math.min(payload.length, TOOL_RESULT_PREVIEW_CHARS);
  if (cut > 0 && cut < payload.length) {
    const lastUnit = payload.charCodeAt(cut - 1);
    const nextUnit = payload.charCodeAt(cut);
    const splitsSurrogatePair =
      lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    // Never end the preview on the first half of a surrogate pair: a lone
    // surrogate is not valid text once the notice is serialized and sent on.
    if (splitsSurrogatePair) {
      cut -= 1;
    }
  }
  const preview = payload.slice(0, cut);
  const omittedChars = Math.max(0, payload.length - preview.length);
  return `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${omittedChars}\n${preview}${omittedChars > 0 ? "\n...[truncated]" : ""}`;
};
286
+
287
/**
 * Reports whether any tool message still contains a full (non-truncated) result.
 *
 * Only messages with role "tool" and string content are inspected. Content
 * that is not valid JSON, or does not parse to an array, is ignored, as are
 * array elements that are not tool-result rows.
 *
 * @param messages - Conversation messages to scan.
 * @returns `true` as soon as one tool-result row is found whose content does
 *   not start with `TOOL_RESULT_TRUNCATED_PREFIX`; otherwise `false`.
 */
const hasUntruncatedToolResults = (messages: Message[]): boolean =>
  messages.some((message) => {
    if (message.role !== "tool") {
      return false;
    }
    const serialized = message.content;
    if (typeof serialized !== "string") {
      return false;
    }
    let rows: unknown;
    try {
      rows = JSON.parse(serialized);
    } catch {
      return false;
    }
    if (!Array.isArray(rows)) {
      return false;
    }
    return rows.some(
      (row: unknown) =>
        isToolResultRow(row) && !row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX),
    );
  });
306
+
228
307
  const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
229
308
 
230
309
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -580,6 +659,7 @@ export class AgentHarness {
580
659
  private agentFileFingerprint = "";
581
660
  private mcpBridge?: LocalMcpBridge;
582
661
  private subagentManager?: SubagentManager;
662
+ private readonly archivedToolResultsByConversation = new Map<string, Record<string, ArchivedToolResult>>();
583
663
 
584
664
  private resolveToolAccess(toolName: string): ToolAccess {
585
665
  const tools = this.loadedConfig?.tools;
@@ -662,6 +742,60 @@ export class AgentHarness {
662
742
  if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
663
743
  this.registerIfMissing(ponchoDocsTool);
664
744
  }
745
+ if (this.isToolEnabled("get_tool_result_by_id")) {
746
+ this.registerIfMissing(this.createGetToolResultByIdTool());
747
+ }
748
+ }
749
+
750
+ private createGetToolResultByIdTool(): ToolDefinition {
751
+ return defineTool({
752
+ name: "get_tool_result_by_id",
753
+ description:
754
+ "Retrieve a previously archived full tool result by id for the current conversation. " +
755
+ "Use this when older tool outputs were truncated in prompt history.",
756
+ inputSchema: {
757
+ type: "object",
758
+ properties: {
759
+ toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
760
+ offset: { type: "number", description: "Optional character offset for paging large payloads" },
761
+ limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" },
762
+ },
763
+ required: ["toolResultId"],
764
+ additionalProperties: false,
765
+ },
766
+ handler: async (input, context) => {
767
+ const conversationId = context.conversationId ?? "__default__";
768
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
769
+ const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
770
+ const record = archive[toolResultId];
771
+ if (!record) {
772
+ console.info(
773
+ `[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`,
774
+ );
775
+ return {
776
+ error: `No archived tool result found for id "${toolResultId}" in this conversation.`,
777
+ };
778
+ }
779
+ const offset = Math.max(0, Number(input.offset) || 0);
780
+ const limit = Math.min(Math.max(Number(input.limit) || 6000, 1), 20_000);
781
+ const end = Math.min(record.payload.length, offset + limit);
782
+ const chunk = record.payload.slice(offset, end);
783
+ console.info(
784
+ `[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" ` +
785
+ `offset=${offset} returned=${chunk.length} total=${record.payload.length}`,
786
+ );
787
+ return {
788
+ toolResultId: record.toolResultId,
789
+ toolName: record.toolName,
790
+ toolCallId: record.toolCallId,
791
+ totalChars: record.payload.length,
792
+ offset,
793
+ returnedChars: chunk.length,
794
+ hasMore: end < record.payload.length,
795
+ payload: chunk,
796
+ };
797
+ },
798
+ });
665
799
  }
666
800
 
667
801
  private shouldEnableWriteTool(): boolean {
@@ -691,6 +825,140 @@ export class AgentHarness {
691
825
  return this.parsedAgent?.frontmatter;
692
826
  }
693
827
 
828
+ getToolResultArchive(conversationId: string): Record<string, ArchivedToolResult> {
829
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
830
+ return archive ? { ...archive } : {};
831
+ }
832
+
833
+ private seedToolResultArchive(
834
+ conversationId: string,
835
+ parameters: Record<string, unknown> | undefined,
836
+ ): Record<string, ArchivedToolResult> {
837
+ const seeded = readArchiveFromParameters(parameters);
838
+ const existing = this.archivedToolResultsByConversation.get(conversationId) ?? {};
839
+ const merged = { ...existing, ...seeded };
840
+ this.archivedToolResultsByConversation.set(conversationId, merged);
841
+ return merged;
842
+ }
843
+
844
+ private truncateHistoricalToolResults(
845
+ messages: Message[],
846
+ conversationId: string,
847
+ ): { changed: boolean; truncatedCount: number; archivedCount: number; omittedChars: number } {
848
+ let latestRunId: string | undefined;
849
+ for (let i = messages.length - 1; i >= 0; i -= 1) {
850
+ const msg = messages[i]!;
851
+ const meta = msg.metadata as Record<string, unknown> | undefined;
852
+ const runId = typeof meta?.runId === "string" ? meta.runId : undefined;
853
+ if (runId) {
854
+ latestRunId = runId;
855
+ break;
856
+ }
857
+ }
858
+ if (!latestRunId) {
859
+ return { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };
860
+ }
861
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
862
+ this.archivedToolResultsByConversation.set(conversationId, archive);
863
+ let changed = false;
864
+ let truncatedCount = 0;
865
+ let archivedCount = 0;
866
+ let omittedChars = 0;
867
+
868
+ for (const msg of messages) {
869
+ if (msg.role !== "tool" || typeof msg.content !== "string") continue;
870
+ const meta = msg.metadata as Record<string, unknown> | undefined;
871
+ const runId = typeof meta?.runId === "string" ? meta.runId : undefined;
872
+ if (runId === latestRunId) continue;
873
+ let parsed: unknown;
874
+ try {
875
+ parsed = JSON.parse(msg.content);
876
+ } catch {
877
+ continue;
878
+ }
879
+ if (!Array.isArray(parsed)) continue;
880
+ let rowChanged = false;
881
+ const nextRows = parsed.map((row) => {
882
+ if (!isToolResultRow(row)) return row;
883
+ if (row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) return row;
884
+ if (this.shouldPreserveSkillToolResult(row)) return row;
885
+ const toolResultId = row.tool_use_id;
886
+ if (!archive[toolResultId]) {
887
+ archive[toolResultId] = {
888
+ toolResultId,
889
+ conversationId,
890
+ toolName: row.tool_name,
891
+ toolCallId: row.tool_use_id,
892
+ createdAt: now(),
893
+ sizeBytes: Buffer.byteLength(row.content, "utf8"),
894
+ payload: row.content,
895
+ };
896
+ archivedCount += 1;
897
+ }
898
+ const omitted = Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
899
+ omittedChars += omitted;
900
+ truncatedCount += 1;
901
+ rowChanged = true;
902
+ return {
903
+ ...row,
904
+ content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content),
905
+ };
906
+ });
907
+ if (rowChanged) {
908
+ msg.content = JSON.stringify(nextRows);
909
+ // Critical: historical messages may still carry full-fidelity
910
+ // `_richToolResults`. If we keep it, convertMessage will prefer that
911
+ // path and bypass truncated `content`, causing token growth to remain.
912
+ if (msg.metadata && typeof msg.metadata === "object") {
913
+ const meta = msg.metadata as Record<string, unknown>;
914
+ if ("_richToolResults" in meta) {
915
+ delete meta._richToolResults;
916
+ }
917
+ }
918
+ changed = true;
919
+ }
920
+ }
921
+ return { changed, truncatedCount, archivedCount, omittedChars };
922
+ }
923
+
924
+ private shouldPreserveSkillToolResult(row: {
925
+ tool_use_id: string;
926
+ tool_name: string;
927
+ content: string;
928
+ }): boolean {
929
+ if (row.tool_name.startsWith("todo_")) {
930
+ return true;
931
+ }
932
+ if (row.tool_name !== "activate_skill" && row.tool_name !== "deactivate_skill") {
933
+ return false;
934
+ }
935
+ const content = row.content.trim();
936
+ if (content.startsWith("Tool error:")) {
937
+ return false;
938
+ }
939
+ try {
940
+ const parsed = JSON.parse(content) as Record<string, unknown>;
941
+ const skill =
942
+ typeof parsed.skill === "string"
943
+ ? parsed.skill
944
+ : undefined;
945
+ if (skill && this.activeSkillNames.has(skill)) {
946
+ return true;
947
+ }
948
+ const activeSkills = Array.isArray(parsed.activeSkills)
949
+ ? parsed.activeSkills.filter((v): v is string => typeof v === "string")
950
+ : [];
951
+ for (const name of activeSkills) {
952
+ if (this.activeSkillNames.has(name)) {
953
+ return true;
954
+ }
955
+ }
956
+ } catch {
957
+ // Non-JSON tool content should not block truncation.
958
+ }
959
+ return false;
960
+ }
961
+
694
962
  async getTodos(conversationId: string): Promise<TodoItem[]> {
695
963
  if (!this.todoStore) return [];
696
964
  return this.todoStore.get(conversationId);
@@ -1475,7 +1743,7 @@ export class AgentHarness {
1475
1743
  let agent = this.parsedAgent as ParsedAgent;
1476
1744
  const runId = `run_${randomUUID()}`;
1477
1745
  const start = now();
1478
- const maxSteps = agent.frontmatter.limits?.maxSteps ?? 50;
1746
+ const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
1479
1747
  const configuredTimeout = agent.frontmatter.limits?.timeout;
1480
1748
  const timeoutMs = this.environment === "development" && configuredTimeout == null
1481
1749
  ? 0 // no hard timeout in development unless explicitly configured
@@ -1485,6 +1753,29 @@ export class AgentHarness {
1485
1753
  ? 0
1486
1754
  : platformMaxDurationSec * 800;
1487
1755
  const messages: Message[] = [...(input.messages ?? [])];
1756
+ const conversationId = input.conversationId ?? "__default__";
1757
+ this.seedToolResultArchive(conversationId, input.parameters);
1758
+ const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
1759
+ if (truncationSummary.changed) {
1760
+ console.info(
1761
+ `[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) ` +
1762
+ `(archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) ` +
1763
+ `for conversation="${conversationId}"`,
1764
+ );
1765
+ }
1766
+ const hasFullToolResults = hasUntruncatedToolResults(messages);
1767
+ const enablePromptCache = !hasFullToolResults;
1768
+ if (!enablePromptCache) {
1769
+ console.info(
1770
+ `[poncho][cost] Prompt cache write disabled for run "${runId}" ` +
1771
+ `(untruncated tool results present in history).`,
1772
+ );
1773
+ } else {
1774
+ console.info(
1775
+ `[poncho][cost] Prompt cache write enabled for run "${runId}" ` +
1776
+ `(history has no untruncated tool results).`,
1777
+ );
1778
+ }
1488
1779
  const inputMessageCount = messages.length;
1489
1780
  const events: AgentEvent[] = [];
1490
1781
 
@@ -1583,7 +1874,6 @@ ${boundedMainMemory.trim()}`
1583
1874
  profileDir: string;
1584
1875
  isLaunched: boolean }
1585
1876
  | undefined;
1586
- const conversationId = input.conversationId ?? "__default__";
1587
1877
  if (browserSession) {
1588
1878
  browserCleanups.push(
1589
1879
  browserSession.onFrame(conversationId, (frame) => {
@@ -1655,6 +1945,7 @@ ${boundedMainMemory.trim()}`
1655
1945
  let totalInputTokens = 0;
1656
1946
  let totalOutputTokens = 0;
1657
1947
  let totalCachedTokens = 0;
1948
+ let totalCacheWriteTokens = 0;
1658
1949
  let transientStepRetryCount = 0;
1659
1950
  let latestContextTokens = 0;
1660
1951
  let toolOutputEstimateSinceModel = 0;
@@ -1684,7 +1975,12 @@ ${boundedMainMemory.trim()}`
1684
1975
  status: "completed",
1685
1976
  response: responseText,
1686
1977
  steps: step - 1,
1687
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
1978
+ tokens: {
1979
+ input: totalInputTokens,
1980
+ output: totalOutputTokens,
1981
+ cached: totalCachedTokens,
1982
+ cacheWrite: totalCacheWriteTokens,
1983
+ },
1688
1984
  duration: now() - start,
1689
1985
  continuation: true,
1690
1986
  continuationMessages: [...messages],
@@ -1698,7 +1994,6 @@ ${boundedMainMemory.trim()}`
1698
1994
 
1699
1995
  const stepStart = now();
1700
1996
  yield pushEvent({ type: "step:started", step });
1701
- yield pushEvent({ type: "model:request", tokens: 0 });
1702
1997
 
1703
1998
  const dispatcherTools = this.dispatcher.list();
1704
1999
  const exposedToolNames = new Map<string, string>();
@@ -1720,6 +2015,15 @@ ${boundedMainMemory.trim()}`
1720
2015
  inputSchema: jsonSchemaToZod(tool.inputSchema),
1721
2016
  };
1722
2017
  }
2018
+ const toolDefsJsonForEstimate = JSON.stringify(
2019
+ dispatcherTools.map((t) => ({
2020
+ name: t.name,
2021
+ description: t.description,
2022
+ inputSchema: t.inputSchema,
2023
+ })),
2024
+ );
2025
+ const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
2026
+ yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
1723
2027
 
1724
2028
  // Convert messages to ModelMessage format
1725
2029
  const convertMessage = async (msg: Message): Promise<ModelMessage[]> => {
@@ -1956,20 +2260,11 @@ ${boundedMainMemory.trim()}`
1956
2260
  }
1957
2261
  const modelInstance = this.modelProvider(modelName);
1958
2262
 
1959
- // --- Auto-compaction (step 1 only) ---
1960
- // On step 2+ the messages array contains harness-internal formats
1961
- // (JSON-stringified tool_calls / tool results) that must not leak
1962
- // into the conversation store via compactedMessages.
2263
+ // --- Auto-compaction ---
2264
+ // Re-check every N steps to curb runaway context growth in longer runs.
1963
2265
  const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
1964
- if (compactionConfig.enabled && step === 1) {
1965
- const toolDefsJson = JSON.stringify(
1966
- dispatcherTools.map((t) => ({
1967
- name: t.name,
1968
- description: t.description,
1969
- inputSchema: t.inputSchema,
1970
- })),
1971
- );
1972
- const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
2266
+ if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
2267
+ const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
1973
2268
  const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
1974
2269
  const effectiveTokens = Math.max(estimated, lastReportedInput);
1975
2270
 
@@ -1984,14 +2279,17 @@ ${boundedMainMemory.trim()}`
1984
2279
  if (compactResult.compacted) {
1985
2280
  messages.length = 0;
1986
2281
  messages.push(...compactResult.messages);
1987
- // Strip the trailing user task message so runners can use
1988
- // compactedMessages directly as historyMessages without
1989
- // duplicating the user turn they append themselves.
1990
- const emittedMessages = [...compactResult.messages];
1991
- if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
1992
- emittedMessages.pop();
2282
+ let emittedMessages: Message[] | undefined;
2283
+ if (step === 1) {
2284
+ // Strip the trailing user task message so runners can use
2285
+ // compactedMessages directly as historyMessages without
2286
+ // duplicating the user turn they append themselves.
2287
+ emittedMessages = [...compactResult.messages];
2288
+ if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
2289
+ emittedMessages.pop();
2290
+ }
1993
2291
  }
1994
- const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
2292
+ const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
1995
2293
  latestContextTokens = tokensAfterCompaction;
1996
2294
  toolOutputEstimateSinceModel = 0;
1997
2295
  yield pushEvent({
@@ -2024,7 +2322,9 @@ ${boundedMainMemory.trim()}`
2024
2322
 
2025
2323
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
2026
2324
  const maxTokens = agent.frontmatter.model?.maxTokens;
2027
- const cachedMessages = addPromptCacheBreakpoints(coreMessages, modelInstance);
2325
+ const cachedMessages = enablePromptCache
2326
+ ? addPromptCacheBreakpoints(coreMessages, modelInstance)
2327
+ : coreMessages;
2028
2328
 
2029
2329
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
2030
2330
 
@@ -2153,7 +2453,12 @@ ${boundedMainMemory.trim()}`
2153
2453
  status: "completed",
2154
2454
  response: responseText + fullText,
2155
2455
  steps: step,
2156
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2456
+ tokens: {
2457
+ input: totalInputTokens,
2458
+ output: totalOutputTokens,
2459
+ cached: totalCachedTokens,
2460
+ cacheWrite: totalCacheWriteTokens,
2461
+ },
2157
2462
  duration: now() - start,
2158
2463
  continuation: true,
2159
2464
  continuationMessages: [...messages],
@@ -2185,7 +2490,12 @@ ${boundedMainMemory.trim()}`
2185
2490
  status: "completed",
2186
2491
  response: responseText + fullText,
2187
2492
  steps: step,
2188
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2493
+ tokens: {
2494
+ input: totalInputTokens,
2495
+ output: totalOutputTokens,
2496
+ cached: totalCachedTokens,
2497
+ cacheWrite: totalCacheWriteTokens,
2498
+ },
2189
2499
  duration: now() - start,
2190
2500
  continuation: true,
2191
2501
  continuationMessages: [...messages],
@@ -2233,11 +2543,21 @@ ${boundedMainMemory.trim()}`
2233
2543
  const toolCallsResult = await result.toolCalls;
2234
2544
 
2235
2545
  // Update token usage
2236
- const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
2546
+ const details = (usage.inputTokenDetails ?? {}) as Record<string, unknown>;
2547
+ const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
2548
+ const stepCacheWriteTokens =
2549
+ typeof details.cacheWriteTokens === "number"
2550
+ ? details.cacheWriteTokens
2551
+ : typeof details.cacheCreationTokens === "number"
2552
+ ? details.cacheCreationTokens
2553
+ : typeof details.cacheCreationInputTokens === "number"
2554
+ ? details.cacheCreationInputTokens
2555
+ : 0;
2237
2556
  const stepInputTokens = usage.inputTokens ?? 0;
2238
2557
  totalInputTokens += stepInputTokens;
2239
2558
  totalOutputTokens += usage.outputTokens ?? 0;
2240
2559
  totalCachedTokens += stepCachedTokens;
2560
+ totalCacheWriteTokens += stepCacheWriteTokens;
2241
2561
  latestContextTokens = stepInputTokens;
2242
2562
  toolOutputEstimateSinceModel = 0;
2243
2563
 
@@ -2247,8 +2567,15 @@ ${boundedMainMemory.trim()}`
2247
2567
  input: stepInputTokens,
2248
2568
  output: usage.outputTokens ?? 0,
2249
2569
  cached: stepCachedTokens,
2570
+ cacheWrite: stepCacheWriteTokens,
2250
2571
  },
2251
2572
  });
2573
+ console.info(
2574
+ `[poncho][cost] model="${modelName}" step=${step} ` +
2575
+ `input=${stepInputTokens} output=${usage.outputTokens ?? 0} ` +
2576
+ `cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} ` +
2577
+ `totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`,
2578
+ );
2252
2579
 
2253
2580
  // Extract tool calls
2254
2581
  const toolCalls = toolCallsResult.map((tc) => ({
@@ -2302,6 +2629,7 @@ ${boundedMainMemory.trim()}`
2302
2629
  input: totalInputTokens,
2303
2630
  output: totalOutputTokens,
2304
2631
  cached: totalCachedTokens,
2632
+ cacheWrite: totalCacheWriteTokens,
2305
2633
  },
2306
2634
  duration: now() - start,
2307
2635
  contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
@@ -2505,7 +2833,12 @@ ${boundedMainMemory.trim()}`
2505
2833
  status: "completed",
2506
2834
  response: responseText + fullText,
2507
2835
  steps: step,
2508
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2836
+ tokens: {
2837
+ input: totalInputTokens,
2838
+ output: totalOutputTokens,
2839
+ cached: totalCachedTokens,
2840
+ cacheWrite: totalCacheWriteTokens,
2841
+ },
2509
2842
  duration: now() - start,
2510
2843
  continuation: true,
2511
2844
  continuationMessages: [...messages],
@@ -2538,6 +2871,20 @@ ${boundedMainMemory.trim()}`
2538
2871
  tool_name: result.tool,
2539
2872
  content: `Tool error: ${result.error}`,
2540
2873
  });
2874
+ {
2875
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
2876
+ if (archive) {
2877
+ archive[result.callId] = {
2878
+ toolResultId: result.callId,
2879
+ conversationId,
2880
+ toolName: result.tool,
2881
+ toolCallId: result.callId,
2882
+ createdAt: now(),
2883
+ sizeBytes: Buffer.byteLength(`Tool error: ${result.error}`, "utf8"),
2884
+ payload: `Tool error: ${result.error}`,
2885
+ };
2886
+ }
2887
+ }
2541
2888
  richToolResults.push({
2542
2889
  type: "tool-result",
2543
2890
  toolCallId: result.callId,
@@ -2564,6 +2911,21 @@ ${boundedMainMemory.trim()}`
2564
2911
  tool_name: result.tool,
2565
2912
  content: JSON.stringify(strippedOutput ?? null),
2566
2913
  });
2914
+ {
2915
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
2916
+ if (archive) {
2917
+ const payload = JSON.stringify(result.output ?? null);
2918
+ archive[result.callId] = {
2919
+ toolResultId: result.callId,
2920
+ conversationId,
2921
+ toolName: result.tool,
2922
+ toolCallId: result.callId,
2923
+ createdAt: now(),
2924
+ sizeBytes: Buffer.byteLength(payload, "utf8"),
2925
+ payload,
2926
+ };
2927
+ }
2928
+ }
2567
2929
 
2568
2930
  if (mediaItems.length > 0) {
2569
2931
  richToolResults.push({
@@ -2604,9 +2966,15 @@ ${boundedMainMemory.trim()}`
2604
2966
  messages.push({
2605
2967
  role: "assistant",
2606
2968
  content: assistantContent,
2607
- metadata: { timestamp: now(), id: randomUUID(), step },
2969
+ metadata: { timestamp: now(), id: randomUUID(), step, runId },
2608
2970
  });
2609
- const toolMsgMeta: Record<string, unknown> = { timestamp: now(), id: randomUUID(), step, _richToolResults: richToolResults };
2971
+ const toolMsgMeta: Record<string, unknown> = {
2972
+ timestamp: now(),
2973
+ id: randomUUID(),
2974
+ step,
2975
+ runId,
2976
+ _richToolResults: richToolResults,
2977
+ };
2610
2978
  messages.push({
2611
2979
  role: "tool",
2612
2980
  content: JSON.stringify(toolResultsForModel),
@@ -2621,7 +2989,12 @@ ${boundedMainMemory.trim()}`
2621
2989
  status: "completed",
2622
2990
  response: responseText + fullText,
2623
2991
  steps: step,
2624
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
2992
+ tokens: {
2993
+ input: totalInputTokens,
2994
+ output: totalOutputTokens,
2995
+ cached: totalCachedTokens,
2996
+ cacheWrite: totalCacheWriteTokens,
2997
+ },
2625
2998
  duration: now() - start,
2626
2999
  continuation: true,
2627
3000
  continuationMessages: [...messages],
@@ -2689,7 +3062,12 @@ ${boundedMainMemory.trim()}`
2689
3062
  status: "completed",
2690
3063
  response: responseText,
2691
3064
  steps: maxSteps,
2692
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
3065
+ tokens: {
3066
+ input: totalInputTokens,
3067
+ output: totalOutputTokens,
3068
+ cached: totalCachedTokens,
3069
+ cacheWrite: totalCacheWriteTokens,
3070
+ },
2693
3071
  duration: now() - start,
2694
3072
  continuation: true,
2695
3073
  continuationMessages: [...messages],