@poncho-ai/harness 0.31.1 → 0.31.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -361,9 +361,11 @@ var OVERHEAD_MULTIPLIER = 1.15;
361
361
  var MIN_COMPACTABLE_MESSAGES = 4;
362
362
  var DEFAULT_COMPACTION_CONFIG = {
363
363
  enabled: true,
364
- trigger: 0.8,
365
- keepRecentMessages: 6
364
+ trigger: 0.75,
365
+ keepRecentMessages: 4
366
366
  };
367
+ var SUMMARIZATION_MESSAGE_TRUNCATION_CHARS = 1200;
368
+ var SUMMARIZATION_MAX_OUTPUT_TOKENS = 768;
367
369
  var SUMMARIZATION_PROMPT = `Summarize the following conversation into a structured working state that allows continuation without re-asking questions. Include:
368
370
 
369
371
  1. **User intent**: What the user originally asked for and any refinements
@@ -418,7 +420,7 @@ var buildSummarizationMessages = (messagesToCompact, instructions) => {
418
420
  const conversationLines = [];
419
421
  for (const msg of messagesToCompact) {
420
422
  const text = getTextContent(msg);
421
- const truncated = text.length > 2e3 ? text.slice(0, 2e3) + "\n...[truncated]" : text;
423
+ const truncated = text.length > SUMMARIZATION_MESSAGE_TRUNCATION_CHARS ? text.slice(0, SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) + "\n...[truncated]" : text;
422
424
  conversationLines.push(`[${msg.role}]: ${truncated}`);
423
425
  }
424
426
  const prompt = instructions ? `${SUMMARIZATION_PROMPT}
@@ -473,7 +475,7 @@ var compactMessages = async (model, messages, config, options) => {
473
475
  const result = await generateText({
474
476
  model,
475
477
  messages: summarizationMessages,
476
- maxOutputTokens: 2048
478
+ maxOutputTokens: SUMMARIZATION_MAX_OUTPUT_TOKENS
477
479
  });
478
480
  const summary = result.text.trim();
479
481
  if (!summary) {
@@ -1658,7 +1660,7 @@ Logs print to console:
1658
1660
  [event] run:started {"type":"run:started","runId":"run_abc123","agentId":"my-agent"}
1659
1661
  [event] tool:started {"type":"tool:started","tool":"read_file","input":{"path":"README.md"}}
1660
1662
  [event] tool:completed {"type":"tool:completed","tool":"read_file","duration":45,"output":{"path":"README.md","content":"..."}}
1661
- [event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840}}}
1663
+ [event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840,"cached":1200,"cacheWrite":300}}}
1662
1664
  \`\`\`
1663
1665
 
1664
1666
  ### Production telemetry (generic OTLP)
@@ -2110,7 +2112,7 @@ var ponchoDocsTool = defineTool({
2110
2112
  import { randomUUID as randomUUID3 } from "crypto";
2111
2113
  import { readFile as readFile9 } from "fs/promises";
2112
2114
  import { resolve as resolve11 } from "path";
2113
- import { getTextContent as getTextContent2 } from "@poncho-ai/sdk";
2115
+ import { defineTool as defineTool7, getTextContent as getTextContent2 } from "@poncho-ai/sdk";
2114
2116
 
2115
2117
  // src/upload-store.ts
2116
2118
  import { createHash as createHash2 } from "crypto";
@@ -5015,6 +5017,9 @@ var TelemetryEmitter = class {
5015
5017
  if (otlp) {
5016
5018
  await this.sendOtlp(event, otlp);
5017
5019
  }
5020
+ if (event.type === "model:chunk") {
5021
+ return;
5022
+ }
5018
5023
  process.stdout.write(`[event] ${event.type} ${sanitizeEventForLog(event)}
5019
5024
  `);
5020
5025
  }
@@ -5141,8 +5146,12 @@ var ToolDispatcher = class {
5141
5146
 
5142
5147
  // src/harness.ts
5143
5148
  var now = () => Date.now();
5144
- var FIRST_CHUNK_TIMEOUT_MS = 3e5;
5145
- var MAX_TRANSIENT_STEP_RETRIES = 2;
5149
+ var FIRST_CHUNK_TIMEOUT_MS = 9e4;
5150
+ var MAX_TRANSIENT_STEP_RETRIES = 1;
5151
+ var COMPACTION_CHECK_INTERVAL_STEPS = 3;
5152
+ var TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
5153
+ var TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
5154
+ var TOOL_RESULT_PREVIEW_CHARS = 700;
5146
5155
  var FirstChunkTimeoutError = class extends Error {
5147
5156
  constructor(modelName, timeoutMs) {
5148
5157
  super(
@@ -5201,18 +5210,11 @@ var isRetryableModelError = (error) => {
5201
5210
  if (error instanceof FirstChunkTimeoutError) {
5202
5211
  return true;
5203
5212
  }
5204
- if (isNoOutputGeneratedError(error)) {
5205
- return true;
5206
- }
5207
5213
  const statusCode = getErrorStatusCode(error);
5208
5214
  if (typeof statusCode === "number") {
5209
5215
  return statusCode === 429 || statusCode >= 500;
5210
5216
  }
5211
- if (!error || typeof error !== "object") {
5212
- return false;
5213
- }
5214
- const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
5215
- return maybeMessage.includes("internal server error") || maybeMessage.includes("service unavailable") || maybeMessage.includes("gateway timeout") || maybeMessage.includes("rate limit");
5217
+ return false;
5216
5218
  };
5217
5219
  var toRunError = (error) => {
5218
5220
  const statusCode = getErrorStatusCode(error);
@@ -5264,6 +5266,58 @@ var toProviderSafeToolName = (originalName, index, used) => {
5264
5266
  used.add(candidate);
5265
5267
  return candidate;
5266
5268
  };
5269
/**
 * Type guard for one serialized tool-result row.
 *
 * A value qualifies when it is a non-null object whose `tool_use_id`,
 * `tool_name` and `content` properties are all strings.
 *
 * @param {unknown} value - Candidate value, e.g. one element of a parsed tool message.
 * @returns {boolean} `true` when all three required string fields are present.
 */
var isToolResultRow = (value) => {
  if (value === null || typeof value !== "object") {
    return false;
  }
  // Checked in the same order as the fields are listed; stops at the first miss.
  const requiredStringFields = ["tool_use_id", "tool_name", "content"];
  return requiredStringFields.every((field) => typeof value[field] === "string");
};
5274
/**
 * Extracts a validated tool-result archive from run parameters.
 *
 * Reads `parameters[TOOL_RESULT_ARCHIVE_PARAM]` and keeps only the entries whose
 * value is an object with string `toolResultId`, `conversationId`, `toolName`,
 * `toolCallId`, `payload` and numeric `createdAt`, `sizeBytes`. Each kept entry is
 * copied field by field, so unknown extra properties are dropped.
 *
 * @param {object|null|undefined} parameters - Run parameters; may be absent.
 * @returns {object} A fresh plain object mapping each accepted key to its record;
 *   empty when the parameter is missing or not an object.
 */
var readArchiveFromParameters = (parameters) => {
  const raw = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
  if (typeof raw !== "object" || raw === null) return {};
  const accepted = [];
  for (const [key, value] of Object.entries(raw)) {
    if (typeof value !== "object" || value === null) continue;
    const { toolResultId, conversationId, toolName, toolCallId, createdAt, sizeBytes, payload } = value;
    const stringsOk = [toolResultId, conversationId, toolName, toolCallId, payload].every(
      (field) => typeof field === "string"
    );
    const numbersOk = typeof createdAt === "number" && typeof sizeBytes === "number";
    if (!stringsOk || !numbersOk) continue;
    accepted.push([
      key,
      { toolResultId, conversationId, toolName, toolCallId, createdAt, sizeBytes, payload }
    ]);
  }
  // Object.fromEntries defines own data properties. A plain `out[key] = ...`
  // assignment would run the `__proto__` setter for a key named "__proto__"
  // (possible after JSON.parse) and silently re-parent the returned archive.
  return Object.fromEntries(accepted);
};
5296
/**
 * Formats the placeholder text that replaces an archived tool result.
 *
 * The first line carries the truncation prefix, the archive id, the tool name and
 * the number of characters left out; the following line(s) hold the first
 * TOOL_RESULT_PREVIEW_CHARS characters of the payload, followed by a
 * "...[truncated]" marker line when anything was omitted.
 *
 * @param {string} toolResultId - Id under which the full payload is archived.
 * @param {string} toolName - Name of the tool that produced the payload.
 * @param {string} payload - Full tool result text.
 * @returns {string} The notice text.
 */
var makeTruncatedToolResultNotice = (toolResultId, toolName, payload) => {
  const head = payload.slice(0, TOOL_RESULT_PREVIEW_CHARS);
  const dropped = Math.max(0, payload.length - head.length);
  const header = `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${dropped}`;
  let marker = "";
  if (dropped > 0) {
    marker = "\n...[truncated]";
  }
  return [header, "\n", head, marker].join("");
};
5302
/**
 * Reports whether any tool message still carries a full (non-truncated) result.
 *
 * Only messages with role "tool" and string content are inspected. Content that
 * is not valid JSON, or does not parse to an array, is ignored. Within an array,
 * only elements accepted by `isToolResultRow` count, and a row is "untruncated"
 * when its content does not start with TOOL_RESULT_TRUNCATED_PREFIX.
 *
 * @param {Iterable<object>} messages - Conversation messages to scan.
 * @returns {boolean} `true` as soon as one untruncated row is found, else `false`.
 */
var hasUntruncatedToolResults = (messages) => {
  // Decode a tool message body; anything that is not a JSON array yields no rows.
  const rowsOf = (content) => {
    try {
      const decoded = JSON.parse(content);
      return Array.isArray(decoded) ? decoded : [];
    } catch {
      return [];
    }
  };
  for (const message of messages) {
    const inspectable = message.role === "tool" && typeof message.content === "string";
    if (!inspectable) continue;
    const foundFullRow = rowsOf(message.content).some(
      (entry) => isToolResultRow(entry) && !entry.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)
    );
    if (foundFullRow) {
      return true;
    }
  }
  return false;
};
5267
5321
  var DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
5268
5322
 
5269
5323
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -5595,6 +5649,7 @@ var AgentHarness = class _AgentHarness {
5595
5649
  agentFileFingerprint = "";
5596
5650
  mcpBridge;
5597
5651
  subagentManager;
5652
+ archivedToolResultsByConversation = /* @__PURE__ */ new Map();
5598
5653
  resolveToolAccess(toolName) {
5599
5654
  const tools = this.loadedConfig?.tools;
5600
5655
  if (!tools) return true;
@@ -5666,6 +5721,56 @@ var AgentHarness = class _AgentHarness {
5666
5721
  if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
5667
5722
  this.registerIfMissing(ponchoDocsTool);
5668
5723
  }
5724
+ if (this.isToolEnabled("get_tool_result_by_id")) {
5725
+ this.registerIfMissing(this.createGetToolResultByIdTool());
5726
+ }
5727
+ }
5728
+ createGetToolResultByIdTool() {
5729
+ return defineTool7({
5730
+ name: "get_tool_result_by_id",
5731
+ description: "Retrieve a previously archived full tool result by id for the current conversation. Use this when older tool outputs were truncated in prompt history.",
5732
+ inputSchema: {
5733
+ type: "object",
5734
+ properties: {
5735
+ toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
5736
+ offset: { type: "number", description: "Optional character offset for paging large payloads" },
5737
+ limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" }
5738
+ },
5739
+ required: ["toolResultId"],
5740
+ additionalProperties: false
5741
+ },
5742
+ handler: async (input, context) => {
5743
+ const conversationId = context.conversationId ?? "__default__";
5744
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5745
+ const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
5746
+ const record = archive[toolResultId];
5747
+ if (!record) {
5748
+ console.info(
5749
+ `[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`
5750
+ );
5751
+ return {
5752
+ error: `No archived tool result found for id "${toolResultId}" in this conversation.`
5753
+ };
5754
+ }
5755
+ const offset = Math.max(0, Number(input.offset) || 0);
5756
+ const limit = Math.min(Math.max(Number(input.limit) || 6e3, 1), 2e4);
5757
+ const end = Math.min(record.payload.length, offset + limit);
5758
+ const chunk = record.payload.slice(offset, end);
5759
+ console.info(
5760
+ `[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" offset=${offset} returned=${chunk.length} total=${record.payload.length}`
5761
+ );
5762
+ return {
5763
+ toolResultId: record.toolResultId,
5764
+ toolName: record.toolName,
5765
+ toolCallId: record.toolCallId,
5766
+ totalChars: record.payload.length,
5767
+ offset,
5768
+ returnedChars: chunk.length,
5769
+ hasMore: end < record.payload.length,
5770
+ payload: chunk
5771
+ };
5772
+ }
5773
+ });
5669
5774
  }
5670
5775
  shouldEnableWriteTool() {
5671
5776
  const override = process.env.PONCHO_FS_WRITE?.toLowerCase();
@@ -5690,6 +5795,125 @@ var AgentHarness = class _AgentHarness {
5690
5795
  get frontmatter() {
5691
5796
  return this.parsedAgent?.frontmatter;
5692
5797
  }
5798
+ getToolResultArchive(conversationId) {
5799
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
5800
+ return archive ? { ...archive } : {};
5801
+ }
5802
+ seedToolResultArchive(conversationId, parameters) {
5803
+ const seeded = readArchiveFromParameters(parameters);
5804
+ const existing = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5805
+ const merged = { ...existing, ...seeded };
5806
+ this.archivedToolResultsByConversation.set(conversationId, merged);
5807
+ return merged;
5808
+ }
5809
+ truncateHistoricalToolResults(messages, conversationId) {
5810
+ let latestRunId;
5811
+ let latestToolMessageIndex = -1;
5812
+ for (let i = messages.length - 1; i >= 0; i -= 1) {
5813
+ const msg = messages[i];
5814
+ if (latestToolMessageIndex === -1 && msg.role === "tool" && typeof msg.content === "string") {
5815
+ latestToolMessageIndex = i;
5816
+ }
5817
+ const meta = msg.metadata;
5818
+ const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
5819
+ if (runId) {
5820
+ latestRunId = runId;
5821
+ break;
5822
+ }
5823
+ }
5824
+ if (!latestRunId && latestToolMessageIndex === -1) {
5825
+ return { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };
5826
+ }
5827
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5828
+ this.archivedToolResultsByConversation.set(conversationId, archive);
5829
+ let changed = false;
5830
+ let truncatedCount = 0;
5831
+ let archivedCount = 0;
5832
+ let omittedChars = 0;
5833
+ for (let index = 0; index < messages.length; index += 1) {
5834
+ const msg = messages[index];
5835
+ if (msg.role !== "tool" || typeof msg.content !== "string") continue;
5836
+ const meta = msg.metadata;
5837
+ const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
5838
+ if (latestRunId) {
5839
+ if (runId === latestRunId) continue;
5840
+ } else if (index === latestToolMessageIndex) {
5841
+ continue;
5842
+ }
5843
+ let parsed;
5844
+ try {
5845
+ parsed = JSON.parse(msg.content);
5846
+ } catch {
5847
+ continue;
5848
+ }
5849
+ if (!Array.isArray(parsed)) continue;
5850
+ let rowChanged = false;
5851
+ const nextRows = parsed.map((row) => {
5852
+ if (!isToolResultRow(row)) return row;
5853
+ if (row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) return row;
5854
+ if (this.shouldPreserveSkillToolResult(row)) return row;
5855
+ const toolResultId = row.tool_use_id;
5856
+ if (!archive[toolResultId]) {
5857
+ archive[toolResultId] = {
5858
+ toolResultId,
5859
+ conversationId,
5860
+ toolName: row.tool_name,
5861
+ toolCallId: row.tool_use_id,
5862
+ createdAt: now(),
5863
+ sizeBytes: Buffer.byteLength(row.content, "utf8"),
5864
+ payload: row.content
5865
+ };
5866
+ archivedCount += 1;
5867
+ }
5868
+ const omitted = Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
5869
+ omittedChars += omitted;
5870
+ truncatedCount += 1;
5871
+ rowChanged = true;
5872
+ return {
5873
+ ...row,
5874
+ content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content)
5875
+ };
5876
+ });
5877
+ if (rowChanged) {
5878
+ msg.content = JSON.stringify(nextRows);
5879
+ if (msg.metadata && typeof msg.metadata === "object") {
5880
+ const meta2 = msg.metadata;
5881
+ if ("_richToolResults" in meta2) {
5882
+ delete meta2._richToolResults;
5883
+ }
5884
+ }
5885
+ changed = true;
5886
+ }
5887
+ }
5888
+ return { changed, truncatedCount, archivedCount, omittedChars };
5889
+ }
5890
+ shouldPreserveSkillToolResult(row) {
5891
+ if (row.tool_name.startsWith("todo_")) {
5892
+ return true;
5893
+ }
5894
+ if (row.tool_name !== "activate_skill" && row.tool_name !== "deactivate_skill") {
5895
+ return false;
5896
+ }
5897
+ const content = row.content.trim();
5898
+ if (content.startsWith("Tool error:")) {
5899
+ return false;
5900
+ }
5901
+ try {
5902
+ const parsed = JSON.parse(content);
5903
+ const skill = typeof parsed.skill === "string" ? parsed.skill : void 0;
5904
+ if (skill && this.activeSkillNames.has(skill)) {
5905
+ return true;
5906
+ }
5907
+ const activeSkills = Array.isArray(parsed.activeSkills) ? parsed.activeSkills.filter((v) => typeof v === "string") : [];
5908
+ for (const name of activeSkills) {
5909
+ if (this.activeSkillNames.has(name)) {
5910
+ return true;
5911
+ }
5912
+ }
5913
+ } catch {
5914
+ }
5915
+ return false;
5916
+ }
5693
5917
  async getTodos(conversationId) {
5694
5918
  if (!this.todoStore) return [];
5695
5919
  return this.todoStore.get(conversationId);
@@ -6358,12 +6582,31 @@ var AgentHarness = class _AgentHarness {
6358
6582
  let agent = this.parsedAgent;
6359
6583
  const runId = `run_${randomUUID3()}`;
6360
6584
  const start = now();
6361
- const maxSteps = agent.frontmatter.limits?.maxSteps ?? 50;
6585
+ const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
6362
6586
  const configuredTimeout = agent.frontmatter.limits?.timeout;
6363
6587
  const timeoutMs = this.environment === "development" && configuredTimeout == null ? 0 : (configuredTimeout ?? 300) * 1e3;
6364
6588
  const platformMaxDurationSec = Number(process.env.PONCHO_MAX_DURATION) || 0;
6365
6589
  const softDeadlineMs = input.disableSoftDeadline || platformMaxDurationSec <= 0 ? 0 : platformMaxDurationSec * 800;
6366
6590
  const messages = [...input.messages ?? []];
6591
+ const conversationId = input.conversationId ?? "__default__";
6592
+ this.seedToolResultArchive(conversationId, input.parameters);
6593
+ const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
6594
+ if (truncationSummary.changed) {
6595
+ console.info(
6596
+ `[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) (archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) for conversation="${conversationId}"`
6597
+ );
6598
+ }
6599
+ const hasFullToolResults = hasUntruncatedToolResults(messages);
6600
+ const enablePromptCache = !hasFullToolResults;
6601
+ if (!enablePromptCache) {
6602
+ console.info(
6603
+ `[poncho][cost] Prompt cache write disabled for run "${runId}" (untruncated tool results present in history).`
6604
+ );
6605
+ } else {
6606
+ console.info(
6607
+ `[poncho][cost] Prompt cache write enabled for run "${runId}" (history has no untruncated tool results).`
6608
+ );
6609
+ }
6367
6610
  const inputMessageCount = messages.length;
6368
6611
  const events = [];
6369
6612
  const renderCurrentAgentPrompt = () => renderAgentPrompt(this.parsedAgent, {
@@ -6443,7 +6686,6 @@ ${this.skillFingerprint}`;
6443
6686
  const browserEventQueue = [];
6444
6687
  const browserCleanups = [];
6445
6688
  const browserSession = this._browserSession;
6446
- const conversationId = input.conversationId ?? "__default__";
6447
6689
  if (browserSession) {
6448
6690
  browserCleanups.push(
6449
6691
  browserSession.onFrame(conversationId, (frame) => {
@@ -6510,6 +6752,7 @@ ${this.skillFingerprint}`;
6510
6752
  let totalInputTokens = 0;
6511
6753
  let totalOutputTokens = 0;
6512
6754
  let totalCachedTokens = 0;
6755
+ let totalCacheWriteTokens = 0;
6513
6756
  let transientStepRetryCount = 0;
6514
6757
  let latestContextTokens = 0;
6515
6758
  let toolOutputEstimateSinceModel = 0;
@@ -6538,7 +6781,12 @@ ${this.skillFingerprint}`;
6538
6781
  status: "completed",
6539
6782
  response: responseText,
6540
6783
  steps: step - 1,
6541
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
6784
+ tokens: {
6785
+ input: totalInputTokens,
6786
+ output: totalOutputTokens,
6787
+ cached: totalCachedTokens,
6788
+ cacheWrite: totalCacheWriteTokens
6789
+ },
6542
6790
  duration: now() - start,
6543
6791
  continuation: true,
6544
6792
  continuationMessages: [...messages],
@@ -6551,7 +6799,6 @@ ${this.skillFingerprint}`;
6551
6799
  }
6552
6800
  const stepStart = now();
6553
6801
  yield pushEvent({ type: "step:started", step });
6554
- yield pushEvent({ type: "model:request", tokens: 0 });
6555
6802
  const dispatcherTools = this.dispatcher.list();
6556
6803
  const exposedToolNames = /* @__PURE__ */ new Map();
6557
6804
  const usedProviderToolNames = /* @__PURE__ */ new Set();
@@ -6570,6 +6817,15 @@ ${this.skillFingerprint}`;
6570
6817
  inputSchema: jsonSchemaToZod(tool.inputSchema)
6571
6818
  };
6572
6819
  }
6820
+ const toolDefsJsonForEstimate = JSON.stringify(
6821
+ dispatcherTools.map((t) => ({
6822
+ name: t.name,
6823
+ description: t.description,
6824
+ inputSchema: t.inputSchema
6825
+ }))
6826
+ );
6827
+ const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6828
+ yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
6573
6829
  const convertMessage = async (msg) => {
6574
6830
  if (msg.role === "tool") {
6575
6831
  const meta = msg.metadata;
@@ -6759,15 +7015,8 @@ ${textContent}` };
6759
7015
  }
6760
7016
  const modelInstance = this.modelProvider(modelName);
6761
7017
  const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
6762
- if (compactionConfig.enabled && step === 1) {
6763
- const toolDefsJson = JSON.stringify(
6764
- dispatcherTools.map((t) => ({
6765
- name: t.name,
6766
- description: t.description,
6767
- inputSchema: t.inputSchema
6768
- }))
6769
- );
6770
- const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
7018
+ if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
7019
+ const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6771
7020
  const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
6772
7021
  const effectiveTokens = Math.max(estimated, lastReportedInput);
6773
7022
  if (effectiveTokens > compactionConfig.trigger * contextWindow) {
@@ -6780,11 +7029,14 @@ ${textContent}` };
6780
7029
  if (compactResult.compacted) {
6781
7030
  messages.length = 0;
6782
7031
  messages.push(...compactResult.messages);
6783
- const emittedMessages = [...compactResult.messages];
6784
- if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
6785
- emittedMessages.pop();
7032
+ let emittedMessages;
7033
+ if (step === 1) {
7034
+ emittedMessages = [...compactResult.messages];
7035
+ if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
7036
+ emittedMessages.pop();
7037
+ }
6786
7038
  }
6787
- const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
7039
+ const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6788
7040
  latestContextTokens = tokensAfterCompaction;
6789
7041
  toolOutputEstimateSinceModel = 0;
6790
7042
  yield pushEvent({
@@ -6811,7 +7063,7 @@ ${textContent}` };
6811
7063
  const coreMessages = cachedCoreMessages;
6812
7064
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
6813
7065
  const maxTokens = agent.frontmatter.model?.maxTokens;
6814
- const cachedMessages = addPromptCacheBreakpoints(coreMessages, modelInstance);
7066
+ const cachedMessages = enablePromptCache ? addPromptCacheBreakpoints(coreMessages, modelInstance) : coreMessages;
6815
7067
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
6816
7068
  const result = await streamText({
6817
7069
  model: modelInstance,
@@ -6922,14 +7174,19 @@ ${textContent}` };
6922
7174
  messages.push({
6923
7175
  role: "assistant",
6924
7176
  content: fullText,
6925
- metadata: { timestamp: now(), id: randomUUID3(), step }
7177
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
6926
7178
  });
6927
7179
  }
6928
7180
  const result_ = {
6929
7181
  status: "completed",
6930
7182
  response: responseText + fullText,
6931
7183
  steps: step,
6932
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7184
+ tokens: {
7185
+ input: totalInputTokens,
7186
+ output: totalOutputTokens,
7187
+ cached: totalCachedTokens,
7188
+ cacheWrite: totalCacheWriteTokens
7189
+ },
6933
7190
  duration: now() - start,
6934
7191
  continuation: true,
6935
7192
  continuationMessages: [...messages],
@@ -6950,14 +7207,19 @@ ${textContent}` };
6950
7207
  messages.push({
6951
7208
  role: "assistant",
6952
7209
  content: fullText,
6953
- metadata: { timestamp: now(), id: randomUUID3(), step }
7210
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
6954
7211
  });
6955
7212
  }
6956
7213
  const result_ = {
6957
7214
  status: "completed",
6958
7215
  response: responseText + fullText,
6959
7216
  steps: step,
6960
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7217
+ tokens: {
7218
+ input: totalInputTokens,
7219
+ output: totalOutputTokens,
7220
+ cached: totalCachedTokens,
7221
+ cacheWrite: totalCacheWriteTokens
7222
+ },
6961
7223
  duration: now() - start,
6962
7224
  continuation: true,
6963
7225
  continuationMessages: [...messages],
@@ -6997,11 +7259,14 @@ ${textContent}` };
6997
7259
  const fullResult = await result.response;
6998
7260
  const usage = await result.usage;
6999
7261
  const toolCallsResult = await result.toolCalls;
7000
- const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
7262
+ const details = usage.inputTokenDetails ?? {};
7263
+ const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
7264
+ const stepCacheWriteTokens = typeof details.cacheWriteTokens === "number" ? details.cacheWriteTokens : typeof details.cacheCreationTokens === "number" ? details.cacheCreationTokens : typeof details.cacheCreationInputTokens === "number" ? details.cacheCreationInputTokens : 0;
7001
7265
  const stepInputTokens = usage.inputTokens ?? 0;
7002
7266
  totalInputTokens += stepInputTokens;
7003
7267
  totalOutputTokens += usage.outputTokens ?? 0;
7004
7268
  totalCachedTokens += stepCachedTokens;
7269
+ totalCacheWriteTokens += stepCacheWriteTokens;
7005
7270
  latestContextTokens = stepInputTokens;
7006
7271
  toolOutputEstimateSinceModel = 0;
7007
7272
  yield pushEvent({
@@ -7009,9 +7274,13 @@ ${textContent}` };
7009
7274
  usage: {
7010
7275
  input: stepInputTokens,
7011
7276
  output: usage.outputTokens ?? 0,
7012
- cached: stepCachedTokens
7277
+ cached: stepCachedTokens,
7278
+ cacheWrite: stepCacheWriteTokens
7013
7279
  }
7014
7280
  });
7281
+ console.info(
7282
+ `[poncho][cost] model="${modelName}" step=${step} input=${stepInputTokens} output=${usage.outputTokens ?? 0} cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`
7283
+ );
7015
7284
  const toolCalls = toolCallsResult.map((tc) => ({
7016
7285
  id: tc.toolCallId,
7017
7286
  name: tc.toolName,
@@ -7042,7 +7311,7 @@ ${textContent}` };
7042
7311
  messages.push({
7043
7312
  role: "assistant",
7044
7313
  content: fullText,
7045
- metadata: { timestamp: now(), id: randomUUID3(), step }
7314
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
7046
7315
  });
7047
7316
  }
7048
7317
  responseText = fullText;
@@ -7058,7 +7327,8 @@ ${textContent}` };
7058
7327
  tokens: {
7059
7328
  input: totalInputTokens,
7060
7329
  output: totalOutputTokens,
7061
- cached: totalCachedTokens
7330
+ cached: totalCachedTokens,
7331
+ cacheWrite: totalCacheWriteTokens
7062
7332
  },
7063
7333
  duration: now() - start,
7064
7334
  contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
@@ -7123,7 +7393,7 @@ ${textContent}` };
7123
7393
  const assistantMsg = {
7124
7394
  role: "assistant",
7125
7395
  content: assistantContent2,
7126
- metadata: { timestamp: now(), id: randomUUID3(), step }
7396
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
7127
7397
  };
7128
7398
  const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
7129
7399
  yield pushEvent({
@@ -7208,14 +7478,19 @@ ${textContent}` };
7208
7478
  messages.push({
7209
7479
  role: "assistant",
7210
7480
  content: fullText,
7211
- metadata: { timestamp: now(), id: randomUUID3(), step }
7481
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
7212
7482
  });
7213
7483
  }
7214
7484
  const result_ = {
7215
7485
  status: "completed",
7216
7486
  response: responseText + fullText,
7217
7487
  steps: step,
7218
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7488
+ tokens: {
7489
+ input: totalInputTokens,
7490
+ output: totalOutputTokens,
7491
+ cached: totalCachedTokens,
7492
+ cacheWrite: totalCacheWriteTokens
7493
+ },
7219
7494
  duration: now() - start,
7220
7495
  continuation: true,
7221
7496
  continuationMessages: [...messages],
@@ -7246,6 +7521,20 @@ ${textContent}` };
7246
7521
  tool_name: result2.tool,
7247
7522
  content: `Tool error: ${result2.error}`
7248
7523
  });
7524
+ {
7525
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
7526
+ if (archive) {
7527
+ archive[result2.callId] = {
7528
+ toolResultId: result2.callId,
7529
+ conversationId,
7530
+ toolName: result2.tool,
7531
+ toolCallId: result2.callId,
7532
+ createdAt: now(),
7533
+ sizeBytes: Buffer.byteLength(`Tool error: ${result2.error}`, "utf8"),
7534
+ payload: `Tool error: ${result2.error}`
7535
+ };
7536
+ }
7537
+ }
7249
7538
  richToolResults.push({
7250
7539
  type: "tool-result",
7251
7540
  toolCallId: result2.callId,
@@ -7271,6 +7560,21 @@ ${textContent}` };
7271
7560
  tool_name: result2.tool,
7272
7561
  content: JSON.stringify(strippedOutput ?? null)
7273
7562
  });
7563
+ {
7564
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
7565
+ if (archive) {
7566
+ const payload = JSON.stringify(result2.output ?? null);
7567
+ archive[result2.callId] = {
7568
+ toolResultId: result2.callId,
7569
+ conversationId,
7570
+ toolName: result2.tool,
7571
+ toolCallId: result2.callId,
7572
+ createdAt: now(),
7573
+ sizeBytes: Buffer.byteLength(payload, "utf8"),
7574
+ payload
7575
+ };
7576
+ }
7577
+ }
7274
7578
  if (mediaItems.length > 0) {
7275
7579
  richToolResults.push({
7276
7580
  type: "tool-result",
@@ -7305,9 +7609,15 @@ ${textContent}` };
7305
7609
  messages.push({
7306
7610
  role: "assistant",
7307
7611
  content: assistantContent,
7308
- metadata: { timestamp: now(), id: randomUUID3(), step }
7612
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
7309
7613
  });
7310
- const toolMsgMeta = { timestamp: now(), id: randomUUID3(), step, _richToolResults: richToolResults };
7614
+ const toolMsgMeta = {
7615
+ timestamp: now(),
7616
+ id: randomUUID3(),
7617
+ step,
7618
+ runId,
7619
+ _richToolResults: richToolResults
7620
+ };
7311
7621
  messages.push({
7312
7622
  role: "tool",
7313
7623
  content: JSON.stringify(toolResultsForModel),
@@ -7318,7 +7628,12 @@ ${textContent}` };
7318
7628
  status: "completed",
7319
7629
  response: responseText + fullText,
7320
7630
  steps: step,
7321
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7631
+ tokens: {
7632
+ input: totalInputTokens,
7633
+ output: totalOutputTokens,
7634
+ cached: totalCachedTokens,
7635
+ cacheWrite: totalCacheWriteTokens
7636
+ },
7322
7637
  duration: now() - start,
7323
7638
  continuation: true,
7324
7639
  continuationMessages: [...messages],
@@ -7379,7 +7694,12 @@ ${this.skillFingerprint}`;
7379
7694
  status: "completed",
7380
7695
  response: responseText,
7381
7696
  steps: maxSteps,
7382
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7697
+ tokens: {
7698
+ input: totalInputTokens,
7699
+ output: totalOutputTokens,
7700
+ cached: totalCachedTokens,
7701
+ cacheWrite: totalCacheWriteTokens
7702
+ },
7383
7703
  duration: now() - start,
7384
7704
  continuation: true,
7385
7705
  continuationMessages: [...messages],
@@ -8699,7 +9019,7 @@ var createConversationStore = (config, options) => {
8699
9019
  };
8700
9020
 
8701
9021
  // src/index.ts
8702
- import { defineTool as defineTool7 } from "@poncho-ai/sdk";
9022
+ import { defineTool as defineTool8 } from "@poncho-ai/sdk";
8703
9023
  export {
8704
9024
  AgentHarness,
8705
9025
  InMemoryConversationStore,
@@ -8732,7 +9052,7 @@ export {
8732
9052
  createSubagentTools,
8733
9053
  createUploadStore,
8734
9054
  createWriteTool,
8735
- defineTool7 as defineTool,
9055
+ defineTool8 as defineTool,
8736
9056
  deleteOpenAICodexSession,
8737
9057
  deriveUploadKey,
8738
9058
  ensureAgentIdentity,