@poncho-ai/harness 0.31.1 → 0.31.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -361,9 +361,11 @@ var OVERHEAD_MULTIPLIER = 1.15;
361
361
  var MIN_COMPACTABLE_MESSAGES = 4;
362
362
  var DEFAULT_COMPACTION_CONFIG = {
363
363
  enabled: true,
364
- trigger: 0.8,
365
- keepRecentMessages: 6
364
+ trigger: 0.75,
365
+ keepRecentMessages: 4
366
366
  };
367
+ var SUMMARIZATION_MESSAGE_TRUNCATION_CHARS = 1200;
368
+ var SUMMARIZATION_MAX_OUTPUT_TOKENS = 768;
367
369
  var SUMMARIZATION_PROMPT = `Summarize the following conversation into a structured working state that allows continuation without re-asking questions. Include:
368
370
 
369
371
  1. **User intent**: What the user originally asked for and any refinements
@@ -418,7 +420,7 @@ var buildSummarizationMessages = (messagesToCompact, instructions) => {
418
420
  const conversationLines = [];
419
421
  for (const msg of messagesToCompact) {
420
422
  const text = getTextContent(msg);
421
- const truncated = text.length > 2e3 ? text.slice(0, 2e3) + "\n...[truncated]" : text;
423
+ const truncated = text.length > SUMMARIZATION_MESSAGE_TRUNCATION_CHARS ? text.slice(0, SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) + "\n...[truncated]" : text;
422
424
  conversationLines.push(`[${msg.role}]: ${truncated}`);
423
425
  }
424
426
  const prompt = instructions ? `${SUMMARIZATION_PROMPT}
@@ -473,7 +475,7 @@ var compactMessages = async (model, messages, config, options) => {
473
475
  const result = await generateText({
474
476
  model,
475
477
  messages: summarizationMessages,
476
- maxOutputTokens: 2048
478
+ maxOutputTokens: SUMMARIZATION_MAX_OUTPUT_TOKENS
477
479
  });
478
480
  const summary = result.text.trim();
479
481
  if (!summary) {
@@ -1658,7 +1660,7 @@ Logs print to console:
1658
1660
  [event] run:started {"type":"run:started","runId":"run_abc123","agentId":"my-agent"}
1659
1661
  [event] tool:started {"type":"tool:started","tool":"read_file","input":{"path":"README.md"}}
1660
1662
  [event] tool:completed {"type":"tool:completed","tool":"read_file","duration":45,"output":{"path":"README.md","content":"..."}}
1661
- [event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840}}}
1663
+ [event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840,"cached":1200,"cacheWrite":300}}}
1662
1664
  \`\`\`
1663
1665
 
1664
1666
  ### Production telemetry (generic OTLP)
@@ -2110,7 +2112,7 @@ var ponchoDocsTool = defineTool({
2110
2112
  import { randomUUID as randomUUID3 } from "crypto";
2111
2113
  import { readFile as readFile9 } from "fs/promises";
2112
2114
  import { resolve as resolve11 } from "path";
2113
- import { getTextContent as getTextContent2 } from "@poncho-ai/sdk";
2115
+ import { defineTool as defineTool7, getTextContent as getTextContent2 } from "@poncho-ai/sdk";
2114
2116
 
2115
2117
  // src/upload-store.ts
2116
2118
  import { createHash as createHash2 } from "crypto";
@@ -5015,6 +5017,9 @@ var TelemetryEmitter = class {
5015
5017
  if (otlp) {
5016
5018
  await this.sendOtlp(event, otlp);
5017
5019
  }
5020
+ if (event.type === "model:chunk") {
5021
+ return;
5022
+ }
5018
5023
  process.stdout.write(`[event] ${event.type} ${sanitizeEventForLog(event)}
5019
5024
  `);
5020
5025
  }
@@ -5141,8 +5146,12 @@ var ToolDispatcher = class {
5141
5146
 
5142
5147
  // src/harness.ts
5143
5148
  var now = () => Date.now();
5144
- var FIRST_CHUNK_TIMEOUT_MS = 3e5;
5145
- var MAX_TRANSIENT_STEP_RETRIES = 2;
5149
+ var FIRST_CHUNK_TIMEOUT_MS = 9e4;
5150
+ var MAX_TRANSIENT_STEP_RETRIES = 1;
5151
+ var COMPACTION_CHECK_INTERVAL_STEPS = 3;
5152
+ var TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
5153
+ var TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
5154
+ var TOOL_RESULT_PREVIEW_CHARS = 700;
5146
5155
  var FirstChunkTimeoutError = class extends Error {
5147
5156
  constructor(modelName, timeoutMs) {
5148
5157
  super(
@@ -5201,18 +5210,11 @@ var isRetryableModelError = (error) => {
5201
5210
  if (error instanceof FirstChunkTimeoutError) {
5202
5211
  return true;
5203
5212
  }
5204
- if (isNoOutputGeneratedError(error)) {
5205
- return true;
5206
- }
5207
5213
  const statusCode = getErrorStatusCode(error);
5208
5214
  if (typeof statusCode === "number") {
5209
5215
  return statusCode === 429 || statusCode >= 500;
5210
5216
  }
5211
- if (!error || typeof error !== "object") {
5212
- return false;
5213
- }
5214
- const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
5215
- return maybeMessage.includes("internal server error") || maybeMessage.includes("service unavailable") || maybeMessage.includes("gateway timeout") || maybeMessage.includes("rate limit");
5217
+ return false;
5216
5218
  };
5217
5219
  var toRunError = (error) => {
5218
5220
  const statusCode = getErrorStatusCode(error);
@@ -5264,6 +5266,58 @@ var toProviderSafeToolName = (originalName, index, used) => {
5264
5266
  used.add(candidate);
5265
5267
  return candidate;
5266
5268
  };
5269
/**
 * Reports whether `value` has the shape of a serialized tool-result row:
 * a non-null object whose `tool_use_id`, `tool_name` and `content`
 * properties are all strings.
 */
var isToolResultRow = (value) => {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const candidate = value;
  // Checked in declaration order; the first non-string field ends the scan.
  for (const field of ["tool_use_id", "tool_name", "content"]) {
    if (typeof candidate[field] !== "string") {
      return false;
    }
  }
  return true;
};
5274
/**
 * Extracts the tool-result archive carried under
 * `parameters[TOOL_RESULT_ARCHIVE_PARAM]`.
 *
 * Entries that are not objects, or in which any expected field is missing or
 * has the wrong primitive type, are skipped. Accepted entries are copied field
 * by field, so unknown extra properties are not carried over.
 *
 * @param parameters run parameters; may be null or undefined
 * @returns a new object keyed like the input archive (`{}` when nothing usable
 *   is present)
 */
var readArchiveFromParameters = (parameters) => {
  const raw = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
  if (raw === null || typeof raw !== "object") return {};
  // Expected primitive type of every record field, in validation order.
  const fieldTypes = [
    ["toolResultId", "string"],
    ["conversationId", "string"],
    ["toolName", "string"],
    ["toolCallId", "string"],
    ["createdAt", "number"],
    ["sizeBytes", "number"],
    ["payload", "string"]
  ];
  const out = {};
  for (const [key, entry] of Object.entries(raw)) {
    if (entry === null || typeof entry !== "object") continue;
    const wellFormed = fieldTypes.every(([field, type]) => typeof entry[field] === type);
    if (!wellFormed) continue;
    const { toolResultId, conversationId, toolName, toolCallId, createdAt, sizeBytes, payload } = entry;
    out[key] = { toolResultId, conversationId, toolName, toolCallId, createdAt, sizeBytes, payload };
  }
  return out;
};
5296
+ var makeTruncatedToolResultNotice = (toolResultId, toolName, payload) => {
5297
+ const preview = payload.slice(0, TOOL_RESULT_PREVIEW_CHARS);
5298
+ const omittedChars = Math.max(0, payload.length - preview.length);
5299
+ return `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${omittedChars}
5300
+ ${preview}${omittedChars > 0 ? "\n...[truncated]" : ""}`;
5301
+ };
5302
/**
 * Reports whether any tool message in `messages` still carries a tool-result
 * row whose content has not been replaced by a truncation notice.
 *
 * Only messages with role "tool" and string content are inspected. Content
 * that is not valid JSON, or that does not parse to an array, is ignored, as
 * are array items that are not tool-result rows.
 */
var hasUntruncatedToolResults = (messages) => {
  // Decode a tool message body into its row list; anything unusable yields [].
  const rowsOf = (content) => {
    try {
      const decoded = JSON.parse(content);
      return Array.isArray(decoded) ? decoded : [];
    } catch {
      return [];
    }
  };
  const isFullRow = (row) => isToolResultRow(row) && !row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX);
  return messages.some(
    (msg) => msg.role === "tool" && typeof msg.content === "string" && rowsOf(msg.content).some(isFullRow)
  );
};
5267
5321
  var DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
5268
5322
 
5269
5323
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -5595,6 +5649,7 @@ var AgentHarness = class _AgentHarness {
5595
5649
  agentFileFingerprint = "";
5596
5650
  mcpBridge;
5597
5651
  subagentManager;
5652
+ archivedToolResultsByConversation = /* @__PURE__ */ new Map();
5598
5653
  resolveToolAccess(toolName) {
5599
5654
  const tools = this.loadedConfig?.tools;
5600
5655
  if (!tools) return true;
@@ -5666,6 +5721,56 @@ var AgentHarness = class _AgentHarness {
5666
5721
  if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
5667
5722
  this.registerIfMissing(ponchoDocsTool);
5668
5723
  }
5724
+ if (this.isToolEnabled("get_tool_result_by_id")) {
5725
+ this.registerIfMissing(this.createGetToolResultByIdTool());
5726
+ }
5727
+ }
5728
+ createGetToolResultByIdTool() {
5729
+ return defineTool7({
5730
+ name: "get_tool_result_by_id",
5731
+ description: "Retrieve a previously archived full tool result by id for the current conversation. Use this when older tool outputs were truncated in prompt history.",
5732
+ inputSchema: {
5733
+ type: "object",
5734
+ properties: {
5735
+ toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
5736
+ offset: { type: "number", description: "Optional character offset for paging large payloads" },
5737
+ limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" }
5738
+ },
5739
+ required: ["toolResultId"],
5740
+ additionalProperties: false
5741
+ },
5742
+ handler: async (input, context) => {
5743
+ const conversationId = context.conversationId ?? "__default__";
5744
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5745
+ const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
5746
+ const record = archive[toolResultId];
5747
+ if (!record) {
5748
+ console.info(
5749
+ `[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`
5750
+ );
5751
+ return {
5752
+ error: `No archived tool result found for id "${toolResultId}" in this conversation.`
5753
+ };
5754
+ }
5755
+ const offset = Math.max(0, Number(input.offset) || 0);
5756
+ const limit = Math.min(Math.max(Number(input.limit) || 6e3, 1), 2e4);
5757
+ const end = Math.min(record.payload.length, offset + limit);
5758
+ const chunk = record.payload.slice(offset, end);
5759
+ console.info(
5760
+ `[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" offset=${offset} returned=${chunk.length} total=${record.payload.length}`
5761
+ );
5762
+ return {
5763
+ toolResultId: record.toolResultId,
5764
+ toolName: record.toolName,
5765
+ toolCallId: record.toolCallId,
5766
+ totalChars: record.payload.length,
5767
+ offset,
5768
+ returnedChars: chunk.length,
5769
+ hasMore: end < record.payload.length,
5770
+ payload: chunk
5771
+ };
5772
+ }
5773
+ });
5669
5774
  }
5670
5775
  shouldEnableWriteTool() {
5671
5776
  const override = process.env.PONCHO_FS_WRITE?.toLowerCase();
@@ -5690,6 +5795,116 @@ var AgentHarness = class _AgentHarness {
5690
5795
  get frontmatter() {
5691
5796
  return this.parsedAgent?.frontmatter;
5692
5797
  }
5798
+ getToolResultArchive(conversationId) {
5799
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
5800
+ return archive ? { ...archive } : {};
5801
+ }
5802
+ seedToolResultArchive(conversationId, parameters) {
5803
+ const seeded = readArchiveFromParameters(parameters);
5804
+ const existing = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5805
+ const merged = { ...existing, ...seeded };
5806
+ this.archivedToolResultsByConversation.set(conversationId, merged);
5807
+ return merged;
5808
+ }
5809
+ truncateHistoricalToolResults(messages, conversationId) {
5810
+ let latestRunId;
5811
+ for (let i = messages.length - 1; i >= 0; i -= 1) {
5812
+ const msg = messages[i];
5813
+ const meta = msg.metadata;
5814
+ const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
5815
+ if (runId) {
5816
+ latestRunId = runId;
5817
+ break;
5818
+ }
5819
+ }
5820
+ if (!latestRunId) {
5821
+ return { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };
5822
+ }
5823
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5824
+ this.archivedToolResultsByConversation.set(conversationId, archive);
5825
+ let changed = false;
5826
+ let truncatedCount = 0;
5827
+ let archivedCount = 0;
5828
+ let omittedChars = 0;
5829
+ for (const msg of messages) {
5830
+ if (msg.role !== "tool" || typeof msg.content !== "string") continue;
5831
+ const meta = msg.metadata;
5832
+ const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
5833
+ if (runId === latestRunId) continue;
5834
+ let parsed;
5835
+ try {
5836
+ parsed = JSON.parse(msg.content);
5837
+ } catch {
5838
+ continue;
5839
+ }
5840
+ if (!Array.isArray(parsed)) continue;
5841
+ let rowChanged = false;
5842
+ const nextRows = parsed.map((row) => {
5843
+ if (!isToolResultRow(row)) return row;
5844
+ if (row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) return row;
5845
+ if (this.shouldPreserveSkillToolResult(row)) return row;
5846
+ const toolResultId = row.tool_use_id;
5847
+ if (!archive[toolResultId]) {
5848
+ archive[toolResultId] = {
5849
+ toolResultId,
5850
+ conversationId,
5851
+ toolName: row.tool_name,
5852
+ toolCallId: row.tool_use_id,
5853
+ createdAt: now(),
5854
+ sizeBytes: Buffer.byteLength(row.content, "utf8"),
5855
+ payload: row.content
5856
+ };
5857
+ archivedCount += 1;
5858
+ }
5859
+ const omitted = Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
5860
+ omittedChars += omitted;
5861
+ truncatedCount += 1;
5862
+ rowChanged = true;
5863
+ return {
5864
+ ...row,
5865
+ content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content)
5866
+ };
5867
+ });
5868
+ if (rowChanged) {
5869
+ msg.content = JSON.stringify(nextRows);
5870
+ if (msg.metadata && typeof msg.metadata === "object") {
5871
+ const meta2 = msg.metadata;
5872
+ if ("_richToolResults" in meta2) {
5873
+ delete meta2._richToolResults;
5874
+ }
5875
+ }
5876
+ changed = true;
5877
+ }
5878
+ }
5879
+ return { changed, truncatedCount, archivedCount, omittedChars };
5880
+ }
5881
+ shouldPreserveSkillToolResult(row) {
5882
+ if (row.tool_name.startsWith("todo_")) {
5883
+ return true;
5884
+ }
5885
+ if (row.tool_name !== "activate_skill" && row.tool_name !== "deactivate_skill") {
5886
+ return false;
5887
+ }
5888
+ const content = row.content.trim();
5889
+ if (content.startsWith("Tool error:")) {
5890
+ return false;
5891
+ }
5892
+ try {
5893
+ const parsed = JSON.parse(content);
5894
+ const skill = typeof parsed.skill === "string" ? parsed.skill : void 0;
5895
+ if (skill && this.activeSkillNames.has(skill)) {
5896
+ return true;
5897
+ }
5898
+ const activeSkills = Array.isArray(parsed.activeSkills) ? parsed.activeSkills.filter((v) => typeof v === "string") : [];
5899
+ for (const name of activeSkills) {
5900
+ if (this.activeSkillNames.has(name)) {
5901
+ return true;
5902
+ }
5903
+ }
5904
+ } catch {
5905
+ }
5906
+ return false;
5907
+ }
5693
5908
  async getTodos(conversationId) {
5694
5909
  if (!this.todoStore) return [];
5695
5910
  return this.todoStore.get(conversationId);
@@ -6358,12 +6573,31 @@ var AgentHarness = class _AgentHarness {
6358
6573
  let agent = this.parsedAgent;
6359
6574
  const runId = `run_${randomUUID3()}`;
6360
6575
  const start = now();
6361
- const maxSteps = agent.frontmatter.limits?.maxSteps ?? 50;
6576
+ const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
6362
6577
  const configuredTimeout = agent.frontmatter.limits?.timeout;
6363
6578
  const timeoutMs = this.environment === "development" && configuredTimeout == null ? 0 : (configuredTimeout ?? 300) * 1e3;
6364
6579
  const platformMaxDurationSec = Number(process.env.PONCHO_MAX_DURATION) || 0;
6365
6580
  const softDeadlineMs = input.disableSoftDeadline || platformMaxDurationSec <= 0 ? 0 : platformMaxDurationSec * 800;
6366
6581
  const messages = [...input.messages ?? []];
6582
+ const conversationId = input.conversationId ?? "__default__";
6583
+ this.seedToolResultArchive(conversationId, input.parameters);
6584
+ const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
6585
+ if (truncationSummary.changed) {
6586
+ console.info(
6587
+ `[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) (archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) for conversation="${conversationId}"`
6588
+ );
6589
+ }
6590
+ const hasFullToolResults = hasUntruncatedToolResults(messages);
6591
+ const enablePromptCache = !hasFullToolResults;
6592
+ if (!enablePromptCache) {
6593
+ console.info(
6594
+ `[poncho][cost] Prompt cache write disabled for run "${runId}" (untruncated tool results present in history).`
6595
+ );
6596
+ } else {
6597
+ console.info(
6598
+ `[poncho][cost] Prompt cache write enabled for run "${runId}" (history has no untruncated tool results).`
6599
+ );
6600
+ }
6367
6601
  const inputMessageCount = messages.length;
6368
6602
  const events = [];
6369
6603
  const renderCurrentAgentPrompt = () => renderAgentPrompt(this.parsedAgent, {
@@ -6443,7 +6677,6 @@ ${this.skillFingerprint}`;
6443
6677
  const browserEventQueue = [];
6444
6678
  const browserCleanups = [];
6445
6679
  const browserSession = this._browserSession;
6446
- const conversationId = input.conversationId ?? "__default__";
6447
6680
  if (browserSession) {
6448
6681
  browserCleanups.push(
6449
6682
  browserSession.onFrame(conversationId, (frame) => {
@@ -6510,6 +6743,7 @@ ${this.skillFingerprint}`;
6510
6743
  let totalInputTokens = 0;
6511
6744
  let totalOutputTokens = 0;
6512
6745
  let totalCachedTokens = 0;
6746
+ let totalCacheWriteTokens = 0;
6513
6747
  let transientStepRetryCount = 0;
6514
6748
  let latestContextTokens = 0;
6515
6749
  let toolOutputEstimateSinceModel = 0;
@@ -6538,7 +6772,12 @@ ${this.skillFingerprint}`;
6538
6772
  status: "completed",
6539
6773
  response: responseText,
6540
6774
  steps: step - 1,
6541
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
6775
+ tokens: {
6776
+ input: totalInputTokens,
6777
+ output: totalOutputTokens,
6778
+ cached: totalCachedTokens,
6779
+ cacheWrite: totalCacheWriteTokens
6780
+ },
6542
6781
  duration: now() - start,
6543
6782
  continuation: true,
6544
6783
  continuationMessages: [...messages],
@@ -6551,7 +6790,6 @@ ${this.skillFingerprint}`;
6551
6790
  }
6552
6791
  const stepStart = now();
6553
6792
  yield pushEvent({ type: "step:started", step });
6554
- yield pushEvent({ type: "model:request", tokens: 0 });
6555
6793
  const dispatcherTools = this.dispatcher.list();
6556
6794
  const exposedToolNames = /* @__PURE__ */ new Map();
6557
6795
  const usedProviderToolNames = /* @__PURE__ */ new Set();
@@ -6570,6 +6808,15 @@ ${this.skillFingerprint}`;
6570
6808
  inputSchema: jsonSchemaToZod(tool.inputSchema)
6571
6809
  };
6572
6810
  }
6811
+ const toolDefsJsonForEstimate = JSON.stringify(
6812
+ dispatcherTools.map((t) => ({
6813
+ name: t.name,
6814
+ description: t.description,
6815
+ inputSchema: t.inputSchema
6816
+ }))
6817
+ );
6818
+ const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6819
+ yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
6573
6820
  const convertMessage = async (msg) => {
6574
6821
  if (msg.role === "tool") {
6575
6822
  const meta = msg.metadata;
@@ -6759,15 +7006,8 @@ ${textContent}` };
6759
7006
  }
6760
7007
  const modelInstance = this.modelProvider(modelName);
6761
7008
  const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
6762
- if (compactionConfig.enabled && step === 1) {
6763
- const toolDefsJson = JSON.stringify(
6764
- dispatcherTools.map((t) => ({
6765
- name: t.name,
6766
- description: t.description,
6767
- inputSchema: t.inputSchema
6768
- }))
6769
- );
6770
- const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
7009
+ if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
7010
+ const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6771
7011
  const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
6772
7012
  const effectiveTokens = Math.max(estimated, lastReportedInput);
6773
7013
  if (effectiveTokens > compactionConfig.trigger * contextWindow) {
@@ -6780,11 +7020,14 @@ ${textContent}` };
6780
7020
  if (compactResult.compacted) {
6781
7021
  messages.length = 0;
6782
7022
  messages.push(...compactResult.messages);
6783
- const emittedMessages = [...compactResult.messages];
6784
- if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
6785
- emittedMessages.pop();
7023
+ let emittedMessages;
7024
+ if (step === 1) {
7025
+ emittedMessages = [...compactResult.messages];
7026
+ if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
7027
+ emittedMessages.pop();
7028
+ }
6786
7029
  }
6787
- const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
7030
+ const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6788
7031
  latestContextTokens = tokensAfterCompaction;
6789
7032
  toolOutputEstimateSinceModel = 0;
6790
7033
  yield pushEvent({
@@ -6811,7 +7054,7 @@ ${textContent}` };
6811
7054
  const coreMessages = cachedCoreMessages;
6812
7055
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
6813
7056
  const maxTokens = agent.frontmatter.model?.maxTokens;
6814
- const cachedMessages = addPromptCacheBreakpoints(coreMessages, modelInstance);
7057
+ const cachedMessages = enablePromptCache ? addPromptCacheBreakpoints(coreMessages, modelInstance) : coreMessages;
6815
7058
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
6816
7059
  const result = await streamText({
6817
7060
  model: modelInstance,
@@ -6929,7 +7172,12 @@ ${textContent}` };
6929
7172
  status: "completed",
6930
7173
  response: responseText + fullText,
6931
7174
  steps: step,
6932
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7175
+ tokens: {
7176
+ input: totalInputTokens,
7177
+ output: totalOutputTokens,
7178
+ cached: totalCachedTokens,
7179
+ cacheWrite: totalCacheWriteTokens
7180
+ },
6933
7181
  duration: now() - start,
6934
7182
  continuation: true,
6935
7183
  continuationMessages: [...messages],
@@ -6957,7 +7205,12 @@ ${textContent}` };
6957
7205
  status: "completed",
6958
7206
  response: responseText + fullText,
6959
7207
  steps: step,
6960
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7208
+ tokens: {
7209
+ input: totalInputTokens,
7210
+ output: totalOutputTokens,
7211
+ cached: totalCachedTokens,
7212
+ cacheWrite: totalCacheWriteTokens
7213
+ },
6961
7214
  duration: now() - start,
6962
7215
  continuation: true,
6963
7216
  continuationMessages: [...messages],
@@ -6997,11 +7250,14 @@ ${textContent}` };
6997
7250
  const fullResult = await result.response;
6998
7251
  const usage = await result.usage;
6999
7252
  const toolCallsResult = await result.toolCalls;
7000
- const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
7253
+ const details = usage.inputTokenDetails ?? {};
7254
+ const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
7255
+ const stepCacheWriteTokens = typeof details.cacheWriteTokens === "number" ? details.cacheWriteTokens : typeof details.cacheCreationTokens === "number" ? details.cacheCreationTokens : typeof details.cacheCreationInputTokens === "number" ? details.cacheCreationInputTokens : 0;
7001
7256
  const stepInputTokens = usage.inputTokens ?? 0;
7002
7257
  totalInputTokens += stepInputTokens;
7003
7258
  totalOutputTokens += usage.outputTokens ?? 0;
7004
7259
  totalCachedTokens += stepCachedTokens;
7260
+ totalCacheWriteTokens += stepCacheWriteTokens;
7005
7261
  latestContextTokens = stepInputTokens;
7006
7262
  toolOutputEstimateSinceModel = 0;
7007
7263
  yield pushEvent({
@@ -7009,9 +7265,13 @@ ${textContent}` };
7009
7265
  usage: {
7010
7266
  input: stepInputTokens,
7011
7267
  output: usage.outputTokens ?? 0,
7012
- cached: stepCachedTokens
7268
+ cached: stepCachedTokens,
7269
+ cacheWrite: stepCacheWriteTokens
7013
7270
  }
7014
7271
  });
7272
+ console.info(
7273
+ `[poncho][cost] model="${modelName}" step=${step} input=${stepInputTokens} output=${usage.outputTokens ?? 0} cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`
7274
+ );
7015
7275
  const toolCalls = toolCallsResult.map((tc) => ({
7016
7276
  id: tc.toolCallId,
7017
7277
  name: tc.toolName,
@@ -7058,7 +7318,8 @@ ${textContent}` };
7058
7318
  tokens: {
7059
7319
  input: totalInputTokens,
7060
7320
  output: totalOutputTokens,
7061
- cached: totalCachedTokens
7321
+ cached: totalCachedTokens,
7322
+ cacheWrite: totalCacheWriteTokens
7062
7323
  },
7063
7324
  duration: now() - start,
7064
7325
  contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
@@ -7215,7 +7476,12 @@ ${textContent}` };
7215
7476
  status: "completed",
7216
7477
  response: responseText + fullText,
7217
7478
  steps: step,
7218
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7479
+ tokens: {
7480
+ input: totalInputTokens,
7481
+ output: totalOutputTokens,
7482
+ cached: totalCachedTokens,
7483
+ cacheWrite: totalCacheWriteTokens
7484
+ },
7219
7485
  duration: now() - start,
7220
7486
  continuation: true,
7221
7487
  continuationMessages: [...messages],
@@ -7246,6 +7512,20 @@ ${textContent}` };
7246
7512
  tool_name: result2.tool,
7247
7513
  content: `Tool error: ${result2.error}`
7248
7514
  });
7515
+ {
7516
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
7517
+ if (archive) {
7518
+ archive[result2.callId] = {
7519
+ toolResultId: result2.callId,
7520
+ conversationId,
7521
+ toolName: result2.tool,
7522
+ toolCallId: result2.callId,
7523
+ createdAt: now(),
7524
+ sizeBytes: Buffer.byteLength(`Tool error: ${result2.error}`, "utf8"),
7525
+ payload: `Tool error: ${result2.error}`
7526
+ };
7527
+ }
7528
+ }
7249
7529
  richToolResults.push({
7250
7530
  type: "tool-result",
7251
7531
  toolCallId: result2.callId,
@@ -7271,6 +7551,21 @@ ${textContent}` };
7271
7551
  tool_name: result2.tool,
7272
7552
  content: JSON.stringify(strippedOutput ?? null)
7273
7553
  });
7554
+ {
7555
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
7556
+ if (archive) {
7557
+ const payload = JSON.stringify(result2.output ?? null);
7558
+ archive[result2.callId] = {
7559
+ toolResultId: result2.callId,
7560
+ conversationId,
7561
+ toolName: result2.tool,
7562
+ toolCallId: result2.callId,
7563
+ createdAt: now(),
7564
+ sizeBytes: Buffer.byteLength(payload, "utf8"),
7565
+ payload
7566
+ };
7567
+ }
7568
+ }
7274
7569
  if (mediaItems.length > 0) {
7275
7570
  richToolResults.push({
7276
7571
  type: "tool-result",
@@ -7305,9 +7600,15 @@ ${textContent}` };
7305
7600
  messages.push({
7306
7601
  role: "assistant",
7307
7602
  content: assistantContent,
7308
- metadata: { timestamp: now(), id: randomUUID3(), step }
7603
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
7309
7604
  });
7310
- const toolMsgMeta = { timestamp: now(), id: randomUUID3(), step, _richToolResults: richToolResults };
7605
+ const toolMsgMeta = {
7606
+ timestamp: now(),
7607
+ id: randomUUID3(),
7608
+ step,
7609
+ runId,
7610
+ _richToolResults: richToolResults
7611
+ };
7311
7612
  messages.push({
7312
7613
  role: "tool",
7313
7614
  content: JSON.stringify(toolResultsForModel),
@@ -7318,7 +7619,12 @@ ${textContent}` };
7318
7619
  status: "completed",
7319
7620
  response: responseText + fullText,
7320
7621
  steps: step,
7321
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7622
+ tokens: {
7623
+ input: totalInputTokens,
7624
+ output: totalOutputTokens,
7625
+ cached: totalCachedTokens,
7626
+ cacheWrite: totalCacheWriteTokens
7627
+ },
7322
7628
  duration: now() - start,
7323
7629
  continuation: true,
7324
7630
  continuationMessages: [...messages],
@@ -7379,7 +7685,12 @@ ${this.skillFingerprint}`;
7379
7685
  status: "completed",
7380
7686
  response: responseText,
7381
7687
  steps: maxSteps,
7382
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7688
+ tokens: {
7689
+ input: totalInputTokens,
7690
+ output: totalOutputTokens,
7691
+ cached: totalCachedTokens,
7692
+ cacheWrite: totalCacheWriteTokens
7693
+ },
7383
7694
  duration: now() - start,
7384
7695
  continuation: true,
7385
7696
  continuationMessages: [...messages],
@@ -8699,7 +9010,7 @@ var createConversationStore = (config, options) => {
8699
9010
  };
8700
9011
 
8701
9012
  // src/index.ts
8702
- import { defineTool as defineTool7 } from "@poncho-ai/sdk";
9013
+ import { defineTool as defineTool8 } from "@poncho-ai/sdk";
8703
9014
  export {
8704
9015
  AgentHarness,
8705
9016
  InMemoryConversationStore,
@@ -8732,7 +9043,7 @@ export {
8732
9043
  createSubagentTools,
8733
9044
  createUploadStore,
8734
9045
  createWriteTool,
8735
- defineTool7 as defineTool,
9046
+ defineTool8 as defineTool,
8736
9047
  deleteOpenAICodexSession,
8737
9048
  deriveUploadKey,
8738
9049
  ensureAgentIdentity,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/harness",
3
- "version": "0.31.1",
3
+ "version": "0.31.2",
4
4
  "description": "Agent execution runtime - conversation loop, tool dispatch, streaming",
5
5
  "repository": {
6
6
  "type": "git",
@@ -34,7 +34,7 @@
34
34
  "redis": "^5.10.0",
35
35
  "yaml": "^2.4.0",
36
36
  "zod": "^3.22.0",
37
- "@poncho-ai/sdk": "1.7.0"
37
+ "@poncho-ai/sdk": "1.7.1"
38
38
  },
39
39
  "devDependencies": {
40
40
  "@types/mustache": "^4.2.6",