@poncho-ai/harness 0.31.0 → 0.31.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -361,9 +361,11 @@ var OVERHEAD_MULTIPLIER = 1.15;
361
361
  var MIN_COMPACTABLE_MESSAGES = 4;
362
362
  var DEFAULT_COMPACTION_CONFIG = {
363
363
  enabled: true,
364
- trigger: 0.8,
365
- keepRecentMessages: 6
364
+ trigger: 0.75,
365
+ keepRecentMessages: 4
366
366
  };
367
+ var SUMMARIZATION_MESSAGE_TRUNCATION_CHARS = 1200;
368
+ var SUMMARIZATION_MAX_OUTPUT_TOKENS = 768;
367
369
  var SUMMARIZATION_PROMPT = `Summarize the following conversation into a structured working state that allows continuation without re-asking questions. Include:
368
370
 
369
371
  1. **User intent**: What the user originally asked for and any refinements
@@ -418,7 +420,7 @@ var buildSummarizationMessages = (messagesToCompact, instructions) => {
418
420
  const conversationLines = [];
419
421
  for (const msg of messagesToCompact) {
420
422
  const text = getTextContent(msg);
421
- const truncated = text.length > 2e3 ? text.slice(0, 2e3) + "\n...[truncated]" : text;
423
+ const truncated = text.length > SUMMARIZATION_MESSAGE_TRUNCATION_CHARS ? text.slice(0, SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) + "\n...[truncated]" : text;
422
424
  conversationLines.push(`[${msg.role}]: ${truncated}`);
423
425
  }
424
426
  const prompt = instructions ? `${SUMMARIZATION_PROMPT}
@@ -473,7 +475,7 @@ var compactMessages = async (model, messages, config, options) => {
473
475
  const result = await generateText({
474
476
  model,
475
477
  messages: summarizationMessages,
476
- maxOutputTokens: 2048
478
+ maxOutputTokens: SUMMARIZATION_MAX_OUTPUT_TOKENS
477
479
  });
478
480
  const summary = result.text.trim();
479
481
  if (!summary) {
@@ -1658,7 +1660,7 @@ Logs print to console:
1658
1660
  [event] run:started {"type":"run:started","runId":"run_abc123","agentId":"my-agent"}
1659
1661
  [event] tool:started {"type":"tool:started","tool":"read_file","input":{"path":"README.md"}}
1660
1662
  [event] tool:completed {"type":"tool:completed","tool":"read_file","duration":45,"output":{"path":"README.md","content":"..."}}
1661
- [event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840}}}
1663
+ [event] run:completed {"type":"run:completed","runId":"run_abc123","result":{"status":"completed","response":"...","steps":3,"tokens":{"input":1500,"output":840,"cached":1200,"cacheWrite":300}}}
1662
1664
  \`\`\`
1663
1665
 
1664
1666
  ### Production telemetry (generic OTLP)
@@ -2110,7 +2112,7 @@ var ponchoDocsTool = defineTool({
2110
2112
  import { randomUUID as randomUUID3 } from "crypto";
2111
2113
  import { readFile as readFile9 } from "fs/promises";
2112
2114
  import { resolve as resolve11 } from "path";
2113
- import { getTextContent as getTextContent2 } from "@poncho-ai/sdk";
2115
+ import { defineTool as defineTool7, getTextContent as getTextContent2 } from "@poncho-ai/sdk";
2114
2116
 
2115
2117
  // src/upload-store.ts
2116
2118
  import { createHash as createHash2 } from "crypto";
@@ -3887,6 +3889,7 @@ var MODEL_CONTEXT_WINDOWS = {
3887
3889
  };
3888
3890
  var DEFAULT_CONTEXT_WINDOW = 2e5;
3889
3891
  var OPENAI_CODEX_DEFAULT_INSTRUCTIONS = "You are Codex, based on GPT-5. You are running as a coding agent in Poncho.";
3892
+ var OPENAI_CODEX_RESPONSES_URL = process.env.OPENAI_CODEX_RESPONSES_URL ?? "https://chatgpt.com/backend-api/codex/responses";
3890
3893
  var extractSystemInstructionFromInput = (input) => {
3891
3894
  if (!Array.isArray(input)) return void 0;
3892
3895
  for (const message of input) {
@@ -3909,6 +3912,18 @@ var extractSystemInstructionFromInput = (input) => {
3909
3912
  }
3910
3913
  return void 0;
3911
3914
  };
3915
+ var normalizeToolParameterSchemas = (tools) => {
3916
+ if (!Array.isArray(tools)) return;
3917
+ for (const tool of tools) {
3918
+ if (!tool || typeof tool !== "object") continue;
3919
+ const entry = tool;
3920
+ if (!entry.parameters || typeof entry.parameters !== "object") continue;
3921
+ const schema = entry.parameters;
3922
+ if (schema.type === "object" && (typeof schema.properties !== "object" || schema.properties === null)) {
3923
+ schema.properties = {};
3924
+ }
3925
+ }
3926
+ };
3912
3927
  var getModelContextWindow = (modelName) => {
3913
3928
  if (MODEL_CONTEXT_WINDOWS[modelName] !== void 0) {
3914
3929
  return MODEL_CONTEXT_WINDOWS[modelName];
@@ -3938,7 +3953,7 @@ var createModelProvider = (provider, config) => {
3938
3953
  const originalUrl = input instanceof URL ? input.toString() : typeof input === "string" ? input : input.url;
3939
3954
  const parsed = new URL(originalUrl);
3940
3955
  const shouldRewrite = parsed.pathname.includes("/v1/responses") || parsed.pathname.includes("/chat/completions");
3941
- const targetUrl = shouldRewrite ? "https://chatgpt.com/backend-api/codex/responses" : originalUrl;
3956
+ const targetUrl = shouldRewrite ? OPENAI_CODEX_RESPONSES_URL : originalUrl;
3942
3957
  let body = init?.body;
3943
3958
  if (shouldRewrite && typeof body === "string" && headers.get("Content-Type")?.includes("application/json")) {
3944
3959
  try {
@@ -3946,12 +3961,21 @@ var createModelProvider = (provider, config) => {
3946
3961
  if (typeof payload.instructions !== "string" || payload.instructions.trim() === "") {
3947
3962
  payload.instructions = extractSystemInstructionFromInput(payload.input) ?? OPENAI_CODEX_DEFAULT_INSTRUCTIONS;
3948
3963
  }
3964
+ normalizeToolParameterSchemas(payload.tools);
3949
3965
  payload.store = false;
3950
3966
  body = JSON.stringify(payload);
3951
3967
  } catch {
3952
3968
  }
3953
3969
  }
3954
- return fetch(targetUrl, { ...init, headers, body });
3970
+ try {
3971
+ return await fetch(targetUrl, { ...init, headers, body });
3972
+ } catch (error) {
3973
+ const message = error instanceof Error ? error.message : String(error);
3974
+ if (shouldRewrite && targetUrl.includes("chatgpt.com") && message.includes("ENOTFOUND chatgpt.com")) {
3975
+ return fetch(originalUrl, { ...init, headers, body });
3976
+ }
3977
+ throw error;
3978
+ }
3955
3979
  }
3956
3980
  });
3957
3981
  return (modelName) => openai(modelName);
@@ -4993,6 +5017,9 @@ var TelemetryEmitter = class {
4993
5017
  if (otlp) {
4994
5018
  await this.sendOtlp(event, otlp);
4995
5019
  }
5020
+ if (event.type === "model:chunk") {
5021
+ return;
5022
+ }
4996
5023
  process.stdout.write(`[event] ${event.type} ${sanitizeEventForLog(event)}
4997
5024
  `);
4998
5025
  }
@@ -5119,8 +5146,12 @@ var ToolDispatcher = class {
5119
5146
 
5120
5147
  // src/harness.ts
5121
5148
  var now = () => Date.now();
5122
- var FIRST_CHUNK_TIMEOUT_MS = 3e5;
5123
- var MAX_TRANSIENT_STEP_RETRIES = 2;
5149
+ var FIRST_CHUNK_TIMEOUT_MS = 9e4;
5150
+ var MAX_TRANSIENT_STEP_RETRIES = 1;
5151
+ var COMPACTION_CHECK_INTERVAL_STEPS = 3;
5152
+ var TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
5153
+ var TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
5154
+ var TOOL_RESULT_PREVIEW_CHARS = 700;
5124
5155
  var FirstChunkTimeoutError = class extends Error {
5125
5156
  constructor(modelName, timeoutMs) {
5126
5157
  super(
@@ -5179,18 +5210,11 @@ var isRetryableModelError = (error) => {
5179
5210
  if (error instanceof FirstChunkTimeoutError) {
5180
5211
  return true;
5181
5212
  }
5182
- if (isNoOutputGeneratedError(error)) {
5183
- return true;
5184
- }
5185
5213
  const statusCode = getErrorStatusCode(error);
5186
5214
  if (typeof statusCode === "number") {
5187
5215
  return statusCode === 429 || statusCode >= 500;
5188
5216
  }
5189
- if (!error || typeof error !== "object") {
5190
- return false;
5191
- }
5192
- const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
5193
- return maybeMessage.includes("internal server error") || maybeMessage.includes("service unavailable") || maybeMessage.includes("gateway timeout") || maybeMessage.includes("rate limit");
5217
+ return false;
5194
5218
  };
5195
5219
  var toRunError = (error) => {
5196
5220
  const statusCode = getErrorStatusCode(error);
@@ -5242,6 +5266,58 @@ var toProviderSafeToolName = (originalName, index, used) => {
5242
5266
  used.add(candidate);
5243
5267
  return candidate;
5244
5268
  };
5269
/**
 * Structural check for a serialized tool-result row.
 *
 * @param value - Any value, typically one element of a parsed tool message.
 * @returns `true` when `value` is a non-null object whose `tool_use_id`,
 *   `tool_name` and `content` properties are all strings.
 */
var isToolResultRow = (value) => {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const requiredStringFields = ["tool_use_id", "tool_name", "content"];
  return requiredStringFields.every((field) => typeof value[field] === "string");
};
5274
/**
 * Extracts the tool-result archive carried in run parameters under the
 * `TOOL_RESULT_ARCHIVE_PARAM` key.
 *
 * Entries are copied only when they are objects whose seven expected fields
 * all have the expected primitive type; anything else is silently dropped.
 * Only the known fields are copied, so extra properties on an entry are not
 * carried over.
 *
 * @param parameters - Run parameters; may be null/undefined.
 * @returns A new object mapping each original key to its validated record
 *   (empty when nothing usable is present).
 */
var readArchiveFromParameters = (parameters) => {
  // Field order matters: it is both the validation order and the key order of copied records.
  const expectedFieldTypes = [
    ["toolResultId", "string"],
    ["conversationId", "string"],
    ["toolName", "string"],
    ["toolCallId", "string"],
    ["createdAt", "number"],
    ["sizeBytes", "number"],
    ["payload", "string"]
  ];
  const isObjectLike = (candidate) => candidate !== null && typeof candidate === "object";
  const archive = {};
  const rawArchive = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
  if (!isObjectLike(rawArchive)) {
    return archive;
  }
  Object.entries(rawArchive).forEach(([archiveKey, candidate]) => {
    if (!isObjectLike(candidate)) return;
    const wellFormed = expectedFieldTypes.every(([field, type]) => typeof candidate[field] === type);
    if (!wellFormed) return;
    archive[archiveKey] = Object.fromEntries(
      expectedFieldTypes.map(([field]) => [field, candidate[field]])
    );
  });
  return archive;
};
5296
/**
 * Builds the placeholder text that stands in for an archived tool result.
 *
 * The first line carries the truncation prefix, the archive id, the tool name
 * and the number of omitted characters; the second line onward is a preview
 * of at most `TOOL_RESULT_PREVIEW_CHARS` characters, followed by a
 * "...[truncated]" marker when anything was cut.
 *
 * @param toolResultId - Id under which the full payload was archived.
 * @param toolName - Name of the tool that produced the payload.
 * @param payload - Full original tool result text.
 * @returns The notice string.
 */
var makeTruncatedToolResultNotice = (toolResultId, toolName, payload) => {
  const preview = payload.slice(0, TOOL_RESULT_PREVIEW_CHARS);
  const omittedChars = Math.max(0, payload.length - preview.length);
  const header = `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${omittedChars}`;
  const truncationMarker = omittedChars > 0 ? "\n...[truncated]" : "";
  return header + "\n" + preview + truncationMarker;
};
5302
/**
 * Reports whether any tool message still contains a full tool result.
 *
 * Only messages with role "tool" and string content are inspected. Content
 * that is not valid JSON, or does not parse to an array, is ignored, as are
 * array elements that are not tool-result rows.
 *
 * @param messages - Conversation messages to scan.
 * @returns `true` as soon as one tool-result row is found whose content does
 *   not start with `TOOL_RESULT_TRUNCATED_PREFIX`; otherwise `false`.
 */
var hasUntruncatedToolResults = (messages) => {
  // Unparseable or non-array content contributes no rows.
  const parseRows = (content) => {
    try {
      const decoded = JSON.parse(content);
      return Array.isArray(decoded) ? decoded : [];
    } catch {
      return [];
    }
  };
  for (const message of messages) {
    const isToolText = message.role === "tool" && typeof message.content === "string";
    if (!isToolText) continue;
    const foundFullResult = parseRows(message.content).some(
      (row) => isToolResultRow(row) && !row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)
    );
    if (foundFullResult) {
      return true;
    }
  }
  return false;
};
5245
5321
  var DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
5246
5322
 
5247
5323
  You are running locally in development mode. Treat this as an editable agent workspace.
@@ -5573,6 +5649,7 @@ var AgentHarness = class _AgentHarness {
5573
5649
  agentFileFingerprint = "";
5574
5650
  mcpBridge;
5575
5651
  subagentManager;
5652
+ archivedToolResultsByConversation = /* @__PURE__ */ new Map();
5576
5653
  resolveToolAccess(toolName) {
5577
5654
  const tools = this.loadedConfig?.tools;
5578
5655
  if (!tools) return true;
@@ -5644,6 +5721,56 @@ var AgentHarness = class _AgentHarness {
5644
5721
  if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
5645
5722
  this.registerIfMissing(ponchoDocsTool);
5646
5723
  }
5724
+ if (this.isToolEnabled("get_tool_result_by_id")) {
5725
+ this.registerIfMissing(this.createGetToolResultByIdTool());
5726
+ }
5727
+ }
5728
+ createGetToolResultByIdTool() {
5729
+ return defineTool7({
5730
+ name: "get_tool_result_by_id",
5731
+ description: "Retrieve a previously archived full tool result by id for the current conversation. Use this when older tool outputs were truncated in prompt history.",
5732
+ inputSchema: {
5733
+ type: "object",
5734
+ properties: {
5735
+ toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
5736
+ offset: { type: "number", description: "Optional character offset for paging large payloads" },
5737
+ limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" }
5738
+ },
5739
+ required: ["toolResultId"],
5740
+ additionalProperties: false
5741
+ },
5742
+ handler: async (input, context) => {
5743
+ const conversationId = context.conversationId ?? "__default__";
5744
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5745
+ const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
5746
+ const record = archive[toolResultId];
5747
+ if (!record) {
5748
+ console.info(
5749
+ `[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`
5750
+ );
5751
+ return {
5752
+ error: `No archived tool result found for id "${toolResultId}" in this conversation.`
5753
+ };
5754
+ }
5755
+ const offset = Math.max(0, Number(input.offset) || 0);
5756
+ const limit = Math.min(Math.max(Number(input.limit) || 6e3, 1), 2e4);
5757
+ const end = Math.min(record.payload.length, offset + limit);
5758
+ const chunk = record.payload.slice(offset, end);
5759
+ console.info(
5760
+ `[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" offset=${offset} returned=${chunk.length} total=${record.payload.length}`
5761
+ );
5762
+ return {
5763
+ toolResultId: record.toolResultId,
5764
+ toolName: record.toolName,
5765
+ toolCallId: record.toolCallId,
5766
+ totalChars: record.payload.length,
5767
+ offset,
5768
+ returnedChars: chunk.length,
5769
+ hasMore: end < record.payload.length,
5770
+ payload: chunk
5771
+ };
5772
+ }
5773
+ });
5647
5774
  }
5648
5775
  shouldEnableWriteTool() {
5649
5776
  const override = process.env.PONCHO_FS_WRITE?.toLowerCase();
@@ -5668,6 +5795,116 @@ var AgentHarness = class _AgentHarness {
5668
5795
  get frontmatter() {
5669
5796
  return this.parsedAgent?.frontmatter;
5670
5797
  }
5798
+ getToolResultArchive(conversationId) {
5799
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
5800
+ return archive ? { ...archive } : {};
5801
+ }
5802
+ seedToolResultArchive(conversationId, parameters) {
5803
+ const seeded = readArchiveFromParameters(parameters);
5804
+ const existing = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5805
+ const merged = { ...existing, ...seeded };
5806
+ this.archivedToolResultsByConversation.set(conversationId, merged);
5807
+ return merged;
5808
+ }
5809
+ truncateHistoricalToolResults(messages, conversationId) {
5810
+ let latestRunId;
5811
+ for (let i = messages.length - 1; i >= 0; i -= 1) {
5812
+ const msg = messages[i];
5813
+ const meta = msg.metadata;
5814
+ const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
5815
+ if (runId) {
5816
+ latestRunId = runId;
5817
+ break;
5818
+ }
5819
+ }
5820
+ if (!latestRunId) {
5821
+ return { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };
5822
+ }
5823
+ const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
5824
+ this.archivedToolResultsByConversation.set(conversationId, archive);
5825
+ let changed = false;
5826
+ let truncatedCount = 0;
5827
+ let archivedCount = 0;
5828
+ let omittedChars = 0;
5829
+ for (const msg of messages) {
5830
+ if (msg.role !== "tool" || typeof msg.content !== "string") continue;
5831
+ const meta = msg.metadata;
5832
+ const runId = typeof meta?.runId === "string" ? meta.runId : void 0;
5833
+ if (runId === latestRunId) continue;
5834
+ let parsed;
5835
+ try {
5836
+ parsed = JSON.parse(msg.content);
5837
+ } catch {
5838
+ continue;
5839
+ }
5840
+ if (!Array.isArray(parsed)) continue;
5841
+ let rowChanged = false;
5842
+ const nextRows = parsed.map((row) => {
5843
+ if (!isToolResultRow(row)) return row;
5844
+ if (row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) return row;
5845
+ if (this.shouldPreserveSkillToolResult(row)) return row;
5846
+ const toolResultId = row.tool_use_id;
5847
+ if (!archive[toolResultId]) {
5848
+ archive[toolResultId] = {
5849
+ toolResultId,
5850
+ conversationId,
5851
+ toolName: row.tool_name,
5852
+ toolCallId: row.tool_use_id,
5853
+ createdAt: now(),
5854
+ sizeBytes: Buffer.byteLength(row.content, "utf8"),
5855
+ payload: row.content
5856
+ };
5857
+ archivedCount += 1;
5858
+ }
5859
+ const omitted = Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
5860
+ omittedChars += omitted;
5861
+ truncatedCount += 1;
5862
+ rowChanged = true;
5863
+ return {
5864
+ ...row,
5865
+ content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content)
5866
+ };
5867
+ });
5868
+ if (rowChanged) {
5869
+ msg.content = JSON.stringify(nextRows);
5870
+ if (msg.metadata && typeof msg.metadata === "object") {
5871
+ const meta2 = msg.metadata;
5872
+ if ("_richToolResults" in meta2) {
5873
+ delete meta2._richToolResults;
5874
+ }
5875
+ }
5876
+ changed = true;
5877
+ }
5878
+ }
5879
+ return { changed, truncatedCount, archivedCount, omittedChars };
5880
+ }
5881
+ shouldPreserveSkillToolResult(row) {
5882
+ if (row.tool_name.startsWith("todo_")) {
5883
+ return true;
5884
+ }
5885
+ if (row.tool_name !== "activate_skill" && row.tool_name !== "deactivate_skill") {
5886
+ return false;
5887
+ }
5888
+ const content = row.content.trim();
5889
+ if (content.startsWith("Tool error:")) {
5890
+ return false;
5891
+ }
5892
+ try {
5893
+ const parsed = JSON.parse(content);
5894
+ const skill = typeof parsed.skill === "string" ? parsed.skill : void 0;
5895
+ if (skill && this.activeSkillNames.has(skill)) {
5896
+ return true;
5897
+ }
5898
+ const activeSkills = Array.isArray(parsed.activeSkills) ? parsed.activeSkills.filter((v) => typeof v === "string") : [];
5899
+ for (const name of activeSkills) {
5900
+ if (this.activeSkillNames.has(name)) {
5901
+ return true;
5902
+ }
5903
+ }
5904
+ } catch {
5905
+ }
5906
+ return false;
5907
+ }
5671
5908
  async getTodos(conversationId) {
5672
5909
  if (!this.todoStore) return [];
5673
5910
  return this.todoStore.get(conversationId);
@@ -6336,12 +6573,31 @@ var AgentHarness = class _AgentHarness {
6336
6573
  let agent = this.parsedAgent;
6337
6574
  const runId = `run_${randomUUID3()}`;
6338
6575
  const start = now();
6339
- const maxSteps = agent.frontmatter.limits?.maxSteps ?? 50;
6576
+ const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
6340
6577
  const configuredTimeout = agent.frontmatter.limits?.timeout;
6341
6578
  const timeoutMs = this.environment === "development" && configuredTimeout == null ? 0 : (configuredTimeout ?? 300) * 1e3;
6342
6579
  const platformMaxDurationSec = Number(process.env.PONCHO_MAX_DURATION) || 0;
6343
6580
  const softDeadlineMs = input.disableSoftDeadline || platformMaxDurationSec <= 0 ? 0 : platformMaxDurationSec * 800;
6344
6581
  const messages = [...input.messages ?? []];
6582
+ const conversationId = input.conversationId ?? "__default__";
6583
+ this.seedToolResultArchive(conversationId, input.parameters);
6584
+ const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
6585
+ if (truncationSummary.changed) {
6586
+ console.info(
6587
+ `[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) (archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) for conversation="${conversationId}"`
6588
+ );
6589
+ }
6590
+ const hasFullToolResults = hasUntruncatedToolResults(messages);
6591
+ const enablePromptCache = !hasFullToolResults;
6592
+ if (!enablePromptCache) {
6593
+ console.info(
6594
+ `[poncho][cost] Prompt cache write disabled for run "${runId}" (untruncated tool results present in history).`
6595
+ );
6596
+ } else {
6597
+ console.info(
6598
+ `[poncho][cost] Prompt cache write enabled for run "${runId}" (history has no untruncated tool results).`
6599
+ );
6600
+ }
6345
6601
  const inputMessageCount = messages.length;
6346
6602
  const events = [];
6347
6603
  const renderCurrentAgentPrompt = () => renderAgentPrompt(this.parsedAgent, {
@@ -6421,7 +6677,6 @@ ${this.skillFingerprint}`;
6421
6677
  const browserEventQueue = [];
6422
6678
  const browserCleanups = [];
6423
6679
  const browserSession = this._browserSession;
6424
- const conversationId = input.conversationId ?? "__default__";
6425
6680
  if (browserSession) {
6426
6681
  browserCleanups.push(
6427
6682
  browserSession.onFrame(conversationId, (frame) => {
@@ -6488,6 +6743,7 @@ ${this.skillFingerprint}`;
6488
6743
  let totalInputTokens = 0;
6489
6744
  let totalOutputTokens = 0;
6490
6745
  let totalCachedTokens = 0;
6746
+ let totalCacheWriteTokens = 0;
6491
6747
  let transientStepRetryCount = 0;
6492
6748
  let latestContextTokens = 0;
6493
6749
  let toolOutputEstimateSinceModel = 0;
@@ -6516,7 +6772,12 @@ ${this.skillFingerprint}`;
6516
6772
  status: "completed",
6517
6773
  response: responseText,
6518
6774
  steps: step - 1,
6519
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
6775
+ tokens: {
6776
+ input: totalInputTokens,
6777
+ output: totalOutputTokens,
6778
+ cached: totalCachedTokens,
6779
+ cacheWrite: totalCacheWriteTokens
6780
+ },
6520
6781
  duration: now() - start,
6521
6782
  continuation: true,
6522
6783
  continuationMessages: [...messages],
@@ -6529,7 +6790,6 @@ ${this.skillFingerprint}`;
6529
6790
  }
6530
6791
  const stepStart = now();
6531
6792
  yield pushEvent({ type: "step:started", step });
6532
- yield pushEvent({ type: "model:request", tokens: 0 });
6533
6793
  const dispatcherTools = this.dispatcher.list();
6534
6794
  const exposedToolNames = /* @__PURE__ */ new Map();
6535
6795
  const usedProviderToolNames = /* @__PURE__ */ new Set();
@@ -6548,6 +6808,15 @@ ${this.skillFingerprint}`;
6548
6808
  inputSchema: jsonSchemaToZod(tool.inputSchema)
6549
6809
  };
6550
6810
  }
6811
+ const toolDefsJsonForEstimate = JSON.stringify(
6812
+ dispatcherTools.map((t) => ({
6813
+ name: t.name,
6814
+ description: t.description,
6815
+ inputSchema: t.inputSchema
6816
+ }))
6817
+ );
6818
+ const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6819
+ yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
6551
6820
  const convertMessage = async (msg) => {
6552
6821
  if (msg.role === "tool") {
6553
6822
  const meta = msg.metadata;
@@ -6737,15 +7006,8 @@ ${textContent}` };
6737
7006
  }
6738
7007
  const modelInstance = this.modelProvider(modelName);
6739
7008
  const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
6740
- if (compactionConfig.enabled && step === 1) {
6741
- const toolDefsJson = JSON.stringify(
6742
- dispatcherTools.map((t) => ({
6743
- name: t.name,
6744
- description: t.description,
6745
- inputSchema: t.inputSchema
6746
- }))
6747
- );
6748
- const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
7009
+ if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
7010
+ const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6749
7011
  const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
6750
7012
  const effectiveTokens = Math.max(estimated, lastReportedInput);
6751
7013
  if (effectiveTokens > compactionConfig.trigger * contextWindow) {
@@ -6758,11 +7020,14 @@ ${textContent}` };
6758
7020
  if (compactResult.compacted) {
6759
7021
  messages.length = 0;
6760
7022
  messages.push(...compactResult.messages);
6761
- const emittedMessages = [...compactResult.messages];
6762
- if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
6763
- emittedMessages.pop();
7023
+ let emittedMessages;
7024
+ if (step === 1) {
7025
+ emittedMessages = [...compactResult.messages];
7026
+ if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
7027
+ emittedMessages.pop();
7028
+ }
6764
7029
  }
6765
- const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
7030
+ const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
6766
7031
  latestContextTokens = tokensAfterCompaction;
6767
7032
  toolOutputEstimateSinceModel = 0;
6768
7033
  yield pushEvent({
@@ -6789,7 +7054,7 @@ ${textContent}` };
6789
7054
  const coreMessages = cachedCoreMessages;
6790
7055
  const temperature = agent.frontmatter.model?.temperature ?? 0.2;
6791
7056
  const maxTokens = agent.frontmatter.model?.maxTokens;
6792
- const cachedMessages = addPromptCacheBreakpoints(coreMessages, modelInstance);
7057
+ const cachedMessages = enablePromptCache ? addPromptCacheBreakpoints(coreMessages, modelInstance) : coreMessages;
6793
7058
  const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
6794
7059
  const result = await streamText({
6795
7060
  model: modelInstance,
@@ -6907,7 +7172,12 @@ ${textContent}` };
6907
7172
  status: "completed",
6908
7173
  response: responseText + fullText,
6909
7174
  steps: step,
6910
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7175
+ tokens: {
7176
+ input: totalInputTokens,
7177
+ output: totalOutputTokens,
7178
+ cached: totalCachedTokens,
7179
+ cacheWrite: totalCacheWriteTokens
7180
+ },
6911
7181
  duration: now() - start,
6912
7182
  continuation: true,
6913
7183
  continuationMessages: [...messages],
@@ -6935,7 +7205,12 @@ ${textContent}` };
6935
7205
  status: "completed",
6936
7206
  response: responseText + fullText,
6937
7207
  steps: step,
6938
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7208
+ tokens: {
7209
+ input: totalInputTokens,
7210
+ output: totalOutputTokens,
7211
+ cached: totalCachedTokens,
7212
+ cacheWrite: totalCacheWriteTokens
7213
+ },
6939
7214
  duration: now() - start,
6940
7215
  continuation: true,
6941
7216
  continuationMessages: [...messages],
@@ -6975,11 +7250,14 @@ ${textContent}` };
6975
7250
  const fullResult = await result.response;
6976
7251
  const usage = await result.usage;
6977
7252
  const toolCallsResult = await result.toolCalls;
6978
- const stepCachedTokens = usage.inputTokenDetails?.cacheReadTokens ?? 0;
7253
+ const details = usage.inputTokenDetails ?? {};
7254
+ const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
7255
+ const stepCacheWriteTokens = typeof details.cacheWriteTokens === "number" ? details.cacheWriteTokens : typeof details.cacheCreationTokens === "number" ? details.cacheCreationTokens : typeof details.cacheCreationInputTokens === "number" ? details.cacheCreationInputTokens : 0;
6979
7256
  const stepInputTokens = usage.inputTokens ?? 0;
6980
7257
  totalInputTokens += stepInputTokens;
6981
7258
  totalOutputTokens += usage.outputTokens ?? 0;
6982
7259
  totalCachedTokens += stepCachedTokens;
7260
+ totalCacheWriteTokens += stepCacheWriteTokens;
6983
7261
  latestContextTokens = stepInputTokens;
6984
7262
  toolOutputEstimateSinceModel = 0;
6985
7263
  yield pushEvent({
@@ -6987,9 +7265,13 @@ ${textContent}` };
6987
7265
  usage: {
6988
7266
  input: stepInputTokens,
6989
7267
  output: usage.outputTokens ?? 0,
6990
- cached: stepCachedTokens
7268
+ cached: stepCachedTokens,
7269
+ cacheWrite: stepCacheWriteTokens
6991
7270
  }
6992
7271
  });
7272
+ console.info(
7273
+ `[poncho][cost] model="${modelName}" step=${step} input=${stepInputTokens} output=${usage.outputTokens ?? 0} cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`
7274
+ );
6993
7275
  const toolCalls = toolCallsResult.map((tc) => ({
6994
7276
  id: tc.toolCallId,
6995
7277
  name: tc.toolName,
@@ -7036,7 +7318,8 @@ ${textContent}` };
7036
7318
  tokens: {
7037
7319
  input: totalInputTokens,
7038
7320
  output: totalOutputTokens,
7039
- cached: totalCachedTokens
7321
+ cached: totalCachedTokens,
7322
+ cacheWrite: totalCacheWriteTokens
7040
7323
  },
7041
7324
  duration: now() - start,
7042
7325
  contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
@@ -7193,7 +7476,12 @@ ${textContent}` };
7193
7476
  status: "completed",
7194
7477
  response: responseText + fullText,
7195
7478
  steps: step,
7196
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7479
+ tokens: {
7480
+ input: totalInputTokens,
7481
+ output: totalOutputTokens,
7482
+ cached: totalCachedTokens,
7483
+ cacheWrite: totalCacheWriteTokens
7484
+ },
7197
7485
  duration: now() - start,
7198
7486
  continuation: true,
7199
7487
  continuationMessages: [...messages],
@@ -7224,6 +7512,20 @@ ${textContent}` };
7224
7512
  tool_name: result2.tool,
7225
7513
  content: `Tool error: ${result2.error}`
7226
7514
  });
7515
+ {
7516
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
7517
+ if (archive) {
7518
+ archive[result2.callId] = {
7519
+ toolResultId: result2.callId,
7520
+ conversationId,
7521
+ toolName: result2.tool,
7522
+ toolCallId: result2.callId,
7523
+ createdAt: now(),
7524
+ sizeBytes: Buffer.byteLength(`Tool error: ${result2.error}`, "utf8"),
7525
+ payload: `Tool error: ${result2.error}`
7526
+ };
7527
+ }
7528
+ }
7227
7529
  richToolResults.push({
7228
7530
  type: "tool-result",
7229
7531
  toolCallId: result2.callId,
@@ -7249,6 +7551,21 @@ ${textContent}` };
7249
7551
  tool_name: result2.tool,
7250
7552
  content: JSON.stringify(strippedOutput ?? null)
7251
7553
  });
7554
+ {
7555
+ const archive = this.archivedToolResultsByConversation.get(conversationId);
7556
+ if (archive) {
7557
+ const payload = JSON.stringify(result2.output ?? null);
7558
+ archive[result2.callId] = {
7559
+ toolResultId: result2.callId,
7560
+ conversationId,
7561
+ toolName: result2.tool,
7562
+ toolCallId: result2.callId,
7563
+ createdAt: now(),
7564
+ sizeBytes: Buffer.byteLength(payload, "utf8"),
7565
+ payload
7566
+ };
7567
+ }
7568
+ }
7252
7569
  if (mediaItems.length > 0) {
7253
7570
  richToolResults.push({
7254
7571
  type: "tool-result",
@@ -7283,9 +7600,15 @@ ${textContent}` };
7283
7600
  messages.push({
7284
7601
  role: "assistant",
7285
7602
  content: assistantContent,
7286
- metadata: { timestamp: now(), id: randomUUID3(), step }
7603
+ metadata: { timestamp: now(), id: randomUUID3(), step, runId }
7287
7604
  });
7288
- const toolMsgMeta = { timestamp: now(), id: randomUUID3(), step, _richToolResults: richToolResults };
7605
+ const toolMsgMeta = {
7606
+ timestamp: now(),
7607
+ id: randomUUID3(),
7608
+ step,
7609
+ runId,
7610
+ _richToolResults: richToolResults
7611
+ };
7289
7612
  messages.push({
7290
7613
  role: "tool",
7291
7614
  content: JSON.stringify(toolResultsForModel),
@@ -7296,7 +7619,12 @@ ${textContent}` };
7296
7619
  status: "completed",
7297
7620
  response: responseText + fullText,
7298
7621
  steps: step,
7299
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7622
+ tokens: {
7623
+ input: totalInputTokens,
7624
+ output: totalOutputTokens,
7625
+ cached: totalCachedTokens,
7626
+ cacheWrite: totalCacheWriteTokens
7627
+ },
7300
7628
  duration: now() - start,
7301
7629
  continuation: true,
7302
7630
  continuationMessages: [...messages],
@@ -7357,7 +7685,12 @@ ${this.skillFingerprint}`;
7357
7685
  status: "completed",
7358
7686
  response: responseText,
7359
7687
  steps: maxSteps,
7360
- tokens: { input: totalInputTokens, output: totalOutputTokens, cached: totalCachedTokens },
7688
+ tokens: {
7689
+ input: totalInputTokens,
7690
+ output: totalOutputTokens,
7691
+ cached: totalCachedTokens,
7692
+ cacheWrite: totalCacheWriteTokens
7693
+ },
7361
7694
  duration: now() - start,
7362
7695
  continuation: true,
7363
7696
  continuationMessages: [...messages],
@@ -8677,7 +9010,7 @@ var createConversationStore = (config, options) => {
8677
9010
  };
8678
9011
 
8679
9012
  // src/index.ts
8680
- import { defineTool as defineTool7 } from "@poncho-ai/sdk";
9013
+ import { defineTool as defineTool8 } from "@poncho-ai/sdk";
8681
9014
  export {
8682
9015
  AgentHarness,
8683
9016
  InMemoryConversationStore,
@@ -8710,7 +9043,7 @@ export {
8710
9043
  createSubagentTools,
8711
9044
  createUploadStore,
8712
9045
  createWriteTool,
8713
- defineTool7 as defineTool,
9046
+ defineTool8 as defineTool,
8714
9047
  deleteOpenAICodexSession,
8715
9048
  deriveUploadKey,
8716
9049
  ensureAgentIdentity,