@poncho-ai/harness 0.31.0 → 0.31.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +17 -0
- package/dist/index.d.ts +27 -1
- package/dist/index.js +381 -48
- package/package.json +2 -2
- package/src/compaction.ts +8 -4
- package/src/harness.ts +427 -49
- package/src/model-factory.ts +35 -2
- package/src/state.ts +12 -0
- package/src/telemetry.ts +4 -0
- package/.turbo/turbo-lint.log +0 -6
- package/.turbo/turbo-test.log +0 -34
package/src/harness.ts
CHANGED
|
@@ -12,7 +12,7 @@ import type {
|
|
|
12
12
|
ToolContext,
|
|
13
13
|
ToolDefinition,
|
|
14
14
|
} from "@poncho-ai/sdk";
|
|
15
|
-
import { getTextContent } from "@poncho-ai/sdk";
|
|
15
|
+
import { defineTool, getTextContent } from "@poncho-ai/sdk";
|
|
16
16
|
import type { UploadStore } from "./upload-store.js";
|
|
17
17
|
import { PONCHO_UPLOAD_SCHEME, deriveUploadKey } from "./upload-store.js";
|
|
18
18
|
import { parseAgentFile, parseAgentMarkdown, renderAgentPrompt, type ParsedAgent, type AgentFrontmatter } from "./agent-parser.js";
|
|
@@ -72,8 +72,22 @@ export interface HarnessRunOutput {
|
|
|
72
72
|
}
|
|
73
73
|
|
|
74
74
|
const now = (): number => Date.now();
|
|
75
|
-
const FIRST_CHUNK_TIMEOUT_MS =
|
|
76
|
-
const MAX_TRANSIENT_STEP_RETRIES =
|
|
75
|
+
/** Maximum time, in milliseconds, to wait for the first chunk from the model. */
const FIRST_CHUNK_TIMEOUT_MS = 90_000; // 90s to receive the first chunk from the model
/** Retry budget for transient step failures (presumably per run — usage is not visible in this section; confirm). */
const MAX_TRANSIENT_STEP_RETRIES = 1;
/** Auto-compaction is re-checked on step 1 and on every step divisible by this value. */
const COMPACTION_CHECK_INTERVAL_STEPS = 3;
/** Key in the run `parameters` object under which a previously persisted tool-result archive is read. */
const TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
/** Marker placed at the start of a tool-result `content` string once it has been replaced by a truncation notice. */
const TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
/** Number of leading characters of the original payload kept as a preview inside a truncation notice. */
const TOOL_RESULT_PREVIEW_CHARS = 700;

/** A full tool result kept outside the prompt history so it can be retrieved later by id. */
interface ArchivedToolResult {
  /** Lookup key for the archived result; the visible call sites use the tool call id here. */
  toolResultId: string;
  /** Conversation the result belongs to. */
  conversationId: string;
  /** Name of the tool that produced the result. */
  toolName: string;
  /** Id of the tool call that produced the result. */
  toolCallId: string;
  /** Timestamp from `Date.now()` taken when the entry was archived. */
  createdAt: number;
  /** UTF-8 byte length of `payload`. */
  sizeBytes: number;
  /** The complete, untruncated result text. */
  payload: string;
}
|
|
77
91
|
|
|
78
92
|
class FirstChunkTimeoutError extends Error {
|
|
79
93
|
constructor(modelName: string, timeoutMs: number) {
|
|
@@ -140,23 +154,11 @@ const isRetryableModelError = (error: unknown): boolean => {
|
|
|
140
154
|
if (error instanceof FirstChunkTimeoutError) {
|
|
141
155
|
return true;
|
|
142
156
|
}
|
|
143
|
-
if (isNoOutputGeneratedError(error)) {
|
|
144
|
-
return true;
|
|
145
|
-
}
|
|
146
157
|
const statusCode = getErrorStatusCode(error);
|
|
147
158
|
if (typeof statusCode === "number") {
|
|
148
159
|
return statusCode === 429 || statusCode >= 500;
|
|
149
160
|
}
|
|
150
|
-
|
|
151
|
-
return false;
|
|
152
|
-
}
|
|
153
|
-
const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
|
|
154
|
-
return (
|
|
155
|
-
maybeMessage.includes("internal server error") ||
|
|
156
|
-
maybeMessage.includes("service unavailable") ||
|
|
157
|
-
maybeMessage.includes("gateway timeout") ||
|
|
158
|
-
maybeMessage.includes("rate limit")
|
|
159
|
-
);
|
|
161
|
+
return false;
|
|
160
162
|
};
|
|
161
163
|
|
|
162
164
|
const toRunError = (error: unknown): { code: string; message: string; details?: Record<string, unknown> } => {
|
|
@@ -225,6 +227,83 @@ const toProviderSafeToolName = (
|
|
|
225
227
|
return candidate;
|
|
226
228
|
};
|
|
227
229
|
|
|
230
|
+
/**
 * Type guard for a single serialized tool-result row.
 *
 * A value qualifies when it is a non-null object whose `tool_use_id`,
 * `tool_name` and `content` properties are all strings. Extra properties are
 * ignored.
 */
const isToolResultRow = (value: unknown): value is {
  tool_use_id: string;
  tool_name: string;
  content: string;
} => {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  const requiredStringFields = ["tool_use_id", "tool_name", "content"];
  return requiredStringFields.every((field) => typeof candidate[field] === "string");
};
|
|
243
|
+
|
|
244
|
+
/**
 * Extracts a tool-result archive from run parameters.
 *
 * Reads `parameters[TOOL_RESULT_ARCHIVE_PARAM]` and keeps only entries whose
 * value is an object carrying every `ArchivedToolResult` field with the
 * expected primitive type. Anything else (missing parameter, non-object
 * archive, malformed rows) is silently ignored, because the archive is
 * untrusted persisted input.
 *
 * @param parameters Run parameters, possibly undefined.
 * @returns A fresh map of archive key to validated entry; empty when nothing
 *   usable was found. Only the known fields are copied, so extra properties on
 *   a row are dropped.
 */
const readArchiveFromParameters = (
  parameters: Record<string, unknown> | undefined,
): Record<string, ArchivedToolResult> => {
  const raw = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
  if (typeof raw !== "object" || raw === null) return {};
  const entries: Array<[string, ArchivedToolResult]> = [];
  for (const [key, value] of Object.entries(raw)) {
    if (typeof value !== "object" || value === null) continue;
    const row = value as Record<string, unknown>;
    if (
      typeof row.toolResultId !== "string" ||
      typeof row.conversationId !== "string" ||
      typeof row.toolName !== "string" ||
      typeof row.toolCallId !== "string" ||
      typeof row.createdAt !== "number" ||
      typeof row.sizeBytes !== "number" ||
      typeof row.payload !== "string"
    ) {
      continue;
    }
    entries.push([
      key,
      {
        toolResultId: row.toolResultId,
        conversationId: row.conversationId,
        toolName: row.toolName,
        toolCallId: row.toolCallId,
        createdAt: row.createdAt,
        sizeBytes: row.sizeBytes,
        payload: row.payload,
      },
    ]);
  }
  // Object.fromEntries defines own data properties, so a persisted key such as
  // "__proto__" is stored as a normal entry. Plain assignment (`out[key] = …`)
  // would instead invoke the prototype setter, dropping the entry and leaking
  // the row's fields as inherited properties of the result.
  return Object.fromEntries(entries);
};
|
|
276
|
+
|
|
277
|
+
/**
 * Builds the placeholder text that replaces a historical tool result.
 *
 * The notice starts with `TOOL_RESULT_TRUNCATED_PREFIX`, followed by the
 * archive id, the tool name and the number of omitted UTF-16 code units, then
 * a preview of at most `TOOL_RESULT_PREVIEW_CHARS` code units of the payload.
 * A trailing `...[truncated]` line is appended only when something was cut.
 *
 * @param toolResultId Id under which the full payload was archived.
 * @param toolName Name of the tool that produced the payload.
 * @param payload Full original tool-result text.
 * @returns The notice string to store in place of the payload.
 */
const makeTruncatedToolResultNotice = (
  toolResultId: string,
  toolName: string,
  payload: string,
): string => {
  let preview = payload.slice(0, TOOL_RESULT_PREVIEW_CHARS);
  // `slice` counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). A dangling high surrogate is
  // not valid text, so drop it and count it as omitted instead.
  const lastUnit = preview.charCodeAt(preview.length - 1);
  if (preview.length < payload.length && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    preview = preview.slice(0, -1);
  }
  const omittedChars = Math.max(0, payload.length - preview.length);
  return `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${omittedChars}\n${preview}${omittedChars > 0 ? "\n...[truncated]" : ""}`;
};
|
|
286
|
+
|
|
287
|
+
/**
 * Reports whether any tool message still carries a full (non-truncated) result.
 *
 * Only messages with role `"tool"` and string content are inspected. Their
 * content must parse as a JSON array; each element that passes
 * `isToolResultRow` is considered untruncated unless its `content` begins with
 * `TOOL_RESULT_TRUNCATED_PREFIX`. Unparseable or non-array content is ignored.
 *
 * @param messages Conversation history to scan.
 * @returns `true` as soon as one untruncated row is found, otherwise `false`.
 */
const hasUntruncatedToolResults = (messages: Message[]): boolean =>
  messages.some((message) => {
    if (message.role !== "tool" || typeof message.content !== "string") {
      return false;
    }
    let rows: unknown;
    try {
      rows = JSON.parse(message.content);
    } catch {
      return false;
    }
    if (!Array.isArray(rows)) {
      return false;
    }
    return rows.some(
      (entry) => isToolResultRow(entry) && !entry.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX),
    );
  });
|
|
306
|
+
|
|
228
307
|
const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
|
|
229
308
|
|
|
230
309
|
You are running locally in development mode. Treat this as an editable agent workspace.
|
|
@@ -580,6 +659,7 @@ export class AgentHarness {
|
|
|
580
659
|
private agentFileFingerprint = "";
|
|
581
660
|
private mcpBridge?: LocalMcpBridge;
|
|
582
661
|
private subagentManager?: SubagentManager;
|
|
662
|
+
private readonly archivedToolResultsByConversation = new Map<string, Record<string, ArchivedToolResult>>();
|
|
583
663
|
|
|
584
664
|
private resolveToolAccess(toolName: string): ToolAccess {
|
|
585
665
|
const tools = this.loadedConfig?.tools;
|
|
@@ -662,6 +742,60 @@ export class AgentHarness {
|
|
|
662
742
|
if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
|
|
663
743
|
this.registerIfMissing(ponchoDocsTool);
|
|
664
744
|
}
|
|
745
|
+
if (this.isToolEnabled("get_tool_result_by_id")) {
|
|
746
|
+
this.registerIfMissing(this.createGetToolResultByIdTool());
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
/**
 * Builds the `get_tool_result_by_id` tool definition.
 *
 * The tool looks up an archived tool result for the calling conversation
 * (falling back to the `"__default__"` conversation when the context has no
 * id) and returns a window of its payload. `offset` defaults to 0; `limit`
 * defaults to 6000 and is clamped to the range 1..20000. A missing id yields
 * an `{ error }` object rather than a thrown exception. Both outcomes are
 * logged with `console.info`.
 */
private createGetToolResultByIdTool(): ToolDefinition {
  return defineTool({
    name: "get_tool_result_by_id",
    description:
      "Retrieve a previously archived full tool result by id for the current conversation. " +
      "Use this when older tool outputs were truncated in prompt history.",
    inputSchema: {
      type: "object",
      properties: {
        toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
        offset: { type: "number", description: "Optional character offset for paging large payloads" },
        limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" },
      },
      required: ["toolResultId"],
      additionalProperties: false,
    },
    handler: async (input, context) => {
      const conversationId = context.conversationId ?? "__default__";
      const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
      const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
      // Own-property lookup only: ids such as "constructor" or "toString" would
      // otherwise resolve to inherited Object.prototype members and crash below
      // when `payload` is read from them.
      const record = Object.prototype.hasOwnProperty.call(archive, toolResultId)
        ? archive[toolResultId]
        : undefined;
      if (!record) {
        console.info(
          `[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`,
        );
        return {
          error: `No archived tool result found for id "${toolResultId}" in this conversation.`,
        };
      }
      // Truncate to whole characters so the reported offset matches the
      // position `slice` actually uses; non-numeric input falls back to the
      // defaults.
      const offset = Math.max(0, Math.trunc(Number(input.offset) || 0));
      const limit = Math.min(Math.max(Math.trunc(Number(input.limit) || 6000), 1), 20_000);
      const end = Math.min(record.payload.length, offset + limit);
      const chunk = record.payload.slice(offset, end);
      console.info(
        `[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" ` +
          `offset=${offset} returned=${chunk.length} total=${record.payload.length}`,
      );
      return {
        toolResultId: record.toolResultId,
        toolName: record.toolName,
        toolCallId: record.toolCallId,
        totalChars: record.payload.length,
        offset,
        returnedChars: chunk.length,
        hasMore: end < record.payload.length,
        payload: chunk,
      };
    },
  });
}
|
|
666
800
|
|
|
667
801
|
private shouldEnableWriteTool(): boolean {
|
|
@@ -691,6 +825,140 @@ export class AgentHarness {
|
|
|
691
825
|
return this.parsedAgent?.frontmatter;
|
|
692
826
|
}
|
|
693
827
|
|
|
828
|
+
/**
 * Returns a shallow copy of the tool-result archive held for a conversation.
 *
 * @param conversationId Conversation whose archive is requested.
 * @returns A new object with the archived entries, or an empty object when
 *   nothing is stored for that conversation. The entries themselves are not
 *   cloned.
 */
getToolResultArchive(conversationId: string): Record<string, ArchivedToolResult> {
  const stored = this.archivedToolResultsByConversation.get(conversationId);
  if (!stored) {
    return {};
  }
  return { ...stored };
}
|
|
832
|
+
|
|
833
|
+
/**
 * Merges an archive supplied via run parameters into the in-memory archive.
 *
 * Entries read from `parameters` win over in-memory entries with the same
 * key. The merged map replaces the stored one for the conversation.
 *
 * @param conversationId Conversation whose archive is being seeded.
 * @param parameters Run parameters that may carry a persisted archive.
 * @returns The merged archive that is now stored for the conversation.
 */
private seedToolResultArchive(
  conversationId: string,
  parameters: Record<string, unknown> | undefined,
): Record<string, ArchivedToolResult> {
  const fromParameters = readArchiveFromParameters(parameters);
  const inMemory = this.archivedToolResultsByConversation.get(conversationId);
  const combined: Record<string, ArchivedToolResult> = {
    ...(inMemory ?? {}),
    ...fromParameters,
  };
  this.archivedToolResultsByConversation.set(conversationId, combined);
  return combined;
}
|
|
843
|
+
|
|
844
|
+
/**
 * Replaces tool results from earlier runs with short truncation notices.
 *
 * The newest non-empty `metadata.runId` found in `messages` identifies the
 * latest run; tool messages tagged with that run id are left untouched. Every
 * other tool message whose content is a JSON array has its rows rewritten:
 * rows already carrying the truncation prefix, and rows that
 * `shouldPreserveSkillToolResult` protects, are kept; the rest are archived
 * under their `tool_use_id` (if not archived yet) and replaced by a notice.
 *
 * Messages are mutated in place: `content` is rewritten and
 * `metadata._richToolResults` is removed from any message that changed.
 *
 * @param messages Conversation history; modified in place.
 * @param conversationId Conversation whose archive receives the payloads.
 * @returns Whether anything changed, plus counts of truncated rows, newly
 *   archived rows and omitted characters. All zero/false when no message
 *   carries a run id.
 */
private truncateHistoricalToolResults(
  messages: Message[],
  conversationId: string,
): { changed: boolean; truncatedCount: number; archivedCount: number; omittedChars: number } {
  const summary = { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };

  const readRunId = (message: Message): string | undefined => {
    const metadata = message.metadata as Record<string, unknown> | undefined;
    return typeof metadata?.runId === "string" ? metadata.runId : undefined;
  };

  // Walk backwards to find the most recent message tagged with a run id.
  let latestRunId: string | undefined;
  let cursor = messages.length;
  while (cursor > 0 && !latestRunId) {
    cursor -= 1;
    latestRunId = readRunId(messages[cursor]!) || undefined;
  }
  if (!latestRunId) {
    return summary;
  }

  // Make sure the conversation has an archive object registered before filling it.
  const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
  this.archivedToolResultsByConversation.set(conversationId, archive);

  for (const message of messages) {
    if (message.role !== "tool" || typeof message.content !== "string") continue;
    if (readRunId(message) === latestRunId) continue;

    let rows: unknown;
    try {
      rows = JSON.parse(message.content);
    } catch {
      continue;
    }
    if (!Array.isArray(rows)) continue;

    let rewroteAny = false;
    const rewritten: unknown[] = [];
    for (const row of rows) {
      if (
        !isToolResultRow(row) ||
        row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX) ||
        this.shouldPreserveSkillToolResult(row)
      ) {
        rewritten.push(row);
        continue;
      }
      const toolResultId = row.tool_use_id;
      if (!archive[toolResultId]) {
        archive[toolResultId] = {
          toolResultId,
          conversationId,
          toolName: row.tool_name,
          toolCallId: row.tool_use_id,
          createdAt: now(),
          sizeBytes: Buffer.byteLength(row.content, "utf8"),
          payload: row.content,
        };
        summary.archivedCount += 1;
      }
      summary.omittedChars += Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
      summary.truncatedCount += 1;
      rewroteAny = true;
      rewritten.push({
        ...row,
        content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content),
      });
    }
    if (!rewroteAny) continue;

    message.content = JSON.stringify(rewritten);
    // Historical messages may still hold the full-fidelity `_richToolResults`.
    // Per the original author's note, convertMessage prefers that field over
    // `content`, so leaving it in place would bypass the truncation and keep
    // token usage growing.
    const metadata = message.metadata;
    if (metadata && typeof metadata === "object" && "_richToolResults" in metadata) {
      delete (metadata as Record<string, unknown>)._richToolResults;
    }
    summary.changed = true;
  }
  return summary;
}
|
|
923
|
+
|
|
924
|
+
/**
 * Decides whether a historical tool-result row must be kept in full.
 *
 * Rows from tools whose name starts with `todo_` are always kept. Rows from
 * `activate_skill` / `deactivate_skill` are kept only when their JSON content
 * names a skill (via `skill` or any string in `activeSkills`) that is present
 * in `this.activeSkillNames`. Tool errors, non-JSON content and every other
 * tool return `false`.
 *
 * @param row Parsed tool-result row.
 * @returns `true` when the row should be exempt from truncation.
 */
private shouldPreserveSkillToolResult(row: {
  tool_use_id: string;
  tool_name: string;
  content: string;
}): boolean {
  const toolName = row.tool_name;
  if (toolName.startsWith("todo_")) return true;

  const isSkillToggle = toolName === "activate_skill" || toolName === "deactivate_skill";
  if (!isSkillToggle) return false;

  const body = row.content.trim();
  if (body.startsWith("Tool error:")) return false;

  try {
    // Everything that touches the decoded value stays inside the try block so
    // that non-JSON content, or JSON `null`, simply falls through to `false`.
    const decoded = JSON.parse(body) as Record<string, unknown>;
    const single = decoded.skill;
    if (typeof single === "string" && single !== "" && this.activeSkillNames.has(single)) {
      return true;
    }
    const listed: unknown = decoded.activeSkills;
    if (Array.isArray(listed)) {
      return listed.some((name) => typeof name === "string" && this.activeSkillNames.has(name));
    }
  } catch {
    // Non-JSON tool content should not block truncation.
  }
  return false;
}
|
|
961
|
+
|
|
694
962
|
async getTodos(conversationId: string): Promise<TodoItem[]> {
|
|
695
963
|
if (!this.todoStore) return [];
|
|
696
964
|
return this.todoStore.get(conversationId);
|
|
@@ -1475,7 +1743,7 @@ export class AgentHarness {
|
|
|
1475
1743
|
let agent = this.parsedAgent as ParsedAgent;
|
|
1476
1744
|
const runId = `run_${randomUUID()}`;
|
|
1477
1745
|
const start = now();
|
|
1478
|
-
const maxSteps = agent.frontmatter.limits?.maxSteps ??
|
|
1746
|
+
const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
|
|
1479
1747
|
const configuredTimeout = agent.frontmatter.limits?.timeout;
|
|
1480
1748
|
const timeoutMs = this.environment === "development" && configuredTimeout == null
|
|
1481
1749
|
? 0 // no hard timeout in development unless explicitly configured
|
|
@@ -1485,6 +1753,29 @@ export class AgentHarness {
|
|
|
1485
1753
|
? 0
|
|
1486
1754
|
: platformMaxDurationSec * 800;
|
|
1487
1755
|
const messages: Message[] = [...(input.messages ?? [])];
|
|
1756
|
+
const conversationId = input.conversationId ?? "__default__";
|
|
1757
|
+
this.seedToolResultArchive(conversationId, input.parameters);
|
|
1758
|
+
const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
|
|
1759
|
+
if (truncationSummary.changed) {
|
|
1760
|
+
console.info(
|
|
1761
|
+
`[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) ` +
|
|
1762
|
+
`(archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) ` +
|
|
1763
|
+
`for conversation="${conversationId}"`,
|
|
1764
|
+
);
|
|
1765
|
+
}
|
|
1766
|
+
const hasFullToolResults = hasUntruncatedToolResults(messages);
|
|
1767
|
+
const enablePromptCache = !hasFullToolResults;
|
|
1768
|
+
if (!enablePromptCache) {
|
|
1769
|
+
console.info(
|
|
1770
|
+
`[poncho][cost] Prompt cache write disabled for run "${runId}" ` +
|
|
1771
|
+
`(untruncated tool results present in history).`,
|
|
1772
|
+
);
|
|
1773
|
+
} else {
|
|
1774
|
+
console.info(
|
|
1775
|
+
`[poncho][cost] Prompt cache write enabled for run "${runId}" ` +
|
|
1776
|
+
`(history has no untruncated tool results).`,
|
|
1777
|
+
);
|
|
1778
|
+
}
|
|
1488
1779
|
const inputMessageCount = messages.length;
|
|
1489
1780
|
const events: AgentEvent[] = [];
|
|
1490
1781
|
|
|
@@ -1583,7 +1874,6 @@ ${boundedMainMemory.trim()}`
|
|
|
1583
1874
|
profileDir: string;
|
|
1584
1875
|
isLaunched: boolean }
|
|
1585
1876
|
| undefined;
|
|
1586
|
-
const conversationId = input.conversationId ?? "__default__";
|
|
1587
1877
|
if (browserSession) {
|
|
1588
1878
|
browserCleanups.push(
|
|
1589
1879
|
browserSession.onFrame(conversationId, (frame) => {
|
|
@@ -1655,6 +1945,7 @@ ${boundedMainMemory.trim()}`
|
|
|
1655
1945
|
let totalInputTokens = 0;
|
|
1656
1946
|
let totalOutputTokens = 0;
|
|
1657
1947
|
let totalCachedTokens = 0;
|
|
1948
|
+
let totalCacheWriteTokens = 0;
|
|
1658
1949
|
let transientStepRetryCount = 0;
|
|
1659
1950
|
let latestContextTokens = 0;
|
|
1660
1951
|
let toolOutputEstimateSinceModel = 0;
|
|
@@ -1684,7 +1975,12 @@ ${boundedMainMemory.trim()}`
|
|
|
1684
1975
|
status: "completed",
|
|
1685
1976
|
response: responseText,
|
|
1686
1977
|
steps: step - 1,
|
|
1687
|
-
tokens: {
|
|
1978
|
+
tokens: {
|
|
1979
|
+
input: totalInputTokens,
|
|
1980
|
+
output: totalOutputTokens,
|
|
1981
|
+
cached: totalCachedTokens,
|
|
1982
|
+
cacheWrite: totalCacheWriteTokens,
|
|
1983
|
+
},
|
|
1688
1984
|
duration: now() - start,
|
|
1689
1985
|
continuation: true,
|
|
1690
1986
|
continuationMessages: [...messages],
|
|
@@ -1698,7 +1994,6 @@ ${boundedMainMemory.trim()}`
|
|
|
1698
1994
|
|
|
1699
1995
|
const stepStart = now();
|
|
1700
1996
|
yield pushEvent({ type: "step:started", step });
|
|
1701
|
-
yield pushEvent({ type: "model:request", tokens: 0 });
|
|
1702
1997
|
|
|
1703
1998
|
const dispatcherTools = this.dispatcher.list();
|
|
1704
1999
|
const exposedToolNames = new Map<string, string>();
|
|
@@ -1720,6 +2015,15 @@ ${boundedMainMemory.trim()}`
|
|
|
1720
2015
|
inputSchema: jsonSchemaToZod(tool.inputSchema),
|
|
1721
2016
|
};
|
|
1722
2017
|
}
|
|
2018
|
+
const toolDefsJsonForEstimate = JSON.stringify(
|
|
2019
|
+
dispatcherTools.map((t) => ({
|
|
2020
|
+
name: t.name,
|
|
2021
|
+
description: t.description,
|
|
2022
|
+
inputSchema: t.inputSchema,
|
|
2023
|
+
})),
|
|
2024
|
+
);
|
|
2025
|
+
const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
|
|
2026
|
+
yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
|
|
1723
2027
|
|
|
1724
2028
|
// Convert messages to ModelMessage format
|
|
1725
2029
|
const convertMessage = async (msg: Message): Promise<ModelMessage[]> => {
|
|
@@ -1956,20 +2260,11 @@ ${boundedMainMemory.trim()}`
|
|
|
1956
2260
|
}
|
|
1957
2261
|
const modelInstance = this.modelProvider(modelName);
|
|
1958
2262
|
|
|
1959
|
-
// --- Auto-compaction
|
|
1960
|
-
//
|
|
1961
|
-
// (JSON-stringified tool_calls / tool results) that must not leak
|
|
1962
|
-
// into the conversation store via compactedMessages.
|
|
2263
|
+
// --- Auto-compaction ---
|
|
2264
|
+
// Re-check every N steps to curb runaway context growth in longer runs.
|
|
1963
2265
|
const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
|
|
1964
|
-
if (compactionConfig.enabled && step === 1) {
|
|
1965
|
-
const
|
|
1966
|
-
dispatcherTools.map((t) => ({
|
|
1967
|
-
name: t.name,
|
|
1968
|
-
description: t.description,
|
|
1969
|
-
inputSchema: t.inputSchema,
|
|
1970
|
-
})),
|
|
1971
|
-
);
|
|
1972
|
-
const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
|
|
2266
|
+
if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
|
|
2267
|
+
const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
|
|
1973
2268
|
const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
|
|
1974
2269
|
const effectiveTokens = Math.max(estimated, lastReportedInput);
|
|
1975
2270
|
|
|
@@ -1984,14 +2279,17 @@ ${boundedMainMemory.trim()}`
|
|
|
1984
2279
|
if (compactResult.compacted) {
|
|
1985
2280
|
messages.length = 0;
|
|
1986
2281
|
messages.push(...compactResult.messages);
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
emittedMessages.
|
|
2282
|
+
let emittedMessages: Message[] | undefined;
|
|
2283
|
+
if (step === 1) {
|
|
2284
|
+
// Strip the trailing user task message so runners can use
|
|
2285
|
+
// compactedMessages directly as historyMessages without
|
|
2286
|
+
// duplicating the user turn they append themselves.
|
|
2287
|
+
emittedMessages = [...compactResult.messages];
|
|
2288
|
+
if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
|
|
2289
|
+
emittedMessages.pop();
|
|
2290
|
+
}
|
|
1993
2291
|
}
|
|
1994
|
-
const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages,
|
|
2292
|
+
const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
|
|
1995
2293
|
latestContextTokens = tokensAfterCompaction;
|
|
1996
2294
|
toolOutputEstimateSinceModel = 0;
|
|
1997
2295
|
yield pushEvent({
|
|
@@ -2024,7 +2322,9 @@ ${boundedMainMemory.trim()}`
|
|
|
2024
2322
|
|
|
2025
2323
|
const temperature = agent.frontmatter.model?.temperature ?? 0.2;
|
|
2026
2324
|
const maxTokens = agent.frontmatter.model?.maxTokens;
|
|
2027
|
-
const cachedMessages =
|
|
2325
|
+
const cachedMessages = enablePromptCache
|
|
2326
|
+
? addPromptCacheBreakpoints(coreMessages, modelInstance)
|
|
2327
|
+
: coreMessages;
|
|
2028
2328
|
|
|
2029
2329
|
const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
|
|
2030
2330
|
|
|
@@ -2153,7 +2453,12 @@ ${boundedMainMemory.trim()}`
|
|
|
2153
2453
|
status: "completed",
|
|
2154
2454
|
response: responseText + fullText,
|
|
2155
2455
|
steps: step,
|
|
2156
|
-
tokens: {
|
|
2456
|
+
tokens: {
|
|
2457
|
+
input: totalInputTokens,
|
|
2458
|
+
output: totalOutputTokens,
|
|
2459
|
+
cached: totalCachedTokens,
|
|
2460
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2461
|
+
},
|
|
2157
2462
|
duration: now() - start,
|
|
2158
2463
|
continuation: true,
|
|
2159
2464
|
continuationMessages: [...messages],
|
|
@@ -2185,7 +2490,12 @@ ${boundedMainMemory.trim()}`
|
|
|
2185
2490
|
status: "completed",
|
|
2186
2491
|
response: responseText + fullText,
|
|
2187
2492
|
steps: step,
|
|
2188
|
-
tokens: {
|
|
2493
|
+
tokens: {
|
|
2494
|
+
input: totalInputTokens,
|
|
2495
|
+
output: totalOutputTokens,
|
|
2496
|
+
cached: totalCachedTokens,
|
|
2497
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2498
|
+
},
|
|
2189
2499
|
duration: now() - start,
|
|
2190
2500
|
continuation: true,
|
|
2191
2501
|
continuationMessages: [...messages],
|
|
@@ -2233,11 +2543,21 @@ ${boundedMainMemory.trim()}`
|
|
|
2233
2543
|
const toolCallsResult = await result.toolCalls;
|
|
2234
2544
|
|
|
2235
2545
|
// Update token usage
|
|
2236
|
-
const
|
|
2546
|
+
const details = (usage.inputTokenDetails ?? {}) as Record<string, unknown>;
|
|
2547
|
+
const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
|
|
2548
|
+
const stepCacheWriteTokens =
|
|
2549
|
+
typeof details.cacheWriteTokens === "number"
|
|
2550
|
+
? details.cacheWriteTokens
|
|
2551
|
+
: typeof details.cacheCreationTokens === "number"
|
|
2552
|
+
? details.cacheCreationTokens
|
|
2553
|
+
: typeof details.cacheCreationInputTokens === "number"
|
|
2554
|
+
? details.cacheCreationInputTokens
|
|
2555
|
+
: 0;
|
|
2237
2556
|
const stepInputTokens = usage.inputTokens ?? 0;
|
|
2238
2557
|
totalInputTokens += stepInputTokens;
|
|
2239
2558
|
totalOutputTokens += usage.outputTokens ?? 0;
|
|
2240
2559
|
totalCachedTokens += stepCachedTokens;
|
|
2560
|
+
totalCacheWriteTokens += stepCacheWriteTokens;
|
|
2241
2561
|
latestContextTokens = stepInputTokens;
|
|
2242
2562
|
toolOutputEstimateSinceModel = 0;
|
|
2243
2563
|
|
|
@@ -2247,8 +2567,15 @@ ${boundedMainMemory.trim()}`
|
|
|
2247
2567
|
input: stepInputTokens,
|
|
2248
2568
|
output: usage.outputTokens ?? 0,
|
|
2249
2569
|
cached: stepCachedTokens,
|
|
2570
|
+
cacheWrite: stepCacheWriteTokens,
|
|
2250
2571
|
},
|
|
2251
2572
|
});
|
|
2573
|
+
console.info(
|
|
2574
|
+
`[poncho][cost] model="${modelName}" step=${step} ` +
|
|
2575
|
+
`input=${stepInputTokens} output=${usage.outputTokens ?? 0} ` +
|
|
2576
|
+
`cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} ` +
|
|
2577
|
+
`totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`,
|
|
2578
|
+
);
|
|
2252
2579
|
|
|
2253
2580
|
// Extract tool calls
|
|
2254
2581
|
const toolCalls = toolCallsResult.map((tc) => ({
|
|
@@ -2302,6 +2629,7 @@ ${boundedMainMemory.trim()}`
|
|
|
2302
2629
|
input: totalInputTokens,
|
|
2303
2630
|
output: totalOutputTokens,
|
|
2304
2631
|
cached: totalCachedTokens,
|
|
2632
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2305
2633
|
},
|
|
2306
2634
|
duration: now() - start,
|
|
2307
2635
|
contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
|
|
@@ -2505,7 +2833,12 @@ ${boundedMainMemory.trim()}`
|
|
|
2505
2833
|
status: "completed",
|
|
2506
2834
|
response: responseText + fullText,
|
|
2507
2835
|
steps: step,
|
|
2508
|
-
tokens: {
|
|
2836
|
+
tokens: {
|
|
2837
|
+
input: totalInputTokens,
|
|
2838
|
+
output: totalOutputTokens,
|
|
2839
|
+
cached: totalCachedTokens,
|
|
2840
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2841
|
+
},
|
|
2509
2842
|
duration: now() - start,
|
|
2510
2843
|
continuation: true,
|
|
2511
2844
|
continuationMessages: [...messages],
|
|
@@ -2538,6 +2871,20 @@ ${boundedMainMemory.trim()}`
|
|
|
2538
2871
|
tool_name: result.tool,
|
|
2539
2872
|
content: `Tool error: ${result.error}`,
|
|
2540
2873
|
});
|
|
2874
|
+
{
|
|
2875
|
+
const archive = this.archivedToolResultsByConversation.get(conversationId);
|
|
2876
|
+
if (archive) {
|
|
2877
|
+
archive[result.callId] = {
|
|
2878
|
+
toolResultId: result.callId,
|
|
2879
|
+
conversationId,
|
|
2880
|
+
toolName: result.tool,
|
|
2881
|
+
toolCallId: result.callId,
|
|
2882
|
+
createdAt: now(),
|
|
2883
|
+
sizeBytes: Buffer.byteLength(`Tool error: ${result.error}`, "utf8"),
|
|
2884
|
+
payload: `Tool error: ${result.error}`,
|
|
2885
|
+
};
|
|
2886
|
+
}
|
|
2887
|
+
}
|
|
2541
2888
|
richToolResults.push({
|
|
2542
2889
|
type: "tool-result",
|
|
2543
2890
|
toolCallId: result.callId,
|
|
@@ -2564,6 +2911,21 @@ ${boundedMainMemory.trim()}`
|
|
|
2564
2911
|
tool_name: result.tool,
|
|
2565
2912
|
content: JSON.stringify(strippedOutput ?? null),
|
|
2566
2913
|
});
|
|
2914
|
+
{
|
|
2915
|
+
const archive = this.archivedToolResultsByConversation.get(conversationId);
|
|
2916
|
+
if (archive) {
|
|
2917
|
+
const payload = JSON.stringify(result.output ?? null);
|
|
2918
|
+
archive[result.callId] = {
|
|
2919
|
+
toolResultId: result.callId,
|
|
2920
|
+
conversationId,
|
|
2921
|
+
toolName: result.tool,
|
|
2922
|
+
toolCallId: result.callId,
|
|
2923
|
+
createdAt: now(),
|
|
2924
|
+
sizeBytes: Buffer.byteLength(payload, "utf8"),
|
|
2925
|
+
payload,
|
|
2926
|
+
};
|
|
2927
|
+
}
|
|
2928
|
+
}
|
|
2567
2929
|
|
|
2568
2930
|
if (mediaItems.length > 0) {
|
|
2569
2931
|
richToolResults.push({
|
|
@@ -2604,9 +2966,15 @@ ${boundedMainMemory.trim()}`
|
|
|
2604
2966
|
messages.push({
|
|
2605
2967
|
role: "assistant",
|
|
2606
2968
|
content: assistantContent,
|
|
2607
|
-
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
2969
|
+
metadata: { timestamp: now(), id: randomUUID(), step, runId },
|
|
2608
2970
|
});
|
|
2609
|
-
const toolMsgMeta: Record<string, unknown> = {
|
|
2971
|
+
const toolMsgMeta: Record<string, unknown> = {
|
|
2972
|
+
timestamp: now(),
|
|
2973
|
+
id: randomUUID(),
|
|
2974
|
+
step,
|
|
2975
|
+
runId,
|
|
2976
|
+
_richToolResults: richToolResults,
|
|
2977
|
+
};
|
|
2610
2978
|
messages.push({
|
|
2611
2979
|
role: "tool",
|
|
2612
2980
|
content: JSON.stringify(toolResultsForModel),
|
|
@@ -2621,7 +2989,12 @@ ${boundedMainMemory.trim()}`
|
|
|
2621
2989
|
status: "completed",
|
|
2622
2990
|
response: responseText + fullText,
|
|
2623
2991
|
steps: step,
|
|
2624
|
-
tokens: {
|
|
2992
|
+
tokens: {
|
|
2993
|
+
input: totalInputTokens,
|
|
2994
|
+
output: totalOutputTokens,
|
|
2995
|
+
cached: totalCachedTokens,
|
|
2996
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2997
|
+
},
|
|
2625
2998
|
duration: now() - start,
|
|
2626
2999
|
continuation: true,
|
|
2627
3000
|
continuationMessages: [...messages],
|
|
@@ -2689,7 +3062,12 @@ ${boundedMainMemory.trim()}`
|
|
|
2689
3062
|
status: "completed",
|
|
2690
3063
|
response: responseText,
|
|
2691
3064
|
steps: maxSteps,
|
|
2692
|
-
tokens: {
|
|
3065
|
+
tokens: {
|
|
3066
|
+
input: totalInputTokens,
|
|
3067
|
+
output: totalOutputTokens,
|
|
3068
|
+
cached: totalCachedTokens,
|
|
3069
|
+
cacheWrite: totalCacheWriteTokens,
|
|
3070
|
+
},
|
|
2693
3071
|
duration: now() - start,
|
|
2694
3072
|
continuation: true,
|
|
2695
3073
|
continuationMessages: [...messages],
|