@poncho-ai/harness 0.31.1 → 0.31.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +19 -0
- package/dist/index.d.ts +27 -1
- package/dist/index.js +371 -51
- package/package.json +2 -2
- package/src/compaction.ts +8 -4
- package/src/harness.ts +442 -54
- package/src/state.ts +12 -0
- package/src/telemetry.ts +4 -0
- package/.turbo/turbo-lint.log +0 -6
- package/.turbo/turbo-test.log +0 -34
package/src/harness.ts
CHANGED
|
@@ -12,7 +12,7 @@ import type {
|
|
|
12
12
|
ToolContext,
|
|
13
13
|
ToolDefinition,
|
|
14
14
|
} from "@poncho-ai/sdk";
|
|
15
|
-
import { getTextContent } from "@poncho-ai/sdk";
|
|
15
|
+
import { defineTool, getTextContent } from "@poncho-ai/sdk";
|
|
16
16
|
import type { UploadStore } from "./upload-store.js";
|
|
17
17
|
import { PONCHO_UPLOAD_SCHEME, deriveUploadKey } from "./upload-store.js";
|
|
18
18
|
import { parseAgentFile, parseAgentMarkdown, renderAgentPrompt, type ParsedAgent, type AgentFrontmatter } from "./agent-parser.js";
|
|
@@ -72,8 +72,22 @@ export interface HarnessRunOutput {
|
|
|
72
72
|
}
|
|
73
73
|
|
|
74
74
|
const now = (): number => Date.now();
|
|
75
|
-
const FIRST_CHUNK_TIMEOUT_MS =
|
|
76
|
-
const MAX_TRANSIENT_STEP_RETRIES =
|
|
75
|
+
const FIRST_CHUNK_TIMEOUT_MS = 90_000; // 90s to receive the first chunk from the model
|
|
76
|
+
const MAX_TRANSIENT_STEP_RETRIES = 1;
|
|
77
|
+
const COMPACTION_CHECK_INTERVAL_STEPS = 3;
|
|
78
|
+
const TOOL_RESULT_ARCHIVE_PARAM = "__toolResultArchive";
|
|
79
|
+
const TOOL_RESULT_TRUNCATED_PREFIX = "[TRUNCATED_TOOL_RESULT]";
|
|
80
|
+
const TOOL_RESULT_PREVIEW_CHARS = 700;
|
|
81
|
+
|
|
82
|
+
interface ArchivedToolResult {
|
|
83
|
+
toolResultId: string;
|
|
84
|
+
conversationId: string;
|
|
85
|
+
toolName: string;
|
|
86
|
+
toolCallId: string;
|
|
87
|
+
createdAt: number;
|
|
88
|
+
sizeBytes: number;
|
|
89
|
+
payload: string;
|
|
90
|
+
}
|
|
77
91
|
|
|
78
92
|
class FirstChunkTimeoutError extends Error {
|
|
79
93
|
constructor(modelName: string, timeoutMs: number) {
|
|
@@ -140,23 +154,11 @@ const isRetryableModelError = (error: unknown): boolean => {
|
|
|
140
154
|
if (error instanceof FirstChunkTimeoutError) {
|
|
141
155
|
return true;
|
|
142
156
|
}
|
|
143
|
-
if (isNoOutputGeneratedError(error)) {
|
|
144
|
-
return true;
|
|
145
|
-
}
|
|
146
157
|
const statusCode = getErrorStatusCode(error);
|
|
147
158
|
if (typeof statusCode === "number") {
|
|
148
159
|
return statusCode === 429 || statusCode >= 500;
|
|
149
160
|
}
|
|
150
|
-
|
|
151
|
-
return false;
|
|
152
|
-
}
|
|
153
|
-
const maybeMessage = "message" in error ? String(error.message ?? "").toLowerCase() : "";
|
|
154
|
-
return (
|
|
155
|
-
maybeMessage.includes("internal server error") ||
|
|
156
|
-
maybeMessage.includes("service unavailable") ||
|
|
157
|
-
maybeMessage.includes("gateway timeout") ||
|
|
158
|
-
maybeMessage.includes("rate limit")
|
|
159
|
-
);
|
|
161
|
+
return false;
|
|
160
162
|
};
|
|
161
163
|
|
|
162
164
|
const toRunError = (error: unknown): { code: string; message: string; details?: Record<string, unknown> } => {
|
|
@@ -225,6 +227,83 @@ const toProviderSafeToolName = (
|
|
|
225
227
|
return candidate;
|
|
226
228
|
};
|
|
227
229
|
|
|
230
|
+
const isToolResultRow = (value: unknown): value is {
|
|
231
|
+
tool_use_id: string;
|
|
232
|
+
tool_name: string;
|
|
233
|
+
content: string;
|
|
234
|
+
} => {
|
|
235
|
+
if (typeof value !== "object" || value === null) return false;
|
|
236
|
+
const row = value as Record<string, unknown>;
|
|
237
|
+
return (
|
|
238
|
+
typeof row.tool_use_id === "string" &&
|
|
239
|
+
typeof row.tool_name === "string" &&
|
|
240
|
+
typeof row.content === "string"
|
|
241
|
+
);
|
|
242
|
+
};
|
|
243
|
+
|
|
244
|
+
const readArchiveFromParameters = (
|
|
245
|
+
parameters: Record<string, unknown> | undefined,
|
|
246
|
+
): Record<string, ArchivedToolResult> => {
|
|
247
|
+
const raw = parameters?.[TOOL_RESULT_ARCHIVE_PARAM];
|
|
248
|
+
if (typeof raw !== "object" || raw === null) return {};
|
|
249
|
+
const out: Record<string, ArchivedToolResult> = {};
|
|
250
|
+
for (const [key, value] of Object.entries(raw)) {
|
|
251
|
+
if (typeof value !== "object" || value === null) continue;
|
|
252
|
+
const row = value as Record<string, unknown>;
|
|
253
|
+
if (
|
|
254
|
+
typeof row.toolResultId !== "string" ||
|
|
255
|
+
typeof row.conversationId !== "string" ||
|
|
256
|
+
typeof row.toolName !== "string" ||
|
|
257
|
+
typeof row.toolCallId !== "string" ||
|
|
258
|
+
typeof row.createdAt !== "number" ||
|
|
259
|
+
typeof row.sizeBytes !== "number" ||
|
|
260
|
+
typeof row.payload !== "string"
|
|
261
|
+
) {
|
|
262
|
+
continue;
|
|
263
|
+
}
|
|
264
|
+
out[key] = {
|
|
265
|
+
toolResultId: row.toolResultId,
|
|
266
|
+
conversationId: row.conversationId,
|
|
267
|
+
toolName: row.toolName,
|
|
268
|
+
toolCallId: row.toolCallId,
|
|
269
|
+
createdAt: row.createdAt,
|
|
270
|
+
sizeBytes: row.sizeBytes,
|
|
271
|
+
payload: row.payload,
|
|
272
|
+
};
|
|
273
|
+
}
|
|
274
|
+
return out;
|
|
275
|
+
};
|
|
276
|
+
|
|
277
|
+
const makeTruncatedToolResultNotice = (
|
|
278
|
+
toolResultId: string,
|
|
279
|
+
toolName: string,
|
|
280
|
+
payload: string,
|
|
281
|
+
): string => {
|
|
282
|
+
const preview = payload.slice(0, TOOL_RESULT_PREVIEW_CHARS);
|
|
283
|
+
const omittedChars = Math.max(0, payload.length - preview.length);
|
|
284
|
+
return `${TOOL_RESULT_TRUNCATED_PREFIX} id="${toolResultId}" tool="${toolName}" omittedChars=${omittedChars}\n${preview}${omittedChars > 0 ? "\n...[truncated]" : ""}`;
|
|
285
|
+
};
|
|
286
|
+
|
|
287
|
+
const hasUntruncatedToolResults = (messages: Message[]): boolean => {
|
|
288
|
+
for (const msg of messages) {
|
|
289
|
+
if (msg.role !== "tool" || typeof msg.content !== "string") continue;
|
|
290
|
+
let parsed: unknown;
|
|
291
|
+
try {
|
|
292
|
+
parsed = JSON.parse(msg.content);
|
|
293
|
+
} catch {
|
|
294
|
+
continue;
|
|
295
|
+
}
|
|
296
|
+
if (!Array.isArray(parsed)) continue;
|
|
297
|
+
for (const row of parsed) {
|
|
298
|
+
if (!isToolResultRow(row)) continue;
|
|
299
|
+
if (!row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) {
|
|
300
|
+
return true;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
return false;
|
|
305
|
+
};
|
|
306
|
+
|
|
228
307
|
const DEVELOPMENT_MODE_CONTEXT = `## Development Mode Context
|
|
229
308
|
|
|
230
309
|
You are running locally in development mode. Treat this as an editable agent workspace.
|
|
@@ -580,6 +659,7 @@ export class AgentHarness {
|
|
|
580
659
|
private agentFileFingerprint = "";
|
|
581
660
|
private mcpBridge?: LocalMcpBridge;
|
|
582
661
|
private subagentManager?: SubagentManager;
|
|
662
|
+
private readonly archivedToolResultsByConversation = new Map<string, Record<string, ArchivedToolResult>>();
|
|
583
663
|
|
|
584
664
|
private resolveToolAccess(toolName: string): ToolAccess {
|
|
585
665
|
const tools = this.loadedConfig?.tools;
|
|
@@ -662,6 +742,60 @@ export class AgentHarness {
|
|
|
662
742
|
if (this.environment === "development" && this.isToolEnabled("poncho_docs")) {
|
|
663
743
|
this.registerIfMissing(ponchoDocsTool);
|
|
664
744
|
}
|
|
745
|
+
if (this.isToolEnabled("get_tool_result_by_id")) {
|
|
746
|
+
this.registerIfMissing(this.createGetToolResultByIdTool());
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
private createGetToolResultByIdTool(): ToolDefinition {
|
|
751
|
+
return defineTool({
|
|
752
|
+
name: "get_tool_result_by_id",
|
|
753
|
+
description:
|
|
754
|
+
"Retrieve a previously archived full tool result by id for the current conversation. " +
|
|
755
|
+
"Use this when older tool outputs were truncated in prompt history.",
|
|
756
|
+
inputSchema: {
|
|
757
|
+
type: "object",
|
|
758
|
+
properties: {
|
|
759
|
+
toolResultId: { type: "string", description: "Archived tool result id to retrieve" },
|
|
760
|
+
offset: { type: "number", description: "Optional character offset for paging large payloads" },
|
|
761
|
+
limit: { type: "number", description: "Optional maximum characters to return (default 6000, max 20000)" },
|
|
762
|
+
},
|
|
763
|
+
required: ["toolResultId"],
|
|
764
|
+
additionalProperties: false,
|
|
765
|
+
},
|
|
766
|
+
handler: async (input, context) => {
|
|
767
|
+
const conversationId = context.conversationId ?? "__default__";
|
|
768
|
+
const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
|
|
769
|
+
const toolResultId = typeof input.toolResultId === "string" ? input.toolResultId : "";
|
|
770
|
+
const record = archive[toolResultId];
|
|
771
|
+
if (!record) {
|
|
772
|
+
console.info(
|
|
773
|
+
`[poncho][cost] Archived tool result lookup miss: id="${toolResultId}" conversation="${conversationId}"`,
|
|
774
|
+
);
|
|
775
|
+
return {
|
|
776
|
+
error: `No archived tool result found for id "${toolResultId}" in this conversation.`,
|
|
777
|
+
};
|
|
778
|
+
}
|
|
779
|
+
const offset = Math.max(0, Number(input.offset) || 0);
|
|
780
|
+
const limit = Math.min(Math.max(Number(input.limit) || 6000, 1), 20_000);
|
|
781
|
+
const end = Math.min(record.payload.length, offset + limit);
|
|
782
|
+
const chunk = record.payload.slice(offset, end);
|
|
783
|
+
console.info(
|
|
784
|
+
`[poncho][cost] Archived tool result lookup hit: id="${toolResultId}" conversation="${conversationId}" ` +
|
|
785
|
+
`offset=${offset} returned=${chunk.length} total=${record.payload.length}`,
|
|
786
|
+
);
|
|
787
|
+
return {
|
|
788
|
+
toolResultId: record.toolResultId,
|
|
789
|
+
toolName: record.toolName,
|
|
790
|
+
toolCallId: record.toolCallId,
|
|
791
|
+
totalChars: record.payload.length,
|
|
792
|
+
offset,
|
|
793
|
+
returnedChars: chunk.length,
|
|
794
|
+
hasMore: end < record.payload.length,
|
|
795
|
+
payload: chunk,
|
|
796
|
+
};
|
|
797
|
+
},
|
|
798
|
+
});
|
|
665
799
|
}
|
|
666
800
|
|
|
667
801
|
private shouldEnableWriteTool(): boolean {
|
|
@@ -691,6 +825,150 @@ export class AgentHarness {
|
|
|
691
825
|
return this.parsedAgent?.frontmatter;
|
|
692
826
|
}
|
|
693
827
|
|
|
828
|
+
getToolResultArchive(conversationId: string): Record<string, ArchivedToolResult> {
|
|
829
|
+
const archive = this.archivedToolResultsByConversation.get(conversationId);
|
|
830
|
+
return archive ? { ...archive } : {};
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
private seedToolResultArchive(
|
|
834
|
+
conversationId: string,
|
|
835
|
+
parameters: Record<string, unknown> | undefined,
|
|
836
|
+
): Record<string, ArchivedToolResult> {
|
|
837
|
+
const seeded = readArchiveFromParameters(parameters);
|
|
838
|
+
const existing = this.archivedToolResultsByConversation.get(conversationId) ?? {};
|
|
839
|
+
const merged = { ...existing, ...seeded };
|
|
840
|
+
this.archivedToolResultsByConversation.set(conversationId, merged);
|
|
841
|
+
return merged;
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
private truncateHistoricalToolResults(
|
|
845
|
+
messages: Message[],
|
|
846
|
+
conversationId: string,
|
|
847
|
+
): { changed: boolean; truncatedCount: number; archivedCount: number; omittedChars: number } {
|
|
848
|
+
let latestRunId: string | undefined;
|
|
849
|
+
let latestToolMessageIndex = -1;
|
|
850
|
+
for (let i = messages.length - 1; i >= 0; i -= 1) {
|
|
851
|
+
const msg = messages[i]!;
|
|
852
|
+
if (latestToolMessageIndex === -1 && msg.role === "tool" && typeof msg.content === "string") {
|
|
853
|
+
latestToolMessageIndex = i;
|
|
854
|
+
}
|
|
855
|
+
const meta = msg.metadata as Record<string, unknown> | undefined;
|
|
856
|
+
const runId = typeof meta?.runId === "string" ? meta.runId : undefined;
|
|
857
|
+
if (runId) {
|
|
858
|
+
latestRunId = runId;
|
|
859
|
+
break;
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
if (!latestRunId && latestToolMessageIndex === -1) {
|
|
863
|
+
return { changed: false, truncatedCount: 0, archivedCount: 0, omittedChars: 0 };
|
|
864
|
+
}
|
|
865
|
+
const archive = this.archivedToolResultsByConversation.get(conversationId) ?? {};
|
|
866
|
+
this.archivedToolResultsByConversation.set(conversationId, archive);
|
|
867
|
+
let changed = false;
|
|
868
|
+
let truncatedCount = 0;
|
|
869
|
+
let archivedCount = 0;
|
|
870
|
+
let omittedChars = 0;
|
|
871
|
+
|
|
872
|
+
for (let index = 0; index < messages.length; index += 1) {
|
|
873
|
+
const msg = messages[index]!;
|
|
874
|
+
if (msg.role !== "tool" || typeof msg.content !== "string") continue;
|
|
875
|
+
const meta = msg.metadata as Record<string, unknown> | undefined;
|
|
876
|
+
const runId = typeof meta?.runId === "string" ? meta.runId : undefined;
|
|
877
|
+
if (latestRunId) {
|
|
878
|
+
if (runId === latestRunId) continue;
|
|
879
|
+
} else if (index === latestToolMessageIndex) {
|
|
880
|
+
// Legacy fallback for pre-runId conversations: keep newest tool turn intact.
|
|
881
|
+
continue;
|
|
882
|
+
}
|
|
883
|
+
let parsed: unknown;
|
|
884
|
+
try {
|
|
885
|
+
parsed = JSON.parse(msg.content);
|
|
886
|
+
} catch {
|
|
887
|
+
continue;
|
|
888
|
+
}
|
|
889
|
+
if (!Array.isArray(parsed)) continue;
|
|
890
|
+
let rowChanged = false;
|
|
891
|
+
const nextRows = parsed.map((row) => {
|
|
892
|
+
if (!isToolResultRow(row)) return row;
|
|
893
|
+
if (row.content.startsWith(TOOL_RESULT_TRUNCATED_PREFIX)) return row;
|
|
894
|
+
if (this.shouldPreserveSkillToolResult(row)) return row;
|
|
895
|
+
const toolResultId = row.tool_use_id;
|
|
896
|
+
if (!archive[toolResultId]) {
|
|
897
|
+
archive[toolResultId] = {
|
|
898
|
+
toolResultId,
|
|
899
|
+
conversationId,
|
|
900
|
+
toolName: row.tool_name,
|
|
901
|
+
toolCallId: row.tool_use_id,
|
|
902
|
+
createdAt: now(),
|
|
903
|
+
sizeBytes: Buffer.byteLength(row.content, "utf8"),
|
|
904
|
+
payload: row.content,
|
|
905
|
+
};
|
|
906
|
+
archivedCount += 1;
|
|
907
|
+
}
|
|
908
|
+
const omitted = Math.max(0, row.content.length - TOOL_RESULT_PREVIEW_CHARS);
|
|
909
|
+
omittedChars += omitted;
|
|
910
|
+
truncatedCount += 1;
|
|
911
|
+
rowChanged = true;
|
|
912
|
+
return {
|
|
913
|
+
...row,
|
|
914
|
+
content: makeTruncatedToolResultNotice(toolResultId, row.tool_name, row.content),
|
|
915
|
+
};
|
|
916
|
+
});
|
|
917
|
+
if (rowChanged) {
|
|
918
|
+
msg.content = JSON.stringify(nextRows);
|
|
919
|
+
// Critical: historical messages may still carry full-fidelity
|
|
920
|
+
// `_richToolResults`. If we keep it, convertMessage will prefer that
|
|
921
|
+
// path and bypass truncated `content`, causing token growth to remain.
|
|
922
|
+
if (msg.metadata && typeof msg.metadata === "object") {
|
|
923
|
+
const meta = msg.metadata as Record<string, unknown>;
|
|
924
|
+
if ("_richToolResults" in meta) {
|
|
925
|
+
delete meta._richToolResults;
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
changed = true;
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
return { changed, truncatedCount, archivedCount, omittedChars };
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
private shouldPreserveSkillToolResult(row: {
|
|
935
|
+
tool_use_id: string;
|
|
936
|
+
tool_name: string;
|
|
937
|
+
content: string;
|
|
938
|
+
}): boolean {
|
|
939
|
+
if (row.tool_name.startsWith("todo_")) {
|
|
940
|
+
return true;
|
|
941
|
+
}
|
|
942
|
+
if (row.tool_name !== "activate_skill" && row.tool_name !== "deactivate_skill") {
|
|
943
|
+
return false;
|
|
944
|
+
}
|
|
945
|
+
const content = row.content.trim();
|
|
946
|
+
if (content.startsWith("Tool error:")) {
|
|
947
|
+
return false;
|
|
948
|
+
}
|
|
949
|
+
try {
|
|
950
|
+
const parsed = JSON.parse(content) as Record<string, unknown>;
|
|
951
|
+
const skill =
|
|
952
|
+
typeof parsed.skill === "string"
|
|
953
|
+
? parsed.skill
|
|
954
|
+
: undefined;
|
|
955
|
+
if (skill && this.activeSkillNames.has(skill)) {
|
|
956
|
+
return true;
|
|
957
|
+
}
|
|
958
|
+
const activeSkills = Array.isArray(parsed.activeSkills)
|
|
959
|
+
? parsed.activeSkills.filter((v): v is string => typeof v === "string")
|
|
960
|
+
: [];
|
|
961
|
+
for (const name of activeSkills) {
|
|
962
|
+
if (this.activeSkillNames.has(name)) {
|
|
963
|
+
return true;
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
} catch {
|
|
967
|
+
// Non-JSON tool content should not block truncation.
|
|
968
|
+
}
|
|
969
|
+
return false;
|
|
970
|
+
}
|
|
971
|
+
|
|
694
972
|
async getTodos(conversationId: string): Promise<TodoItem[]> {
|
|
695
973
|
if (!this.todoStore) return [];
|
|
696
974
|
return this.todoStore.get(conversationId);
|
|
@@ -1475,7 +1753,7 @@ export class AgentHarness {
|
|
|
1475
1753
|
let agent = this.parsedAgent as ParsedAgent;
|
|
1476
1754
|
const runId = `run_${randomUUID()}`;
|
|
1477
1755
|
const start = now();
|
|
1478
|
-
const maxSteps = agent.frontmatter.limits?.maxSteps ??
|
|
1756
|
+
const maxSteps = agent.frontmatter.limits?.maxSteps ?? 20;
|
|
1479
1757
|
const configuredTimeout = agent.frontmatter.limits?.timeout;
|
|
1480
1758
|
const timeoutMs = this.environment === "development" && configuredTimeout == null
|
|
1481
1759
|
? 0 // no hard timeout in development unless explicitly configured
|
|
@@ -1485,6 +1763,29 @@ export class AgentHarness {
|
|
|
1485
1763
|
? 0
|
|
1486
1764
|
: platformMaxDurationSec * 800;
|
|
1487
1765
|
const messages: Message[] = [...(input.messages ?? [])];
|
|
1766
|
+
const conversationId = input.conversationId ?? "__default__";
|
|
1767
|
+
this.seedToolResultArchive(conversationId, input.parameters);
|
|
1768
|
+
const truncationSummary = this.truncateHistoricalToolResults(messages, conversationId);
|
|
1769
|
+
if (truncationSummary.changed) {
|
|
1770
|
+
console.info(
|
|
1771
|
+
`[poncho][cost] Truncated ${truncationSummary.truncatedCount} historical tool result(s) ` +
|
|
1772
|
+
`(archived_new=${truncationSummary.archivedCount}, omitted_chars=${truncationSummary.omittedChars}) ` +
|
|
1773
|
+
`for conversation="${conversationId}"`,
|
|
1774
|
+
);
|
|
1775
|
+
}
|
|
1776
|
+
const hasFullToolResults = hasUntruncatedToolResults(messages);
|
|
1777
|
+
const enablePromptCache = !hasFullToolResults;
|
|
1778
|
+
if (!enablePromptCache) {
|
|
1779
|
+
console.info(
|
|
1780
|
+
`[poncho][cost] Prompt cache write disabled for run "${runId}" ` +
|
|
1781
|
+
`(untruncated tool results present in history).`,
|
|
1782
|
+
);
|
|
1783
|
+
} else {
|
|
1784
|
+
console.info(
|
|
1785
|
+
`[poncho][cost] Prompt cache write enabled for run "${runId}" ` +
|
|
1786
|
+
`(history has no untruncated tool results).`,
|
|
1787
|
+
);
|
|
1788
|
+
}
|
|
1488
1789
|
const inputMessageCount = messages.length;
|
|
1489
1790
|
const events: AgentEvent[] = [];
|
|
1490
1791
|
|
|
@@ -1583,7 +1884,6 @@ ${boundedMainMemory.trim()}`
|
|
|
1583
1884
|
profileDir: string;
|
|
1584
1885
|
isLaunched: boolean }
|
|
1585
1886
|
| undefined;
|
|
1586
|
-
const conversationId = input.conversationId ?? "__default__";
|
|
1587
1887
|
if (browserSession) {
|
|
1588
1888
|
browserCleanups.push(
|
|
1589
1889
|
browserSession.onFrame(conversationId, (frame) => {
|
|
@@ -1655,6 +1955,7 @@ ${boundedMainMemory.trim()}`
|
|
|
1655
1955
|
let totalInputTokens = 0;
|
|
1656
1956
|
let totalOutputTokens = 0;
|
|
1657
1957
|
let totalCachedTokens = 0;
|
|
1958
|
+
let totalCacheWriteTokens = 0;
|
|
1658
1959
|
let transientStepRetryCount = 0;
|
|
1659
1960
|
let latestContextTokens = 0;
|
|
1660
1961
|
let toolOutputEstimateSinceModel = 0;
|
|
@@ -1684,7 +1985,12 @@ ${boundedMainMemory.trim()}`
|
|
|
1684
1985
|
status: "completed",
|
|
1685
1986
|
response: responseText,
|
|
1686
1987
|
steps: step - 1,
|
|
1687
|
-
tokens: {
|
|
1988
|
+
tokens: {
|
|
1989
|
+
input: totalInputTokens,
|
|
1990
|
+
output: totalOutputTokens,
|
|
1991
|
+
cached: totalCachedTokens,
|
|
1992
|
+
cacheWrite: totalCacheWriteTokens,
|
|
1993
|
+
},
|
|
1688
1994
|
duration: now() - start,
|
|
1689
1995
|
continuation: true,
|
|
1690
1996
|
continuationMessages: [...messages],
|
|
@@ -1698,7 +2004,6 @@ ${boundedMainMemory.trim()}`
|
|
|
1698
2004
|
|
|
1699
2005
|
const stepStart = now();
|
|
1700
2006
|
yield pushEvent({ type: "step:started", step });
|
|
1701
|
-
yield pushEvent({ type: "model:request", tokens: 0 });
|
|
1702
2007
|
|
|
1703
2008
|
const dispatcherTools = this.dispatcher.list();
|
|
1704
2009
|
const exposedToolNames = new Map<string, string>();
|
|
@@ -1720,6 +2025,15 @@ ${boundedMainMemory.trim()}`
|
|
|
1720
2025
|
inputSchema: jsonSchemaToZod(tool.inputSchema),
|
|
1721
2026
|
};
|
|
1722
2027
|
}
|
|
2028
|
+
const toolDefsJsonForEstimate = JSON.stringify(
|
|
2029
|
+
dispatcherTools.map((t) => ({
|
|
2030
|
+
name: t.name,
|
|
2031
|
+
description: t.description,
|
|
2032
|
+
inputSchema: t.inputSchema,
|
|
2033
|
+
})),
|
|
2034
|
+
);
|
|
2035
|
+
const requestTokenEstimate = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
|
|
2036
|
+
yield pushEvent({ type: "model:request", tokens: requestTokenEstimate });
|
|
1723
2037
|
|
|
1724
2038
|
// Convert messages to ModelMessage format
|
|
1725
2039
|
const convertMessage = async (msg: Message): Promise<ModelMessage[]> => {
|
|
@@ -1956,20 +2270,11 @@ ${boundedMainMemory.trim()}`
|
|
|
1956
2270
|
}
|
|
1957
2271
|
const modelInstance = this.modelProvider(modelName);
|
|
1958
2272
|
|
|
1959
|
-
// --- Auto-compaction
|
|
1960
|
-
//
|
|
1961
|
-
// (JSON-stringified tool_calls / tool results) that must not leak
|
|
1962
|
-
// into the conversation store via compactedMessages.
|
|
2273
|
+
// --- Auto-compaction ---
|
|
2274
|
+
// Re-check every N steps to curb runaway context growth in longer runs.
|
|
1963
2275
|
const compactionConfig = resolveCompactionConfig(agent.frontmatter.compaction);
|
|
1964
|
-
if (compactionConfig.enabled && step === 1) {
|
|
1965
|
-
const
|
|
1966
|
-
dispatcherTools.map((t) => ({
|
|
1967
|
-
name: t.name,
|
|
1968
|
-
description: t.description,
|
|
1969
|
-
inputSchema: t.inputSchema,
|
|
1970
|
-
})),
|
|
1971
|
-
);
|
|
1972
|
-
const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJson);
|
|
2276
|
+
if (compactionConfig.enabled && (step === 1 || step % COMPACTION_CHECK_INTERVAL_STEPS === 0)) {
|
|
2277
|
+
const estimated = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
|
|
1973
2278
|
const lastReportedInput = totalInputTokens > 0 ? totalInputTokens : 0;
|
|
1974
2279
|
const effectiveTokens = Math.max(estimated, lastReportedInput);
|
|
1975
2280
|
|
|
@@ -1984,14 +2289,17 @@ ${boundedMainMemory.trim()}`
|
|
|
1984
2289
|
if (compactResult.compacted) {
|
|
1985
2290
|
messages.length = 0;
|
|
1986
2291
|
messages.push(...compactResult.messages);
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
emittedMessages.
|
|
2292
|
+
let emittedMessages: Message[] | undefined;
|
|
2293
|
+
if (step === 1) {
|
|
2294
|
+
// Strip the trailing user task message so runners can use
|
|
2295
|
+
// compactedMessages directly as historyMessages without
|
|
2296
|
+
// duplicating the user turn they append themselves.
|
|
2297
|
+
emittedMessages = [...compactResult.messages];
|
|
2298
|
+
if (emittedMessages.length > 0 && emittedMessages[emittedMessages.length - 1].role === "user") {
|
|
2299
|
+
emittedMessages.pop();
|
|
2300
|
+
}
|
|
1993
2301
|
}
|
|
1994
|
-
const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages,
|
|
2302
|
+
const tokensAfterCompaction = estimateTotalTokens(integrityPrompt, messages, toolDefsJsonForEstimate);
|
|
1995
2303
|
latestContextTokens = tokensAfterCompaction;
|
|
1996
2304
|
toolOutputEstimateSinceModel = 0;
|
|
1997
2305
|
yield pushEvent({
|
|
@@ -2024,7 +2332,9 @@ ${boundedMainMemory.trim()}`
|
|
|
2024
2332
|
|
|
2025
2333
|
const temperature = agent.frontmatter.model?.temperature ?? 0.2;
|
|
2026
2334
|
const maxTokens = agent.frontmatter.model?.maxTokens;
|
|
2027
|
-
const cachedMessages =
|
|
2335
|
+
const cachedMessages = enablePromptCache
|
|
2336
|
+
? addPromptCacheBreakpoints(coreMessages, modelInstance)
|
|
2337
|
+
: coreMessages;
|
|
2028
2338
|
|
|
2029
2339
|
const telemetryEnabled = this.loadedConfig?.telemetry?.enabled !== false;
|
|
2030
2340
|
|
|
@@ -2146,14 +2456,19 @@ ${boundedMainMemory.trim()}`
|
|
|
2146
2456
|
messages.push({
|
|
2147
2457
|
role: "assistant",
|
|
2148
2458
|
content: fullText,
|
|
2149
|
-
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
2459
|
+
metadata: { timestamp: now(), id: randomUUID(), step, runId },
|
|
2150
2460
|
});
|
|
2151
2461
|
}
|
|
2152
2462
|
const result_: RunResult = {
|
|
2153
2463
|
status: "completed",
|
|
2154
2464
|
response: responseText + fullText,
|
|
2155
2465
|
steps: step,
|
|
2156
|
-
tokens: {
|
|
2466
|
+
tokens: {
|
|
2467
|
+
input: totalInputTokens,
|
|
2468
|
+
output: totalOutputTokens,
|
|
2469
|
+
cached: totalCachedTokens,
|
|
2470
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2471
|
+
},
|
|
2157
2472
|
duration: now() - start,
|
|
2158
2473
|
continuation: true,
|
|
2159
2474
|
continuationMessages: [...messages],
|
|
@@ -2178,14 +2493,19 @@ ${boundedMainMemory.trim()}`
|
|
|
2178
2493
|
messages.push({
|
|
2179
2494
|
role: "assistant",
|
|
2180
2495
|
content: fullText,
|
|
2181
|
-
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
2496
|
+
metadata: { timestamp: now(), id: randomUUID(), step, runId },
|
|
2182
2497
|
});
|
|
2183
2498
|
}
|
|
2184
2499
|
const result_: RunResult = {
|
|
2185
2500
|
status: "completed",
|
|
2186
2501
|
response: responseText + fullText,
|
|
2187
2502
|
steps: step,
|
|
2188
|
-
tokens: {
|
|
2503
|
+
tokens: {
|
|
2504
|
+
input: totalInputTokens,
|
|
2505
|
+
output: totalOutputTokens,
|
|
2506
|
+
cached: totalCachedTokens,
|
|
2507
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2508
|
+
},
|
|
2189
2509
|
duration: now() - start,
|
|
2190
2510
|
continuation: true,
|
|
2191
2511
|
continuationMessages: [...messages],
|
|
@@ -2233,11 +2553,21 @@ ${boundedMainMemory.trim()}`
|
|
|
2233
2553
|
const toolCallsResult = await result.toolCalls;
|
|
2234
2554
|
|
|
2235
2555
|
// Update token usage
|
|
2236
|
-
const
|
|
2556
|
+
const details = (usage.inputTokenDetails ?? {}) as Record<string, unknown>;
|
|
2557
|
+
const stepCachedTokens = typeof details.cacheReadTokens === "number" ? details.cacheReadTokens : 0;
|
|
2558
|
+
const stepCacheWriteTokens =
|
|
2559
|
+
typeof details.cacheWriteTokens === "number"
|
|
2560
|
+
? details.cacheWriteTokens
|
|
2561
|
+
: typeof details.cacheCreationTokens === "number"
|
|
2562
|
+
? details.cacheCreationTokens
|
|
2563
|
+
: typeof details.cacheCreationInputTokens === "number"
|
|
2564
|
+
? details.cacheCreationInputTokens
|
|
2565
|
+
: 0;
|
|
2237
2566
|
const stepInputTokens = usage.inputTokens ?? 0;
|
|
2238
2567
|
totalInputTokens += stepInputTokens;
|
|
2239
2568
|
totalOutputTokens += usage.outputTokens ?? 0;
|
|
2240
2569
|
totalCachedTokens += stepCachedTokens;
|
|
2570
|
+
totalCacheWriteTokens += stepCacheWriteTokens;
|
|
2241
2571
|
latestContextTokens = stepInputTokens;
|
|
2242
2572
|
toolOutputEstimateSinceModel = 0;
|
|
2243
2573
|
|
|
@@ -2247,8 +2577,15 @@ ${boundedMainMemory.trim()}`
|
|
|
2247
2577
|
input: stepInputTokens,
|
|
2248
2578
|
output: usage.outputTokens ?? 0,
|
|
2249
2579
|
cached: stepCachedTokens,
|
|
2580
|
+
cacheWrite: stepCacheWriteTokens,
|
|
2250
2581
|
},
|
|
2251
2582
|
});
|
|
2583
|
+
console.info(
|
|
2584
|
+
`[poncho][cost] model="${modelName}" step=${step} ` +
|
|
2585
|
+
`input=${stepInputTokens} output=${usage.outputTokens ?? 0} ` +
|
|
2586
|
+
`cached=${stepCachedTokens} cacheWrite=${stepCacheWriteTokens} ` +
|
|
2587
|
+
`totals(input=${totalInputTokens}, output=${totalOutputTokens}, cached=${totalCachedTokens}, cacheWrite=${totalCacheWriteTokens})`,
|
|
2588
|
+
);
|
|
2252
2589
|
|
|
2253
2590
|
// Extract tool calls
|
|
2254
2591
|
const toolCalls = toolCallsResult.map((tc) => ({
|
|
@@ -2285,7 +2622,7 @@ ${boundedMainMemory.trim()}`
|
|
|
2285
2622
|
messages.push({
|
|
2286
2623
|
role: "assistant",
|
|
2287
2624
|
content: fullText,
|
|
2288
|
-
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
2625
|
+
metadata: { timestamp: now(), id: randomUUID(), step, runId },
|
|
2289
2626
|
});
|
|
2290
2627
|
}
|
|
2291
2628
|
responseText = fullText;
|
|
@@ -2302,6 +2639,7 @@ ${boundedMainMemory.trim()}`
|
|
|
2302
2639
|
input: totalInputTokens,
|
|
2303
2640
|
output: totalOutputTokens,
|
|
2304
2641
|
cached: totalCachedTokens,
|
|
2642
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2305
2643
|
},
|
|
2306
2644
|
duration: now() - start,
|
|
2307
2645
|
contextTokens: latestContextTokens + toolOutputEstimateSinceModel,
|
|
@@ -2398,7 +2736,7 @@ ${boundedMainMemory.trim()}`
|
|
|
2398
2736
|
const assistantMsg: Message = {
|
|
2399
2737
|
role: "assistant",
|
|
2400
2738
|
content: assistantContent,
|
|
2401
|
-
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
2739
|
+
metadata: { timestamp: now(), id: randomUUID(), step, runId },
|
|
2402
2740
|
};
|
|
2403
2741
|
const deltaMessages = [...messages.slice(inputMessageCount), assistantMsg];
|
|
2404
2742
|
yield pushEvent({
|
|
@@ -2498,14 +2836,19 @@ ${boundedMainMemory.trim()}`
|
|
|
2498
2836
|
messages.push({
|
|
2499
2837
|
role: "assistant",
|
|
2500
2838
|
content: fullText,
|
|
2501
|
-
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
2839
|
+
metadata: { timestamp: now(), id: randomUUID(), step, runId },
|
|
2502
2840
|
});
|
|
2503
2841
|
}
|
|
2504
2842
|
const result_: RunResult = {
|
|
2505
2843
|
status: "completed",
|
|
2506
2844
|
response: responseText + fullText,
|
|
2507
2845
|
steps: step,
|
|
2508
|
-
tokens: {
|
|
2846
|
+
tokens: {
|
|
2847
|
+
input: totalInputTokens,
|
|
2848
|
+
output: totalOutputTokens,
|
|
2849
|
+
cached: totalCachedTokens,
|
|
2850
|
+
cacheWrite: totalCacheWriteTokens,
|
|
2851
|
+
},
|
|
2509
2852
|
duration: now() - start,
|
|
2510
2853
|
continuation: true,
|
|
2511
2854
|
continuationMessages: [...messages],
|
|
@@ -2538,6 +2881,20 @@ ${boundedMainMemory.trim()}`
|
|
|
2538
2881
|
tool_name: result.tool,
|
|
2539
2882
|
content: `Tool error: ${result.error}`,
|
|
2540
2883
|
});
|
|
2884
|
+
{
|
|
2885
|
+
const archive = this.archivedToolResultsByConversation.get(conversationId);
|
|
2886
|
+
if (archive) {
|
|
2887
|
+
archive[result.callId] = {
|
|
2888
|
+
toolResultId: result.callId,
|
|
2889
|
+
conversationId,
|
|
2890
|
+
toolName: result.tool,
|
|
2891
|
+
toolCallId: result.callId,
|
|
2892
|
+
createdAt: now(),
|
|
2893
|
+
sizeBytes: Buffer.byteLength(`Tool error: ${result.error}`, "utf8"),
|
|
2894
|
+
payload: `Tool error: ${result.error}`,
|
|
2895
|
+
};
|
|
2896
|
+
}
|
|
2897
|
+
}
|
|
2541
2898
|
richToolResults.push({
|
|
2542
2899
|
type: "tool-result",
|
|
2543
2900
|
toolCallId: result.callId,
|
|
@@ -2564,6 +2921,21 @@ ${boundedMainMemory.trim()}`
|
|
|
2564
2921
|
tool_name: result.tool,
|
|
2565
2922
|
content: JSON.stringify(strippedOutput ?? null),
|
|
2566
2923
|
});
|
|
2924
|
+
{
|
|
2925
|
+
const archive = this.archivedToolResultsByConversation.get(conversationId);
|
|
2926
|
+
if (archive) {
|
|
2927
|
+
const payload = JSON.stringify(result.output ?? null);
|
|
2928
|
+
archive[result.callId] = {
|
|
2929
|
+
toolResultId: result.callId,
|
|
2930
|
+
conversationId,
|
|
2931
|
+
toolName: result.tool,
|
|
2932
|
+
toolCallId: result.callId,
|
|
2933
|
+
createdAt: now(),
|
|
2934
|
+
sizeBytes: Buffer.byteLength(payload, "utf8"),
|
|
2935
|
+
payload,
|
|
2936
|
+
};
|
|
2937
|
+
}
|
|
2938
|
+
}
|
|
2567
2939
|
|
|
2568
2940
|
if (mediaItems.length > 0) {
|
|
2569
2941
|
richToolResults.push({
|
|
@@ -2604,9 +2976,15 @@ ${boundedMainMemory.trim()}`
|
|
|
2604
2976
|
messages.push({
|
|
2605
2977
|
role: "assistant",
|
|
2606
2978
|
content: assistantContent,
|
|
2607
|
-
metadata: { timestamp: now(), id: randomUUID(), step },
|
|
2979
|
+
metadata: { timestamp: now(), id: randomUUID(), step, runId },
|
|
2608
2980
|
});
|
|
2609
|
-
const toolMsgMeta: Record<string, unknown> = {
|
|
2981
|
+
const toolMsgMeta: Record<string, unknown> = {
|
|
2982
|
+
timestamp: now(),
|
|
2983
|
+
id: randomUUID(),
|
|
2984
|
+
step,
|
|
2985
|
+
runId,
|
|
2986
|
+
_richToolResults: richToolResults,
|
|
2987
|
+
};
|
|
2610
2988
|
messages.push({
|
|
2611
2989
|
role: "tool",
|
|
2612
2990
|
content: JSON.stringify(toolResultsForModel),
|
|
@@ -2621,7 +2999,12 @@ ${boundedMainMemory.trim()}`
|
|
|
2621
2999
|
status: "completed",
|
|
2622
3000
|
response: responseText + fullText,
|
|
2623
3001
|
steps: step,
|
|
2624
|
-
tokens: {
|
|
3002
|
+
tokens: {
|
|
3003
|
+
input: totalInputTokens,
|
|
3004
|
+
output: totalOutputTokens,
|
|
3005
|
+
cached: totalCachedTokens,
|
|
3006
|
+
cacheWrite: totalCacheWriteTokens,
|
|
3007
|
+
},
|
|
2625
3008
|
duration: now() - start,
|
|
2626
3009
|
continuation: true,
|
|
2627
3010
|
continuationMessages: [...messages],
|
|
@@ -2689,7 +3072,12 @@ ${boundedMainMemory.trim()}`
|
|
|
2689
3072
|
status: "completed",
|
|
2690
3073
|
response: responseText,
|
|
2691
3074
|
steps: maxSteps,
|
|
2692
|
-
tokens: {
|
|
3075
|
+
tokens: {
|
|
3076
|
+
input: totalInputTokens,
|
|
3077
|
+
output: totalOutputTokens,
|
|
3078
|
+
cached: totalCachedTokens,
|
|
3079
|
+
cacheWrite: totalCacheWriteTokens,
|
|
3080
|
+
},
|
|
2693
3081
|
duration: now() - start,
|
|
2694
3082
|
continuation: true,
|
|
2695
3083
|
continuationMessages: [...messages],
|