@poncho-ai/harness 0.52.2 → 0.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +73 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.js +131 -19
- package/package.json +2 -2
- package/src/compaction.ts +206 -13
- package/src/harness.ts +3 -1
- package/src/orchestrator/orchestrator.ts +20 -2
- package/src/state.ts +3 -0
- package/src/storage/entries.ts +204 -0
- package/src/subagent-manager.ts +4 -0
- package/src/subagent-tools.ts +1 -0
- package/src/tool-dispatcher.ts +4 -1
- package/test/compaction.test.ts +274 -0
- package/test/entries.test.ts +125 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import type { LanguageModel } from "ai";
|
|
3
|
+
import { MockLanguageModelV3 } from "ai/test";
|
|
4
|
+
import type { Message } from "@poncho-ai/sdk";
|
|
5
|
+
import {
|
|
6
|
+
compactMessages,
|
|
7
|
+
findSafeSplitPoint,
|
|
8
|
+
resolveCompactionConfig,
|
|
9
|
+
} from "../src/compaction.js";
|
|
10
|
+
|
|
11
|
+
// ── Fake model ──────────────────────────────────────────────────────────
/**
 * Creates a MockLanguageModelV3 stand-in for the summarizer model.
 *
 * Each `doGenerate` call appends every text part found in array-valued prompt
 * message content to `prompts`, then answers with `summaryText`. Tests read
 * `prompts` to assert on what was sent to the summarizer.
 *
 * @param summaryText Fixed text the stub returns as its generated content.
 * @returns The stub (typed as `LanguageModel`) and the shared `prompts` log.
 */
function fakeModel(summaryText: string): {
  model: LanguageModel;
  prompts: string[];
} {
  const prompts: string[] = [];
  const stub = new MockLanguageModelV3({
    doGenerate: async ({ prompt }) => {
      for (const message of prompt) {
        // Only part-array content is recorded; anything else is skipped.
        if (!Array.isArray(message.content)) continue;
        for (const part of message.content) {
          if (part.type === "text") prompts.push(part.text);
        }
      }
      return {
        content: [{ type: "text", text: summaryText }],
        finishReason: "stop",
        usage: { inputTokens: 10, outputTokens: 10, totalTokens: 20 },
        warnings: [],
      };
    },
  });
  return { model: stub as unknown as LanguageModel, prompts };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Builds a user-role message. The `metadata` key is only present on the
 * result when a truthy `metadata` argument is supplied.
 */
function userMsg(text: string, metadata?: Message["metadata"]): Message {
  const base: Message = { role: "user", content: text };
  return metadata ? { ...base, metadata } : base;
}
|
|
45
|
+
/** Builds an assistant-role message whose content is the given plain text. */
function assistantText(text: string): Message {
  return { role: "assistant", content: text };
}
|
|
49
|
+
/**
 * Builds an assistant-role message whose content is a JSON string holding the
 * text plus a single tool call (id "call_1", empty arguments) to `toolName`.
 */
function assistantToolCall(text: string, toolName: string): Message {
  const payload = {
    text,
    tool_calls: [{ id: "call_1", name: toolName, arguments: {} }],
  };
  return { role: "assistant", content: JSON.stringify(payload) };
}
|
|
56
|
+
/** Builds a tool-role message carrying the given text as its content. */
function toolResult(text: string): Message {
  return { role: "tool", content: text };
}
|
|
57
|
+
|
|
58
|
+
// Tests for findSafeSplitPoint: where a message history may be cut so the
// older side can be compacted without breaking tool-call pairing.
describe("findSafeSplitPoint", () => {
  it("splits at a normal user-message boundary", () => {
    // Four alternating user/assistant pairs: u0, a0, … u3, a3.
    // Index 4 (u2) is a clean user boundary.
    const history: Message[] = [0, 1, 2, 3].flatMap((n) => [
      userMsg(`u${n}`),
      assistantText(`a${n}`),
    ]);

    // candidate = 8 - 4 = 4, which is already a user message → split there.
    const splitAt = findSafeSplitPoint(history, 4);

    expect(splitAt).toBe(4);
    expect(history[splitAt]!.role).toBe("user");
  });

  it("returns -1 when there are too few messages", () => {
    const tooShort: Message[] = [userMsg("u0"), assistantText("a0")];
    const splitAt = findSafeSplitPoint(tooShort, 4);
    expect(splitAt).toBe(-1);
  });

  it("walks earlier when the split would orphan tool_calls being moved", () => {
    // The candidate user boundary (index 7) directly follows an assistant
    // tool-call message whose tool result would stay on the preserved side.
    // Cutting there would strand the tool_calls inside the summary, so the
    // guard is expected to retreat to the previous clean user boundary, which
    // is still >= MIN_COMPACTABLE_MESSAGES.
    const history: Message[] = [
      /* 0 */ userMsg("u0"),
      /* 1 */ assistantText("a0"),
      /* 2 */ userMsg("u1"),
      /* 3 */ assistantText("a1"),
      /* 4 */ userMsg("u2"), // safe earlier boundary
      /* 5 */ assistantText("a2"),
      /* 6 */ assistantToolCall("calling tool", "search"), // last-compacted if split at 7
      /* 7 */ userMsg("u3 (tool result delivered as user)"), // candidate boundary
      /* 8 */ toolResult("result"),
      /* 9 */ assistantText("a3"),
    ];

    // candidate = 10 - 3 = 7 (a user message), but history[6] is an assistant
    // with tool_calls → orphan. Must walk back to index 4.
    const splitAt = findSafeSplitPoint(history, 3);
    expect(splitAt).toBe(4);

    // The compacted side must not end on a dangling assistant-with-tool_calls.
    const tail = history[splitAt - 1]!;
    const endsOnToolCall =
      typeof tail.content === "string" && tail.content.includes('"tool_calls"');
    expect(endsOnToolCall).toBe(false);
  });
});
|
|
111
|
+
|
|
112
|
+
// Tests for compactMessages: summary generation, the verbatim subagent
// ledger, and how prior summaries / long messages are fed to the summarizer.
describe("compactMessages", () => {
  const compactionConfig = resolveCompactionConfig({ keepRecentMessages: 2 });

  it("compacts older messages into a summary continuation message", async () => {
    const { model } = fakeModel("SUMMARY TEXT");
    const history: Message[] = [
      userMsg("u0"),
      assistantText("a0"),
      userMsg("u1"),
      assistantText("a1"),
      userMsg("u2"),
      assistantText("a2"),
    ];

    const outcome = await compactMessages(model, history, compactionConfig);

    expect(outcome.compacted).toBe(true);
    const summary = outcome.messages[0]!;
    expect(summary.metadata?.isCompactionSummary).toBe(true);
    expect(summary.content).toContain("SUMMARY TEXT");
    // Without any subagent callbacks there must be no ledger block.
    expect(summary.content).not.toContain("## Subagents");
  });

  it("appends a verbatim subagent ledger after the LLM summary", async () => {
    const { model } = fakeModel("SUMMARY TEXT");
    const callbackMeta = {
      _subagentCallback: true,
      subagentId: "sub_abc",
      task: "research the API",
    } as Message["metadata"];
    const history: Message[] = [
      userMsg("u0"),
      assistantText("a0"),
      userMsg(
        '[Subagent Result] Subagent "research the API" (sub_abc) completed:\n\nFound that the endpoint returns JSON with a data array. Use /v2/items.',
        callbackMeta,
      ),
      assistantText("a1"),
      userMsg("u2"),
      assistantText("a2"),
    ];

    const outcome = await compactMessages(model, history, compactionConfig);

    expect(outcome.compacted).toBe(true);
    const summaryBody = outcome.messages[0]!.content as string;
    expect(summaryBody).toContain("## Subagents");
    expect(summaryBody).toContain("sub_abc");
    expect(summaryBody).toContain("research the API");
    // The digest keeps the result body verbatim.
    expect(summaryBody).toContain("endpoint returns JSON");
    // The ledger is placed after the summary text.
    const summaryPos = summaryBody.indexOf("SUMMARY TEXT");
    const ledgerPos = summaryBody.indexOf("## Subagents");
    expect(summaryPos).toBeLessThan(ledgerPos);
  });

  it("detects subagent callbacks by text marker even without metadata", async () => {
    const { model } = fakeModel("S");
    const history: Message[] = [
      userMsg("u0"),
      assistantText("a0"),
      // No metadata here: only the "[Subagent Result]" text marker identifies it.
      userMsg(
        '[Subagent Result] Subagent "compile report" (sub_xyz) completed:\n\nThe report is ready.',
      ),
      assistantText("a1"),
      userMsg("u2"),
      assistantText("a2"),
    ];

    const outcome = await compactMessages(model, history, compactionConfig);

    const summaryBody = outcome.messages[0]!.content as string;
    expect(summaryBody).toContain("sub_xyz");
    expect(summaryBody).toContain("compile report");
  });

  it("carries forward a prior ledger and dedupes by subagentId", async () => {
    const { model } = fakeModel("NEW SUMMARY");
    // The history opens with an earlier compaction summary that already
    // embeds a ## Subagents block listing sub_abc and sub_old.
    const priorLines = [
      "[CONTEXT COMPACTION] prior.",
      "<summary>",
      "Earlier work done.",
      "",
      "## Subagents",
      "- **research the API** (sub_abc) — completed",
      " Old digest about the API.",
      "- **legacy task** (sub_old) — completed",
      " Legacy digest text.",
      "</summary>",
    ];
    const earlierSummary: Message = {
      role: "user",
      content: priorLines.join("\n"),
      metadata: { isCompactionSummary: true },
    };
    const freshCallbackMeta = {
      _subagentCallback: true,
      subagentId: "sub_abc",
      task: "research the API",
    } as Message["metadata"];
    const history: Message[] = [
      earlierSummary,
      assistantText("a0"),
      // A fresh callback for sub_abc should OVERRIDE the prior entry.
      userMsg(
        '[Subagent Result] Subagent "research the API" (sub_abc) completed:\n\nUpdated finding: the endpoint moved to /v3/items.',
        freshCallbackMeta,
      ),
      assistantText("a1"),
      userMsg("u2"),
      assistantText("a2"),
    ];

    const outcome = await compactMessages(model, history, compactionConfig);

    const summaryBody = outcome.messages[0]!.content as string;
    // Both subagents survive the merge.
    expect(summaryBody).toContain("sub_abc");
    expect(summaryBody).toContain("sub_old");
    // sub_abc is listed exactly once (deduped).
    const subAbcCount = (summaryBody.match(/sub_abc/g) ?? []).length;
    expect(subAbcCount).toBe(1);
    // The newer digest replaced the old one.
    expect(summaryBody).toContain("/v3/items");
    expect(summaryBody).not.toContain("Old digest about the API");
  });

  it("passes a prior summary in full (no 1200-char truncation) and adds the merge instruction", async () => {
    const { model, prompts } = fakeModel("MERGED");
    const longPrior = "PRIOR-STATE ".repeat(200); // 2400 chars, above the 1200 limit
    const earlierSummary: Message = {
      role: "user",
      content: longPrior,
      metadata: { isCompactionSummary: true },
    };
    const history: Message[] = [
      earlierSummary,
      assistantText("a0"),
      userMsg("u1"),
      assistantText("a1"),
      userMsg("u2"),
      assistantText("a2"),
    ];

    await compactMessages(model, history, compactionConfig);

    const sentPrompt = prompts.join("\n");
    // The entire prior summary reached the summarizer, untruncated.
    expect(sentPrompt).toContain(longPrior.trim());
    expect(sentPrompt).not.toContain("[truncated]");
    // It is tagged as a prior summary and accompanied by the merge instruction.
    expect(sentPrompt).toContain("[prior-summary]");
    expect(sentPrompt).toContain("MERGE AND UPDATE");
  });

  it("still truncates non-prior-summary long messages to 1200 chars", async () => {
    const { model, prompts } = fakeModel("S");
    const longUser = "X".repeat(3000);
    const history: Message[] = [
      userMsg(longUser),
      assistantText("a0"),
      userMsg("u1"),
      assistantText("a1"),
      userMsg("u2"),
      assistantText("a2"),
    ];

    await compactMessages(model, history, compactionConfig);

    const sentPrompt = prompts.join("\n");
    expect(sentPrompt).toContain("[truncated]");
    // The first message is an ordinary user message, so no merge instruction.
    expect(sentPrompt).not.toContain("MERGE AND UPDATE");
  });
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
buildLlmContext,
|
|
4
|
+
buildDisplaySnapshot,
|
|
5
|
+
getPendingSubagentResults,
|
|
6
|
+
type ConversationEntry,
|
|
7
|
+
} from "../src/storage/entries.js";
|
|
8
|
+
import type { Message } from "@poncho-ai/sdk";
|
|
9
|
+
|
|
10
|
+
/** Builds a message with the given role and content. */
function msg(role: Message["role"], content: string): Message {
  return { role, content };
}
|
|
11
|
+
|
|
12
|
+
// Module-level sequence counter shared by the entry factories in this file.
let seq = 0;

/** Rewinds the counter so the next issued sequence number is 1. */
function reset(): void {
  seq = 0;
}

/** Advances the counter and returns the new sequence number. */
function next(): number {
  seq += 1;
  return seq;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Builds a "harness_message" entry wrapping an assistant message.
 * The id is `h<n>` where n is the sequence number this entry receives.
 */
function harness(content: string, turnId = "t1"): ConversationEntry {
  // The id is read from the counter before next() advances it.
  const id = `h${seq + 1}`;
  const entrySeq = next();
  return {
    type: "harness_message",
    id,
    seq: entrySeq,
    createdAt: 0,
    message: msg("assistant", content),
    turnId,
  };
}
|
|
20
|
+
/**
 * Builds a "user_message" entry on turn "t1".
 * The id is `u<n>` where n is the sequence number this entry receives;
 * `opts.hidden` is copied onto the entry as-is (possibly undefined).
 */
function user(content: string, opts: { hidden?: boolean } = {}): ConversationEntry {
  // The id is read from the counter before next() advances it.
  const id = `u${seq + 1}`;
  const entrySeq = next();
  return {
    type: "user_message",
    id,
    seq: entrySeq,
    createdAt: 0,
    message: msg("user", content),
    turnId: "t1",
    hidden: opts.hidden,
  };
}
|
|
24
|
+
/**
 * Builds an "assistant_message" entry with an explicit id, on turn "t1" and
 * run "r1", taking the next sequence number.
 */
function assistant(id: string, content: string): ConversationEntry {
  const entrySeq = next();
  return {
    type: "assistant_message",
    id,
    seq: entrySeq,
    createdAt: 0,
    message: msg("assistant", content),
    turnId: "t1",
    runId: "r1",
  };
}
|
|
28
|
+
|
|
29
|
+
// Tests for buildLlmContext: the message list derived from harness entries,
// with the most recent compaction entry applied as an overlay.
describe("buildLlmContext", () => {
  // Local factory for compaction entries; takes the next sequence number.
  const compactionEntry = (
    id: string,
    summary: string,
    firstKeptSeq: number,
  ): ConversationEntry => ({
    type: "compaction",
    id,
    seq: next(),
    createdAt: 0,
    summaryMessage: msg("user", summary),
    firstKeptSeq,
  });

  it("returns all harness messages in order with no compaction", () => {
    reset();
    const entries = ["a", "b", "c"].map((text) => harness(text));
    const contents = buildLlmContext(entries).map((m) => m.content);
    expect(contents).toEqual(["a", "b", "c"]);
  });

  it("applies a compaction overlay: summary + messages from firstKeptSeq", () => {
    reset();
    // Harness entries receive seq 1..4 in creation order.
    const [old1, old2, kept3, kept4] = ["old1", "old2", "kept3", "kept4"].map(
      (text) => harness(text),
    );
    const overlay = compactionEntry("c1", "[summary]", 3);

    const context = buildLlmContext([old1!, old2!, kept3!, kept4!, overlay]);

    expect(context.map((m) => m.content)).toEqual(["[summary]", "kept3", "kept4"]);
  });

  it("uses the LATEST compaction when several exist (layered)", () => {
    reset();
    const first = harness("a"); // seq 1
    const olderOverlay = compactionEntry("c1", "[sum1]", 1); // seq 2
    const second = harness("b"); // seq 3
    const newerOverlay = compactionEntry("c2", "[sum2]", 3); // seq 4

    const context = buildLlmContext([first, olderOverlay, second, newerOverlay]);

    expect(context.map((m) => m.content)).toEqual(["[sum2]", "b"]);
  });
});
|
|
60
|
+
|
|
61
|
+
// Tests for buildDisplaySnapshot: the user-visible message list built from
// conversation entries, limited to a trailing window of messages.
describe("buildDisplaySnapshot", () => {
  it("drops hidden user messages and returns the tail", () => {
    reset();
    const entries = [
      user("hidden-framed", { hidden: true }),
      user("hello"),
      assistant("a1", "hi"),
      user("again"),
      assistant("a2", "yo"),
    ];
    const snap = buildDisplaySnapshot(entries, 10);
    // The hidden entry is expected to be absent from both the list and the count.
    expect(snap.messages.map((m) => m.content)).toEqual(["hello", "hi", "again", "yo"]);
    expect(snap.totalMessages).toBe(4);
  });

  it("folds amendments into their target assistant message", () => {
    reset();
    // Create the entries in conversation order so their seq values (1, 2, 3)
    // agree with their positions in the array. Building the user entry inline
    // in the array literal would give it seq 3 while it sits first, making the
    // fixture depend on the implementation never ordering by seq.
    const question = user("q");
    const answer = assistant("a1", "part1");
    const amend: ConversationEntry = {
      type: "assistant_amendment", id: "am1", seq: next(), createdAt: 0,
      targetEntryId: "a1", appendText: " + part2",
    };
    const snap = buildDisplaySnapshot([question, answer, amend], 10);
    // The amendment text is expected to be appended to the targeted message.
    expect(snap.messages.map((m) => m.content)).toEqual(["q", "part1 + part2"]);
  });

  it("returns only the trailing tailN messages", () => {
    reset();
    const entries = [user("1"), assistant("a", "2"), user("3"), assistant("b", "4")];
    const snap = buildDisplaySnapshot(entries, 2);
    // Only the last two messages are returned, while the total still counts all four.
    expect(snap.messages.map((m) => m.content)).toEqual(["3", "4"]);
    expect(snap.totalMessages).toBe(4);
  });
});
|
|
95
|
+
|
|
96
|
+
// Tests for getPendingSubagentResults: subagent results whose seq has not
// been listed in any callback_started entry's consumedSeqs.
describe("getPendingSubagentResults", () => {
  // Local factory for a completed subagent_result entry; takes the next seq.
  function result(subagentId: string): ConversationEntry {
    return {
      type: "subagent_result",
      id: `sr-${subagentId}`,
      seq: next(),
      createdAt: 0,
      result: { subagentId, task: "t", status: "completed", timestamp: 0 },
    };
  }

  // Local factory for a callback_started entry (id "cb1") that consumes the
  // given result sequence numbers; takes the next seq.
  function callbackConsuming(consumedSeqs: number[]): ConversationEntry {
    return {
      type: "callback_started",
      id: "cb1",
      seq: next(),
      createdAt: 0,
      consumedSeqs,
    };
  }

  it("returns results not yet consumed by a callback", () => {
    reset();
    const first = result("s1"); // seq 1
    const second = result("s2"); // seq 2
    const callback = callbackConsuming([1]); // seq 3
    const third = result("s3"); // seq 4

    const pending = getPendingSubagentResults([first, second, callback, third]);

    // s1 was consumed; s2 and s3 remain pending.
    const pendingIds = pending.map((p) => p.subagentId);
    expect(pendingIds).toEqual(["s2", "s3"]);
  });

  it("returns empty when all consumed", () => {
    reset();
    const only = result("s1"); // seq 1
    const callback = callbackConsuming([1]); // seq 2

    const pending = getPendingSubagentResults([only, callback]);

    expect(pending).toEqual([]);
  });
});
|