@poncho-ai/harness 0.53.0 → 0.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +41 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +101 -8
- package/package.json +1 -1
- package/src/compaction.ts +206 -13
- package/src/storage/entries.ts +204 -0
- package/test/compaction.test.ts +274 -0
- package/test/entries.test.ts +125 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/harness@0.53.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
2
|
+
> @poncho-ai/harness@0.55.0 build /home/runner/work/poncho-ai/poncho-ai/packages/harness
|
|
3
3
|
> node scripts/embed-docs.js && tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[embed-docs] Generated poncho-docs.ts with 4 topics
|
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
CLI Target: es2022
|
|
10
10
|
ESM Build start
|
|
11
11
|
ESM dist/isolate-F2PPSUL6.js 53.82 KB
|
|
12
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
13
|
-
[32mESM[39m ⚡️ Build success in
|
|
12
|
+
ESM dist/index.js 545.48 KB
|
|
13
|
+
ESM ⚡️ Build success in 258ms
|
|
14
14
|
DTS Build start
|
|
15
|
-
[32mDTS[39m ⚡️ Build success in
|
|
16
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[32m94.
|
|
15
|
+
DTS ⚡️ Build success in 7628ms
|
|
16
|
+
DTS dist/index.d.ts 94.38 KB
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,46 @@
|
|
|
1
1
|
# @poncho-ai/harness
|
|
2
2
|
|
|
3
|
+
## 0.55.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- [#149](https://github.com/cesr/poncho-ai/pull/149) [`f5a8260`](https://github.com/cesr/poncho-ai/commit/f5a8260d0515038afc1797d00507908c334115ff) Thanks [@cesr](https://github.com/cesr)! - compaction: preserve subagent context and prior summaries, harden the split
|
|
8
|
+
|
|
9
|
+
Three improvements to context compaction (fires at ~75% context):
|
|
10
|
+
- **Split safety**: `findSafeSplitPoint` now refuses a split whose compacted
|
|
11
|
+
side would end on an assistant message with unanswered `tool_calls` (its
|
|
12
|
+
answering `role:"tool"` result having moved to the preserved side), walking
|
|
13
|
+
earlier to the next clean `user` boundary. Prevents orphaning a tool-call
|
|
14
|
+
relationship inside the summary boundary. Still returns `-1` when no safe
|
|
15
|
+
point exists.
|
|
16
|
+
- **Subagent ledger**: while compacting, scans for subagent-callback records
|
|
17
|
+
(metadata `_subagentCallback`/`subagentCallback`, or text starting with
|
|
18
|
+
`[Subagent Result]`) and any `## Subagents` block embedded in a prior
|
|
19
|
+
compaction summary, then renders a combined, deduped (by `subagentId`)
|
|
20
|
+
ledger that is appended VERBATIM after the LLM summary text — so the model
|
|
21
|
+
can never paraphrase or truncate subagent results away. Cumulative across
|
|
22
|
+
successive compactions.
|
|
23
|
+
- **Cumulative summary**: when the first compacted message is itself a prior
|
|
24
|
+
compaction summary, it is passed to the summarizer in full (not truncated
|
|
25
|
+
to 1200 chars) and the prompt instructs the model to merge-and-update the
|
|
26
|
+
prior working state rather than re-summarize it from scratch. All other
|
|
27
|
+
messages keep the 1200-char truncation.
|
|
28
|
+
|
|
29
|
+
## 0.54.0
|
|
30
|
+
|
|
31
|
+
### Minor Changes
|
|
32
|
+
|
|
33
|
+
- [#147](https://github.com/cesr/poncho-ai/pull/147) [`a3eed14`](https://github.com/cesr/poncho-ai/commit/a3eed142832318b6397cd73819d3296c79d6eff0) Thanks [@cesr](https://github.com/cesr)! - storage: add append-only conversation-entry substrate (unused groundwork)
|
|
34
|
+
|
|
35
|
+
Pure entry types + rebuild functions (`buildLlmContext`,
|
|
36
|
+
`buildDisplaySnapshot`, `getPendingSubagentResults`) for the eventual
|
|
37
|
+
append-only conversation model that removes the mutable-blob clobber race
|
|
38
|
+
(the root cause behind lost subagent results). No storage-engine wiring
|
|
39
|
+
and no live callers yet — additive, deploys nothing behavioral. The
|
|
40
|
+
rebuild logic (compaction overlay, amendment folding, callback-consumption)
|
|
41
|
+
is covered by unit tests so the design is proven before the bigger
|
|
42
|
+
dual-write / migration / cutover PRs.
|
|
43
|
+
|
|
3
44
|
## 0.53.0
|
|
4
45
|
|
|
5
46
|
### Minor Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -96,6 +96,12 @@ declare const estimateTotalTokens: (systemPrompt: string, messages: Message[], t
|
|
|
96
96
|
* and everything from it onward is preserved. The split always lands just
|
|
97
97
|
* before a `user` message to avoid breaking assistant+tool pairs.
|
|
98
98
|
*
|
|
99
|
+
* Defensive guard: even at a `user` boundary, refuse a split whose compacted
|
|
100
|
+
* side would END on an assistant message with unanswered tool_calls (its
|
|
101
|
+
* `tool` result having moved to the preserved side). Such a split would
|
|
102
|
+
* orphan the tool_calls inside the summary boundary. When that happens we
|
|
103
|
+
* walk earlier to the next safe `user` boundary.
|
|
104
|
+
*
|
|
99
105
|
* Returns -1 if no valid split point is found.
|
|
100
106
|
*/
|
|
101
107
|
declare const findSafeSplitPoint: (messages: Message[], keepRecentMessages: number) => number;
|
package/dist/index.js
CHANGED
|
@@ -374,6 +374,9 @@ var SUMMARIZATION_PROMPT = `Summarize the following conversation into a structur
|
|
|
374
374
|
|
|
375
375
|
Be concise but preserve all information needed to continue the task.
|
|
376
376
|
Omit any section that has no relevant content.`;
|
|
377
|
+
var CUMULATIVE_SUMMARY_PROMPT = `The FIRST message below (tagged [prior-summary]) is an existing working-state summary produced by an earlier compaction. Treat it as the authoritative prior working state: MERGE AND UPDATE it with the newer messages that follow it, carrying forward all still-relevant detail. Do NOT discard or re-compress information from the prior summary just because it is older \u2014 only drop it if the newer messages explicitly supersede it.`;
|
|
378
|
+
var SUBAGENT_DIGEST_CHARS = 500;
|
|
379
|
+
var SUBAGENT_LEDGER_HEADING = "## Subagents";
|
|
377
380
|
var resolveCompactionConfig = (explicit) => {
|
|
378
381
|
if (!explicit) return { ...DEFAULT_COMPACTION_CONFIG };
|
|
379
382
|
return {
|
|
@@ -398,32 +401,57 @@ var estimateTotalTokens = (systemPrompt, messages, toolDefinitionsJson) => {
|
|
|
398
401
|
}
|
|
399
402
|
return tokens;
|
|
400
403
|
};
|
|
404
|
+
var assistantHasToolCalls = (msg) => {
|
|
405
|
+
if (msg.role !== "assistant") return false;
|
|
406
|
+
if (typeof msg.content !== "string") return false;
|
|
407
|
+
if (!msg.content.includes('"tool_calls"')) return false;
|
|
408
|
+
try {
|
|
409
|
+
const parsed = JSON.parse(msg.content);
|
|
410
|
+
return Array.isArray(parsed.tool_calls) && parsed.tool_calls.length > 0;
|
|
411
|
+
} catch {
|
|
412
|
+
return false;
|
|
413
|
+
}
|
|
414
|
+
};
|
|
415
|
+
var splitOrphansToolCalls = (messages, idx) => {
|
|
416
|
+
if (idx <= 0 || idx >= messages.length) return false;
|
|
417
|
+
const lastCompacted = messages[idx - 1];
|
|
418
|
+
return assistantHasToolCalls(lastCompacted);
|
|
419
|
+
};
|
|
401
420
|
var findSafeSplitPoint = (messages, keepRecentMessages) => {
|
|
402
421
|
const candidateIdx = messages.length - keepRecentMessages;
|
|
403
422
|
if (candidateIdx < MIN_COMPACTABLE_MESSAGES) return -1;
|
|
404
423
|
for (let i = candidateIdx; i >= MIN_COMPACTABLE_MESSAGES; i--) {
|
|
405
|
-
if (messages[i].role === "user") {
|
|
424
|
+
if (messages[i].role === "user" && !splitOrphansToolCalls(messages, i)) {
|
|
406
425
|
return i;
|
|
407
426
|
}
|
|
408
427
|
}
|
|
409
428
|
for (let i = candidateIdx + 1; i < messages.length - 1; i++) {
|
|
410
|
-
if (messages[i].role === "user") {
|
|
429
|
+
if (messages[i].role === "user" && !splitOrphansToolCalls(messages, i)) {
|
|
411
430
|
if (i < MIN_COMPACTABLE_MESSAGES) return -1;
|
|
412
431
|
return i;
|
|
413
432
|
}
|
|
414
433
|
}
|
|
415
434
|
return -1;
|
|
416
435
|
};
|
|
436
|
+
var isCompactionSummary = (msg) => msg.metadata?.isCompactionSummary === true;
|
|
417
437
|
var buildSummarizationMessages = (messagesToCompact, instructions) => {
|
|
438
|
+
const hasPriorSummary = messagesToCompact.length > 0 && isCompactionSummary(messagesToCompact[0]);
|
|
418
439
|
const conversationLines = [];
|
|
419
|
-
for (
|
|
440
|
+
for (let i = 0; i < messagesToCompact.length; i++) {
|
|
441
|
+
const msg = messagesToCompact[i];
|
|
420
442
|
const text = getTextContent(msg);
|
|
421
|
-
const
|
|
422
|
-
|
|
443
|
+
const isPrior = i === 0 && hasPriorSummary;
|
|
444
|
+
const rendered = isPrior || text.length <= SUMMARIZATION_MESSAGE_TRUNCATION_CHARS ? text : text.slice(0, SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) + "\n...[truncated]";
|
|
445
|
+
const tag = isPrior ? "prior-summary" : msg.role;
|
|
446
|
+
conversationLines.push(`[${tag}]: ${rendered}`);
|
|
423
447
|
}
|
|
424
|
-
|
|
448
|
+
let prompt = SUMMARIZATION_PROMPT;
|
|
449
|
+
if (hasPriorSummary) prompt = `${prompt}
|
|
450
|
+
|
|
451
|
+
${CUMULATIVE_SUMMARY_PROMPT}`;
|
|
452
|
+
if (instructions) prompt = `${prompt}
|
|
425
453
|
|
|
426
|
-
Additional focus: ${instructions}
|
|
454
|
+
Additional focus: ${instructions}`;
|
|
427
455
|
return [
|
|
428
456
|
{
|
|
429
457
|
role: "user",
|
|
@@ -435,6 +463,67 @@ ${conversationLines.join("\n\n")}`
|
|
|
435
463
|
}
|
|
436
464
|
];
|
|
437
465
|
};
|
|
466
|
+
var SUBAGENT_RESULT_HEADER = /^\[Subagent Result\] Subagent "([^"]*)" \(([^)]*)\) (\S+):/;
|
|
467
|
+
var parseSubagentCallback = (msg) => {
|
|
468
|
+
if (msg.role !== "user") return null;
|
|
469
|
+
const meta = msg.metadata ?? {};
|
|
470
|
+
const text = getTextContent(msg);
|
|
471
|
+
const hasMetaFlag = meta._subagentCallback === true || meta.subagentCallback === true;
|
|
472
|
+
const hasTextMarker = text.startsWith("[Subagent Result]");
|
|
473
|
+
if (!hasMetaFlag && !hasTextMarker) return null;
|
|
474
|
+
const headerMatch = text.match(SUBAGENT_RESULT_HEADER);
|
|
475
|
+
const subagentId = typeof meta.subagentId === "string" && meta.subagentId ? meta.subagentId : headerMatch?.[2] ?? "";
|
|
476
|
+
if (!subagentId) return null;
|
|
477
|
+
const task = typeof meta.task === "string" && meta.task ? meta.task : headerMatch?.[1] ?? "";
|
|
478
|
+
const status = headerMatch?.[3] ?? "completed";
|
|
479
|
+
const bodyStart = text.indexOf("\n\n");
|
|
480
|
+
const body = bodyStart >= 0 ? text.slice(bodyStart + 2) : text;
|
|
481
|
+
const digest = body.length > SUBAGENT_DIGEST_CHARS ? body.slice(0, SUBAGENT_DIGEST_CHARS) + "\u2026" : body;
|
|
482
|
+
return { subagentId, task, status, digest };
|
|
483
|
+
};
|
|
484
|
+
var parsePriorLedger = (summaryText) => {
|
|
485
|
+
const headingIdx = summaryText.indexOf(SUBAGENT_LEDGER_HEADING);
|
|
486
|
+
if (headingIdx < 0) return [];
|
|
487
|
+
const block = summaryText.slice(headingIdx + SUBAGENT_LEDGER_HEADING.length);
|
|
488
|
+
const entries = [];
|
|
489
|
+
const entryRe = /^- \*\*(.*?)\*\* \((.+?)\) — (\S+)\n {2}(.*)$/gm;
|
|
490
|
+
let m;
|
|
491
|
+
while ((m = entryRe.exec(block)) !== null) {
|
|
492
|
+
entries.push({
|
|
493
|
+
task: m[1],
|
|
494
|
+
subagentId: m[2],
|
|
495
|
+
status: m[3],
|
|
496
|
+
digest: m[4]
|
|
497
|
+
});
|
|
498
|
+
}
|
|
499
|
+
return entries;
|
|
500
|
+
};
|
|
501
|
+
var collectSubagentLedger = (messagesToCompact) => {
|
|
502
|
+
const byId = /* @__PURE__ */ new Map();
|
|
503
|
+
const order = [];
|
|
504
|
+
const upsert = (entry) => {
|
|
505
|
+
if (!byId.has(entry.subagentId)) order.push(entry.subagentId);
|
|
506
|
+
byId.set(entry.subagentId, entry);
|
|
507
|
+
};
|
|
508
|
+
for (const msg of messagesToCompact) {
|
|
509
|
+
if (isCompactionSummary(msg)) {
|
|
510
|
+
for (const prior of parsePriorLedger(getTextContent(msg))) upsert(prior);
|
|
511
|
+
continue;
|
|
512
|
+
}
|
|
513
|
+
const entry = parseSubagentCallback(msg);
|
|
514
|
+
if (entry) upsert(entry);
|
|
515
|
+
}
|
|
516
|
+
return order.map((id) => byId.get(id));
|
|
517
|
+
};
|
|
518
|
+
var renderSubagentLedger = (entries) => {
|
|
519
|
+
if (entries.length === 0) return "";
|
|
520
|
+
const lines = entries.map(
|
|
521
|
+
(e) => `- **${e.task}** (${e.subagentId}) \u2014 ${e.status}
|
|
522
|
+
${e.digest.replace(/\n/g, " ")}`
|
|
523
|
+
);
|
|
524
|
+
return `${SUBAGENT_LEDGER_HEADING}
|
|
525
|
+
${lines.join("\n")}`;
|
|
526
|
+
};
|
|
438
527
|
var buildContinuationMessage = (summary) => ({
|
|
439
528
|
role: "user",
|
|
440
529
|
content: `[CONTEXT COMPACTION] This conversation was automatically compacted. The summary below covers earlier messages.
|
|
@@ -483,7 +572,11 @@ var compactMessages = async (model, messages, config, options) => {
|
|
|
483
572
|
warning: "Summarization returned empty result"
|
|
484
573
|
};
|
|
485
574
|
}
|
|
486
|
-
const
|
|
575
|
+
const ledger = renderSubagentLedger(collectSubagentLedger(toCompact));
|
|
576
|
+
const summaryWithLedger = ledger ? `${summary}
|
|
577
|
+
|
|
578
|
+
${ledger}` : summary;
|
|
579
|
+
const continuationMessage = buildContinuationMessage(summaryWithLedger);
|
|
487
580
|
const compactedMessages = [continuationMessage, ...toPreserve];
|
|
488
581
|
return {
|
|
489
582
|
compacted: true,
|
package/package.json
CHANGED
package/src/compaction.ts
CHANGED
|
@@ -26,6 +26,20 @@ const SUMMARIZATION_PROMPT = `Summarize the following conversation into a struct
|
|
|
26
26
|
Be concise but preserve all information needed to continue the task.
|
|
27
27
|
Omit any section that has no relevant content.`;
|
|
28
28
|
|
|
29
|
+
/**
|
|
30
|
+
* Extra instruction appended when the first compacted message is itself a
|
|
31
|
+
* prior compaction summary. The model must treat that block as the existing
|
|
32
|
+
* working state and produce an updated, merged version rather than
|
|
33
|
+
* re-summarizing the (already lossy) summary from scratch.
|
|
34
|
+
*/
|
|
35
|
+
const CUMULATIVE_SUMMARY_PROMPT = `The FIRST message below (tagged [prior-summary]) is an existing working-state summary produced by an earlier compaction. Treat it as the authoritative prior working state: MERGE AND UPDATE it with the newer messages that follow it, carrying forward all still-relevant detail. Do NOT discard or re-compress information from the prior summary just because it is older — only drop it if the newer messages explicitly supersede it.`;
|
|
36
|
+
|
|
37
|
+
/** Max chars of a subagent result text kept verbatim in the ledger digest. */
|
|
38
|
+
const SUBAGENT_DIGEST_CHARS = 500;
|
|
39
|
+
|
|
40
|
+
/** Heading used for the verbatim, model-proof subagent ledger block. */
|
|
41
|
+
const SUBAGENT_LEDGER_HEADING = "## Subagents";
|
|
42
|
+
|
|
29
43
|
export const resolveCompactionConfig = (
|
|
30
44
|
explicit?: Partial<CompactionConfig>,
|
|
31
45
|
): CompactionConfig => {
|
|
@@ -78,11 +92,48 @@ export const estimateTotalTokens = (
|
|
|
78
92
|
return tokens;
|
|
79
93
|
};
|
|
80
94
|
|
|
95
|
+
/**
|
|
96
|
+
* Whether an assistant message carries serialized tool_calls. Assistant
|
|
97
|
+
* tool-call turns serialize their content as a JSON string of the shape
|
|
98
|
+
* `{"text":...,"tool_calls":[...]}` (see the harness run loop). A plain-text
|
|
99
|
+
* assistant message returns false.
|
|
100
|
+
*/
|
|
101
|
+
const assistantHasToolCalls = (msg: Message): boolean => {
|
|
102
|
+
if (msg.role !== "assistant") return false;
|
|
103
|
+
if (typeof msg.content !== "string") return false;
|
|
104
|
+
if (!msg.content.includes('"tool_calls"')) return false;
|
|
105
|
+
try {
|
|
106
|
+
const parsed = JSON.parse(msg.content) as { tool_calls?: unknown };
|
|
107
|
+
return Array.isArray(parsed.tool_calls) && parsed.tool_calls.length > 0;
|
|
108
|
+
} catch {
|
|
109
|
+
return false;
|
|
110
|
+
}
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Whether splitting at `idx` would orphan a tool-call relationship on the
|
|
115
|
+
* COMPACTED side — i.e. the last compacted message (`messages[idx-1]`) is an
|
|
116
|
+
* assistant message with tool_calls whose answering `role:"tool"` result
|
|
117
|
+
* lives on the PRESERVED side (`messages[idx]`). Folding only the assistant
|
|
118
|
+
* call into the summary strands the tool_calls with no matching result.
|
|
119
|
+
*/
|
|
120
|
+
const splitOrphansToolCalls = (messages: Message[], idx: number): boolean => {
|
|
121
|
+
if (idx <= 0 || idx >= messages.length) return false;
|
|
122
|
+
const lastCompacted = messages[idx - 1]!;
|
|
123
|
+
return assistantHasToolCalls(lastCompacted);
|
|
124
|
+
};
|
|
125
|
+
|
|
81
126
|
/**
|
|
82
127
|
* Find the safe split index so that everything before it can be compacted
|
|
83
128
|
* and everything from it onward is preserved. The split always lands just
|
|
84
129
|
* before a `user` message to avoid breaking assistant+tool pairs.
|
|
85
130
|
*
|
|
131
|
+
* Defensive guard: even at a `user` boundary, refuse a split whose compacted
|
|
132
|
+
* side would END on an assistant message with unanswered tool_calls (its
|
|
133
|
+
* `tool` result having moved to the preserved side). Such a split would
|
|
134
|
+
* orphan the tool_calls inside the summary boundary. When that happens we
|
|
135
|
+
* walk earlier to the next safe `user` boundary.
|
|
136
|
+
*
|
|
86
137
|
* Returns -1 if no valid split point is found.
|
|
87
138
|
*/
|
|
88
139
|
export const findSafeSplitPoint = (
|
|
@@ -92,16 +143,17 @@ export const findSafeSplitPoint = (
|
|
|
92
143
|
const candidateIdx = messages.length - keepRecentMessages;
|
|
93
144
|
if (candidateIdx < MIN_COMPACTABLE_MESSAGES) return -1;
|
|
94
145
|
|
|
95
|
-
// Walk backwards from candidate to find a user message boundary
|
|
146
|
+
// Walk backwards from candidate to find a user message boundary that does
|
|
147
|
+
// not orphan a tool-call relationship on the compacted side.
|
|
96
148
|
for (let i = candidateIdx; i >= MIN_COMPACTABLE_MESSAGES; i--) {
|
|
97
|
-
if (messages[i]!.role === "user") {
|
|
149
|
+
if (messages[i]!.role === "user" && !splitOrphansToolCalls(messages, i)) {
|
|
98
150
|
return i;
|
|
99
151
|
}
|
|
100
152
|
}
|
|
101
153
|
|
|
102
|
-
// Walk forwards from candidate as fallback
|
|
154
|
+
// Walk forwards from candidate as fallback.
|
|
103
155
|
for (let i = candidateIdx + 1; i < messages.length - 1; i++) {
|
|
104
|
-
if (messages[i]!.role === "user") {
|
|
156
|
+
if (messages[i]!.role === "user" && !splitOrphansToolCalls(messages, i)) {
|
|
105
157
|
if (i < MIN_COMPACTABLE_MESSAGES) return -1;
|
|
106
158
|
return i;
|
|
107
159
|
}
|
|
@@ -110,25 +162,46 @@ export const findSafeSplitPoint = (
|
|
|
110
162
|
return -1;
|
|
111
163
|
};
|
|
112
164
|
|
|
165
|
+
/**
|
|
166
|
+
* Whether a message is itself a prior compaction summary.
|
|
167
|
+
*/
|
|
168
|
+
const isCompactionSummary = (msg: Message): boolean =>
|
|
169
|
+
msg.metadata?.isCompactionSummary === true;
|
|
170
|
+
|
|
113
171
|
/**
|
|
114
172
|
* Build the summarization messages for the generateText call.
|
|
173
|
+
*
|
|
174
|
+
* Cumulative behavior: when the FIRST compacted message is itself a prior
|
|
175
|
+
* compaction summary, it is passed in FULL (not truncated to
|
|
176
|
+
* SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) and tagged `[prior-summary]`, and
|
|
177
|
+
* the prompt instructs the model to merge-and-update rather than
|
|
178
|
+
* re-summarize. All other messages keep the 1200-char truncation.
|
|
115
179
|
*/
|
|
116
180
|
const buildSummarizationMessages = (
|
|
117
181
|
messagesToCompact: Message[],
|
|
118
182
|
instructions?: string,
|
|
119
183
|
): Array<{ role: "user"; content: string }> => {
|
|
184
|
+
const hasPriorSummary =
|
|
185
|
+
messagesToCompact.length > 0 && isCompactionSummary(messagesToCompact[0]!);
|
|
186
|
+
|
|
120
187
|
const conversationLines: string[] = [];
|
|
121
|
-
for (
|
|
188
|
+
for (let i = 0; i < messagesToCompact.length; i++) {
|
|
189
|
+
const msg = messagesToCompact[i]!;
|
|
122
190
|
const text = getTextContent(msg);
|
|
123
|
-
const
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
191
|
+
const isPrior = i === 0 && hasPriorSummary;
|
|
192
|
+
// The prior summary is the working state we must not lose — pass it whole.
|
|
193
|
+
const rendered =
|
|
194
|
+
isPrior || text.length <= SUMMARIZATION_MESSAGE_TRUNCATION_CHARS
|
|
195
|
+
? text
|
|
196
|
+
: text.slice(0, SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) +
|
|
197
|
+
"\n...[truncated]";
|
|
198
|
+
const tag = isPrior ? "prior-summary" : msg.role;
|
|
199
|
+
conversationLines.push(`[${tag}]: ${rendered}`);
|
|
127
200
|
}
|
|
128
201
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
202
|
+
let prompt = SUMMARIZATION_PROMPT;
|
|
203
|
+
if (hasPriorSummary) prompt = `${prompt}\n\n${CUMULATIVE_SUMMARY_PROMPT}`;
|
|
204
|
+
if (instructions) prompt = `${prompt}\n\nAdditional focus: ${instructions}`;
|
|
132
205
|
|
|
133
206
|
return [
|
|
134
207
|
{
|
|
@@ -138,6 +211,121 @@ const buildSummarizationMessages = (
|
|
|
138
211
|
];
|
|
139
212
|
};
|
|
140
213
|
|
|
214
|
+
interface SubagentLedgerEntry {
|
|
215
|
+
subagentId: string;
|
|
216
|
+
task: string;
|
|
217
|
+
status: string;
|
|
218
|
+
digest: string;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/** Match the header line of an injected subagent callback message. */
|
|
222
|
+
const SUBAGENT_RESULT_HEADER =
|
|
223
|
+
/^\[Subagent Result\] Subagent "([^"]*)" \(([^)]*)\) (\S+):/;
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Parse the metadata + text of a subagent-callback user message into a ledger
|
|
227
|
+
* entry. Returns null when the message is not a subagent callback.
|
|
228
|
+
*/
|
|
229
|
+
const parseSubagentCallback = (msg: Message): SubagentLedgerEntry | null => {
|
|
230
|
+
if (msg.role !== "user") return null;
|
|
231
|
+
const meta = (msg.metadata ?? {}) as Record<string, unknown>;
|
|
232
|
+
const text = getTextContent(msg);
|
|
233
|
+
const hasMetaFlag =
|
|
234
|
+
meta._subagentCallback === true || meta.subagentCallback === true;
|
|
235
|
+
const hasTextMarker = text.startsWith("[Subagent Result]");
|
|
236
|
+
if (!hasMetaFlag && !hasTextMarker) return null;
|
|
237
|
+
|
|
238
|
+
// Prefer structured metadata, fall back to parsing the header line.
|
|
239
|
+
const headerMatch = text.match(SUBAGENT_RESULT_HEADER);
|
|
240
|
+
const subagentId =
|
|
241
|
+
typeof meta.subagentId === "string" && meta.subagentId
|
|
242
|
+
? meta.subagentId
|
|
243
|
+
: headerMatch?.[2] ?? "";
|
|
244
|
+
if (!subagentId) return null;
|
|
245
|
+
const task =
|
|
246
|
+
typeof meta.task === "string" && meta.task
|
|
247
|
+
? meta.task
|
|
248
|
+
: headerMatch?.[1] ?? "";
|
|
249
|
+
const status = headerMatch?.[3] ?? "completed";
|
|
250
|
+
|
|
251
|
+
// Digest = the body after the header line (the result text), capped.
|
|
252
|
+
const bodyStart = text.indexOf("\n\n");
|
|
253
|
+
const body = bodyStart >= 0 ? text.slice(bodyStart + 2) : text;
|
|
254
|
+
const digest =
|
|
255
|
+
body.length > SUBAGENT_DIGEST_CHARS
|
|
256
|
+
? body.slice(0, SUBAGENT_DIGEST_CHARS) + "…"
|
|
257
|
+
: body;
|
|
258
|
+
|
|
259
|
+
return { subagentId, task, status, digest };
|
|
260
|
+
};
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* Parse a prior `## Subagents` ledger block out of an existing compaction
|
|
264
|
+
* summary's content so it can be carried forward cumulatively. The block is
|
|
265
|
+
* rendered by `renderSubagentLedger`, so we parse that same shape.
|
|
266
|
+
*/
|
|
267
|
+
const parsePriorLedger = (summaryText: string): SubagentLedgerEntry[] => {
|
|
268
|
+
const headingIdx = summaryText.indexOf(SUBAGENT_LEDGER_HEADING);
|
|
269
|
+
if (headingIdx < 0) return [];
|
|
270
|
+
const block = summaryText.slice(headingIdx + SUBAGENT_LEDGER_HEADING.length);
|
|
271
|
+
const entries: SubagentLedgerEntry[] = [];
|
|
272
|
+
// Each entry: a bullet line "- **<task>** (<id>) — <status>" then a digest
|
|
273
|
+
// line. We tolerate missing digest lines.
|
|
274
|
+
const entryRe =
|
|
275
|
+
/^- \*\*(.*?)\*\* \((.+?)\) — (\S+)\n {2}(.*)$/gm;
|
|
276
|
+
let m: RegExpExecArray | null;
|
|
277
|
+
while ((m = entryRe.exec(block)) !== null) {
|
|
278
|
+
entries.push({
|
|
279
|
+
task: m[1]!,
|
|
280
|
+
subagentId: m[2]!,
|
|
281
|
+
status: m[3]!,
|
|
282
|
+
digest: m[4]!,
|
|
283
|
+
});
|
|
284
|
+
}
|
|
285
|
+
return entries;
|
|
286
|
+
};
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Scan the messages being compacted for subagent-callback records and any
|
|
290
|
+
* prior ledger embedded in a compaction summary, returning a combined,
|
|
291
|
+
* deduped (by subagentId, last-write-wins) list in first-seen order.
|
|
292
|
+
*/
|
|
293
|
+
const collectSubagentLedger = (
|
|
294
|
+
messagesToCompact: Message[],
|
|
295
|
+
): SubagentLedgerEntry[] => {
|
|
296
|
+
const byId = new Map<string, SubagentLedgerEntry>();
|
|
297
|
+
const order: string[] = [];
|
|
298
|
+
const upsert = (entry: SubagentLedgerEntry) => {
|
|
299
|
+
if (!byId.has(entry.subagentId)) order.push(entry.subagentId);
|
|
300
|
+
byId.set(entry.subagentId, entry);
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
for (const msg of messagesToCompact) {
|
|
304
|
+
if (isCompactionSummary(msg)) {
|
|
305
|
+
for (const prior of parsePriorLedger(getTextContent(msg))) upsert(prior);
|
|
306
|
+
continue;
|
|
307
|
+
}
|
|
308
|
+
const entry = parseSubagentCallback(msg);
|
|
309
|
+
if (entry) upsert(entry);
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
return order.map((id) => byId.get(id)!);
|
|
313
|
+
};
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* Render the subagent ledger as a verbatim markdown block appended to the
|
|
317
|
+
* summary AFTER the LLM text, so the model can never paraphrase it away.
|
|
318
|
+
* Returns "" when there are no subagents.
|
|
319
|
+
*/
|
|
320
|
+
const renderSubagentLedger = (entries: SubagentLedgerEntry[]): string => {
|
|
321
|
+
if (entries.length === 0) return "";
|
|
322
|
+
const lines = entries.map(
|
|
323
|
+
(e) =>
|
|
324
|
+
`- **${e.task}** (${e.subagentId}) — ${e.status}\n ${e.digest.replace(/\n/g, " ")}`,
|
|
325
|
+
);
|
|
326
|
+
return `${SUBAGENT_LEDGER_HEADING}\n${lines.join("\n")}`;
|
|
327
|
+
};
|
|
328
|
+
|
|
141
329
|
/**
|
|
142
330
|
* Build the continuation message that replaces compacted messages.
|
|
143
331
|
*/
|
|
@@ -217,7 +405,12 @@ export const compactMessages = async (
|
|
|
217
405
|
};
|
|
218
406
|
}
|
|
219
407
|
|
|
220
|
-
|
|
408
|
+
// Append the subagent ledger AFTER the LLM summary, verbatim, so the
|
|
409
|
+
// model's paraphrasing can never drop or truncate subagent results.
|
|
410
|
+
const ledger = renderSubagentLedger(collectSubagentLedger(toCompact));
|
|
411
|
+
const summaryWithLedger = ledger ? `${summary}\n\n${ledger}` : summary;
|
|
412
|
+
|
|
413
|
+
const continuationMessage = buildContinuationMessage(summaryWithLedger);
|
|
221
414
|
const compactedMessages = [continuationMessage, ...toPreserve];
|
|
222
415
|
|
|
223
416
|
return {
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import type { Message } from "@poncho-ai/sdk";
|
|
2
|
+
import type { PendingSubagentResult } from "../state.js";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Append-only conversation entries (Phase 3 substrate).
|
|
6
|
+
*
|
|
7
|
+
* The eventual replacement for the mutable per-conversation JSON blob: a
|
|
8
|
+
* conversation becomes an ordered, append-only list of entries, and the
|
|
9
|
+
* mutable-blob clobber race (two writers serializing a stale whole-blob
|
|
10
|
+
* snapshot over each other — the root cause behind lost subagent results)
|
|
11
|
+
* stops being expressible.
|
|
12
|
+
*
|
|
13
|
+
* This module is intentionally PURE: it defines the entry shapes and the
|
|
14
|
+
* functions that rebuild a conversation's LLM context / display transcript
|
|
15
|
+
* / pending-subagent-results from an entry list. No storage engine, no DB,
|
|
16
|
+
* no wiring into the live run loop yet — so it deploys nothing and is
|
|
17
|
+
* fully unit-testable. The engine implementations (append/read on
|
|
18
|
+
* postgres/sqlite/memory) and the write-site conversions come in later PRs
|
|
19
|
+
* once this rebuild logic is proven.
|
|
20
|
+
*
|
|
21
|
+
* Ordering: every entry carries a monotonic per-conversation `seq`. Entries
|
|
22
|
+
* are assumed sorted by `seq` ascending when passed to the rebuild fns.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
interface BaseEntry {
|
|
26
|
+
/** Stable cross-reference id (uuid). */
|
|
27
|
+
id: string;
|
|
28
|
+
/** Monotonic per-conversation order. */
|
|
29
|
+
seq: number;
|
|
30
|
+
createdAt: number;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/** A user-role display message (incl. typed subagent-callback messages). */
|
|
34
|
+
export interface UserMessageEntry extends BaseEntry {
|
|
35
|
+
type: "user_message";
|
|
36
|
+
message: Message;
|
|
37
|
+
turnId: string;
|
|
38
|
+
/** Hidden from the display transcript (e.g. a framed job prompt, an
|
|
39
|
+
* onboarding seed, or an injected subagent-result message). Still part
|
|
40
|
+
* of the record; just not rendered as a chat bubble. */
|
|
41
|
+
hidden?: boolean;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/** The final assistant bubble for a completed/cancelled/errored turn. */
|
|
45
|
+
export interface AssistantMessageEntry extends BaseEntry {
|
|
46
|
+
type: "assistant_message";
|
|
47
|
+
message: Message;
|
|
48
|
+
turnId: string;
|
|
49
|
+
runId: string;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/** A post-hoc edit to an already-emitted assistant message — replaces the
|
|
53
|
+
* orchestrator/resume "mutate the last assistant message in place" writes
|
|
54
|
+
* with an append. Applied at rebuild time. */
|
|
55
|
+
export interface AssistantAmendmentEntry extends BaseEntry {
|
|
56
|
+
type: "assistant_amendment";
|
|
57
|
+
targetEntryId: string;
|
|
58
|
+
appendText?: string;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** One LLM-transcript message (the model-visible form). Appended from the
|
|
62
|
+
* run loop per step — never diffed from an array. */
|
|
63
|
+
export interface HarnessMessageEntry extends BaseEntry {
|
|
64
|
+
type: "harness_message";
|
|
65
|
+
message: Message;
|
|
66
|
+
turnId: string;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/** Compaction overlay: nothing is deleted. At rebuild, the LLM context is
|
|
70
|
+
* the latest compaction's `summaryMessage` followed by the harness
|
|
71
|
+
* messages from `firstKeptSeq` onward. */
|
|
72
|
+
export interface CompactionEntry extends BaseEntry {
|
|
73
|
+
type: "compaction";
|
|
74
|
+
summaryMessage: Message;
|
|
75
|
+
firstKeptSeq: number;
|
|
76
|
+
tokensBefore?: number;
|
|
77
|
+
tokensAfter?: number;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/** A finished subagent's result arriving for the parent. Pending = a
|
|
81
|
+
* subagent_result whose seq is not listed in any later callback_started. */
|
|
82
|
+
export interface SubagentResultEntry extends BaseEntry {
|
|
83
|
+
type: "subagent_result";
|
|
84
|
+
result: PendingSubagentResult;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/** Marks which subagent_result entries a callback turn consumed (by seq).
|
|
88
|
+
* Consumption is an append, never a delete. */
|
|
89
|
+
export interface CallbackStartedEntry extends BaseEntry {
|
|
90
|
+
type: "callback_started";
|
|
91
|
+
consumedSeqs: number[];
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
export type ConversationEntry =
|
|
95
|
+
| UserMessageEntry
|
|
96
|
+
| AssistantMessageEntry
|
|
97
|
+
| AssistantAmendmentEntry
|
|
98
|
+
| HarnessMessageEntry
|
|
99
|
+
| CompactionEntry
|
|
100
|
+
| SubagentResultEntry
|
|
101
|
+
| CallbackStartedEntry;
|
|
102
|
+
|
|
103
|
+
/**
 * Rebuild the LLM-visible message context from the entry log.
 *
 * Without any compaction entry the context is every harness message, in
 * the order the entries appear in `entries`. With one or more compaction
 * entries, the one with the highest `seq` wins: the context is its
 * `summaryMessage` followed by every harness message whose seq is
 * `>= firstKeptSeq`. A later compaction's `firstKeptSeq` may point at an
 * earlier summary that was itself appended as a harness message, which is
 * how layered compactions compose.
 *
 * @param entries The conversation's entry log.
 * @returns The messages to hand to the LLM.
 */
export function buildLlmContext(entries: ConversationEntry[]): Message[] {
  // Select by seq rather than by array position.
  let latestCompaction: CompactionEntry | undefined;
  for (const entry of entries) {
    if (entry.type !== "compaction") continue;
    if (latestCompaction === undefined || entry.seq > latestCompaction.seq) {
      latestCompaction = entry;
    }
  }

  const harnessEntries = entries.filter(
    (entry): entry is HarnessMessageEntry => entry.type === "harness_message",
  );

  if (latestCompaction === undefined) {
    return harnessEntries.map((entry) => entry.message);
  }

  // Pull the fields out once so the closure below captures plain constants
  // and no non-null assertion is needed.
  const { summaryMessage, firstKeptSeq } = latestCompaction;
  const kept = harnessEntries
    .filter((entry) => entry.seq >= firstKeptSeq)
    .map((entry) => entry.message);
  return [summaryMessage, ...kept];
}
|
|
132
|
+
|
|
133
|
+
/** A window onto the user-visible transcript, as returned by `buildDisplaySnapshot`. */
export interface DisplaySnapshot {
  /** The display messages contained in this window. */
  messages: Message[];
  /** Count of all display messages available, for pagination UIs. */
  totalMessages: number;
  /**
   * seq of the first message in `messages`, usable as a `beforeSeq`
   * pagination cursor; `null` when `messages` is empty.
   */
  headSeq: number | null;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Rebuild the display transcript (the user-visible chat) from the entry
 * log and return its trailing `tailN` messages.
 *
 * - User messages are included unless flagged `hidden`.
 * - Assistant messages have the `appendText` of every amendment that
 *   targets them (matched by entry id, applied in ascending seq order)
 *   appended to their string content.
 * - An assistant message whose content is not a plain string and that has
 *   no amendment text is passed through unchanged, so structured content
 *   is not blanked. If amendment text does exist for such a message, the
 *   text is applied to an empty base string, because the shape of
 *   structured content is not known here.
 * - All other entry types are ignored.
 *
 * @param entries The conversation's entry log; display order follows array order.
 * @param tailN Maximum number of trailing messages to return.
 * @returns The tail window, the total number of display messages, and the
 *   seq of the first returned message (`null` when the window is empty).
 */
export function buildDisplaySnapshot(
  entries: ConversationEntry[],
  tailN: number,
): DisplaySnapshot {
  // Group amendments by the id of the assistant entry they extend.
  const amendmentsByTarget = new Map<string, AssistantAmendmentEntry[]>();
  for (const entry of entries) {
    if (entry.type !== "assistant_amendment") continue;
    const bucket = amendmentsByTarget.get(entry.targetEntryId);
    if (bucket) {
      bucket.push(entry);
    } else {
      amendmentsByTarget.set(entry.targetEntryId, [entry]);
    }
  }

  const built: { seq: number; message: Message }[] = [];
  for (const entry of entries) {
    if (entry.type === "user_message") {
      if (!entry.hidden) built.push({ seq: entry.seq, message: entry.message });
      continue;
    }
    if (entry.type !== "assistant_message") continue;

    // Sort a copy so the grouped list is never reordered in place.
    const appended = [...(amendmentsByTarget.get(entry.id) ?? [])]
      .sort((x, y) => x.seq - y.seq)
      .map((amendment) => amendment.appendText || "")
      .join("");

    const original = entry.message.content;
    if (typeof original !== "string" && appended === "") {
      // Nothing to fold in: keep structured content exactly as stored.
      built.push({ seq: entry.seq, message: entry.message });
      continue;
    }

    const base = typeof original === "string" ? original : "";
    built.push({
      seq: entry.seq,
      message: { ...entry.message, content: base + appended },
    });
  }

  const total = built.length;
  const tail = tailN >= total ? built : built.slice(total - tailN);
  const head = tail[0];
  return {
    messages: tail.map((item) => item.message),
    totalMessages: total,
    headSeq: head ? head.seq : null,
  };
}
|
|
184
|
+
|
|
185
|
+
/**
 * Collect the subagent results that have arrived but have not been
 * consumed by a callback turn. This is the append-only replacement for the
 * mutable `pendingSubagentResults` array.
 *
 * A `subagent_result` entry is treated as consumed when its seq appears in
 * the `consumedSeqs` of any `callback_started` entry in `entries`.
 *
 * @param entries The conversation's entry log.
 * @returns The `result` payload of every unconsumed `subagent_result`
 *   entry, in the order the entries appear.
 */
export function getPendingSubagentResults(
  entries: ConversationEntry[],
): PendingSubagentResult[] {
  // First pass: gather every seq any callback claims to have consumed.
  const consumedSeqs = new Set<number>();
  for (const entry of entries) {
    if (entry.type !== "callback_started") continue;
    for (const seq of entry.consumedSeqs) {
      consumedSeqs.add(seq);
    }
  }

  // Second pass: keep the results nobody claimed.
  const pending: PendingSubagentResult[] = [];
  for (const entry of entries) {
    if (entry.type === "subagent_result" && !consumedSeqs.has(entry.seq)) {
      pending.push(entry.result);
    }
  }
  return pending;
}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import type { LanguageModel } from "ai";
|
|
3
|
+
import { MockLanguageModelV3 } from "ai/test";
|
|
4
|
+
import type { Message } from "@poncho-ai/sdk";
|
|
5
|
+
import {
|
|
6
|
+
compactMessages,
|
|
7
|
+
findSafeSplitPoint,
|
|
8
|
+
resolveCompactionConfig,
|
|
9
|
+
} from "../src/compaction.js";
|
|
10
|
+
|
|
11
|
+
// ── Fake model ──────────────────────────────────────────────────────────
|
|
12
|
+
// Builds a MockLanguageModelV3 whose doGenerate always answers with
// `summaryText`, and collects every text part of the prompt it receives so
// a test can assert on what was sent to the summarizer.
function fakeModel(summaryText: string): {
  model: LanguageModel;
  prompts: string[];
} {
  const prompts: string[] = [];
  const mock = new MockLanguageModelV3({
    doGenerate: async ({ prompt }) => {
      // Only array-shaped content is scanned; each text part is recorded.
      for (const { content } of prompt) {
        if (!Array.isArray(content)) continue;
        for (const part of content) {
          if (part.type === "text") {
            prompts.push(part.text);
          }
        }
      }
      return {
        content: [{ type: "text", text: summaryText }],
        finishReason: "stop",
        usage: { inputTokens: 10, outputTokens: 10, totalTokens: 20 },
        warnings: [],
      };
    },
  });
  return { model: mock as unknown as LanguageModel, prompts };
}
|
|
39
|
+
|
|
40
|
+
/** User-role message; the `metadata` key is present only when a truthy value is passed. */
const userMsg = (text: string, metadata?: Message["metadata"]): Message => {
  const base = { role: "user", content: text } as const;
  return metadata ? { ...base, metadata } : { ...base };
};
|
|
45
|
+
/** Assistant-role message carrying plain text content. */
const assistantText = (text: string): Message => {
  return { role: "assistant", content: text };
};
|
|
49
|
+
/**
 * Assistant-role message whose content is a JSON string holding `text`
 * plus a single tool call (fixed id "call_1", empty arguments) to `toolName`.
 */
const assistantToolCall = (text: string, toolName: string): Message => {
  const payload = {
    text,
    tool_calls: [{ id: "call_1", name: toolName, arguments: {} }],
  };
  return { role: "assistant", content: JSON.stringify(payload) };
};
|
|
56
|
+
/** Tool-role message carrying `text` as its content. */
const toolResult = (text: string): Message => {
  return { role: "tool", content: text };
};
|
|
57
|
+
|
|
58
|
+
describe("findSafeSplitPoint", () => {
  it("splits at a normal user-message boundary", () => {
    // Four plain turns: u0 a0 u1 a1 u2 a2 u3 a3.
    const messages: Message[] = [0, 1, 2, 3].flatMap((turn) => [
      userMsg(`u${turn}`),
      assistantText(`a${turn}`),
    ]);
    const splitIndex = findSafeSplitPoint(messages, 4);
    // 8 messages keeping 4 gives candidate index 4 ("u2"), which is
    // already a user message, so the split lands there.
    expect(splitIndex).toBe(4);
    expect(messages[splitIndex]!.role).toBe("user");
  });

  it("returns -1 when there are too few messages", () => {
    const tooShort: Message[] = [userMsg("u0"), assistantText("a0")];
    expect(findSafeSplitPoint(tooShort, 4)).toBe(-1);
  });

  it("walks earlier when the split would orphan tool_calls being moved", () => {
    // The candidate user boundary directly follows an assistant tool-call
    // message whose tool result is on the preserved side. Splitting there
    // would leave the tool_calls stranded in the summary, so the guard has
    // to move back to the previous clean user boundary (which is still
    // >= MIN_COMPACTABLE_MESSAGES).
    const messages: Message[] = [
      userMsg("u0"), // 0
      assistantText("a0"), // 1
      userMsg("u1"), // 2
      assistantText("a1"), // 3
      userMsg("u2"), // 4: the safe earlier boundary
      assistantText("a2"), // 5
      assistantToolCall("calling tool", "search"), // 6: last compacted message if split at 7
      userMsg("u3 (tool result delivered as user)"), // 7: the candidate boundary
      toolResult("result"), // 8
      assistantText("a3"), // 9
    ];
    // 10 messages keeping 3 gives candidate 7 (a user message), but
    // messages[6] is an assistant with tool_calls, so expect a walk back to 4.
    const splitIndex = findSafeSplitPoint(messages, 3);
    expect(splitIndex).toBe(4);

    // The compacted side must not end on a dangling assistant-with-tool_calls.
    const lastCompacted = messages[splitIndex - 1]!;
    const endsOnToolCall =
      typeof lastCompacted.content === "string" &&
      lastCompacted.content.includes('"tool_calls"');
    expect(endsOnToolCall).toBe(false);
  });
});
|
|
111
|
+
|
|
112
|
+
describe("compactMessages", () => {
  const config = resolveCompactionConfig({ keepRecentMessages: 2 });

  // Every case below uses the same six-message conversation; only the
  // first and third messages differ between cases.
  const conversation = (first: Message, third: Message): Message[] => [
    first,
    assistantText("a0"),
    third,
    assistantText("a1"),
    userMsg("u2"),
    assistantText("a2"),
  ];

  it("compacts older messages into a summary continuation message", async () => {
    const { model } = fakeModel("SUMMARY TEXT");
    const res = await compactMessages(
      model,
      conversation(userMsg("u0"), userMsg("u1")),
      config,
    );
    expect(res.compacted).toBe(true);
    const summary = res.messages[0]!;
    expect(summary.metadata?.isCompactionSummary).toBe(true);
    expect(summary.content).toContain("SUMMARY TEXT");
    // No subagent callback in the input, so no ledger block is expected.
    expect(summary.content).not.toContain("## Subagents");
  });

  it("appends a verbatim subagent ledger after the LLM summary", async () => {
    const { model } = fakeModel("SUMMARY TEXT");
    const callback = userMsg(
      '[Subagent Result] Subagent "research the API" (sub_abc) completed:\n\nFound that the endpoint returns JSON with a data array. Use /v2/items.',
      {
        _subagentCallback: true,
        subagentId: "sub_abc",
        task: "research the API",
      } as Message["metadata"],
    );
    const res = await compactMessages(
      model,
      conversation(userMsg("u0"), callback),
      config,
    );
    expect(res.compacted).toBe(true);
    const content = res.messages[0]!.content as string;
    expect(content).toContain("## Subagents");
    expect(content).toContain("sub_abc");
    expect(content).toContain("research the API");
    // The digest keeps the result body word for word.
    expect(content).toContain("endpoint returns JSON");
    // The ledger is placed after the summary text.
    const summaryAt = content.indexOf("SUMMARY TEXT");
    const ledgerAt = content.indexOf("## Subagents");
    expect(summaryAt).toBeLessThan(ledgerAt);
  });

  it("detects subagent callbacks by text marker even without metadata", async () => {
    const { model } = fakeModel("S");
    const callback = userMsg(
      '[Subagent Result] Subagent "compile report" (sub_xyz) completed:\n\nThe report is ready.',
    );
    const res = await compactMessages(
      model,
      conversation(userMsg("u0"), callback),
      config,
    );
    const content = res.messages[0]!.content as string;
    expect(content).toContain("sub_xyz");
    expect(content).toContain("compile report");
  });

  it("carries forward a prior ledger and dedupes by subagentId", async () => {
    const { model } = fakeModel("NEW SUMMARY");
    // The conversation opens with an earlier compaction summary that
    // already carries a ## Subagents block listing sub_abc and sub_old.
    const priorSummary: Message = {
      role: "user",
      content: [
        "[CONTEXT COMPACTION] prior.",
        "<summary>",
        "Earlier work done.",
        "",
        "## Subagents",
        "- **research the API** (sub_abc) — completed",
        " Old digest about the API.",
        "- **legacy task** (sub_old) — completed",
        " Legacy digest text.",
        "</summary>",
      ].join("\n"),
      metadata: { isCompactionSummary: true },
    };
    // A newer callback for sub_abc is expected to replace the prior entry.
    const freshCallback = userMsg(
      '[Subagent Result] Subagent "research the API" (sub_abc) completed:\n\nUpdated finding: the endpoint moved to /v3/items.',
      {
        _subagentCallback: true,
        subagentId: "sub_abc",
        task: "research the API",
      } as Message["metadata"],
    );
    const res = await compactMessages(
      model,
      conversation(priorSummary, freshCallback),
      config,
    );
    const content = res.messages[0]!.content as string;
    // Both subagents survive.
    expect(content).toContain("sub_abc");
    expect(content).toContain("sub_old");
    // sub_abc is listed exactly once after deduplication.
    const occurrences = content.split("sub_abc").length - 1;
    expect(occurrences).toBe(1);
    // The newer digest replaced the older one.
    expect(content).toContain("/v3/items");
    expect(content).not.toContain("Old digest about the API");
  });

  it("passes a prior summary in full (no 1200-char truncation) and adds the merge instruction", async () => {
    const { model, prompts } = fakeModel("MERGED");
    const longPrior = "PRIOR-STATE ".repeat(200); // 2400 chars, above the 1200 limit
    const priorSummary: Message = {
      role: "user",
      content: longPrior,
      metadata: { isCompactionSummary: true },
    };
    await compactMessages(
      model,
      conversation(priorSummary, userMsg("u1")),
      config,
    );
    const sentPrompt = prompts.join("\n");
    // The prior summary reached the model whole.
    expect(sentPrompt).toContain(longPrior.trim());
    expect(sentPrompt).not.toContain("[truncated]");
    // It is labelled as a prior summary and comes with the merge instruction.
    expect(sentPrompt).toContain("[prior-summary]");
    expect(sentPrompt).toContain("MERGE AND UPDATE");
  });

  it("still truncates non-prior-summary long messages to 1200 chars", async () => {
    const { model, prompts } = fakeModel("S");
    const longUser = "X".repeat(3000);
    await compactMessages(
      model,
      conversation(userMsg(longUser), userMsg("u1")),
      config,
    );
    const sentPrompt = prompts.join("\n");
    expect(sentPrompt).toContain("[truncated]");
    // The first message is an ordinary user message, so no merge instruction.
    expect(sentPrompt).not.toContain("MERGE AND UPDATE");
  });
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
buildLlmContext,
|
|
4
|
+
buildDisplaySnapshot,
|
|
5
|
+
getPendingSubagentResults,
|
|
6
|
+
type ConversationEntry,
|
|
7
|
+
} from "../src/storage/entries.js";
|
|
8
|
+
import type { Message } from "@poncho-ai/sdk";
|
|
9
|
+
|
|
10
|
+
/** Minimal message literal with the given role and string content. */
const msg = (role: Message["role"], content: string): Message => {
  return { role, content };
};
|
|
11
|
+
|
|
12
|
+
// File-wide seq counter shared by the entry factories below. Each test
// calls reset() first so the seqs it sees start again from 1.
let seq = 0;
const reset = (): void => {
  seq = 0;
};
const next = (): number => {
  seq += 1;
  return seq;
};
|
|
15
|
+
|
|
16
|
+
/** Harness-message entry at the next seq, with id `h<seq>` and an assistant-role message. */
const harness = (content: string, turnId = "t1"): ConversationEntry => {
  const assigned = next();
  return {
    type: "harness_message",
    id: `h${assigned}`,
    seq: assigned,
    createdAt: 0,
    message: msg("assistant", content),
    turnId,
  };
};
|
|
20
|
+
/** User-message entry at the next seq, with id `u<seq>`; `opts.hidden` is copied through as given. */
const user = (content: string, opts: { hidden?: boolean } = {}): ConversationEntry => {
  const assigned = next();
  return {
    type: "user_message",
    id: `u${assigned}`,
    seq: assigned,
    createdAt: 0,
    message: msg("user", content),
    turnId: "t1",
    hidden: opts.hidden,
  };
};
|
|
24
|
+
/** Assistant-message entry at the next seq, using the caller-supplied id. */
const assistant = (id: string, content: string): ConversationEntry => {
  const assigned = next();
  return {
    type: "assistant_message",
    id,
    seq: assigned,
    createdAt: 0,
    message: msg("assistant", content),
    turnId: "t1",
    runId: "r1",
  };
};
|
|
28
|
+
|
|
29
|
+
describe("buildLlmContext", () => {
  // Compaction entry at the next seq.
  const compaction = (
    id: string,
    summary: string,
    firstKeptSeq: number,
  ): ConversationEntry => ({
    type: "compaction",
    id,
    seq: next(),
    createdAt: 0,
    summaryMessage: msg("user", summary),
    firstKeptSeq,
  });

  // Contents of the rebuilt context, which is all these tests compare.
  const contentsOf = (entries: ConversationEntry[]) =>
    buildLlmContext(entries).map((m) => m.content);

  it("returns all harness messages in order with no compaction", () => {
    reset();
    const log = [harness("a"), harness("b"), harness("c")];
    expect(contentsOf(log)).toEqual(["a", "b", "c"]);
  });

  it("applies a compaction overlay: summary + messages from firstKeptSeq", () => {
    reset();
    // Harness messages take seqs 1..4; the compaction takes seq 5 and
    // keeps everything from seq 3 onward.
    const log = [
      harness("old1"),
      harness("old2"),
      harness("kept3"),
      harness("kept4"),
      compaction("c1", "[summary]", 3),
    ];
    expect(contentsOf(log)).toEqual(["[summary]", "kept3", "kept4"]);
  });

  it("uses the LATEST compaction when several exist (layered)", () => {
    reset();
    // seq 1: harness "a", seq 2: c1, seq 3: harness "b", seq 4: c2.
    const log = [
      harness("a"),
      compaction("c1", "[sum1]", 1),
      harness("b"),
      compaction("c2", "[sum2]", 3),
    ];
    expect(contentsOf(log)).toEqual(["[sum2]", "b"]);
  });
});
|
|
60
|
+
|
|
61
|
+
describe("buildDisplaySnapshot", () => {
  // Contents of the snapshot's messages, in order.
  const contentsOf = (snapshot: { messages: Message[] }) =>
    snapshot.messages.map((m) => m.content);

  it("drops hidden user messages and returns the tail", () => {
    reset();
    const log = [
      user("hidden-framed", { hidden: true }),
      user("hello"),
      assistant("a1", "hi"),
      user("again"),
      assistant("a2", "yo"),
    ];
    const snapshot = buildDisplaySnapshot(log, 10);
    expect(contentsOf(snapshot)).toEqual(["hello", "hi", "again", "yo"]);
    expect(snapshot.totalMessages).toBe(4);
  });

  it("folds amendments into their target assistant message", () => {
    reset();
    const target = assistant("a1", "part1");
    const amendment: ConversationEntry = {
      type: "assistant_amendment",
      id: "am1",
      seq: next(),
      createdAt: 0,
      targetEntryId: "a1",
      appendText: " + part2",
    };
    const snapshot = buildDisplaySnapshot([user("q"), target, amendment], 10);
    expect(contentsOf(snapshot)).toEqual(["q", "part1 + part2"]);
  });

  it("returns only the trailing tailN messages", () => {
    reset();
    const log = [
      user("1"),
      assistant("a", "2"),
      user("3"),
      assistant("b", "4"),
    ];
    const snapshot = buildDisplaySnapshot(log, 2);
    expect(contentsOf(snapshot)).toEqual(["3", "4"]);
    expect(snapshot.totalMessages).toBe(4);
  });
});
|
|
95
|
+
|
|
96
|
+
describe("getPendingSubagentResults", () => {
  // subagent_result entry at the next seq for the given subagent.
  const result = (subagentId: string): ConversationEntry => ({
    type: "subagent_result",
    id: `sr-${subagentId}`,
    seq: next(),
    createdAt: 0,
    result: { subagentId, task: "t", status: "completed", timestamp: 0 },
  });

  // callback_started entry at the next seq that consumes `consumedSeqs`.
  const callbackConsuming = (consumedSeqs: number[]): ConversationEntry => ({
    type: "callback_started",
    id: "cb1",
    seq: next(),
    createdAt: 0,
    consumedSeqs,
  });

  it("returns results not yet consumed by a callback", () => {
    reset();
    // s1 is seq 1, s2 is seq 2, the callback is seq 3, s3 is seq 4.
    const log = [
      result("s1"),
      result("s2"),
      callbackConsuming([1]),
      result("s3"),
    ];
    const pending = getPendingSubagentResults(log);
    // Only s1 was consumed; s2 and s3 remain pending.
    expect(pending.map((p) => p.subagentId)).toEqual(["s2", "s3"]);
  });

  it("returns empty when all consumed", () => {
    reset();
    const log = [result("s1"), callbackConsuming([1])];
    expect(getPendingSubagentResults(log)).toEqual([]);
  });
});
|