@poncho-ai/harness 0.53.0 → 0.57.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +79 -0
- package/dist/index.d.ts +216 -2
- package/dist/index.js +670 -27
- package/package.json +1 -1
- package/src/compaction.ts +206 -13
- package/src/index.ts +18 -0
- package/src/orchestrator/entries-dual-write.ts +265 -0
- package/src/orchestrator/index.ts +7 -0
- package/src/orchestrator/orchestrator.ts +179 -13
- package/src/orchestrator/run-conversation-turn.ts +108 -0
- package/src/state.ts +56 -0
- package/src/storage/engine.ts +18 -0
- package/src/storage/entries.ts +217 -0
- package/src/storage/memory-engine.ts +40 -0
- package/src/storage/schema.ts +30 -0
- package/src/storage/sql-dialect.ts +112 -0
- package/src/storage/store-adapters.ts +8 -0
- package/test/compaction.test.ts +274 -0
- package/test/entries-dual-write.test.ts +172 -0
- package/test/entries-store.test.ts +165 -0
- package/test/entries.test.ts +125 -0
package/package.json
CHANGED
package/src/compaction.ts
CHANGED
|
@@ -26,6 +26,20 @@ const SUMMARIZATION_PROMPT = `Summarize the following conversation into a struct
|
|
|
26
26
|
Be concise but preserve all information needed to continue the task.
|
|
27
27
|
Omit any section that has no relevant content.`;
|
|
28
28
|
|
|
29
|
+
/**
 * Addendum to the summarization prompt, used only when the first message being
 * compacted is itself the output of an earlier compaction. It asks the model to
 * treat that block as existing working state and fold the newer messages into
 * it, instead of summarizing an already-lossy summary a second time.
 */
const CUMULATIVE_SUMMARY_PROMPT = `The FIRST message below (tagged [prior-summary]) is an existing working-state summary produced by an earlier compaction. Treat it as the authoritative prior working state: MERGE AND UPDATE it with the newer messages that follow it, carrying forward all still-relevant detail. Do NOT discard or re-compress information from the prior summary just because it is older — only drop it if the newer messages explicitly supersede it.`;

/** Upper bound, in characters, on the subagent result text copied verbatim into a ledger digest. */
const SUBAGENT_DIGEST_CHARS = 500;

/** Markdown heading that opens the verbatim subagent ledger block. */
const SUBAGENT_LEDGER_HEADING = "## Subagents";
|
|
42
|
+
|
|
29
43
|
export const resolveCompactionConfig = (
|
|
30
44
|
explicit?: Partial<CompactionConfig>,
|
|
31
45
|
): CompactionConfig => {
|
|
@@ -78,11 +92,48 @@ export const estimateTotalTokens = (
|
|
|
78
92
|
return tokens;
|
|
79
93
|
};
|
|
80
94
|
|
|
95
|
+
/**
 * Reports whether `msg` is an assistant turn carrying at least one serialized
 * tool call.
 *
 * Only string content is inspected: it must parse as JSON and expose a
 * non-empty `tool_calls` array. Any other role, non-string content,
 * unparseable content, or a missing/empty `tool_calls` yields `false`.
 */
const assistantHasToolCalls = (msg: Message): boolean => {
  if (msg.role !== "assistant" || typeof msg.content !== "string") {
    return false;
  }
  const raw = msg.content;
  // Cheap substring probe so ordinary prose never reaches JSON.parse.
  if (!raw.includes('"tool_calls"')) return false;

  let calls: unknown;
  try {
    calls = (JSON.parse(raw) as { tool_calls?: unknown }).tool_calls;
  } catch {
    // Not valid JSON (or not an object): treat as a plain-text message.
    return false;
  }
  return Array.isArray(calls) && calls.length > 0;
};
|
|
112
|
+
|
|
113
|
+
/**
 * Guard used when choosing a compaction split: reports whether cutting the
 * history at `idx` would leave the COMPACTED side (`messages[0 .. idx-1]`)
 * ending on an assistant message that still carries tool_calls.
 *
 * Only the last compacted message, `messages[idx - 1]`, is examined; the
 * message at `idx` itself is not inspected. An index outside the open range
 * `(0, messages.length)` is never reported as orphaning.
 */
const splitOrphansToolCalls = (messages: Message[], idx: number): boolean => {
  const withinHistory = idx > 0 && idx < messages.length;
  return withinHistory && assistantHasToolCalls(messages[idx - 1]!);
};
|
|
125
|
+
|
|
81
126
|
/**
|
|
82
127
|
* Find the safe split index so that everything before it can be compacted
|
|
83
128
|
* and everything from it onward is preserved. The split always lands just
|
|
84
129
|
* before a `user` message to avoid breaking assistant+tool pairs.
|
|
85
130
|
*
|
|
131
|
+
* Defensive guard: even at a `user` boundary, refuse a split whose compacted
|
|
132
|
+
* side would END on an assistant message with unanswered tool_calls (its
|
|
133
|
+
* `tool` result having moved to the preserved side). Such a split would
|
|
134
|
+
* orphan the tool_calls inside the summary boundary. When that happens we
|
|
135
|
+
* walk earlier to the next safe `user` boundary.
|
|
136
|
+
*
|
|
86
137
|
* Returns -1 if no valid split point is found.
|
|
87
138
|
*/
|
|
88
139
|
export const findSafeSplitPoint = (
|
|
@@ -92,16 +143,17 @@ export const findSafeSplitPoint = (
|
|
|
92
143
|
const candidateIdx = messages.length - keepRecentMessages;
|
|
93
144
|
if (candidateIdx < MIN_COMPACTABLE_MESSAGES) return -1;
|
|
94
145
|
|
|
95
|
-
// Walk backwards from candidate to find a user message boundary
|
|
146
|
+
// Walk backwards from candidate to find a user message boundary that does
|
|
147
|
+
// not orphan a tool-call relationship on the compacted side.
|
|
96
148
|
for (let i = candidateIdx; i >= MIN_COMPACTABLE_MESSAGES; i--) {
|
|
97
|
-
if (messages[i]!.role === "user") {
|
|
149
|
+
if (messages[i]!.role === "user" && !splitOrphansToolCalls(messages, i)) {
|
|
98
150
|
return i;
|
|
99
151
|
}
|
|
100
152
|
}
|
|
101
153
|
|
|
102
|
-
// Walk forwards from candidate as fallback
|
|
154
|
+
// Walk forwards from candidate as fallback.
|
|
103
155
|
for (let i = candidateIdx + 1; i < messages.length - 1; i++) {
|
|
104
|
-
if (messages[i]!.role === "user") {
|
|
156
|
+
if (messages[i]!.role === "user" && !splitOrphansToolCalls(messages, i)) {
|
|
105
157
|
if (i < MIN_COMPACTABLE_MESSAGES) return -1;
|
|
106
158
|
return i;
|
|
107
159
|
}
|
|
@@ -110,25 +162,46 @@ export const findSafeSplitPoint = (
|
|
|
110
162
|
return -1;
|
|
111
163
|
};
|
|
112
164
|
|
|
165
|
+
/**
 * True only when the message's metadata explicitly flags it as the product of
 * an earlier compaction (`metadata.isCompactionSummary === true`).
 */
const isCompactionSummary = (msg: Message): boolean => {
  const flag = msg.metadata?.isCompactionSummary;
  return flag === true;
};
|
|
170
|
+
|
|
113
171
|
/**
|
|
114
172
|
* Build the summarization messages for the generateText call.
|
|
173
|
+
*
|
|
174
|
+
* Cumulative behavior: when the FIRST compacted message is itself a prior
|
|
175
|
+
* compaction summary, it is passed in FULL (not truncated to
|
|
176
|
+
* SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) and tagged `[prior-summary]`, and
|
|
177
|
+
* the prompt instructs the model to merge-and-update rather than
|
|
178
|
+
* re-summarize. All other messages keep the 1200-char truncation.
|
|
115
179
|
*/
|
|
116
180
|
const buildSummarizationMessages = (
|
|
117
181
|
messagesToCompact: Message[],
|
|
118
182
|
instructions?: string,
|
|
119
183
|
): Array<{ role: "user"; content: string }> => {
|
|
184
|
+
const hasPriorSummary =
|
|
185
|
+
messagesToCompact.length > 0 && isCompactionSummary(messagesToCompact[0]!);
|
|
186
|
+
|
|
120
187
|
const conversationLines: string[] = [];
|
|
121
|
-
for (
|
|
188
|
+
for (let i = 0; i < messagesToCompact.length; i++) {
|
|
189
|
+
const msg = messagesToCompact[i]!;
|
|
122
190
|
const text = getTextContent(msg);
|
|
123
|
-
const
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
191
|
+
const isPrior = i === 0 && hasPriorSummary;
|
|
192
|
+
// The prior summary is the working state we must not lose — pass it whole.
|
|
193
|
+
const rendered =
|
|
194
|
+
isPrior || text.length <= SUMMARIZATION_MESSAGE_TRUNCATION_CHARS
|
|
195
|
+
? text
|
|
196
|
+
: text.slice(0, SUMMARIZATION_MESSAGE_TRUNCATION_CHARS) +
|
|
197
|
+
"\n...[truncated]";
|
|
198
|
+
const tag = isPrior ? "prior-summary" : msg.role;
|
|
199
|
+
conversationLines.push(`[${tag}]: ${rendered}`);
|
|
127
200
|
}
|
|
128
201
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
202
|
+
let prompt = SUMMARIZATION_PROMPT;
|
|
203
|
+
if (hasPriorSummary) prompt = `${prompt}\n\n${CUMULATIVE_SUMMARY_PROMPT}`;
|
|
204
|
+
if (instructions) prompt = `${prompt}\n\nAdditional focus: ${instructions}`;
|
|
132
205
|
|
|
133
206
|
return [
|
|
134
207
|
{
|
|
@@ -138,6 +211,121 @@ const buildSummarizationMessages = (
|
|
|
138
211
|
];
|
|
139
212
|
};
|
|
140
213
|
|
|
214
|
+
/** One row of the subagent ledger that is appended verbatim to a compaction summary. */
interface SubagentLedgerEntry {
  /** Identifier of the subagent this row describes. */
  subagentId: string;
  /** Task label shown in bold on the ledger bullet line. */
  task: string;
  /** Status token shown after the dash on the ledger bullet line. */
  status: string;
  /** Result text kept for this subagent (rendered on the line under the bullet). */
  digest: string;
}

/**
 * Matches the header line of an injected subagent callback message, i.e. text
 * of the form `[Subagent Result] Subagent "<task>" (<id>) <status>:`.
 * Capture groups: 1 = task, 2 = subagent id, 3 = status.
 */
const SUBAGENT_RESULT_HEADER =
  /^\[Subagent Result\] Subagent "([^"]*)" \(([^)]*)\) (\S+):/;
|
|
224
|
+
|
|
225
|
+
/**
 * Turn a subagent-callback user message into a ledger entry.
 *
 * A message counts as a callback when it has role `user` and either carries
 * the `_subagentCallback` / `subagentCallback` metadata flag or its text starts
 * with `[Subagent Result]`.
 *
 * - `subagentId` / `task`: non-empty string metadata wins; otherwise the value
 *   captured from the header line is used.
 * - `status`: taken from the header line, defaulting to `"completed"`.
 * - `digest`: the result text, capped at SUBAGENT_DIGEST_CHARS characters with
 *   a trailing `…` when cut.
 *
 * @returns the ledger entry, or `null` when the message is not a subagent
 *   callback or no subagent id can be determined.
 */
const parseSubagentCallback = (msg: Message): SubagentLedgerEntry | null => {
  if (msg.role !== "user") return null;

  const meta = (msg.metadata ?? {}) as Record<string, unknown>;
  const text = getTextContent(msg);
  const hasMetaFlag =
    meta._subagentCallback === true || meta.subagentCallback === true;
  const hasTextMarker = text.startsWith("[Subagent Result]");
  if (!hasMetaFlag && !hasTextMarker) return null;

  // Prefer structured metadata, fall back to parsing the header line.
  const headerMatch = text.match(SUBAGENT_RESULT_HEADER);
  const subagentId =
    typeof meta.subagentId === "string" && meta.subagentId
      ? meta.subagentId
      : headerMatch?.[2] ?? "";
  if (!subagentId) return null;
  const task =
    typeof meta.task === "string" && meta.task
      ? meta.task
      : headerMatch?.[1] ?? "";
  const status = headerMatch?.[3] ?? "completed";

  // The header is separated from the result by a blank line. Only strip it
  // when the text actually opens with the marker: a callback identified purely
  // by its metadata flag has no header, and cutting at its first blank line
  // would throw away the first paragraph of the result.
  const headerEnd = hasTextMarker ? text.indexOf("\n\n") : -1;
  const body = headerEnd >= 0 ? text.slice(headerEnd + 2) : text;
  const digest =
    body.length > SUBAGENT_DIGEST_CHARS
      ? body.slice(0, SUBAGENT_DIGEST_CHARS) + "…"
      : body;

  return { subagentId, task, status, digest };
};
|
|
261
|
+
|
|
262
|
+
/**
 * Recover the subagent ledger embedded in an earlier compaction summary so it
 * can be carried forward into the next one.
 *
 * Everything after the first SUBAGENT_LEDGER_HEADING occurrence is scanned for
 * entries of the shape written by `renderSubagentLedger`:
 *
 *     - **<task>** (<id>) — <status>
 *       <digest>
 *
 * The digest line is optional (it disappears when an empty digest is trimmed
 * off the end of the summary) and may be indented by one or two spaces; an
 * entry without one gets an empty digest instead of being dropped.
 *
 * @param summaryText full text of the prior summary message.
 * @returns the parsed entries in document order; `[]` when no heading is found.
 */
const parsePriorLedger = (summaryText: string): SubagentLedgerEntry[] => {
  const headingIdx = summaryText.indexOf(SUBAGENT_LEDGER_HEADING);
  if (headingIdx < 0) return [];
  const block = summaryText.slice(headingIdx + SUBAGENT_LEDGER_HEADING.length);

  // Built per call: a /g regex carries `lastIndex` state between exec() calls.
  const entryRe =
    /^- \*\*(.*?)\*\* \((.+?)\) — (\S+)(?:\n {1,2}(.*))?$/gm;
  const entries: SubagentLedgerEntry[] = [];
  let m: RegExpExecArray | null;
  while ((m = entryRe.exec(block)) !== null) {
    entries.push({
      task: m[1]!,
      subagentId: m[2]!,
      status: m[3]!,
      // Group 4 is undefined when the bullet has no digest line.
      digest: m[4] ?? "",
    });
  }
  return entries;
};
|
|
287
|
+
|
|
288
|
+
/**
 * Build the combined subagent ledger for a batch of messages about to be
 * compacted.
 *
 * Prior compaction summaries contribute the entries parsed out of their
 * embedded ledger; every other message contributes at most one entry when it
 * parses as a subagent callback. Entries are keyed by `subagentId`: a later
 * entry replaces an earlier one but keeps the position where that id was first
 * seen.
 */
const collectSubagentLedger = (
  messagesToCompact: Message[],
): SubagentLedgerEntry[] => {
  // A Map iterates in insertion order and re-setting an existing key keeps its
  // original slot, which is exactly "first-seen order, last write wins".
  const ledger = new Map<string, SubagentLedgerEntry>();

  messagesToCompact.forEach((msg) => {
    const found = isCompactionSummary(msg)
      ? parsePriorLedger(getTextContent(msg))
      : [parseSubagentCallback(msg)];
    for (const entry of found) {
      if (entry) ledger.set(entry.subagentId, entry);
    }
  });

  return Array.from(ledger.values());
};
|
|
314
|
+
|
|
315
|
+
/**
 * Render the subagent ledger as a markdown block that is appended to the
 * summary AFTER the LLM text, so the model never gets a chance to paraphrase
 * it away.
 *
 * Output shape (one pair of lines per entry, under SUBAGENT_LEDGER_HEADING):
 *
 *     - **<task>** (<id>) — <status>
 *       <digest>
 *
 * This is the shape `parsePriorLedger` reads back, so two invariants are kept:
 * the digest line is indented by exactly two spaces, and both the task and the
 * digest are flattened to a single line (any line terminator becomes a space),
 * since the parser's per-line pattern cannot span a line break.
 *
 * @returns the rendered block, or `""` when there are no entries.
 */
const renderSubagentLedger = (entries: SubagentLedgerEntry[]): string => {
  if (entries.length === 0) return "";

  // Covers every character that ends a line for RegExp `.` and `$`.
  const oneLine = (s: string): string =>
    s.replace(/\r\n|[\n\r\u2028\u2029]/g, " ");

  const lines = entries.map(
    (e) =>
      `- **${oneLine(e.task)}** (${e.subagentId}) — ${e.status}\n  ${oneLine(e.digest)}`,
  );
  return `${SUBAGENT_LEDGER_HEADING}\n${lines.join("\n")}`;
};
|
|
328
|
+
|
|
141
329
|
/**
|
|
142
330
|
* Build the continuation message that replaces compacted messages.
|
|
143
331
|
*/
|
|
@@ -217,7 +405,12 @@ export const compactMessages = async (
|
|
|
217
405
|
};
|
|
218
406
|
}
|
|
219
407
|
|
|
220
|
-
|
|
408
|
+
// Append the subagent ledger AFTER the LLM summary, verbatim, so the
|
|
409
|
+
// model's paraphrasing can never drop or truncate subagent results.
|
|
410
|
+
const ledger = renderSubagentLedger(collectSubagentLedger(toCompact));
|
|
411
|
+
const summaryWithLedger = ledger ? `${summary}\n\n${ledger}` : summary;
|
|
412
|
+
|
|
413
|
+
const continuationMessage = buildContinuationMessage(summaryWithLedger);
|
|
221
414
|
const compactedMessages = [continuationMessage, ...toPreserve];
|
|
222
415
|
|
|
223
416
|
return {
|
package/src/index.ts
CHANGED
|
@@ -21,6 +21,24 @@ export * from "./telemetry.js";
|
|
|
21
21
|
export * from "./secrets-store.js";
|
|
22
22
|
export * from "./storage/index.js";
|
|
23
23
|
export * from "./storage/store-adapters.js";
|
|
24
|
+
// Append-only conversation entries (Phase 3 substrate): types + rebuild fns.
|
|
25
|
+
// appendEntries/readEntries are reachable on the ConversationStore /
|
|
26
|
+
// StorageEngine.conversations surfaces already exported above.
|
|
27
|
+
export {
|
|
28
|
+
buildLlmContext,
|
|
29
|
+
buildDisplaySnapshot,
|
|
30
|
+
getPendingSubagentResults,
|
|
31
|
+
type ConversationEntry,
|
|
32
|
+
type NewConversationEntry,
|
|
33
|
+
type UserMessageEntry,
|
|
34
|
+
type AssistantMessageEntry,
|
|
35
|
+
type AssistantAmendmentEntry,
|
|
36
|
+
type HarnessMessageEntry,
|
|
37
|
+
type CompactionEntry,
|
|
38
|
+
type SubagentResultEntry,
|
|
39
|
+
type CallbackStartedEntry,
|
|
40
|
+
type DisplaySnapshot,
|
|
41
|
+
} from "./storage/entries.js";
|
|
24
42
|
export {
|
|
25
43
|
PonchoFsAdapter,
|
|
26
44
|
type VirtualMount,
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Phase 3b — dual-write + parity checker (instrumentation only)
|
|
3
|
+
//
|
|
4
|
+
// At each conversation WRITE site we ALSO append the corresponding
|
|
5
|
+
// append-only `ConversationEntry`s alongside the existing mutable-blob write.
|
|
6
|
+
// READ paths are untouched: nothing consumes these entries yet, so a bug here
|
|
7
|
+
// can only mislog — it cannot corrupt behavior. The blob remains the source of
|
|
8
|
+
// truth until the read-cutover PR (3c).
|
|
9
|
+
//
|
|
10
|
+
// Two public surfaces:
|
|
11
|
+
// - `appendEntriesSafe(...)` — fire-and-forget wrapper that swallows every
|
|
12
|
+
// error (so a dual-write failure never breaks a live turn) and stamps a
|
|
13
|
+
// uuid `id` on each entry (the engine inserts `entry.id` as a column).
|
|
14
|
+
// - `verifyEntriesParity(...)` — gated on `PONCHO_VERIFY_ENTRIES === "1"`,
|
|
15
|
+
// rebuilds LLM context + display snapshot from the entry log and diffs
|
|
16
|
+
// them against the blob's `_harnessMessages` / `messages`. Logs mismatches
|
|
17
|
+
// under `[entries-parity]`. Never throws.
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
import { randomUUID } from "node:crypto";
|
|
21
|
+
import { getTextContent, type Message } from "@poncho-ai/sdk";
|
|
22
|
+
import type { Logger } from "@poncho-ai/sdk";
|
|
23
|
+
import type { Conversation, ConversationStore, PendingSubagentResult } from "../state.js";
|
|
24
|
+
import {
|
|
25
|
+
buildDisplaySnapshot,
|
|
26
|
+
buildLlmContext,
|
|
27
|
+
type ConversationEntry,
|
|
28
|
+
type NewConversationEntry,
|
|
29
|
+
} from "../storage/entries.js";
|
|
30
|
+
|
|
31
|
+
/**
 * Reports whether parity verification is switched on, i.e. whether the
 * `PONCHO_VERIFY_ENTRIES` environment variable is exactly `"1"`. The variable
 * is read on every call.
 */
export const entriesParityEnabled = (): boolean => {
  const { PONCHO_VERIFY_ENTRIES: flag } = process.env;
  return flag === "1";
};
|
|
34
|
+
|
|
35
|
+
// `Omit` applied directly to a union only sees the keys shared by every member,
// so member-specific fields (`message`, `result`, ...) would be lost. Applying
// it through a conditional type on a bare type parameter runs it once per
// union member instead, so each member keeps its own fields.
type DistributiveOmit<T, K extends PropertyKey> = T extends unknown
  ? Omit<T, K>
  : never;

/** A `NewConversationEntry` of any kind, minus the `id` that `appendEntriesSafe` stamps on. */
type NewEntryNoId = DistributiveOmit<NewConversationEntry, "id">;
|
|
44
|
+
|
|
45
|
+
/**
 * Best-effort append of entries to a conversation's append-only log, mirroring
 * a blob write that has already happened.
 *
 * Each entry is given a uuid `id` and the batch is handed to
 * `store.appendEntries` together with the conversation id, owner id and tenant
 * id (`null` when the conversation has none). Any failure while stamping or
 * storing is logged under `[entries-dual-write]` and reported as `[]`, so a
 * dual-write problem never breaks the turn in progress and the returned
 * promise is safe to `void`.
 *
 * @returns the entries as returned by the store, or `[]` for empty input or on
 *   failure.
 */
export const appendEntriesSafe = async (
  store: ConversationStore,
  conversation: Pick<Conversation, "conversationId" | "ownerId" | "tenantId">,
  entries: NewEntryNoId[],
  log: Logger,
): Promise<ConversationEntry[]> => {
  if (entries.length === 0) return [];

  const { conversationId, ownerId, tenantId } = conversation;
  try {
    const stamped: NewConversationEntry[] = [];
    for (const entry of entries) {
      stamped.push({ id: randomUUID(), ...entry } as NewConversationEntry);
    }
    const stored = await store.appendEntries(
      conversationId,
      ownerId,
      tenantId ?? null,
      stamped,
    );
    return stored;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    log.error(
      `[entries-dual-write] append failed for ${conversationId}: ${reason}`,
    );
    return [];
  }
};
|
|
82
|
+
|
|
83
|
+
// --- entry builders (pure; centralize the best-effort derivation) ----------
|
|
84
|
+
|
|
85
|
+
/**
 * `user_message` entry for `message` within turn `turnId`. The `hidden` key is
 * only present (as `true`) when `opts.hidden` is truthy.
 */
export const userMessageEntry = (
  message: Message,
  turnId: string,
  opts?: { hidden?: boolean },
): NewEntryNoId => {
  const visible = { type: "user_message" as const, message, turnId };
  return opts?.hidden ? { ...visible, hidden: true } : visible;
};

/** `assistant_message` entry for `message`, tagged with its turn and run ids. */
export const assistantMessageEntry = (
  message: Message,
  turnId: string,
  runId: string,
): NewEntryNoId => {
  return { type: "assistant_message", message, turnId, runId };
};

/** One `harness_message` entry per input message, in order, all sharing `turnId`. */
export const harnessMessageEntries = (
  messages: Message[],
  turnId: string,
): NewEntryNoId[] => {
  const built: NewEntryNoId[] = [];
  for (const message of messages) {
    built.push({ type: "harness_message", message, turnId });
  }
  return built;
};

/**
 * `compaction` entry carrying the summary message and `firstKeptSeq`. The token
 * counters are copied only when the caller supplied them (a value of `0` is
 * kept; `undefined` is left out).
 */
export const compactionEntry = (
  summaryMessage: Message,
  firstKeptSeq: number,
  opts?: { tokensBefore?: number; tokensAfter?: number },
): NewEntryNoId => {
  const stats: { tokensBefore?: number; tokensAfter?: number } = {};
  if (opts?.tokensBefore !== undefined) stats.tokensBefore = opts.tokensBefore;
  if (opts?.tokensAfter !== undefined) stats.tokensAfter = opts.tokensAfter;
  return { type: "compaction", summaryMessage, firstKeptSeq, ...stats };
};

/** `subagent_result` entry wrapping a pending subagent result as-is. */
export const subagentResultEntry = (
  result: PendingSubagentResult,
): NewEntryNoId => {
  return { type: "subagent_result", result };
};

/** `callback_started` entry carrying the given `consumedSeqs` list. */
export const callbackStartedEntry = (consumedSeqs: number[]): NewEntryNoId => {
  return { type: "callback_started", consumedSeqs };
};

/**
 * `assistant_amendment` entry pointing at `targetEntryId`. `appendText` is
 * included only when it is a non-empty string.
 */
export const assistantAmendmentEntry = (
  targetEntryId: string,
  appendText: string,
): NewEntryNoId => {
  const amendment = { type: "assistant_amendment" as const, targetEntryId };
  return appendText ? { ...amendment, appendText } : amendment;
};
|
|
142
|
+
|
|
143
|
+
// --- "new harness messages this turn" diff ---------------------------------
|
|
144
|
+
|
|
145
|
+
/**
 * Work out which harness messages were added by the turn that just finished,
 * given the `_harnessMessages` array before (`prev`) and after (`next`).
 *
 * This is a length-based estimate, because the blob replaces the array
 * wholesale rather than appending to it:
 *  - `next` empty or missing: nothing new.
 *  - `prev` empty or missing: all of `next` is new.
 *  - `next` at least as long as `prev`: `next` is assumed to be `prev` plus a
 *    suffix, and that suffix is returned.
 *  - `next` shorter than `prev` (history was reshaped, e.g. by a compaction):
 *    there is no clean suffix, so all of `next` is returned with
 *    `approximate: true`.
 *
 * @returns the messages considered new, and whether that set is an
 *   approximation rather than an exact suffix.
 */
export const newHarnessMessagesThisTurn = (
  prev: Message[] | undefined,
  next: Message[] | undefined,
): { messages: Message[]; approximate: boolean } => {
  const before = prev?.length ?? 0;
  const after = next ?? [];

  if (after.length === 0) return { messages: [], approximate: false };
  if (after.length < before) return { messages: after, approximate: true };

  // With nothing before, the whole array is new and is returned as-is.
  const added = before === 0 ? after : after.slice(before);
  return { messages: added, approximate: false };
};
|
|
172
|
+
|
|
173
|
+
// --- parity checker ---------------------------------------------------------
|
|
174
|
+
|
|
175
|
+
/**
 * Whitespace-insensitive fingerprint of a message: `<role>:<text>`, where the
 * text has every run of whitespace collapsed to one space and no leading or
 * trailing whitespace.
 */
const projectText = (m: Message): string => {
  const words = getTextContent(m)
    .split(/\s+/)
    .filter((word) => word.length > 0);
  return `${m.role}:${words.join(" ")}`;
};

/** Fingerprint every message in order. */
const projectAll = (msgs: Message[]): string[] => {
  return msgs.map((m) => projectText(m));
};

/**
 * Describe a length disagreement between the entry-log side (`a`) and the blob
 * side (`b`); `null` when the two counts are equal.
 */
const countMismatch = (label: string, a: number, b: number): string | null => {
  if (a === b) return null;
  return `${label} length ${a} (entries) vs ${b} (blob)`;
};
|
|
186
|
+
|
|
187
|
+
/** How many trailing messages the parity checker compares on each side. */
const PARITY_TAIL_WINDOW = 5;

/** Max characters of each JSON-encoded projection quoted in a mismatch line. */
const PARITY_PREVIEW_CHARS = 120;

/**
 * Compare the last few projections of the entry-log side against the blob
 * side, walking backwards from the end of each list, and describe every
 * position where they differ. Only the overlap is compared (at most
 * PARITY_TAIL_WINDOW items); length differences are reported separately.
 */
const diffProjectionTails = (
  label: string,
  entriesProj: string[],
  blobProj: string[],
): string[] => {
  const found: string[] = [];
  const tail = Math.min(
    entriesProj.length,
    blobProj.length,
    PARITY_TAIL_WINDOW,
  );
  for (let i = 1; i <= tail; i++) {
    const ep = entriesProj[entriesProj.length - i];
    const bp = blobProj[blobProj.length - i];
    if (ep !== bp) {
      found.push(
        `${label} tail[-${i}] differs: entries=${JSON.stringify(ep).slice(0, PARITY_PREVIEW_CHARS)} blob=${JSON.stringify(bp).slice(0, PARITY_PREVIEW_CHARS)}`,
      );
    }
  }
  return found;
};

/**
 * Rebuild the LLM context and the display snapshot from the conversation's
 * entry log and compare them with the blob's copies.
 *
 * For each side that `blob` provides (`harnessMessages`, `displayMessages`)
 * the message counts are compared, then the trailing projections. Comparison
 * is on a normalized text projection, not byte equality: metadata, tool-call
 * framing and exact whitespace differ by construction between the two
 * representations.
 *
 * The outcome is logged under `[entries-parity]` with the conversation id:
 * `warn` with every mismatch, `info` when everything agrees, `error` if the
 * checker itself failed. Does nothing unless `entriesParityEnabled()` is true,
 * and never throws for failures while reading or comparing.
 */
export const verifyEntriesParity = async (
  store: ConversationStore,
  conversationId: string,
  blob: { harnessMessages?: Message[]; displayMessages?: Message[] },
  log: Logger,
): Promise<void> => {
  if (!entriesParityEnabled()) return;
  try {
    const entries = await store.readEntries(conversationId);
    const mismatches: string[] = [];

    if (blob.harnessMessages) {
      const llm = buildLlmContext(entries);
      const lenMismatch = countMismatch(
        "llmContext",
        llm.length,
        blob.harnessMessages.length,
      );
      if (lenMismatch) mismatches.push(lenMismatch);
      mismatches.push(
        ...diffProjectionTails(
          "llmContext",
          projectAll(llm),
          projectAll(blob.harnessMessages),
        ),
      );
    }

    if (blob.displayMessages) {
      // tailN large enough to cover the whole transcript for the diff.
      const snap = buildDisplaySnapshot(entries, Number.MAX_SAFE_INTEGER);
      const lenMismatch = countMismatch(
        "display",
        snap.totalMessages,
        blob.displayMessages.length,
      );
      if (lenMismatch) mismatches.push(lenMismatch);
      mismatches.push(
        ...diffProjectionTails(
          "display",
          projectAll(snap.messages),
          projectAll(blob.displayMessages),
        ),
      );
    }

    if (mismatches.length > 0) {
      log.warn(
        `[entries-parity] ${conversationId} MISMATCH (${mismatches.length}): ${mismatches.join(" | ")}`,
      );
    } else {
      log.info(`[entries-parity] ${conversationId} OK`);
    }
  } catch (err) {
    log.error(
      `[entries-parity] ${conversationId} checker threw (ignored): ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
  }
};
|
|
@@ -61,3 +61,10 @@ export {
|
|
|
61
61
|
type RunConversationTurnOpts,
|
|
62
62
|
type RunConversationTurnResult,
|
|
63
63
|
} from "./run-conversation-turn.js";
|
|
64
|
+
|
|
65
|
+
export {
|
|
66
|
+
appendEntriesSafe,
|
|
67
|
+
verifyEntriesParity,
|
|
68
|
+
entriesParityEnabled,
|
|
69
|
+
newHarnessMessagesThisTurn,
|
|
70
|
+
} from "./entries-dual-write.js";
|