@booplex/bpx-consult 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ /**
2
+ * consensus — stance validation + confidence scoring.
3
+ *
4
+ * The two mechanics that give "no fake consensus" actual teeth, both lifted
5
+ * from my-zen's tools/consensus.py:
6
+ *
7
+ * - stance validation: did a member actually hold its assigned stance, or did
8
+ * it return mush? A `critic` that agrees with everything is theater.
9
+ * - confidence score: 0.4·success + 0.35·agreement + 0.25·avg_alignment,
10
+ * surfaced with the synthesis so you can see how solid the read is.
11
+ *
12
+ * Cheap and heuristic by design — these are signals, not verdicts. The
13
+ * synthesizer model still does the real merging; we just give it (and you)
14
+ * honest metadata about what the members actually did.
15
+ */
16
+
17
+ import type { Stance } from "./personas.js";
18
+
19
+ // ---------------------------------------------------------------------------
20
+ // Member result shape
21
+ // ---------------------------------------------------------------------------
22
+
/** Outcome of one council member's consultation, plus heuristic metadata. */
export interface MemberResult {
  /** Name of the persona this member played. */
  persona: string;
  /** The stance the member was assigned. */
  stance: Stance;
  /** Identifier of the model that produced the reply. */
  model: string;
  /** "ok" when the member returned usable text; "error" otherwise. */
  status: "ok" | "error";
  /** The member's reply text. */
  text: string;
  /** Optional failure description. */
  errorMessage?: string;
  /** Score in 0..1: how well the reply reflected the assigned stance. */
  alignment: number;
  /** Optional token accounting for the call. */
  usage?: { input: number; output: number; total: number };
}
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Stance validation
38
+ // ---------------------------------------------------------------------------
39
+
/**
 * Heuristic stance-alignment check. Returns a 0..1 score indicating how well
 * the reply reflects the assigned stance.
 *
 * Not an LLM judge — keyword/signal based to avoid an extra model call per
 * member:
 *   - `against`: looks for critique signals (flaw, risk, won't, breaks, ...).
 *     Praise with no critique scores 0.3 (a critic that only praised).
 *   - `for`: looks for advocacy signals (sound, works, solid, agree, ...).
 *     Critique with no advocacy scores 0.5.
 *   - `neutral`: 1 when both signal sets appear, 0.7 when only one does.
 *
 * Replies carrying no signal at all score 0.6 (borderline: trust the reply);
 * blank replies score 0.
 *
 * Signals are matched at the START of a word rather than as raw substrings.
 * Plain substring search counted "disagree" as "agree", "incorrect" as
 * "correct", "unsound" as "sound", and "frameworks" as "works", which
 * inverted the score for exactly the replies this check exists to catch.
 * Anchoring only the start keeps inflected forms ("fails", "risky",
 * "assumptions") matching.
 *
 * @param text   The member's reply.
 * @param stance The stance the member was assigned.
 * @returns Alignment score in 0..1.
 */
export function validateStance(text: string, stance: Stance): number {
  const t = text.toLowerCase();
  if (!t.trim()) return 0;

  const critiqueSignals = [
    "flaw", "risk", "won't", "won’t", "breaks", "broken", "wrong", "assumption",
    "failure", "fail", "missing", "edge case", "fragile", "beware", "problem",
    "issue", "concern", "gap", "unclear", "danger", "unhandled",
  ];
  const advocacySignals = [
    "sound", "works", "solid", "agree", "good", "correct", "appropriate",
    "reasonable", "holds up", "make sense", "makes sense", "fits", "right call",
  ];

  // One alternation per signal set, anchored with a leading word boundary so a
  // signal never matches in the middle of another word.
  const escapeForRegex = (s: string): string => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const mentionsAny = (signals: readonly string[]): boolean =>
    new RegExp(`\\b(?:${signals.map(escapeForRegex).join("|")})`).test(t);

  const hasCritique = mentionsAny(critiqueSignals);
  const hasAdvocacy = mentionsAny(advocacySignals);

  if (stance === "against") {
    if (hasCritique) return 1;
    if (hasAdvocacy) return 0.3; // critic that only praised — suspect
    return 0.6;
  }
  if (stance === "for") {
    if (hasAdvocacy) return 1;
    if (hasCritique) return 0.5; // advocate that only attacked — forgot its job
    return 0.6;
  }
  // neutral: weighing both sides is the win
  if (hasCritique && hasAdvocacy) return 1;
  if (hasCritique || hasAdvocacy) return 0.7;
  return 0.6;
}
89
+
90
+ // ---------------------------------------------------------------------------
91
+ // Confidence score (my-zen formula)
92
+ // ---------------------------------------------------------------------------
93
+
/** The three component ratios of a council run and their weighted blend. */
export interface ConfidenceBreakdown {
  /** Fraction of members that returned usable text. */
  successRatio: number;
  /** Size of the largest stance group divided by the successful member count. */
  agreementRatio: number;
  /** Mean stance-alignment score across successful members. */
  avgAlignment: number;
  /** 0..1 weighted: 0.4·success + 0.35·agreement + 0.25·avgAlignment */
  confidence: number;
}
101
+
/**
 * Score how much weight a council's output deserves.
 *
 *   - successRatio: successful members / all members.
 *   - agreementRatio: largest group of successful members sharing a `stance`
 *     value / successful members.
 *   - avgAlignment: mean `alignment` over successful members.
 *
 * The blend is 0.4·success + 0.35·agreement + 0.25·avgAlignment. Every
 * returned number is rounded to two decimals; the blend is computed from the
 * unrounded ratios. An empty council, or one where nobody succeeded, scores
 * zero across the board.
 */
export function computeConfidence(results: MemberResult[]): ConfidenceBreakdown {
  const okMembers = results.filter((member) => member.status === "ok");

  // Covers both "no members at all" and "every member failed".
  if (results.length === 0 || okMembers.length === 0) {
    return { successRatio: 0, agreementRatio: 0, avgAlignment: 0, confidence: 0 };
  }

  const successRatio = okMembers.length / results.length;

  // Tally stances and accumulate alignment in one pass over the successes.
  const tally: Record<Stance, number> = { for: 0, against: 0, neutral: 0 };
  let alignmentSum = 0;
  for (const member of okMembers) {
    tally[member.stance]++;
    alignmentSum += member.alignment;
  }

  const largestGroup = Math.max(...Object.values(tally));
  const agreementRatio = largestGroup / okMembers.length;
  const avgAlignment = alignmentSum / okMembers.length;

  const blended = successRatio * 0.4 + agreementRatio * 0.35 + avgAlignment * 0.25;

  return {
    successRatio: round2(successRatio),
    agreementRatio: round2(agreementRatio),
    avgAlignment: round2(avgAlignment),
    confidence: round2(blended),
  };
}
146
+
/** Round to two decimal places using Math.round on the value scaled by 100. */
function round2(n: number): number {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
150
+
151
+ // ---------------------------------------------------------------------------
152
+ // Disagreement detection — surface it, don't paper over it
153
+ // ---------------------------------------------------------------------------
154
+
/**
 * Report a material split in the council, if there is one.
 *
 * A member "held" its stance when it succeeded (`status === "ok"`) and scored
 * an alignment of at least 0.7. The council split when at least one `for`
 * member and at least one `against` member both held. Neutral members are
 * ignored.
 *
 * @returns A note for the synthesizer prompt naming both camps, or undefined
 *          when there is no such split.
 */
export function detectDisagreement(results: MemberResult[]): string | undefined {
  const advocates: string[] = [];
  const critics: string[] = [];

  for (const member of results) {
    const held = member.status === "ok" && member.alignment >= 0.7;
    if (!held) continue;
    if (member.stance === "for") advocates.push(member.persona);
    else if (member.stance === "against") critics.push(member.persona);
  }

  if (advocates.length === 0 || critics.length === 0) return undefined;

  const forNames = advocates.join(", ");
  const againstNames = critics.join(", ");
  return `The council split: ${forNames} advocated FOR; ${againstNames} pushed BACK. Surface this disagreement honestly in your synthesis — do not manufacture a false consensus.`;
}
@@ -0,0 +1,395 @@
1
+ /**
2
+ * context-engine — the §P fix.
3
+ *
4
+ * rpiv-advisor forwards Pi's already-compacted session context to the advisor
5
+ * model without re-fitting it to the advisor's *own* window. The executor's
6
+ * compacted context can be larger than a small-window advisor (flash-tier 32k,
7
+ * or a CLI at 32–64k), so the advisor call overflows *its* window and dies —
8
+ * exactly when the session is long enough to need it.
9
+ *
10
+ * This module re-fits Pi's compacted context to whatever window *this* advisor
11
+ * has. The pipeline (SPEC §C):
12
+ * 1. strip in-flight consult() call (lifted from rpiv-advisor/context.ts)
13
+ * 2. extract user/assistant/tool text (lifted from pi-advisor/advisor-messages.ts)
14
+ * 3. [fast-follow] stage + signal detection — not here yet, doesn't affect fit
15
+ * 4. [fast-follow] signal block
16
+ * 5. per-message char caps with [omitted] markers (pi-advisor clampText)
17
+ * 6. sliding window: keep first N + last M, drop oldest-first when still over
18
+ * 7. reserve response tokens (load-bearing — see invariant below)
19
+ * 8. assemble final Message[] + a closing context message
20
+ *
21
+ * Window-fit does NOT depend on stage/signal detection. Those improve the
22
+ * directive, not the fit. Ship the guaranteed-fit core first.
23
+ */
24
+
25
+ import type { Message, UserMessage, AssistantMessage, ToolResultMessage, TextContent } from "@earendil-works/pi-ai";
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Public types
29
+ // ---------------------------------------------------------------------------
30
+
/** Tunable limits for the re-fit pipeline. */
export interface ContextBudget {
  /** Char cap applied to each user text (string content or each text block). */
  userChars: number;
  /** Char cap applied to each assistant text block. */
  assistantChars: number;
  /** Char cap on a tool call's JSON-serialized arguments. */
  toolArgChars: number;
  /** Char cap applied to each tool-result text block. */
  toolResultChars: number;
  /** Number of leading messages the sliding window keeps. */
  keepFirst: number;
  /** Number of trailing messages the sliding window starts with before shrinking. */
  keepLast: number;
  /** Tokens reserved for the advisor's reply. The input budget is window minus this. */
  responseReserveTokens: number;
}
41
+
/** Output of the window-fit pass. */
export interface FitResult {
  /** The re-fitted messages, intended to fit within `maxInputTokens`. */
  messages: Message[];
  /** Count of messages the sliding window dropped (0 when nothing was dropped). */
  omittedCount: number;
  /** Estimated token size of the final payload, for diagnostics. */
  estimatedTokens: number;
  /** The token budget the fit was computed against. */
  maxInputTokens: number;
}
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Token estimation
55
+ // ---------------------------------------------------------------------------
56
+
/**
 * Constants behind the rough, deliberately conservative token estimate.
 *
 * English prose averages about 4 chars/token, but most of what gets forwarded
 * (tool arguments, tool results, code) tokenizes denser, nearer 3-3.5
 * chars/token. Estimating with 4 would undercount tokens on code-heavy
 * sessions, over-pack the payload, and overflow the advisor window — the very
 * §P failure this module exists to prevent.
 *
 * So the estimate divides by 3 (overcounting prose, which is the safe
 * direction) and then multiplies by 1.15 to absorb tokenizer differences
 * between providers. Under-packing is cheap; overflow is not.
 *
 * A real tokenizer is intentionally not used: it would be a heavy native
 * dependency for an estimate, providers tokenize differently anyway, and the
 * heuristic only has to be conservative enough that the cap + window passes
 * land under budget with margin.
 */
const CHARS_PER_TOKEN = 3;
const SAFETY_FACTOR = 1.15;
77
+
/**
 * Conservative token estimate for a string: length divided by
 * CHARS_PER_TOKEN, scaled by SAFETY_FACTOR, rounded up. Empty input is 0.
 */
export function estimateTokens(text: string): number {
  if (!text) return 0;
  const rawTokens = text.length / CHARS_PER_TOKEN;
  return Math.ceil(rawTokens * SAFETY_FACTOR);
}
82
+
/** Estimate a message's tokens from the flattened text of its text-bearing fields. */
export function estimateMessageTokens(msg: Message): number {
  const flattened = stringifyMessageForEstimate(msg);
  return estimateTokens(flattened);
}
87
+
/**
 * Collapse a message into one newline-joined string for token estimation.
 *
 *   - user: the string content as-is, or the text of its text blocks.
 *   - assistant: text blocks, JSON-serialized tool-call arguments, and
 *     thinking text; any other block type contributes an empty string.
 *   - anything else (tool results): the text of its text blocks.
 *
 * Non-text blocks such as images are not counted.
 */
function stringifyMessageForEstimate(msg: Message): string {
  const joinText = (content: unknown): string =>
    textBlocks(content)
      .map((block) => block.text)
      .join("\n");

  switch (msg.role) {
    case "user":
      return typeof msg.content === "string" ? msg.content : joinText(msg.content);
    case "assistant": {
      const parts = msg.content.map((block) => {
        switch (block.type) {
          case "text":
            return block.text;
          case "toolCall":
            return JSON.stringify(block.arguments ?? {});
          case "thinking":
            return block.thinking ?? "";
          default:
            return "";
        }
      });
      return parts.join("\n");
    }
    default:
      // toolResult
      return joinText(msg.content);
  }
}
110
+
111
+ // ---------------------------------------------------------------------------
112
+ // Step 1 — strip in-flight consult() call (faithful fork of rpiv-advisor)
113
+ // ---------------------------------------------------------------------------
114
+
/** Name of the tool call that this function strips from the transcript tail. */
export const CONSULT_TOOL_NAME = "consult";

/**
 * Drop the pending consult() tool call from the final assistant message.
 *
 * That call is the one that invoked us, so it has no matching tool result yet,
 * and providers reject payloads containing orphan tool calls. Only calls named
 * CONSULT_TOOL_NAME are removed; other trailing tool calls stay visible.
 *
 * Returns the input array itself when nothing needs removing. If stripping
 * empties the final message, that message is dropped entirely. The input is
 * never mutated.
 *
 * Lifted from rpiv-advisor/advisor/context.ts:stripInflightAdvisorCall, renamed
 * to the consult tool name.
 */
export function stripInflightConsultCall(messages: Message[]): Message[] {
  const tailIndex = messages.length - 1;
  if (tailIndex < 0) return messages;

  const tail = messages[tailIndex];
  if (tail.role !== "assistant") return messages;

  const kept = tail.content.filter((block) => block.type !== "toolCall" || block.name !== CONSULT_TOOL_NAME);
  const nothingRemoved = kept.length === tail.content.length;
  if (nothingRemoved) return messages;

  const earlier = messages.slice(0, tailIndex);
  return kept.length === 0 ? earlier : [...earlier, { ...tail, content: kept }];
}
135
+
136
+ // ---------------------------------------------------------------------------
137
+ // Step 5 — per-message char caps (fork of pi-advisor clampText)
138
+ // ---------------------------------------------------------------------------
139
+
/**
 * Clamp text to a char budget with an explicit marker. Lifted from
 * pi-advisor/advisor-messages.ts:clampText, simplified to cap by chars only.
 *
 * The text is trimmed first. If it fits within `maxChars` it is returned
 * as-is; otherwise the first `maxChars` UTF-16 code units are kept (trailing
 * whitespace removed) and a truncation marker is appended so the advisor sees
 * that content was cut. The marker is added on top of the cap, so a truncated
 * result is longer than `maxChars`.
 *
 * @param text     Text to clamp.
 * @param maxChars Maximum number of content chars to keep. Negative or NaN
 *                 values are treated as 0; without that, `slice(0, negative)`
 *                 would count from the end and keep almost the whole text.
 * @returns The trimmed text, or its truncated prefix plus the marker.
 */
export function clampText(text: string, maxChars: number): string {
  const normalized = text.trim();
  const limit = maxChars > 0 ? maxChars : 0;
  if (normalized.length <= limit) return normalized;

  let kept = normalized.slice(0, limit);
  // Don't leave half of a surrogate pair dangling at the cut point: a lone
  // high surrogate is not valid text and can be mangled when encoded.
  const lastUnit = kept.charCodeAt(kept.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) kept = kept.slice(0, -1);

  return `${kept.trimEnd()}…\n[truncated for advisor context]`;
}
151
+
/**
 * Cap a user message at `budget.userChars`. String content is clamped
 * directly; block content is reduced to its text blocks, each clamped on its
 * own (non-text blocks are dropped). Returns a new message.
 */
function clampUserMessage(msg: UserMessage, budget: ContextBudget): UserMessage {
  const limit = budget.userChars;
  if (typeof msg.content === "string") {
    return { ...msg, content: clampText(msg.content, limit) };
  }

  const capped: TextContent[] = [];
  for (const block of textBlocks(msg.content)) {
    capped.push({ type: "text", text: clampText(block.text, limit) });
  }
  return { ...msg, content: capped };
}
159
+
/**
 * Cap an assistant message block by block. Returns a new message.
 *
 *   - text: clamped to `budget.assistantChars`.
 *   - toolCall: kept, because the advisor needs to see what was attempted.
 *     If its JSON-serialized arguments exceed `budget.toolArgChars`, the
 *     arguments are replaced by `{ _truncated: <clamped JSON> }`.
 *   - anything else (e.g. thinking): passed through untouched.
 */
function clampAssistantMessage(msg: AssistantMessage, budget: ContextBudget): AssistantMessage {
  const content = msg.content.map((block) => {
    switch (block.type) {
      case "text":
        return { ...block, text: clampText(block.text, budget.assistantChars) };
      case "toolCall": {
        const serialized = JSON.stringify(block.arguments ?? {});
        const oversized = serialized.length > budget.toolArgChars;
        return oversized
          ? { ...block, arguments: { _truncated: clampText(serialized, budget.toolArgChars) } }
          : block;
      }
      default:
        return block; // thinking blocks passed through
    }
  });
  return { ...msg, content };
}
176
+
/**
 * Cap a tool-result message: keep only its text blocks, each clamped to
 * `budget.toolResultChars`. Returns a new message.
 */
function clampToolResultMessage(msg: ToolResultMessage, budget: ContextBudget): ToolResultMessage {
  const limit = budget.toolResultChars;
  const content: TextContent[] = [];
  for (const block of textBlocks(msg.content)) {
    content.push({ type: "text", text: clampText(block.text, limit) });
  }
  return { ...msg, content };
}
181
+
/**
 * Apply the per-role char caps to every message. Produces a new array of new
 * messages; the inputs are not mutated. Any role other than user or assistant
 * is handled as a tool result.
 */
export function applyCharCaps(messages: Message[], budget: ContextBudget): Message[] {
  const capOne = (msg: Message): Message => {
    switch (msg.role) {
      case "user":
        return clampUserMessage(msg, budget);
      case "assistant":
        return clampAssistantMessage(msg, budget);
      default:
        return clampToolResultMessage(msg, budget);
    }
  };
  return messages.map(capOne);
}
190
+
191
+ // ---------------------------------------------------------------------------
192
+ // Step 6 — sliding window (first-N + last-M, oldest-first drop)
193
+ // ---------------------------------------------------------------------------
194
+
/**
 * Fit a transcript into `maxInputTokens` by dropping messages from the middle.
 *
 * Strategy, in order:
 *   1. If the whole transcript already fits, return it unchanged.
 *   2. Keep the first `budget.keepFirst` messages (task framing) and the last
 *      `budget.keepLast` (freshest evidence) with an [omitted] marker between
 *      them. While that is over budget, shrink the tail one message at a
 *      time, down to a single tail message.
 *   3. Last resort: return only the marker plus the final message, re-clamped
 *      to whatever budget remains after the marker.
 *
 * The stopping condition is the token budget, not a fixed message count —
 * that is the §P fix.
 *
 * @param messages       Transcript to fit (expected to be char-capped already).
 * @param budget         Supplies `keepFirst` and `keepLast`.
 * @param maxInputTokens Token budget to fit under.
 */
export function fitToWindow(messages: Message[], budget: ContextBudget, maxInputTokens: number): FitResult {
  const count = messages.length;
  if (count === 0) {
    return { messages: [], omittedCount: 0, estimatedTokens: 0, maxInputTokens };
  }

  // Quick path: already fits.
  const fullCost = sumTokens(messages);
  if (fullCost <= maxInputTokens) {
    return { messages, omittedCount: 0, estimatedTokens: fullCost, maxInputTokens };
  }

  const headSize = Math.min(budget.keepFirst, count);
  const head = messages.slice(0, headSize);

  // Try progressively shorter tails, always retaining the head, down to one
  // tail message, before giving up on the head.
  for (let tailSize = Math.min(budget.keepLast, count - headSize); tailSize >= 1; tailSize--) {
    const dropped = count - headSize - tailSize;
    const attempt = [...head, omittedMarker(dropped), ...messages.slice(-tailSize)];
    const cost = sumTokens(attempt);
    if (cost <= maxInputTokens) {
      return { messages: attempt, omittedCount: dropped, estimatedTokens: cost, maxInputTokens };
    }
  }

  // Last resort: keep only the final message. The cap pass already ran, but
  // re-clamp the survivor to the budget left after the marker.
  const dropped = count - 1;
  const notice = omittedMarker(dropped);
  const survivor = clampSurvivor(messages[count - 1], maxInputTokens - sumTokens([notice]));
  const fallback = [notice, survivor];
  return { messages: fallback, omittedCount: dropped, estimatedTokens: sumTokens(fallback), maxInputTokens };
}
246
+
/** Build the user message that stands in for `omittedCount` dropped messages. */
function omittedMarker(omittedCount: number): UserMessage {
  const notice = `[${omittedCount} earlier transcript messages omitted to fit the advisor context window. Focus on the retained task framing and the most recent evidence.]`;
  return { role: "user", content: notice, timestamp: Date.now() };
}
254
+
/**
 * Last-resort clamp for when even a single message does not fit.
 *
 * The message is flattened to text and clamped to a char cap derived from the
 * remaining token budget (never below 64 chars). The result is then
 * re-estimated, because the estimator's safety factor, ceil rounding, and the
 * appended truncation marker can all push it back over. While it still
 * overshoots, the cap is shrunk by 30% and the clamp repeated. The retries
 * stop once the cap reaches 32 chars or after 20 attempts.
 *
 * The survivor is returned as a plain-text user message: its structure is
 * already lost by this point, and saying so beats a half-mangled typed
 * payload. The original timestamp is kept when the message has one.
 */
function clampSurvivor(msg: Message, remainingTokenBudget: number): Message {
  const flattened = stringifyMessageForEstimate(msg);

  let charCap = Math.max(64, Math.floor((remainingTokenBudget * CHARS_PER_TOKEN) / SAFETY_FACTOR));
  let clamped = clampText(flattened, charCap);

  // Bounded retry — never trust the char math alone on this path.
  for (let attempt = 0; attempt < 20 && charCap > 32 && estimateTokens(clamped) > remainingTokenBudget; attempt++) {
    charCap = Math.floor(charCap * 0.7);
    clamped = clampText(flattened, charCap);
  }

  const timestamp = "timestamp" in msg ? msg.timestamp : Date.now();
  return { role: "user", content: clamped, timestamp };
}
278
+
279
+ // ---------------------------------------------------------------------------
280
+ // Step 7 — reserve + derive the input budget (load-bearing)
281
+ // ---------------------------------------------------------------------------
282
+
/**
 * Derive the input token budget for this advisor call.
 *
 *   maxInputTokens = advisor.contextWindow - responseReserveTokens
 *
 * This is the §P fix in one line: the budget is relative to *this* advisor's
 * window, never a global constant.
 *
 * Defensive handling, so a bad registry value or config cannot poison the fit
 * pass:
 *   - An unknown window (undefined, NaN, infinite, or non-positive) falls back
 *     to a conservative 32k, so we still re-fit rather than forward blindly.
 *     A NaN window would otherwise produce a NaN budget, and every `<=`
 *     comparison against NaN is false.
 *   - The reserve is clamped to [0, 50% of window]. A negative reserve would
 *     otherwise yield a budget larger than the window. A NaN reserve uses the
 *     50% cap, since under-packing is the safe direction.
 *   - The result is never below 1024 tokens.
 *
 * @param advisorContextWindow The advisor model's context window, if known.
 * @param budget               Supplies `responseReserveTokens`.
 * @returns The number of input tokens the re-fit may use.
 */
export function deriveInputBudget(advisorContextWindow: number | undefined, budget: ContextBudget): number {
  const window =
    advisorContextWindow !== undefined && Number.isFinite(advisorContextWindow) && advisorContextWindow > 0
      ? advisorContextWindow
      : 32_000;

  // Cap the reserve so it can never eat more than half the window.
  const reserveCap = Math.floor(window * 0.5);
  const requested = budget.responseReserveTokens;
  const reserve = Number.isNaN(requested) ? reserveCap : Math.min(Math.max(0, requested), reserveCap);

  return Math.max(1024, window - reserve);
}
299
+
300
+ // ---------------------------------------------------------------------------
301
+ // Step 8 — assemble: the full pipeline
302
+ // ---------------------------------------------------------------------------
303
+
/** Everything `buildConsultContext` needs for one advisor call. */
export interface BuildContextInput {
  /** Pi's resolved (already-compacted) session messages for the active branch. */
  sessionMessages: Message[];
  /** Context window of this advisor model, from the registry; omit when unknown. */
  advisorContextWindow?: number;
  /** Char caps, window sizes, and response reserve to apply. */
  budget: ContextBudget;
  /** Optional closing directive (stage objective etc.), appended as the final user message. */
  directive?: string;
}
313
+
/**
 * Run the whole re-fit pipeline: strip → cap → window → append directive.
 *
 * Stripping first means the in-flight consult call is never budgeted for.
 * Capping before windowing means each message is already small when tokens
 * are counted for the drop decision.
 *
 * When a directive is given, its estimated tokens (plus 8 for message
 * overhead) are subtracted from the window-pass budget, floored at 1024, and
 * the directive is appended as the final user message.
 *
 * @returns The fitted messages, the number of messages omitted, the estimated
 *          tokens of the final payload including the directive, and the
 *          unadjusted input budget.
 */
export function buildConsultContext(input: BuildContextInput): FitResult {
  const { sessionMessages, advisorContextWindow, budget, directive } = input;

  const prepared = applyCharCaps(stripInflightConsultCall(sessionMessages), budget);
  const maxInputTokens = deriveInputBudget(advisorContextWindow, budget);

  // Set aside room for the closing directive before windowing.
  const directiveCost = directive ? estimateTokens(directive) + 8 : 0;
  const windowBudget = Math.max(1024, maxInputTokens - directiveCost);

  const fit = fitToWindow(prepared, budget, windowBudget);

  // The directive goes last — the advisor's attention is strongest at the tail.
  let messages = fit.messages;
  if (directive) {
    const closing: UserMessage = { role: "user", content: directive, timestamp: Date.now() };
    messages = messages.concat([closing]);
  }

  const estimatedTokens = sumTokens(messages);
  return { messages, omittedCount: fit.omittedCount, estimatedTokens, maxInputTokens };
}
350
+
351
+ // ---------------------------------------------------------------------------
352
+ // Helpers
353
+ // ---------------------------------------------------------------------------
354
+
/**
 * Pick the text blocks out of a message's content. Anything that is not an
 * array yields an empty list; array entries are kept only when their `type`
 * is "text" and their `text` is a string.
 */
function textBlocks(content: TextContent[] | unknown): TextContent[] {
  const found: TextContent[] = [];
  if (Array.isArray(content)) {
    for (const block of content) {
      if (block?.type === "text" && typeof block.text === "string") found.push(block);
    }
  }
  return found;
}
359
+
/** Total estimated tokens across a list of messages. */
function sumTokens(messages: Message[]): number {
  return messages.reduce((running, message) => running + estimateMessageTokens(message), 0);
}
365
+
/** Constructor helper for tests / callers: a user message with string content, timestamped now. */
export function userText(text: string): UserMessage {
  const message: UserMessage = { role: "user", content: text, timestamp: Date.now() };
  return message;
}
370
+
/**
 * Constructor helper for tests: an assistant message holding a single text
 * block, with placeholder provider metadata and all-zero usage.
 */
export function assistantText(text: string): AssistantMessage {
  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
  const zeroUsage = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: zeroCost };
  return {
    role: "assistant",
    content: [{ type: "text", text }],
    api: "anthropic-messages" as never,
    provider: "anthropic" as never,
    model: "test-model",
    usage: zeroUsage,
    stopReason: "stop",
    timestamp: Date.now(),
  };
}
384
+
/**
 * Constructor helper for tests: a successful `bash` tool result holding a
 * single text block, with a fixed placeholder call id.
 */
export function toolResultText(text: string): ToolResultMessage {
  const result: ToolResultMessage = {
    role: "toolResult",
    toolCallId: "test-call",
    toolName: "bash",
    content: [{ type: "text", text }],
    isError: false,
    timestamp: Date.now(),
  };
  return result;
}