@yeaft/webchat-agent 0.1.804 → 0.1.808
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/unify/conversation/persist.js +65 -0
- package/unify/effort.js +6 -5
- package/unify/engine.js +120 -5
- package/unify/groups/group-config.js +4 -4
- package/unify/groups/group-crud.js +1 -1
- package/unify/history-compact.js +96 -17
- package/unify/llm/adapter.js +5 -2
- package/unify/llm/anthropic.js +91 -14
- package/unify/turn-utils.js +15 -2
- package/unify/vp/seed-topup.js +19 -14
- package/unify/web-bridge.js +43 -5
package/package.json
CHANGED
package/unify/conversation/persist.js
CHANGED
|
@@ -142,6 +142,33 @@ function serializeMessage(msg) {
|
|
|
142
142
|
}
|
|
143
143
|
}
|
|
144
144
|
|
|
145
|
+
// task-327d: persist Anthropic extended-thinking blocks so the next turn
|
|
146
|
+
// can echo them back with their server-signed signature. Both fields are
|
|
147
|
+
// base64'd: thinking is multi-line text, and the signature is opaque
|
|
148
|
+
// bytes that don't need to be human-readable. Without this round-trip
|
|
149
|
+
// the next Anthropic request 400s with "content[].thinking in the
|
|
150
|
+
// thinking mode must be passed back to the API".
|
|
151
|
+
if (msg.thinkingBlocks && msg.thinkingBlocks.length > 0) {
|
|
152
|
+
fm.push(`thinkingBlocks:`);
|
|
153
|
+
for (const tb of msg.thinkingBlocks) {
|
|
154
|
+
if (!tb || typeof tb.signature !== 'string' || !tb.signature) continue;
|
|
155
|
+
if (tb.redacted) {
|
|
156
|
+
if (typeof tb.data !== 'string') continue;
|
|
157
|
+
const dataB64 = Buffer.from(tb.data, 'utf8').toString('base64');
|
|
158
|
+
const signatureB64 = Buffer.from(tb.signature, 'utf8').toString('base64');
|
|
159
|
+
fm.push(` - redacted: true`);
|
|
160
|
+
fm.push(` dataB64: ${dataB64}`);
|
|
161
|
+
fm.push(` signatureB64: ${signatureB64}`);
|
|
162
|
+
} else {
|
|
163
|
+
if (typeof tb.thinking !== 'string') continue;
|
|
164
|
+
const thinkingB64 = Buffer.from(tb.thinking, 'utf8').toString('base64');
|
|
165
|
+
const signatureB64 = Buffer.from(tb.signature, 'utf8').toString('base64');
|
|
166
|
+
fm.push(` - thinkingB64: ${thinkingB64}`);
|
|
167
|
+
fm.push(` signatureB64: ${signatureB64}`);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
145
172
|
fm.push('---');
|
|
146
173
|
fm.push('');
|
|
147
174
|
fm.push(content);
|
|
@@ -229,6 +256,44 @@ export function parseMessage(raw) {
|
|
|
229
256
|
if (toolCalls.length > 0) msg.toolCalls = toolCalls;
|
|
230
257
|
}
|
|
231
258
|
|
|
259
|
+
// task-327d: parse thinkingBlocks (mirror of toolCalls parser above)
|
|
260
|
+
if (frontmatter.includes('thinkingBlocks:')) {
|
|
261
|
+
const thinkingBlocks = [];
|
|
262
|
+
const tbMatch = frontmatter.match(/thinkingBlocks:\n((?:\s+-\s+[\s\S]*?)(?=\n\w|$))/);
|
|
263
|
+
if (tbMatch) {
|
|
264
|
+
const tbBlock = tbMatch[1];
|
|
265
|
+
const entries = tbBlock.split(/\n\s+-\s+/).filter(Boolean);
|
|
266
|
+
for (const entry of entries) {
|
|
267
|
+
const tb = {};
|
|
268
|
+
for (const line of entry.split('\n')) {
|
|
269
|
+
const trimmed = line.trim().replace(/^-\s+/, '');
|
|
270
|
+
const ci = trimmed.indexOf(':');
|
|
271
|
+
if (ci === -1) continue;
|
|
272
|
+
const k = trimmed.slice(0, ci).trim();
|
|
273
|
+
const v = trimmed.slice(ci + 1).trim();
|
|
274
|
+
if (k === 'thinkingB64') {
|
|
275
|
+
tb.thinking = Buffer.from(v, 'base64').toString('utf8');
|
|
276
|
+
} else if (k === 'dataB64') {
|
|
277
|
+
tb.data = Buffer.from(v, 'base64').toString('utf8');
|
|
278
|
+
} else if (k === 'signatureB64') {
|
|
279
|
+
tb.signature = Buffer.from(v, 'base64').toString('utf8');
|
|
280
|
+
} else if (k === 'redacted') {
|
|
281
|
+
tb.redacted = v === 'true';
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
// Both fields required — an unsigned block would 400 on replay.
|
|
285
|
+
if (tb.redacted) {
|
|
286
|
+
if (typeof tb.data === 'string' && typeof tb.signature === 'string' && tb.signature) {
|
|
287
|
+
thinkingBlocks.push(tb);
|
|
288
|
+
}
|
|
289
|
+
} else if (typeof tb.thinking === 'string' && typeof tb.signature === 'string' && tb.signature) {
|
|
290
|
+
thinkingBlocks.push(tb);
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
if (thinkingBlocks.length > 0) msg.thinkingBlocks = thinkingBlocks;
|
|
295
|
+
}
|
|
296
|
+
|
|
232
297
|
return msg;
|
|
233
298
|
}
|
|
234
299
|
|
package/unify/effort.js
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
* 4. null (no effort = adapter/router drops the param)
|
|
14
14
|
*
|
|
15
15
|
* Red lines:
|
|
16
|
-
* • Never error on unknown scenario — default to '
|
|
16
|
+
* • Never error on unknown scenario — default to 'max'.
|
|
17
17
|
* • Feature flag UNIFY_THINKING_V1 is enforced at the adapter/router
|
|
18
18
|
* layer; this module just computes the intended value. If the flag
|
|
19
19
|
* is off, adapters drop it anyway.
|
|
@@ -36,7 +36,8 @@ export const LONG_LOOP_TURN_THRESHOLD = 8;
|
|
|
36
36
|
* a scenario string before invoking `pickEffort()`.
|
|
37
37
|
*
|
|
38
38
|
* Tiers (6 scenarios per architect spec):
|
|
39
|
-
* chat →
|
|
39
|
+
* chat → max (default interactive pair-programming turn —
|
|
40
|
+
* quality over latency; per user 2026-05-22)
|
|
40
41
|
* consolidate → max (memory compaction — quality matters, runs once)
|
|
41
42
|
* dream → max (memory maintenance — same rationale)
|
|
42
43
|
* sub_agent → max (coordinator spawns + merges)
|
|
@@ -47,7 +48,7 @@ export const LONG_LOOP_TURN_THRESHOLD = 8;
|
|
|
47
48
|
* Unknown scenarios fall through to 'max'.
|
|
48
49
|
*/
|
|
49
50
|
export const SCENARIO_EFFORT = Object.freeze({
|
|
50
|
-
chat: '
|
|
51
|
+
chat: 'max',
|
|
51
52
|
consolidate: 'max',
|
|
52
53
|
dream: 'max',
|
|
53
54
|
sub_agent: 'max',
|
|
@@ -65,7 +66,7 @@ export const SCENARIO_EFFORT = Object.freeze({
|
|
|
65
66
|
* `/max` prefix, Settings slider, or API caller.
|
|
66
67
|
* 2. If toolLoopTurns >= LONG_LOOP_TURN_THRESHOLD, upgrade the
|
|
67
68
|
* base scenario to 'long_loop' (→ 'max').
|
|
68
|
-
* 3. Look up SCENARIO_EFFORT[scenario]; unknown → '
|
|
69
|
+
* 3. Look up SCENARIO_EFFORT[scenario]; unknown → 'max'.
|
|
69
70
|
*
|
|
70
71
|
* @param {object} ctx
|
|
71
72
|
* @param {string} [ctx.scenario='chat'] — Scenario tag; see SCENARIO_EFFORT.
|
|
@@ -92,7 +93,7 @@ export function pickEffort({ scenario = 'chat', toolLoopTurns = 0, userEffort =
|
|
|
92
93
|
}
|
|
93
94
|
|
|
94
95
|
// 3. Scenario table lookup.
|
|
95
|
-
return SCENARIO_EFFORT[scenario] || '
|
|
96
|
+
return SCENARIO_EFFORT[scenario] || 'max';
|
|
96
97
|
}
|
|
97
98
|
|
|
98
99
|
/**
|
package/unify/engine.js
CHANGED
|
@@ -35,7 +35,8 @@ import { runStopHooks } from './stop-hooks.js';
|
|
|
35
35
|
// pass a real threadId per (groupId, vpId, threadId) engine instance.
|
|
36
36
|
const MAIN_THREAD_ID = 'main';
|
|
37
37
|
import { pickEffort, parseEffortPrefix } from './effort.js';
|
|
38
|
-
import { normalizeEffort, resolveContextWindow } from './models.js';
|
|
38
|
+
import { DEFAULT_CONTEXT_WINDOW, normalizeEffort, resolveContextWindow, resolveModel } from './models.js';
|
|
39
|
+
import { countTurns } from './turn-utils.js';
|
|
39
40
|
import { attachRouterPlan, extractPriorPlan, stripMetaForWire } from './router/continuity.js';
|
|
40
41
|
import { resolveThinking } from './router/thinking.js';
|
|
41
42
|
import { approxTokens } from './memory/budget.js';
|
|
@@ -161,6 +162,51 @@ export function estimateMessagesTokens(system, messages) {
|
|
|
161
162
|
return total;
|
|
162
163
|
}
|
|
163
164
|
|
|
165
|
+
export const GROUP_CONTEXT_PRESSURE_RATIO = 0.8;
|
|
166
|
+
export const GROUP_MIN_TURNS_FOR_COMPACT = 5;
|
|
167
|
+
|
|
168
|
+
export function shouldAllowGroupReflection({
|
|
169
|
+
system = '',
|
|
170
|
+
messages = [],
|
|
171
|
+
model = null,
|
|
172
|
+
config = {},
|
|
173
|
+
groupId = null,
|
|
174
|
+
} = {}) {
|
|
175
|
+
if (!groupId) {
|
|
176
|
+
return {
|
|
177
|
+
allowed: true,
|
|
178
|
+
compactAllowed: true,
|
|
179
|
+
tokenEstimate: estimateMessagesTokens(system, messages),
|
|
180
|
+
threshold: 0,
|
|
181
|
+
contextWindow: null,
|
|
182
|
+
ratio: GROUP_CONTEXT_PRESSURE_RATIO,
|
|
183
|
+
turnCount: countTurns(messages),
|
|
184
|
+
usedFallbackContextWindow: false,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
const contextWindow = resolveContextWindow(model, config);
|
|
188
|
+
const hasRegistryContext = !!resolveModel(model)?.contextWindow;
|
|
189
|
+
const hasConfigContext = Number.isFinite(config?.maxContextTokens) && config.maxContextTokens > 0;
|
|
190
|
+
const threshold = Math.floor(contextWindow * GROUP_CONTEXT_PRESSURE_RATIO);
|
|
191
|
+
const tokenEstimate = estimateMessagesTokens(system, messages);
|
|
192
|
+
const overThreshold = tokenEstimate >= threshold;
|
|
193
|
+
const turnCount = countTurns(messages);
|
|
194
|
+
return {
|
|
195
|
+
// Group send defaults to no reflection. Trust the model until context
|
|
196
|
+
// pressure says we are near the model window.
|
|
197
|
+
allowed: overThreshold,
|
|
198
|
+
// Durable compact is also protected for tiny histories: fewer than five
|
|
199
|
+
// turns do not compact unless they already exceed the same 80% threshold.
|
|
200
|
+
compactAllowed: overThreshold || turnCount >= GROUP_MIN_TURNS_FOR_COMPACT,
|
|
201
|
+
tokenEstimate,
|
|
202
|
+
threshold,
|
|
203
|
+
contextWindow,
|
|
204
|
+
ratio: GROUP_CONTEXT_PRESSURE_RATIO,
|
|
205
|
+
turnCount,
|
|
206
|
+
usedFallbackContextWindow: !hasRegistryContext && !hasConfigContext && contextWindow === DEFAULT_CONTEXT_WINDOW,
|
|
207
|
+
};
|
|
208
|
+
}
|
|
209
|
+
|
|
164
210
|
// ─── Engine Events (superset of adapter events) ──────────────────
|
|
165
211
|
|
|
166
212
|
/**
|
|
@@ -1024,10 +1070,30 @@ export class Engine {
|
|
|
1024
1070
|
if (!Array.isArray(messages) || messages.length === 0) return null;
|
|
1025
1071
|
|
|
1026
1072
|
const tokenCount = conversationStore.hotTokens();
|
|
1073
|
+
const groupId = messages.find(m => m && typeof m.groupId === 'string' && m.groupId)?.groupId || null;
|
|
1074
|
+
const groupContextGate = shouldAllowGroupReflection({
|
|
1075
|
+
system: '',
|
|
1076
|
+
messages,
|
|
1077
|
+
model: this.#config.model,
|
|
1078
|
+
config: this.#config,
|
|
1079
|
+
groupId,
|
|
1080
|
+
});
|
|
1081
|
+
if (groupId && groupContextGate?.usedFallbackContextWindow) {
|
|
1082
|
+
this.#trace.log?.('group_context_window_fallback', {
|
|
1083
|
+
groupId,
|
|
1084
|
+
model: this.#config.model,
|
|
1085
|
+
contextWindow: groupContextGate.contextWindow,
|
|
1086
|
+
threshold: groupContextGate.threshold,
|
|
1087
|
+
});
|
|
1088
|
+
}
|
|
1089
|
+
if (groupId && !groupContextGate.compactAllowed) return null;
|
|
1090
|
+
|
|
1027
1091
|
const trig = evaluateCompactTriggers({
|
|
1028
1092
|
messages,
|
|
1029
1093
|
tokenCount,
|
|
1030
1094
|
contextLimit: this.#config.maxContextTokens || 200000,
|
|
1095
|
+
tokenRatio: groupId ? GROUP_CONTEXT_PRESSURE_RATIO : undefined,
|
|
1096
|
+
maxMessages: groupId ? Number.POSITIVE_INFINITY : undefined,
|
|
1031
1097
|
});
|
|
1032
1098
|
if (!trig.trigger) return null;
|
|
1033
1099
|
|
|
@@ -1358,13 +1424,34 @@ export class Engine {
|
|
|
1358
1424
|
{ role: 'user', content: finalUserContent },
|
|
1359
1425
|
];
|
|
1360
1426
|
|
|
1427
|
+
const groupReflectionGate = shouldAllowGroupReflection({
|
|
1428
|
+
system: systemPrompt,
|
|
1429
|
+
messages: conversationMessages,
|
|
1430
|
+
model: this.#config.model,
|
|
1431
|
+
config: this.#config,
|
|
1432
|
+
groupId,
|
|
1433
|
+
});
|
|
1434
|
+
const groupReflectionAllowed = groupReflectionGate.allowed === true;
|
|
1435
|
+
if (groupId && groupReflectionGate?.usedFallbackContextWindow) {
|
|
1436
|
+
this.#trace.log?.('group_context_window_fallback', {
|
|
1437
|
+
groupId,
|
|
1438
|
+
model: this.#config.model,
|
|
1439
|
+
contextWindow: groupReflectionGate.contextWindow,
|
|
1440
|
+
threshold: groupReflectionGate.threshold,
|
|
1441
|
+
});
|
|
1442
|
+
}
|
|
1443
|
+
|
|
1361
1444
|
// PR-L: T2 carry-forward. If a previous query()'s end-of-turn
|
|
1362
1445
|
// reflection has resolved, rewrite that turn's range in
|
|
1363
1446
|
// `conversationMessages` to a single assistant reflection message.
|
|
1364
1447
|
// If still pending, fall back to the exec-log stub — non-blocking,
|
|
1365
1448
|
// never wait. This runs BEFORE the first adapter.stream so the
|
|
1366
|
-
// upcoming call sees the rewritten history.
|
|
1367
|
-
|
|
1449
|
+
// upcoming call sees the rewritten history. Group send defaults to no
|
|
1450
|
+
// reflection; only high context pressure (>=80% of model window)
|
|
1451
|
+
// enables the carry-forward rewrite.
|
|
1452
|
+
if (groupReflectionAllowed) {
|
|
1453
|
+
yield* this.#applyPendingT2Reflections(conversationMessages, prompt);
|
|
1454
|
+
}
|
|
1368
1455
|
|
|
1369
1456
|
// PR-L: track this query()'s tool-arc for reflection.
|
|
1370
1457
|
// `turnStartIdx` is where the current user message lives; the arc
|
|
@@ -1486,6 +1573,7 @@ export class Engine {
|
|
|
1486
1573
|
let ttfbMs = null; // Time to first token
|
|
1487
1574
|
let responseText = '';
|
|
1488
1575
|
const toolCalls = [];
|
|
1576
|
+
const thinkingBlocks = []; // task-327d: collected from adapter for round-trip
|
|
1489
1577
|
let stopReason = 'end_turn';
|
|
1490
1578
|
const totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
1491
1579
|
// task-344: capture redacted raw request / raw response for debug panel.
|
|
@@ -1660,6 +1748,22 @@ export class Engine {
|
|
|
1660
1748
|
case 'thinking_delta':
|
|
1661
1749
|
yield event;
|
|
1662
1750
|
break;
|
|
1751
|
+
case 'thinking_block_end':
|
|
1752
|
+
// task-327d: collect server-signed thinking block for
|
|
1753
|
+
// round-trip replay. Anthropic 400s the next turn if a
|
|
1754
|
+
// thinking block (regular or redacted) was emitted but not
|
|
1755
|
+
// echoed back with its original signature. Drop blocks
|
|
1756
|
+
// missing a signature — replay-without-sig 400s identically.
|
|
1757
|
+
if (event.signature) {
|
|
1758
|
+
if (event.redacted) {
|
|
1759
|
+
thinkingBlocks.push({ redacted: true, data: event.data, signature: event.signature });
|
|
1760
|
+
} else {
|
|
1761
|
+
thinkingBlocks.push({ thinking: event.thinking, signature: event.signature });
|
|
1762
|
+
}
|
|
1763
|
+
} else {
|
|
1764
|
+
console.warn('[Engine] thinking block missing signature — dropping; next turn would 400 on replay');
|
|
1765
|
+
}
|
|
1766
|
+
break;
|
|
1663
1767
|
case 'tool_call':
|
|
1664
1768
|
toolCalls.push(event);
|
|
1665
1769
|
yield event;
|
|
@@ -1843,6 +1947,17 @@ export class Engine {
|
|
|
1843
1947
|
input: tc.input,
|
|
1844
1948
|
}));
|
|
1845
1949
|
}
|
|
1950
|
+
// task-327d: persist thinking blocks for the next turn's replay.
|
|
1951
|
+
// Anthropic requires assistant.thinking blocks to be echoed back
|
|
1952
|
+
// verbatim (text + signature) when the previous turn used extended
|
|
1953
|
+
// thinking — see translateMessages in anthropic.js.
|
|
1954
|
+
if (thinkingBlocks.length > 0) {
|
|
1955
|
+
assistantMsg.thinkingBlocks = thinkingBlocks.map(tb => (
|
|
1956
|
+
tb.redacted
|
|
1957
|
+
? { redacted: true, data: tb.data, signature: tb.signature }
|
|
1958
|
+
: { thinking: tb.thinking, signature: tb.signature }
|
|
1959
|
+
));
|
|
1960
|
+
}
|
|
1846
1961
|
// Phase 8 (DESIGN.md §9.15): carry the router plan back on the
|
|
1847
1962
|
// assistant message that produced it. Stripped at the wire by
|
|
1848
1963
|
// stripMetaForWire — pure bookkeeping for priorPlan continuity.
|
|
@@ -1994,7 +2109,7 @@ export class Engine {
|
|
|
1994
2109
|
// tight-loop retries — but no collapse happened, so T2 should
|
|
1995
2110
|
// still be allowed to fall back at end_turn. Fowler-review
|
|
1996
2111
|
// critical finding.
|
|
1997
|
-
if (queryToolCount > TURN_SUMMARY_THRESHOLD && t1CollapsesDone === 0) {
|
|
2112
|
+
if (groupReflectionAllowed && queryToolCount > TURN_SUMMARY_THRESHOLD && t1CollapsesDone === 0) {
|
|
1998
2113
|
const arcStart = turnStartIdx + 1;
|
|
1999
2114
|
const arcEnd = conversationMessages.length - 1;
|
|
2000
2115
|
if (arcEnd > arcStart) {
|
|
@@ -2275,7 +2390,7 @@ export class Engine {
|
|
|
2275
2390
|
// batch within the same query gets a distinct entry — without
|
|
2276
2391
|
// this the second batch would be silently skipped.
|
|
2277
2392
|
const t1BatchDue = queryToolCount - lastT1AtToolCount >= TOOL_BATCH_SIZE;
|
|
2278
|
-
if (t1BatchDue && !abortedDuringTools && !signal?.aborted) {
|
|
2393
|
+
if (groupReflectionAllowed && t1BatchDue && !abortedDuringTools && !signal?.aborted) {
|
|
2279
2394
|
const t1DedupKey = `${queryNumber}:t1:${queryToolCount}`;
|
|
2280
2395
|
if (this.#reflectedTurns.has(t1DedupKey)) {
|
|
2281
2396
|
// Defensive: should never hit since t1BatchDue gates re-entry
|
|
package/unify/groups/group-config.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* group-config.js — Per-group
|
|
2
|
+
* group-config.js — Per-group selected model state.
|
|
3
3
|
*
|
|
4
|
-
* Each group may carry its
|
|
4
|
+
* Each group may carry its header-selected model in `config.json` at
|
|
5
5
|
* ~/.yeaft/groups/<groupId>/config.json
|
|
6
6
|
*
|
|
7
7
|
* v1 schema (intentionally tiny — extend via additive keys only):
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Missing file → empty object. Missing field → fall back to user-level
|
|
13
13
|
* config (`~/.yeaft/config.json` via loadConfig()). Resolution is a
|
|
14
|
-
* shallow overlay
|
|
14
|
+
* shallow overlay for send-time effective config.
|
|
15
15
|
*
|
|
16
16
|
* Storage layer only — no engine wiring, no validation of model strings
|
|
17
17
|
* against the provider registry (that's done lazily at resolve time by
|
|
@@ -25,7 +25,7 @@ import { groupsRoot, resolveGroupYeaftDir } from './group-crud.js';
|
|
|
25
25
|
|
|
26
26
|
const CONFIG_FILE = 'config.json';
|
|
27
27
|
|
|
28
|
-
/** Whitelist of
|
|
28
|
+
/** Whitelist of persisted group model-state fields. Reject everything else. */
|
|
29
29
|
const ALLOWED_KEYS = new Set(['model']);
|
|
30
30
|
|
|
31
31
|
export class GroupConfigError extends Error {
|
|
package/unify/groups/group-crud.js
CHANGED
|
@@ -329,7 +329,7 @@ export function updateGroupAnnouncement(yeaftDir, groupId, text) {
|
|
|
329
329
|
}
|
|
330
330
|
|
|
331
331
|
/**
|
|
332
|
-
* (A.2.c)
|
|
332
|
+
* (A.2.c) Persist the model selected in the group conversation header.
|
|
333
333
|
* Returns the persisted config object so the caller can broadcast it.
|
|
334
334
|
*
|
|
335
335
|
* Throws GroupConfigError on validation failure (unknown key, bad type).
|
package/unify/history-compact.js
CHANGED
|
@@ -26,10 +26,12 @@
|
|
|
26
26
|
* 4. Keep the last `keepRecent` user→assistant turns intact so the model
|
|
27
27
|
* has fresh, untransformed context for whatever the user just said.
|
|
28
28
|
*
|
|
29
|
-
* Triggers
|
|
30
|
-
* - tokens <
|
|
29
|
+
* Triggers:
|
|
30
|
+
* - tokens < 12_000 → never compact (cheap chat, no point paying
|
|
31
31
|
* the summarizer)
|
|
32
|
-
* -
|
|
32
|
+
* - fewer than 5 turns → do not compact unless context pressure is
|
|
33
|
+
* already high
|
|
34
|
+
* - tokens > 80 % of `maxContextTokens` (defaults to 200K → 160K)
|
|
33
35
|
* - tokens > 200,000 hard ceiling
|
|
34
36
|
*
|
|
35
37
|
* The "turn > 20" trigger that an earlier revision used was dropped:
|
|
@@ -72,8 +74,10 @@ export const countTurns = countTurnsImpl;
|
|
|
72
74
|
* cost; the LLM hasn't started feeling the context yet either),
|
|
73
75
|
* - otherwise compact if ANY of:
|
|
74
76
|
* turnCount > 30 (back-stop for chats with many small turns)
|
|
75
|
-
* tokens >
|
|
77
|
+
* tokens > 80 % of `maxContextTokens` (default 200K → 160K)
|
|
76
78
|
* tokens > 200K hard ceiling
|
|
79
|
+
* Fewer than 5 turns are protected from compact unless the token
|
|
80
|
+
* threshold is already crossed.
|
|
77
81
|
*
|
|
78
82
|
* Lowered from 30K → 12K and re-enabled a turn-count back-stop because
|
|
79
83
|
* the previous "soft floor of 30K, no turn cap" combination is dead in
|
|
@@ -93,11 +97,13 @@ export const countTurns = countTurnsImpl;
|
|
|
93
97
|
export const DEFAULT_TURN_LIMIT = 30;
|
|
94
98
|
export const DEFAULT_MIN_TOKEN_FLOOR = 12_000;
|
|
95
99
|
export const DEFAULT_MAX_CONTEXT_TOKENS = 200_000;
|
|
96
|
-
export const DEFAULT_TOKEN_FRACTION = 0.
|
|
100
|
+
export const DEFAULT_TOKEN_FRACTION = 0.8;
|
|
97
101
|
export const DEFAULT_HARD_TOKEN_CEILING = 200_000;
|
|
102
|
+
export const DEFAULT_MIN_TURNS_FOR_COMPACT = 5;
|
|
103
|
+
export const DEFAULT_KEEP_TOOL_TURNS = 3;
|
|
98
104
|
/**
|
|
99
105
|
* Effective default token trigger when no `maxContextTokens` is provided:
|
|
100
|
-
* min(
|
|
106
|
+
* min(80% of 200K, 200K) = 160K. Preserved as `DEFAULT_TOKEN_LIMIT` for
|
|
101
107
|
* back-compat with existing tests that import this name.
|
|
102
108
|
*/
|
|
103
109
|
export const DEFAULT_TOKEN_LIMIT = Math.min(
|
|
@@ -183,12 +189,13 @@ export function estimateMessagesTokens(messages) {
|
|
|
183
189
|
* Pure trigger evaluator. Decides whether the in-memory history needs
|
|
184
190
|
* compaction. No I/O, no LLM call.
|
|
185
191
|
*
|
|
186
|
-
* Policy (2026-05-
|
|
187
|
-
* 1. tokens < `minTokenFloor` (default
|
|
188
|
-
* 2.
|
|
189
|
-
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
+
* Policy (2026-05-22):
|
|
193
|
+
* 1. tokens < `minTokenFloor` (default 12K) → trigger=false (always).
|
|
194
|
+
* 2. fewer than `minTurnsForCompact` turns (default 5) → trigger=false
|
|
195
|
+
* unless tokenCount already exceeds the fractional context threshold.
|
|
196
|
+
* 3. otherwise trigger if ANY of:
|
|
197
|
+
* turnCount > turnLimit (default 30 back-stop)
|
|
198
|
+
* tokenCount >= maxContextTokens*fraction (default 80%, reason='token_threshold')
|
|
192
199
|
* tokenCount > hardTokenCeiling (reason='token_ceiling')
|
|
193
200
|
*
|
|
194
201
|
* `tokenLimit` is preserved as a back-compat override for callers /
|
|
@@ -198,6 +205,7 @@ export function estimateMessagesTokens(messages) {
|
|
|
198
205
|
* @param {Array<object>} messages
|
|
199
206
|
* @param {{
|
|
200
207
|
* turnLimit?: number,
|
|
208
|
+
* minTurnsForCompact?: number,
|
|
201
209
|
* tokenLimit?: number,
|
|
202
210
|
* minTokenFloor?: number,
|
|
203
211
|
* maxContextTokens?: number,
|
|
@@ -206,7 +214,7 @@ export function estimateMessagesTokens(messages) {
|
|
|
206
214
|
* }} [opts]
|
|
207
215
|
* @returns {{trigger: boolean, reason: 'turn_count'|'token_threshold'|'token_ceiling'|null,
|
|
208
216
|
* turnCount: number, tokenCount: number,
|
|
209
|
-
* turnLimit: number, tokenLimit: number,
|
|
217
|
+
* turnLimit: number, tokenLimit: number, minTurnsForCompact: number,
|
|
210
218
|
* minTokenFloor: number, hardTokenCeiling: number}}
|
|
211
219
|
*/
|
|
212
220
|
export function shouldCompactHistory(messages, opts = {}) {
|
|
@@ -215,6 +223,7 @@ export function shouldCompactHistory(messages, opts = {}) {
|
|
|
215
223
|
const hardTokenCeiling = opts.hardTokenCeiling ?? DEFAULT_HARD_TOKEN_CEILING;
|
|
216
224
|
const maxContextTokens = opts.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
|
|
217
225
|
const tokenFraction = opts.tokenFraction ?? DEFAULT_TOKEN_FRACTION;
|
|
226
|
+
const minTurnsForCompact = opts.minTurnsForCompact ?? DEFAULT_MIN_TURNS_FOR_COMPACT;
|
|
218
227
|
// tokenLimit override wins; otherwise compute fractional threshold.
|
|
219
228
|
const tokenLimit =
|
|
220
229
|
opts.tokenLimit
|
|
@@ -225,7 +234,9 @@ export function shouldCompactHistory(messages, opts = {}) {
|
|
|
225
234
|
|
|
226
235
|
let reason = null;
|
|
227
236
|
// (1) Soft floor: never compact small conversations.
|
|
228
|
-
|
|
237
|
+
// (2) Short-history guard: fewer than five turns should not compact unless
|
|
238
|
+
// the estimated prompt is already at the context-pressure threshold.
|
|
239
|
+
if (tokenCount < minTokenFloor || (turnCount < minTurnsForCompact && tokenCount < tokenLimit)) {
|
|
229
240
|
return {
|
|
230
241
|
trigger: false,
|
|
231
242
|
reason: null,
|
|
@@ -233,6 +244,7 @@ export function shouldCompactHistory(messages, opts = {}) {
|
|
|
233
244
|
tokenCount,
|
|
234
245
|
turnLimit,
|
|
235
246
|
tokenLimit,
|
|
247
|
+
minTurnsForCompact,
|
|
236
248
|
minTokenFloor,
|
|
237
249
|
hardTokenCeiling,
|
|
238
250
|
};
|
|
@@ -240,7 +252,7 @@ export function shouldCompactHistory(messages, opts = {}) {
|
|
|
240
252
|
// (3) Trigger evaluation. The turn check is skipped when turnLimit is not finite.
|
|
241
253
|
if (Number.isFinite(turnLimit) && turnCount > turnLimit) reason = 'turn_count';
|
|
242
254
|
else if (tokenCount > hardTokenCeiling) reason = 'token_ceiling';
|
|
243
|
-
else if (tokenCount
|
|
255
|
+
else if (tokenCount >= tokenLimit) reason = 'token_threshold';
|
|
244
256
|
|
|
245
257
|
return {
|
|
246
258
|
trigger: reason !== null,
|
|
@@ -249,11 +261,71 @@ export function shouldCompactHistory(messages, opts = {}) {
|
|
|
249
261
|
tokenCount,
|
|
250
262
|
turnLimit,
|
|
251
263
|
tokenLimit,
|
|
264
|
+
minTurnsForCompact,
|
|
252
265
|
minTokenFloor,
|
|
253
266
|
hardTokenCeiling,
|
|
254
267
|
};
|
|
255
268
|
}
|
|
256
269
|
|
|
270
|
+
function hasContentAfterToolStrip(content) {
|
|
271
|
+
if (typeof content === 'string') return content.trim().length > 0;
|
|
272
|
+
if (Array.isArray(content)) return content.length > 0;
|
|
273
|
+
return content != null;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
function stripToolContentParts(content) {
|
|
277
|
+
if (!Array.isArray(content)) return content;
|
|
278
|
+
return content.filter(part => {
|
|
279
|
+
if (!part || typeof part !== 'object') return true;
|
|
280
|
+
return part.type !== 'tool_use'
|
|
281
|
+
&& part.type !== 'tool_result'
|
|
282
|
+
&& part.type !== 'function_call'
|
|
283
|
+
&& part.type !== 'function_call_output';
|
|
284
|
+
});
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* Remove tool-call / tool-result noise from turns older than the recent
|
|
289
|
+
* lossless window. The last `keepToolTurns` turns keep their full tool
|
|
290
|
+
* chains; older turns keep user/assistant text but lose `toolCalls`,
|
|
291
|
+
* Anthropic/OpenAI tool content blocks, and `role:'tool'` messages.
|
|
292
|
+
*
|
|
293
|
+
* This is deliberately a wire-history transform, not a summarizer: it
|
|
294
|
+
* never invents a summary and it never mutates input. Pair-sanitize runs
|
|
295
|
+
* afterwards so no orphan tool_use/tool_result can survive.
|
|
296
|
+
*
|
|
297
|
+
* @param {Array<object>} messages
|
|
298
|
+
* @param {{ keepToolTurns?: number }} [opts]
|
|
299
|
+
* @returns {Array<object>}
|
|
300
|
+
*/
|
|
301
|
+
export function stripToolNoiseFromOlderTurns(messages, opts = {}) {
|
|
302
|
+
if (!Array.isArray(messages) || messages.length === 0) return [];
|
|
303
|
+
const keepToolTurns = Number.isFinite(opts.keepToolTurns) && opts.keepToolTurns >= 0
|
|
304
|
+
? opts.keepToolTurns
|
|
305
|
+
: DEFAULT_KEEP_TOOL_TURNS;
|
|
306
|
+
const cutIdx = indexOfNthTurnFromEnd(messages, keepToolTurns);
|
|
307
|
+
if (cutIdx <= 0) return messages.map(m => ({ ...m }));
|
|
308
|
+
|
|
309
|
+
const older = messages.slice(0, cutIdx);
|
|
310
|
+
const recent = messages.slice(cutIdx);
|
|
311
|
+
const cleanedOlder = [];
|
|
312
|
+
|
|
313
|
+
for (const m of older) {
|
|
314
|
+
if (!m || typeof m !== 'object') continue;
|
|
315
|
+
if (m.role === 'tool') continue;
|
|
316
|
+
|
|
317
|
+
const next = { ...m };
|
|
318
|
+
if (Array.isArray(next.toolCalls)) delete next.toolCalls;
|
|
319
|
+
if (Array.isArray(next.content)) next.content = stripToolContentParts(next.content);
|
|
320
|
+
|
|
321
|
+
if (next.role === 'assistant' && !hasContentAfterToolStrip(next.content)) continue;
|
|
322
|
+
if (next.role === 'user' && Array.isArray(next.content) && next.content.length === 0) continue;
|
|
323
|
+
cleanedOlder.push(next);
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
return [...cleanedOlder, ...recent.map(m => ({ ...m }))];
|
|
327
|
+
}
|
|
328
|
+
|
|
257
329
|
/**
|
|
258
330
|
* Strip noise from a message list before sending it to the summarizer:
|
|
259
331
|
* - drop `role: 'tool'` (raw tool results — too verbose, mostly redundant)
|
|
@@ -442,6 +514,7 @@ export async function compactHistory(messages, options) {
|
|
|
442
514
|
maxContextTokens,
|
|
443
515
|
tokenFraction,
|
|
444
516
|
hardTokenCeiling,
|
|
517
|
+
minTurnsForCompact: options?.minTurnsForCompact,
|
|
445
518
|
};
|
|
446
519
|
const before = shouldCompactHistory(messages, triggerOpts);
|
|
447
520
|
if (!before.trigger) {
|
|
@@ -581,7 +654,7 @@ export async function compactHistory(messages, options) {
|
|
|
581
654
|
* between trim (per-call) and compact (global) explicit.
|
|
582
655
|
*
|
|
583
656
|
* @param {Array<object>} snapshot
|
|
584
|
-
* @param {{ messageTokenBudget?: number, recentTurnCap?: number }} [opts]
|
|
657
|
+
* @param {{ messageTokenBudget?: number, recentTurnCap?: number, keepToolTurns?: number }} [opts]
|
|
585
658
|
* @returns {Array<object>}
|
|
586
659
|
*/
|
|
587
660
|
export function trimSnapshotForBudget(snapshot, opts = {}) {
|
|
@@ -607,6 +680,12 @@ export function trimSnapshotForBudget(snapshot, opts = {}) {
|
|
|
607
680
|
tokens = estimateMessagesTokens(trimmed);
|
|
608
681
|
}
|
|
609
682
|
|
|
610
|
-
// Stage 3:
|
|
683
|
+
// Stage 3: keep only the recent tool chains lossless. Older turns
|
|
684
|
+
// retain text but drop tool_use/tool_result noise before pair safety.
|
|
685
|
+
trimmed = stripToolNoiseFromOlderTurns(trimmed, {
|
|
686
|
+
keepToolTurns: opts.keepToolTurns,
|
|
687
|
+
});
|
|
688
|
+
|
|
689
|
+
// Stage 4: pair-sanitize to drop orphan tool_use/tool_result.
|
|
611
690
|
return pairSanitize(trimmed);
|
|
612
691
|
}
|
package/unify/llm/adapter.js
CHANGED
|
@@ -40,20 +40,23 @@
|
|
|
40
40
|
/**
|
|
41
41
|
* @typedef {{ type: 'text_delta', text: string }} TextDeltaEvent
|
|
42
42
|
* @typedef {{ type: 'thinking_delta', text: string }} ThinkingDeltaEvent
|
|
43
|
+
* @typedef {{ type: 'thinking_block_end', thinking?: string, redacted?: boolean, data?: string, signature: string }} ThinkingBlockEndEvent
|
|
43
44
|
* @typedef {{ type: 'tool_call', id: string, name: string, input: object }} ToolCallEvent
|
|
44
45
|
* @typedef {{ type: 'usage', inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number }} UsageEvent
|
|
45
46
|
* @typedef {{ type: 'stop', stopReason: 'end_turn' | 'tool_use' | 'max_tokens' }} StopEvent
|
|
46
47
|
* @typedef {{ type: 'error', error: Error, retryable: boolean }} ErrorEvent
|
|
47
48
|
*
|
|
48
|
-
* @typedef {TextDeltaEvent | ThinkingDeltaEvent | ToolCallEvent | UsageEvent | StopEvent | ErrorEvent} StreamEvent
|
|
49
|
+
* @typedef {TextDeltaEvent | ThinkingDeltaEvent | ThinkingBlockEndEvent | ToolCallEvent | UsageEvent | StopEvent | ErrorEvent} StreamEvent
|
|
49
50
|
*/
|
|
50
51
|
|
|
51
52
|
// ─── Unified Message Types ─────────────────────────────────────
|
|
52
53
|
|
|
53
54
|
/**
|
|
55
|
+
* @typedef {{ thinking: string, signature: string } | { redacted: true, data: string, signature: string }} ThinkingBlock
|
|
56
|
+
*
|
|
54
57
|
* @typedef {{ role: 'system', content: string }} SystemMessage
|
|
55
58
|
* @typedef {{ role: 'user', content: string }} UserMessage
|
|
56
|
-
* @typedef {{ role: 'assistant', content: string, toolCalls?: UnifiedToolCall[] }} AssistantMessage
|
|
59
|
+
* @typedef {{ role: 'assistant', content: string, toolCalls?: UnifiedToolCall[], thinkingBlocks?: ThinkingBlock[] }} AssistantMessage
|
|
57
60
|
* @typedef {{ role: 'tool', toolCallId: string, content: string, isError?: boolean }} ToolMessage
|
|
58
61
|
*
|
|
59
62
|
* @typedef {SystemMessage | UserMessage | AssistantMessage | ToolMessage} UnifiedMessage
|
package/unify/llm/anthropic.js
CHANGED
|
@@ -73,6 +73,24 @@ export class AnthropicAdapter extends LLMAdapter {
|
|
|
73
73
|
result.push({ role: 'user', content: msg.content });
|
|
74
74
|
} else if (msg.role === 'assistant') {
|
|
75
75
|
const content = [];
|
|
76
|
+
// task-327d: Anthropic requires thinking blocks to appear BEFORE
|
|
77
|
+
// any text / tool_use in the content array on echo-back. When the
|
|
78
|
+
// previous turn produced thinking blocks (with server-signed
|
|
79
|
+
// signature), we MUST replay them verbatim or the next request
|
|
80
|
+
// 400s with "content[].thinking in the thinking mode must be
|
|
81
|
+
// passed back to the API". Order is mandatory.
|
|
82
|
+
if (Array.isArray(msg.thinkingBlocks)) {
|
|
83
|
+
for (const tb of msg.thinkingBlocks) {
|
|
84
|
+
if (!tb || typeof tb.signature !== 'string' || !tb.signature) continue;
|
|
85
|
+
if (tb.redacted) {
|
|
86
|
+
if (typeof tb.data !== 'string') continue;
|
|
87
|
+
content.push({ type: 'redacted_thinking', data: tb.data, signature: tb.signature });
|
|
88
|
+
} else {
|
|
89
|
+
if (typeof tb.thinking !== 'string') continue;
|
|
90
|
+
content.push({ type: 'thinking', thinking: tb.thinking, signature: tb.signature });
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
76
94
|
if (msg.content) {
|
|
77
95
|
content.push({ type: 'text', text: msg.content });
|
|
78
96
|
}
|
|
@@ -216,9 +234,16 @@ export class AnthropicAdapter extends LLMAdapter {
|
|
|
216
234
|
const reader = response.body.getReader();
|
|
217
235
|
const decoder = new TextDecoder();
|
|
218
236
|
let buffer = '';
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
237
|
+
// task-327d: index-keyed per-block state. Anthropic streams content
|
|
238
|
+
// blocks sequentially today, but the protocol exposes `event.index`
|
|
239
|
+
// precisely because that's not guaranteed. Dispatch in
|
|
240
|
+
// content_block_stop must look up by index, never "whichever scalar
|
|
241
|
+
// happens to still be set." States by kind: 'tool_use', 'thinking',
|
|
242
|
+
// 'redacted_thinking'. Redacted blocks carry opaque `data` instead
|
|
243
|
+
// of `thinking` text but share the same echo-back rule (drop without
|
|
244
|
+
// signature → next turn 400s identically).
|
|
245
|
+
/** @type {Map<number, { kind: string, [k: string]: any }>} */
|
|
246
|
+
const blockByIndex = new Map();
|
|
222
247
|
// Accumulate raw SSE body verbatim for the debug panel. No truncation:
|
|
223
248
|
// see `redactRawRequest` in adapter.js for the verbatim-design rationale.
|
|
224
249
|
// Push-then-join keeps allocation bounded for multi-MiB payloads (avoids
|
|
@@ -254,38 +279,90 @@ export class AnthropicAdapter extends LLMAdapter {
|
|
|
254
279
|
|
|
255
280
|
if (type === 'content_block_start') {
|
|
256
281
|
const block = event.content_block;
|
|
282
|
+
const idx = event.index;
|
|
257
283
|
if (block?.type === 'tool_use') {
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
284
|
+
blockByIndex.set(idx, {
|
|
285
|
+
kind: 'tool_use',
|
|
286
|
+
id: block.id,
|
|
287
|
+
name: block.name,
|
|
288
|
+
input: '',
|
|
289
|
+
});
|
|
290
|
+
} else if (block?.type === 'thinking') {
|
|
291
|
+
blockByIndex.set(idx, {
|
|
292
|
+
kind: 'thinking',
|
|
293
|
+
thinking: typeof block.thinking === 'string' ? block.thinking : '',
|
|
294
|
+
signature: typeof block.signature === 'string' ? block.signature : '',
|
|
295
|
+
});
|
|
296
|
+
} else if (block?.type === 'redacted_thinking') {
|
|
297
|
+
// task-327d: API-redacted thinking. Body is opaque `data`
|
|
298
|
+
// (server-encrypted, not user-readable); we still need to
|
|
299
|
+
// echo it back with signature on the next turn or the API
|
|
300
|
+
// 400s with the same "must be passed back" error.
|
|
301
|
+
blockByIndex.set(idx, {
|
|
302
|
+
kind: 'redacted_thinking',
|
|
303
|
+
data: typeof block.data === 'string' ? block.data : '',
|
|
304
|
+
signature: typeof block.signature === 'string' ? block.signature : '',
|
|
305
|
+
});
|
|
261
306
|
}
|
|
262
307
|
} else if (type === 'content_block_delta') {
|
|
263
308
|
const delta = event.delta;
|
|
309
|
+
const idx = event.index;
|
|
310
|
+
const st = blockByIndex.get(idx);
|
|
264
311
|
if (delta?.type === 'text_delta') {
|
|
265
312
|
yield { type: 'text_delta', text: delta.text };
|
|
266
313
|
} else if (delta?.type === 'thinking_delta') {
|
|
314
|
+
// Forward delta for live UI; ALSO accumulate for round-trip.
|
|
315
|
+
if (st && st.kind === 'thinking') st.thinking += delta.thinking || '';
|
|
267
316
|
yield { type: 'thinking_delta', text: delta.thinking };
|
|
317
|
+
} else if (delta?.type === 'signature_delta') {
|
|
318
|
+
// Anthropic typically sends signature in one delta near the
|
|
319
|
+
// end of the (redacted_)thinking block. Accumulate defensively.
|
|
320
|
+
if (st && (st.kind === 'thinking' || st.kind === 'redacted_thinking')) {
|
|
321
|
+
st.signature += delta.signature || '';
|
|
322
|
+
}
|
|
268
323
|
} else if (delta?.type === 'input_json_delta') {
|
|
269
|
-
|
|
324
|
+
if (st && st.kind === 'tool_use') st.input += delta.partial_json;
|
|
270
325
|
}
|
|
271
326
|
} else if (type === 'content_block_stop') {
|
|
272
|
-
|
|
327
|
+
const idx = event.index;
|
|
328
|
+
const st = blockByIndex.get(idx);
|
|
329
|
+
if (!st) {
|
|
330
|
+
// Unknown / unhandled block kind (e.g. text — we don't track
|
|
331
|
+
// text state because text_delta is forwarded immediately).
|
|
332
|
+
} else if (st.kind === 'tool_use') {
|
|
273
333
|
let parsedInput = {};
|
|
274
334
|
try {
|
|
275
|
-
parsedInput =
|
|
335
|
+
parsedInput = st.input ? JSON.parse(st.input) : {};
|
|
276
336
|
} catch {
|
|
277
337
|
parsedInput = {};
|
|
278
338
|
}
|
|
279
339
|
yield {
|
|
280
340
|
type: 'tool_call',
|
|
281
|
-
id:
|
|
282
|
-
name:
|
|
341
|
+
id: st.id,
|
|
342
|
+
name: st.name,
|
|
283
343
|
input: parsedInput,
|
|
284
344
|
};
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
345
|
+
} else if (st.kind === 'thinking' || st.kind === 'redacted_thinking') {
|
|
346
|
+
// task-327d: emit ONE end-of-block event with the assembled
|
|
347
|
+
// payload + signature. Engine collects these for replay.
|
|
348
|
+
// We emit even when signature is empty so engine can
|
|
349
|
+
// warn-and-drop; replaying without signature would 400.
|
|
350
|
+
if (st.kind === 'thinking') {
|
|
351
|
+
yield {
|
|
352
|
+
type: 'thinking_block_end',
|
|
353
|
+
thinking: st.thinking,
|
|
354
|
+
signature: st.signature,
|
|
355
|
+
};
|
|
356
|
+
} else {
|
|
357
|
+
yield {
|
|
358
|
+
type: 'thinking_block_end',
|
|
359
|
+
redacted: true,
|
|
360
|
+
data: st.data,
|
|
361
|
+
signature: st.signature,
|
|
362
|
+
};
|
|
363
|
+
}
|
|
288
364
|
}
|
|
365
|
+
blockByIndex.delete(idx);
|
|
289
366
|
} else if (type === 'message_delta') {
|
|
290
367
|
const stopReason = event.delta?.stop_reason;
|
|
291
368
|
if (stopReason) {
|
package/unify/turn-utils.js
CHANGED
|
@@ -45,6 +45,17 @@ export function stripVpMentionPrefix(content) {
|
|
|
45
45
|
return content.replace(/^@vp-[A-Za-z0-9_-]+\s+/, '');
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
function canonicalUserTurnContent(content) {
|
|
49
|
+
if (typeof content === 'string') return stripVpMentionPrefix(content);
|
|
50
|
+
if (!Array.isArray(content)) return null;
|
|
51
|
+
const text = content
|
|
52
|
+
.filter(part => part && typeof part === 'object' && part.type === 'text')
|
|
53
|
+
.map(part => typeof part.text === 'string' ? part.text : '')
|
|
54
|
+
.join('\n')
|
|
55
|
+
.trim();
|
|
56
|
+
return text ? stripVpMentionPrefix(text) : null;
|
|
57
|
+
}
|
|
58
|
+
|
|
48
59
|
/**
|
|
49
60
|
* Count "turns" — distinct user prompts after `@vp-X` collapsing.
|
|
50
61
|
*
|
|
@@ -62,7 +73,8 @@ export function countTurns(messages) {
|
|
|
62
73
|
let prev = null;
|
|
63
74
|
for (const m of messages) {
|
|
64
75
|
if (!m || m.role !== 'user') continue;
|
|
65
|
-
const canonical =
|
|
76
|
+
const canonical = canonicalUserTurnContent(m.content);
|
|
77
|
+
if (canonical == null) continue;
|
|
66
78
|
if (canonical !== prev) {
|
|
67
79
|
n++;
|
|
68
80
|
prev = canonical;
|
|
@@ -101,7 +113,8 @@ export function indexOfNthTurnFromEnd(messages, n) {
|
|
|
101
113
|
let candidate = -1;
|
|
102
114
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
103
115
|
if (!messages[i] || messages[i].role !== 'user') continue;
|
|
104
|
-
const canonical =
|
|
116
|
+
const canonical = canonicalUserTurnContent(messages[i].content);
|
|
117
|
+
if (canonical == null) continue;
|
|
105
118
|
if (canonical !== openCanonical) {
|
|
106
119
|
// Boundary: a new (older) turn starts here.
|
|
107
120
|
turnsFromEnd++;
|
package/unify/vp/seed-topup.js
CHANGED
|
@@ -3,17 +3,17 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Problem: `seedDefaultVps` is first-run-only — once the library has any VP
|
|
5
5
|
* in it, that function never runs again. When we expanded the default roster
|
|
6
|
-
* from 12 to
|
|
7
|
-
* business, writing, science, arts), existing installs would never see the
|
|
8
|
-
*
|
|
6
|
+
* from 12 to 33 (philosophy, psychology, strategy, history, investing,
|
|
7
|
+
* business, writing, science, arts, Omni), existing installs would never see the
|
|
8
|
+
* new VPs without either (a) the user manually deleting their library or
|
|
9
9
|
* (b) a forced overwrite that would clobber their hand edits.
|
|
10
10
|
*
|
|
11
11
|
* This module runs on every agent start alongside `seedDefaultVps` and does
|
|
12
12
|
* two minimal, additive things:
|
|
13
13
|
*
|
|
14
|
-
* 1. **Top-up missing
|
|
15
|
-
* on disk
|
|
16
|
-
*
|
|
14
|
+
* 1. **Top-up missing stock VPs**. If a vpId from `DEFAULT_VPS` is not
|
|
15
|
+
* on disk, `createVp()` it. This keeps product-owned defaults such as
|
|
16
|
+
* Omni and the expanded role roster visible in group/member pickers.
|
|
17
17
|
*
|
|
18
18
|
* 2. **Backfill the `area` frontmatter line** on existing seeded VPs whose
|
|
19
19
|
* role.md predates the area field. The body is left BYTE-IDENTICAL —
|
|
@@ -24,9 +24,9 @@
|
|
|
24
24
|
* Hard rules:
|
|
25
25
|
* - **Never** overwrite a VP that is on disk. The user might have edited
|
|
26
26
|
* persona/role/traits; that is their truth, not ours.
|
|
27
|
-
* - **
|
|
28
|
-
*
|
|
29
|
-
*
|
|
27
|
+
* - **Keep stock defaults available.** If a shipped stock VP is missing,
|
|
28
|
+
* recreate it, but never overwrite an on-disk VP. The group/member picker
|
|
29
|
+
* depends on these product-owned defaults being present.
|
|
30
30
|
* - Best-effort: any failure is logged, never thrown.
|
|
31
31
|
*
|
|
32
32
|
* Pre-ledger deletion caveat: on the very first top-up against an existing
|
|
@@ -34,8 +34,8 @@
|
|
|
34
34
|
* deleted VP X before the expansion landed" from "X was never seeded." The
|
|
35
35
|
* bootstrap records only on-disk ids as `legacy`; an id the user had deleted
|
|
36
36
|
* BEFORE this code shipped looks identical to a brand-new default and will
|
|
37
|
-
* be recreated once.
|
|
38
|
-
*
|
|
37
|
+
* be recreated once. Stock defaults remain authoritative product entries and
|
|
38
|
+
* may be recreated later if missing; existing files are still never overwritten.
|
|
39
39
|
*
|
|
40
40
|
* Sidecar file: `<libDir>/.seeded-versions.json`
|
|
41
41
|
*
|
|
@@ -56,6 +56,7 @@ import { join } from 'path';
|
|
|
56
56
|
import { createVp, VpCrudError } from './vp-crud.js';
|
|
57
57
|
import { DEFAULT_VP_LIB_DIR, personaHash } from './vp-store.js';
|
|
58
58
|
import { DEFAULT_VPS } from './seed-defaults.js';
|
|
59
|
+
import { STOCK_VP_IDS } from './stock-ids.js';
|
|
59
60
|
|
|
60
61
|
const SEEDED_VERSIONS_FILE = '.seeded-versions.json';
|
|
61
62
|
const SEEDED_VERSIONS_VERSION = 1;
|
|
@@ -319,13 +320,17 @@ export function topUpDefaultVps(libDir = DEFAULT_VP_LIB_DIR) {
|
|
|
319
320
|
continue;
|
|
320
321
|
}
|
|
321
322
|
|
|
322
|
-
if (inLedger) {
|
|
323
|
-
// We seeded this before, user has since deleted it — respect that.
|
|
323
|
+
if (inLedger && !STOCK_VP_IDS.has(vpId)) {
|
|
324
|
+
// We seeded this non-stock VP before and the user has since deleted it — respect that.
|
|
325
|
+
// Stock/default personas are product-owned roster entries and must remain
|
|
326
|
+
// available in group creation/member pickers after migrations. Recreate
|
|
327
|
+
// them below without overwriting anything that exists on disk.
|
|
324
328
|
respectedDeletes.push(vpId);
|
|
325
329
|
continue;
|
|
326
330
|
}
|
|
327
331
|
|
|
328
|
-
// Missing on disk and never seeded —
|
|
332
|
+
// Missing on disk and never seeded — or a missing stock VP that must remain available.
|
|
333
|
+
|
|
329
334
|
try {
|
|
330
335
|
createVp(vp, { libDir });
|
|
331
336
|
versions.seeded[vpId] = personaHash(vp.persona);
|
package/unify/web-bridge.js
CHANGED
|
@@ -1416,12 +1416,12 @@ export function handleUnifyUpdateGroup(msg) {
|
|
|
1416
1416
|
}
|
|
1417
1417
|
|
|
1418
1418
|
/**
|
|
1419
|
-
*
|
|
1419
|
+
* Persist the model selected in the group conversation header. Cache invalidation:
|
|
1420
1420
|
* drop every cached Engine whose key starts with `${groupId}::` so the
|
|
1421
1421
|
* next turn picks up the new model. The group meta itself is untouched.
|
|
1422
1422
|
*
|
|
1423
1423
|
* Payload: { groupId, requestId, config: { model?: string|null } }
|
|
1424
|
-
* - `model: ''` or `null` clears the
|
|
1424
|
+
* - `model: ''` or `null` clears the selected group model (falls back to user default).
|
|
1425
1425
|
*/
|
|
1426
1426
|
export function handleUnifyUpdateGroupConfig(msg) {
|
|
1427
1427
|
const requestId = msg && msg.requestId;
|
|
@@ -1651,7 +1651,7 @@ function maybeTransitionVpStatus(hctx, state) {
|
|
|
1651
1651
|
* todos, debug cards, and persistence all share the same boundary.
|
|
1652
1652
|
*
|
|
1653
1653
|
* @param {object} event — engine event (text_delta / tool_call / …)
|
|
1654
|
-
* @param {{assistantTextParts:string[], toolCallsAccum:Array, toolResultsAccum:Array, resetQueryTimer:Function, groupId?:string, vpId?:string, turnId?:string}} hctx
|
|
1654
|
+
* @param {{assistantTextParts:string[], toolCallsAccum:Array, toolResultsAccum:Array, thinkingBlocksAccum?:Array, resetQueryTimer:Function, groupId?:string, vpId?:string, turnId?:string}} hctx
|
|
1655
1655
|
*/
|
|
1656
1656
|
function handleEngineEvent(event, hctx) {
|
|
1657
1657
|
hctx.resetQueryTimer();
|
|
@@ -1679,6 +1679,30 @@ function handleEngineEvent(event, hctx) {
|
|
|
1679
1679
|
sendUnifyEvent({ type: 'thinking_delta', text: event.text }, envelope);
|
|
1680
1680
|
break;
|
|
1681
1681
|
|
|
1682
|
+
case 'thinking_block_end':
|
|
1683
|
+
// task-327d: capture the assembled thinking block (with server-
|
|
1684
|
+
// signed signature) so the group history we hand to subsequent
|
|
1685
|
+
// turns / VPs includes it. Without this echo Anthropic 400s the
|
|
1686
|
+
// next request with "content[].thinking in the thinking mode must
|
|
1687
|
+
// be passed back to the API". The signature stays server-side
|
|
1688
|
+
// only — wire serializers (stripMetaForWire / sendUnifyOutput)
|
|
1689
|
+
// never reference thinkingBlocks, so it cannot leak to the UI.
|
|
1690
|
+
if (hctx.thinkingBlocksAccum && event.signature) {
|
|
1691
|
+
if (event.redacted) {
|
|
1692
|
+
hctx.thinkingBlocksAccum.push({
|
|
1693
|
+
redacted: true,
|
|
1694
|
+
data: event.data,
|
|
1695
|
+
signature: event.signature,
|
|
1696
|
+
});
|
|
1697
|
+
} else {
|
|
1698
|
+
hctx.thinkingBlocksAccum.push({
|
|
1699
|
+
thinking: event.thinking,
|
|
1700
|
+
signature: event.signature,
|
|
1701
|
+
});
|
|
1702
|
+
}
|
|
1703
|
+
}
|
|
1704
|
+
break;
|
|
1705
|
+
|
|
1682
1706
|
case 'tool_call':
|
|
1683
1707
|
// Capture tool_call for the assistant message's toolCalls array so
|
|
1684
1708
|
// the next turn's history pairs `tool_calls` with `role:'tool'`
|
|
@@ -2534,6 +2558,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
|
|
|
2534
2558
|
const assistantTextParts = [];
|
|
2535
2559
|
const toolCallsAccum = [];
|
|
2536
2560
|
const toolResultsAccum = [];
|
|
2561
|
+
const thinkingBlocksAccum = []; // task-327d: round-trip to next turn
|
|
2537
2562
|
const appendedUserPrompts = [];
|
|
2538
2563
|
let vpEngine = null;
|
|
2539
2564
|
|
|
@@ -2559,6 +2584,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
|
|
|
2559
2584
|
assistantTextParts,
|
|
2560
2585
|
toolCallsAccum,
|
|
2561
2586
|
toolResultsAccum,
|
|
2587
|
+
thinkingBlocksAccum,
|
|
2562
2588
|
resetQueryTimer,
|
|
2563
2589
|
groupId,
|
|
2564
2590
|
vpId,
|
|
@@ -2599,7 +2625,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
|
|
|
2599
2625
|
}
|
|
2600
2626
|
|
|
2601
2627
|
// Turn completed — atomically append this VP's output to shared history.
|
|
2602
|
-
appendTurnToGroupHistory(groupId, threadId, [prompt, ...appendedUserPrompts], assistantTextParts, toolCallsAccum, toolResultsAccum);
|
|
2628
|
+
appendTurnToGroupHistory(groupId, threadId, [prompt, ...appendedUserPrompts], assistantTextParts, toolCallsAccum, toolResultsAccum, thinkingBlocksAccum);
|
|
2603
2629
|
|
|
2604
2630
|
sendUnifyOutput({
|
|
2605
2631
|
type: 'assistant',
|
|
@@ -2705,7 +2731,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
|
|
|
2705
2731
|
* a session, this in-memory tape carries the un-collapsed form — which
|
|
2706
2732
|
* is fine because each VP turn's `engine.query` re-collapses on the fly.
|
|
2707
2733
|
*/
|
|
2708
|
-
function appendTurnToGroupHistory(groupId, threadId, prompts, assistantTextParts, toolCallsAccum, toolResultsAccum) {
|
|
2734
|
+
function appendTurnToGroupHistory(groupId, threadId, prompts, assistantTextParts, toolCallsAccum, toolResultsAccum, thinkingBlocksAccum) {
|
|
2709
2735
|
if (!groupId) return;
|
|
2710
2736
|
const history = getOrCreateGroupHistory(groupId);
|
|
2711
2737
|
const promptList = Array.isArray(prompts) ? prompts : [prompts];
|
|
@@ -2725,6 +2751,18 @@ function appendTurnToGroupHistory(groupId, threadId, prompts, assistantTextParts
|
|
|
2725
2751
|
input: tc.input,
|
|
2726
2752
|
}));
|
|
2727
2753
|
}
|
|
2754
|
+
// task-327d: carry thinking blocks across turns. Anthropic protocol
|
|
2755
|
+
// requires us to echo them back on the next request or the API
|
|
2756
|
+
// returns "content[].thinking in the thinking mode must be passed
|
|
2757
|
+
// back to the API". The signature is server-private — it stays in
|
|
2758
|
+
// this in-memory history and in agent-side persistence only.
|
|
2759
|
+
if (Array.isArray(thinkingBlocksAccum) && thinkingBlocksAccum.length > 0) {
|
|
2760
|
+
assistantMsg.thinkingBlocks = thinkingBlocksAccum.map(tb => (
|
|
2761
|
+
tb.redacted
|
|
2762
|
+
? { redacted: true, data: tb.data, signature: tb.signature }
|
|
2763
|
+
: { thinking: tb.thinking, signature: tb.signature }
|
|
2764
|
+
));
|
|
2765
|
+
}
|
|
2728
2766
|
history.push(assistantMsg);
|
|
2729
2767
|
|
|
2730
2768
|
for (const tr of toolResultsAccum) {
|