@yeaft/webchat-agent 0.1.804 → 0.1.808

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yeaft/webchat-agent",
3
- "version": "0.1.804",
3
+ "version": "0.1.808",
4
4
  "description": "Remote agent for Yeaft WebChat — connects worker machines to the central server",
5
5
  "main": "index.js",
6
6
  "type": "module",
@@ -142,6 +142,33 @@ function serializeMessage(msg) {
142
142
  }
143
143
  }
144
144
 
145
+ // task-327d: persist Anthropic extended-thinking blocks so the next turn
146
+ // can echo them back with their server-signed signature. Both fields are
147
+ // base64'd: thinking is multi-line text, and the signature is opaque
148
+ // bytes that don't need to be human-readable. Without this round-trip
149
+ // the next Anthropic request 400s with "content[].thinking in the
150
+ // thinking mode must be passed back to the API".
151
+ if (msg.thinkingBlocks && msg.thinkingBlocks.length > 0) {
152
+ fm.push(`thinkingBlocks:`);
153
+ for (const tb of msg.thinkingBlocks) {
154
+ if (!tb || typeof tb.signature !== 'string' || !tb.signature) continue;
155
+ if (tb.redacted) {
156
+ if (typeof tb.data !== 'string') continue;
157
+ const dataB64 = Buffer.from(tb.data, 'utf8').toString('base64');
158
+ const signatureB64 = Buffer.from(tb.signature, 'utf8').toString('base64');
159
+ fm.push(` - redacted: true`);
160
+ fm.push(` dataB64: ${dataB64}`);
161
+ fm.push(` signatureB64: ${signatureB64}`);
162
+ } else {
163
+ if (typeof tb.thinking !== 'string') continue;
164
+ const thinkingB64 = Buffer.from(tb.thinking, 'utf8').toString('base64');
165
+ const signatureB64 = Buffer.from(tb.signature, 'utf8').toString('base64');
166
+ fm.push(` - thinkingB64: ${thinkingB64}`);
167
+ fm.push(` signatureB64: ${signatureB64}`);
168
+ }
169
+ }
170
+ }
171
+
145
172
  fm.push('---');
146
173
  fm.push('');
147
174
  fm.push(content);
@@ -229,6 +256,44 @@ export function parseMessage(raw) {
229
256
  if (toolCalls.length > 0) msg.toolCalls = toolCalls;
230
257
  }
231
258
 
259
+ // task-327d: parse thinkingBlocks (mirror of toolCalls parser above)
260
+ if (frontmatter.includes('thinkingBlocks:')) {
261
+ const thinkingBlocks = [];
262
+ const tbMatch = frontmatter.match(/thinkingBlocks:\n((?:\s+-\s+[\s\S]*?)(?=\n\w|$))/);
263
+ if (tbMatch) {
264
+ const tbBlock = tbMatch[1];
265
+ const entries = tbBlock.split(/\n\s+-\s+/).filter(Boolean);
266
+ for (const entry of entries) {
267
+ const tb = {};
268
+ for (const line of entry.split('\n')) {
269
+ const trimmed = line.trim().replace(/^-\s+/, '');
270
+ const ci = trimmed.indexOf(':');
271
+ if (ci === -1) continue;
272
+ const k = trimmed.slice(0, ci).trim();
273
+ const v = trimmed.slice(ci + 1).trim();
274
+ if (k === 'thinkingB64') {
275
+ tb.thinking = Buffer.from(v, 'base64').toString('utf8');
276
+ } else if (k === 'dataB64') {
277
+ tb.data = Buffer.from(v, 'base64').toString('utf8');
278
+ } else if (k === 'signatureB64') {
279
+ tb.signature = Buffer.from(v, 'base64').toString('utf8');
280
+ } else if (k === 'redacted') {
281
+ tb.redacted = v === 'true';
282
+ }
283
+ }
284
+ // Both fields required — an unsigned block would 400 on replay.
285
+ if (tb.redacted) {
286
+ if (typeof tb.data === 'string' && typeof tb.signature === 'string' && tb.signature) {
287
+ thinkingBlocks.push(tb);
288
+ }
289
+ } else if (typeof tb.thinking === 'string' && typeof tb.signature === 'string' && tb.signature) {
290
+ thinkingBlocks.push(tb);
291
+ }
292
+ }
293
+ }
294
+ if (thinkingBlocks.length > 0) msg.thinkingBlocks = thinkingBlocks;
295
+ }
296
+
232
297
  return msg;
233
298
  }
234
299
 
package/unify/effort.js CHANGED
@@ -13,7 +13,7 @@
13
13
  * 4. null (no effort = adapter/router drops the param)
14
14
  *
15
15
  * Red lines:
16
- * • Never error on unknown scenario — default to 'high'.
16
+ * • Never error on unknown scenario — default to 'max'.
17
17
  * • Feature flag UNIFY_THINKING_V1 is enforced at the adapter/router
18
18
  * layer; this module just computes the intended value. If the flag
19
19
  * is off, adapters drop it anyway.
@@ -36,7 +36,8 @@ export const LONG_LOOP_TURN_THRESHOLD = 8;
36
36
  * a scenario string before invoking `pickEffort()`.
37
37
  *
38
38
  * Tiers (6 scenarios per architect spec):
39
- * chat → high (default interactive pair-programming turn)
39
+ * chat → max (default interactive pair-programming turn
40
+ * quality over latency; per user 2026-05-22)
40
41
  * consolidate → max (memory compaction — quality matters, runs once)
41
42
  * dream → max (memory maintenance — same rationale)
42
43
  * sub_agent → max (coordinator spawns + merges)
@@ -47,7 +48,7 @@ export const LONG_LOOP_TURN_THRESHOLD = 8;
47
48
  * Unknown scenarios fall through to 'max'.
48
49
  */
49
50
  export const SCENARIO_EFFORT = Object.freeze({
50
- chat: 'high',
51
+ chat: 'max',
51
52
  consolidate: 'max',
52
53
  dream: 'max',
53
54
  sub_agent: 'max',
@@ -65,7 +66,7 @@ export const SCENARIO_EFFORT = Object.freeze({
65
66
  * `/max` prefix, Settings slider, or API caller.
66
67
  * 2. If toolLoopTurns >= LONG_LOOP_TURN_THRESHOLD, upgrade the
67
68
  * base scenario to 'long_loop' (→ 'max').
68
- * 3. Look up SCENARIO_EFFORT[scenario]; unknown → 'high'.
69
+ * 3. Look up SCENARIO_EFFORT[scenario]; unknown → 'max'.
69
70
  *
70
71
  * @param {object} ctx
71
72
  * @param {string} [ctx.scenario='chat'] — Scenario tag; see SCENARIO_EFFORT.
@@ -92,7 +93,7 @@ export function pickEffort({ scenario = 'chat', toolLoopTurns = 0, userEffort =
92
93
  }
93
94
 
94
95
  // 3. Scenario table lookup.
95
- return SCENARIO_EFFORT[scenario] || 'high';
96
+ return SCENARIO_EFFORT[scenario] || 'max';
96
97
  }
97
98
 
98
99
  /**
package/unify/engine.js CHANGED
@@ -35,7 +35,8 @@ import { runStopHooks } from './stop-hooks.js';
35
35
  // pass a real threadId per (groupId, vpId, threadId) engine instance.
36
36
  const MAIN_THREAD_ID = 'main';
37
37
  import { pickEffort, parseEffortPrefix } from './effort.js';
38
- import { normalizeEffort, resolveContextWindow } from './models.js';
38
+ import { DEFAULT_CONTEXT_WINDOW, normalizeEffort, resolveContextWindow, resolveModel } from './models.js';
39
+ import { countTurns } from './turn-utils.js';
39
40
  import { attachRouterPlan, extractPriorPlan, stripMetaForWire } from './router/continuity.js';
40
41
  import { resolveThinking } from './router/thinking.js';
41
42
  import { approxTokens } from './memory/budget.js';
@@ -161,6 +162,51 @@ export function estimateMessagesTokens(system, messages) {
161
162
  return total;
162
163
  }
163
164
 
165
+ export const GROUP_CONTEXT_PRESSURE_RATIO = 0.8;
166
+ export const GROUP_MIN_TURNS_FOR_COMPACT = 5;
167
+
168
+ export function shouldAllowGroupReflection({
169
+ system = '',
170
+ messages = [],
171
+ model = null,
172
+ config = {},
173
+ groupId = null,
174
+ } = {}) {
175
+ if (!groupId) {
176
+ return {
177
+ allowed: true,
178
+ compactAllowed: true,
179
+ tokenEstimate: estimateMessagesTokens(system, messages),
180
+ threshold: 0,
181
+ contextWindow: null,
182
+ ratio: GROUP_CONTEXT_PRESSURE_RATIO,
183
+ turnCount: countTurns(messages),
184
+ usedFallbackContextWindow: false,
185
+ };
186
+ }
187
+ const contextWindow = resolveContextWindow(model, config);
188
+ const hasRegistryContext = !!resolveModel(model)?.contextWindow;
189
+ const hasConfigContext = Number.isFinite(config?.maxContextTokens) && config.maxContextTokens > 0;
190
+ const threshold = Math.floor(contextWindow * GROUP_CONTEXT_PRESSURE_RATIO);
191
+ const tokenEstimate = estimateMessagesTokens(system, messages);
192
+ const overThreshold = tokenEstimate >= threshold;
193
+ const turnCount = countTurns(messages);
194
+ return {
195
+ // Group send defaults to no reflection. Trust the model until context
196
+ // pressure says we are near the model window.
197
+ allowed: overThreshold,
198
+ // Durable compact is also protected for tiny histories: fewer than five
199
+ // turns do not compact unless they already exceed the same 80% threshold.
200
+ compactAllowed: overThreshold || turnCount >= GROUP_MIN_TURNS_FOR_COMPACT,
201
+ tokenEstimate,
202
+ threshold,
203
+ contextWindow,
204
+ ratio: GROUP_CONTEXT_PRESSURE_RATIO,
205
+ turnCount,
206
+ usedFallbackContextWindow: !hasRegistryContext && !hasConfigContext && contextWindow === DEFAULT_CONTEXT_WINDOW,
207
+ };
208
+ }
209
+
164
210
  // ─── Engine Events (superset of adapter events) ──────────────────
165
211
 
166
212
  /**
@@ -1024,10 +1070,30 @@ export class Engine {
1024
1070
  if (!Array.isArray(messages) || messages.length === 0) return null;
1025
1071
 
1026
1072
  const tokenCount = conversationStore.hotTokens();
1073
+ const groupId = messages.find(m => m && typeof m.groupId === 'string' && m.groupId)?.groupId || null;
1074
+ const groupContextGate = shouldAllowGroupReflection({
1075
+ system: '',
1076
+ messages,
1077
+ model: this.#config.model,
1078
+ config: this.#config,
1079
+ groupId,
1080
+ });
1081
+ if (groupId && groupContextGate?.usedFallbackContextWindow) {
1082
+ this.#trace.log?.('group_context_window_fallback', {
1083
+ groupId,
1084
+ model: this.#config.model,
1085
+ contextWindow: groupContextGate.contextWindow,
1086
+ threshold: groupContextGate.threshold,
1087
+ });
1088
+ }
1089
+ if (groupId && !groupContextGate.compactAllowed) return null;
1090
+
1027
1091
  const trig = evaluateCompactTriggers({
1028
1092
  messages,
1029
1093
  tokenCount,
1030
1094
  contextLimit: this.#config.maxContextTokens || 200000,
1095
+ tokenRatio: groupId ? GROUP_CONTEXT_PRESSURE_RATIO : undefined,
1096
+ maxMessages: groupId ? Number.POSITIVE_INFINITY : undefined,
1031
1097
  });
1032
1098
  if (!trig.trigger) return null;
1033
1099
 
@@ -1358,13 +1424,34 @@ export class Engine {
1358
1424
  { role: 'user', content: finalUserContent },
1359
1425
  ];
1360
1426
 
1427
+ const groupReflectionGate = shouldAllowGroupReflection({
1428
+ system: systemPrompt,
1429
+ messages: conversationMessages,
1430
+ model: this.#config.model,
1431
+ config: this.#config,
1432
+ groupId,
1433
+ });
1434
+ const groupReflectionAllowed = groupReflectionGate.allowed === true;
1435
+ if (groupId && groupReflectionGate?.usedFallbackContextWindow) {
1436
+ this.#trace.log?.('group_context_window_fallback', {
1437
+ groupId,
1438
+ model: this.#config.model,
1439
+ contextWindow: groupReflectionGate.contextWindow,
1440
+ threshold: groupReflectionGate.threshold,
1441
+ });
1442
+ }
1443
+
1361
1444
  // PR-L: T2 carry-forward. If a previous query()'s end-of-turn
1362
1445
  // reflection has resolved, rewrite that turn's range in
1363
1446
  // `conversationMessages` to a single assistant reflection message.
1364
1447
  // If still pending, fall back to the exec-log stub — non-blocking,
1365
1448
  // never wait. This runs BEFORE the first adapter.stream so the
1366
- // upcoming call sees the rewritten history.
1367
- yield* this.#applyPendingT2Reflections(conversationMessages, prompt);
1449
+ // upcoming call sees the rewritten history. Group send defaults to no
1450
+ // reflection; only high context pressure (>=80% of model window)
1451
+ // enables the carry-forward rewrite.
1452
+ if (groupReflectionAllowed) {
1453
+ yield* this.#applyPendingT2Reflections(conversationMessages, prompt);
1454
+ }
1368
1455
 
1369
1456
  // PR-L: track this query()'s tool-arc for reflection.
1370
1457
  // `turnStartIdx` is where the current user message lives; the arc
@@ -1486,6 +1573,7 @@ export class Engine {
1486
1573
  let ttfbMs = null; // Time to first token
1487
1574
  let responseText = '';
1488
1575
  const toolCalls = [];
1576
+ const thinkingBlocks = []; // task-327d: collected from adapter for round-trip
1489
1577
  let stopReason = 'end_turn';
1490
1578
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
1491
1579
  // task-344: capture redacted raw request / raw response for debug panel.
@@ -1660,6 +1748,22 @@ export class Engine {
1660
1748
  case 'thinking_delta':
1661
1749
  yield event;
1662
1750
  break;
1751
+ case 'thinking_block_end':
1752
+ // task-327d: collect server-signed thinking block for
1753
+ // round-trip replay. Anthropic 400s the next turn if a
1754
+ // thinking block (regular or redacted) was emitted but not
1755
+ // echoed back with its original signature. Drop blocks
1756
+ // missing a signature — replay-without-sig 400s identically.
1757
+ if (event.signature) {
1758
+ if (event.redacted) {
1759
+ thinkingBlocks.push({ redacted: true, data: event.data, signature: event.signature });
1760
+ } else {
1761
+ thinkingBlocks.push({ thinking: event.thinking, signature: event.signature });
1762
+ }
1763
+ } else {
1764
+ console.warn('[Engine] thinking block missing signature — dropping; next turn would 400 on replay');
1765
+ }
1766
+ break;
1663
1767
  case 'tool_call':
1664
1768
  toolCalls.push(event);
1665
1769
  yield event;
@@ -1843,6 +1947,17 @@ export class Engine {
1843
1947
  input: tc.input,
1844
1948
  }));
1845
1949
  }
1950
+ // task-327d: persist thinking blocks for the next turn's replay.
1951
+ // Anthropic requires assistant.thinking blocks to be echoed back
1952
+ // verbatim (text + signature) when the previous turn used extended
1953
+ // thinking — see translateMessages in anthropic.js.
1954
+ if (thinkingBlocks.length > 0) {
1955
+ assistantMsg.thinkingBlocks = thinkingBlocks.map(tb => (
1956
+ tb.redacted
1957
+ ? { redacted: true, data: tb.data, signature: tb.signature }
1958
+ : { thinking: tb.thinking, signature: tb.signature }
1959
+ ));
1960
+ }
1846
1961
  // Phase 8 (DESIGN.md §9.15): carry the router plan back on the
1847
1962
  // assistant message that produced it. Stripped at the wire by
1848
1963
  // stripMetaForWire — pure bookkeeping for priorPlan continuity.
@@ -1994,7 +2109,7 @@ export class Engine {
1994
2109
  // tight-loop retries — but no collapse happened, so T2 should
1995
2110
  // still be allowed to fall back at end_turn. Fowler-review
1996
2111
  // critical finding.
1997
- if (queryToolCount > TURN_SUMMARY_THRESHOLD && t1CollapsesDone === 0) {
2112
+ if (groupReflectionAllowed && queryToolCount > TURN_SUMMARY_THRESHOLD && t1CollapsesDone === 0) {
1998
2113
  const arcStart = turnStartIdx + 1;
1999
2114
  const arcEnd = conversationMessages.length - 1;
2000
2115
  if (arcEnd > arcStart) {
@@ -2275,7 +2390,7 @@ export class Engine {
2275
2390
  // batch within the same query gets a distinct entry — without
2276
2391
  // this the second batch would be silently skipped.
2277
2392
  const t1BatchDue = queryToolCount - lastT1AtToolCount >= TOOL_BATCH_SIZE;
2278
- if (t1BatchDue && !abortedDuringTools && !signal?.aborted) {
2393
+ if (groupReflectionAllowed && t1BatchDue && !abortedDuringTools && !signal?.aborted) {
2279
2394
  const t1DedupKey = `${queryNumber}:t1:${queryToolCount}`;
2280
2395
  if (this.#reflectedTurns.has(t1DedupKey)) {
2281
2396
  // Defensive: should never hit since t1BatchDue gates re-entry
@@ -1,7 +1,7 @@
1
1
  /**
2
- * group-config.js — Per-group configuration overrides.
2
+ * group-config.js — Per-group selected model state.
3
3
  *
4
- * Each group may carry its own `config.json` at
4
+ * Each group may carry its header-selected model in `config.json` at
5
5
  * ~/.yeaft/groups/<groupId>/config.json
6
6
  *
7
7
  * v1 schema (intentionally tiny — extend via additive keys only):
@@ -11,7 +11,7 @@
11
11
  *
12
12
  * Missing file → empty object. Missing field → fall back to user-level
13
13
  * config (`~/.yeaft/config.json` via loadConfig()). Resolution is a
14
- * shallow overlay (group fields override user fields when truthy).
14
+ * shallow overlay for send-time effective config.
15
15
  *
16
16
  * Storage layer only — no engine wiring, no validation of model strings
17
17
  * against the provider registry (that's done lazily at resolve time by
@@ -25,7 +25,7 @@ import { groupsRoot, resolveGroupYeaftDir } from './group-crud.js';
25
25
 
26
26
  const CONFIG_FILE = 'config.json';
27
27
 
28
- /** Whitelist of fields a group may override. Reject everything else. */
28
+ /** Whitelist of persisted group model-state fields. Reject everything else. */
29
29
  const ALLOWED_KEYS = new Set(['model']);
30
30
 
31
31
  export class GroupConfigError extends Error {
@@ -329,7 +329,7 @@ export function updateGroupAnnouncement(yeaftDir, groupId, text) {
329
329
  }
330
330
 
331
331
  /**
332
- * (A.2.c) Update per-group config overrides (v1: just `model`).
332
+ * (A.2.c) Persist the model selected in the group conversation header.
333
333
  * Returns the persisted config object so the caller can broadcast it.
334
334
  *
335
335
  * Throws GroupConfigError on validation failure (unknown key, bad type).
@@ -26,10 +26,12 @@
26
26
  * 4. Keep the last `keepRecent` user→assistant turns intact so the model
27
27
  * has fresh, untransformed context for whatever the user just said.
28
28
  *
29
- * Triggers (any fires, but only above a 30K token soft floor):
30
- * - tokens < 30_000 → never compact (cheap chat, no point paying
29
+ * Triggers:
30
+ * - tokens < 12_000 → never compact (cheap chat, no point paying
31
31
  * the summarizer)
32
- * - tokens > 40 % of `maxContextTokens` (defaults to 200K → 80K)
32
+ * - fewer than 5 turns do not compact unless context pressure is
33
+ * already high
34
+ * - tokens > 80 % of `maxContextTokens` (defaults to 200K → 160K)
33
35
  * - tokens > 200,000 hard ceiling
34
36
  *
35
37
  * The "turn > 20" trigger that an earlier revision used was dropped:
@@ -72,8 +74,10 @@ export const countTurns = countTurnsImpl;
72
74
  * cost; the LLM hasn't started feeling the context yet either),
73
75
  * - otherwise compact if ANY of:
74
76
  * turnCount > 30 (back-stop for chats with many small turns)
75
- * tokens > 40 % of `maxContextTokens` (default 200K → 80K)
77
+ * tokens > 80 % of `maxContextTokens` (default 200K → 160K)
76
78
  * tokens > 200K hard ceiling
79
+ * Fewer than 5 turns are protected from compact unless the token
80
+ * threshold is already crossed.
77
81
  *
78
82
  * Lowered from 30K → 12K and re-enabled a turn-count back-stop because
79
83
  * the previous "soft floor of 30K, no turn cap" combination is dead in
@@ -93,11 +97,13 @@ export const countTurns = countTurnsImpl;
93
97
  export const DEFAULT_TURN_LIMIT = 30;
94
98
  export const DEFAULT_MIN_TOKEN_FLOOR = 12_000;
95
99
  export const DEFAULT_MAX_CONTEXT_TOKENS = 200_000;
96
- export const DEFAULT_TOKEN_FRACTION = 0.4;
100
+ export const DEFAULT_TOKEN_FRACTION = 0.8;
97
101
  export const DEFAULT_HARD_TOKEN_CEILING = 200_000;
102
+ export const DEFAULT_MIN_TURNS_FOR_COMPACT = 5;
103
+ export const DEFAULT_KEEP_TOOL_TURNS = 3;
98
104
  /**
99
105
  * Effective default token trigger when no `maxContextTokens` is provided:
100
- * min(40% of 200K, 200K) = 80K. Preserved as `DEFAULT_TOKEN_LIMIT` for
106
+ * min(80% of 200K, 200K) = 160K. Preserved as `DEFAULT_TOKEN_LIMIT` for
101
107
  * back-compat with existing tests that import this name.
102
108
  */
103
109
  export const DEFAULT_TOKEN_LIMIT = Math.min(
@@ -183,12 +189,13 @@ export function estimateMessagesTokens(messages) {
183
189
  * Pure trigger evaluator. Decides whether the in-memory history needs
184
190
  * compaction. No I/O, no LLM call.
185
191
  *
186
- * Policy (2026-05-01):
187
- * 1. tokens < `minTokenFloor` (default 30K) → trigger=false (always).
188
- * 2. otherwise trigger if ANY of:
189
- * turnCount > turnLimit (default Infinity, i.e. effectively off;
190
- * callers can pin a number to re-enable a turn-count trigger)
191
- * tokenCount > maxContextTokens*fraction (reason='token_threshold')
192
+ * Policy (2026-05-22):
193
+ * 1. tokens < `minTokenFloor` (default 12K) → trigger=false (always).
194
+ * 2. fewer than `minTurnsForCompact` turns (default 5) → trigger=false
195
+ * unless tokenCount already exceeds the fractional context threshold.
196
+ * 3. otherwise trigger if ANY of:
197
+ * turnCount > turnLimit (default 30 back-stop)
198
+ * tokenCount > maxContextTokens*fraction (default 80%, reason='token_threshold')
192
199
  * tokenCount > hardTokenCeiling (reason='token_ceiling')
193
200
  *
194
201
  * `tokenLimit` is preserved as a back-compat override for callers /
@@ -198,6 +205,7 @@ export function estimateMessagesTokens(messages) {
198
205
  * @param {Array<object>} messages
199
206
  * @param {{
200
207
  * turnLimit?: number,
208
+ * minTurnsForCompact?: number,
201
209
  * tokenLimit?: number,
202
210
  * minTokenFloor?: number,
203
211
  * maxContextTokens?: number,
@@ -206,7 +214,7 @@ export function estimateMessagesTokens(messages) {
206
214
  * }} [opts]
207
215
  * @returns {{trigger: boolean, reason: 'turn_count'|'token_threshold'|'token_ceiling'|null,
208
216
  * turnCount: number, tokenCount: number,
209
- * turnLimit: number, tokenLimit: number,
217
+ * turnLimit: number, tokenLimit: number, minTurnsForCompact: number,
210
218
  * minTokenFloor: number, hardTokenCeiling: number}}
211
219
  */
212
220
  export function shouldCompactHistory(messages, opts = {}) {
@@ -215,6 +223,7 @@ export function shouldCompactHistory(messages, opts = {}) {
215
223
  const hardTokenCeiling = opts.hardTokenCeiling ?? DEFAULT_HARD_TOKEN_CEILING;
216
224
  const maxContextTokens = opts.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
217
225
  const tokenFraction = opts.tokenFraction ?? DEFAULT_TOKEN_FRACTION;
226
+ const minTurnsForCompact = opts.minTurnsForCompact ?? DEFAULT_MIN_TURNS_FOR_COMPACT;
218
227
  // tokenLimit override wins; otherwise compute fractional threshold.
219
228
  const tokenLimit =
220
229
  opts.tokenLimit
@@ -225,7 +234,9 @@ export function shouldCompactHistory(messages, opts = {}) {
225
234
 
226
235
  let reason = null;
227
236
  // (1) Soft floor: never compact small conversations.
228
- if (tokenCount < minTokenFloor) {
237
+ // (2) Short-history guard: fewer than five turns should not compact unless
238
+ // the estimated prompt is already at the context-pressure threshold.
239
+ if (tokenCount < minTokenFloor || (turnCount < minTurnsForCompact && tokenCount < tokenLimit)) {
229
240
  return {
230
241
  trigger: false,
231
242
  reason: null,
@@ -233,6 +244,7 @@ export function shouldCompactHistory(messages, opts = {}) {
233
244
  tokenCount,
234
245
  turnLimit,
235
246
  tokenLimit,
247
+ minTurnsForCompact,
236
248
  minTokenFloor,
237
249
  hardTokenCeiling,
238
250
  };
@@ -240,7 +252,7 @@ export function shouldCompactHistory(messages, opts = {}) {
240
252
  // (2) Trigger evaluation. Turn check is opt-in (Infinity by default).
241
253
  if (Number.isFinite(turnLimit) && turnCount > turnLimit) reason = 'turn_count';
242
254
  else if (tokenCount > hardTokenCeiling) reason = 'token_ceiling';
243
- else if (tokenCount > tokenLimit) reason = 'token_threshold';
255
+ else if (tokenCount >= tokenLimit) reason = 'token_threshold';
244
256
 
245
257
  return {
246
258
  trigger: reason !== null,
@@ -249,11 +261,71 @@ export function shouldCompactHistory(messages, opts = {}) {
249
261
  tokenCount,
250
262
  turnLimit,
251
263
  tokenLimit,
264
+ minTurnsForCompact,
252
265
  minTokenFloor,
253
266
  hardTokenCeiling,
254
267
  };
255
268
  }
256
269
 
270
+ function hasContentAfterToolStrip(content) {
271
+ if (typeof content === 'string') return content.trim().length > 0;
272
+ if (Array.isArray(content)) return content.length > 0;
273
+ return content != null;
274
+ }
275
+
276
+ function stripToolContentParts(content) {
277
+ if (!Array.isArray(content)) return content;
278
+ return content.filter(part => {
279
+ if (!part || typeof part !== 'object') return true;
280
+ return part.type !== 'tool_use'
281
+ && part.type !== 'tool_result'
282
+ && part.type !== 'function_call'
283
+ && part.type !== 'function_call_output';
284
+ });
285
+ }
286
+
287
+ /**
288
+ * Remove tool-call / tool-result noise from turns older than the recent
289
+ * lossless window. The last `keepToolTurns` turns keep their full tool
290
+ * chains; older turns keep user/assistant text but lose `toolCalls`,
291
+ * Anthropic/OpenAI tool content blocks, and `role:'tool'` messages.
292
+ *
293
+ * This is deliberately a wire-history transform, not a summarizer: it
294
+ * never invents a summary and it never mutates input. Pair-sanitize runs
295
+ * afterwards so no orphan tool_use/tool_result can survive.
296
+ *
297
+ * @param {Array<object>} messages
298
+ * @param {{ keepToolTurns?: number }} [opts]
299
+ * @returns {Array<object>}
300
+ */
301
+ export function stripToolNoiseFromOlderTurns(messages, opts = {}) {
302
+ if (!Array.isArray(messages) || messages.length === 0) return [];
303
+ const keepToolTurns = Number.isFinite(opts.keepToolTurns) && opts.keepToolTurns >= 0
304
+ ? opts.keepToolTurns
305
+ : DEFAULT_KEEP_TOOL_TURNS;
306
+ const cutIdx = indexOfNthTurnFromEnd(messages, keepToolTurns);
307
+ if (cutIdx <= 0) return messages.map(m => ({ ...m }));
308
+
309
+ const older = messages.slice(0, cutIdx);
310
+ const recent = messages.slice(cutIdx);
311
+ const cleanedOlder = [];
312
+
313
+ for (const m of older) {
314
+ if (!m || typeof m !== 'object') continue;
315
+ if (m.role === 'tool') continue;
316
+
317
+ const next = { ...m };
318
+ if (Array.isArray(next.toolCalls)) delete next.toolCalls;
319
+ if (Array.isArray(next.content)) next.content = stripToolContentParts(next.content);
320
+
321
+ if (next.role === 'assistant' && !hasContentAfterToolStrip(next.content)) continue;
322
+ if (next.role === 'user' && Array.isArray(next.content) && next.content.length === 0) continue;
323
+ cleanedOlder.push(next);
324
+ }
325
+
326
+ return [...cleanedOlder, ...recent.map(m => ({ ...m }))];
327
+ }
328
+
257
329
  /**
258
330
  * Strip noise from a message list before sending it to the summarizer:
259
331
  * - drop `role: 'tool'` (raw tool results — too verbose, mostly redundant)
@@ -442,6 +514,7 @@ export async function compactHistory(messages, options) {
442
514
  maxContextTokens,
443
515
  tokenFraction,
444
516
  hardTokenCeiling,
517
+ minTurnsForCompact: options?.minTurnsForCompact,
445
518
  };
446
519
  const before = shouldCompactHistory(messages, triggerOpts);
447
520
  if (!before.trigger) {
@@ -581,7 +654,7 @@ export async function compactHistory(messages, options) {
581
654
  * between trim (per-call) and compact (global) explicit.
582
655
  *
583
656
  * @param {Array<object>} snapshot
584
- * @param {{ messageTokenBudget?: number, recentTurnCap?: number }} [opts]
657
+ * @param {{ messageTokenBudget?: number, recentTurnCap?: number, keepToolTurns?: number }} [opts]
585
658
  * @returns {Array<object>}
586
659
  */
587
660
  export function trimSnapshotForBudget(snapshot, opts = {}) {
@@ -607,6 +680,12 @@ export function trimSnapshotForBudget(snapshot, opts = {}) {
607
680
  tokens = estimateMessagesTokens(trimmed);
608
681
  }
609
682
 
610
- // Stage 3: pair-sanitize to drop orphan tool_use/tool_result.
683
+ // Stage 3: keep only the recent tool chains lossless. Older turns
684
+ // retain text but drop tool_use/tool_result noise before pair safety.
685
+ trimmed = stripToolNoiseFromOlderTurns(trimmed, {
686
+ keepToolTurns: opts.keepToolTurns,
687
+ });
688
+
689
+ // Stage 4: pair-sanitize to drop orphan tool_use/tool_result.
611
690
  return pairSanitize(trimmed);
612
691
  }
@@ -40,20 +40,23 @@
40
40
  /**
41
41
  * @typedef {{ type: 'text_delta', text: string }} TextDeltaEvent
42
42
  * @typedef {{ type: 'thinking_delta', text: string }} ThinkingDeltaEvent
43
+ * @typedef {{ type: 'thinking_block_end', thinking?: string, redacted?: boolean, data?: string, signature: string }} ThinkingBlockEndEvent
43
44
  * @typedef {{ type: 'tool_call', id: string, name: string, input: object }} ToolCallEvent
44
45
  * @typedef {{ type: 'usage', inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number }} UsageEvent
45
46
  * @typedef {{ type: 'stop', stopReason: 'end_turn' | 'tool_use' | 'max_tokens' }} StopEvent
46
47
  * @typedef {{ type: 'error', error: Error, retryable: boolean }} ErrorEvent
47
48
  *
48
- * @typedef {TextDeltaEvent | ThinkingDeltaEvent | ToolCallEvent | UsageEvent | StopEvent | ErrorEvent} StreamEvent
49
+ * @typedef {TextDeltaEvent | ThinkingDeltaEvent | ThinkingBlockEndEvent | ToolCallEvent | UsageEvent | StopEvent | ErrorEvent} StreamEvent
49
50
  */
50
51
 
51
52
  // ─── Unified Message Types ─────────────────────────────────────
52
53
 
53
54
  /**
55
+ * @typedef {{ thinking: string, signature: string } | { redacted: true, data: string, signature: string }} ThinkingBlock
56
+ *
54
57
  * @typedef {{ role: 'system', content: string }} SystemMessage
55
58
  * @typedef {{ role: 'user', content: string }} UserMessage
56
- * @typedef {{ role: 'assistant', content: string, toolCalls?: UnifiedToolCall[] }} AssistantMessage
59
+ * @typedef {{ role: 'assistant', content: string, toolCalls?: UnifiedToolCall[], thinkingBlocks?: ThinkingBlock[] }} AssistantMessage
57
60
  * @typedef {{ role: 'tool', toolCallId: string, content: string, isError?: boolean }} ToolMessage
58
61
  *
59
62
  * @typedef {SystemMessage | UserMessage | AssistantMessage | ToolMessage} UnifiedMessage
@@ -73,6 +73,24 @@ export class AnthropicAdapter extends LLMAdapter {
73
73
  result.push({ role: 'user', content: msg.content });
74
74
  } else if (msg.role === 'assistant') {
75
75
  const content = [];
76
+ // task-327d: Anthropic requires thinking blocks to appear BEFORE
77
+ // any text / tool_use in the content array on echo-back. When the
78
+ // previous turn produced thinking blocks (with server-signed
79
+ // signature), we MUST replay them verbatim or the next request
80
+ // 400s with "content[].thinking in the thinking mode must be
81
+ // passed back to the API". Order is mandatory.
82
+ if (Array.isArray(msg.thinkingBlocks)) {
83
+ for (const tb of msg.thinkingBlocks) {
84
+ if (!tb || typeof tb.signature !== 'string' || !tb.signature) continue;
85
+ if (tb.redacted) {
86
+ if (typeof tb.data !== 'string') continue;
87
+ content.push({ type: 'redacted_thinking', data: tb.data, signature: tb.signature });
88
+ } else {
89
+ if (typeof tb.thinking !== 'string') continue;
90
+ content.push({ type: 'thinking', thinking: tb.thinking, signature: tb.signature });
91
+ }
92
+ }
93
+ }
76
94
  if (msg.content) {
77
95
  content.push({ type: 'text', text: msg.content });
78
96
  }
@@ -216,9 +234,16 @@ export class AnthropicAdapter extends LLMAdapter {
216
234
  const reader = response.body.getReader();
217
235
  const decoder = new TextDecoder();
218
236
  let buffer = '';
219
- let currentToolCallId = null;
220
- let currentToolName = null;
221
- let currentToolInput = '';
237
+ // task-327d: index-keyed per-block state. Anthropic streams content
238
+ // blocks sequentially today, but the protocol exposes `event.index`
239
+ // precisely because that's not guaranteed. Dispatch in
240
+ // content_block_stop must look up by index, never "whichever scalar
241
+ // happens to still be set." States by kind: 'tool_use', 'thinking',
242
+ // 'redacted_thinking'. Redacted blocks carry opaque `data` instead
243
+ // of `thinking` text but share the same echo-back rule (drop without
244
+ // signature → next turn 400s identically).
245
+ /** @type {Map<number, { kind: string, [k: string]: any }>} */
246
+ const blockByIndex = new Map();
222
247
  // Accumulate raw SSE body verbatim for the debug panel. No truncation:
223
248
  // see `redactRawRequest` in adapter.js for the verbatim-design rationale.
224
249
  // Push-then-join keeps allocation bounded for multi-MiB payloads (avoids
@@ -254,38 +279,90 @@ export class AnthropicAdapter extends LLMAdapter {
254
279
 
255
280
  if (type === 'content_block_start') {
256
281
  const block = event.content_block;
282
+ const idx = event.index;
257
283
  if (block?.type === 'tool_use') {
258
- currentToolCallId = block.id;
259
- currentToolName = block.name;
260
- currentToolInput = '';
284
+ blockByIndex.set(idx, {
285
+ kind: 'tool_use',
286
+ id: block.id,
287
+ name: block.name,
288
+ input: '',
289
+ });
290
+ } else if (block?.type === 'thinking') {
291
+ blockByIndex.set(idx, {
292
+ kind: 'thinking',
293
+ thinking: typeof block.thinking === 'string' ? block.thinking : '',
294
+ signature: typeof block.signature === 'string' ? block.signature : '',
295
+ });
296
+ } else if (block?.type === 'redacted_thinking') {
297
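+ // task-327d: API-redacted thinking. Body is opaque `data`
298
+ // (server-encrypted, not user-readable); we still need to
299
+ // echo it back on the next turn or the API 400s with the same
300
+ // "must be passed back" error. NOTE(review): Anthropic's docs show this block carrying only `data` (no `signature`) — confirm that code requiring a non-empty signature does not drop it.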
301
+ blockByIndex.set(idx, {
302
+ kind: 'redacted_thinking',
303
+ data: typeof block.data === 'string' ? block.data : '',
304
+ signature: typeof block.signature === 'string' ? block.signature : '',
305
+ });
261
306
  }
262
307
  } else if (type === 'content_block_delta') {
263
308
  const delta = event.delta;
309
+ const idx = event.index;
310
+ const st = blockByIndex.get(idx);
264
311
  if (delta?.type === 'text_delta') {
265
312
  yield { type: 'text_delta', text: delta.text };
266
313
  } else if (delta?.type === 'thinking_delta') {
314
+ // Forward delta for live UI; ALSO accumulate for round-trip.
315
+ if (st && st.kind === 'thinking') st.thinking += delta.thinking || '';
267
316
  yield { type: 'thinking_delta', text: delta.thinking };
317
+ } else if (delta?.type === 'signature_delta') {
318
+ // Anthropic typically sends signature in one delta near the
319
+ // end of the (redacted_)thinking block. Accumulate defensively.
320
+ if (st && (st.kind === 'thinking' || st.kind === 'redacted_thinking')) {
321
+ st.signature += delta.signature || '';
322
+ }
268
323
  } else if (delta?.type === 'input_json_delta') {
269
- currentToolInput += delta.partial_json;
324
+ if (st && st.kind === 'tool_use') st.input += delta.partial_json;
270
325
  }
271
326
  } else if (type === 'content_block_stop') {
272
- if (currentToolCallId) {
327
+ const idx = event.index;
328
+ const st = blockByIndex.get(idx);
329
+ if (!st) {
330
+ // Unknown / unhandled block kind (e.g. text — we don't track
331
+ // text state because text_delta is forwarded immediately).
332
+ } else if (st.kind === 'tool_use') {
273
333
  let parsedInput = {};
274
334
  try {
275
- parsedInput = currentToolInput ? JSON.parse(currentToolInput) : {};
335
+ parsedInput = st.input ? JSON.parse(st.input) : {};
276
336
  } catch {
277
337
  parsedInput = {};
278
338
  }
279
339
  yield {
280
340
  type: 'tool_call',
281
- id: currentToolCallId,
282
- name: currentToolName,
341
+ id: st.id,
342
+ name: st.name,
283
343
  input: parsedInput,
284
344
  };
285
- currentToolCallId = null;
286
- currentToolName = null;
287
- currentToolInput = '';
345
+ } else if (st.kind === 'thinking' || st.kind === 'redacted_thinking') {
346
+ // task-327d: emit ONE end-of-block event with the assembled
347
+ // payload + signature. Engine collects these for replay.
348
+ // We emit even when signature is empty so engine can
349
+ // warn-and-drop; replaying without signature would 400.
350
+ if (st.kind === 'thinking') {
351
+ yield {
352
+ type: 'thinking_block_end',
353
+ thinking: st.thinking,
354
+ signature: st.signature,
355
+ };
356
+ } else {
357
+ yield {
358
+ type: 'thinking_block_end',
359
+ redacted: true,
360
+ data: st.data,
361
+ signature: st.signature,
362
+ };
363
+ }
288
364
  }
365
+ blockByIndex.delete(idx);
289
366
  } else if (type === 'message_delta') {
290
367
  const stopReason = event.delta?.stop_reason;
291
368
  if (stopReason) {
@@ -45,6 +45,17 @@ export function stripVpMentionPrefix(content) {
45
45
  return content.replace(/^@vp-[A-Za-z0-9_-]+\s+/, '');
46
46
  }
47
47
 
48
+ function canonicalUserTurnContent(content) { // Canonical text of a user message's content for turn comparison; null when it has no usable text.
49
+ if (typeof content === 'string') return stripVpMentionPrefix(content); // string content: returned via stripVpMentionPrefix, not trimmed
50
+ if (!Array.isArray(content)) return null; // neither a string nor an array of parts: nothing to compare
51
+ const text = content
52
+ .filter(part => part && typeof part === 'object' && part.type === 'text') // keep only parts whose type is 'text'
53
+ .map(part => typeof part.text === 'string' ? part.text : '') // a non-string `text` contributes an empty string
54
+ .join('\n')
55
+ .trim();
56
+ return text ? stripVpMentionPrefix(text) : null; // an array with no non-blank text yields null
57
+ }
58
+
48
59
  /**
49
60
  * Count "turns" — distinct user prompts after `@vp-X` collapsing.
50
61
  *
@@ -62,7 +73,8 @@ export function countTurns(messages) {
62
73
  let prev = null;
63
74
  for (const m of messages) {
64
75
  if (!m || m.role !== 'user') continue;
65
- const canonical = stripVpMentionPrefix(m.content || '');
76
+ const canonical = canonicalUserTurnContent(m.content);
77
+ if (canonical == null) continue;
66
78
  if (canonical !== prev) {
67
79
  n++;
68
80
  prev = canonical;
@@ -101,7 +113,8 @@ export function indexOfNthTurnFromEnd(messages, n) {
101
113
  let candidate = -1;
102
114
  for (let i = messages.length - 1; i >= 0; i--) {
103
115
  if (!messages[i] || messages[i].role !== 'user') continue;
104
- const canonical = stripVpMentionPrefix(messages[i].content || '');
116
+ const canonical = canonicalUserTurnContent(messages[i].content);
117
+ if (canonical == null) continue;
105
118
  if (canonical !== openCanonical) {
106
119
  // Boundary: a new (older) turn starts here.
107
120
  turnsFromEnd++;
@@ -3,17 +3,17 @@
3
3
  *
4
4
  * Problem: `seedDefaultVps` is first-run-only — once the library has any VP
5
5
  * in it, that function never runs again. When we expanded the default roster
6
- * from 12 to 32 (philosophy, psychology, strategy, history, investing,
7
- * business, writing, science, arts), existing installs would never see the
8
- * 20 new VPs without either (a) the user manually deleting their library or
6
+ * from 12 to 33 (philosophy, psychology, strategy, history, investing,
7
+ * business, writing, science, arts, Omni), existing installs would never see the
8
+ * new VPs without either (a) the user manually deleting their library or
9
9
  * (b) a forced overwrite that would clobber their hand edits.
10
10
  *
11
11
  * This module runs on every agent start alongside `seedDefaultVps` and does
12
12
  * two minimal, additive things:
13
13
  *
14
- * 1. **Top-up missing default VPs**. If a vpId from `DEFAULT_VPS` is not
15
- * on disk AND the user has not explicitly deleted it before (tracked
16
- * via `<libDir>/.seeded-versions.json`), `createVp()` it.
14
+ * 1. **Top-up missing stock VPs**. If a vpId from `DEFAULT_VPS` is not on disk, `createVp()` it,
15
+ * unless it is a non-stock id already in the seeded-versions ledger (treated as a user delete).
16
+ * This keeps product-owned defaults such as Omni and the expanded role roster visible in group/member pickers.
17
17
  *
18
18
  * 2. **Backfill the `area` frontmatter line** on existing seeded VPs whose
19
19
  * role.md predates the area field. The body is left BYTE-IDENTICAL —
@@ -24,9 +24,9 @@
24
24
  * Hard rules:
25
25
  * - **Never** overwrite a VP that is on disk. The user might have edited
26
26
  * persona/role/traits; that is their truth, not ours.
27
- * - **Never** recreate a VP the user has deleted. The seed-versions file
28
- * remembers "we have seeded this before" if it's gone now, the user
29
- * wants it gone.
27
+ * - **Keep stock defaults available.** If a shipped stock VP is missing,
28
+ * recreate it, but never overwrite an on-disk VP. The group/member picker
29
+ * depends on these product-owned defaults being present.
30
30
  * - Best-effort: any failure is logged, never thrown.
31
31
  *
32
32
  * Pre-ledger deletion caveat: on the very first top-up against an existing
@@ -34,8 +34,8 @@
34
34
  * deleted VP X before the expansion landed" from "X was never seeded." The
35
35
  * bootstrap records only on-disk ids as `legacy`; an id the user had deleted
36
36
  * BEFORE this code shipped looks identical to a brand-new default and will
37
- * be recreated once. After that single bootstrap event the ledger is
38
- * authoritative any subsequent delete is permanent.
37
+ * be recreated once. Stock defaults remain authoritative product entries and
38
+ * may be recreated later if missing; existing files are still never overwritten.
39
39
  *
40
40
  * Sidecar file: `<libDir>/.seeded-versions.json`
41
41
  *
@@ -56,6 +56,7 @@ import { join } from 'path';
56
56
  import { createVp, VpCrudError } from './vp-crud.js';
57
57
  import { DEFAULT_VP_LIB_DIR, personaHash } from './vp-store.js';
58
58
  import { DEFAULT_VPS } from './seed-defaults.js';
59
+ import { STOCK_VP_IDS } from './stock-ids.js';
59
60
 
60
61
  const SEEDED_VERSIONS_FILE = '.seeded-versions.json';
61
62
  const SEEDED_VERSIONS_VERSION = 1;
@@ -319,13 +320,17 @@ export function topUpDefaultVps(libDir = DEFAULT_VP_LIB_DIR) {
319
320
  continue;
320
321
  }
321
322
 
322
- if (inLedger) {
323
- // We seeded this before, user has since deleted it — respect that.
323
+ if (inLedger && !STOCK_VP_IDS.has(vpId)) {
324
+ // We seeded this non-stock VP before and the user has since deleted it — respect that.
325
+ // Stock VPs (ids in STOCK_VP_IDS) are product-owned roster entries and must remain
326
+ // available in group creation/member pickers after migrations, so they skip this
327
+ // branch and are recreated below without overwriting anything that exists on disk.
324
328
  respectedDeletes.push(vpId);
325
329
  continue;
326
330
  }
327
331
 
328
- // Missing on disk and never seeded — create it.
332
+ // Missing on disk and never seeded — or a missing stock VP that must remain available.
333
+
329
334
  try {
330
335
  createVp(vp, { libDir });
331
336
  versions.seeded[vpId] = personaHash(vp.persona);
@@ -1416,12 +1416,12 @@ export function handleUnifyUpdateGroup(msg) {
1416
1416
  }
1417
1417
 
1418
1418
  /**
1419
- * Update per-group config overrides (v1: `model`). Cache invalidation:
1419
+ * Persist the model selected in the group conversation header. Cache invalidation:
1420
1420
  * drop every cached Engine whose key starts with `${groupId}::` so the
1421
1421
  * next turn picks up the new model. The group meta itself is untouched.
1422
1422
  *
1423
1423
  * Payload: { groupId, requestId, config: { model?: string|null } }
1424
- * - `model: ''` or `null` clears the override (group falls back to user default).
1424
+ * - `model: ''` or `null` clears the selected group model (falls back to user default).
1425
1425
  */
1426
1426
  export function handleUnifyUpdateGroupConfig(msg) {
1427
1427
  const requestId = msg && msg.requestId;
@@ -1651,7 +1651,7 @@ function maybeTransitionVpStatus(hctx, state) {
1651
1651
  * todos, debug cards, and persistence all share the same boundary.
1652
1652
  *
1653
1653
  * @param {object} event — engine event (text_delta / tool_call / …)
1654
- * @param {{assistantTextParts:string[], toolCallsAccum:Array, toolResultsAccum:Array, resetQueryTimer:Function, groupId?:string, vpId?:string, turnId?:string}} hctx
1654
+ * @param {{assistantTextParts:string[], toolCallsAccum:Array, toolResultsAccum:Array, thinkingBlocksAccum?:Array, resetQueryTimer:Function, groupId?:string, vpId?:string, turnId?:string}} hctx
1655
1655
  */
1656
1656
  function handleEngineEvent(event, hctx) {
1657
1657
  hctx.resetQueryTimer();
@@ -1679,6 +1679,30 @@ function handleEngineEvent(event, hctx) {
1679
1679
  sendUnifyEvent({ type: 'thinking_delta', text: event.text }, envelope);
1680
1680
  break;
1681
1681
 
1682
+ case 'thinking_block_end':
1683
+ // task-327d: capture the assembled thinking block (with server-
1684
+ // signed signature) so the group history we hand to subsequent
1685
+ // turns / VPs includes it. Without this echo Anthropic 400s the
1686
+ // next request with "content[].thinking in the thinking mode must
1687
+ // be passed back to the API". The signature stays agent-side
1688
+ // only — wire serializers (stripMetaForWire / sendUnifyOutput)
1689
+ // never reference thinkingBlocks, so it cannot leak to the UI.
1690
+ if (hctx.thinkingBlocksAccum && event.signature) {
1691
+ if (event.redacted) {
1692
+ hctx.thinkingBlocksAccum.push({
1693
+ redacted: true,
1694
+ data: event.data,
1695
+ signature: event.signature,
1696
+ });
1697
+ } else {
1698
+ hctx.thinkingBlocksAccum.push({
1699
+ thinking: event.thinking,
1700
+ signature: event.signature,
1701
+ });
1702
+ }
1703
+ }
1704
+ break;
1705
+
1682
1706
  case 'tool_call':
1683
1707
  // Capture tool_call for the assistant message's toolCalls array so
1684
1708
  // the next turn's history pairs `tool_calls` with `role:'tool'`
@@ -2534,6 +2558,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
2534
2558
  const assistantTextParts = [];
2535
2559
  const toolCallsAccum = [];
2536
2560
  const toolResultsAccum = [];
2561
+ const thinkingBlocksAccum = []; // task-327d: round-trip to next turn
2537
2562
  const appendedUserPrompts = [];
2538
2563
  let vpEngine = null;
2539
2564
 
@@ -2559,6 +2584,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
2559
2584
  assistantTextParts,
2560
2585
  toolCallsAccum,
2561
2586
  toolResultsAccum,
2587
+ thinkingBlocksAccum,
2562
2588
  resetQueryTimer,
2563
2589
  groupId,
2564
2590
  vpId,
@@ -2599,7 +2625,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
2599
2625
  }
2600
2626
 
2601
2627
  // Turn completed — atomically append this VP's output to shared history.
2602
- appendTurnToGroupHistory(groupId, threadId, [prompt, ...appendedUserPrompts], assistantTextParts, toolCallsAccum, toolResultsAccum);
2628
+ appendTurnToGroupHistory(groupId, threadId, [prompt, ...appendedUserPrompts], assistantTextParts, toolCallsAccum, toolResultsAccum, thinkingBlocksAccum);
2603
2629
 
2604
2630
  sendUnifyOutput({
2605
2631
  type: 'assistant',
@@ -2705,7 +2731,7 @@ async function runVpTurn({ prompt, promptParts = null, groupId, vpId, threadId =
2705
2731
  * a session, this in-memory tape carries the un-collapsed form — which
2706
2732
  * is fine because each VP turn's `engine.query` re-collapses on the fly.
2707
2733
  */
2708
- function appendTurnToGroupHistory(groupId, threadId, prompts, assistantTextParts, toolCallsAccum, toolResultsAccum) {
2734
+ function appendTurnToGroupHistory(groupId, threadId, prompts, assistantTextParts, toolCallsAccum, toolResultsAccum, thinkingBlocksAccum) {
2709
2735
  if (!groupId) return;
2710
2736
  const history = getOrCreateGroupHistory(groupId);
2711
2737
  const promptList = Array.isArray(prompts) ? prompts : [prompts];
@@ -2725,6 +2751,18 @@ function appendTurnToGroupHistory(groupId, threadId, prompts, assistantTextParts
2725
2751
  input: tc.input,
2726
2752
  }));
2727
2753
  }
2754
+ // task-327d: carry thinking blocks across turns. Anthropic protocol
2755
+ // requires us to echo them back on the next request or the API
2756
+ // returns "content[].thinking in the thinking mode must be passed
2757
+ // back to the API". The signature is server-private — it stays in
2758
+ // this in-memory history and in agent-side persistence only.
2759
+ if (Array.isArray(thinkingBlocksAccum) && thinkingBlocksAccum.length > 0) {
2760
+ assistantMsg.thinkingBlocks = thinkingBlocksAccum.map(tb => (
2761
+ tb.redacted
2762
+ ? { redacted: true, data: tb.data, signature: tb.signature }
2763
+ : { thinking: tb.thinking, signature: tb.signature }
2764
+ ));
2765
+ }
2728
2766
  history.push(assistantMsg);
2729
2767
 
2730
2768
  for (const tr of toolResultsAccum) {