@psiclawops/hypermem 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. package/ARCHITECTURE.md +12 -3
  2. package/README.md +30 -6
  3. package/bin/hypermem-status.mjs +166 -0
  4. package/dist/background-indexer.d.ts +132 -0
  5. package/dist/background-indexer.d.ts.map +1 -0
  6. package/dist/background-indexer.js +1044 -0
  7. package/dist/cache.d.ts +110 -0
  8. package/dist/cache.d.ts.map +1 -0
  9. package/dist/cache.js +495 -0
  10. package/dist/compaction-fence.d.ts +89 -0
  11. package/dist/compaction-fence.d.ts.map +1 -0
  12. package/dist/compaction-fence.js +153 -0
  13. package/dist/compositor.d.ts +226 -0
  14. package/dist/compositor.d.ts.map +1 -0
  15. package/dist/compositor.js +2558 -0
  16. package/dist/content-type-classifier.d.ts +41 -0
  17. package/dist/content-type-classifier.d.ts.map +1 -0
  18. package/dist/content-type-classifier.js +181 -0
  19. package/dist/cross-agent.d.ts +62 -0
  20. package/dist/cross-agent.d.ts.map +1 -0
  21. package/dist/cross-agent.js +259 -0
  22. package/dist/db.d.ts +131 -0
  23. package/dist/db.d.ts.map +1 -0
  24. package/dist/db.js +402 -0
  25. package/dist/desired-state-store.d.ts +100 -0
  26. package/dist/desired-state-store.d.ts.map +1 -0
  27. package/dist/desired-state-store.js +222 -0
  28. package/dist/doc-chunk-store.d.ts +140 -0
  29. package/dist/doc-chunk-store.d.ts.map +1 -0
  30. package/dist/doc-chunk-store.js +391 -0
  31. package/dist/doc-chunker.d.ts +99 -0
  32. package/dist/doc-chunker.d.ts.map +1 -0
  33. package/dist/doc-chunker.js +324 -0
  34. package/dist/dreaming-promoter.d.ts +86 -0
  35. package/dist/dreaming-promoter.d.ts.map +1 -0
  36. package/dist/dreaming-promoter.js +381 -0
  37. package/dist/episode-store.d.ts +49 -0
  38. package/dist/episode-store.d.ts.map +1 -0
  39. package/dist/episode-store.js +135 -0
  40. package/dist/fact-store.d.ts +75 -0
  41. package/dist/fact-store.d.ts.map +1 -0
  42. package/dist/fact-store.js +236 -0
  43. package/dist/fleet-store.d.ts +144 -0
  44. package/dist/fleet-store.d.ts.map +1 -0
  45. package/dist/fleet-store.js +276 -0
  46. package/dist/fos-mod.d.ts +178 -0
  47. package/dist/fos-mod.d.ts.map +1 -0
  48. package/dist/fos-mod.js +416 -0
  49. package/dist/hybrid-retrieval.d.ts +64 -0
  50. package/dist/hybrid-retrieval.d.ts.map +1 -0
  51. package/dist/hybrid-retrieval.js +344 -0
  52. package/dist/image-eviction.d.ts +49 -0
  53. package/dist/image-eviction.d.ts.map +1 -0
  54. package/dist/image-eviction.js +251 -0
  55. package/dist/index.d.ts +650 -0
  56. package/dist/index.d.ts.map +1 -0
  57. package/dist/index.js +1072 -0
  58. package/dist/keystone-scorer.d.ts +51 -0
  59. package/dist/keystone-scorer.d.ts.map +1 -0
  60. package/dist/keystone-scorer.js +52 -0
  61. package/dist/knowledge-graph.d.ts +110 -0
  62. package/dist/knowledge-graph.d.ts.map +1 -0
  63. package/dist/knowledge-graph.js +305 -0
  64. package/dist/knowledge-lint.d.ts +29 -0
  65. package/dist/knowledge-lint.d.ts.map +1 -0
  66. package/dist/knowledge-lint.js +116 -0
  67. package/dist/knowledge-store.d.ts +72 -0
  68. package/dist/knowledge-store.d.ts.map +1 -0
  69. package/dist/knowledge-store.js +247 -0
  70. package/dist/library-schema.d.ts +22 -0
  71. package/dist/library-schema.d.ts.map +1 -0
  72. package/dist/library-schema.js +1038 -0
  73. package/dist/message-store.d.ts +89 -0
  74. package/dist/message-store.d.ts.map +1 -0
  75. package/dist/message-store.js +323 -0
  76. package/dist/metrics-dashboard.d.ts +114 -0
  77. package/dist/metrics-dashboard.d.ts.map +1 -0
  78. package/dist/metrics-dashboard.js +260 -0
  79. package/dist/obsidian-exporter.d.ts +57 -0
  80. package/dist/obsidian-exporter.d.ts.map +1 -0
  81. package/dist/obsidian-exporter.js +274 -0
  82. package/dist/obsidian-watcher.d.ts +147 -0
  83. package/dist/obsidian-watcher.d.ts.map +1 -0
  84. package/dist/obsidian-watcher.js +403 -0
  85. package/dist/open-domain.d.ts +46 -0
  86. package/dist/open-domain.d.ts.map +1 -0
  87. package/dist/open-domain.js +125 -0
  88. package/dist/preference-store.d.ts +54 -0
  89. package/dist/preference-store.d.ts.map +1 -0
  90. package/dist/preference-store.js +109 -0
  91. package/dist/preservation-gate.d.ts +82 -0
  92. package/dist/preservation-gate.d.ts.map +1 -0
  93. package/dist/preservation-gate.js +150 -0
  94. package/dist/proactive-pass.d.ts +63 -0
  95. package/dist/proactive-pass.d.ts.map +1 -0
  96. package/dist/proactive-pass.js +239 -0
  97. package/dist/profiles.d.ts +44 -0
  98. package/dist/profiles.d.ts.map +1 -0
  99. package/dist/profiles.js +227 -0
  100. package/dist/provider-translator.d.ts +50 -0
  101. package/dist/provider-translator.d.ts.map +1 -0
  102. package/dist/provider-translator.js +403 -0
  103. package/dist/rate-limiter.d.ts +76 -0
  104. package/dist/rate-limiter.d.ts.map +1 -0
  105. package/dist/rate-limiter.js +179 -0
  106. package/dist/repair-tool-pairs.d.ts +38 -0
  107. package/dist/repair-tool-pairs.d.ts.map +1 -0
  108. package/dist/repair-tool-pairs.js +138 -0
  109. package/dist/retrieval-policy.d.ts +51 -0
  110. package/dist/retrieval-policy.d.ts.map +1 -0
  111. package/dist/retrieval-policy.js +77 -0
  112. package/dist/schema.d.ts +15 -0
  113. package/dist/schema.d.ts.map +1 -0
  114. package/dist/schema.js +229 -0
  115. package/dist/secret-scanner.d.ts +51 -0
  116. package/dist/secret-scanner.d.ts.map +1 -0
  117. package/dist/secret-scanner.js +248 -0
  118. package/dist/seed.d.ts +108 -0
  119. package/dist/seed.d.ts.map +1 -0
  120. package/dist/seed.js +177 -0
  121. package/dist/session-flusher.d.ts +53 -0
  122. package/dist/session-flusher.d.ts.map +1 -0
  123. package/dist/session-flusher.js +69 -0
  124. package/dist/session-topic-map.d.ts +41 -0
  125. package/dist/session-topic-map.d.ts.map +1 -0
  126. package/dist/session-topic-map.js +77 -0
  127. package/dist/spawn-context.d.ts +54 -0
  128. package/dist/spawn-context.d.ts.map +1 -0
  129. package/dist/spawn-context.js +159 -0
  130. package/dist/system-store.d.ts +73 -0
  131. package/dist/system-store.d.ts.map +1 -0
  132. package/dist/system-store.js +182 -0
  133. package/dist/temporal-store.d.ts +80 -0
  134. package/dist/temporal-store.d.ts.map +1 -0
  135. package/dist/temporal-store.js +149 -0
  136. package/dist/topic-detector.d.ts +35 -0
  137. package/dist/topic-detector.d.ts.map +1 -0
  138. package/dist/topic-detector.js +249 -0
  139. package/dist/topic-store.d.ts +45 -0
  140. package/dist/topic-store.d.ts.map +1 -0
  141. package/dist/topic-store.js +136 -0
  142. package/dist/topic-synthesizer.d.ts +51 -0
  143. package/dist/topic-synthesizer.d.ts.map +1 -0
  144. package/dist/topic-synthesizer.js +315 -0
  145. package/dist/trigger-registry.d.ts +63 -0
  146. package/dist/trigger-registry.d.ts.map +1 -0
  147. package/dist/trigger-registry.js +163 -0
  148. package/dist/types.d.ts +537 -0
  149. package/dist/types.d.ts.map +1 -0
  150. package/dist/types.js +9 -0
  151. package/dist/vector-store.d.ts +170 -0
  152. package/dist/vector-store.d.ts.map +1 -0
  153. package/dist/vector-store.js +677 -0
  154. package/dist/version.d.ts +34 -0
  155. package/dist/version.d.ts.map +1 -0
  156. package/dist/version.js +34 -0
  157. package/dist/wiki-page-emitter.d.ts +65 -0
  158. package/dist/wiki-page-emitter.d.ts.map +1 -0
  159. package/dist/wiki-page-emitter.js +258 -0
  160. package/dist/work-store.d.ts +112 -0
  161. package/dist/work-store.d.ts.map +1 -0
  162. package/dist/work-store.js +273 -0
  163. package/package.json +4 -1
@@ -0,0 +1,2558 @@
1
+ /**
2
+ * hypermem Compositor
3
+ *
4
+ * Assembles context for LLM calls by orchestrating all four memory layers:
5
+ * L1 Redis — hot session working memory (system, identity, recent msgs)
6
+ * L2 Messages — conversation history from messages.db
7
+ * L3 Vectors — semantic search across all indexed content
8
+ * L4 Library — structured knowledge (facts, preferences, knowledge, episodes)
9
+ *
10
+ * Token-budgeted: never exceeds the budget, prioritizes by configured order.
11
+ * Provider-neutral internally, translates at the output boundary.
12
+ */
13
+ import { filterByScope } from './retrieval-policy.js';
14
+ import { DEFAULT_TRIGGERS, matchTriggers, logRegistryStartup, } from './trigger-registry.js';
15
+ import { MessageStore } from './message-store.js';
16
+ import { SessionTopicMap } from './session-topic-map.js';
17
+ import { toProviderFormat } from './provider-translator.js';
18
+ import { DocChunkStore } from './doc-chunk-store.js';
19
+ import { hybridSearch } from './hybrid-retrieval.js';
20
+ import { ensureCompactionFenceSchema, updateCompactionFence } from './compaction-fence.js';
21
+ import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
22
+ import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
23
+ import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
24
+ import { KnowledgeStore } from './knowledge-store.js';
25
+ import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
26
+ import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
27
/**
 * Model context window sizes by provider/model string (or partial match).
 * Used as fallback when tokenBudget is not passed by the runtime.
 * Order matters: first match wins. Partial substring match on the model string.
 * NOTE(review): window sizes are hand-maintained config data — verify against
 * each provider's current documentation when adding or updating entries.
 */
const MODEL_CONTEXT_WINDOWS = [
    // Anthropic — the bare 'claude' entry is the family catch-all; keep it last in this group.
    { pattern: 'claude-opus-4', tokens: 200_000 },
    { pattern: 'claude-sonnet-4', tokens: 200_000 },
    { pattern: 'claude-3-5', tokens: 200_000 },
    { pattern: 'claude-3-7', tokens: 200_000 },
    { pattern: 'claude', tokens: 200_000 },
    // OpenAI
    { pattern: 'gpt-5', tokens: 128_000 },
    { pattern: 'gpt-4o', tokens: 128_000 },
    { pattern: 'gpt-4', tokens: 128_000 },
    { pattern: 'o3', tokens: 128_000 },
    { pattern: 'o4', tokens: 128_000 },
    // Google — the bare 'gemini' entry is the family catch-all; keep it last in this group.
    { pattern: 'gemini-3.1-pro', tokens: 1_000_000 },
    { pattern: 'gemini-3.1-flash', tokens: 1_000_000 },
    { pattern: 'gemini-2.5-pro', tokens: 1_000_000 },
    { pattern: 'gemini-2', tokens: 1_000_000 },
    { pattern: 'gemini', tokens: 1_000_000 },
    // Zhipu / GLM
    { pattern: 'glm-5', tokens: 131_072 },
    { pattern: 'glm-4', tokens: 131_072 },
    // Alibaba / Qwen — 'qwen3' must precede the bare 'qwen' catch-all.
    { pattern: 'qwen3', tokens: 262_144 },
    { pattern: 'qwen', tokens: 131_072 },
    // DeepSeek
    { pattern: 'deepseek-v3', tokens: 131_072 },
    { pattern: 'deepseek', tokens: 131_072 },
];
61
/**
 * Resolve effective token budget from model string.
 *
 * Returns the context window for the model, minus the configured reserve
 * fraction held back for output tokens and hypermem operational overhead.
 * Default reserve: 15% (leaves 85% for input context) — matching the
 * `contextWindowReserve` default in DEFAULT_CONFIG.
 *
 * @param {string|undefined|null} model - Provider model string; matched
 *   case-insensitively by substring against MODEL_CONTEXT_WINDOWS, first match wins.
 * @param {number} defaultBudget - Budget returned when model is absent or unmatched.
 * @param {number} [reserve=0.15] - Fraction of the window reserved for output.
 * @returns {number} Effective input-token budget.
 */
function resolveModelBudget(model, defaultBudget, reserve = 0.15) {
    if (!model)
        return defaultBudget;
    const normalized = model.toLowerCase();
    for (const entry of MODEL_CONTEXT_WINDOWS) {
        if (normalized.includes(entry.pattern)) {
            return Math.floor(entry.tokens * (1 - reserve));
        }
    }
    // Unknown model: fall back to the caller-supplied default (no reserve applied).
    return defaultBudget;
}
79
/**
 * Resolve the raw context window size for a model (no reserve applied).
 * Used as totalWindow for dynamic reserve calculation.
 * Falls back to defaultBudget / 0.85 (the inverse of the 15% default reserve)
 * when the model string is absent or matches no known entry.
 */
function resolveModelWindow(model, defaultBudget) {
    const fallback = Math.floor(defaultBudget / 0.85);
    if (!model) {
        return fallback;
    }
    const lowered = model.toLowerCase();
    const match = MODEL_CONTEXT_WINDOWS.find(entry => lowered.includes(entry.pattern));
    return match ? match.tokens : fallback;
}
95
/**
 * Compute dynamic context window reserve based on recent turn cost.
 *
 * Reserve = clamp(avg_turn_cost × horizon / totalWindow, base, max)
 *
 * Returns the reserve fraction plus diagnostics. pressureHigh is true only
 * when the dynamic reserve is clamped at the configured max, so callers can
 * emit a warning or trigger checkpointing.
 */
function computeDynamicReserve(recentMessages, totalWindow, config) {
    const baseReserve = config.contextWindowReserve ?? 0.15;
    const turnHorizon = config.dynamicReserveTurnHorizon ?? 5;
    const maxReserve = config.dynamicReserveMax ?? 0.50;
    const isEnabled = config.dynamicReserveEnabled ?? true;
    if (!isEnabled || recentMessages.length === 0 || totalWindow <= 0) {
        return { reserve: baseReserve, avgTurnCost: 0, dynamic: false, pressureHigh: false };
    }
    // Sample the last 20 user+assistant messages for turn cost estimation.
    // Tool messages are excluded — they're already compressed by the gradient
    // and don't represent per-turn user intent cost.
    const conversational = recentMessages.filter(m => m.role === 'user' || m.role === 'assistant');
    const sample = conversational.slice(-20);
    if (sample.length === 0) {
        return { reserve: baseReserve, avgTurnCost: 0, dynamic: false, pressureHigh: false };
    }
    let totalCost = 0;
    for (const message of sample) {
        totalCost += estimateMessageTokens(message);
    }
    const avgTurnCost = Math.floor(totalCost / sample.length);
    const safetyTokens = avgTurnCost * turnHorizon;
    const dynamicFrac = safetyTokens / totalWindow;
    if (dynamicFrac <= baseReserve) {
        return { reserve: baseReserve, avgTurnCost, dynamic: false, pressureHigh: false };
    }
    const clamped = dynamicFrac >= maxReserve;
    return {
        reserve: clamped ? maxReserve : dynamicFrac,
        avgTurnCost,
        dynamic: true,
        pressureHigh: clamped,
    };
}
133
// Baseline compositor configuration; runtime config overrides these per field.
const DEFAULT_CONFIG = {
    defaultTokenBudget: 90000, // fallback input budget when the model window is unknown
    maxHistoryMessages: 250,
    maxFacts: 28,
    maxCrossSessionContext: 6000,
    maxRecentToolPairs: 3,
    maxProseToolPairs: 10,
    warmHistoryBudgetFraction: 0.4,
    keystoneHistoryFraction: 0.2,
    keystoneMaxMessages: 15,
    keystoneMinSignificance: 0.5,
    // Dynamic reserve knobs — consumed by computeDynamicReserve():
    contextWindowReserve: 0.15,   // base reserve fraction (floor)
    dynamicReserveTurnHorizon: 5, // turns of headroom the dynamic reserve plans for
    dynamicReserveMax: 0.50,      // hard cap; hitting it flags session pressure high
    dynamicReserveEnabled: true,
};
149
// Tool gradient thresholds — controls how aggressively tool results are
// truncated as they age out of the recent window.
// Recent-turn policy (2026-04-07): protect turn 0 + turn 1, budget against a
// conservative 120k planning window, and only head+tail trim large (>40k)
// recent results when projected occupancy crosses the orange zone.
// Tier boundaries by turn age (see getTurnAge):
const TOOL_GRADIENT_T0_TURNS = 2; // current + 2 prior completed turns: full fidelity (matches OpenClaw keepLastAssistants: 3)
const TOOL_GRADIENT_T1_TURNS = 4; // turns 2-4: moderate truncation (was 3)
const TOOL_GRADIENT_T2_TURNS = 7; // turns 4-7: aggressive truncation (was 12)
// T3 = turns 8+: one-liner stub
// Per-tier character caps (per message and per call/result turn pair):
const TOOL_GRADIENT_T1_CHAR_CAP = 6_000; // per-message cap (was 8k)
const TOOL_GRADIENT_T1_TURN_CAP = 12_000; // per-turn-pair cap (was 16k)
const TOOL_GRADIENT_T2_CHAR_CAP = 800; // per-message cap (was 1k)
const TOOL_GRADIENT_T2_TURN_CAP = 3_000; // per-turn-pair cap (was 4k)
const TOOL_GRADIENT_T3_CHAR_CAP = 150; // oldest tier: stub only (was 200)
const TOOL_GRADIENT_T3_TURN_CAP = 800; // per-turn-pair cap (was 1k)
const TOOL_GRADIENT_MAX_TAIL_CHARS = 3_000; // tail preserve budget for T1+ (see truncateWithHeadTail)
const TOOL_GRADIENT_MIDDLE_MARKER = '\n[... tool output truncated ...]\n';
// Pressure model inputs (see computeToolPressureState): plan against a
// conservative baseline window with a minimum output reserve, classifying
// projected occupancy into green/yellow/orange/red zones.
const TOOL_PLANNING_BASELINE_WINDOW = 120_000;
const TOOL_PLANNING_MIN_RESERVE_TOKENS = 24_000;
const TOOL_PRESSURE_YELLOW = 0.75;
const TOOL_PRESSURE_ORANGE = 0.80;
const TOOL_PRESSURE_RED = 0.85;
// Oversize recent-result trimming: results above the threshold are head+tail
// trimmed toward the target size with a bounded tail.
const TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD = 40_000;
const TOOL_RECENT_OVERSIZE_TARGET_CHARS = 40_000;
const TOOL_RECENT_OVERSIZE_MAX_TAIL_CHARS = 12_000;
// Prefix of the structured trim note emitted by buildRecentTrimNote.
const TOOL_TRIM_NOTE_PREFIX = '[hypermem_tool_result_trim';
175
// ─── Trigger Registry ────────────────────────────────────────────
// Moved to src/trigger-registry.ts (W5).
// CollectionTrigger, DEFAULT_TRIGGERS, matchTriggers imported above.
// Re-exported here for backward compatibility with existing consumers.
export { DEFAULT_TRIGGERS, matchTriggers } from './trigger-registry.js';
// ─── Test-only exports (not part of public API) ───────────────────────────
// These are exported solely for unit testing. Do not use in production code.
export { getTurnAge, applyToolGradient, appendToolSummary, truncateWithHeadTail, applyTierPayloadCap, evictLargeToolResults };
183
/**
 * Group a flat message window into clusters that must travel together.
 *
 * A message carrying toolCalls is clustered with the following messages that
 * carry its matching toolResults (matched by callId where ids are present),
 * and a bare run of toolResult-only messages is clustered as one unit.
 * Each cluster records its estimated token cost so callers can drop whole
 * clusters from the oldest end without orphaning a tool call from its result.
 *
 * @param messages NeutralMessage array in chronological order
 * @returns Array of { messages, tokenCost } clusters, in the same order
 */
function clusterNeutralMessages(messages) {
    const clusters = [];
    for (let i = 0; i < messages.length; i++) {
        const current = messages[i];
        const cluster = [current];
        if (current.toolCalls && current.toolCalls.length > 0) {
            // Absorb subsequent result messages that answer these calls.
            const callIds = new Set(current.toolCalls.map(tc => tc.id).filter(Boolean));
            let j = i + 1;
            while (j < messages.length) {
                const candidate = messages[j];
                if (!candidate.toolResults || candidate.toolResults.length === 0)
                    break;
                const resultIds = candidate.toolResults.map(tr => tr.callId).filter(Boolean);
                // Stop when a result message clearly belongs to different calls.
                if (callIds.size > 0 && resultIds.length > 0 && !resultIds.some(id => callIds.has(id)))
                    break;
                cluster.push(candidate);
                j++;
            }
            i = j - 1; // outer i++ resumes after the consumed results
        }
        else if (current.toolResults && current.toolResults.length > 0) {
            // Orphan result run (no visible call): keep consecutive results together.
            let j = i + 1;
            while (j < messages.length) {
                const candidate = messages[j];
                if (!candidate.toolResults || candidate.toolResults.length === 0 || (candidate.toolCalls && candidate.toolCalls.length > 0))
                    break;
                cluster.push(candidate);
                j++;
            }
            i = j - 1; // outer i++ resumes after the consumed results
        }
        clusters.push({
            messages: cluster,
            tokenCost: cluster.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0),
        });
    }
    return clusters;
}
221
/**
 * Public reshape helper: apply tool gradient then trim to fit within a token budget.
 *
 * Used by the plugin's budget-downshift pass to pre-process a Redis history window
 * after a model switch to a smaller context window, before the full compose pipeline
 * runs. Drops whole clusters from oldest to newest until the estimated token cost
 * fits within tokenBudget * 0.65 (using the standard char/4 heuristic); the newest
 * cluster is always kept.
 *
 * @param messages NeutralMessage array from the Redis hot window
 * @param tokenBudget Effective token budget for this session
 * @returns Trimmed message array ready for setWindow()
 */
export function applyToolGradientToWindow(messages, tokenBudget, totalWindowTokens) {
    const reshaped = applyToolGradient(messages, { totalWindowTokens });
    const targetTokens = Math.floor(tokenBudget * 0.65);
    const clusters = clusterNeutralMessages(reshaped);
    let remainingTokens = clusters.reduce((sum, cluster) => sum + cluster.tokenCost, 0);
    // Walk oldest → newest, dropping clusters until we fit (keep at least the last).
    let keepFrom = 0;
    while (remainingTokens > targetTokens && keepFrom < clusters.length - 1) {
        remainingTokens -= clusters[keepFrom].tokenCost;
        keepFrom += 1;
    }
    const kept = [];
    for (const cluster of clusters.slice(keepFrom)) {
        kept.push(...cluster.messages);
    }
    return kept;
}
246
/**
 * Canonical history must remain lossless for tool turns.
 *
 * Returns true only when no message in the window carries structured tool
 * calls or tool results. If any do, the caller should treat
 * applyToolGradientToWindow() as a view-only transform for the current
 * compose pass and avoid writing the reshaped messages back into the
 * canonical cache/history store.
 */
export function canPersistReshapedHistory(messages) {
    return messages.every(msg => !hasToolContent(msg));
}
257
/**
 * Rough token estimation: ~4 chars per token for English text.
 * This is a heuristic — actual tokenization varies by model — but it is
 * good enough for budget management; exact counts come from the provider.
 */
function estimateTokens(text) {
    return text ? Math.ceil(text.length / 4) : 0;
}
267
/**
 * Dense token estimation for tool content (JSON, code, base64).
 * Tool payloads are typically 2x denser than English prose, so use
 * ~2 chars per token instead of estimateTokens()'s ~4.
 * Guards nullish/empty input for consistency with estimateTokens()
 * (the bare `.length` access would otherwise throw on null).
 */
function estimateToolTokens(text) {
    if (!text)
        return 0;
    return Math.ceil(text.length / 2);
}
274
// Estimate total tokens for one neutral message: prose at ~4 chars/token,
// serialized tool payloads at the denser ~2 chars/token, plus a small fixed
// per-message overhead for role/formatting.
function estimateMessageTokens(msg) {
    const { toolCalls, toolResults } = msg;
    let total = estimateTokens(msg.textContent);
    if (toolCalls) {
        total += estimateToolTokens(JSON.stringify(toolCalls)); // dense: /2 not /4
    }
    if (toolResults) {
        total += estimateToolTokens(JSON.stringify(toolResults)); // dense: /2 not /4
    }
    return total + 4; // overhead per message (role, formatting)
}
286
// Parse a tool call's JSON-encoded arguments. Malformed input yields an
// empty object so downstream label/summary builders never throw.
function parseToolArgs(argumentsJson) {
    let parsed;
    try {
        parsed = JSON.parse(argumentsJson);
    }
    catch {
        parsed = {};
    }
    return parsed;
}
294
// Human-readable one-line label for a tool call, e.g. "read /tmp/x.md" or
// "exec npm test". Free-text fields are clipped to 80 chars; unknown tools
// fall back to the bare tool name.
function toolLabelFromCall(name, args) {
    const clip = (value, fallback) => String(value ?? '').slice(0, 80) || fallback;
    switch (name) {
        case 'read':
            return `read ${args.path ?? args.file_path ?? args.filePath ?? 'file'}`;
        case 'write':
            return `write ${args.path ?? args.file ?? args.filePath ?? 'file'}`;
        case 'edit':
            return `edit ${args.path ?? args.file ?? args.filePath ?? 'file'}`;
        case 'exec':
            return `exec ${clip(args.command, 'command')}`;
        case 'web_search':
            return `web_search ${clip(args.query, 'query')}`;
        case 'web_fetch':
            return `web_fetch ${clip(args.url, 'url')}`;
        case 'sessions_send':
            return `sessions_send ${clip(args.sessionKey ?? args.label, 'target')}`;
        case 'memory_search':
            return `memory_search ${clip(args.query, 'query')}`;
        default:
            return name;
    }
}
316
/**
 * Strip OpenClaw's external-content security wrapper from tool results before
 * truncation. web_fetch results are wrapped in
 * <<<BEGIN_EXTERNAL_UNTRUSTED_CONTENT ... >>> blocks; that preamble would eat
 * the entire head budget in truncateWithHeadTail, leaving only the security
 * notice visible. Peel the wrapper off first so truncation operates on the
 * real content.
 */
function stripSecurityPreamble(content) {
    // Drop everything through the opening tag line, then everything from the
    // closing tag line onward; keep only the payload between the two.
    const openTag = /^[\s\S]*?<<<BEGIN_EXTERNAL_UNTRUSTED_CONTENT[^\n]*>>>?\n?/;
    const closeTag = /\n?<<<END_EXTERNAL_UNTRUSTED_CONTENT[^\n]*>>>?[\s\S]*$/;
    const body = content.replace(openTag, '').replace(closeTag, '').trim();
    // If stripping removed everything (or nearly), fall back to the original.
    return body.length > 20 ? body : content;
}
330
// Minimum floor: if trimming would leave less than 30% of the original
// content, return a stripped sentinel instead of a misleading fragment — a
// partial result that looks complete is worse than a clear signal that the
// result was dropped. Applied only in applyTierPayloadCap (pressure-driven
// trimming), not in structural truncation paths where head+tail is always
// semantically useful.
const TOOL_GRADIENT_MIN_USEFUL_FRACTION = 0.30;
/**
 * Truncate long content to maxChars, preserving the head plus a bounded tail
 * with a marker in between. Content already within budget passes through.
 */
function truncateWithHeadTail(content, maxChars, maxTailChars = TOOL_GRADIENT_MAX_TAIL_CHARS) {
    if (content.length <= maxChars) {
        return content;
    }
    // Tail gets up to 30% of the budget (capped); marker + head take the rest.
    const tail = Math.min(Math.floor(maxChars * 0.30), maxTailChars);
    const head = Math.max(0, maxChars - tail - TOOL_GRADIENT_MIDDLE_MARKER.length);
    const pieces = [content.slice(0, head), TOOL_GRADIENT_MIDDLE_MARKER, content.slice(-tail)];
    return pieces.join('');
}
343
// Keep the head of `content` up to maxChars, appending an ellipsis when cut.
function truncateHead(content, maxChars) {
    if (content.length > maxChars) {
        const ellipsis = '…';
        return content.slice(0, Math.max(0, maxChars - ellipsis.length)) + ellipsis;
    }
    return content;
}
350
// First line of `content` containing non-whitespace, trimmed; '' when none.
function firstNonEmptyLine(content) {
    for (const line of content.split('\n')) {
        const trimmed = line.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return '';
}
354
// Collapse every whitespace run to a single space and trim the ends.
function normalizeInline(text) {
    return text.trim().replace(/\s+/g, ' ');
}
357
// Hostname of `raw` when it parses as a URL; otherwise the raw string itself.
function hostFromUrl(raw) {
    try {
        const { hostname } = new URL(raw);
        return hostname;
    }
    catch {
        return raw;
    }
}
365
// Text of the first markdown heading (levels 1-3) in `content`, hashes
// stripped; '' when no heading is found. Note: the match is tested on the
// trimmed line but the hashes are stripped from the raw line, so an indented
// heading keeps its hashes — preserved intentionally.
function extractTopHeading(content) {
    for (const line of content.split('\n')) {
        if (/^#{1,3}\s+/.test(line.trim())) {
            return line.replace(/^#{1,3}\s+/, '').trim();
        }
    }
    return '';
}
369
// Pull a numeric exit code out of command output (e.g. "exit code: 1" → "1"),
// or null when no exit-code phrasing is present.
function extractExitCode(content) {
    const match = /(?:exit code|exit|code)\s*[:=]?\s*(\d+)/i.exec(content);
    return match ? match[1] : null;
}
373
// Best-effort count of results in a search tool payload: prefer counting
// "title" keys inside a JSON "results" array, otherwise count source-ish
// markers in prose (capped at 20). Returns null when no signal is found.
function estimateSearchResultCount(content) {
    if (/"results"\s*:\s*\[/.test(content)) {
        const titleKeys = content.match(/"title"\s*:/g);
        if (titleKeys?.length) {
            return titleKeys.length;
        }
    }
    const markers = content.match(/\bSource:\b|\bsiteName\b|\btitle\b/gi);
    if (!markers?.length) {
        return null;
    }
    return Math.min(markers.length, 20);
}
383
// One-line outcome summary: "label — first line", falling back to a char
// count when the content has no usable line; clipped to maxChars.
function summarizeOutcome(label, content, maxChars) {
    const firstLine = firstNonEmptyLine(content);
    const detail = firstLine || `${content.length} chars`;
    return truncateHead(`${label} — ${detail}`, maxChars);
}
388
/**
 * Build a one-line natural-language summary of a tool call + its result,
 * specialized per tool (read/exec/web_search/web_fetch/memory_search) and
 * falling back to "[label] <head+tail content>" for unknown tools.
 *
 * @param name Tool name from the tool call
 * @param args Parsed tool-call arguments (see parseToolArgs)
 * @param content Raw result content
 * @param maxChars Output budget; summaries are head-truncated to fit
 * @param compact When true, clip free-text fields harder (40 vs 80 chars)
 *   and omit secondary detail — used for the oldest (tier-3) envelopes
 */
function summarizeToolInteraction(name, args, content, maxChars, compact = false) {
    const line = normalizeInline(firstNonEmptyLine(content));
    switch (name) {
        case 'read': {
            const path = String(args.path ?? args.file_path ?? args.filePath ?? 'file');
            const heading = extractTopHeading(content);
            // Prefer a document heading, then the first line, then a size note.
            const detail = heading || line || `${content.length} chars`;
            return truncateHead(`Read ${path} — ${detail}`, maxChars);
        }
        case 'exec': {
            const cmd = String(args.command ?? 'command').slice(0, compact ? 40 : 80);
            const exitCode = extractExitCode(content);
            // No explicit exit code: infer failure from error-ish keywords.
            const status = exitCode ? `exit ${exitCode}` : (/(error|failed|timeout|timed out)/i.test(content) ? 'failed' : 'completed');
            // Skip the first line when it merely repeats the exit status.
            const detail = line && !/^exit\s+\d+$/i.test(line) ? `, ${line}` : '';
            return truncateHead(`Ran ${cmd} — ${status}${detail}`, maxChars);
        }
        case 'web_search': {
            const query = String(args.query ?? 'query').slice(0, compact ? 40 : 80);
            const count = estimateSearchResultCount(content);
            const heading = extractTopHeading(content);
            const detail = heading || line;
            const countText = count ? ` — ${count} results` : '';
            const summary = compact
                ? `Searched '${query}'${countText}`
                : `Searched '${query}'${countText}${detail ? `, top: ${detail}` : ''}`;
            return truncateHead(summary, maxChars);
        }
        case 'web_fetch': {
            const url = String(args.url ?? 'url');
            const host = hostFromUrl(url);
            const heading = extractTopHeading(content);
            const detail = heading || line || `${content.length} chars`;
            return truncateHead(`Fetched ${host} — ${detail}`, maxChars);
        }
        case 'memory_search': {
            const query = String(args.query ?? 'query').slice(0, compact ? 40 : 80);
            const count = estimateSearchResultCount(content);
            return truncateHead(`Searched memory for '${query}'${count ? ` — ${count} hits` : ''}${line ? `, top: ${line}` : ''}`, maxChars);
        }
        default: {
            const label = toolLabelFromCall(name, args);
            // Unknown tool: compact mode gets a one-liner; otherwise keep a
            // head+tail excerpt of the raw content behind the label.
            return compact
                ? truncateHead(`${label} — ${line || `${content.length} chars`}`, maxChars)
                : (() => {
                    const prefix = `[${label}] `;
                    const available = Math.max(40, maxChars - prefix.length);
                    return prefix + truncateWithHeadTail(content, available);
                })();
        }
    }
}
439
// Tier-2 envelope: a full heuristic summary when the tool name/args are
// known; otherwise "[label] " plus head+tail-truncated raw content.
function buildTier2Envelope(label, content, maxChars, name, args) {
    if (name && args) {
        return summarizeToolInteraction(name, args, content, maxChars, false);
    }
    const prefix = `[${label}] `;
    return prefix + truncateWithHeadTail(content, Math.max(40, maxChars - prefix.length));
}
446
// Tier-3 envelope: bracketed compact summary; the inner budget is reduced by
// 2 to leave room for the brackets.
function buildTier3Envelope(label, content, maxChars, name, args) {
    const inner = name && args
        ? summarizeToolInteraction(name, args, content, maxChars - 2, true)
        : summarizeOutcome(label, content, maxChars - 2);
    return `[${inner}]`;
}
451
/**
 * Extract a heuristic prose summary from a tool call/result pair.
 * Used when tool payloads are removed but continuity should remain.
 *
 * @param msg NeutralMessage carrying toolCalls and/or toolResults
 * @param perResultCap Character budget per individual call/result summary
 * @param compact When true, emit bracketed tier-3 one-liners instead of
 *   tier-2 envelopes
 * @returns Semicolon-joined summaries, head-truncated to at least 120 chars
 */
function extractToolProseSummary(msg, perResultCap, compact = false) {
    const parts = [];
    if (msg.toolCalls && msg.toolCalls.length > 0) {
        for (const tc of msg.toolCalls) {
            const args = parseToolArgs(tc.arguments);
            const label = toolLabelFromCall(tc.name, args);
            // Pair each call with its result by callId (may be absent).
            const resultContent = msg.toolResults?.find(r => r.callId === tc.id)?.content ?? '';
            if (resultContent) {
                parts.push(compact
                    ? buildTier3Envelope(label, resultContent, perResultCap, tc.name, args)
                    : buildTier2Envelope(label, resultContent, perResultCap, tc.name, args));
            }
            else {
                // No matching result content: fall back to the bare label.
                parts.push(compact ? `[${truncateHead(label, perResultCap - 2)}]` : label);
            }
        }
    }
    else if (msg.toolResults && msg.toolResults.length > 0) {
        // Result-only message (its call lives in an earlier message).
        for (const tr of msg.toolResults) {
            const label = tr.name || 'tool_result';
            const args = {};
            parts.push(compact
                ? buildTier3Envelope(label, tr.content ?? '', perResultCap, tr.name || 'tool_result', args)
                : buildTier2Envelope(label, tr.content ?? '', perResultCap, tr.name || 'tool_result', args));
        }
    }
    // Overall cap: never below 120 chars even for tiny per-result budgets.
    return truncateHead(parts.join('; '), Math.max(perResultCap, 120));
}
483
// Append a bracketed tool summary to existing message text. With no summary
// the text passes through; with no text the summary stands alone.
function appendToolSummary(textContent, summary) {
    const text = textContent ?? '';
    if (!summary) {
        return text;
    }
    if (!text) {
        return summary;
    }
    return `${text}\n[Tools: ${summary}]`;
}
489
// Turn age of messages[index]: the number of genuine user turns (user
// messages carrying no tool results) that appear after it in the window.
function getTurnAge(messages, index) {
    let age = 0;
    for (let i = index + 1; i < messages.length; i++) {
        const msg = messages[i];
        if (msg?.role === 'user' && !(msg.toolResults?.length)) {
            age += 1;
        }
    }
    return age;
}
499
// True when the message carries structured tool calls or tool results.
function hasToolContent(msg) {
    const calls = msg.toolCalls?.length ?? 0;
    const results = msg.toolResults?.length ?? 0;
    return calls > 0 || results > 0;
}
502
// Planning window for tool-pressure math: the real window when known, capped
// at the conservative 120k baseline; the baseline itself when unknown.
function resolveToolPlanningWindow(totalWindowTokens) {
    if (totalWindowTokens && totalWindowTokens > 0) {
        return Math.min(totalWindowTokens, TOOL_PLANNING_BASELINE_WINDOW);
    }
    return TOOL_PLANNING_BASELINE_WINDOW;
}
508
/**
 * Compute the current tool-output pressure for a message window.
 *
 * Projects used tokens (char-heuristic estimate) plus an output reserve
 * against the planning window, and classifies the resulting occupancy into
 * green / yellow / orange / red zones via the TOOL_PRESSURE_* thresholds.
 *
 * @param messages Message window to measure
 * @param totalWindowTokens Actual model window, if known (capped at baseline)
 * @returns { planningWindowTokens, reserveTokens, projectedTokens, occupancy, zone }
 */
function computeToolPressureState(messages, totalWindowTokens) {
    const planningWindowTokens = resolveToolPlanningWindow(totalWindowTokens);
    // Reserve the larger of the fixed minimum or 10% of the planning window.
    const reserveTokens = Math.max(TOOL_PLANNING_MIN_RESERVE_TOKENS, Math.floor(planningWindowTokens * 0.10));
    const usedTokens = messages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
    const projectedTokens = usedTokens + reserveTokens;
    // Degenerate window → treat as fully occupied.
    const occupancy = planningWindowTokens > 0 ? projectedTokens / planningWindowTokens : 1;
    let zone = 'green';
    if (occupancy > TOOL_PRESSURE_RED)
        zone = 'red';
    else if (occupancy > TOOL_PRESSURE_ORANGE)
        zone = 'orange';
    else if (occupancy > TOOL_PRESSURE_YELLOW)
        zone = 'yellow';
    return {
        planningWindowTokens,
        reserveTokens,
        projectedTokens,
        occupancy,
        zone,
    };
}
529
/** True when the content is already a structured trim note (idempotence guard). */
function isStructuredTrimNote(content) {
    return content.indexOf(TOOL_TRIM_NOTE_PREFIX) === 0;
}
532
/**
 * Build the machine-readable trim note prepended to an emergency-trimmed
 * recent tool result. Fields are space-separated `key=value` pairs wrapped
 * between TOOL_TRIM_NOTE_PREFIX and a closing `]`, so downstream consumers
 * can parse how much content was kept and why.
 */
function buildRecentTrimNote(originalChars, keptHeadChars, keptTailChars, pressure, resultId) {
    const fields = [
        'partial_result=true',
        'reason=oversize_turn0_trim',
        `original_chars=${originalChars}`,
        `kept_head_chars=${keptHeadChars}`,
        `kept_tail_chars=${keptTailChars}`,
        `projected_occupancy_pct=${Math.round(pressure.occupancy * 100)}`,
        `planning_window_tokens=${pressure.planningWindowTokens}`,
        `reserve_tokens=${pressure.reserveTokens}`,
        'retry_recommended=true',
    ];
    if (resultId) {
        fields.push(`result_id=${resultId}`);
    }
    return [TOOL_TRIM_NOTE_PREFIX, ...fields, ']'].join(' ');
}
550
/**
 * Split a head/tail-truncated payload at the gradient middle marker and
 * report how many characters survive on each side. Content without the
 * marker is all head, zero tail.
 */
function countHeadTailChars(content) {
    const markerAt = content.indexOf(TOOL_GRADIENT_MIDDLE_MARKER);
    if (markerAt < 0) {
        return { headChars: content.length, tailChars: 0 };
    }
    const tailChars = content.length - markerAt - TOOL_GRADIENT_MIDDLE_MARKER.length;
    return { headChars: markerAt, tailChars };
}
560
/**
 * Emergency-trim an oversized recent tool result under pressure.
 *
 * Idempotent: content that is already a structured trim note passes through
 * untouched. Otherwise the security preamble is stripped (falling back to
 * the raw content when stripping leaves nothing), the payload is head/tail
 * truncated to fit the oversize target after reserving room for the note,
 * and the final note — carrying actual kept head/tail counts — is prepended.
 */
function trimRecentToolResult(content, pressure, resultId) {
    if (isStructuredTrimNote(content)) {
        return content;
    }
    const stripped = stripSecurityPreamble(content);
    const base = stripped.length > 0 ? stripped : content;
    // Size the payload budget against a zero-count skeleton note so the
    // combined note + payload lands near the oversize target (2k char floor).
    const skeleton = buildRecentTrimNote(base.length, 0, 0, pressure, resultId);
    const payloadBudget = Math.max(2_000, TOOL_RECENT_OVERSIZE_TARGET_CHARS - skeleton.length - 1);
    const truncated = truncateWithHeadTail(base, payloadBudget, TOOL_RECENT_OVERSIZE_MAX_TAIL_CHARS);
    const kept = countHeadTailChars(truncated);
    const note = buildRecentTrimNote(base.length, kept.headChars, kept.tailChars, pressure, resultId);
    return `${note}\n${truncated}`;
}
573
/**
 * Protect the most-recent turn's tool results: by default they pass through
 * untouched. Only under orange/red pressure AND when an individual payload
 * exceeds TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD is it emergency-trimmed via
 * trimRecentToolResult(). Never mutates the input; returns copies.
 */
function protectRecentToolContent(msg, pressure) {
    const results = msg.toolResults;
    if (!results || results.length === 0) {
        return msg;
    }
    const underPressure = pressure.zone === 'orange' || pressure.zone === 'red';
    const protectedResults = results.map(result => {
        const content = result.content ?? '';
        if (!content || !underPressure || content.length <= TOOL_RECENT_OVERSIZE_CHAR_THRESHOLD) {
            return result;
        }
        const resultId = result.callId || result.name || undefined;
        return { ...result, content: trimRecentToolResult(content, pressure, resultId) };
    });
    return { ...msg, toolResults: protectedResults };
}
592
/**
 * Enforce tier character caps on a message's tool-result payloads.
 *
 * Two layers of capping:
 *  - Per-result: any result longer than `perResultCap` is head/tail
 *    truncated (after stripping the security preamble), or replaced with a
 *    dropped-result sentinel when the cap would leave a misleading fragment.
 *  - Per-turn: if `usedSoFar` plus this message's capped payload still
 *    exceeds `perTurnCap`, the whole message is downgraded to a prose
 *    summary with toolCalls/toolResults nulled out.
 *
 * @param msg message to cap — never mutated; shallow copies are returned
 * @param perResultCap max chars allowed per individual tool result
 * @param perTurnCap max cumulative chars for the turn, or null to disable
 * @param usedSoFar chars already consumed by other messages in this turn
 * @param maxTailChars tail budget passed to truncateWithHeadTail
 * @returns `{ msg, usedChars }` — the capped message plus the updated
 *          cumulative char count the caller should carry forward
 */
function applyTierPayloadCap(msg, perResultCap, perTurnCap, usedSoFar = 0, maxTailChars = TOOL_GRADIENT_MAX_TAIL_CHARS) {
    const toolResults = msg.toolResults?.map(result => {
        let content = result.content ?? '';
        if (content.length > perResultCap) {
            // Strip security preamble before truncation so it doesn't consume the head budget.
            // web_fetch results wrapped in <<<EXTERNAL_UNTRUSTED_CONTENT>>> blocks would otherwise
            // render the truncated result as: [security notice] + [middle marker] + [last line].
            const stripped = stripSecurityPreamble(content);
            // Floor check (TUNE-015): if the cap would leave less than 30% of the stripped content
            // AND less than 2000 chars absolute, return a sentinel instead of a misleading fragment.
            // Partial results that look complete are worse than a clear dropped-result signal.
            // The absolute floor prevents the sentinel from firing on large natural truncations
            // (e.g., 110k → 16k is a meaningful slice, not a misleading fragment).
            if (perResultCap < stripped.length * TOOL_GRADIENT_MIN_USEFUL_FRACTION && perResultCap < 2_000) {
                content = `[result too large for current context budget \u2014 ${stripped.length} chars stripped]`;
            }
            else {
                // Reserve space for the \n[trimmed] marker within the cap so the total
                // content length stays within perResultCap and doesn't overflow the
                // per-turn aggregate cap when multiple results are truncated.
                const TRIMMED_MARKER = '\n[trimmed]';
                content = truncateWithHeadTail(stripped, perResultCap - TRIMMED_MARKER.length, maxTailChars) + TRIMMED_MARKER;
            }
        }
        return { ...result, content };
    }) ?? null;
    // Turn usage is measured on the POST-cap content lengths, so the per-turn
    // check reflects what would actually be submitted.
    let usedChars = usedSoFar + (toolResults?.reduce((sum, r) => sum + (r.content?.length ?? 0), 0) ?? 0);
    if (perTurnCap != null && usedChars > perTurnCap) {
        // Turn budget blown: downgrade the entire message to a T2-size prose
        // summary and drop the raw tool payloads altogether.
        const downgradeSummary = extractToolProseSummary(msg, TOOL_GRADIENT_T2_CHAR_CAP, false);
        return {
            msg: {
                ...msg,
                textContent: appendToolSummary(msg.textContent, downgradeSummary),
                toolCalls: null,
                toolResults: null,
            },
            // Only the summary counts toward the turn after a downgrade.
            usedChars: usedSoFar + downgradeSummary.length,
        };
    }
    return {
        msg: { ...msg, toolResults },
        usedChars,
    };
}
636
/**
 * Evict tool results exceeding 800 tokens (~3200 chars) before the history
 * budget-fit loop. Large stale results waste budget; replace them with a
 * stub so consumers know the result existed and can re-run if needed.
 *
 * Applied to the already-gradient-processed history before window selection.
 * Messages inside the protected recent-turn window (turn age <=
 * TOOL_GRADIENT_T0_TURNS) are never evicted.
 */
const TOOL_RESULT_EVICTION_CHAR_THRESHOLD = 3_200; // ~800 tokens at 4 chars/token
function evictLargeToolResults(messages) {
    // Precompute each message's turn age in one reverse pass instead of calling
    // getTurnAge() per message — that rescans the tail for every element and is
    // accidentally O(n^2) on long histories. Same counting rule as getTurnAge:
    // turnAges[i] = number of plain user messages (role 'user', no tool results)
    // at indices greater than i.
    const turnAges = new Array(messages.length).fill(0);
    let newerUserTurns = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
        turnAges[i] = newerUserTurns;
        const m = messages[i];
        if (m?.role === 'user' && (!m.toolResults || m.toolResults.length === 0)) {
            newerUserTurns++;
        }
    }
    return messages.map((msg, idx) => {
        // Never evict from the protected recent-turn window.
        if (turnAges[idx] <= TOOL_GRADIENT_T0_TURNS)
            return msg;
        if (!msg.toolResults || msg.toolResults.length === 0)
            return msg;
        const evicted = msg.toolResults.map(result => {
            const content = result.content ?? '';
            if (content.length <= TOOL_RESULT_EVICTION_CHAR_THRESHOLD)
                return result;
            const approxKTokens = Math.round(content.length / 4 / 1000);
            return {
                ...result,
                content: `[tool result evicted: ~${approxKTokens}k tokens \u2014 use memory_search or re-run if needed]`,
            };
        });
        return { ...msg, toolResults: evicted };
    });
}
666
/**
 * Apply gradient tool treatment to a message array.
 *
 * Tiers are based on turn age, where turn age is the number of newer user
 * messages after the current message:
 *  - age <= T0: recent results kept intact (emergency-trimmed only under
 *    orange/red pressure via protectRecentToolContent)
 *  - age <= T1: per-result + per-turn char caps via applyTierPayloadCap
 *  - age <= T2: tool payloads replaced by a T2-size prose summary
 *  - older:     tool payloads replaced by a compact T3-size summary
 *
 * Returns a new array; individual messages are replaced, never mutated.
 * Pressure is computed once, from the PRE-transform messages.
 */
function applyToolGradient(messages, opts) {
    const result = [...messages];
    const pressure = computeToolPressureState(messages, opts?.totalWindowTokens);
    // Per-turn-age char accounting shared across all messages of the same age.
    const perTurnUsage = new Map();
    // Walk newest → oldest so newer messages within a turn claim budget first.
    for (let i = result.length - 1; i >= 0; i--) {
        const msg = result[i];
        if (!hasToolContent(msg))
            continue;
        const turnAge = getTurnAge(result, i);
        const usage = perTurnUsage.get(turnAge) ?? { t0: 0, t1: 0, t2: 0, t3: 0 };
        if (turnAge <= TOOL_GRADIENT_T0_TURNS) {
            // T0/T1: preserve full recent tool results unless we hit the conservative
            // orange/red pressure zones and the payload itself is oversized (>40k).
            result[i] = protectRecentToolContent(msg, pressure);
        }
        else if (turnAge <= TOOL_GRADIENT_T1_TURNS) {
            const capped = applyTierPayloadCap(msg, TOOL_GRADIENT_T1_CHAR_CAP, TOOL_GRADIENT_T1_TURN_CAP, usage.t1);
            usage.t1 = capped.usedChars;
            result[i] = capped.msg;
        }
        else if (turnAge <= TOOL_GRADIENT_T2_TURNS) {
            const summary = extractToolProseSummary(msg, TOOL_GRADIENT_T2_CHAR_CAP, false);
            const allowed = Math.max(0, TOOL_GRADIENT_T2_TURN_CAP - usage.t2);
            // `allowed || TOOL_GRADIENT_T3_CHAR_CAP`: when the turn budget is
            // exhausted (allowed === 0), fall back to the T3 cap so the turn
            // still keeps a minimal summary rather than an empty one.
            const boundedSummary = truncateHead(summary, Math.min(TOOL_GRADIENT_T2_CHAR_CAP, allowed || TOOL_GRADIENT_T3_CHAR_CAP));
            usage.t2 += boundedSummary.length;
            // Tool payloads are fully replaced by the summary appended to text.
            result[i] = {
                ...msg,
                textContent: appendToolSummary(msg.textContent, boundedSummary),
                toolCalls: null,
                toolResults: null,
            };
        }
        else {
            const summary = extractToolProseSummary(msg, TOOL_GRADIENT_T3_CHAR_CAP, true);
            const allowed = Math.max(0, TOOL_GRADIENT_T3_TURN_CAP - usage.t3);
            // Same exhaustion fallback as T2: zero remaining budget still grants
            // the T3 char cap as a floor.
            const boundedSummary = truncateHead(summary, Math.min(TOOL_GRADIENT_T3_CHAR_CAP, allowed || TOOL_GRADIENT_T3_CHAR_CAP));
            usage.t3 += boundedSummary.length;
            result[i] = {
                ...msg,
                textContent: appendToolSummary(msg.textContent, boundedSummary),
                toolCalls: null,
                toolResults: null,
            };
        }
        perTurnUsage.set(turnAge, usage);
    }
    return result;
}
720
/**
 * Guard: logRegistryStartup() fires only once per process, not per instance.
 * Checked and set in the Compositor constructor; module-level so multiple
 * Compositor instances share the flag.
 */
let _registryLogged = false;
722
+ export class Compositor {
723
+ config;
724
+ cache;
725
+ vectorStore;
726
+ libraryDb;
727
+ triggerRegistry;
728
+ /** Cached org registry loaded from fleet_agents at construction time. */
729
+ _orgRegistry;
730
+ constructor(deps, config) {
731
+ this.cache = deps.cache;
732
+ this.vectorStore = deps.vectorStore || null;
733
+ this.libraryDb = deps.libraryDb || null;
734
+ this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS;
735
+ // Load org registry from DB on init; fall back to hardcoded if DB empty.
736
+ this._orgRegistry = this.libraryDb
737
+ ? buildOrgRegistryFromDb(this.libraryDb)
738
+ : defaultOrgRegistry();
739
+ this.config = { ...DEFAULT_CONFIG, ...config };
740
+ if (!_registryLogged) {
741
+ logRegistryStartup();
742
+ _registryLogged = true;
743
+ }
744
+ }
745
+ /**
746
+ * Set or replace the vector store after construction.
747
+ * Called by hypermem.create() once sqlite-vec is confirmed available.
748
+ */
749
+ setVectorStore(vs) {
750
+ this.vectorStore = vs;
751
+ }
752
+ /**
753
+ * Hot-reload the org registry from the fleet_agents table.
754
+ * Call after fleet membership changes (new agent, org restructure)
755
+ * to pick up the latest without a full restart.
756
+ * Falls back to the current cached registry if the DB is unavailable.
757
+ */
758
+ refreshOrgRegistry() {
759
+ if (this.libraryDb) {
760
+ this._orgRegistry = buildOrgRegistryFromDb(this.libraryDb);
761
+ }
762
+ return this._orgRegistry;
763
+ }
764
+ /**
765
+ * Return the currently cached org registry.
766
+ */
767
+ get orgRegistry() {
768
+ return this._orgRegistry;
769
+ }
770
+ /**
771
+ * Compose a complete message array for sending to an LLM.
772
+ *
773
+ * Orchestrates all four memory layers:
774
+ * 1. System prompt + identity (never truncated)
775
+ * 2. Conversation history (L1 Redis → L2 messages.db)
776
+ * 3. Active facts from library (L4)
777
+ * 4. Knowledge entries relevant to conversation (L4)
778
+ * 5. User preferences (L4)
779
+ * 6. Semantic recall via vector search (L3)
780
+ * 7. Cross-session context (L2)
781
+ *
782
+ * Each slot respects the remaining token budget.
783
+ */
784
+ async compose(request, db, libraryDb) {
785
+ const store = new MessageStore(db);
786
+ const libDb = libraryDb || this.libraryDb;
787
+ // Dynamic reserve: use a lightweight SQLite sample to estimate avg turn cost
788
+ // BEFORE assembling the full context. This gives us the reserve fraction we
789
+ // need to compute the effective token budget at the start of compose.
790
+ // Full history assembly happens later in the pipeline.
791
+ const totalWindow = resolveModelWindow(request.model, this.config.defaultTokenBudget);
792
+ const sampleConv = store.getConversation(request.sessionKey);
793
+ const sampleMessages = sampleConv
794
+ ? store.getRecentMessages(sampleConv.id, 40)
795
+ : [];
796
+ const { reserve: dynamicReserve, avgTurnCost, dynamic: isDynamic, pressureHigh } = computeDynamicReserve(sampleMessages, totalWindow, this.config);
797
+ const budget = request.tokenBudget || resolveModelBudget(request.model, this.config.defaultTokenBudget, dynamicReserve);
798
+ let remaining = budget;
799
+ const warnings = [];
800
+ const slots = {
801
+ system: 0,
802
+ identity: 0,
803
+ history: 0,
804
+ facts: 0,
805
+ context: 0,
806
+ library: 0,
807
+ };
808
+ const messages = [];
809
+ // ─── System Prompt (never truncated) ───────────────────────
810
+ const systemContent = await this.getSlotContent(request.agentId, request.sessionKey, 'system', db);
811
+ if (systemContent) {
812
+ const tokens = estimateTokens(systemContent);
813
+ messages.push({
814
+ role: 'system',
815
+ textContent: systemContent,
816
+ toolCalls: null,
817
+ toolResults: null,
818
+ });
819
+ slots.system = tokens;
820
+ remaining -= tokens;
821
+ }
822
+ // ─── Identity (never truncated) ────────────────────────────
823
+ const identityContent = await this.getSlotContent(request.agentId, request.sessionKey, 'identity', db);
824
+ if (identityContent) {
825
+ const tokens = estimateTokens(identityContent);
826
+ messages.push({
827
+ role: 'system',
828
+ textContent: identityContent,
829
+ toolCalls: null,
830
+ toolResults: null,
831
+ });
832
+ slots.identity = tokens;
833
+ remaining -= tokens;
834
+ }
835
+ // ─── Stable Output Profile Prefix ──────────────────────────
836
+ // Keep deterministic output instructions on the static side of the cache
837
+ // boundary so Anthropic and OpenAI warm-prefix caching can reuse them.
838
+ if (remaining > 100 && request.includeLibrary !== false) {
839
+ const fosEnabled = this.config?.enableFOS !== false;
840
+ const modEnabled = this.config?.enableMOD !== false;
841
+ const outputTier = resolveOutputTier((this.config?.outputProfile ?? this.config?.outputStandard), fosEnabled, modEnabled);
842
+ const stableOutputParts = [];
843
+ if (outputTier.tier === 'light') {
844
+ stableOutputParts.push(renderLightFOS().join('\n'));
845
+ }
846
+ else if (libDb) {
847
+ if (outputTier.fos) {
848
+ const fos = getActiveFOS(libDb);
849
+ if (fos) {
850
+ const fosContent = renderFOS(fos).join('\n');
851
+ if (fosContent.trim())
852
+ stableOutputParts.push(fosContent);
853
+ }
854
+ }
855
+ if (outputTier.mod) {
856
+ const mod = matchMOD(request.model, libDb);
857
+ if (mod) {
858
+ const modContent = renderMOD(mod, null, request.model || '').join('\n');
859
+ if (modContent.trim())
860
+ stableOutputParts.push(modContent);
861
+ }
862
+ }
863
+ }
864
+ if (stableOutputParts.length > 0) {
865
+ const stableOutputContent = stableOutputParts.join('\n\n');
866
+ const stableOutputTokens = estimateTokens(stableOutputContent);
867
+ if (stableOutputTokens <= remaining) {
868
+ messages.push({
869
+ role: 'system',
870
+ textContent: stableOutputContent,
871
+ toolCalls: null,
872
+ toolResults: null,
873
+ });
874
+ slots.system += stableOutputTokens;
875
+ remaining -= stableOutputTokens;
876
+ }
877
+ }
878
+ }
879
+ // ─── Conversation History ──────────────────────────────────
880
+ let diagCrossTopicKeystones = 0;
881
+ // Hoisted: activeTopicId/name resolved inside history block, used for window dual-write (VS-1) and wiki page injection
882
+ let composedActiveTopicId;
883
+ let composedActiveTopicName;
884
+ if (request.includeHistory !== false) {
885
+ // P3.4: Look up the active topic for this session (non-fatal)
886
+ let activeTopicId;
887
+ let activeTopic;
888
+ if (!request.topicId) {
889
+ try {
890
+ const topicMap = new SessionTopicMap(db);
891
+ activeTopic = topicMap.getActiveTopic(request.sessionKey) || undefined;
892
+ if (activeTopic)
893
+ activeTopicId = activeTopic.id;
894
+ }
895
+ catch {
896
+ // Topic lookup is best-effort — fall back to full history
897
+ }
898
+ }
899
+ else {
900
+ activeTopicId = request.topicId;
901
+ try {
902
+ activeTopic = db.prepare(`
903
+ SELECT id, name
904
+ FROM topics
905
+ WHERE session_key = ? AND id = ?
906
+ LIMIT 1
907
+ `).get(request.sessionKey, request.topicId);
908
+ }
909
+ catch {
910
+ // Topic lookup is best-effort — fall back to ID-only history fetch
911
+ }
912
+ }
913
+ // Hoist resolved topic id+name so the window dual-write and wiki injection sections can access them
914
+ composedActiveTopicId = activeTopicId;
915
+ composedActiveTopicName = activeTopic?.name;
916
+ const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, request.historyDepth || this.config.maxHistoryMessages, store, activeTopicId);
917
+ // Deduplicate history by StoredMessage.id (second line of defense after
918
+ // pushHistory() tail-check dedup). Guards against any duplicates that
919
+ // slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
920
+ const seenIds = new Set();
921
+ const historyMessages = rawHistoryMessages.filter(m => {
922
+ const sm = m;
923
+ if (sm.id != null) {
924
+ if (seenIds.has(sm.id))
925
+ return false;
926
+ seenIds.add(sm.id);
927
+ }
928
+ return true;
929
+ });
930
+ // ── Transform-first: apply gradient tool treatment BEFORE budget math ──
931
+ // All tool payloads are in their final form before any token estimation.
932
+ // This ensures estimateMessageTokens() measures actual submission cost,
933
+ // not pre-transform cost (which caused overflow: dense tool JSON was
934
+ // undercounted at length/4 when it should be measured post-stub).
935
+ const transformedHistory = applyToolGradient(historyMessages, { totalWindowTokens: totalWindow });
936
+ // ── Evict large tool results (>800 tokens) before window selection ─────
937
+ // Replace oversized stale results with stubs so they don't burn budget.
938
+ // Current-turn results (turn age 0) are never evicted.
939
+ const evictedHistory = evictLargeToolResults(transformedHistory);
940
+ // ── Budget-fit: walk newest→oldest, drop whole clusters ─────────────
941
+ // Group tool_use + tool_result messages into clusters so they are kept
942
+ // or dropped as a unit. Breaking mid-cluster creates orphaned tool
943
+ // pairs that repairToolPairs has to strip downstream — wasting budget
944
+ // and leaving gaps in conversation continuity.
945
+ const budgetClusters = clusterNeutralMessages(evictedHistory);
946
+ let historyTokens = 0;
947
+ const includedClusters = [];
948
+ for (let i = budgetClusters.length - 1; i >= 0; i--) {
949
+ const cluster = budgetClusters[i];
950
+ if (historyTokens + cluster.tokenCost > remaining && includedClusters.length > 0) {
951
+ const droppedMsgCount = budgetClusters.slice(0, i + 1).reduce((s, c) => s + c.messages.length, 0);
952
+ warnings.push(`History truncated at cluster ${i + 1}/${budgetClusters.length} (${droppedMsgCount} messages dropped)`);
953
+ break;
954
+ }
955
+ includedClusters.unshift(cluster);
956
+ historyTokens += cluster.tokenCost;
957
+ }
958
+ const includedHistory = includedClusters.flatMap(c => c.messages);
959
+ // ── Keystone History Slot (P2.1) ──────────────────────────────────
960
+ // For long conversations (≥30 messages), inject high-signal older messages
961
+ // from before the recent window as recalled context. This lets the model
962
+ // see key decisions and specs that happened earlier in the conversation
963
+ // without them consuming the full recent history budget.
964
+ const keystoneFraction = this.config.keystoneHistoryFraction ?? 0.2;
965
+ const keystoneMaxMsgs = this.config.keystoneMaxMessages ?? 15;
966
+ let keystoneMessages = [];
967
+ let keystoneTokens = 0;
968
+ if (request.includeKeystones !== false && includedHistory.length >= 30 && keystoneFraction > 0) {
969
+ const keystoneResult = await this.buildKeystones(db, request.agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, request.prompt, libDb || undefined);
970
+ if (keystoneResult) {
971
+ keystoneMessages = keystoneResult.keystoneMessages;
972
+ keystoneTokens = keystoneResult.keystoneTokens;
973
+ // Replace includedHistory and historyTokens with the trimmed versions
974
+ // (keystoneResult reflects the trimming done inside buildKeystones)
975
+ includedHistory.splice(0, includedHistory.length, ...keystoneResult.trimmedHistory);
976
+ historyTokens = keystoneResult.trimmedHistoryTokens;
977
+ warnings.push(`Keystone: injected ${keystoneMessages.length} recalled messages`);
978
+ }
979
+ }
980
+ // ── Cross-Topic Keystones (P3.5) ──────────────────────────────────
981
+ // Pull high-signal messages from OTHER topics in this session when their
982
+ // content is semantically relevant to the current topic. Non-fatal.
983
+ let crossTopicMessages = [];
984
+ let crossTopicTokens = 0;
985
+ if (request.includeKeystones !== false && activeTopic && this.vectorStore) {
986
+ try {
987
+ const rawCrossTopicKeystones = await this.getKeystonesByTopic(request.agentId, request.sessionKey, activeTopic, includedHistory, db, 3);
988
+ if (rawCrossTopicKeystones.length > 0) {
989
+ // Token budget: cap the full cross-topic block at 15% of remaining,
990
+ // including the header line.
991
+ const crossTopicHeaderTokens = estimateTokens('## Cross-Topic Context');
992
+ const crossTopicBudget = Math.max(0, Math.floor(remaining * 0.15) - crossTopicHeaderTokens);
993
+ let used = 0;
994
+ for (const candidate of rawCrossTopicKeystones) {
995
+ const msg = {
996
+ role: candidate.role,
997
+ textContent: candidate.content,
998
+ toolCalls: null,
999
+ toolResults: null,
1000
+ };
1001
+ const msgTokens = estimateMessageTokens(msg);
1002
+ if (used + msgTokens > crossTopicBudget)
1003
+ continue;
1004
+ crossTopicMessages.push(msg);
1005
+ used += msgTokens;
1006
+ }
1007
+ crossTopicTokens = used;
1008
+ diagCrossTopicKeystones = crossTopicMessages.length;
1009
+ }
1010
+ }
1011
+ catch {
1012
+ // Cross-topic retrieval is non-fatal — never block compose
1013
+ }
1014
+ }
1015
+ // Push history with keystone separators if we have keystones.
1016
+ if (keystoneMessages.length > 0 || crossTopicMessages.length > 0) {
1017
+ // Cross-topic context (from other topics) — prepended before within-session keystones
1018
+ if (crossTopicMessages.length > 0) {
1019
+ messages.push({
1020
+ role: 'system',
1021
+ textContent: '## Cross-Topic Context',
1022
+ toolCalls: null,
1023
+ toolResults: null,
1024
+ });
1025
+ messages.push(...crossTopicMessages);
1026
+ }
1027
+ // Separator before recalled context (within-session keystones)
1028
+ if (keystoneMessages.length > 0) {
1029
+ messages.push({
1030
+ role: 'system',
1031
+ textContent: '## Recalled Context (high-signal older messages)',
1032
+ toolCalls: null,
1033
+ toolResults: null,
1034
+ });
1035
+ messages.push(...keystoneMessages);
1036
+ }
1037
+ // Separator before recent conversation
1038
+ messages.push({
1039
+ role: 'system',
1040
+ textContent: '## Recent Conversation',
1041
+ toolCalls: null,
1042
+ toolResults: null,
1043
+ });
1044
+ messages.push(...includedHistory);
1045
+ // Account for separator tokens in history slot
1046
+ const crossTopicSepTokens = crossTopicMessages.length > 0
1047
+ ? estimateTokens('## Cross-Topic Context')
1048
+ : 0;
1049
+ const keystoneSepTokens = keystoneMessages.length > 0
1050
+ ? estimateTokens('## Recalled Context (high-signal older messages)')
1051
+ : 0;
1052
+ const recentSepTokens = estimateTokens('## Recent Conversation');
1053
+ const sepTokens = crossTopicSepTokens + keystoneSepTokens + recentSepTokens;
1054
+ slots.history = historyTokens + keystoneTokens + crossTopicTokens + sepTokens;
1055
+ remaining -= (historyTokens + keystoneTokens + crossTopicTokens + sepTokens);
1056
+ }
1057
+ else {
1058
+ messages.push(...includedHistory);
1059
+ slots.history = historyTokens;
1060
+ remaining -= historyTokens;
1061
+ }
1062
+ // targetBudgetFraction cap: limit total context slots to a fraction of the
1063
+ // effective budget. This gives operators a single knob to make the system
1064
+ // lighter without tuning individual slot fractions.
1065
+ const targetFraction = this.config.targetBudgetFraction ?? 0.65;
1066
+ const contextCap = Math.floor(budget * targetFraction);
1067
+ if (remaining > contextCap) {
1068
+ remaining = contextCap;
1069
+ }
1070
+ // T1.3: Ghost message suppression.
1071
+ // If the last message in the included history is a warm-seeded user message
1072
+ // AND there's a subsequent message in SQLite that wasn't included (meaning
1073
+ // the assistant already responded), drop it. This prevents the model from
1074
+ // re-answering a question that was already handled in a prior session.
1075
+ // Only triggers when: (1) message has _warmed flag, (2) it's role=user,
1076
+ // (3) SQLite has messages after it (the response exists but wasn't included).
1077
+ const lastIncluded = messages[messages.length - 1];
1078
+ if (lastIncluded?.role === 'user') {
1079
+ const sm = lastIncluded;
1080
+ const meta = sm.metadata;
1081
+ if (meta?._warmed && sm.id != null) {
1082
+ // Check if there are any messages after this one in SQLite
1083
+ try {
1084
+ const hasMore = db.prepare('SELECT 1 FROM messages WHERE conversation_id = (SELECT conversation_id FROM messages WHERE id = ?) AND id > ? LIMIT 1').get(sm.id, sm.id);
1085
+ if (hasMore) {
1086
+ messages.pop();
1087
+ warnings.push('Dropped trailing warm-seeded user message with existing response (ghost suppression)');
1088
+ }
1089
+ }
1090
+ catch {
1091
+ // Ghost check is best-effort — don't block compose
1092
+ }
1093
+ }
1094
+ }
1095
+ }
1096
+ // ─── Injected Context Block ────────────────────────────────
1097
+ // Facts, knowledge, preferences, semantic recall, and cross-session
1098
+ // context are assembled into a single system message injected before
1099
+ // conversation history (after system/identity).
1100
+ const contextParts = [];
1101
+ let contextTokens = 0;
1102
+ // ── Compose-level diagnostics tracking vars ──────────────
1103
+ let diagTriggerHits = 0;
1104
+ let diagTriggerFallbackUsed = false;
1105
+ let diagFactsIncluded = 0;
1106
+ let diagSemanticResults = 0;
1107
+ let diagDocChunkCollections = 0;
1108
+ let diagScopeFiltered = 0;
1109
+ let diagRetrievalMode = 'none';
1110
+ // ── Wiki Page (L4: Library — active topic synthesis) ──────
1111
+ // Inject synthesized wiki page for the active topic before general knowledge.
1112
+ // Token budget: capped at 15% of remaining.
1113
+ if (request.includeLibrary !== false && remaining > 300 && libDb && composedActiveTopicName) {
1114
+ const wikiContent = this.buildWikiPageContext(request.agentId, composedActiveTopicName, libDb);
1115
+ if (wikiContent) {
1116
+ const tokens = estimateTokens(wikiContent);
1117
+ const cap = Math.floor(remaining * 0.15);
1118
+ if (tokens <= cap) {
1119
+ contextParts.push(wikiContent);
1120
+ contextTokens += tokens;
1121
+ remaining -= tokens;
1122
+ slots.library += tokens;
1123
+ }
1124
+ else {
1125
+ const truncated = this.truncateToTokens(wikiContent, cap);
1126
+ const truncTokens = estimateTokens(truncated);
1127
+ contextParts.push(truncated);
1128
+ contextTokens += truncTokens;
1129
+ remaining -= truncTokens;
1130
+ slots.library += truncTokens;
1131
+ }
1132
+ }
1133
+ }
1134
+ // ── Facts (L4: Library) ──────────────────────────────────
1135
+ // scope: agent — filtered by agentId via filterByScope after fetch
1136
+ if (request.includeFacts !== false && remaining > 500) {
1137
+ const factsContent = this.buildFactsFromDb(request.agentId, request.sessionKey, libDb || db);
1138
+ if (factsContent !== null) {
1139
+ const [content, factCount, scopeFiltered] = factsContent;
1140
+ diagFactsIncluded += factCount;
1141
+ diagScopeFiltered += scopeFiltered;
1142
+ if (content) {
1143
+ const tokens = estimateTokens(content);
1144
+ if (tokens <= remaining * 0.25) { // Cap facts at 25% of remaining (W4: was 0.3)
1145
+ contextParts.push(`## Active Facts\n${content}`);
1146
+ contextTokens += tokens;
1147
+ remaining -= tokens;
1148
+ slots.facts = tokens;
1149
+ }
1150
+ else {
1151
+ // Truncate to budget
1152
+ const truncated = this.truncateToTokens(content, Math.floor(remaining * 0.25));
1153
+ const truncTokens = estimateTokens(truncated);
1154
+ contextParts.push(`## Active Facts (truncated)\n${truncated}`);
1155
+ contextTokens += truncTokens;
1156
+ remaining -= truncTokens;
1157
+ slots.facts = truncTokens;
1158
+ warnings.push('Facts truncated to fit budget');
1159
+ }
1160
+ }
1161
+ }
1162
+ // ── Temporal retrieval (L4: Library) ─────────────────────
1163
+ // Fires when the query has temporal signals (before/after/when/last etc).
1164
+ // Returns facts in time order from temporal_index. Deduplicates against
1165
+ // facts already included above. Uses ingest_at as occurred_at proxy (v1).
1166
+ const queryText = request.prompt ?? '';
1167
+ if (request.includeSemanticRecall !== false && queryText && hasTemporalSignals(queryText) && libDb && remaining > 300) {
1168
+ try {
1169
+ const temporalStore = new TemporalStore(libDb);
1170
+ const temporalFacts = temporalStore.timeRangeQuery({
1171
+ agentId: request.agentId,
1172
+ limit: 15,
1173
+ order: 'DESC',
1174
+ });
1175
+ if (temporalFacts.length > 0) {
1176
+ // Deduplicate against facts already in context
1177
+ const existingContent = contextParts.join('\n');
1178
+ const novel = temporalFacts.filter(f => !existingContent.includes(f.content.slice(0, 60)));
1179
+ if (novel.length > 0) {
1180
+ const temporalBlock = novel
1181
+ .map(f => {
1182
+ const ts = new Date(f.occurredAt).toISOString().slice(0, 10);
1183
+ return `[${ts}] ${f.content}`;
1184
+ })
1185
+ .join('\n');
1186
+ const temporalSection = `## Temporal Context\n${temporalBlock}`;
1187
+ const tempTokens = estimateTokens(temporalSection);
1188
+ const tempBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
1189
+ if (tempTokens <= tempBudget) {
1190
+ contextParts.push(temporalSection);
1191
+ contextTokens += tempTokens;
1192
+ remaining -= tempTokens;
1193
+ slots.facts = (slots.facts ?? 0) + tempTokens;
1194
+ }
1195
+ else {
1196
+ const truncated = this.truncateToTokens(temporalSection, tempBudget);
1197
+ const truncTokens = estimateTokens(truncated);
1198
+ contextParts.push(truncated);
1199
+ contextTokens += truncTokens;
1200
+ remaining -= truncTokens;
1201
+ slots.facts = (slots.facts ?? 0) + truncTokens;
1202
+ }
1203
+ }
1204
+ }
1205
+ }
1206
+ catch {
1207
+ // Temporal index not yet available (migration pending) — skip silently
1208
+ }
1209
+ }
1210
+ // ── Open-domain FTS retrieval (L4: Library) ──────────────────
1211
+ // Fires when the query looks broad/exploratory with no topical anchor.
1212
+ // Searches raw messages_fts — bypasses isQualityFact() quality gate so
1213
+ // content filtered from library.db is still reachable for open-domain
1214
+ // questions. Primary fix for LoCoMo open-domain F1 gap (0.133 baseline).
1215
+ if (request.includeSemanticRecall !== false && queryText && isOpenDomainQuery(queryText) && db && remaining > 300) {
1216
+ try {
1217
+ const existingContent = contextParts.join('\n');
1218
+ const odResults = searchOpenDomain(db, queryText, existingContent, 10);
1219
+ if (odResults.length > 0) {
1220
+ const odBlock = odResults
1221
+ .map(r => {
1222
+ const ts = r.createdAt
1223
+ ? new Date(r.createdAt).toISOString().slice(0, 10)
1224
+ : '';
1225
+ const prefix = ts ? `[${ts}] ` : '';
1226
+ const snippet = r.content.length > 300
1227
+ ? r.content.slice(0, 300) + '…'
1228
+ : r.content;
1229
+ return `${prefix}${snippet}`;
1230
+ })
1231
+ .join('\n');
1232
+ const odSection = `## Open Domain Context\n${odBlock}`;
1233
+ const odTokens = estimateTokens(odSection);
1234
+ const odBudget = Math.floor(remaining * 0.20); // Cap at 20% of remaining
1235
+ if (odTokens <= odBudget) {
1236
+ contextParts.push(odSection);
1237
+ contextTokens += odTokens;
1238
+ remaining -= odTokens;
1239
+ slots.facts = (slots.facts ?? 0) + odTokens;
1240
+ }
1241
+ else {
1242
+ const truncated = this.truncateToTokens(odSection, odBudget);
1243
+ const truncTokens = estimateTokens(truncated);
1244
+ contextParts.push(truncated);
1245
+ contextTokens += truncTokens;
1246
+ remaining -= truncTokens;
1247
+ slots.facts = (slots.facts ?? 0) + truncTokens;
1248
+ }
1249
+ }
1250
+ }
1251
+ catch {
1252
+ // Open-domain FTS unavailable — skip silently
1253
+ }
1254
+ }
1255
+ }
1256
+ // ── Knowledge (L4: Library) ──────────────────────────────
1257
+ // scope: agent — filtered by agent_id in the SQL query (existing behavior)
1258
+ if (request.includeLibrary !== false && remaining > 500 && libDb) {
1259
+ const knowledgeContent = this.buildKnowledgeFromDb(request.agentId, libDb);
1260
+ if (knowledgeContent) {
1261
+ const tokens = estimateTokens(knowledgeContent);
1262
+ if (tokens <= remaining * 0.2) { // Cap knowledge at 20% of remaining
1263
+ contextParts.push(`## Knowledge\n${knowledgeContent}`);
1264
+ contextTokens += tokens;
1265
+ remaining -= tokens;
1266
+ slots.library += tokens;
1267
+ }
1268
+ else {
1269
+ const truncated = this.truncateToTokens(knowledgeContent, Math.floor(remaining * 0.2));
1270
+ const truncTokens = estimateTokens(truncated);
1271
+ contextParts.push(`## Knowledge (truncated)\n${truncated}`);
1272
+ contextTokens += truncTokens;
1273
+ remaining -= truncTokens;
1274
+ slots.library += truncTokens;
1275
+ warnings.push('Knowledge truncated to fit budget');
1276
+ }
1277
+ }
1278
+ }
1279
+ // ── Preferences (L4: Library) ────────────────────────────
1280
+ // scope: agent — filtered by agent_id OR NULL in the SQL query (existing behavior)
1281
+ if (request.includeLibrary !== false && remaining > 300 && libDb) {
1282
+ const prefsContent = this.buildPreferencesFromDb(request.agentId, libDb);
1283
+ if (prefsContent) {
1284
+ const tokens = estimateTokens(prefsContent);
1285
+ if (tokens <= remaining * 0.1) { // Cap preferences at 10% of remaining
1286
+ contextParts.push(`## User Preferences\n${prefsContent}`);
1287
+ contextTokens += tokens;
1288
+ remaining -= tokens;
1289
+ slots.library += tokens;
1290
+ }
1291
+ }
1292
+ }
1293
+ // ── Semantic Recall (L3: Hybrid FTS5+KNN) ───────────────
1294
+ // scope: agent — buildSemanticRecall filters by agentId internally
1295
+ // Fires when either vector store or library DB is available.
1296
+ // FTS5-only (no embeddings) still returns keyword matches.
1297
+ // KNN-only (no FTS terms) still returns semantic matches.
1298
+ // Both present → Reciprocal Rank Fusion.
1299
+ // Use request.prompt as the retrieval query when available — it is the
1300
+ // live current-turn text. Falling back to getLastUserMessage(messages)
1301
+ // reads from the already-assembled history, which is one turn stale.
1302
+ if (request.includeSemanticRecall !== false && remaining > 500 && (this.vectorStore || libDb)) {
1303
+ const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
1304
+ if (lastUserMsg) {
1305
+ try {
1306
+ // Check Redis for a pre-computed embedding from afterTurn()
1307
+ let precomputedEmbedding;
1308
+ try {
1309
+ const cached = await this.cache.getQueryEmbedding(request.agentId, request.sessionKey);
1310
+ if (cached)
1311
+ precomputedEmbedding = cached;
1312
+ }
1313
+ catch {
1314
+ // Redis lookup is best-effort — fall through to Ollama
1315
+ }
1316
+ const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.12), // Cap at 12% of remaining (W4: was 0.15)
1317
+ libDb || undefined, precomputedEmbedding);
1318
+ if (semanticContent) {
1319
+ const tokens = estimateTokens(semanticContent);
1320
+ contextParts.push(`## Related Memory\n${semanticContent}`);
1321
+ contextTokens += tokens;
1322
+ remaining -= tokens;
1323
+ // Semantic recall draws from multiple sources, attribute to context
1324
+ slots.context += tokens;
1325
+ // W3 diagnostics: count non-empty lines as rough results count
1326
+ diagSemanticResults = semanticContent.split('\n').filter(l => l.trim().length > 0).length;
1327
+ }
1328
+ }
1329
+ catch (err) {
1330
+ // Semantic search is best-effort — don't fail composition
1331
+ warnings.push(`Semantic recall failed: ${err.message}`);
1332
+ }
1333
+ }
1334
+ }
1335
+ // ── Doc Chunks (L4: Trigger-based retrieval) ─────────────
1336
+ // scope: per-tier/per-agent — queryChunks filters by agentId and tier
1337
+ // Demand-load governance, identity, and memory chunks based on
1338
+ // conversation context. Replaces full ACA file injection for
1339
+ // the files that have been seeded into the doc chunk index.
1340
+ let triggerFallbackUsed = false;
1341
+ if (request.includeDocChunks !== false && remaining > 400 && libDb) {
1342
+ // Use request.prompt when available (current-turn text, not stale history)
1343
+ const lastMsg = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
1344
+ const triggered = matchTriggers(lastMsg, this.triggerRegistry);
1345
+ if (triggered.length > 0) {
1346
+ diagTriggerHits = triggered.length;
1347
+ diagRetrievalMode = 'triggered';
1348
+ const docChunkStore = new DocChunkStore(libDb);
1349
+ const docParts = [];
1350
+ const maxTotalTriggerTokens = Math.min(remaining, this.config.maxTotalTriggerTokens && this.config.maxTotalTriggerTokens > 0
1351
+ ? this.config.maxTotalTriggerTokens
1352
+ : Math.floor(remaining * 0.40));
1353
+ let totalTriggerTokens = 0;
1354
+ for (const trigger of triggered) {
1355
+ if (remaining < 200)
1356
+ break;
1357
+ const triggerBudgetRemaining = maxTotalTriggerTokens - totalTriggerTokens;
1358
+ if (triggerBudgetRemaining < 200)
1359
+ break;
1360
+ const maxTokens = Math.min(trigger.maxTokens || 1000, Math.floor(remaining * 0.12), // No single collection takes > 12% of remaining (W4: was 0.15)
1361
+ triggerBudgetRemaining);
1362
+ try {
1363
+ // Build a relevance-based FTS5 query from the user message.
1364
+ //
1365
+ // Problem: trigger keywords are stems ('escalat', 'irreversib') for
1366
+ // substring matching against user messages, but FTS5 tokenizes on word
1367
+ // boundaries. 'escalat' does not match 'escalation' in FTS5 without a
1368
+ // prefix operator.
1369
+ //
1370
+ // Solution: extract actual words from the user message that contain a
1371
+ // matched trigger keyword, then use FTS5 prefix queries (word*) for
1372
+ // each extracted word. This bridges stem-matching and FTS5 indexing.
1373
+ const msgLower = lastMsg.toLowerCase();
1374
+ const matchedKeywords = trigger.keywords.filter(kw => msgLower.includes(kw.toLowerCase()));
1375
+ // Extract whole words from the message that overlap with matched keywords
1376
+ const msgWords = lastMsg.match(/\b\w{4,}\b/g) || [];
1377
+ const relevantWords = msgWords.filter(word => matchedKeywords.some(kw => word.toLowerCase().includes(kw.toLowerCase()) ||
1378
+ kw.toLowerCase().includes(word.toLowerCase().slice(0, 5))));
1379
+ // Build FTS5 OR query: "word1* OR word2* OR word3*"
1380
+ // FTS5 treats space-separated terms as AND by default — we want OR so
1381
+ // that any relevant term is sufficient to retrieve a matching chunk.
1382
+ // Prefix operator (*) ensures stems match full words in the index.
1383
+ // Sort by keyword match specificity (longer matched keyword = more specific term),
1384
+ // then cap at 6 terms to keep FTS queries reasonable.
1385
+ // No positional slice — all relevant words participate, not just the first 3.
1386
+ const sortedWords = [...new Set(relevantWords)].sort((a, b) => {
1387
+ const aLen = Math.max(...matchedKeywords.filter(kw => a.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(a.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
1388
+ const bLen = Math.max(...matchedKeywords.filter(kw => b.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(b.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
1389
+ return bLen - aLen; // Most specific match first
1390
+ });
1391
+ const ftsTerms = sortedWords.length > 0
1392
+ ? sortedWords.slice(0, 6).map(w => `${w}*`).join(' OR ')
1393
+ : matchedKeywords
1394
+ .sort((a, b) => b.length - a.length)
1395
+ .slice(0, 3)
1396
+ .map(kw => `${kw}*`)
1397
+ .join(' OR ');
1398
+ const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3).join(' ');
1399
+ const chunks = docChunkStore.queryChunks({
1400
+ collection: trigger.collection,
1401
+ agentId: request.agentId,
1402
+ tier: request.tier,
1403
+ limit: trigger.maxChunks || 3,
1404
+ keyword: ftsKeyword,
1405
+ });
1406
+ if (chunks.length === 0)
1407
+ continue;
1408
+ const chunkLines = [];
1409
+ let chunkTokens = 0;
1410
+ for (const chunk of chunks) {
1411
+ if (chunkTokens + chunk.tokenEstimate > maxTokens)
1412
+ break;
1413
+ chunkLines.push(`### ${chunk.sectionPath}\n${chunk.content}`);
1414
+ chunkTokens += chunk.tokenEstimate;
1415
+ }
1416
+ if (chunkLines.length > 0) {
1417
+ const collectionLabel = trigger.collection.split('/').pop() || trigger.collection;
1418
+ docParts.push(`## ${collectionLabel} (retrieved)\n${chunkLines.join('\n\n')}`);
1419
+ totalTriggerTokens += chunkTokens;
1420
+ contextTokens += chunkTokens;
1421
+ remaining -= chunkTokens;
1422
+ slots.library += chunkTokens;
1423
+ diagDocChunkCollections++;
1424
+ }
1425
+ }
1426
+ catch {
1427
+ // Doc chunk retrieval is best-effort — don't fail composition
1428
+ }
1429
+ }
1430
+ if (docParts.length > 0) {
1431
+ contextParts.push(docParts.join('\n\n'));
1432
+ }
1433
+ }
1434
+ else if (remaining > 400 && (this.vectorStore || libDb)) {
1435
+ // Trigger-miss fallback: no trigger fired — attempt bounded semantic retrieval
1436
+ // so there is never a silent zero-memory path on doc chunks.
1437
+ // INVARIANT: this block is mutually exclusive with triggered-retrieval above.
1438
+ // If refactored to run both paths, cap combined semantic budget to avoid double-recall.
1439
+ try {
1440
+ const fallbackContent = await Promise.race([
1441
+ this.buildSemanticRecall(lastMsg, request.agentId, Math.floor(remaining * 0.10), libDb || undefined),
1442
+ new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
1443
+ ]);
1444
+ if (fallbackContent) {
1445
+ contextParts.push(`## Related Memory\n${fallbackContent}`);
1446
+ const fallbackTokens = estimateTokens(fallbackContent);
1447
+ contextTokens += fallbackTokens;
1448
+ remaining -= fallbackTokens;
1449
+ slots.context += fallbackTokens;
1450
+ triggerFallbackUsed = true;
1451
+ diagTriggerFallbackUsed = true;
1452
+ diagRetrievalMode = 'fallback_knn';
1453
+ }
1454
+ }
1455
+ catch {
1456
+ // Fallback is best-effort — never fail composition (includes timeout)
1457
+ }
1458
+ }
1459
+ }
1460
+ // ── Session-Scoped Doc Chunks (spawn context inheritance) ────
1461
+ // When parentSessionKey is set, retrieve ephemeral doc chunks indexed
1462
+ // by buildSpawnContext() for this spawn session.
1463
+ if (request.parentSessionKey && remaining > 200 && libDb) {
1464
+ try {
1465
+ const spawnChunkStore = new DocChunkStore(libDb);
1466
+ const spawnQueryMsg = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
1467
+ const spawnChunks = spawnChunkStore.queryDocChunks(request.agentId, spawnQueryMsg, { sessionKey: request.parentSessionKey, limit: 8 });
1468
+ if (spawnChunks.length > 0) {
1469
+ const spawnLines = [];
1470
+ let spawnTokens = 0;
1471
+ const maxSpawnTokens = Math.floor(remaining * 0.15);
1472
+ for (const chunk of spawnChunks) {
1473
+ if (spawnTokens + chunk.tokenEstimate > maxSpawnTokens)
1474
+ break;
1475
+ spawnLines.push(chunk.content);
1476
+ spawnTokens += chunk.tokenEstimate;
1477
+ }
1478
+ if (spawnLines.length > 0) {
1479
+ contextParts.push(`## Spawn Context Documents\n${spawnLines.join('\n\n')}`);
1480
+ contextTokens += spawnTokens;
1481
+ remaining -= spawnTokens;
1482
+ slots.library += spawnTokens;
1483
+ }
1484
+ }
1485
+ }
1486
+ catch {
1487
+ // Session-scoped chunk retrieval is best-effort
1488
+ }
1489
+ }
1490
+ // ── Cross-Session Context (L2: Messages) ─────────────────
1491
+ if (request.includeContext !== false && remaining > 500) {
1492
+ const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb);
1493
+ if (crossSessionContent) {
1494
+ const tokens = estimateTokens(crossSessionContent);
1495
+ const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
1496
+ if (tokens <= maxContextTokens) {
1497
+ contextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
1498
+ contextTokens += tokens;
1499
+ remaining -= tokens;
1500
+ slots.context += tokens;
1501
+ }
1502
+ else {
1503
+ const truncated = this.truncateToTokens(crossSessionContent, maxContextTokens);
1504
+ const truncTokens = estimateTokens(truncated);
1505
+ contextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
1506
+ contextTokens += truncTokens;
1507
+ remaining -= truncTokens;
1508
+ slots.context += truncTokens;
1509
+ warnings.push('Cross-session context truncated');
1510
+ }
1511
+ }
1512
+ }
1513
+ // ── Action Verification Summary ─────────────────────────
1514
+ // Keep recent action history on the dynamic side of the cache boundary.
1515
+ if (remaining > 50 && request.includeLibrary !== false) {
1516
+ const pressurePct = budget > 0 ? Math.round(((budget - remaining) / budget) * 100) : 0;
1517
+ const actionSummary = buildActionVerificationSummary(messages, pressurePct);
1518
+ if (actionSummary) {
1519
+ const actionTokens = Math.ceil(actionSummary.length / 4);
1520
+ if (actionTokens <= remaining) {
1521
+ contextParts.push(actionSummary);
1522
+ contextTokens += actionTokens;
1523
+ remaining -= actionTokens;
1524
+ slots.context += actionTokens;
1525
+ }
1526
+ }
1527
+ }
1528
+ // ── Inject assembled context block ──────────────────────
1529
+ const assembledContextBlock = contextParts.length > 0 ? contextParts.join('\n\n') : undefined;
1530
+ if (assembledContextBlock) {
1531
+ const contextMsg = {
1532
+ role: 'system',
1533
+ textContent: assembledContextBlock,
1534
+ toolCalls: null,
1535
+ toolResults: null,
1536
+ // DYNAMIC_BOUNDARY: this slot is session-specific (facts, recall, episodes).
1537
+ // It must NOT be included in any prompt caching boundary that spans static content.
1538
+ // The provider translator will insert a cache_control ephemeral marker BEFORE
1539
+ // this message so providers can cache everything up to identity/system as static context.
1540
+ metadata: { dynamicBoundary: true },
1541
+ };
1542
+ // Insert after system/identity, before history
1543
+ // Insert context after all system/identity messages, before conversation history.
1544
+ // findIndex returns -1 when all messages are system-role — handle explicitly.
1545
+ const firstNonSystem = messages.findIndex(m => m.role !== 'system');
1546
+ const insertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
1547
+ messages.splice(insertIdx, 0, contextMsg);
1548
+ }
1549
+ // ─── Safety Valve: Post-Assembly Budget Check ───────────────────
1550
+ // Re-estimate total tokens after all slots are assembled. If the
1551
+ // composition exceeds tokenBudget * 1.05 (5% tolerance for estimation
1552
+ // drift), trim history messages from the oldest until we're under budget.
1553
+ // History is the most compressible slot — system/identity are never
1554
+ // truncated, and context (facts/recall/episodes) is more valuable per-token.
1555
+ const estimatedTotal = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
1556
+ const hardCeiling = Math.floor(budget * 1.05);
1557
+ if (estimatedTotal > hardCeiling) {
1558
+ const overage = estimatedTotal - budget;
1559
+ let trimmed = 0;
1560
+ let trimCount = 0;
1561
+ // Find history messages (non-system, after system/identity block)
1562
+ // Walk forward from the first non-system message, trimming oldest history first
1563
+ const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
1564
+ if (firstNonSystemIdx >= 0) {
1565
+ let i = firstNonSystemIdx;
1566
+ while (i < messages.length && trimmed < overage) {
1567
+ // Don't trim the last user message (current prompt)
1568
+ if (i === messages.length - 1 && messages[i].role === 'user')
1569
+ break;
1570
+ const msgTokens = estimateMessageTokens(messages[i]);
1571
+ messages.splice(i, 1);
1572
+ trimmed += msgTokens;
1573
+ trimCount++;
1574
+ // Don't increment i — splice shifts everything down
1575
+ }
1576
+ }
1577
+ if (trimCount > 0) {
1578
+ slots.history = Math.max(0, slots.history - trimmed);
1579
+ remaining += trimmed;
1580
+ warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget`);
1581
+ }
1582
+ }
1583
+ // ─── Translate to provider format (unless caller wants neutral) ───
1584
+ // When skipProviderTranslation is set, return NeutralMessages directly.
1585
+ // The context engine plugin uses this: the OpenClaw runtime handles its
1586
+ // own provider translation, so double-translating corrupts tool calls.
1587
+ const outputMessages = request.skipProviderTranslation
1588
+ ? messages
1589
+ : toProviderFormat(messages, request.provider ?? request.model ?? null);
1590
+ // T1.3: Strip warm-replay provenance flags before output.
1591
+ // _warmed is an internal tag added by warmSession() to mark messages
1592
+ // seeded from SQLite into Redis. It must not leak into provider submissions
1593
+ // or be visible to the runtime (which might misinterpret it).
1594
+ for (const msg of outputMessages) {
1595
+ const m = msg;
1596
+ if (m.metadata && m.metadata._warmed) {
1597
+ const { _warmed, ...cleanMeta } = m.metadata;
1598
+ m.metadata = Object.keys(cleanMeta).length > 0 ? cleanMeta : undefined;
1599
+ }
1600
+ }
1601
+ const totalTokens = budget - remaining;
1602
+ // ─── Slot reconciliation ─────────────────────────────────────────────────
1603
+ // totalTokens = budget - remaining is the authoritative spend figure.
1604
+ // The slot accounting can drift from this due to history trim (which
1605
+ // reduces slots.history but adds back to remaining after the budget
1606
+ // was already committed) and FOS/MOD token rounding.
1607
+ // Reconcile: assign any unaccounted tokens to slots.history so that
1608
+ // sum(slots) === totalTokens always holds.
1609
+ {
1610
+ const slotSum = (slots.system ?? 0) + (slots.identity ?? 0) +
1611
+ (slots.history ?? 0) + (slots.facts ?? 0) +
1612
+ (slots.context ?? 0) + (slots.library ?? 0);
1613
+ const delta = totalTokens - slotSum;
1614
+ if (delta !== 0) {
1615
+ slots.history = (slots.history ?? 0) + delta;
1616
+ }
1617
+ }
1618
+ // ─── Write Window Cache ─────────────────────────────
1619
+ // Cache the composed message array so the plugin can serve it directly
1620
+ // on the next assemble() call without re-running the full compose pipeline.
1621
+ // Short TTL (120s) — invalidated by afterTurn when new messages arrive.
1622
+ //
1623
+ // VS-1: Dual-write — session-scoped key for backwards compat;
1624
+ // topic-scoped key for per-topic window retrieval when activeTopicId is set.
1625
+ try {
1626
+ await this.cache.setWindow(request.agentId, request.sessionKey, messages, 120);
1627
+ }
1628
+ catch {
1629
+ // Window cache write is best-effort
1630
+ }
1631
+ // VS-1: Topic-scoped window dual-write
1632
+ if (composedActiveTopicId) {
1633
+ try {
1634
+ await this.cache.setTopicWindow(request.agentId, request.sessionKey, composedActiveTopicId, messages, 120);
1635
+ }
1636
+ catch {
1637
+ // Topic window write is best-effort
1638
+ }
1639
+ }
1640
+ // ─── Write Session Cursor ─────────────────────────────────
1641
+ // Record the newest message included in the submission window.
1642
+ // Background indexer uses this to find unprocessed high-signal content.
1643
+ if (request.includeHistory !== false && slots.history > 0) {
1644
+ try {
1645
+ const historyMsgs = messages.filter(m => m.role !== 'system');
1646
+ const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
1647
+ if (lastHistoryMsg) {
1648
+ const sm = lastHistoryMsg;
1649
+ if (sm.id != null && sm.messageIndex != null) {
1650
+ const cursor = {
1651
+ lastSentId: sm.id,
1652
+ lastSentIndex: sm.messageIndex,
1653
+ lastSentAt: new Date().toISOString(),
1654
+ windowSize: historyMsgs.length,
1655
+ tokenCount: totalTokens,
1656
+ };
1657
+ await this.cache.setCursor(request.agentId, request.sessionKey, cursor);
1658
+ // Dual-write cursor to SQLite for durability across Redis eviction (P1.3)
1659
+ try {
1660
+ db.prepare(`
1661
+ UPDATE conversations
1662
+ SET cursor_last_sent_id = ?,
1663
+ cursor_last_sent_index = ?,
1664
+ cursor_last_sent_at = ?,
1665
+ cursor_window_size = ?,
1666
+ cursor_token_count = ?
1667
+ WHERE session_key = ?
1668
+ `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
1669
+ }
1670
+ catch {
1671
+ // SQLite cursor write is best-effort — don't block compose
1672
+ }
1673
+ }
1674
+ }
1675
+ }
1676
+ catch {
1677
+ // Cursor write is best-effort
1678
+ }
1679
+ }
1680
+ // ─── Compaction Fence Update ──────────────────────────────
1681
+ // Record the oldest message ID that the LLM can see in this compose
1682
+ // cycle. Everything below this ID becomes eligible for compaction.
1683
+ // If history was included, query the DB for the oldest included message.
1684
+ if (request.includeHistory !== false && slots.history > 0) {
1685
+ try {
1686
+ const conversation = store.getConversation(request.sessionKey);
1687
+ if (conversation) {
1688
+ // The compositor included N history messages (after truncation).
1689
+ // Count how many non-system messages are in the output to determine
1690
+ // how far back we reached.
1691
+ const historyMsgCount = messages.filter(m => m.role !== 'system').length;
1692
+ if (historyMsgCount > 0) {
1693
+ // Get the oldest message we would have included.
1694
+ // getRecentMessages returns the last N in chronological order,
1695
+ // so the first element is the oldest included.
1696
+ const oldestIncluded = db.prepare(`
1697
+ SELECT id FROM messages
1698
+ WHERE conversation_id = ?
1699
+ ORDER BY message_index DESC
1700
+ LIMIT 1 OFFSET ?
1701
+ `).get(conversation.id, historyMsgCount - 1);
1702
+ if (oldestIncluded) {
1703
+ ensureCompactionFenceSchema(db);
1704
+ updateCompactionFence(db, conversation.id, oldestIncluded.id);
1705
+ }
1706
+ }
1707
+ }
1708
+ }
1709
+ catch {
1710
+ // Fence update is best-effort — never fail composition
1711
+ warnings.push('Compaction fence update failed (non-fatal)');
1712
+ }
1713
+ }
1714
+ // W3: Build compose diagnostics
1715
+ let zeroResultReason;
1716
+ if (contextParts.length === 0) {
1717
+ if (diagScopeFiltered > 0 && diagFactsIncluded === 0 && diagSemanticResults === 0) {
1718
+ zeroResultReason = 'scope_filtered_all';
1719
+ }
1720
+ else if (remaining <= 0) {
1721
+ zeroResultReason = 'budget_exhausted';
1722
+ }
1723
+ else if (diagTriggerHits === 0 && !diagTriggerFallbackUsed) {
1724
+ zeroResultReason = 'no_trigger_no_fallback';
1725
+ }
1726
+ else if ((diagTriggerHits > 0 || diagTriggerFallbackUsed) && diagFactsIncluded === 0 && diagSemanticResults === 0 && diagDocChunkCollections === 0) {
1727
+ // Retrieval was attempted (trigger fired or fallback ran) but returned nothing — likely a retrieval bug
1728
+ // rather than a genuinely empty corpus. Distinguish from 'empty_corpus' for observability.
1729
+ zeroResultReason = 'unknown';
1730
+ }
1731
+ else {
1732
+ zeroResultReason = 'empty_corpus';
1733
+ }
1734
+ }
1735
+ const diagnostics = {
1736
+ triggerHits: diagTriggerHits,
1737
+ triggerFallbackUsed: diagTriggerFallbackUsed,
1738
+ factsIncluded: diagFactsIncluded,
1739
+ semanticResultsIncluded: diagSemanticResults,
1740
+ docChunksCollections: diagDocChunkCollections,
1741
+ scopeFiltered: diagScopeFiltered,
1742
+ zeroResultReason,
1743
+ retrievalMode: diagRetrievalMode,
1744
+ crossTopicKeystones: diagCrossTopicKeystones,
1745
+ reserveFraction: dynamicReserve,
1746
+ avgTurnCostTokens: avgTurnCost,
1747
+ dynamicReserveActive: isDynamic,
1748
+ sessionPressureHigh: pressureHigh,
1749
+ };
1750
+ if (pressureHigh) {
1751
+ warnings.push(`SESSION_PRESSURE_HIGH: avg_turn_cost=${avgTurnCost} tokens, dynamic reserve capped at ${Math.round(dynamicReserve * 100)}%`);
1752
+ }
1753
+ else if (dynamicReserve > 0.40) {
1754
+ console.info(`[hypermem:compositor] dynamic_reserve=${Math.round(dynamicReserve * 100)}% avg_turn_cost=${Math.round(avgTurnCost / 1000)}k horizon=${this.config.dynamicReserveTurnHorizon ?? 5}`);
1755
+ }
1756
+ console.log(`[hypermem:compose] agent=${request.agentId} triggers=${diagTriggerHits} fallback=${diagTriggerFallbackUsed} facts=${diagFactsIncluded} semantic=${diagSemanticResults} chunks=${diagDocChunkCollections} scopeFiltered=${diagScopeFiltered} mode=${diagRetrievalMode} crossTopicKeystones=${diagCrossTopicKeystones}`);
1757
+ return {
1758
+ messages: outputMessages,
1759
+ tokenCount: totalTokens,
1760
+ slots,
1761
+ truncated: remaining < 0 || estimatedTotal > hardCeiling,
1762
+ hasWarnings: warnings.length > 0,
1763
+ warnings,
1764
+ contextBlock: assembledContextBlock,
1765
+ diagnostics,
1766
+ };
1767
+ }
1768
+ /**
1769
+ * Warm a session from SQLite into Redis.
1770
+ * Called on session start or Redis cache miss.
1771
+ */
1772
+ async warmSession(agentId, sessionKey, db, opts) {
1773
+ const store = new MessageStore(db);
1774
+ const conversation = store.getConversation(sessionKey);
1775
+ if (!conversation)
1776
+ return;
1777
+ // Fetch a generous pool from SQLite, apply gradient transform, then
1778
+ // token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
1779
+ // message-count constant which was a blunt instrument — 100 messages of
1780
+ // large tool results can massively exceed the history budget allocation.
1781
+ // Warm budget uses the same reserve fraction as compose() so warm history
1782
+ // never pre-fills more than compose() would actually allow.
1783
+ const reserve = this.config.contextWindowReserve ?? 0.15;
1784
+ const effectiveBudget = resolveModelBudget(opts?.model, this.config.defaultTokenBudget, reserve);
1785
+ const warmBudget = Math.floor(effectiveBudget * (this.config.warmHistoryBudgetFraction ?? 0.4));
1786
+ const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
1787
+ const transformedForWarm = applyToolGradient(rawHistory, {
1788
+ totalWindowTokens: resolveModelWindow(opts?.model, this.config.defaultTokenBudget),
1789
+ });
1790
+ // Walk newest→oldest, accumulate transformed token cost, stop when budget exhausted
1791
+ let warmTokens = 0;
1792
+ const history = [];
1793
+ for (let i = transformedForWarm.length - 1; i >= 0; i--) {
1794
+ const cost = estimateMessageTokens(transformedForWarm[i]);
1795
+ if (warmTokens + cost > warmBudget)
1796
+ break;
1797
+ // T1.3 Provenance flag: tag warm-seeded messages so they can be identified
1798
+ // downstream. The flag is stripped before provider submission in compose().
1799
+ // This prevents the runtime from treating warm-replayed user messages as
1800
+ // new inbound queries (ghost message bug).
1801
+ const tagged = { ...transformedForWarm[i] };
1802
+ tagged.metadata = { ...(tagged.metadata || {}), _warmed: true };
1803
+ history.unshift(tagged);
1804
+ warmTokens += cost;
1805
+ }
1806
+ const libDb = opts?.libraryDb || this.libraryDb;
1807
+ // Note: facts and context are intentionally NOT cached here.
1808
+ // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
1809
+ // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
1810
+ // Caching them here would create stale entries that compose() ignores anyway.
1811
+ await this.cache.warmSession(agentId, sessionKey, {
1812
+ system: opts?.systemPrompt,
1813
+ identity: opts?.identity,
1814
+ history,
1815
+ meta: {
1816
+ agentId,
1817
+ sessionKey,
1818
+ provider: conversation.provider,
1819
+ model: conversation.model,
1820
+ channelType: conversation.channelType,
1821
+ tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
1822
+ lastActive: conversation.updatedAt,
1823
+ status: conversation.status,
1824
+ },
1825
+ });
1826
+ }
1827
+ async refreshRedisGradient(agentId, sessionKey, db, tokenBudget) {
1828
+ const store = new MessageStore(db);
1829
+ const conversation = store.getConversation(sessionKey);
1830
+ if (!conversation)
1831
+ return;
1832
+ const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
1833
+ const transformedHistory = applyToolGradient(rawHistory, {
1834
+ totalWindowTokens: tokenBudget && tokenBudget > 0
1835
+ ? Math.max(tokenBudget, Math.floor(tokenBudget / 0.80))
1836
+ : TOOL_PLANNING_BASELINE_WINDOW,
1837
+ });
1838
+ // If a token budget is provided, trim the gradient-compressed window to fit
1839
+ // before writing to Redis. Without this, up to maxHistoryMessages messages
1840
+ // land in Redis regardless of size, and trimHistoryToTokenBudget fires
1841
+ // on every subsequent assemble() causing per-turn churn.
1842
+ let historyToWrite = transformedHistory;
1843
+ if (tokenBudget && tokenBudget > 0) {
1844
+ const budgetCap = Math.floor(tokenBudget * 0.8);
1845
+ let runningTokens = 0;
1846
+ const clusters = clusterNeutralMessages(transformedHistory);
1847
+ const cappedClusters = [];
1848
+ // Walk newest-first, keep whole clusters so tool-call/result pairs survive together.
1849
+ for (let i = clusters.length - 1; i >= 0; i--) {
1850
+ const cluster = clusters[i];
1851
+ if (runningTokens + cluster.tokenCost > budgetCap && cappedClusters.length > 0)
1852
+ break;
1853
+ cappedClusters.unshift(cluster);
1854
+ runningTokens += cluster.tokenCost;
1855
+ if (runningTokens >= budgetCap)
1856
+ break;
1857
+ }
1858
+ historyToWrite = cappedClusters.flatMap(cluster => cluster.messages);
1859
+ if (historyToWrite.length < transformedHistory.length) {
1860
+ console.log(`[hypermem] refreshRedisGradient: cluster-capped ${transformedHistory.length}→${historyToWrite.length} messages ` +
1861
+ `for ${agentId}/${sessionKey} (budgetCap=${budgetCap}, tokenCost=${runningTokens})`);
1862
+ }
1863
+ }
1864
+ await this.cache.replaceHistory(agentId, sessionKey, historyToWrite, this.config.maxHistoryMessages);
1865
+ }
1866
+ // ─── Slot Content Resolution ─────────────────────────────────
1867
+ /**
1868
+ * Get slot content: try Redis first, fall back to SQLite.
1869
+ */
1870
+ async getSlotContent(agentId, sessionKey, slot, db, libraryDb) {
1871
+ const cached = await this.cache.getSlot(agentId, sessionKey, slot);
1872
+ if (cached)
1873
+ return cached;
1874
+ switch (slot) {
1875
+ case 'facts': {
1876
+ const result = this.buildFactsFromDb(agentId, sessionKey, libraryDb || this.libraryDb || db);
1877
+ return result ? result[0] : null;
1878
+ }
1879
+ case 'context':
1880
+ return this.buildCrossSessionContext(agentId, sessionKey, db, libraryDb || this.libraryDb);
1881
+ default:
1882
+ return null;
1883
+ }
1884
+ }
1885
+ /**
1886
+ * Get conversation history: try Redis first, fall back to SQLite.
1887
+ *
1888
+ * When topicId is provided (P3.4), the SQLite path filters to messages
1889
+ * matching that topic OR with topic_id IS NULL (Option B transition safety).
1890
+ * The Redis path is unaffected — Redis doesn't index by topic, so topic
1891
+ * filtering only applies to the SQLite fallback.
1892
+ */
1893
+ async getHistory(agentId, sessionKey, limit, store, topicId) {
1894
+ // Pass limit through to Redis — this is the correct enforcement point.
1895
+ // Previously getHistory() ignored the limit on the Redis path (LRANGE 0 -1),
1896
+ // meaning historyDepth in the compose request had no effect on hot sessions.
1897
+ const cached = await this.cache.getHistory(agentId, sessionKey, limit);
1898
+ if (cached.length > 0)
1899
+ return cached;
1900
+ const conversation = store.getConversation(sessionKey);
1901
+ if (!conversation)
1902
+ return [];
1903
+ if (topicId) {
1904
+ // P3.4: Option B — active topic messages + legacy NULL messages
1905
+ return store.getRecentMessagesByTopic(conversation.id, topicId, limit);
1906
+ }
1907
+ return store.getRecentMessages(conversation.id, limit);
1908
+ }
1909
+ // ─── L4 Library Builders ─────────────────────────────────────
1910
/**
 * Build facts content from the library DB.
 * Applies filterByScope (W1) to enforce retrieval access control.
 * Returns [content, factCount, scopeFilteredCount] or null if DB unavailable.
 */
1918
+ buildFactsFromDb(agentId, sessionKey, db) {
1919
+ if (!db)
1920
+ return null;
1921
+ const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='facts'").get();
1922
+ if (!tableExists || tableExists.cnt === 0)
1923
+ return null;
1924
+ const rawRows = db.prepare(`
1925
+ SELECT content, domain, confidence, agent_id, source_session_key AS session_key, scope FROM facts
1926
+ WHERE agent_id = ?
1927
+ AND superseded_by IS NULL
1928
+ AND (expires_at IS NULL OR expires_at > datetime('now'))
1929
+ AND decay_score < 0.8
1930
+ AND confidence >= 0.5
1931
+ ORDER BY confidence DESC, decay_score ASC
1932
+ LIMIT ?
1933
+ `).all(agentId, this.config.maxFacts);
1934
+ if (rawRows.length === 0)
1935
+ return [null, 0, 0];
1936
+ // W1: Apply scope filter — enforce retrieval access control
1937
+ const ctx = { agentId, sessionKey };
1938
+ const { allowed, filteredCount } = filterByScope(rawRows.map(r => ({
1939
+ ...r,
1940
+ agentId: r.agent_id,
1941
+ sessionKey: r.session_key,
1942
+ })), ctx);
1943
+ if (allowed.length === 0)
1944
+ return [null, 0, filteredCount];
1945
+ const content = allowed
1946
+ .map(r => {
1947
+ // Session attribution: label facts from a different session so the model
1948
+ // can distinguish current-session context from cross-session facts.
1949
+ // Shows last 8 chars of session key as a stable short identifier.
1950
+ const fromOtherSession = r.sessionKey && r.sessionKey !== sessionKey;
1951
+ const sessionSuffix = fromOtherSession
1952
+ ? `, session:${r.sessionKey.slice(-8)}`
1953
+ : '';
1954
+ return `- [${r.domain || 'general'}${sessionSuffix}] ${r.content}`;
1955
+ })
1956
+ .join('\n');
1957
+ return [content, allowed.length, filteredCount];
1958
+ }
1959
+ /**
1960
+ * Build knowledge content from library DB.
1961
+ * Prioritizes high-confidence, non-superseded entries.
1962
+ */
1963
+ buildKnowledgeFromDb(agentId, db) {
1964
+ const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='knowledge'").get();
1965
+ if (!tableExists || tableExists.cnt === 0)
1966
+ return null;
1967
+ const rows = db.prepare(`
1968
+ SELECT domain, key, content, confidence FROM knowledge
1969
+ WHERE agent_id = ?
1970
+ AND superseded_by IS NULL
1971
+ AND (expires_at IS NULL OR expires_at > datetime('now'))
1972
+ ORDER BY confidence DESC, updated_at DESC
1973
+ LIMIT 15
1974
+ `).all(agentId);
1975
+ if (rows.length === 0)
1976
+ return null;
1977
+ // Group by domain for cleaner presentation
1978
+ const byDomain = {};
1979
+ for (const row of rows) {
1980
+ if (!byDomain[row.domain])
1981
+ byDomain[row.domain] = [];
1982
+ byDomain[row.domain].push({ key: row.key, content: row.content });
1983
+ }
1984
+ const lines = [];
1985
+ for (const [domain, entries] of Object.entries(byDomain)) {
1986
+ lines.push(`### ${domain}`);
1987
+ for (const entry of entries) {
1988
+ lines.push(`- **${entry.key}:** ${entry.content}`);
1989
+ }
1990
+ }
1991
+ return lines.join('\n');
1992
+ }
1993
+ /**
1994
+ * Build wiki page context for the active topic.
1995
+ * Queries the knowledge table for a synthesized topic page and returns it
1996
+ * wrapped with a header. Capped at 600 tokens.
1997
+ */
1998
+ buildWikiPageContext(agentId, topicName, db) {
1999
+ const knowledgeStore = new KnowledgeStore(db);
2000
+ const knowledge = knowledgeStore.get(agentId, 'topic-synthesis', topicName);
2001
+ if (!knowledge)
2002
+ return null;
2003
+ const wrapped = `## Active Topic: ${topicName}\n${knowledge.content}`;
2004
+ return this.truncateToTokens(wrapped, 600);
2005
+ }
2006
+ /**
2007
+ * Build preferences content from library DB.
2008
+ * Shows user/operator preferences relevant to this agent.
2009
+ */
2010
+ buildPreferencesFromDb(agentId, db) {
2011
+ const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='preferences'").get();
2012
+ if (!tableExists || tableExists.cnt === 0)
2013
+ return null;
2014
+ // Get preferences set by this agent or marked fleet-visible
2015
+ const rows = db.prepare(`
2016
+ SELECT subject, key, value, domain, confidence FROM preferences
2017
+ WHERE (agent_id = ? OR agent_id IS NULL)
2018
+ ORDER BY confidence DESC, updated_at DESC
2019
+ LIMIT 10
2020
+ `).all(agentId);
2021
+ if (rows.length === 0)
2022
+ return null;
2023
+ // Group by subject
2024
+ const bySubject = {};
2025
+ for (const row of rows) {
2026
+ if (!bySubject[row.subject])
2027
+ bySubject[row.subject] = [];
2028
+ bySubject[row.subject].push({ key: row.key, value: row.value, domain: row.domain });
2029
+ }
2030
+ const lines = [];
2031
+ for (const [subject, prefs] of Object.entries(bySubject)) {
2032
+ lines.push(`### ${subject}`);
2033
+ for (const pref of prefs) {
2034
+ const domainTag = pref.domain ? ` [${pref.domain}]` : '';
2035
+ lines.push(`- **${pref.key}:**${domainTag} ${pref.value}`);
2036
+ }
2037
+ }
2038
+ return lines.join('\n');
2039
+ }
2040
+ // ─── L3 Hybrid Retrieval (FTS5 + KNN) ───────────────────────
2041
+ /**
2042
+ * Build semantic recall content using hybrid FTS5+KNN retrieval.
2043
+ *
2044
+ * Uses Reciprocal Rank Fusion to merge keyword and vector results.
2045
+ * Gracefully degrades: FTS5-only when no vector store, KNN-only
2046
+ * when FTS query is empty (all stop words), both when available.
2047
+ *
2048
+ * @param precomputedEmbedding — optional pre-computed embedding for the query.
2049
+ * When provided, the Ollama call inside VectorStore.search() is skipped.
2050
+ */
2051
async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding) {
    const libDb = libraryDb || this.libraryDb;
    // No library DB and no vector store — nothing to retrieve from.
    if (!libDb && !this.vectorStore)
        return null;
    // Use hybrid search when library DB is available
    if (libDb) {
        const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
            tables: ['facts', 'knowledge', 'episodes'],
            limit: 10,
            agentId,
            maxKnnDistance: 1.2,
            precomputedEmbedding,
        });
        if (results.length === 0)
            return null;
        const lines = [];
        let tokens = 0;
        // TUNE-015: apply recency decay to recall scores.
        // Messages and episodes from distant past score down even if semantically relevant.
        // A 5-day-old task-request should not compete equally with today's messages.
        // - Episodes: exponential decay, half-life 7 days
        // - Facts/knowledge: step-function penalty for items older than 48h
        //   (prevents completed/stale tasks from outranking recent ones)
        //   48-72h: multiply by 0.7
        //   >72h:   multiply by 0.5
        const now = Date.now();
        const decayedResults = results.map(result => {
            // Results without a timestamp cannot be decayed — pass through unchanged.
            if (!result.createdAt)
                return result;
            const ageMs = now - new Date(result.createdAt).getTime();
            const ageDays = ageMs / 86_400_000; // ms per day
            if (result.sourceTable === 'episodes') {
                // Exponential half-life decay for episodes
                const decayFactor = Math.pow(0.5, ageDays / 7);
                return { ...result, score: result.score * decayFactor };
            }
            // Step-function recency penalty for facts and knowledge
            const ageHours = ageMs / 3_600_000; // ms per hour
            if (ageHours > 72) {
                return { ...result, score: result.score * 0.5 };
            }
            if (ageHours > 48) {
                return { ...result, score: result.score * 0.7 };
            }
            return result;
        });
        // Re-sort after decay adjustment
        decayedResults.sort((a, b) => b.score - a.score);
        // Greedy budget fill: walk best-first, skipping results below the
        // relevance floors, and stop once the next line would exceed maxTokens.
        for (const result of decayedResults) {
            // TUNE-001: drop very-low-relevance results (RRF scores below 0.008 are noise)
            if (result.score < 0.008)
                continue;
            // TUNE-016: FTS-only results require higher floor — low-score FTS hits are noise
            if (result.sources.length === 1 && result.sources[0] === 'fts' && result.score < 0.05)
                continue;
            // TUNE-014: episodes require higher confidence — score:2 episodes bleed adjacent
            // session context and contaminate current session. Require fts+knn agreement
            // (score >= 0.04) for episodes to make it into assembled context.
            if (result.sourceTable === 'episodes' && result.score < 0.04)
                continue;
            const label = this.formatHybridResult(result);
            const lineTokens = estimateTokens(label);
            if (tokens + lineTokens > maxTokens)
                break;
            lines.push(label);
            tokens += lineTokens;
        }
        return lines.length > 0 ? lines.join('\n') : null;
    }
    // Fallback: KNN-only when no library DB (legacy path)
    if (!this.vectorStore)
        return null;
    const results = await this.vectorStore.search(userMessage, {
        tables: ['facts', 'knowledge', 'episodes'],
        limit: 8,
        maxDistance: 1.2,
        precomputedEmbedding,
    });
    if (results.length === 0)
        return null;
    const lines = [];
    let tokens = 0;
    // Same greedy token-budget fill as the hybrid path, but no decay or
    // score floors — vector results are already distance-capped above.
    for (const result of results) {
        const label = this.formatVectorResult(result);
        const lineTokens = estimateTokens(label);
        if (tokens + lineTokens > maxTokens)
            break;
        lines.push(label);
        tokens += lineTokens;
    }
    return lines.length > 0 ? lines.join('\n') : null;
}
2143
+ /**
2144
+ * Format a hybrid search result for injection into context.
2145
+ * Shows retrieval source(s) and relevance score.
2146
+ */
2147
+ formatHybridResult(result) {
2148
+ const type = result.sourceTable;
2149
+ const sourceTag = result.sources.length === 2 ? 'fts+knn' : result.sources[0];
2150
+ const scoreStr = (result.score * 100).toFixed(0);
2151
+ switch (type) {
2152
+ case 'facts':
2153
+ return `- [fact, ${sourceTag}, score:${scoreStr}] ${result.content}`;
2154
+ case 'knowledge':
2155
+ return `- [knowledge/${result.metadata || 'general'}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
2156
+ case 'episodes':
2157
+ return `- [episode/${result.domain || 'event'}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
2158
+ default:
2159
+ return `- [${type}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
2160
+ }
2161
+ }
2162
+ /**
2163
+ * Format a vector-only search result (legacy fallback).
2164
+ */
2165
+ formatVectorResult(result) {
2166
+ const relevance = Math.max(0, Math.round((1 - result.distance) * 100));
2167
+ const type = result.sourceTable;
2168
+ switch (type) {
2169
+ case 'facts':
2170
+ return `- [fact, ${relevance}% relevant] ${result.content}`;
2171
+ case 'knowledge':
2172
+ return `- [knowledge/${result.metadata || 'general'}, ${relevance}% relevant] ${result.content}`;
2173
+ case 'episodes':
2174
+ return `- [episode/${result.domain || 'event'}, ${relevance}% relevant] ${result.content}`;
2175
+ default:
2176
+ return `- [${type}, ${relevance}% relevant] ${result.content}`;
2177
+ }
2178
+ }
2179
+ // ─── L2 Cross-Session Context ────────────────────────────────
2180
+ /**
2181
+ * Build cross-session context by finding recent activity
2182
+ * in other sessions for this agent.
2183
+ */
2184
+ buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb) {
2185
+ const conversation = db.prepare('SELECT id FROM conversations WHERE session_key = ?').get(currentSessionKey);
2186
+ if (!conversation)
2187
+ return null;
2188
+ const rows = db.prepare(`
2189
+ SELECT m.text_content, m.role, c.channel_type, m.created_at
2190
+ FROM messages m
2191
+ JOIN conversations c ON m.conversation_id = c.id
2192
+ WHERE c.agent_id = ?
2193
+ AND m.conversation_id != ?
2194
+ AND c.status = 'active'
2195
+ AND m.text_content IS NOT NULL
2196
+ AND m.is_heartbeat = 0
2197
+ ORDER BY m.created_at DESC
2198
+ LIMIT 10
2199
+ `).all(agentId, conversation.id);
2200
+ if (rows.length === 0)
2201
+ return null;
2202
+ const lines = rows.map(r => {
2203
+ const preview = r.text_content.substring(0, 200);
2204
+ return `- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`;
2205
+ });
2206
+ return lines.join('\n');
2207
+ }
2208
+ // ─── Utilities ───────────────────────────────────────────────
2209
+ /**
2210
+ * Extract the last user message text from the composed messages.
2211
+ */
2212
+ getLastUserMessage(messages) {
2213
+ for (let i = messages.length - 1; i >= 0; i--) {
2214
+ if (messages[i].role === 'user' && messages[i].textContent) {
2215
+ return messages[i].textContent;
2216
+ }
2217
+ }
2218
+ return null;
2219
+ }
2220
+ /**
2221
+ * Truncate text to approximately fit within a token budget.
2222
+ * Truncates at line boundaries when possible.
2223
+ */
2224
+ truncateToTokens(text, maxTokens) {
2225
+ const maxChars = maxTokens * 4; // inverse of our estimation
2226
+ if (text.length <= maxChars)
2227
+ return text;
2228
+ // Try to truncate at a line boundary
2229
+ const truncated = text.substring(0, maxChars);
2230
+ const lastNewline = truncated.lastIndexOf('\n');
2231
+ if (lastNewline > maxChars * 0.7) {
2232
+ return truncated.substring(0, lastNewline) + '\n…';
2233
+ }
2234
+ return truncated + '…';
2235
+ }
2236
+ // ─── Keystone History Builder ─────────────────────────────────────
2237
+ /**
2238
+ * Query and score keystone candidates from before the current history window.
2239
+ *
2240
+ * Trims the oldest messages from includedHistory to free a keystone budget,
2241
+ * then queries the DB for older messages scored by episode significance,
2242
+ * FTS5 relevance, and recency.
2243
+ *
2244
+ * Returns null if keystones cannot be injected (no cutoff ID found,
2245
+ * no candidates, or all errors).
2246
+ */
2247
async buildKeystones(db, agentId, includedHistory, historyTokens, keystoneFraction, keystoneMaxMsgs, prompt, libraryDb) {
    // Keystone budget is a fixed fraction of the history token budget.
    const keystoneBudget = Math.floor(historyTokens * keystoneFraction);
    if (keystoneBudget <= 0)
        return null;
    // Trim oldest messages from includedHistory to free keystone budget.
    // Always keep at least one recent message (length > 1 guard).
    const trimmedHistory = [...includedHistory];
    let trimmedHistoryTokens = historyTokens;
    let freed = 0;
    while (trimmedHistory.length > 1 && freed < keystoneBudget) {
        const oldest = trimmedHistory.shift();
        const oldestTokens = estimateMessageTokens(oldest);
        freed += oldestTokens;
        trimmedHistoryTokens -= oldestTokens;
    }
    // Find the oldest message ID in the trimmed recent window (cutoff point).
    // Candidates must come strictly before this ID to avoid duplicating
    // messages already present in the recent window.
    const oldestRecentMsg = trimmedHistory[0];
    const cutoffId = oldestRecentMsg?.id ?? null;
    if (cutoffId == null)
        return null;
    // Find the current user prompt for FTS matching. Falls back to the most
    // recent user message in the trimmed window when no explicit prompt given.
    const promptForFts = prompt?.trim() ||
        (() => {
            for (let i = trimmedHistory.length - 1; i >= 0; i--) {
                if (trimmedHistory[i].role === 'user' && trimmedHistory[i].textContent) {
                    return trimmedHistory[i].textContent;
                }
            }
            return null;
        })();
    try {
        // Get the conversation ID from the oldest recent message.
        const convRow = db.prepare('SELECT conversation_id FROM messages WHERE id = ?').get(cutoffId);
        if (!convRow)
            return null;
        const conversationId = convRow.conversation_id;
        const maxAgeHours = 168; // 7 days — tighter window gives recency real scoring weight
        const nowMs = Date.now();
        // Build episode significance map from libraryDb (episodes live there, not in messages.db).
        // Key: source_message_id, Value: max significance for that message.
        const sigMap = new Map();
        if (libraryDb) {
            try {
                const episodeRows = libraryDb.prepare(`
          SELECT source_message_id, MAX(significance) AS significance
          FROM episodes
          WHERE agent_id = ? AND source_message_id IS NOT NULL
          GROUP BY source_message_id
        `).all(agentId);
                for (const row of episodeRows) {
                    sigMap.set(row.source_message_id, row.significance);
                }
            }
            catch {
                // Episodes query is best-effort
            }
        }
        // Base candidate query: non-heartbeat, non-empty messages before the cutoff.
        const baseQuery = `
      SELECT
        m.id,
        m.message_index,
        m.role,
        m.text_content,
        m.created_at
      FROM messages m
      WHERE m.conversation_id = ?
        AND m.id < ?
        AND m.text_content IS NOT NULL
        AND m.is_heartbeat = 0
        AND m.text_content != ''
      LIMIT 200
    `;
        let candidateRows;
        if (promptForFts && promptForFts.length >= 3) {
            // Build a safe FTS5 query: extract words ≥3 chars, up to 8, OR with prefix.
            // Quotes inside terms are stripped to avoid breaking MATCH syntax.
            const ftsTerms = (promptForFts.match(/\b\w{3,}\b/g) || [])
                .slice(0, 8)
                .map(w => `"${w.replace(/"/g, '')}"*`)
                .join(' OR ');
            if (ftsTerms) {
                try {
                    // Same filters as baseQuery, narrowed to FTS-matching rowids.
                    candidateRows = db.prepare(`
          SELECT
            m.id,
            m.message_index,
            m.role,
            m.text_content,
            m.created_at
          FROM messages m
          WHERE m.conversation_id = ?
            AND m.id < ?
            AND m.text_content IS NOT NULL
            AND m.is_heartbeat = 0
            AND m.text_content != ''
            AND m.id IN (
              SELECT rowid FROM messages_fts
              WHERE messages_fts MATCH ?
              LIMIT 100
            )
          LIMIT 200
        `).all(conversationId, cutoffId, ftsTerms);
                }
                catch {
                    // FTS query may fail on special characters — fall back to base query
                    candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
                }
            }
            else {
                candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
            }
        }
        else {
            candidateRows = db.prepare(baseQuery).all(conversationId, cutoffId);
        }
        if (candidateRows.length === 0)
            return null;
        // Build KeystoneCandidate objects with computed ftsRank and ageHours.
        const totalCandidates = candidateRows.length;
        const candidates = candidateRows.map((row, idx) => {
            const createdMs = new Date(row.created_at).getTime();
            const ageHours = (nowMs - createdMs) / (1000 * 60 * 60);
            // Normalize FTS rank by position (best match = 1.0, worst = 0.1)
            const ftsRank = totalCandidates > 1
                ? 1.0 - (idx / totalCandidates) * 0.9
                : 1.0;
            return {
                messageId: row.id,
                messageIndex: row.message_index,
                role: row.role,
                content: row.text_content || '',
                timestamp: row.created_at,
                episodeSignificance: sigMap.get(row.id) ?? null,
                ftsRank,
                ageHours,
            };
        });
        // Score and rank candidates.
        const ranked = rankKeystones(candidates, maxAgeHours);
        // Budget-fit: take top-scored candidates until keystoneBudget exhausted.
        let kTokens = 0;
        const selectedKeystones = [];
        for (const candidate of ranked) {
            if (selectedKeystones.length >= keystoneMaxMsgs)
                break;
            const msg = {
                role: candidate.role,
                textContent: candidate.content,
                toolCalls: null,
                toolResults: null,
            };
            const msgTokens = estimateMessageTokens(msg);
            if (kTokens + msgTokens > keystoneBudget)
                continue; // skip oversized; keep trying
            selectedKeystones.push(candidate);
            kTokens += msgTokens;
        }
        if (selectedKeystones.length === 0)
            return null;
        // Sort selected keystones chronologically for injection.
        selectedKeystones.sort((a, b) => a.messageIndex - b.messageIndex);
        const keystoneMessages = selectedKeystones.map(c => ({
            role: c.role,
            textContent: c.content,
            toolCalls: null,
            toolResults: null,
        }));
        return {
            keystoneMessages,
            keystoneTokens: kTokens,
            trimmedHistory,
            trimmedHistoryTokens,
        };
    }
    catch {
        // Keystone injection is best-effort — never fail compose
        return null;
    }
}
2424
+ // ─── Cross-Topic Keystone Retrieval (P3.5) ───────────────────────
2425
+ /**
2426
+ * Pull high-signal messages from OTHER topics in this session when their
2427
+ * content is semantically relevant to the current active topic.
2428
+ *
2429
+ * Heuristic-only: no model calls. Token overlap between the current topic
2430
+ * name + last 3 user messages and candidate message content.
2431
+ *
2432
+ * @param agentId - The agent's ID
2433
+ * @param sessionKey - Current session key
2434
+ * @param activeTopic - The current active topic (id + name)
2435
+ * @param currentMessages - Recently included history messages for query extraction
2436
+ * @param db - The messages database
2437
+ * @param maxKeystones - Max cross-topic keystones to return (default 3)
2438
+ * @returns Scored keystones sorted by score DESC, deduplicated by message id
2439
+ */
2440
async getKeystonesByTopic(agentId, sessionKey, activeTopic, currentMessages, db, maxKeystones = 3) {
    // Up to 5 most-recently-active topics in this session, excluding the active one.
    const otherTopics = db.prepare(`
      SELECT id, name
      FROM topics
      WHERE session_key = ? AND id != ?
      ORDER BY last_active_at DESC
      LIMIT 5
    `).all(sessionKey, activeTopic.id);
    if (otherTopics.length === 0)
        return [];
    // Extract key terms from active topic name + last 3 user messages
    const queryTerms = this.extractQueryTerms(activeTopic.name, currentMessages);
    if (queryTerms.size === 0)
        return [];
    const nowMs = Date.now();
    const maxAgeHours = 168; // 7 days, same as within-session keystones
    const seenIds = new Set();  // dedupe by message id across topics
    const allCandidates = [];
    for (const topic of otherTopics) {
        let topicMessages;
        try {
            // Most recent 50 non-heartbeat, non-empty messages tagged with this topic.
            topicMessages = db.prepare(`
          SELECT m.id, m.message_index, m.role, m.text_content, m.created_at
          FROM messages m
          JOIN conversations c ON m.conversation_id = c.id
          WHERE c.session_key = ?
            AND c.agent_id = ?
            AND m.topic_id = ?
            AND m.text_content IS NOT NULL
            AND m.text_content != ''
            AND m.is_heartbeat = 0
          ORDER BY m.message_index DESC
          LIMIT 50
        `).all(sessionKey, agentId, topic.id);
        }
        catch {
            // Corrupt topic data — skip this topic, never throw
            continue;
        }
        if (topicMessages.length === 0)
            continue;
        // Shape rows into keystone candidates; episodeSignificance is not
        // available on this path, so it is always null here.
        const topicCandidates = topicMessages.map((msg, idx) => {
            const createdMs = new Date(msg.created_at).getTime();
            const ageHours = (nowMs - createdMs) / (1000 * 60 * 60);
            // Normalize rank by list position (best = 1.0, worst = 0.1).
            const ftsRank = topicMessages.length > 1
                ? 1.0 - (idx / topicMessages.length) * 0.9
                : 1.0;
            return {
                messageId: msg.id,
                messageIndex: msg.message_index,
                role: msg.role,
                content: msg.text_content,
                timestamp: msg.created_at,
                episodeSignificance: null,
                ftsRank,
                ageHours,
            };
        });
        // Keep only the top 10 ranked candidates per topic before overlap filtering.
        const topTopicKeystones = rankKeystones(topicCandidates, maxAgeHours).slice(0, 10);
        // Filter to messages with semantic overlap (≥2 matching terms)
        const relevant = topTopicKeystones.filter(candidate => {
            const contentLower = candidate.content.toLowerCase();
            let matches = 0;
            for (const term of queryTerms) {
                if (contentLower.includes(term)) {
                    matches++;
                    if (matches >= 2)
                        return true;
                }
            }
            return false;
        });
        if (relevant.length === 0)
            continue;
        // Re-score filtered candidates so they compete on the same final scale
        for (const candidate of relevant) {
            if (seenIds.has(candidate.messageId))
                continue;
            seenIds.add(candidate.messageId);
            const score = scoreKeystone(candidate, maxAgeHours);
            allCandidates.push({ ...candidate, score });
        }
    }
    if (allCandidates.length === 0)
        return [];
    // Sort by score DESC and return top maxKeystones
    return allCandidates
        .sort((a, b) => b.score - a.score)
        .slice(0, maxKeystones);
}
2530
+ /**
2531
+ * Extract lowercase key terms from a topic name and the last 3 user messages.
2532
+ * Terms are: tokens with ≥4 characters (skip short stop words).
2533
+ * Returns a Set for O(1) lookup.
2534
+ */
2535
+ extractQueryTerms(topicName, messages) {
2536
+ const terms = new Set();
2537
+ const MIN_TERM_LEN = 4;
2538
+ // From topic name
2539
+ const topicTokens = topicName.toLowerCase().match(/\b[a-z0-9]{4,}\b/g) ?? [];
2540
+ for (const t of topicTokens)
2541
+ terms.add(t);
2542
+ // From last 3 user messages
2543
+ let userCount = 0;
2544
+ for (let i = messages.length - 1; i >= 0 && userCount < 3; i--) {
2545
+ const msg = messages[i];
2546
+ if (msg.role === 'user' && msg.textContent) {
2547
+ const tokens = msg.textContent.toLowerCase().match(/\b[a-z0-9]{4,}\b/g) ?? [];
2548
+ for (const t of tokens) {
2549
+ if (t.length >= MIN_TERM_LEN)
2550
+ terms.add(t);
2551
+ }
2552
+ userCount++;
2553
+ }
2554
+ }
2555
+ return terms;
2556
+ }
2557
+ }
2558
+ //# sourceMappingURL=compositor.js.map