@psiclawops/hypermem 0.1.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/ARCHITECTURE.md +4 -3
  2. package/README.md +457 -174
  3. package/package.json +15 -5
  4. package/dist/background-indexer.d.ts +0 -117
  5. package/dist/background-indexer.d.ts.map +0 -1
  6. package/dist/background-indexer.js +0 -732
  7. package/dist/compaction-fence.d.ts +0 -89
  8. package/dist/compaction-fence.d.ts.map +0 -1
  9. package/dist/compaction-fence.js +0 -153
  10. package/dist/compositor.d.ts +0 -139
  11. package/dist/compositor.d.ts.map +0 -1
  12. package/dist/compositor.js +0 -1109
  13. package/dist/cross-agent.d.ts +0 -57
  14. package/dist/cross-agent.d.ts.map +0 -1
  15. package/dist/cross-agent.js +0 -254
  16. package/dist/db.d.ts +0 -131
  17. package/dist/db.d.ts.map +0 -1
  18. package/dist/db.js +0 -398
  19. package/dist/desired-state-store.d.ts +0 -100
  20. package/dist/desired-state-store.d.ts.map +0 -1
  21. package/dist/desired-state-store.js +0 -212
  22. package/dist/doc-chunk-store.d.ts +0 -115
  23. package/dist/doc-chunk-store.d.ts.map +0 -1
  24. package/dist/doc-chunk-store.js +0 -278
  25. package/dist/doc-chunker.d.ts +0 -99
  26. package/dist/doc-chunker.d.ts.map +0 -1
  27. package/dist/doc-chunker.js +0 -324
  28. package/dist/episode-store.d.ts +0 -48
  29. package/dist/episode-store.d.ts.map +0 -1
  30. package/dist/episode-store.js +0 -135
  31. package/dist/fact-store.d.ts +0 -57
  32. package/dist/fact-store.d.ts.map +0 -1
  33. package/dist/fact-store.js +0 -175
  34. package/dist/fleet-store.d.ts +0 -144
  35. package/dist/fleet-store.d.ts.map +0 -1
  36. package/dist/fleet-store.js +0 -276
  37. package/dist/hybrid-retrieval.d.ts +0 -60
  38. package/dist/hybrid-retrieval.d.ts.map +0 -1
  39. package/dist/hybrid-retrieval.js +0 -340
  40. package/dist/index.d.ts +0 -611
  41. package/dist/index.d.ts.map +0 -1
  42. package/dist/index.js +0 -1042
  43. package/dist/knowledge-graph.d.ts +0 -110
  44. package/dist/knowledge-graph.d.ts.map +0 -1
  45. package/dist/knowledge-graph.js +0 -305
  46. package/dist/knowledge-store.d.ts +0 -72
  47. package/dist/knowledge-store.d.ts.map +0 -1
  48. package/dist/knowledge-store.js +0 -241
  49. package/dist/library-schema.d.ts +0 -22
  50. package/dist/library-schema.d.ts.map +0 -1
  51. package/dist/library-schema.js +0 -717
  52. package/dist/message-store.d.ts +0 -76
  53. package/dist/message-store.d.ts.map +0 -1
  54. package/dist/message-store.js +0 -273
  55. package/dist/preference-store.d.ts +0 -54
  56. package/dist/preference-store.d.ts.map +0 -1
  57. package/dist/preference-store.js +0 -109
  58. package/dist/preservation-gate.d.ts +0 -82
  59. package/dist/preservation-gate.d.ts.map +0 -1
  60. package/dist/preservation-gate.js +0 -150
  61. package/dist/provider-translator.d.ts +0 -40
  62. package/dist/provider-translator.d.ts.map +0 -1
  63. package/dist/provider-translator.js +0 -349
  64. package/dist/rate-limiter.d.ts +0 -76
  65. package/dist/rate-limiter.d.ts.map +0 -1
  66. package/dist/rate-limiter.js +0 -179
  67. package/dist/redis.d.ts +0 -188
  68. package/dist/redis.d.ts.map +0 -1
  69. package/dist/redis.js +0 -534
  70. package/dist/schema.d.ts +0 -15
  71. package/dist/schema.d.ts.map +0 -1
  72. package/dist/schema.js +0 -203
  73. package/dist/secret-scanner.d.ts +0 -51
  74. package/dist/secret-scanner.d.ts.map +0 -1
  75. package/dist/secret-scanner.js +0 -248
  76. package/dist/seed.d.ts +0 -108
  77. package/dist/seed.d.ts.map +0 -1
  78. package/dist/seed.js +0 -177
  79. package/dist/system-store.d.ts +0 -73
  80. package/dist/system-store.d.ts.map +0 -1
  81. package/dist/system-store.js +0 -182
  82. package/dist/topic-store.d.ts +0 -45
  83. package/dist/topic-store.d.ts.map +0 -1
  84. package/dist/topic-store.js +0 -136
  85. package/dist/types.d.ts +0 -329
  86. package/dist/types.d.ts.map +0 -1
  87. package/dist/types.js +0 -9
  88. package/dist/vector-store.d.ts +0 -132
  89. package/dist/vector-store.d.ts.map +0 -1
  90. package/dist/vector-store.js +0 -498
  91. package/dist/work-store.d.ts +0 -112
  92. package/dist/work-store.d.ts.map +0 -1
  93. package/dist/work-store.js +0 -273
@@ -1,1109 +0,0 @@
1
- /**
2
- * HyperMem Compositor
3
- *
4
- * Assembles context for LLM calls by orchestrating all four memory layers:
5
- * L1 Redis — hot session working memory (system, identity, recent msgs)
6
- * L2 Messages — conversation history from messages.db
7
- * L3 Vectors — semantic search across all indexed content
8
- * L4 Library — structured knowledge (facts, preferences, knowledge, episodes)
9
- *
10
- * Token-budgeted: never exceeds the budget, prioritizes by configured order.
11
- * Provider-neutral internally, translates at the output boundary.
12
- */
13
- import { RedisLayer } from './redis.js';
14
- import { MessageStore } from './message-store.js';
15
- import { toProviderFormat } from './provider-translator.js';
16
- import { DocChunkStore } from './doc-chunk-store.js';
17
- import { hybridSearch } from './hybrid-retrieval.js';
18
- import { ensureCompactionFenceSchema, updateCompactionFence } from './compaction-fence.js';
19
- const DEFAULT_CONFIG = { // Baseline compositor settings; the Compositor constructor spreads caller-supplied config over these.
20
- defaultTokenBudget: 90000, // Token budget compose() uses when the request carries no tokenBudget.
21
- maxHistoryMessages: 250, // History depth compose() requests when request.historyDepth is not set.
22
- maxFacts: 40, // NOTE(review): consumer is not visible in this chunk — presumably caps facts loaded from the library; confirm.
23
- maxCrossSessionContext: 8000, // Upper bound (estimated tokens) for the cross-session context section; compose() also caps it at 20% of the remaining budget.
24
- maxRecentToolPairs: 3, // Number of newest tool-bearing messages applyToolGradient() leaves verbatim.
25
- maxProseToolPairs: 10, // Number of next-older tool-bearing messages applyToolGradient() reduces to a prose stub.
26
- warmHistoryBudgetFraction: 0.4, // NOTE(review): not referenced in the visible code — presumably used when warming session history; confirm.
27
- };
28
- /**
29
- * Default trigger registry for standard ACA collections.
30
- * Covers the core ACA offload use case from Anvil's spec.
- *
- * Each entry names a doc-chunk `collection`, the `keywords` that activate it,
- * and per-collection retrieval caps (`maxTokens`, `maxChunks`).
- * Keywords are compared against the lower-cased user message by substring
- * (see matchTriggers), so truncated stems such as 'escalat' are intentional.
- * Used by the Compositor whenever no custom triggerRegistry is supplied.
31
- */
32
- export const DEFAULT_TRIGGERS = [
33
- {
34
- collection: 'governance/policy',
35
- keywords: [
36
- 'escalat', 'policy', 'decision state', 'green', 'yellow', 'red', // NOTE(review): substring matching means 'red' also fires on words like "required" or "stored" — confirm this is acceptable.
37
- 'council procedure', 'naming', 'mandate', 'compliance', 'governance',
38
- 'override', 'human review', 'irreversible',
39
- ],
40
- maxTokens: 1500,
41
- maxChunks: 3,
42
- },
43
- {
44
- collection: 'governance/charter',
45
- keywords: [
46
- 'charter', 'mission', 'director', 'org', 'reporting', 'boundary', // NOTE(review): 'org' matches inside unrelated words (e.g. "forgot") under substring matching.
47
- 'delegation', 'authority', 'jurisdiction',
48
- ],
49
- maxTokens: 1000,
50
- maxChunks: 2,
51
- },
52
- {
53
- collection: 'governance/comms',
54
- keywords: [
55
- 'message', 'send', 'tier 1', 'tier 2', 'tier 3', 'async', 'dispatch',
56
- 'sessions_send', 'inter-agent', 'protocol', 'comms', 'ping', 'notify',
57
- ],
58
- maxTokens: 800,
59
- maxChunks: 2,
60
- },
61
- {
62
- collection: 'operations/agents',
63
- keywords: [
64
- 'boot', 'startup', 'bootstrap', 'heartbeat', 'workqueue', 'checkpoint',
65
- 'session start', 'roll call', 'memory recall', 'dispatch inbox',
66
- ],
67
- maxTokens: 800,
68
- maxChunks: 2,
69
- },
70
- {
71
- collection: 'identity/job',
72
- keywords: [
73
- 'deliberat', 'council round', 'vote', 'response contract', 'rating',
74
- 'first response', 'second response', 'handoff', 'floor open',
75
- 'performance', 'output discipline', 'assessment',
76
- ],
77
- maxTokens: 1200,
78
- maxChunks: 3,
79
- },
80
- {
81
- collection: 'identity/motivations',
82
- keywords: [
83
- 'motivation', 'fear', 'tension', 'why do you', 'how do you feel',
84
- 'drives', 'values',
85
- ],
86
- maxTokens: 600,
87
- maxChunks: 1,
88
- },
89
- {
90
- collection: 'memory/decisions',
91
- keywords: [
92
- 'remember', 'decision', 'we decided', 'previously', 'last time',
93
- 'history', 'past', 'earlier', 'recall', 'context',
94
- ],
95
- maxTokens: 1500,
96
- maxChunks: 4,
97
- },
98
- ];
/**
 * Match a user message against the trigger registry.
 *
 * A trigger fires when at least one of its keywords occurs in the message as a
 * case-insensitive substring. Matches are returned in registry order and each
 * registry entry appears at most once.
 *
 * Malformed input is tolerated rather than thrown on: a missing/non-string
 * message or a non-array registry yields no matches, entries without a
 * `keywords` array never fire, and empty or non-string keywords are skipped
 * (an empty keyword would otherwise match every message, because
 * `''` is a substring of any string).
 *
 * @param {string} userMessage - Text of the current user turn.
 * @param {Array<{collection: string, keywords: string[]}>} triggers - Registry entries to test.
 * @returns {Array} The registry entries that fired; empty when nothing matched.
 */
function matchTriggers(userMessage, triggers) {
    if (typeof userMessage !== 'string' || userMessage.length === 0 || !Array.isArray(triggers)) {
        return [];
    }
    const lower = userMessage.toLowerCase();
    return triggers.filter((trigger) => {
        if (!Array.isArray(trigger?.keywords)) {
            return false;
        }
        return trigger.keywords.some((kw) => typeof kw === 'string' && kw.length > 0 && lower.includes(kw.toLowerCase()));
    });
}
/**
 * Approximate the token count of prose.
 *
 * Uses a flat ratio of four characters per token, rounded up. This is only a
 * budgeting heuristic: real tokenization differs per model, and the provider
 * reports the exact count.
 *
 * @param {string|null|undefined} text - Text to measure.
 * @returns {number} Estimated token count; 0 for empty or missing text.
 */
function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    return text ? Math.ceil(text.length / CHARS_PER_TOKEN) : 0;
}
/**
 * Dense token estimation for tool content (JSON, code, base64).
 *
 * Tool payloads are treated as roughly twice as dense as English prose, so
 * the estimate is two characters per token, rounded up.
 *
 * Empty or missing input counts as zero tokens, mirroring estimateTokens().
 * The guard matters because JSON.stringify() returns undefined for values it
 * cannot serialize, which would otherwise throw on `.length`.
 *
 * @param {string|null|undefined} text - Serialized tool payload.
 * @returns {number} Estimated token count; 0 for empty or missing input.
 */
function estimateToolTokens(text) {
    if (!text) {
        return 0;
    }
    return Math.ceil(text.length / 2);
}
/**
 * Estimate the token cost of one neutral-format message.
 *
 * The total is the prose estimate of `textContent`, plus the dense estimate
 * of each tool payload that is present (serialized with JSON.stringify), plus
 * a fixed per-message allowance for role and formatting.
 *
 * @param {{textContent?: string|null, toolCalls?: any, toolResults?: any}} msg - Message to measure.
 * @returns {number} Estimated token count for the message.
 */
function estimateMessageTokens(msg) {
    // Fixed allowance for the role marker and message framing.
    const PER_MESSAGE_OVERHEAD = 4;
    const textTokens = estimateTokens(msg.textContent);
    // Tool payloads use the dense estimator (length / 2 rather than length / 4).
    const toolTokens = [msg.toolCalls, msg.toolResults]
        .filter(Boolean)
        .reduce((sum, payload) => sum + estimateToolTokens(JSON.stringify(payload)), 0);
    return textTokens + toolTokens + PER_MESSAGE_OVERHEAD;
}
/**
 * Extract a heuristic prose summary from a tool call/result pair.
 *
 * Returns a short natural-language description instead of raw payloads; used
 * for Tier 2 tool treatment in applyToolGradient(). One clause is produced per
 * tool call and the clauses are joined with '; '. A message that carries only
 * tool results is summarized from the first result's content.
 *
 * Parsing is best-effort and never throws on odd payloads:
 *  - `arguments` may be a JSON string or an already-decoded object;
 *  - JSON that parses to a non-object (for example the literal `null`) is
 *    treated as "no arguments" instead of crashing on property access;
 *  - non-string argument values and result content are coerced with String().
 *
 * @param {{toolCalls?: Array|null, toolResults?: Array|null}} msg - Message carrying tool content.
 * @returns {string} Summary text; '' when the message has no tool content.
 */
function extractToolProseSummary(msg) {
    // Same coercion a template literal would apply, but safe to slice/split afterwards.
    const asText = (value) => (value == null ? '' : String(value));
    // Decode tool arguments; anything that is not an object becomes {}.
    const parseArgs = (raw) => {
        if (raw && typeof raw === 'object') {
            return raw;
        }
        try {
            const parsed = JSON.parse(raw);
            return parsed && typeof parsed === 'object' ? parsed : {};
        }
        catch {
            return {}; // best-effort
        }
    };
    // First non-nullish value among the given argument names, as text.
    const pick = (args, keys) => {
        for (const key of keys) {
            if (args[key] != null) {
                return asText(args[key]);
            }
        }
        return '';
    };
    // Path aliases seen across tools. Each tool keeps its original lookup
    // order and additionally accepts the alias it previously ignored.
    const READ_PATH_KEYS = ['path', 'file_path', 'filePath', 'file'];
    const WRITE_PATH_KEYS = ['path', 'file', 'filePath', 'file_path'];

    const toolCalls = msg.toolCalls ?? [];
    const toolResults = msg.toolResults ?? [];
    const parts = [];
    if (toolCalls.length > 0) {
        for (const tc of toolCalls) {
            const args = parseArgs(tc.arguments);
            const resultContent = asText(toolResults.find((r) => r.callId === tc.id)?.content);
            // Size of the matching result, shown for file reads/writes.
            const sizeNote = resultContent ? ` (${(resultContent.length / 1024).toFixed(1)}KB)` : '';
            switch (tc.name) {
                case 'read': {
                    const p = pick(args, READ_PATH_KEYS);
                    parts.push(p ? `Read ${p}${sizeNote}` : 'Read a file');
                    break;
                }
                case 'write': {
                    const p = pick(args, WRITE_PATH_KEYS);
                    parts.push(p ? `Wrote ${p}${sizeNote}` : 'Wrote a file');
                    break;
                }
                case 'edit': {
                    const p = pick(args, WRITE_PATH_KEYS);
                    parts.push(p ? `Edited ${p}` : 'Edited a file');
                    break;
                }
                case 'exec': {
                    const cmd = pick(args, ['command']).slice(0, 60);
                    // Only the first output line is kept, as a hint of the outcome.
                    const firstLine = resultContent.split('\n')[0].slice(0, 80);
                    parts.push(cmd ? `Ran ${cmd}${firstLine ? ` — ${firstLine}` : ''}` : 'Ran a command');
                    break;
                }
                case 'web_search': {
                    const q = pick(args, ['query']);
                    parts.push(q ? `Searched '${q.slice(0, 60)}'` : 'Searched the web');
                    break;
                }
                case 'web_fetch': {
                    const u = pick(args, ['url']);
                    parts.push(u ? `Fetched ${u.slice(0, 80)}` : 'Fetched a URL');
                    break;
                }
                case 'sessions_send': {
                    const target = pick(args, ['sessionKey', 'label']);
                    parts.push(target ? `Sent message to ${target}` : 'Sent an inter-session message');
                    break;
                }
                case 'memory_search': {
                    const q = pick(args, ['query']);
                    parts.push(q ? `Searched memory for '${q.slice(0, 60)}'` : 'Searched memory');
                    break;
                }
                default:
                    parts.push(`Used ${tc.name}`);
            }
        }
    }
    else if (toolResults.length > 0) {
        // Result-only message (no matching call visible)
        const content = asText(toolResults[0]?.content).slice(0, 100);
        parts.push(content ? `Tool result: ${content}` : 'Tool result received');
    }
    return parts.join('; ');
}
/**
 * Apply gradient tool treatment to a message array.
 *
 * Tiers (newest-to-oldest, counted over messages that carry tool content):
 *   Tier 1 — last maxRecentToolPairs: verbatim (untouched)
 *   Tier 2 — next maxProseToolPairs: tool payloads removed and a heuristic
 *            prose stub appended to the message text
 *   Tier 3 — beyond: tool payloads nulled, text content preserved
 *
 * Existing message text (assistant reasoning, user text) is never discarded:
 * in Tier 2 the stub is added after it on a new line, and it becomes the whole
 * text only when the message had none.
 *
 * This pass runs before the budget loop so estimateMessageTokens() measures
 * the actual cost that will be submitted, not the pre-transform cost.
 *
 * The input array and its message objects are not mutated; changed messages
 * are replaced by shallow copies in the returned array.
 *
 * NOTE(review): every message with tool content counts as one unit, so a call
 * and its result stored in separate messages can fall into different tiers —
 * confirm downstream translation tolerates a verbatim result whose call was
 * stubbed.
 *
 * @param {Array} messages - Conversation history, oldest first.
 * @param {number} [maxRecentToolPairs=3] - Newest tool-bearing messages kept verbatim.
 * @param {number} [maxProseToolPairs=10] - Next-older tool-bearing messages reduced to prose.
 * @returns {Array} New array with the same length and order as `messages`.
 */
function applyToolGradient(messages, maxRecentToolPairs = 3, maxProseToolPairs = 10) {
    const proseCutoff = maxRecentToolPairs + maxProseToolPairs;
    let toolMessagesSeen = 0;
    // Walk newest→oldest to assign tiers; replacements are new objects.
    const result = [...messages];
    for (let i = result.length - 1; i >= 0; i--) {
        const msg = result[i];
        const hasToolContent = (msg.toolCalls?.length ?? 0) > 0 || (msg.toolResults?.length ?? 0) > 0;
        if (!hasToolContent) {
            continue;
        }
        toolMessagesSeen++;
        if (toolMessagesSeen <= maxRecentToolPairs) {
            // Tier 1: verbatim — no change
            continue;
        }
        if (toolMessagesSeen <= proseCutoff) {
            // Tier 2: prose stub stands in for the payload; existing text is kept.
            const prose = extractToolProseSummary(msg);
            const combined = [msg.textContent, prose].filter(Boolean).join('\n');
            result[i] = {
                ...msg,
                textContent: combined || msg.textContent, // nothing to add → leave text as it was
                toolCalls: null,
                toolResults: null,
            };
        }
        else {
            // Tier 3: drop tool payload entirely, preserve text
            result[i] = {
                ...msg,
                toolCalls: null,
                toolResults: null,
            };
        }
    }
    return result;
}
256
- export class Compositor {
257
- config;
258
- redis;
259
- vectorStore;
260
- libraryDb;
261
- triggerRegistry;
262
- constructor(deps, config) { // deps: a RedisLayer (legacy form) or a dependency object with redis/vectorStore/libraryDb/triggerRegistry; config: optional overrides for DEFAULT_CONFIG
263
- // Accept either old-style (RedisLayer) or new-style (CompositorDeps)
264
- if (deps instanceof RedisLayer) {
265
- console.warn('[compositor] DEPRECATED: Compositor(RedisLayer) constructor is deprecated. Pass CompositorDeps instead. Vector search and library DB are disabled in legacy mode.');
266
- this.redis = deps;
267
- this.vectorStore = null; // legacy mode: no vector store
268
- this.libraryDb = null; // legacy mode: no library DB
269
- this.triggerRegistry = DEFAULT_TRIGGERS;
270
- }
271
- else {
272
- this.redis = deps.redis;
273
- this.vectorStore = deps.vectorStore || null; // optional; may be attached later via setVectorStore()
274
- this.libraryDb = deps.libraryDb || null; // optional; compose() also accepts a per-call libraryDb
275
- this.triggerRegistry = deps.triggerRegistry || DEFAULT_TRIGGERS; // custom registry replaces the defaults wholesale
276
- }
277
- this.config = { ...DEFAULT_CONFIG, ...config }; // caller-supplied keys win over defaults (shallow merge)
278
- }
279
- /**
280
- * Set or replace the vector store after construction.
281
- * Called by HyperMem.create() once sqlite-vec is confirmed available.
- *
- * @param vs - Vector store instance assigned to this.vectorStore; compose()
- *   checks this field (or a library DB) before attempting semantic recall.
282
- */
283
- setVectorStore(vs) {
284
- this.vectorStore = vs;
285
- }
286
- /**
287
- * Compose a complete message array for sending to an LLM.
288
- *
289
- * Orchestrates all four memory layers:
290
- * 1. System prompt + identity (never truncated)
291
- * 2. Conversation history (L1 Redis → L2 messages.db)
292
- * 3. Active facts from library (L4)
293
- * 4. Knowledge entries relevant to conversation (L4)
294
- * 5. User preferences (L4)
295
- * 6. Semantic recall via vector search (L3)
296
- * 7. Cross-session context (L2)
297
- *
298
- * Each slot respects the remaining token budget.
299
- */
300
- async compose(request, db, libraryDb) {
301
- const store = new MessageStore(db);
302
- const libDb = libraryDb || this.libraryDb;
303
- const budget = request.tokenBudget || this.config.defaultTokenBudget;
304
- let remaining = budget;
305
- const warnings = [];
306
- const slots = {
307
- system: 0,
308
- identity: 0,
309
- history: 0,
310
- facts: 0,
311
- context: 0,
312
- library: 0,
313
- };
314
- const messages = [];
315
- // ─── System Prompt (never truncated) ───────────────────────
316
- const systemContent = await this.getSlotContent(request.agentId, request.sessionKey, 'system', db);
317
- if (systemContent) {
318
- const tokens = estimateTokens(systemContent);
319
- messages.push({
320
- role: 'system',
321
- textContent: systemContent,
322
- toolCalls: null,
323
- toolResults: null,
324
- });
325
- slots.system = tokens;
326
- remaining -= tokens;
327
- }
328
- // ─── Identity (never truncated) ────────────────────────────
329
- const identityContent = await this.getSlotContent(request.agentId, request.sessionKey, 'identity', db);
330
- if (identityContent) {
331
- const tokens = estimateTokens(identityContent);
332
- messages.push({
333
- role: 'system',
334
- textContent: identityContent,
335
- toolCalls: null,
336
- toolResults: null,
337
- });
338
- slots.identity = tokens;
339
- remaining -= tokens;
340
- }
341
- // ─── Conversation History ──────────────────────────────────
342
- if (request.includeHistory !== false) {
343
- const rawHistoryMessages = await this.getHistory(request.agentId, request.sessionKey, request.historyDepth || this.config.maxHistoryMessages, store);
344
- // Deduplicate history by StoredMessage.id (second line of defense after
345
- // pushHistory() tail-check dedup). Guards against any duplicates that
346
- // slipped through the warm path — e.g. bootstrap re-runs on existing sessions.
347
- const seenIds = new Set();
348
- const historyMessages = rawHistoryMessages.filter(m => {
349
- const sm = m;
350
- if (sm.id != null) {
351
- if (seenIds.has(sm.id))
352
- return false;
353
- seenIds.add(sm.id);
354
- }
355
- return true;
356
- });
357
- // ── Transform-first: apply gradient tool treatment BEFORE budget math ──
358
- // All tool payloads are in their final form before any token estimation.
359
- // This ensures estimateMessageTokens() measures actual submission cost,
360
- // not pre-transform cost (which caused overflow: dense tool JSON was
361
- // undercounted at length/4 when it should be measured post-stub).
362
- const transformedHistory = applyToolGradient(historyMessages, this.config.maxRecentToolPairs ?? 3, this.config.maxProseToolPairs ?? 10);
363
- // ── Budget-fit: walk newest→oldest, drop until it fits ──────────────
364
- // No transformation happens here — only include/exclude decisions.
365
- let historyTokens = 0;
366
- const includedHistory = [];
367
- for (let i = transformedHistory.length - 1; i >= 0; i--) {
368
- const msg = transformedHistory[i];
369
- const msgTokens = estimateMessageTokens(msg);
370
- if (historyTokens + msgTokens > remaining) {
371
- warnings.push(`History truncated at message ${i + 1}/${historyMessages.length}`);
372
- break;
373
- }
374
- includedHistory.unshift(msg);
375
- historyTokens += msgTokens;
376
- }
377
- messages.push(...includedHistory);
378
- slots.history = historyTokens;
379
- remaining -= historyTokens;
380
- // T1.3: Ghost message suppression.
381
- // If the last message in the included history is a warm-seeded user message
382
- // AND there's a subsequent message in SQLite that wasn't included (meaning
383
- // the assistant already responded), drop it. This prevents the model from
384
- // re-answering a question that was already handled in a prior session.
385
- // Only triggers when: (1) message has _warmed flag, (2) it's role=user,
386
- // (3) SQLite has messages after it (the response exists but wasn't included).
387
- const lastIncluded = messages[messages.length - 1];
388
- if (lastIncluded?.role === 'user') {
389
- const sm = lastIncluded;
390
- const meta = sm.metadata;
391
- if (meta?._warmed && sm.id != null) {
392
- // Check if there are any messages after this one in SQLite
393
- try {
394
- const hasMore = db.prepare('SELECT 1 FROM messages WHERE conversation_id = (SELECT conversation_id FROM messages WHERE id = ?) AND id > ? LIMIT 1').get(sm.id, sm.id);
395
- if (hasMore) {
396
- messages.pop();
397
- warnings.push('Dropped trailing warm-seeded user message with existing response (ghost suppression)');
398
- }
399
- }
400
- catch {
401
- // Ghost check is best-effort — don't block compose
402
- }
403
- }
404
- }
405
- }
406
- // ─── Injected Context Block ────────────────────────────────
407
- // Facts, knowledge, preferences, semantic recall, and cross-session
408
- // context are assembled into a single system message injected before
409
- // conversation history (after system/identity).
410
- const contextParts = [];
411
- let contextTokens = 0;
412
- // ── Facts (L4: Library) ──────────────────────────────────
413
- if (request.includeFacts !== false && remaining > 500) {
414
- const factsContent = this.buildFactsFromDb(request.agentId, libDb || db);
415
- if (factsContent) {
416
- const tokens = estimateTokens(factsContent);
417
- if (tokens <= remaining * 0.3) { // Cap facts at 30% of remaining
418
- contextParts.push(`## Active Facts\n${factsContent}`);
419
- contextTokens += tokens;
420
- remaining -= tokens;
421
- slots.facts = tokens;
422
- }
423
- else {
424
- // Truncate to budget
425
- const truncated = this.truncateToTokens(factsContent, Math.floor(remaining * 0.3));
426
- const truncTokens = estimateTokens(truncated);
427
- contextParts.push(`## Active Facts (truncated)\n${truncated}`);
428
- contextTokens += truncTokens;
429
- remaining -= truncTokens;
430
- slots.facts = truncTokens;
431
- warnings.push('Facts truncated to fit budget');
432
- }
433
- }
434
- }
435
- // ── Knowledge (L4: Library) ──────────────────────────────
436
- if (request.includeLibrary !== false && remaining > 500 && libDb) {
437
- const knowledgeContent = this.buildKnowledgeFromDb(request.agentId, libDb);
438
- if (knowledgeContent) {
439
- const tokens = estimateTokens(knowledgeContent);
440
- if (tokens <= remaining * 0.2) { // Cap knowledge at 20% of remaining
441
- contextParts.push(`## Knowledge\n${knowledgeContent}`);
442
- contextTokens += tokens;
443
- remaining -= tokens;
444
- slots.library += tokens;
445
- }
446
- else {
447
- const truncated = this.truncateToTokens(knowledgeContent, Math.floor(remaining * 0.2));
448
- const truncTokens = estimateTokens(truncated);
449
- contextParts.push(`## Knowledge (truncated)\n${truncated}`);
450
- contextTokens += truncTokens;
451
- remaining -= truncTokens;
452
- slots.library += truncTokens;
453
- warnings.push('Knowledge truncated to fit budget');
454
- }
455
- }
456
- }
457
- // ── Preferences (L4: Library) ────────────────────────────
458
- if (request.includeLibrary !== false && remaining > 300 && libDb) {
459
- const prefsContent = this.buildPreferencesFromDb(request.agentId, libDb);
460
- if (prefsContent) {
461
- const tokens = estimateTokens(prefsContent);
462
- if (tokens <= remaining * 0.1) { // Cap preferences at 10% of remaining
463
- contextParts.push(`## User Preferences\n${prefsContent}`);
464
- contextTokens += tokens;
465
- remaining -= tokens;
466
- slots.library += tokens;
467
- }
468
- }
469
- }
470
- // ── Semantic Recall (L3: Hybrid FTS5+KNN) ───────────────
471
- // Fires when either vector store or library DB is available.
472
- // FTS5-only (no embeddings) still returns keyword matches.
473
- // KNN-only (no FTS terms) still returns semantic matches.
474
- // Both present → Reciprocal Rank Fusion.
475
- // Use request.prompt as the retrieval query when available — it is the
476
- // live current-turn text. Falling back to getLastUserMessage(messages)
477
- // reads from the already-assembled history, which is one turn stale.
478
- if (remaining > 500 && (this.vectorStore || libDb)) {
479
- const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
480
- if (lastUserMsg) {
481
- try {
482
- const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId, Math.floor(remaining * 0.15), // Cap at 15% of remaining
483
- libDb || undefined);
484
- if (semanticContent) {
485
- const tokens = estimateTokens(semanticContent);
486
- contextParts.push(`## Related Memory\n${semanticContent}`);
487
- contextTokens += tokens;
488
- remaining -= tokens;
489
- // Semantic recall draws from multiple sources, attribute to context
490
- slots.context += tokens;
491
- }
492
- }
493
- catch (err) {
494
- // Semantic search is best-effort — don't fail composition
495
- warnings.push(`Semantic recall failed: ${err.message}`);
496
- }
497
- }
498
- }
499
- // ── Doc Chunks (L4: Trigger-based retrieval) ─────────────
500
- // Demand-load governance, identity, and memory chunks based on
501
- // conversation context. Replaces full ACA file injection for
502
- // the files that have been seeded into the doc chunk index.
503
- if (request.includeDocChunks !== false && remaining > 400 && libDb) {
504
- // Use request.prompt when available (current-turn text, not stale history)
505
- const lastMsg = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
506
- const triggered = matchTriggers(lastMsg, this.triggerRegistry);
507
- if (triggered.length > 0) {
508
- const docChunkStore = new DocChunkStore(libDb);
509
- const docParts = [];
510
- for (const trigger of triggered) {
511
- if (remaining < 200)
512
- break;
513
- const maxTokens = Math.min(trigger.maxTokens || 1000, Math.floor(remaining * 0.15) // No single collection takes > 15% of remaining
514
- );
515
- try {
516
- // Build a relevance-based FTS5 query from the user message.
517
- //
518
- // Problem: trigger keywords are stems ('escalat', 'irreversib') for
519
- // substring matching against user messages, but FTS5 tokenizes on word
520
- // boundaries. 'escalat' does not match 'escalation' in FTS5 without a
521
- // prefix operator.
522
- //
523
- // Solution: extract actual words from the user message that contain a
524
- // matched trigger keyword, then use FTS5 prefix queries (word*) for
525
- // each extracted word. This bridges stem-matching and FTS5 indexing.
526
- const msgLower = lastMsg.toLowerCase();
527
- const matchedKeywords = trigger.keywords.filter(kw => msgLower.includes(kw.toLowerCase()));
528
- // Extract whole words from the message that overlap with matched keywords
529
- const msgWords = lastMsg.match(/\b\w{4,}\b/g) || [];
530
- const relevantWords = msgWords.filter(word => matchedKeywords.some(kw => word.toLowerCase().includes(kw.toLowerCase()) ||
531
- kw.toLowerCase().includes(word.toLowerCase().slice(0, 5))));
532
- // Build FTS5 OR query: "word1* OR word2* OR word3*"
533
- // FTS5 treats space-separated terms as AND by default — we want OR so
534
- // that any relevant term is sufficient to retrieve a matching chunk.
535
- // Prefix operator (*) ensures stems match full words in the index.
536
- // Sort by keyword match specificity (longer matched keyword = more specific term),
537
- // then cap at 6 terms to keep FTS queries reasonable.
538
- // No positional slice — all relevant words participate, not just the first 3.
539
- const sortedWords = [...new Set(relevantWords)].sort((a, b) => {
540
- const aLen = Math.max(...matchedKeywords.filter(kw => a.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(a.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
541
- const bLen = Math.max(...matchedKeywords.filter(kw => b.toLowerCase().includes(kw.toLowerCase()) || kw.toLowerCase().includes(b.toLowerCase().slice(0, 5))).map(kw => kw.length), 0);
542
- return bLen - aLen; // Most specific match first
543
- });
544
- const ftsTerms = sortedWords.length > 0
545
- ? sortedWords.slice(0, 6).map(w => `${w}*`).join(' OR ')
546
- : matchedKeywords
547
- .sort((a, b) => b.length - a.length)
548
- .slice(0, 3)
549
- .map(kw => `${kw}*`)
550
- .join(' OR ');
551
- const ftsKeyword = ftsTerms || lastMsg.split(/\s+/).slice(0, 3).join(' ');
552
- const chunks = docChunkStore.queryChunks({
553
- collection: trigger.collection,
554
- agentId: request.agentId,
555
- tier: request.tier,
556
- limit: trigger.maxChunks || 3,
557
- keyword: ftsKeyword,
558
- });
559
- if (chunks.length === 0)
560
- continue;
561
- const chunkLines = [];
562
- let chunkTokens = 0;
563
- for (const chunk of chunks) {
564
- if (chunkTokens + chunk.tokenEstimate > maxTokens)
565
- break;
566
- chunkLines.push(`### ${chunk.sectionPath}\n${chunk.content}`);
567
- chunkTokens += chunk.tokenEstimate;
568
- }
569
- if (chunkLines.length > 0) {
570
- const collectionLabel = trigger.collection.split('/').pop() || trigger.collection;
571
- docParts.push(`## ${collectionLabel} (retrieved)\n${chunkLines.join('\n\n')}`);
572
- contextTokens += chunkTokens;
573
- remaining -= chunkTokens;
574
- slots.library += chunkTokens;
575
- }
576
- }
577
- catch {
578
- // Doc chunk retrieval is best-effort — don't fail composition
579
- }
580
- }
581
- if (docParts.length > 0) {
582
- contextParts.push(docParts.join('\n\n'));
583
- }
584
- }
585
- }
586
- // ── Cross-Session Context (L2: Messages) ─────────────────
587
- if (request.includeContext !== false && remaining > 500) {
588
- const crossSessionContent = this.buildCrossSessionContext(request.agentId, request.sessionKey, db, libDb);
589
- if (crossSessionContent) {
590
- const tokens = estimateTokens(crossSessionContent);
591
- const maxContextTokens = Math.min(this.config.maxCrossSessionContext, Math.floor(remaining * 0.2));
592
- if (tokens <= maxContextTokens) {
593
- contextParts.push(`## Other Active Sessions\n${crossSessionContent}`);
594
- contextTokens += tokens;
595
- remaining -= tokens;
596
- slots.context += tokens;
597
- }
598
- else {
599
- const truncated = this.truncateToTokens(crossSessionContent, maxContextTokens);
600
- const truncTokens = estimateTokens(truncated);
601
- contextParts.push(`## Other Active Sessions (truncated)\n${truncated}`);
602
- contextTokens += truncTokens;
603
- remaining -= truncTokens;
604
- slots.context += truncTokens;
605
- warnings.push('Cross-session context truncated');
606
- }
607
- }
608
- }
609
- // ── Inject assembled context block ──────────────────────
610
- const assembledContextBlock = contextParts.length > 0 ? contextParts.join('\n\n') : undefined;
611
- if (assembledContextBlock) {
612
- const contextMsg = {
613
- role: 'system',
614
- textContent: assembledContextBlock,
615
- toolCalls: null,
616
- toolResults: null,
617
- // DYNAMIC_BOUNDARY: this slot is session-specific (facts, recall, episodes).
618
- // It must NOT be included in any prompt caching boundary that spans static content.
619
- // The provider translator will insert a cache_control ephemeral marker BEFORE
620
- // this message so providers can cache everything up to identity/system as static context.
621
- metadata: { dynamicBoundary: true },
622
- };
623
- // Insert after system/identity, before history
624
- // Insert context after all system/identity messages, before conversation history.
625
- // findIndex returns -1 when all messages are system-role — handle explicitly.
626
- const firstNonSystem = messages.findIndex(m => m.role !== 'system');
627
- const insertIdx = firstNonSystem === -1 ? messages.length : firstNonSystem;
628
- messages.splice(insertIdx, 0, contextMsg);
629
- }
630
- // ─── Safety Valve: Post-Assembly Budget Check ───────────────────
631
- // Re-estimate total tokens after all slots are assembled. If the
632
- // composition exceeds tokenBudget * 1.05 (5% tolerance for estimation
633
- // drift), trim history messages from the oldest until we're under budget.
634
- // History is the most compressible slot — system/identity are never
635
- // truncated, and context (facts/recall/episodes) is more valuable per-token.
636
- const estimatedTotal = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
637
- const hardCeiling = Math.floor(budget * 1.05);
638
- if (estimatedTotal > hardCeiling) {
639
- const overage = estimatedTotal - budget;
640
- let trimmed = 0;
641
- let trimCount = 0;
642
- // Find history messages (non-system, after system/identity block)
643
- // Walk forward from the first non-system message, trimming oldest history first
644
- const firstNonSystemIdx = messages.findIndex(m => m.role !== 'system');
645
- if (firstNonSystemIdx >= 0) {
646
- let i = firstNonSystemIdx;
647
- while (i < messages.length && trimmed < overage) {
648
- // Don't trim the last user message (current prompt)
649
- if (i === messages.length - 1 && messages[i].role === 'user')
650
- break;
651
- const msgTokens = estimateMessageTokens(messages[i]);
652
- messages.splice(i, 1);
653
- trimmed += msgTokens;
654
- trimCount++;
655
- // Don't increment i — splice shifts everything down
656
- }
657
- }
658
- if (trimCount > 0) {
659
- slots.history = Math.max(0, slots.history - trimmed);
660
- remaining += trimmed;
661
- warnings.push(`Safety valve: trimmed ${trimCount} oldest history messages (${trimmed} tokens) to fit budget`);
662
- }
663
- }
664
- // ─── Translate to provider format (unless caller wants neutral) ───
665
- // When skipProviderTranslation is set, return NeutralMessages directly.
666
- // The context engine plugin uses this: the OpenClaw runtime handles its
667
- // own provider translation, so double-translating corrupts tool calls.
668
- const outputMessages = request.skipProviderTranslation
669
- ? messages
670
- : toProviderFormat(messages, request.provider ?? request.model ?? null);
671
- // T1.3: Strip warm-replay provenance flags before output.
672
- // _warmed is an internal tag added by warmSession() to mark messages
673
- // seeded from SQLite into Redis. It must not leak into provider submissions
674
- // or be visible to the runtime (which might misinterpret it).
675
- for (const msg of outputMessages) {
676
- const m = msg;
677
- if (m.metadata && m.metadata._warmed) {
678
- const { _warmed, ...cleanMeta } = m.metadata;
679
- m.metadata = Object.keys(cleanMeta).length > 0 ? cleanMeta : undefined;
680
- }
681
- }
682
- const totalTokens = budget - remaining;
683
- // ─── Write Window Cache ───────────────────────────────────
684
- // Cache the composed message array so the plugin can serve it directly
685
- // on the next assemble() call without re-running the full compose pipeline.
686
- // Short TTL (120s) — invalidated by afterTurn when new messages arrive.
687
- try {
688
- await this.redis.setWindow(request.agentId, request.sessionKey, messages, 120);
689
- }
690
- catch {
691
- // Window cache write is best-effort
692
- }
693
- // ─── Write Session Cursor ─────────────────────────────────
694
- // Record the newest message included in the submission window.
695
- // Background indexer uses this to find unprocessed high-signal content.
696
- if (request.includeHistory !== false && slots.history > 0) {
697
- try {
698
- const historyMsgs = messages.filter(m => m.role !== 'system');
699
- const lastHistoryMsg = historyMsgs.length > 0 ? historyMsgs[historyMsgs.length - 1] : null;
700
- if (lastHistoryMsg) {
701
- const sm = lastHistoryMsg;
702
- if (sm.id != null && sm.messageIndex != null) {
703
- const cursor = {
704
- lastSentId: sm.id,
705
- lastSentIndex: sm.messageIndex,
706
- lastSentAt: new Date().toISOString(),
707
- windowSize: historyMsgs.length,
708
- tokenCount: totalTokens,
709
- };
710
- await this.redis.setCursor(request.agentId, request.sessionKey, cursor);
711
- // Dual-write cursor to SQLite for durability across Redis eviction (P1.3)
712
- try {
713
- db.prepare(`
714
- UPDATE conversations
715
- SET cursor_last_sent_id = ?,
716
- cursor_last_sent_index = ?,
717
- cursor_last_sent_at = ?,
718
- cursor_window_size = ?,
719
- cursor_token_count = ?
720
- WHERE session_key = ?
721
- `).run(cursor.lastSentId, cursor.lastSentIndex, cursor.lastSentAt, cursor.windowSize, cursor.tokenCount, request.sessionKey);
722
- }
723
- catch {
724
- // SQLite cursor write is best-effort — don't block compose
725
- }
726
- }
727
- }
728
- }
729
- catch {
730
- // Cursor write is best-effort
731
- }
732
- }
733
- // ─── Compaction Fence Update ──────────────────────────────
734
- // Record the oldest message ID that the LLM can see in this compose
735
- // cycle. Everything below this ID becomes eligible for compaction.
736
- // If history was included, query the DB for the oldest included message.
737
- if (request.includeHistory !== false && slots.history > 0) {
738
- try {
739
- const conversation = store.getConversation(request.sessionKey);
740
- if (conversation) {
741
- // The compositor included N history messages (after truncation).
742
- // Count how many non-system messages are in the output to determine
743
- // how far back we reached.
744
- const historyMsgCount = messages.filter(m => m.role !== 'system').length;
745
- if (historyMsgCount > 0) {
746
- // Get the oldest message we would have included.
747
- // getRecentMessages returns the last N in chronological order,
748
- // so the first element is the oldest included.
749
- const oldestIncluded = db.prepare(`
750
- SELECT id FROM messages
751
- WHERE conversation_id = ?
752
- ORDER BY message_index DESC
753
- LIMIT 1 OFFSET ?
754
- `).get(conversation.id, historyMsgCount - 1);
755
- if (oldestIncluded) {
756
- ensureCompactionFenceSchema(db);
757
- updateCompactionFence(db, conversation.id, oldestIncluded.id);
758
- }
759
- }
760
- }
761
- }
762
- catch {
763
- // Fence update is best-effort — never fail composition
764
- warnings.push('Compaction fence update failed (non-fatal)');
765
- }
766
- }
767
- return {
768
- messages: outputMessages,
769
- tokenCount: totalTokens,
770
- slots,
771
- truncated: remaining < 0 || estimatedTotal > hardCeiling,
772
- hasWarnings: warnings.length > 0,
773
- warnings,
774
- contextBlock: assembledContextBlock,
775
- };
776
- }
777
- /**
778
- * Warm a session from SQLite into Redis.
779
- * Called on session start or Redis cache miss.
780
- */
781
- async warmSession(agentId, sessionKey, db, opts) {
782
- const store = new MessageStore(db);
783
- const conversation = store.getConversation(sessionKey);
784
- if (!conversation)
785
- return;
786
- // Fetch a generous pool from SQLite, apply gradient transform, then
787
- // token-budget-cap the warm set. This replaces the old WARM_BOOTSTRAP_CAP
788
- // message-count constant which was a blunt instrument — 100 messages of
789
- // large tool results can massively exceed the history budget allocation.
790
- const warmBudget = Math.floor((this.config.defaultTokenBudget) * (this.config.warmHistoryBudgetFraction ?? 0.4));
791
- const rawHistory = store.getRecentMessages(conversation.id, this.config.maxHistoryMessages);
792
- const transformedForWarm = applyToolGradient(rawHistory, this.config.maxRecentToolPairs ?? 3, this.config.maxProseToolPairs ?? 10);
793
- // Walk newest→oldest, accumulate transformed token cost, stop when budget exhausted
794
- let warmTokens = 0;
795
- const history = [];
796
- for (let i = transformedForWarm.length - 1; i >= 0; i--) {
797
- const cost = estimateMessageTokens(transformedForWarm[i]);
798
- if (warmTokens + cost > warmBudget)
799
- break;
800
- // T1.3 Provenance flag: tag warm-seeded messages so they can be identified
801
- // downstream. The flag is stripped before provider submission in compose().
802
- // This prevents the runtime from treating warm-replayed user messages as
803
- // new inbound queries (ghost message bug).
804
- const tagged = { ...transformedForWarm[i] };
805
- tagged.metadata = { ...(tagged.metadata || {}), _warmed: true };
806
- history.unshift(tagged);
807
- warmTokens += cost;
808
- }
809
- const libDb = opts?.libraryDb || this.libraryDb;
810
- // Note: facts and context are intentionally NOT cached here.
811
- // compose() calls buildFactsFromDb() and buildCrossSessionContext() directly
812
- // from SQLite on every turn (~0.3ms each) — faster than a Redis GET round-trip.
813
- // Caching them here would create stale entries that compose() ignores anyway.
814
- await this.redis.warmSession(agentId, sessionKey, {
815
- system: opts?.systemPrompt,
816
- identity: opts?.identity,
817
- history,
818
- meta: {
819
- agentId,
820
- sessionKey,
821
- provider: conversation.provider,
822
- model: conversation.model,
823
- channelType: conversation.channelType,
824
- tokenCount: conversation.tokenCountIn + conversation.tokenCountOut,
825
- lastActive: conversation.updatedAt,
826
- status: conversation.status,
827
- },
828
- });
829
- }
830
- // ─── Slot Content Resolution ─────────────────────────────────
831
- /**
832
- * Get slot content: try Redis first, fall back to SQLite.
833
- */
834
- async getSlotContent(agentId, sessionKey, slot, db, libraryDb) {
835
- const cached = await this.redis.getSlot(agentId, sessionKey, slot);
836
- if (cached)
837
- return cached;
838
- switch (slot) {
839
- case 'facts':
840
- return this.buildFactsFromDb(agentId, libraryDb || this.libraryDb || db);
841
- case 'context':
842
- return this.buildCrossSessionContext(agentId, sessionKey, db, libraryDb || this.libraryDb);
843
- default:
844
- return null;
845
- }
846
- }
847
- /**
848
- * Get conversation history: try Redis first, fall back to SQLite.
849
- */
850
- async getHistory(agentId, sessionKey, limit, store) {
851
- // Pass limit through to Redis — this is the correct enforcement point.
852
- // Previously getHistory() ignored the limit on the Redis path (LRANGE 0 -1),
853
- // meaning historyDepth in the compose request had no effect on hot sessions.
854
- const cached = await this.redis.getHistory(agentId, sessionKey, limit);
855
- if (cached.length > 0)
856
- return cached;
857
- const conversation = store.getConversation(sessionKey);
858
- if (!conversation)
859
- return [];
860
- return store.getRecentMessages(conversation.id, limit);
861
- }
862
- // ─── L4 Library Builders ─────────────────────────────────────
863
- /**
864
- * Build facts content from library DB.
865
- */
866
- buildFactsFromDb(agentId, db) {
867
- if (!db)
868
- return null;
869
- const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='facts'").get();
870
- if (!tableExists || tableExists.cnt === 0)
871
- return null;
872
- const rows = db.prepare(`
873
- SELECT content, domain, confidence FROM facts
874
- WHERE agent_id = ?
875
- AND superseded_by IS NULL
876
- AND (expires_at IS NULL OR expires_at > datetime('now'))
877
- AND decay_score < 0.8
878
- AND confidence >= 0.5
879
- ORDER BY confidence DESC, decay_score ASC
880
- LIMIT ?
881
- `).all(agentId, this.config.maxFacts);
882
- if (rows.length === 0)
883
- return null;
884
- return rows
885
- .map(r => `- [${r.domain || 'general'}] ${r.content}`)
886
- .join('\n');
887
- }
888
- /**
889
- * Build knowledge content from library DB.
890
- * Prioritizes high-confidence, non-superseded entries.
891
- */
892
- buildKnowledgeFromDb(agentId, db) {
893
- const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='knowledge'").get();
894
- if (!tableExists || tableExists.cnt === 0)
895
- return null;
896
- const rows = db.prepare(`
897
- SELECT domain, key, content, confidence FROM knowledge
898
- WHERE agent_id = ?
899
- AND superseded_by IS NULL
900
- AND (expires_at IS NULL OR expires_at > datetime('now'))
901
- ORDER BY confidence DESC, updated_at DESC
902
- LIMIT 15
903
- `).all(agentId);
904
- if (rows.length === 0)
905
- return null;
906
- // Group by domain for cleaner presentation
907
- const byDomain = {};
908
- for (const row of rows) {
909
- if (!byDomain[row.domain])
910
- byDomain[row.domain] = [];
911
- byDomain[row.domain].push({ key: row.key, content: row.content });
912
- }
913
- const lines = [];
914
- for (const [domain, entries] of Object.entries(byDomain)) {
915
- lines.push(`### ${domain}`);
916
- for (const entry of entries) {
917
- lines.push(`- **${entry.key}:** ${entry.content}`);
918
- }
919
- }
920
- return lines.join('\n');
921
- }
922
- /**
923
- * Build preferences content from library DB.
924
- * Shows user/operator preferences relevant to this agent.
925
- */
926
- buildPreferencesFromDb(agentId, db) {
927
- const tableExists = db.prepare("SELECT count(*) as cnt FROM sqlite_master WHERE type='table' AND name='preferences'").get();
928
- if (!tableExists || tableExists.cnt === 0)
929
- return null;
930
- // Get preferences set by this agent or marked fleet-visible
931
- const rows = db.prepare(`
932
- SELECT subject, key, value, domain, confidence FROM preferences
933
- WHERE (agent_id = ? OR agent_id IS NULL)
934
- ORDER BY confidence DESC, updated_at DESC
935
- LIMIT 10
936
- `).all(agentId);
937
- if (rows.length === 0)
938
- return null;
939
- // Group by subject
940
- const bySubject = {};
941
- for (const row of rows) {
942
- if (!bySubject[row.subject])
943
- bySubject[row.subject] = [];
944
- bySubject[row.subject].push({ key: row.key, value: row.value, domain: row.domain });
945
- }
946
- const lines = [];
947
- for (const [subject, prefs] of Object.entries(bySubject)) {
948
- lines.push(`### ${subject}`);
949
- for (const pref of prefs) {
950
- const domainTag = pref.domain ? ` [${pref.domain}]` : '';
951
- lines.push(`- **${pref.key}:**${domainTag} ${pref.value}`);
952
- }
953
- }
954
- return lines.join('\n');
955
- }
956
- // ─── L3 Hybrid Retrieval (FTS5 + KNN) ───────────────────────
957
- /**
958
- * Build semantic recall content using hybrid FTS5+KNN retrieval.
959
- *
960
- * Uses Reciprocal Rank Fusion to merge keyword and vector results.
961
- * Gracefully degrades: FTS5-only when no vector store, KNN-only
962
- * when FTS query is empty (all stop words), both when available.
963
- */
964
- async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb) {
965
- const libDb = libraryDb || this.libraryDb;
966
- if (!libDb && !this.vectorStore)
967
- return null;
968
- // Use hybrid search when library DB is available
969
- if (libDb) {
970
- const results = await hybridSearch(libDb, this.vectorStore, userMessage, {
971
- tables: ['facts', 'knowledge', 'episodes'],
972
- limit: 10,
973
- agentId,
974
- maxKnnDistance: 1.2,
975
- });
976
- if (results.length === 0)
977
- return null;
978
- const lines = [];
979
- let tokens = 0;
980
- for (const result of results) {
981
- // TUNE-001: drop very-low-relevance results (RRF scores below 0.008 are noise)
982
- if (result.score < 0.008)
983
- continue;
984
- const label = this.formatHybridResult(result);
985
- const lineTokens = estimateTokens(label);
986
- if (tokens + lineTokens > maxTokens)
987
- break;
988
- lines.push(label);
989
- tokens += lineTokens;
990
- }
991
- return lines.length > 0 ? lines.join('\n') : null;
992
- }
993
- // Fallback: KNN-only when no library DB (legacy path)
994
- if (!this.vectorStore)
995
- return null;
996
- const results = await this.vectorStore.search(userMessage, {
997
- tables: ['facts', 'knowledge', 'episodes'],
998
- limit: 8,
999
- maxDistance: 1.2,
1000
- });
1001
- if (results.length === 0)
1002
- return null;
1003
- const lines = [];
1004
- let tokens = 0;
1005
- for (const result of results) {
1006
- const label = this.formatVectorResult(result);
1007
- const lineTokens = estimateTokens(label);
1008
- if (tokens + lineTokens > maxTokens)
1009
- break;
1010
- lines.push(label);
1011
- tokens += lineTokens;
1012
- }
1013
- return lines.length > 0 ? lines.join('\n') : null;
1014
- }
1015
- /**
1016
- * Format a hybrid search result for injection into context.
1017
- * Shows retrieval source(s) and relevance score.
1018
- */
1019
- formatHybridResult(result) {
1020
- const type = result.sourceTable;
1021
- const sourceTag = result.sources.length === 2 ? 'fts+knn' : result.sources[0];
1022
- const scoreStr = (result.score * 100).toFixed(0);
1023
- switch (type) {
1024
- case 'facts':
1025
- return `- [fact, ${sourceTag}, score:${scoreStr}] ${result.content}`;
1026
- case 'knowledge':
1027
- return `- [knowledge/${result.metadata || 'general'}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
1028
- case 'episodes':
1029
- return `- [episode/${result.domain || 'event'}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
1030
- default:
1031
- return `- [${type}, ${sourceTag}, score:${scoreStr}] ${result.content}`;
1032
- }
1033
- }
1034
- /**
1035
- * Format a vector-only search result (legacy fallback).
1036
- */
1037
- formatVectorResult(result) {
1038
- const relevance = Math.max(0, Math.round((1 - result.distance) * 100));
1039
- const type = result.sourceTable;
1040
- switch (type) {
1041
- case 'facts':
1042
- return `- [fact, ${relevance}% relevant] ${result.content}`;
1043
- case 'knowledge':
1044
- return `- [knowledge/${result.metadata || 'general'}, ${relevance}% relevant] ${result.content}`;
1045
- case 'episodes':
1046
- return `- [episode/${result.domain || 'event'}, ${relevance}% relevant] ${result.content}`;
1047
- default:
1048
- return `- [${type}, ${relevance}% relevant] ${result.content}`;
1049
- }
1050
- }
1051
- // ─── L2 Cross-Session Context ────────────────────────────────
1052
- /**
1053
- * Build cross-session context by finding recent activity
1054
- * in other sessions for this agent.
1055
- */
1056
- buildCrossSessionContext(agentId, currentSessionKey, db, _libraryDb) {
1057
- const conversation = db.prepare('SELECT id FROM conversations WHERE session_key = ?').get(currentSessionKey);
1058
- if (!conversation)
1059
- return null;
1060
- const rows = db.prepare(`
1061
- SELECT m.text_content, m.role, c.channel_type, m.created_at
1062
- FROM messages m
1063
- JOIN conversations c ON m.conversation_id = c.id
1064
- WHERE c.agent_id = ?
1065
- AND m.conversation_id != ?
1066
- AND c.status = 'active'
1067
- AND m.text_content IS NOT NULL
1068
- AND m.is_heartbeat = 0
1069
- ORDER BY m.created_at DESC
1070
- LIMIT 10
1071
- `).all(agentId, conversation.id);
1072
- if (rows.length === 0)
1073
- return null;
1074
- const lines = rows.map(r => {
1075
- const preview = r.text_content.substring(0, 200);
1076
- return `- [${r.channel_type}/${r.role} @ ${r.created_at}] ${preview}`;
1077
- });
1078
- return lines.join('\n');
1079
- }
1080
- // ─── Utilities ───────────────────────────────────────────────
1081
- /**
1082
- * Extract the last user message text from the composed messages.
1083
- */
1084
- getLastUserMessage(messages) {
1085
- for (let i = messages.length - 1; i >= 0; i--) {
1086
- if (messages[i].role === 'user' && messages[i].textContent) {
1087
- return messages[i].textContent;
1088
- }
1089
- }
1090
- return null;
1091
- }
1092
- /**
1093
- * Truncate text to approximately fit within a token budget.
1094
- * Truncates at line boundaries when possible.
1095
- */
1096
- truncateToTokens(text, maxTokens) {
1097
- const maxChars = maxTokens * 4; // inverse of our estimation
1098
- if (text.length <= maxChars)
1099
- return text;
1100
- // Try to truncate at a line boundary
1101
- const truncated = text.substring(0, maxChars);
1102
- const lastNewline = truncated.lastIndexOf('\n');
1103
- if (lastNewline > maxChars * 0.7) {
1104
- return truncated.substring(0, lastNewline) + '\n…';
1105
- }
1106
- return truncated + '…';
1107
- }
1108
- }
1109
- //# sourceMappingURL=compositor.js.map