@psiclawops/hypermem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/ARCHITECTURE.md +296 -0
  2. package/LICENSE +190 -0
  3. package/README.md +243 -0
  4. package/dist/background-indexer.d.ts +117 -0
  5. package/dist/background-indexer.d.ts.map +1 -0
  6. package/dist/background-indexer.js +732 -0
  7. package/dist/compaction-fence.d.ts +89 -0
  8. package/dist/compaction-fence.d.ts.map +1 -0
  9. package/dist/compaction-fence.js +153 -0
  10. package/dist/compositor.d.ts +139 -0
  11. package/dist/compositor.d.ts.map +1 -0
  12. package/dist/compositor.js +1109 -0
  13. package/dist/cross-agent.d.ts +57 -0
  14. package/dist/cross-agent.d.ts.map +1 -0
  15. package/dist/cross-agent.js +254 -0
  16. package/dist/db.d.ts +131 -0
  17. package/dist/db.d.ts.map +1 -0
  18. package/dist/db.js +398 -0
  19. package/dist/desired-state-store.d.ts +100 -0
  20. package/dist/desired-state-store.d.ts.map +1 -0
  21. package/dist/desired-state-store.js +212 -0
  22. package/dist/doc-chunk-store.d.ts +115 -0
  23. package/dist/doc-chunk-store.d.ts.map +1 -0
  24. package/dist/doc-chunk-store.js +278 -0
  25. package/dist/doc-chunker.d.ts +99 -0
  26. package/dist/doc-chunker.d.ts.map +1 -0
  27. package/dist/doc-chunker.js +324 -0
  28. package/dist/episode-store.d.ts +48 -0
  29. package/dist/episode-store.d.ts.map +1 -0
  30. package/dist/episode-store.js +135 -0
  31. package/dist/fact-store.d.ts +57 -0
  32. package/dist/fact-store.d.ts.map +1 -0
  33. package/dist/fact-store.js +175 -0
  34. package/dist/fleet-store.d.ts +144 -0
  35. package/dist/fleet-store.d.ts.map +1 -0
  36. package/dist/fleet-store.js +276 -0
  37. package/dist/hybrid-retrieval.d.ts +60 -0
  38. package/dist/hybrid-retrieval.d.ts.map +1 -0
  39. package/dist/hybrid-retrieval.js +340 -0
  40. package/dist/index.d.ts +611 -0
  41. package/dist/index.d.ts.map +1 -0
  42. package/dist/index.js +1042 -0
  43. package/dist/knowledge-graph.d.ts +110 -0
  44. package/dist/knowledge-graph.d.ts.map +1 -0
  45. package/dist/knowledge-graph.js +305 -0
  46. package/dist/knowledge-store.d.ts +72 -0
  47. package/dist/knowledge-store.d.ts.map +1 -0
  48. package/dist/knowledge-store.js +241 -0
  49. package/dist/library-schema.d.ts +22 -0
  50. package/dist/library-schema.d.ts.map +1 -0
  51. package/dist/library-schema.js +717 -0
  52. package/dist/message-store.d.ts +76 -0
  53. package/dist/message-store.d.ts.map +1 -0
  54. package/dist/message-store.js +273 -0
  55. package/dist/preference-store.d.ts +54 -0
  56. package/dist/preference-store.d.ts.map +1 -0
  57. package/dist/preference-store.js +109 -0
  58. package/dist/preservation-gate.d.ts +82 -0
  59. package/dist/preservation-gate.d.ts.map +1 -0
  60. package/dist/preservation-gate.js +150 -0
  61. package/dist/provider-translator.d.ts +40 -0
  62. package/dist/provider-translator.d.ts.map +1 -0
  63. package/dist/provider-translator.js +349 -0
  64. package/dist/rate-limiter.d.ts +76 -0
  65. package/dist/rate-limiter.d.ts.map +1 -0
  66. package/dist/rate-limiter.js +179 -0
  67. package/dist/redis.d.ts +188 -0
  68. package/dist/redis.d.ts.map +1 -0
  69. package/dist/redis.js +534 -0
  70. package/dist/schema.d.ts +15 -0
  71. package/dist/schema.d.ts.map +1 -0
  72. package/dist/schema.js +203 -0
  73. package/dist/secret-scanner.d.ts +51 -0
  74. package/dist/secret-scanner.d.ts.map +1 -0
  75. package/dist/secret-scanner.js +248 -0
  76. package/dist/seed.d.ts +108 -0
  77. package/dist/seed.d.ts.map +1 -0
  78. package/dist/seed.js +177 -0
  79. package/dist/system-store.d.ts +73 -0
  80. package/dist/system-store.d.ts.map +1 -0
  81. package/dist/system-store.js +182 -0
  82. package/dist/topic-store.d.ts +45 -0
  83. package/dist/topic-store.d.ts.map +1 -0
  84. package/dist/topic-store.js +136 -0
  85. package/dist/types.d.ts +329 -0
  86. package/dist/types.d.ts.map +1 -0
  87. package/dist/types.js +9 -0
  88. package/dist/vector-store.d.ts +132 -0
  89. package/dist/vector-store.d.ts.map +1 -0
  90. package/dist/vector-store.js +498 -0
  91. package/dist/work-store.d.ts +112 -0
  92. package/dist/work-store.d.ts.map +1 -0
  93. package/dist/work-store.js +273 -0
  94. package/package.json +57 -0
@@ -0,0 +1,732 @@
1
+ /**
2
+ * HyperMem Background Indexer
3
+ *
4
+ * Processes message history to extract structured knowledge:
5
+ * - Facts: atomic pieces of learned information
6
+ * - Episodes: significant events worth remembering
7
+ * - Topics: conversation threads and their lifecycle
8
+ * - Knowledge: durable structured entries (domain + key)
9
+ *
10
+ * Runs as a periodic background task, processing unindexed messages
11
+ * in batches. Each batch is scored, classified, and stored in L4 (library.db).
12
+ *
13
+ * Design principles:
14
+ * - No LLM dependency: extraction uses pattern matching + heuristics
15
+ * - Idempotent: tracks watermarks per agent to avoid reprocessing
16
+ * - Bounded: processes N messages per tick to avoid blocking
17
+ * - Observable: logs extraction stats for monitoring
18
+ */
19
+ import { MessageStore } from './message-store.js';
20
+ import { FactStore } from './fact-store.js';
21
+ import { EpisodeStore } from './episode-store.js';
22
+ import { TopicStore } from './topic-store.js';
23
+ import { KnowledgeStore } from './knowledge-store.js';
24
+ import { isSafeForSharedVisibility } from './secret-scanner.js';
25
/**
 * Extract fact candidates from free-form message text using pattern matching.
 *
 * Each pattern group carries a confidence score reflecting how reliable that
 * phrasing is as a signal of a real fact (decisions 0.75, operational events
 * 0.70, learnings 0.65, config/preferences 0.60). Matches are filtered
 * through isQualityFact() and deduplicated on exact candidate text — the
 * first occurrence wins, so a candidate matched by several pattern groups
 * keeps the confidence of the earliest group that saw it.
 *
 * @param {string} content - Raw message text.
 * @returns {Array<{content: string, confidence: number}>} fact candidates.
 */
function extractFactCandidates(content) {
    const facts = [];
    if (!content || content.length < 20)
        return facts;
    // Decision patterns: "decided to", "agreed on", "choosing", "going with" — high confidence (0.75)
    const decisionPatterns = [
        /(?:we |I |they )?(?:decided|agreed|chose|selected|committed) (?:to |on |that )(.{20,200})/gi,
        /(?:going|went) with (.{10,150})/gi,
        /decision:\s*(.{10,200})/gi,
    ];
    // Learned/discovered patterns — medium-high confidence (0.65)
    const learnedPatterns = [
        /(?:learned|discovered|found out|realized|noticed) (?:that |)(.{20,200})/gi,
        /turns out (?:that |)(.{20,200})/gi,
        /(?:TIL|FYI|note to self)[:\s]+(.{10,200})/gi,
    ];
    // Config/setting patterns — medium confidence (0.60); matches more promiscuously
    const configPatterns = [
        /(?:set|changed|updated|configured) (\S+ to .{5,150})/gi,
        /(?:model|config|setting)[:\s]+(\S+\s*(?:→|->|=|is)\s*.{5,100})/gi,
    ];
    // Preference patterns — medium confidence (0.60)
    const preferencePatterns = [
        /(?:prefer|always use|never use|don't use|avoid) (.{10,150})/gi,
        /(?:ragesaq|operator) (?:wants|prefers|likes|hates|dislikes) (.{10,150})/gi,
    ];
    // Operational patterns: deployments, incidents, fixes — high confidence (0.70)
    const operationalPatterns = [
        /(?:deployed|shipped|released|rolled back|reverted) (.{10,200})/gi,
        /(?:outage|incident|failure|broke|broken|crashed)(?:: | — | - )(.{10,200})/gi,
        /(?:fixed|resolved|patched|hotfixed) (.{10,200})/gi,
    ];
    const patternGroups = [
        { patterns: decisionPatterns, confidence: 0.75 },
        { patterns: learnedPatterns, confidence: 0.65 },
        { patterns: configPatterns, confidence: 0.60 },
        { patterns: preferencePatterns, confidence: 0.60 },
        { patterns: operationalPatterns, confidence: 0.70 },
    ];
    // FIX: dedupe on candidate text — overlapping patterns (e.g. a decision that
    // also mentions a deployment) previously produced duplicate fact entries.
    const seen = new Set();
    for (const { patterns, confidence } of patternGroups) {
        for (const pattern of patterns) {
            let match;
            // Reset lastIndex: /g regexes advance statefully across exec() calls.
            pattern.lastIndex = 0;
            while ((match = pattern.exec(content)) !== null) {
                const candidate = match[1].trim();
                // Quality gate: reject noise that matched patterns but isn't a real fact
                if (!isQualityFact(candidate))
                    continue;
                if (seen.has(candidate))
                    continue; // already captured by an earlier (higher-priority) group
                seen.add(candidate);
                facts.push({ content: candidate, confidence });
            }
        }
    }
    return facts;
}
/**
 * TUNE-011: Quality gate for fact extraction.
 * Rejects pattern matches that are code, table fragments, questions,
 * or too short to be meaningful facts.
 *
 * @param {string} content - Candidate fact text (already trimmed).
 * @returns {boolean} true if the candidate looks like a natural-language fact.
 */
function isQualityFact(content) {
    // Too short — sentence fragments
    if (content.length < 40)
        return false;
    // Too long — likely captured a paragraph, not a fact
    if (content.length > 300)
        return false;
    // Fewer than 5 words — fragment
    const wordCount = content.split(/\s+/).filter(w => w.length > 0).length;
    if (wordCount < 5)
        return false;
    // Questions — not assertions of fact
    if (content.trimEnd().endsWith('?'))
        return false;
    // Code indicators: braces, arrows, imports, variable declarations
    if (/^[\s{}\[\]|`]/.test(content))
        return false; // starts with structural char
    if (/[{}].*[{}]/.test(content))
        return false; // contains paired braces (code blocks)
    if (/^\s*(import|export|const|let|var|function|class|interface|type|return|if|for|while|switch)\s/i.test(content))
        return false;
    if (/=>\s*[{(]/.test(content))
        return false; // arrow functions
    if (/SELECT\s|INSERT\s|UPDATE\s|DELETE\s|CREATE\s/i.test(content))
        return false; // SQL
    // Table cell fragments: contains pipe-delimited cells
    if (/\|.*\|.*\|/.test(content))
        return false;
    // Regex patterns leaked from source
    if (/\/[^/]+\/[gimsuvy]*[,;]/.test(content))
        return false;
    // Raw file paths without context (tool output, not facts)
    if (/^\/[\w/.-]+$/.test(content.trim()))
        return false;
    // Markdown formatting artifacts
    if (content.startsWith('```') || content.startsWith('---') || content.startsWith('==='))
        return false;
    // Git output
    if (/^[a-f0-9]{7,40}\s/.test(content) || /^\+\+\+|^---\s[ab]\//.test(content))
        return false;
    if (/^\d+ files? changed/.test(content))
        return false;
    // Stack traces
    if (/^\s*at\s+\S+\s+\(/.test(content) || /node:internal/.test(content))
        return false;
    // High non-alpha ratio indicates code/data, not natural language
    const alphaChars = (content.match(/[a-zA-Z]/g) || []).length;
    if (alphaChars / content.length < 0.5)
        return false;
    return true;
}
136
/**
 * Classify a message for episode significance.
 * Returns episode type and significance score, or null if not significant.
 *
 * @param {{textContent?: string, isHeartbeat?: boolean}} msg - Message record.
 * @returns {{type: string, significance: number, summary: string} | null}
 */
function classifyEpisode(msg) {
    const content = msg.textContent || '';
    if (!content || content.length < 50)
        return null; // Raised from 30
    // Skip heartbeats
    if (msg.isHeartbeat)
        return null;
    // Skip messages that are primarily code/data output (tool results, logs)
    const alphaRatio = (content.match(/[a-zA-Z]/g) || []).length / content.length;
    if (alphaRatio < 0.4)
        return null;
    // Skip messages that start with structural output indicators
    if (/^[\s]*[{[\d|#=+\-]/.test(content) && content.length < 200)
        return null;
    const lower = content.toLowerCase();
    // ── Negation-aware incident detection ──────────────────────
    // Only trigger on actual incidents, not "zero failures" or "no crashes"
    const incidentTerms = ['outage', 'incident', 'failure', 'crash', 'broke', 'broken', 'emergency'];
    const negationPrefixes = ['no ', 'zero ', 'without ', '0 ', 'never ', 'fixed ', 'resolved '];
    const hasIncidentTerm = incidentTerms.some(term => lower.includes(term));
    const isNegated = hasIncidentTerm && incidentTerms.some(term => {
        const idx = lower.indexOf(term);
        if (idx < 0)
            return false;
        // `lower` is already lowercased, so the window needs no re-lowering.
        const prefix = lower.substring(Math.max(0, idx - 15), idx);
        // FIX: match negation words WITH their trailing space. The old code
        // stripped it (trimEnd), so 'no' matched inside words like "know" or
        // "nothing" and falsely suppressed genuine incidents. The 15-char
        // window ends at the term, so "no outage" still yields "...no " here.
        return negationPrefixes.some(neg => prefix.includes(neg));
    });
    if (hasIncidentTerm && !isNegated && content.length > 100) {
        // Genuine incident — verify it's describing a problem, not analyzing code
        if (!/^\s*(\/\/|#|\*|\/\*|```|import|const|function)/.test(content)) {
            const summary = content.slice(0, 200);
            return { type: 'incident', significance: 0.9, summary };
        }
    }
    // Deployment events (high significance)
    if (/(?:deployed|shipped|released|went live|now live|go live)/i.test(content) &&
        content.length > 60) {
        const summary = content.slice(0, 200);
        return { type: 'deployment', significance: 0.8, summary };
    }
    // Architecture decisions (high significance)
    if (/(?:decided on|chose|committed to|architecture|design decision)/i.test(content) &&
        content.length > 80) {
        const summary = content.slice(0, 200);
        return { type: 'decision', significance: 0.7, summary };
    }
    // Discovery/insight (medium significance)
    if (/(?:discovered|found|realized|root cause|turns out)/i.test(content) && content.length > 80) {
        const summary = content.slice(0, 200);
        return { type: 'discovery', significance: 0.5, summary };
    }
    // Config changes (medium significance) — TUNE-004: raised to 0.5
    if (/(?:changed|updated|migrated|switched|model.*(?:→|->|to))/i.test(content) && content.length > 60) {
        // Skip if it's just a tool output confirmation
        if (/^Successfully replaced|^\[main [a-f0-9]|^ok \d+ -/.test(content))
            return null;
        const summary = content.slice(0, 200);
        return { type: 'config_change', significance: 0.5, summary };
    }
    // Milestone/completion (medium significance)
    if (/(?:completed|finished|done|milestone|all tests pass|all green)/i.test(content) &&
        content.length > 60) {
        // Skip tool output that happens to contain "done"
        if (/^Successfully|^\[main|^ok \d+/.test(content))
            return null;
        const summary = content.slice(0, 200);
        return { type: 'milestone', significance: 0.5, summary };
    }
    return null;
}
210
/**
 * Extract knowledge candidates — structured (domain, key, value) tuples.
 *
 * Produces entries in three domains:
 * - 'paths':    absolute filesystem paths mentioned in the message
 * - 'services': service/port bindings (e.g. "api runs on 8080", "localhost:3000")
 * - 'fleet':    agent identity/ownership statements
 *
 * Results are deduplicated on domain+key; the FIRST occurrence wins, so the
 * pattern order below is significant.
 *
 * @param {string} content - Raw message text.
 * @param {string} agentId - NOTE(review): currently unused; kept for interface
 *                           stability with callers that pass it.
 * @returns {Array<{domain: string, key: string, value: string}>}
 */
function extractKnowledgeCandidates(content, agentId) {
    const results = [];
    if (!content || content.length < 30)
        return results;
    // TUNE-012: Broadened path extraction.
    // Real messages use paths inline without explicit prefixes like "located at".
    // Match any absolute path that's at least 3 segments deep (filters /tmp, /etc noise).
    // Two alternatives: backtick-quoted absolute paths (group 1), or bare
    // /home|/opt|/var paths preceded by whitespace/:/= or line start (group 2).
    const pathMatches = content.matchAll(/(?:`([/][\w./-]{10,})`|(?:^|[\s:=])(\/home\/[\w./-]{10,}|\/opt\/[\w./-]{10,}|\/var\/[\w./-]{10,}))/gm);
    for (const match of pathMatches) {
        // Strip trailing punctuation that regularly clings to inline paths.
        const value = (match[1] || match[2]).replace(/[`'".,;:)]+$/, '').trim();
        if (value.length > 10 && value.split('/').length >= 4) {
            const segments = value.split('/').filter(s => s.length > 0);
            const lastSeg = segments[segments.length - 1] || '';
            // Reject truncated paths (last segment < 3 chars unless it's a known ext)
            if (lastSeg.length < 3 && !lastSeg.includes('.'))
                continue;
            // Key is the basename; fall back to the parent dir for trailing-slash paths.
            const key = lastSeg || segments[segments.length - 2] || 'unknown';
            results.push({ domain: 'paths', key, value });
        }
    }
    // Explicit location references (original patterns, kept for completeness)
    const locationPatterns = [
        /(?:path|located at|lives at|stored at|found at|repo at|running at)[:\s]+(`[^`]+`|\/\S+)/gi,
        /(?:workspace|directory|repo|project)[:\s]+(`[^`]+`|\/\S+)/gi,
    ];
    for (const pattern of locationPatterns) {
        // /g regexes keep state across exec() calls — reset before scanning.
        pattern.lastIndex = 0;
        let match;
        while ((match = pattern.exec(content)) !== null) {
            // NOTE: /g here strips quoting/punctuation chars EVERYWHERE in the
            // value, not just at the ends (unlike the anchored replace above).
            const value = match[1].replace(/[`'".,;:)]+/g, '').trim();
            // Skip values already captured by the inline-path scan.
            if (value.startsWith('/') && value.length > 10 && !results.some(r => r.value === value)) {
                const key = value.split('/').pop() || 'unknown';
                results.push({ domain: 'paths', key, value });
            }
        }
    }
    // Service/port patterns — broadened to catch "port NNNN" and "on :NNNN"
    const servicePatterns = [
        /(\S+)\s+(?:runs on|listening on|port|on port)\s+(\d{2,5})/gi,
        /(?:service|server|daemon)\s+(\S+)\s+(?:on |at |: )(\S+)/gi,
        /(?:localhost|127\.0\.0\.1):(\d{2,5})\b/gi,
    ];
    for (const pattern of servicePatterns) {
        pattern.lastIndex = 0;
        let match;
        while ((match = pattern.exec(content)) !== null) {
            // The localhost pattern has only one capture (the port), so it is
            // keyed differently from the two-capture name/value patterns.
            if (pattern.source.includes('localhost')) {
                // localhost:PORT pattern — key is the port, value is the URL
                results.push({ domain: 'services', key: `port:${match[1]}`, value: match[0] });
            }
            else {
                results.push({ domain: 'services', key: match[1], value: match[2] });
            }
        }
    }
    // Agent identity patterns — broadened
    const identityPatterns = [
        /(\w+)\s+(?:is|was)\s+(?:the\s+)?(\w+)\s+(?:seat|director|specialist|council)/gi,
        /(\w+)\s+(?:reports to|owned by|managed by)\s+(\w+)/gi,
        // NOTE(review): the repeated group below only retains the LAST name in
        // a comma/slash list — earlier names in "agents: a, b, c" are dropped.
        /(?:agents?|directors?|seats?)[:\s]+(\w+)(?:\s*[,/]\s*(\w+))+/gi,
    ];
    for (const pattern of identityPatterns) {
        pattern.lastIndex = 0;
        let match;
        while ((match = pattern.exec(content)) !== null) {
            if (match[2]) {
                results.push({ domain: 'fleet', key: match[1].toLowerCase(), value: `${match[1]} ${match[2]}` });
            }
        }
    }
    // Dedup by domain+key
    const seen = new Set();
    return results.filter(r => {
        const k = `${r.domain}:${r.key}`;
        if (seen.has(k))
            return false;
        seen.add(k);
        return true;
    });
}
293
/**
 * Detect a conversation topic from message content.
 *
 * Checks, in priority order: known product/project names (returned as
 * matched, preserving the original casing in the text), then a generic
 * 'infrastructure' bucket, then a generic 'security' bucket.
 *
 * @param {string} content - Raw message text.
 * @returns {string | null} topic name candidate, or null if none detected.
 */
function detectTopic(content) {
    // Guard: too little text to classify reliably.
    if (!content || content.length < 50)
        return null;
    const PRODUCT_RE = /\b(HyperMem|ClawText|ClawDash|ClawCanvas|ClawCouncil|ClawTomation|OpenClaw|ClawDispatch)\b/i;
    const INFRA_RE = /\b(?:redis|sqlite|database|migration|deployment|docker|nginx)\b/i;
    const SECURITY_RE = /\b(?:security|auth|permission|access|token|credential)\b/i;
    // Product names take precedence over the generic buckets.
    const product = content.match(PRODUCT_RE);
    if (product)
        return product[1];
    if (INFRA_RE.test(content))
        return 'infrastructure';
    if (SECURITY_RE.test(content))
        return 'security';
    return null;
}
314
// ─── Background Indexer ─────────────────────────────────────────
/**
 * Periodic background task that mines raw message history into structured
 * L4 memory: facts, episodes, topics, and knowledge (domain/key/value) tuples.
 *
 * Database handles are obtained lazily via the injected accessor functions;
 * progress is tracked with a per-agent watermark table so reprocessing is
 * avoided across restarts.
 *
 * NOTE(review): db handles are used through a synchronous
 * `prepare().run()/get()/all()` API (better-sqlite3 style) — confirm against
 * the db module.
 */
export class BackgroundIndexer {
    getMessageDb;           // (agentId) => per-agent message db handle
    getLibraryDb;           // () => shared library (L4) db handle
    listAgents;             // () => array of agent ids to index each tick
    getCursor;              // optional async (agentId, sessionKey) => cursor with lastSentId
    config;                 // resolved config; defaults applied in constructor
    intervalHandle = null;  // setInterval handle while started, else null
    running = false;        // re-entrancy guard so ticks never overlap
    vectorStore = null;     // optional embedding store (see setVectorStore)
    /**
     * @param config Partial indexer config; every missing field gets a default
     *               (enabled=true, 5-minute interval, 24h dormant / 7d closed
     *               topic thresholds, 0.01 decay rate, 0.5 episode threshold).
     * @param getMessageDb Accessor for an agent's message database.
     * @param getLibraryDb Accessor for the shared library database.
     * @param listAgents Returns the agent ids to process each tick.
     * @param getCursor Optional cursor fetcher used to prioritize unseen messages.
     */
    constructor(config, getMessageDb, getLibraryDb, listAgents, getCursor) {
        this.getMessageDb = getMessageDb;
        this.getLibraryDb = getLibraryDb;
        this.listAgents = listAgents;
        this.getCursor = getCursor;
        this.config = {
            enabled: config?.enabled ?? true,
            factExtractionMode: config?.factExtractionMode ?? 'tiered',
            topicDormantAfter: config?.topicDormantAfter ?? '24h',
            topicClosedAfter: config?.topicClosedAfter ?? '7d',
            factDecayRate: config?.factDecayRate ?? 0.01,
            episodeSignificanceThreshold: config?.episodeSignificanceThreshold ?? 0.5,
            periodicInterval: config?.periodicInterval ?? 300000, // 5 minutes
        };
    }
    /**
     * Set the vector store for embedding new facts/episodes at index time.
     * Optional — if not set, indexer runs without embedding (FTS5-only mode).
     */
    setVectorStore(vs) {
        this.vectorStore = vs;
    }
    /**
     * Start periodic indexing. No-op if disabled by config or already started.
     * Runs one tick immediately, then every `periodicInterval` ms; tick errors
     * are logged rather than thrown (nothing awaits the interval callback).
     */
    start() {
        if (!this.config.enabled)
            return;
        if (this.intervalHandle)
            return;
        // Run once immediately
        this.tick().catch(err => {
            console.error('[indexer] Initial tick failed:', err);
        });
        // Then periodically
        this.intervalHandle = setInterval(() => {
            this.tick().catch(err => {
                console.error('[indexer] Periodic tick failed:', err);
            });
        }, this.config.periodicInterval);
        console.log(`[indexer] Started with interval ${this.config.periodicInterval}ms`);
    }
    /**
     * Stop periodic indexing. Safe to call when not started; an in-flight
     * tick is not interrupted, only future scheduling is cancelled.
     */
    stop() {
        if (this.intervalHandle) {
            clearInterval(this.intervalHandle);
            this.intervalHandle = null;
        }
    }
    /**
     * Run one indexing pass across all agents.
     *
     * Overlap-guarded via `this.running`: if a previous tick is still active,
     * this call returns [] immediately. Per-agent failures are logged and do
     * not abort the pass. Decay runs at the end of every successful pass
     * (but not on the early-return paths).
     *
     * @returns {Promise<Array>} per-agent stats for agents with work done.
     */
    async tick() {
        if (this.running) {
            console.log('[indexer] Skipping tick — previous run still active');
            return [];
        }
        this.running = true;
        const results = [];
        try {
            if (!this.listAgents || !this.getMessageDb || !this.getLibraryDb) {
                console.warn('[indexer] Missing database accessors — skipping');
                return [];
            }
            const agents = this.listAgents();
            const libraryDb = this.getLibraryDb();
            for (const agentId of agents) {
                try {
                    const stats = await this.processAgent(agentId, libraryDb);
                    if (stats.messagesProcessed > 0) {
                        results.push(stats);
                    }
                }
                catch (err) {
                    // One agent failing must not stop the others.
                    const msg = err instanceof Error ? err.message : String(err);
                    console.error(`[indexer] Failed to process ${agentId}: ${msg}`);
                }
            }
            if (results.length > 0) {
                const totalMessages = results.reduce((s, r) => s + r.messagesProcessed, 0);
                const totalFacts = results.reduce((s, r) => s + r.factsExtracted, 0);
                const totalEpisodes = results.reduce((s, r) => s + r.episodesRecorded, 0);
                console.log(`[indexer] Tick complete: ${totalMessages} messages → ${totalFacts} facts, ${totalEpisodes} episodes`);
            }
            // Run decay on every tick
            this.applyDecay(libraryDb);
        }
        finally {
            // Always release the overlap guard, even after a throw.
            this.running = false;
        }
        return results;
    }
    /**
     * Process a single agent's unindexed messages.
     *
     * When a cursor fetcher is available, messages are split into two tiers:
     * - Post-cursor (id > cursor.lastSentId): "unseen" by the model, high-signal priority
     * - Pre-cursor (id <= cursor.lastSentId): already in the model's context window, lower priority
     * Post-cursor messages are processed first. This ensures the indexer prioritizes
     * content the model hasn't seen yet — decisions, incidents, and discoveries that
     * happened between context windows.
     *
     * The watermark advances to the max id of ALL fetched messages regardless
     * of tier, so prioritization affects only processing order, not coverage.
     *
     * @returns {Promise<object>} extraction stats for this agent.
     */
    async processAgent(agentId, libraryDb) {
        const start = Date.now();
        const messageDb = this.getMessageDb(agentId);
        // NOTE(review): messageStore is constructed but never used below —
        // messages are fetched via raw SQL in getUnindexedMessages().
        const messageStore = new MessageStore(messageDb);
        const factStore = new FactStore(libraryDb);
        const episodeStore = new EpisodeStore(libraryDb);
        const topicStore = new TopicStore(libraryDb);
        const knowledgeStore = new KnowledgeStore(libraryDb);
        // Get watermark — last processed message ID for this agent
        const watermark = this.getWatermark(libraryDb, agentId);
        const lastProcessedId = watermark?.lastMessageId ?? 0;
        // Fetch unindexed messages (batch size: 100)
        const messages = this.getUnindexedMessages(messageDb, agentId, lastProcessedId, 100);
        if (messages.length === 0) {
            return {
                agentId,
                messagesProcessed: 0,
                factsExtracted: 0,
                episodesRecorded: 0,
                topicsUpdated: 0,
                knowledgeUpserted: 0,
                postCursorMessages: 0,
                elapsedMs: Date.now() - start,
            };
        }
        // ── Cursor-aware prioritization ──────────────────────────────
        // Fetch the cursor boundary to split messages into post-cursor (unseen)
        // and pre-cursor (already in context). Post-cursor messages are processed
        // first — they're the highest signal for fact/episode extraction.
        let cursorBoundary = 0;
        if (this.getCursor) {
            try {
                // Get session key from the first message's conversation
                const sessionKey = this.getSessionKeyForMessage(messageDb, messages[0].conversationId);
                if (sessionKey) {
                    const cursor = await this.getCursor(agentId, sessionKey);
                    if (cursor) {
                        cursorBoundary = cursor.lastSentId;
                    }
                }
            }
            catch {
                // Cursor fetch is best-effort — fall through to default ordering
            }
        }
        // Sort: post-cursor messages first (highest signal), then pre-cursor.
        // Within each tier, maintain original (ascending) order.
        const postCursor = messages.filter(m => m.id > cursorBoundary);
        const preCursor = messages.filter(m => m.id <= cursorBoundary);
        const ordered = [...postCursor, ...preCursor];
        let factsExtracted = 0;
        let episodesRecorded = 0;
        let topicsUpdated = 0;
        let knowledgeUpserted = 0;
        let maxMessageId = lastProcessedId;
        for (const msg of ordered) {
            const content = msg.textContent || '';
            // Track the high-water mark over every fetched message, even ones
            // skipped below, so the watermark never re-serves this batch.
            if (msg.id > maxMessageId)
                maxMessageId = msg.id;
            // Skip heartbeats and very short messages
            if (msg.isHeartbeat || content.length < 30)
                continue;
            // 1. Extract facts (TUNE-003: confidence varies by extraction pattern type)
            const factCandidates = extractFactCandidates(content);
            for (const { content: factContent, confidence: factConfidence } of factCandidates) {
                try {
                    const fact = factStore.addFact(agentId, factContent, {
                        scope: 'agent',
                        confidence: factConfidence,
                        sourceType: 'indexer',
                        sourceSessionKey: this.getSessionKeyForMessage(messageDb, msg.conversationId),
                        sourceRef: `msg:${msg.id}`,
                    });
                    factsExtracted++;
                    // Embed new fact for semantic recall (best-effort, non-blocking)
                    if (this.vectorStore && fact.id) {
                        this.vectorStore.indexItem('facts', fact.id, factContent, fact.domain || undefined)
                            .catch(() => { });
                    }
                }
                catch {
                    // Duplicate or constraint violation — skip
                }
            }
            // 2. Classify episodes
            const episode = classifyEpisode(msg);
            if (episode && episode.significance >= this.config.episodeSignificanceThreshold) {
                // Secret gate: shared visibility requires clean content.
                // Downgrade to 'private' rather than drop, so we don't lose the episode.
                const episodeVisibility = isSafeForSharedVisibility(episode.summary) ? 'org' : 'private';
                try {
                    const recorded = episodeStore.record(agentId, episode.type, episode.summary, {
                        significance: episode.significance,
                        visibility: episodeVisibility,
                        sessionKey: this.getSessionKeyForMessage(messageDb, msg.conversationId),
                    });
                    episodesRecorded++;
                    // Embed high-significance episodes (decisions, incidents, deployments)
                    if (this.vectorStore && recorded?.id && episode.significance >= 0.7) {
                        this.vectorStore.indexItem('episodes', recorded.id, episode.summary, episode.type)
                            .catch(() => { });
                    }
                }
                catch {
                    // Skip duplicate episodes
                }
            }
            // 3. Detect and update topics (create only if no active topic with
            // the same case-insensitive name exists)
            const topicName = detectTopic(content);
            if (topicName) {
                try {
                    const existingTopics = topicStore.getActive(agentId, 100);
                    const existingTopic = existingTopics.find((t) => t.name.toLowerCase() === topicName.toLowerCase());
                    if (!existingTopic) {
                        topicStore.create(agentId, topicName, `Auto-detected from conversation`);
                        topicsUpdated++;
                    }
                }
                catch {
                    // Skip topic creation errors
                }
            }
            // 4. Extract knowledge candidates
            const knowledgeCandidates = extractKnowledgeCandidates(content, agentId);
            for (const { domain, key, value } of knowledgeCandidates) {
                try {
                    knowledgeStore.upsert(agentId, domain, key, value, {
                        sourceType: 'indexer',
                        sourceRef: `msg:${msg.id}`,
                    });
                    knowledgeUpserted++;
                }
                catch {
                    // Skip duplicates
                }
            }
        }
        // Update watermark
        this.setWatermark(libraryDb, agentId, maxMessageId);
        return {
            agentId,
            messagesProcessed: messages.length,
            factsExtracted,
            episodesRecorded,
            topicsUpdated,
            knowledgeUpserted,
            postCursorMessages: postCursor.length,
            elapsedMs: Date.now() - start,
        };
    }
    /**
     * Fetch unindexed messages for an agent (ascending id, bounded batch).
     *
     * NOTE(review): c.session_key is selected but not mapped onto the
     * returned objects — callers use getSessionKeyForMessage() instead.
     */
    getUnindexedMessages(db, agentId, afterId, limit) {
        const rows = db.prepare(`
      SELECT m.*, c.session_key
      FROM messages m
      JOIN conversations c ON m.conversation_id = c.id
      WHERE m.agent_id = ? AND m.id > ?
      ORDER BY m.id ASC
      LIMIT ?
    `).all(agentId, afterId, limit);
        // Map snake_case rows to the camelCase message shape used above.
        return rows.map(row => ({
            id: row.id,
            conversationId: row.conversation_id,
            agentId: row.agent_id,
            role: row.role,
            textContent: row.text_content || null,
            toolCalls: row.tool_calls ? JSON.parse(row.tool_calls) : null,
            toolResults: row.tool_results ? JSON.parse(row.tool_results) : null,
            metadata: row.metadata ? JSON.parse(row.metadata) : undefined,
            messageIndex: row.message_index,
            tokenCount: row.token_count || null,
            isHeartbeat: row.is_heartbeat === 1,
            createdAt: row.created_at,
        }));
    }
    /**
     * Get the session key for a conversation ID.
     * Returns undefined when the conversation row is missing.
     */
    getSessionKeyForMessage(db, conversationId) {
        const row = db.prepare('SELECT session_key FROM conversations WHERE id = ?').get(conversationId);
        return row?.session_key;
    }
    /**
     * Get the indexing watermark for an agent, or null if none recorded.
     * Lazily creates the watermark table on first use (runs on every call;
     * CREATE TABLE IF NOT EXISTS makes repeats cheap no-ops).
     */
    getWatermark(libraryDb, agentId) {
        // Ensure watermarks table exists
        libraryDb.prepare(`
      CREATE TABLE IF NOT EXISTS indexer_watermarks (
        agent_id TEXT PRIMARY KEY,
        last_message_id INTEGER NOT NULL DEFAULT 0,
        last_run_at TEXT NOT NULL
      )
    `).run();
        const row = libraryDb.prepare('SELECT agent_id, last_message_id, last_run_at FROM indexer_watermarks WHERE agent_id = ?').get(agentId);
        if (!row)
            return null;
        return {
            agentId: row.agent_id,
            lastMessageId: row.last_message_id,
            lastRunAt: row.last_run_at,
        };
    }
    /**
     * Set (insert-or-update) the indexing watermark for an agent.
     * Assumes the table already exists — getWatermark() runs first in
     * processAgent() and creates it.
     */
    setWatermark(libraryDb, agentId, lastMessageId) {
        const now = new Date().toISOString();
        libraryDb.prepare(`
      INSERT INTO indexer_watermarks (agent_id, last_message_id, last_run_at)
      VALUES (?, ?, ?)
      ON CONFLICT(agent_id) DO UPDATE SET
        last_message_id = excluded.last_message_id,
        last_run_at = excluded.last_run_at
    `).run(agentId, lastMessageId, now);
    }
    /**
     * Apply time-based decay to facts.
     * Increases decay_score for older facts, making them less relevant.
     * Episodes decay at half the fact rate; topics transition
     * active → dormant → closed based on configured durations.
     *
     * The interpolated thresholds below are integers produced by
     * parseDuration() (never user strings), so the template SQL is safe.
     */
    applyDecay(libraryDb) {
        const rate = this.config.factDecayRate;
        // Decay facts that haven't been referenced recently
        libraryDb.prepare(`
      UPDATE facts
      SET decay_score = MIN(1.0, decay_score + ?)
      WHERE superseded_by IS NULL
        AND decay_score < 1.0
        AND updated_at < datetime('now', '-1 day')
    `).run(rate);
        // Decay episodes older than 7 days
        libraryDb.prepare(`
      UPDATE episodes
      SET decay_score = MIN(1.0, decay_score + ?)
      WHERE decay_score < 1.0
        AND created_at < datetime('now', '-7 days')
    `).run(rate * 0.5);
        // Mark dormant topics
        const dormantThreshold = this.parseDuration(this.config.topicDormantAfter);
        if (dormantThreshold > 0) {
            libraryDb.prepare(`
        UPDATE topics
        SET status = 'dormant'
        WHERE status = 'active'
          AND updated_at < datetime('now', '-${dormantThreshold} seconds')
      `).run();
        }
        // Close old dormant topics
        const closedThreshold = this.parseDuration(this.config.topicClosedAfter);
        if (closedThreshold > 0) {
            libraryDb.prepare(`
        UPDATE topics
        SET status = 'closed'
        WHERE status = 'dormant'
          AND updated_at < datetime('now', '-${closedThreshold} seconds')
      `).run();
        }
    }
    /**
     * Parse a duration string like "24h", "7d" into seconds.
     * Returns 0 for anything that doesn't match `<int><s|m|h|d>`, which
     * callers treat as "threshold disabled".
     */
    parseDuration(dur) {
        const match = dur.match(/^(\d+)\s*(h|d|m|s)$/);
        if (!match)
            return 0;
        // Regex guarantees plain decimal digits, so the radix is unambiguous.
        const val = parseInt(match[1]);
        switch (match[2]) {
            case 's': return val;
            case 'm': return val * 60;
            case 'h': return val * 3600;
            case 'd': return val * 86400;
            default: return 0;
        }
    }
    /**
     * Get current watermarks for all agents.
     * Returns [] if the watermark table doesn't exist yet (query throws).
     */
    getWatermarks(libraryDb) {
        try {
            const rows = libraryDb.prepare('SELECT agent_id, last_message_id, last_run_at FROM indexer_watermarks ORDER BY agent_id').all();
            return rows.map(r => ({
                agentId: r.agent_id,
                lastMessageId: r.last_message_id,
                lastRunAt: r.last_run_at,
            }));
        }
        catch {
            return [];
        }
    }
}
721
// ─── Standalone runner ──────────────────────────────────────────
/**
 * Factory: build a BackgroundIndexer wired to HyperMem databases, optionally
 * attaching a vector store for embedding at index time.
 * Used by the hook or a standalone daemon.
 *
 * @returns {BackgroundIndexer} a configured (not yet started) indexer.
 */
export function createIndexer(getMessageDb, getLibraryDb, listAgents, config, getCursor, vectorStore) {
    const indexer = new BackgroundIndexer(config, getMessageDb, getLibraryDb, listAgents, getCursor);
    if (vectorStore) {
        indexer.setVectorStore(vectorStore);
    }
    return indexer;
}
732
+ //# sourceMappingURL=background-indexer.js.map