@psiclawops/hypermem 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/dist/background-indexer.d.ts +132 -0
  2. package/dist/background-indexer.d.ts.map +1 -0
  3. package/dist/background-indexer.js +1044 -0
  4. package/dist/cache.d.ts +110 -0
  5. package/dist/cache.d.ts.map +1 -0
  6. package/dist/cache.js +495 -0
  7. package/dist/compaction-fence.d.ts +89 -0
  8. package/dist/compaction-fence.d.ts.map +1 -0
  9. package/dist/compaction-fence.js +153 -0
  10. package/dist/compositor.d.ts +226 -0
  11. package/dist/compositor.d.ts.map +1 -0
  12. package/dist/compositor.js +2558 -0
  13. package/dist/content-type-classifier.d.ts +41 -0
  14. package/dist/content-type-classifier.d.ts.map +1 -0
  15. package/dist/content-type-classifier.js +181 -0
  16. package/dist/cross-agent.d.ts +62 -0
  17. package/dist/cross-agent.d.ts.map +1 -0
  18. package/dist/cross-agent.js +259 -0
  19. package/dist/db.d.ts +131 -0
  20. package/dist/db.d.ts.map +1 -0
  21. package/dist/db.js +402 -0
  22. package/dist/desired-state-store.d.ts +100 -0
  23. package/dist/desired-state-store.d.ts.map +1 -0
  24. package/dist/desired-state-store.js +222 -0
  25. package/dist/doc-chunk-store.d.ts +140 -0
  26. package/dist/doc-chunk-store.d.ts.map +1 -0
  27. package/dist/doc-chunk-store.js +391 -0
  28. package/dist/doc-chunker.d.ts +99 -0
  29. package/dist/doc-chunker.d.ts.map +1 -0
  30. package/dist/doc-chunker.js +324 -0
  31. package/dist/dreaming-promoter.d.ts +86 -0
  32. package/dist/dreaming-promoter.d.ts.map +1 -0
  33. package/dist/dreaming-promoter.js +381 -0
  34. package/dist/episode-store.d.ts +49 -0
  35. package/dist/episode-store.d.ts.map +1 -0
  36. package/dist/episode-store.js +135 -0
  37. package/dist/fact-store.d.ts +75 -0
  38. package/dist/fact-store.d.ts.map +1 -0
  39. package/dist/fact-store.js +236 -0
  40. package/dist/fleet-store.d.ts +144 -0
  41. package/dist/fleet-store.d.ts.map +1 -0
  42. package/dist/fleet-store.js +276 -0
  43. package/dist/fos-mod.d.ts +178 -0
  44. package/dist/fos-mod.d.ts.map +1 -0
  45. package/dist/fos-mod.js +416 -0
  46. package/dist/hybrid-retrieval.d.ts +64 -0
  47. package/dist/hybrid-retrieval.d.ts.map +1 -0
  48. package/dist/hybrid-retrieval.js +344 -0
  49. package/dist/image-eviction.d.ts +49 -0
  50. package/dist/image-eviction.d.ts.map +1 -0
  51. package/dist/image-eviction.js +251 -0
  52. package/dist/index.d.ts +650 -0
  53. package/dist/index.d.ts.map +1 -0
  54. package/dist/index.js +1072 -0
  55. package/dist/keystone-scorer.d.ts +51 -0
  56. package/dist/keystone-scorer.d.ts.map +1 -0
  57. package/dist/keystone-scorer.js +52 -0
  58. package/dist/knowledge-graph.d.ts +110 -0
  59. package/dist/knowledge-graph.d.ts.map +1 -0
  60. package/dist/knowledge-graph.js +305 -0
  61. package/dist/knowledge-lint.d.ts +29 -0
  62. package/dist/knowledge-lint.d.ts.map +1 -0
  63. package/dist/knowledge-lint.js +116 -0
  64. package/dist/knowledge-store.d.ts +72 -0
  65. package/dist/knowledge-store.d.ts.map +1 -0
  66. package/dist/knowledge-store.js +247 -0
  67. package/dist/library-schema.d.ts +22 -0
  68. package/dist/library-schema.d.ts.map +1 -0
  69. package/dist/library-schema.js +1038 -0
  70. package/dist/message-store.d.ts +89 -0
  71. package/dist/message-store.d.ts.map +1 -0
  72. package/dist/message-store.js +323 -0
  73. package/dist/metrics-dashboard.d.ts +114 -0
  74. package/dist/metrics-dashboard.d.ts.map +1 -0
  75. package/dist/metrics-dashboard.js +260 -0
  76. package/dist/obsidian-exporter.d.ts +57 -0
  77. package/dist/obsidian-exporter.d.ts.map +1 -0
  78. package/dist/obsidian-exporter.js +274 -0
  79. package/dist/obsidian-watcher.d.ts +147 -0
  80. package/dist/obsidian-watcher.d.ts.map +1 -0
  81. package/dist/obsidian-watcher.js +403 -0
  82. package/dist/open-domain.d.ts +46 -0
  83. package/dist/open-domain.d.ts.map +1 -0
  84. package/dist/open-domain.js +125 -0
  85. package/dist/preference-store.d.ts +54 -0
  86. package/dist/preference-store.d.ts.map +1 -0
  87. package/dist/preference-store.js +109 -0
  88. package/dist/preservation-gate.d.ts +82 -0
  89. package/dist/preservation-gate.d.ts.map +1 -0
  90. package/dist/preservation-gate.js +150 -0
  91. package/dist/proactive-pass.d.ts +63 -0
  92. package/dist/proactive-pass.d.ts.map +1 -0
  93. package/dist/proactive-pass.js +239 -0
  94. package/dist/profiles.d.ts +44 -0
  95. package/dist/profiles.d.ts.map +1 -0
  96. package/dist/profiles.js +227 -0
  97. package/dist/provider-translator.d.ts +50 -0
  98. package/dist/provider-translator.d.ts.map +1 -0
  99. package/dist/provider-translator.js +403 -0
  100. package/dist/rate-limiter.d.ts +76 -0
  101. package/dist/rate-limiter.d.ts.map +1 -0
  102. package/dist/rate-limiter.js +179 -0
  103. package/dist/repair-tool-pairs.d.ts +38 -0
  104. package/dist/repair-tool-pairs.d.ts.map +1 -0
  105. package/dist/repair-tool-pairs.js +138 -0
  106. package/dist/retrieval-policy.d.ts +51 -0
  107. package/dist/retrieval-policy.d.ts.map +1 -0
  108. package/dist/retrieval-policy.js +77 -0
  109. package/dist/schema.d.ts +15 -0
  110. package/dist/schema.d.ts.map +1 -0
  111. package/dist/schema.js +229 -0
  112. package/dist/secret-scanner.d.ts +51 -0
  113. package/dist/secret-scanner.d.ts.map +1 -0
  114. package/dist/secret-scanner.js +248 -0
  115. package/dist/seed.d.ts +108 -0
  116. package/dist/seed.d.ts.map +1 -0
  117. package/dist/seed.js +177 -0
  118. package/dist/session-flusher.d.ts +53 -0
  119. package/dist/session-flusher.d.ts.map +1 -0
  120. package/dist/session-flusher.js +69 -0
  121. package/dist/session-topic-map.d.ts +41 -0
  122. package/dist/session-topic-map.d.ts.map +1 -0
  123. package/dist/session-topic-map.js +77 -0
  124. package/dist/spawn-context.d.ts +54 -0
  125. package/dist/spawn-context.d.ts.map +1 -0
  126. package/dist/spawn-context.js +159 -0
  127. package/dist/system-store.d.ts +73 -0
  128. package/dist/system-store.d.ts.map +1 -0
  129. package/dist/system-store.js +182 -0
  130. package/dist/temporal-store.d.ts +80 -0
  131. package/dist/temporal-store.d.ts.map +1 -0
  132. package/dist/temporal-store.js +149 -0
  133. package/dist/topic-detector.d.ts +35 -0
  134. package/dist/topic-detector.d.ts.map +1 -0
  135. package/dist/topic-detector.js +249 -0
  136. package/dist/topic-store.d.ts +45 -0
  137. package/dist/topic-store.d.ts.map +1 -0
  138. package/dist/topic-store.js +136 -0
  139. package/dist/topic-synthesizer.d.ts +51 -0
  140. package/dist/topic-synthesizer.d.ts.map +1 -0
  141. package/dist/topic-synthesizer.js +315 -0
  142. package/dist/trigger-registry.d.ts +63 -0
  143. package/dist/trigger-registry.d.ts.map +1 -0
  144. package/dist/trigger-registry.js +163 -0
  145. package/dist/types.d.ts +533 -0
  146. package/dist/types.d.ts.map +1 -0
  147. package/dist/types.js +9 -0
  148. package/dist/vector-store.d.ts +170 -0
  149. package/dist/vector-store.d.ts.map +1 -0
  150. package/dist/vector-store.js +677 -0
  151. package/dist/version.d.ts +34 -0
  152. package/dist/version.d.ts.map +1 -0
  153. package/dist/version.js +34 -0
  154. package/dist/wiki-page-emitter.d.ts +65 -0
  155. package/dist/wiki-page-emitter.d.ts.map +1 -0
  156. package/dist/wiki-page-emitter.js +258 -0
  157. package/dist/work-store.d.ts +112 -0
  158. package/dist/work-store.d.ts.map +1 -0
  159. package/dist/work-store.js +273 -0
  160. package/package.json +1 -1
@@ -0,0 +1,1044 @@
1
+ /**
2
+ * hypermem Background Indexer
3
+ *
4
+ * Processes message history to extract structured knowledge:
5
+ * - Facts: atomic pieces of learned information
6
+ * - Episodes: significant events worth remembering
7
+ * - Topics: conversation threads and their lifecycle
8
+ * - Knowledge: durable structured entries (domain + key)
9
+ *
10
+ * Runs as a periodic background task, processing unindexed messages
11
+ * in batches. Each batch is scored, classified, and stored in L4 (library.db).
12
+ *
13
+ * Design principles:
14
+ * - No LLM dependency: extraction uses pattern matching + heuristics
15
+ * - Idempotent: tracks watermarks per agent to avoid reprocessing
16
+ * - Bounded: processes N messages per tick to avoid blocking
17
+ * - Observable: logs extraction stats for monitoring
18
+ */
19
+ import { lintKnowledge } from './knowledge-lint.js';
20
+ import { MessageStore } from './message-store.js';
21
+ import { runNoiseSweep, runToolDecay } from './proactive-pass.js';
22
+ import { TopicSynthesizer } from './topic-synthesizer.js';
23
+ import { runDreamingPassForFleet } from './dreaming-promoter.js';
24
+ import { FactStore } from './fact-store.js';
25
+ import { EpisodeStore } from './episode-store.js';
26
+ import { TopicStore } from './topic-store.js';
27
+ import { KnowledgeStore } from './knowledge-store.js';
28
+ import { TemporalStore } from './temporal-store.js';
29
+ import { isSafeForSharedVisibility } from './secret-scanner.js';
30
+ // ─── Agent-to-Domain Map ────────────────────────────────────────
31
+ // Maps well-known agent IDs to their primary domain.
32
+ // Used to populate the `domain` column on extracted facts so that
33
+ // domain-scoped retrieval (e.g. getActiveFacts({ domain: 'infrastructure' }))
34
+ // returns results. New agents default to 'general'.
35
+ const AGENT_DOMAIN_MAP = {
36
+ forge: 'infrastructure',
37
+ vigil: 'infrastructure',
38
+ pylon: 'infrastructure',
39
+ plane: 'infrastructure',
40
+ compass: 'product',
41
+ helm: 'product',
42
+ chisel: 'product',
43
+ facet: 'product',
44
+ sentinel: 'security',
45
+ bastion: 'security',
46
+ gauge: 'security',
47
+ clarity: 'ux',
48
+ anvil: 'governance',
49
+ vanguard: 'strategy',
50
+ crucible: 'development',
51
+ relay: 'communications',
52
+ main: 'general',
53
+ 'channel-mini': 'general',
54
+ };
55
+ /**
56
+ * Derive a domain label for a fact based on agent ID.
57
+ * Falls back to 'general' for unknown agents.
58
+ */
59
+ function domainForAgent(agentId) {
60
+ return AGENT_DOMAIN_MAP[agentId] ?? 'general';
61
+ }
62
+ function extractFactCandidates(content) {
63
+ const facts = [];
64
+ if (!content || content.length < 20)
65
+ return facts;
66
+ // Decision patterns: "decided to", "agreed on", "choosing", "going with" — high confidence (0.75)
67
+ const decisionPatterns = [
68
+ /(?:we |I |they )?(?:decided|agreed|chose|selected|committed) (?:to |on |that )(.{20,200})/gi,
69
+ /(?:going|went) with (.{10,150})/gi,
70
+ /decision:\s*(.{10,200})/gi,
71
+ ];
72
+ // Learned/discovered patterns — medium-high confidence (0.65)
73
+ const learnedPatterns = [
74
+ /(?:learned|discovered|found out|realized|noticed) (?:that |)(.{20,200})/gi,
75
+ /turns out (?:that |)(.{20,200})/gi,
76
+ /(?:TIL|FYI|note to self)[:\s]+(.{10,200})/gi,
77
+ ];
78
+ // Config/setting patterns — medium confidence (0.60); matches more promiscuously
79
+ const configPatterns = [
80
+ /(?:set|changed|updated|configured) (\S+ to .{5,150})/gi,
81
+ /(?:model|config|setting)[:\s]+(\S+\s*(?:→|->|=|is)\s*.{5,100})/gi,
82
+ ];
83
+ // Preference patterns — medium confidence (0.60)
84
+ const preferencePatterns = [
85
+ /(?:prefer|always use|never use|don't use|avoid) (.{10,150})/gi,
86
+ /(?:ragesaq|operator) (?:wants|prefers|likes|hates|dislikes) (.{10,150})/gi,
87
+ ];
88
+ // Operational patterns: deployments, incidents, fixes — high confidence (0.70)
89
+ const operationalPatterns = [
90
+ /(?:deployed|shipped|released|rolled back|reverted) (.{10,200})/gi,
91
+ /(?:outage|incident|failure|broke|broken|crashed)(?:: | — | - )(.{10,200})/gi,
92
+ /(?:fixed|resolved|patched|hotfixed) (.{10,200})/gi,
93
+ ];
94
+ const patternGroups = [
95
+ { patterns: decisionPatterns, confidence: 0.75 },
96
+ { patterns: learnedPatterns, confidence: 0.65 },
97
+ { patterns: configPatterns, confidence: 0.60 },
98
+ { patterns: preferencePatterns, confidence: 0.60 },
99
+ { patterns: operationalPatterns, confidence: 0.70 },
100
+ ];
101
+ for (const { patterns, confidence } of patternGroups) {
102
+ for (const pattern of patterns) {
103
+ let match;
104
+ // Reset lastIndex for global patterns
105
+ pattern.lastIndex = 0;
106
+ while ((match = pattern.exec(content)) !== null) {
107
+ const candidate = match[1].trim();
108
+ // Quality gate: reject noise that matched patterns but isn't a real fact
109
+ if (!isQualityFact(candidate))
110
+ continue;
111
+ facts.push({ content: candidate, confidence });
112
+ }
113
+ }
114
+ }
115
+ return facts;
116
+ }
117
+ /**
118
+ * TUNE-011: Quality gate for fact extraction.
119
+ * Rejects pattern matches that are code, table fragments, questions,
120
+ * or too short to be meaningful facts.
121
+ */
122
+ /**
123
+ * Operational boilerplate phrases that appear frequently across sessions
124
+ * but carry zero signal value. High knn similarity makes them *worse*
125
+ * retrieval candidates — they match everything and contaminate episodes.
126
+ */
127
+ const OPERATIONAL_BOILERPLATE = [
128
+ /timed?\s*out\s*waiting/i,
129
+ /message\s*was\s*delivered/i,
130
+ /no\s*reply\s*(back\s*)?yet/i,
131
+ /picked?\s*it\s*up\s*on\s*(next\s*)?heartbeat/i,
132
+ /session\s*not\s*found/i,
133
+ /\bretrying\b/i,
134
+ /tool\s*call\s*failed/i,
135
+ /exec\s*completed/i,
136
+ /no\s*reply\s*needed/i,
137
+ /still\s*waiting/i,
138
+ /will\s*pick\s*(it\s*)?up\s*(on\s*(next|the))?/i,
139
+ /message\s*is\s*in\s*(his|her|their|the)\s*queue/i,
140
+ /sent\s+to\s+(anvil|compass|clarity|sentinel|vanguard|forge)/i,
141
+ /dispatched\s+(it\s+)?to/i,
142
+ /timed\s*out\s*after/i,
143
+ /\bNO_REPLY\b/,
144
+ ];
145
+ function isQualityFact(content) {
146
+ // Too short — sentence fragments
147
+ if (content.length < 40)
148
+ return false;
149
+ // Too long — likely captured a paragraph, not a fact
150
+ if (content.length > 300)
151
+ return false;
152
+ // Fewer than 5 words — fragment
153
+ const wordCount = content.split(/\s+/).filter(w => w.length > 0).length;
154
+ if (wordCount < 5)
155
+ return false;
156
+ // Questions — not assertions of fact
157
+ if (content.trimEnd().endsWith('?'))
158
+ return false;
159
+ // Code indicators: braces, arrows, imports, variable declarations
160
+ if (/^[\s{}\[\]|`]/.test(content))
161
+ return false; // starts with structural char
162
+ if (/[{}].*[{}]/.test(content))
163
+ return false; // contains paired braces (code blocks)
164
+ if (/^\s*(import|export|const|let|var|function|class|interface|type|return|if|for|while|switch)\s/i.test(content))
165
+ return false;
166
+ if (/=>\s*[{(]/.test(content))
167
+ return false; // arrow functions
168
+ if (/SELECT\s|INSERT\s|UPDATE\s|DELETE\s|CREATE\s/i.test(content))
169
+ return false; // SQL
170
+ // Table cell fragments: contains pipe-delimited cells
171
+ if (/\|.*\|.*\|/.test(content))
172
+ return false;
173
+ // Regex patterns leaked from source
174
+ if (/\/[^/]+\/[gimsuvy]*[,;]/.test(content))
175
+ return false;
176
+ // Raw file paths without context (tool output, not facts)
177
+ if (/^\/[\w/.-]+$/.test(content.trim()))
178
+ return false;
179
+ // Markdown formatting artifacts
180
+ if (content.startsWith('```') || content.startsWith('---') || content.startsWith('==='))
181
+ return false;
182
+ // Git output
183
+ if (/^[a-f0-9]{7,40}\s/.test(content) || /^\+\+\+|^---\s[ab]\//.test(content))
184
+ return false;
185
+ if (/^\d+ files? changed/.test(content))
186
+ return false;
187
+ // Stack traces
188
+ if (/^\s*at\s+\S+\s+\(/.test(content) || /node:internal/.test(content))
189
+ return false;
190
+ // High non-alpha ratio indicates code/data, not natural language
191
+ const alphaChars = (content.match(/[a-zA-Z]/g) || []).length;
192
+ if (alphaChars / content.length < 0.5)
193
+ return false;
194
+ // TUNE-013: External/untrusted content markers — web search excerpts,
195
+ // external doc pulls, and injected context blocks should never become facts.
196
+ if (/<<<\s*(END_EXTERNAL|BEGIN_EXTERNAL|EXTERNAL_UNTRUSTED|UNTRUSTED_CONTENT)/i.test(content))
197
+ return false;
198
+ if (/EXTERNAL_UNTRUSTED_CONTENT\s+id=/.test(content))
199
+ return false;
200
+ // TUNE-013: Multi-paragraph content — real extracted facts are single sentences.
201
+ // More than 2 newlines means we captured a paragraph or structured block, not a fact.
202
+ const newlineCount = (content.match(/\n/g) || []).length;
203
+ if (newlineCount > 2)
204
+ return false;
205
+ // TUNE-013: URL-heavy content — external source snippets, not actionable facts
206
+ const urlMatches = content.match(/https?:\/\/\S+/g) || [];
207
+ if (urlMatches.length >= 2)
208
+ return false; // one URL in a fact is ok; multiple = source snippet
209
+ // TUNE-013: Content starting with a markdown heading is section text, not a fact
210
+ if (/^#{1,4}\s/.test(content.trim()))
211
+ return false;
212
+ // TUNE-014: Operational boilerplate — phrases common across sessions that produce
213
+ // high knn similarity scores but carry zero signal. They cross-contaminate episodes.
214
+ for (const pattern of OPERATIONAL_BOILERPLATE) {
215
+ if (pattern.test(content))
216
+ return false;
217
+ }
218
+ return true;
219
+ }
220
+ /**
221
+ * Classify a message for episode significance.
222
+ * Returns episode type and significance score, or null if not significant.
223
+ */
224
+ function classifyEpisode(msg) {
225
+ const content = msg.textContent || '';
226
+ if (!content || content.length < 50)
227
+ return null; // Raised from 30
228
+ // Skip heartbeats
229
+ if (msg.isHeartbeat)
230
+ return null;
231
+ // Skip messages that are primarily code/data output (tool results, logs)
232
+ const alphaRatio = (content.match(/[a-zA-Z]/g) || []).length / content.length;
233
+ if (alphaRatio < 0.4)
234
+ return null;
235
+ // Skip messages that start with structural output indicators
236
+ if (/^[\s]*[{[\d|#=+\-]/.test(content) && content.length < 200)
237
+ return null;
238
+ const lower = content.toLowerCase();
239
+ // ── Negation-aware incident detection ──────────────────────
240
+ // Only trigger on actual incidents, not "zero failures" or "no crashes"
241
+ const incidentTerms = ['outage', 'incident', 'failure', 'crash', 'broke', 'broken', 'emergency'];
242
+ const negationPrefixes = ['no ', 'zero ', 'without ', '0 ', 'never ', 'fixed ', 'resolved '];
243
+ const hasIncidentTerm = incidentTerms.some(term => lower.includes(term));
244
+ const isNegated = hasIncidentTerm && incidentTerms.some(term => {
245
+ const idx = lower.indexOf(term);
246
+ if (idx < 0)
247
+ return false;
248
+ const prefix = lower.substring(Math.max(0, idx - 15), idx).toLowerCase();
249
+ return negationPrefixes.some(neg => prefix.includes(neg.trimEnd()));
250
+ });
251
+ if (hasIncidentTerm && !isNegated && content.length > 100) {
252
+ // Genuine incident — verify it's describing a problem, not analyzing code
253
+ if (!/^\s*(\/\/|#|\*|\/\*|```|import|const|function)/.test(content)) {
254
+ const summary = content.slice(0, 200);
255
+ return { type: 'incident', significance: 0.9, summary };
256
+ }
257
+ }
258
+ // Deployment events (high significance)
259
+ if (/(?:deployed|shipped|released|went live|now live|go live)/i.test(content) &&
260
+ content.length > 60) {
261
+ const summary = content.slice(0, 200);
262
+ return { type: 'deployment', significance: 0.8, summary };
263
+ }
264
+ // Architecture decisions (high significance)
265
+ if (/(?:decided on|chose|committed to|architecture|design decision)/i.test(content) &&
266
+ content.length > 80) {
267
+ const summary = content.slice(0, 200);
268
+ return { type: 'decision', significance: 0.7, summary };
269
+ }
270
+ // Discovery/insight (medium significance)
271
+ if (/(?:discovered|found|realized|root cause|turns out)/i.test(content) && content.length > 80) {
272
+ const summary = content.slice(0, 200);
273
+ return { type: 'discovery', significance: 0.5, summary };
274
+ }
275
+ // Config changes (medium significance) — TUNE-004: raised to 0.5
276
+ if (/(?:changed|updated|migrated|switched|model.*(?:→|->|to))/i.test(content) && content.length > 60) {
277
+ // Skip if it's just a tool output confirmation
278
+ if (/^Successfully replaced|^\[main [a-f0-9]|^ok \d+ -/.test(content))
279
+ return null;
280
+ const summary = content.slice(0, 200);
281
+ return { type: 'config_change', significance: 0.5, summary };
282
+ }
283
+ // Milestone/completion (medium significance)
284
+ if (/(?:completed|finished|done|milestone|all tests pass|all green)/i.test(content) &&
285
+ content.length > 60) {
286
+ // Skip tool output that happens to contain "done"
287
+ if (/^Successfully|^\[main|^ok \d+/.test(content))
288
+ return null;
289
+ const summary = content.slice(0, 200);
290
+ return { type: 'milestone', significance: 0.5, summary };
291
+ }
292
+ return null;
293
+ }
294
+ /**
295
+ * Extract knowledge candidates — structured (domain, key, value) tuples.
296
+ */
297
+ function extractKnowledgeCandidates(content, agentId) {
298
+ const results = [];
299
+ if (!content || content.length < 30)
300
+ return results;
301
+ // TUNE-012: Broadened path extraction.
302
+ // Real messages use paths inline without explicit prefixes like "located at".
303
+ // Match any absolute path that's at least 3 segments deep (filters /tmp, /etc noise).
304
+ const pathMatches = content.matchAll(/(?:`([/][\w./-]{10,})`|(?:^|[\s:=])(\/home\/[\w./-]{10,}|\/opt\/[\w./-]{10,}|\/var\/[\w./-]{10,}))/gm);
305
+ for (const match of pathMatches) {
306
+ const value = (match[1] || match[2]).replace(/[`'".,;:)]+$/, '').trim();
307
+ if (value.length > 10 && value.split('/').length >= 4) {
308
+ const segments = value.split('/').filter(s => s.length > 0);
309
+ const lastSeg = segments[segments.length - 1] || '';
310
+ // Reject truncated paths (last segment < 3 chars unless it's a known ext)
311
+ if (lastSeg.length < 3 && !lastSeg.includes('.'))
312
+ continue;
313
+ const key = lastSeg || segments[segments.length - 2] || 'unknown';
314
+ results.push({ domain: 'paths', key, value });
315
+ }
316
+ }
317
+ // Explicit location references (original patterns, kept for completeness)
318
+ const locationPatterns = [
319
+ /(?:path|located at|lives at|stored at|found at|repo at|running at)[:\s]+(`[^`]+`|\/\S+)/gi,
320
+ /(?:workspace|directory|repo|project)[:\s]+(`[^`]+`|\/\S+)/gi,
321
+ ];
322
+ for (const pattern of locationPatterns) {
323
+ pattern.lastIndex = 0;
324
+ let match;
325
+ while ((match = pattern.exec(content)) !== null) {
326
+ const value = match[1].replace(/[`'".,;:)]+/g, '').trim();
327
+ if (value.startsWith('/') && value.length > 10 && !results.some(r => r.value === value)) {
328
+ const key = value.split('/').pop() || 'unknown';
329
+ results.push({ domain: 'paths', key, value });
330
+ }
331
+ }
332
+ }
333
+ // Service/port patterns — broadened to catch "port NNNN" and "on :NNNN"
334
+ const servicePatterns = [
335
+ /(\S+)\s+(?:runs on|listening on|port|on port)\s+(\d{2,5})/gi,
336
+ /(?:service|server|daemon)\s+(\S+)\s+(?:on |at |: )(\S+)/gi,
337
+ /(?:localhost|127\.0\.0\.1):(\d{2,5})\b/gi,
338
+ ];
339
+ for (const pattern of servicePatterns) {
340
+ pattern.lastIndex = 0;
341
+ let match;
342
+ while ((match = pattern.exec(content)) !== null) {
343
+ if (pattern.source.includes('localhost')) {
344
+ // localhost:PORT pattern — key is the port, value is the URL
345
+ results.push({ domain: 'services', key: `port:${match[1]}`, value: match[0] });
346
+ }
347
+ else {
348
+ results.push({ domain: 'services', key: match[1], value: match[2] });
349
+ }
350
+ }
351
+ }
352
+ // Agent identity patterns — broadened
353
+ const identityPatterns = [
354
+ /(\w+)\s+(?:is|was)\s+(?:the\s+)?(\w+)\s+(?:seat|director|specialist|council)/gi,
355
+ /(\w+)\s+(?:reports to|owned by|managed by)\s+(\w+)/gi,
356
+ /(?:agents?|directors?|seats?)[:\s]+(\w+)(?:\s*[,/]\s*(\w+))+/gi,
357
+ ];
358
+ for (const pattern of identityPatterns) {
359
+ pattern.lastIndex = 0;
360
+ let match;
361
+ while ((match = pattern.exec(content)) !== null) {
362
+ if (match[2]) {
363
+ results.push({ domain: 'fleet', key: match[1].toLowerCase(), value: `${match[1]} ${match[2]}` });
364
+ }
365
+ }
366
+ }
367
+ // Dedup by domain+key
368
+ const seen = new Set();
369
+ return results.filter(r => {
370
+ const k = `${r.domain}:${r.key}`;
371
+ if (seen.has(k))
372
+ return false;
373
+ seen.add(k);
374
+ return true;
375
+ });
376
+ }
377
+ /**
378
+ * Detect conversation topic from message content.
379
+ * Returns a topic name candidate or null.
380
+ */
381
+ function detectTopic(content) {
382
+ if (!content || content.length < 50)
383
+ return null;
384
+ // Product/project name detection
385
+ const productMatch = content.match(/\b(HyperMem|ClawText|ClawDash|ClawCanvas|ClawCouncil|ClawTomation|OpenClaw|ClawDispatch)\b/i);
386
+ if (productMatch)
387
+ return productMatch[1];
388
+ // Infrastructure topic detection
389
+ if (/\b(?:redis|sqlite|database|migration|deployment|docker|nginx)\b/i.test(content)) {
390
+ return 'infrastructure';
391
+ }
392
+ // Security topic detection
393
+ if (/\b(?:security|auth|permission|access|token|credential)\b/i.test(content)) {
394
+ return 'security';
395
+ }
396
+ return null;
397
+ }
398
+ // ─── Background Indexer ─────────────────────────────────────────
399
+ export class BackgroundIndexer {
400
+ getMessageDb;
401
+ getLibraryDb;
402
+ listAgents;
403
+ getCursor;
404
+ config;
405
+ dreamerConfig;
406
+ intervalHandle = null;
407
+ running = false;
408
+ vectorStore = null;
409
+ synthesizer = null;
410
+ tickCount = 0;
411
+ constructor(config, getMessageDb, getLibraryDb, listAgents, getCursor, dreamerConfig) {
412
+ this.getMessageDb = getMessageDb;
413
+ this.getLibraryDb = getLibraryDb;
414
+ this.listAgents = listAgents;
415
+ this.getCursor = getCursor;
416
+ // Initialize synthesizer if libraryDb accessor is available
417
+ if (getLibraryDb) {
418
+ const libDb = getLibraryDb();
419
+ if (libDb) {
420
+ this.synthesizer = new TopicSynthesizer(libDb, (agentId) => {
421
+ if (!getMessageDb)
422
+ return null;
423
+ try {
424
+ return getMessageDb(agentId);
425
+ }
426
+ catch {
427
+ return null;
428
+ }
429
+ });
430
+ }
431
+ }
432
+ this.config = {
433
+ enabled: config?.enabled ?? true,
434
+ factExtractionMode: config?.factExtractionMode ?? 'tiered',
435
+ topicDormantAfter: config?.topicDormantAfter ?? '24h',
436
+ topicClosedAfter: config?.topicClosedAfter ?? '7d',
437
+ factDecayRate: config?.factDecayRate ?? 0.01,
438
+ episodeSignificanceThreshold: config?.episodeSignificanceThreshold ?? 0.5,
439
+ periodicInterval: config?.periodicInterval ?? 60000, // 1 minute
440
+ batchSize: config?.batchSize ?? 128,
441
+ maxMessagesPerTick: config?.maxMessagesPerTick ?? 500,
442
+ };
443
+ this.dreamerConfig = dreamerConfig ?? {};
444
+ }
445
+ /**
446
+ * Set the vector store for embedding new facts/episodes at index time.
447
+ * Optional — if not set, indexer runs without embedding (FTS5-only mode).
448
+ */
449
+ setVectorStore(vs) {
450
+ this.vectorStore = vs;
451
+ }
452
+ /**
453
+ * Start periodic indexing.
454
+ */
455
+ start() {
456
+ if (!this.config.enabled)
457
+ return;
458
+ if (this.intervalHandle)
459
+ return;
460
+ // Run once immediately
461
+ this.tick().catch(err => {
462
+ console.error('[indexer] Initial tick failed:', err);
463
+ });
464
+ // Run episode vector backfill once at startup (no-op if already done)
465
+ if (this.vectorStore && this.getLibraryDb) {
466
+ this.backfillEpisodeVectors().catch(err => {
467
+ console.error('[indexer] Episode backfill failed:', err);
468
+ });
469
+ }
470
+ // Then periodically
471
+ this.intervalHandle = setInterval(() => {
472
+ this.tick().catch(err => {
473
+ console.error('[indexer] Periodic tick failed:', err);
474
+ });
475
+ }, this.config.periodicInterval);
476
+ console.log(`[indexer] Started with interval ${this.config.periodicInterval}ms, batchSize ${this.config.batchSize}, maxPerTick ${this.config.maxMessagesPerTick}`);
477
+ }
478
+ /**
479
+ * Stop periodic indexing.
480
+ */
481
+ stop() {
482
+ if (this.intervalHandle) {
483
+ clearInterval(this.intervalHandle);
484
+ this.intervalHandle = null;
485
+ }
486
+ }
487
+ /**
488
+ * Run one indexing pass across all agents.
489
+ */
490
+ async tick() {
491
+ if (this.running) {
492
+ console.log('[indexer] Skipping tick — previous run still active');
493
+ return [];
494
+ }
495
+ this.running = true;
496
+ const results = [];
497
+ try {
498
+ if (!this.listAgents || !this.getMessageDb || !this.getLibraryDb) {
499
+ console.warn('[indexer] Missing database accessors — skipping');
500
+ return [];
501
+ }
502
+ const agents = this.listAgents();
503
+ const libraryDb = this.getLibraryDb();
504
+ let tickTotal = 0;
505
+ for (const agentId of agents) {
506
+ if (tickTotal >= this.config.maxMessagesPerTick) {
507
+ console.log(`[indexer] maxMessagesPerTick (${this.config.maxMessagesPerTick}) reached — deferring remaining agents`);
508
+ break;
509
+ }
510
+ try {
511
+ const stats = await this.processAgent(agentId, libraryDb);
512
+ tickTotal += stats.messagesProcessed;
513
+ if (stats.messagesProcessed > 0 || stats.tombstoned > 0) {
514
+ results.push(stats);
515
+ }
516
+ }
517
+ catch (err) {
518
+ const msg = err instanceof Error ? err.message : String(err);
519
+ console.error(`[indexer] Failed to process ${agentId}: ${msg}`);
520
+ }
521
+ }
522
+ if (results.length > 0) {
523
+ const totalMessages = results.reduce((s, r) => s + r.messagesProcessed, 0);
524
+ const totalFacts = results.reduce((s, r) => s + r.factsExtracted, 0);
525
+ const totalEpisodes = results.reduce((s, r) => s + r.episodesRecorded, 0);
526
+ const totalTombstoned = results.reduce((s, r) => s + r.tombstoned, 0);
527
+ const tombstonedPart = totalTombstoned > 0 ? `, ${totalTombstoned} tombstoned` : '';
528
+ console.log(`[indexer] Tick complete: ${totalMessages} messages → ${totalFacts} facts, ${totalEpisodes} episodes${tombstonedPart}`);
529
+ }
530
+ // Run decay on every tick
531
+ this.applyDecay(libraryDb);
532
+ // Topic synthesis — run for each agent after main indexer tick
533
+ if (this.synthesizer) {
534
+ for (const agentId of agents) {
535
+ try {
536
+ const synthResult = this.synthesizer.tick(agentId);
537
+ if (synthResult.topicsSynthesized > 0) {
538
+ console.log(`[indexer] Synthesized ${synthResult.topicsSynthesized} topics for ${agentId}, ${synthResult.knowledgeEntriesWritten} knowledge entries`);
539
+ }
540
+ }
541
+ catch {
542
+ // Non-fatal
543
+ }
544
+ }
545
+ }
546
+ // Knowledge lint — every LINT_FREQUENCY ticks
547
+ this.tickCount++;
548
+ if (this.tickCount % 10 === 0 && this.getLibraryDb) {
549
+ try {
550
+ const libDb = this.getLibraryDb();
551
+ if (libDb) {
552
+ const lint = lintKnowledge(libDb);
553
+ if (lint.staleDecayed > 0 || lint.coverageGaps.length > 0) {
554
+ console.log(`[indexer] Lint: ${lint.staleDecayed} stale decayed, ${lint.orphansFound} orphans, ${lint.coverageGaps.length} coverage gaps`);
555
+ }
556
+ }
557
+ }
558
+ catch {
559
+ // Non-fatal
560
+ }
561
+ }
562
+ // Dreaming promotion pass — every tickInterval ticks (default 12 = ~1hr)
563
+ const dreamerEnabled = this.dreamerConfig.enabled ?? false;
564
+ const dreamerTickInterval = this.dreamerConfig.tickInterval ?? 12;
565
+ if (dreamerEnabled && this.tickCount % dreamerTickInterval === 0 && this.getLibraryDb) {
566
+ try {
567
+ const libDb = this.getLibraryDb();
568
+ if (libDb) {
569
+ const dreamResults = await runDreamingPassForFleet(agents, libDb, this.dreamerConfig);
570
+ const totalPromoted = dreamResults.reduce((s, r) => s + r.promoted, 0);
571
+ if (totalPromoted > 0) {
572
+ console.log(`[indexer] Dreaming: promoted ${totalPromoted} facts across ${dreamResults.length} agents`);
573
+ }
574
+ }
575
+ }
576
+ catch (err) {
577
+ // Non-fatal — dreaming failures never block indexing
578
+ console.warn('[indexer] Dreaming pass failed (non-fatal):', err.message);
579
+ }
580
+ }
581
+ // Run proactive passes on each agent's message DB
582
+ for (const agentId of agents) {
583
+ const messageDb = this.getMessageDb(agentId);
584
+ if (!messageDb)
585
+ continue;
586
+ // Get active conversations for this agent
587
+ let convRows;
588
+ try {
589
+ convRows = messageDb.prepare(`SELECT id FROM conversations WHERE agent_id = ? AND status = 'active' ORDER BY updated_at DESC LIMIT 10`).all(agentId);
590
+ }
591
+ catch {
592
+ continue;
593
+ }
594
+ for (const conv of convRows) {
595
+ const noiseSweepResult = runNoiseSweep(messageDb, conv.id);
596
+ const toolDecayResult = runToolDecay(messageDb, conv.id);
597
+ // Log only if something changed
598
+ if (noiseSweepResult.messagesDeleted > 0 || toolDecayResult.messagesUpdated > 0) {
599
+ console.log(`[indexer] Proactive pass (conv ${conv.id}): swept ${noiseSweepResult.messagesDeleted} noise msgs, ` +
600
+ `decayed ${toolDecayResult.messagesUpdated} tool results (${toolDecayResult.bytesFreed} bytes freed)`);
601
+ }
602
+ }
603
+ }
604
+ }
605
+ finally {
606
+ this.running = false;
607
+ }
608
+ return results;
609
+ }
610
+ /**
611
+ * Process a single agent's unindexed messages.
612
+ *
613
+ * When a cursor fetcher is available, messages are split into two tiers:
614
+ * - Post-cursor (id > cursor.lastSentId): "unseen" by the model, high-signal priority
615
+ * - Pre-cursor (id <= cursor.lastSentId): already in the model's context window, lower priority
616
+ * Post-cursor messages are processed first. This ensures the indexer prioritizes
617
+ * content the model hasn't seen yet — decisions, incidents, and discoveries that
618
+ * happened between context windows.
619
+ */
620
+ async processAgent(agentId, libraryDb) {
621
+ const start = Date.now();
622
+ const messageDb = this.getMessageDb(agentId);
623
+ const messageStore = new MessageStore(messageDb);
624
+ const factStore = new FactStore(libraryDb);
625
+ const episodeStore = new EpisodeStore(libraryDb);
626
+ const topicStore = new TopicStore(libraryDb);
627
+ const knowledgeStore = new KnowledgeStore(libraryDb);
628
+ const temporalStore = new TemporalStore(libraryDb);
629
+ // Get watermark — last processed message ID for this agent
630
+ const watermark = this.getWatermark(libraryDb, agentId);
631
+ const lastProcessedId = watermark?.lastMessageId ?? 0;
632
+ // Fetch unindexed messages (batch size from config)
633
+ const messages = this.getUnindexedMessages(messageDb, agentId, lastProcessedId, this.config.batchSize);
634
+ if (messages.length === 0) {
635
+ // Even with no new messages, run tombstone cleanup in case supersedes
636
+ // were written externally (e.g. via FactStore.markSuperseded()).
637
+ let tombstoned = 0;
638
+ if (this.vectorStore) {
639
+ tombstoned = this.vectorStore.tombstoneSuperseded();
640
+ }
641
+ return {
642
+ agentId,
643
+ messagesProcessed: 0,
644
+ factsExtracted: 0,
645
+ episodesRecorded: 0,
646
+ topicsUpdated: 0,
647
+ knowledgeUpserted: 0,
648
+ tombstoned,
649
+ postCursorMessages: 0,
650
+ elapsedMs: Date.now() - start,
651
+ };
652
+ }
653
+ // ── Cursor-aware prioritization ──────────────────────────────
654
+ // Fetch the cursor boundary to split messages into post-cursor (unseen)
655
+ // and pre-cursor (already in context). Post-cursor messages are processed
656
+ // first — they're the highest signal for fact/episode extraction.
657
+ let cursorBoundary = 0;
658
+ if (this.getCursor) {
659
+ try {
660
+ // Get session key from the first message's conversation
661
+ const sessionKey = this.getSessionKeyForMessage(messageDb, messages[0].conversationId);
662
+ if (sessionKey) {
663
+ const cursor = await this.getCursor(agentId, sessionKey);
664
+ if (cursor) {
665
+ cursorBoundary = cursor.lastSentId;
666
+ }
667
+ }
668
+ }
669
+ catch {
670
+ // Cursor fetch is best-effort — fall through to default ordering
671
+ }
672
+ }
673
+ // Sort: post-cursor messages first (highest signal), then pre-cursor.
674
+ // Within each tier, maintain original (ascending) order.
675
+ const postCursor = messages.filter(m => m.id > cursorBoundary);
676
+ const preCursor = messages.filter(m => m.id <= cursorBoundary);
677
+ const ordered = [...postCursor, ...preCursor];
678
+ let factsExtracted = 0;
679
+ let episodesRecorded = 0;
680
+ let topicsUpdated = 0;
681
+ let knowledgeUpserted = 0;
682
+ let supersededFacts = 0;
683
+ let maxMessageId = lastProcessedId;
684
+ for (const msg of ordered) {
685
+ const content = msg.textContent || '';
686
+ if (msg.id > maxMessageId)
687
+ maxMessageId = msg.id;
688
+ // Skip heartbeats and very short messages
689
+ if (msg.isHeartbeat || content.length < 30)
690
+ continue;
691
+ // 1. Extract facts (TUNE-003: confidence varies by extraction pattern type)
692
+ const factCandidates = extractFactCandidates(content);
693
+ for (const { content: factContent, confidence: factConfidence } of factCandidates) {
694
+ try {
695
+ const fact = factStore.addFact(agentId, factContent, {
696
+ scope: 'agent',
697
+ domain: domainForAgent(agentId),
698
+ confidence: factConfidence,
699
+ sourceType: 'indexer',
700
+ sourceSessionKey: this.getSessionKeyForMessage(messageDb, msg.conversationId),
701
+ sourceRef: `msg:${msg.id}`,
702
+ });
703
+ factsExtracted++;
704
+ // ── Supersedes detection ─────────────────────────────────
705
+ // Check if the newly extracted fact supersedes an existing one.
706
+ // A supersede is detected when an existing active fact shares the
707
+ // same 60-char prefix (same topic, different phrasing/update).
708
+ if (fact.id) {
709
+ // Index into temporal store (ingest_at as proxy, confidence=0.5)
710
+ temporalStore.indexFact(fact.id, agentId, fact.createdAt);
711
+ const oldFactId = factStore.findSupersedableByContent(agentId, factContent);
712
+ if (oldFactId !== null && oldFactId !== fact.id) {
713
+ const didSupersede = factStore.markSuperseded(oldFactId, fact.id);
714
+ if (didSupersede) {
715
+ supersededFacts++;
716
+ // Immediately remove the stale vector so it can't surface in KNN recall
717
+ if (this.vectorStore) {
718
+ this.vectorStore.removeItem('facts', oldFactId);
719
+ }
720
+ }
721
+ }
722
+ }
723
+ // Embed new fact for semantic recall (best-effort, non-blocking)
724
+ if (this.vectorStore && fact.id) {
725
+ this.vectorStore.indexItem('facts', fact.id, factContent, fact.domain || undefined)
726
+ .catch(() => { });
727
+ }
728
+ }
729
+ catch {
730
+ // Duplicate or constraint violation — skip
731
+ }
732
+ }
733
+ // 2. Classify episodes
734
+ const episode = classifyEpisode(msg);
735
+ if (episode && episode.significance >= this.config.episodeSignificanceThreshold) {
736
+ // Secret gate: shared visibility requires clean content.
737
+ // Downgrade to 'private' rather than drop, so we don't lose the episode.
738
+ const episodeVisibility = isSafeForSharedVisibility(episode.summary) ? 'org' : 'private';
739
+ try {
740
+ const recorded = episodeStore.record(agentId, episode.type, episode.summary, {
741
+ significance: episode.significance,
742
+ visibility: episodeVisibility,
743
+ sessionKey: this.getSessionKeyForMessage(messageDb, msg.conversationId),
744
+ sourceMessageId: msg.id,
745
+ });
746
+ episodesRecorded++;
747
+ // Embed episodes at sig>=0.5 (lowered from 0.7 — discovery/config_change events
748
+ // at sig=0.5 are real operational events, not noise).
749
+ if (this.vectorStore && recorded?.id && episode.significance >= 0.5) {
750
+ this.vectorStore.indexItem('episodes', recorded.id, episode.summary, episode.type)
751
+ .catch(() => { });
752
+ }
753
+ }
754
+ catch {
755
+ // Skip duplicate episodes
756
+ }
757
+ }
758
+ // 3. Detect and update topics
759
+ const topicName = detectTopic(content);
760
+ if (topicName) {
761
+ try {
762
+ const existingTopics = topicStore.getActive(agentId, 100);
763
+ const existingTopic = existingTopics.find((t) => t.name.toLowerCase() === topicName.toLowerCase());
764
+ if (!existingTopic) {
765
+ topicStore.create(agentId, topicName, `Auto-detected from conversation`);
766
+ topicsUpdated++;
767
+ }
768
+ }
769
+ catch {
770
+ // Skip topic creation errors
771
+ }
772
+ }
773
+ // 4. Extract knowledge candidates
774
+ const knowledgeCandidates = extractKnowledgeCandidates(content, agentId);
775
+ for (const { domain, key, value } of knowledgeCandidates) {
776
+ try {
777
+ knowledgeStore.upsert(agentId, domain, key, value, {
778
+ sourceType: 'indexer',
779
+ sourceRef: `msg:${msg.id}`,
780
+ });
781
+ knowledgeUpserted++;
782
+ }
783
+ catch {
784
+ // Skip duplicates
785
+ }
786
+ }
787
+ }
788
+ // Update watermark
789
+ this.setWatermark(libraryDb, agentId, maxMessageId);
790
+ // Run tombstone pass: remove vector entries for any facts marked superseded
791
+ // (covers both the supersedes detected above and external markSuperseded calls).
792
+ let tombstoned = 0;
793
+ if (this.vectorStore) {
794
+ tombstoned = this.vectorStore.tombstoneSuperseded();
795
+ }
796
+ return {
797
+ agentId,
798
+ messagesProcessed: messages.length,
799
+ factsExtracted,
800
+ episodesRecorded,
801
+ topicsUpdated,
802
+ knowledgeUpserted,
803
+ tombstoned,
804
+ postCursorMessages: postCursor.length,
805
+ elapsedMs: Date.now() - start,
806
+ };
807
+ }
808
+ /**
809
+ * Fetch unindexed messages for an agent.
810
+ */
811
+ getUnindexedMessages(db, agentId, afterId, limit) {
812
+ const rows = db.prepare(`
813
+ SELECT m.*, c.session_key
814
+ FROM messages m
815
+ JOIN conversations c ON m.conversation_id = c.id
816
+ WHERE m.agent_id = ? AND m.id > ?
817
+ ORDER BY m.id ASC
818
+ LIMIT ?
819
+ `).all(agentId, afterId, limit);
820
+ return rows.map(row => ({
821
+ id: row.id,
822
+ conversationId: row.conversation_id,
823
+ agentId: row.agent_id,
824
+ role: row.role,
825
+ textContent: row.text_content || null,
826
+ toolCalls: row.tool_calls ? JSON.parse(row.tool_calls) : null,
827
+ toolResults: row.tool_results ? JSON.parse(row.tool_results) : null,
828
+ metadata: row.metadata ? JSON.parse(row.metadata) : undefined,
829
+ messageIndex: row.message_index,
830
+ tokenCount: row.token_count || null,
831
+ isHeartbeat: row.is_heartbeat === 1,
832
+ createdAt: row.created_at,
833
+ }));
834
+ }
835
+ /**
836
+ * Get the session key for a conversation ID.
837
+ */
838
+ getSessionKeyForMessage(db, conversationId) {
839
+ const row = db.prepare('SELECT session_key FROM conversations WHERE id = ?').get(conversationId);
840
+ return row?.session_key;
841
+ }
842
+ /**
843
+ * Get the indexing watermark for an agent.
844
+ */
845
+ getWatermark(libraryDb, agentId) {
846
+ // Ensure watermarks table exists
847
+ libraryDb.prepare(`
848
+ CREATE TABLE IF NOT EXISTS indexer_watermarks (
849
+ agent_id TEXT PRIMARY KEY,
850
+ last_message_id INTEGER NOT NULL DEFAULT 0,
851
+ last_run_at TEXT NOT NULL
852
+ )
853
+ `).run();
854
+ const row = libraryDb.prepare('SELECT agent_id, last_message_id, last_run_at FROM indexer_watermarks WHERE agent_id = ?').get(agentId);
855
+ if (!row)
856
+ return null;
857
+ return {
858
+ agentId: row.agent_id,
859
+ lastMessageId: row.last_message_id,
860
+ lastRunAt: row.last_run_at,
861
+ };
862
+ }
863
+ /**
864
+ * Set the indexing watermark for an agent.
865
+ */
866
+ setWatermark(libraryDb, agentId, lastMessageId) {
867
+ const now = new Date().toISOString();
868
+ libraryDb.prepare(`
869
+ INSERT INTO indexer_watermarks (agent_id, last_message_id, last_run_at)
870
+ VALUES (?, ?, ?)
871
+ ON CONFLICT(agent_id) DO UPDATE SET
872
+ last_message_id = excluded.last_message_id,
873
+ last_run_at = excluded.last_run_at
874
+ `).run(agentId, lastMessageId, now);
875
+ }
876
+ /**
877
+ * Apply time-based decay to facts.
878
+ * Increases decay_score for older facts, making them less relevant.
879
+ */
880
+ applyDecay(libraryDb) {
881
+ const rate = this.config.factDecayRate;
882
+ // Decay facts that haven't been referenced recently
883
+ libraryDb.prepare(`
884
+ UPDATE facts
885
+ SET decay_score = MIN(1.0, decay_score + ?)
886
+ WHERE superseded_by IS NULL
887
+ AND decay_score < 1.0
888
+ AND updated_at < datetime('now', '-1 day')
889
+ `).run(rate);
890
+ // Decay episodes older than 7 days
891
+ libraryDb.prepare(`
892
+ UPDATE episodes
893
+ SET decay_score = MIN(1.0, decay_score + ?)
894
+ WHERE decay_score < 1.0
895
+ AND created_at < datetime('now', '-7 days')
896
+ `).run(rate * 0.5);
897
+ // Mark dormant topics
898
+ const dormantThreshold = this.parseDuration(this.config.topicDormantAfter);
899
+ if (dormantThreshold > 0) {
900
+ // Compute threshold timestamp in JS and pass as parameter — avoids SQL template interpolation.
901
+ const dormantBefore = new Date(Date.now() - dormantThreshold * 1000).toISOString();
902
+ libraryDb.prepare(`
903
+ UPDATE topics
904
+ SET status = 'dormant'
905
+ WHERE status = 'active'
906
+ AND updated_at < ?
907
+ `).run(dormantBefore);
908
+ }
909
+ // Close old dormant topics
910
+ const closedThreshold = this.parseDuration(this.config.topicClosedAfter);
911
+ if (closedThreshold > 0) {
912
+ const closedBefore = new Date(Date.now() - closedThreshold * 1000).toISOString();
913
+ libraryDb.prepare(`
914
+ UPDATE topics
915
+ SET status = 'closed'
916
+ WHERE status = 'dormant'
917
+ AND updated_at < ?
918
+ `).run(closedBefore);
919
+ }
920
+ }
921
+ /**
922
+ * Parse a duration string like "24h", "7d" into seconds.
923
+ */
924
+ parseDuration(dur) {
925
+ const match = dur.match(/^(\d+)\s*(h|d|m|s)$/);
926
+ if (!match)
927
+ return 0;
928
+ const val = parseInt(match[1]);
929
+ switch (match[2]) {
930
+ case 's': return val;
931
+ case 'm': return val * 60;
932
+ case 'h': return val * 3600;
933
+ case 'd': return val * 86400;
934
+ default: return 0;
935
+ }
936
+ }
937
+ /**
938
+ * One-time backfill: embed episodes with sig>=0.5 that were missed by the
939
+ * old >=0.7 vectorization threshold.
940
+ *
941
+ * Gated by a system_state flag 'indexer:episode_backfill_v1' so it runs
942
+ * exactly once even across gateway restarts. Safe to re-run manually
943
+ * (delete the flag row first) if re-backfill is ever needed.
944
+ */
945
+ async backfillEpisodeVectors() {
946
+ if (!this.vectorStore || !this.getLibraryDb)
947
+ return;
948
+ const libraryDb = this.getLibraryDb();
949
+ const BACKFILL_FLAG = 'episode_backfill_v1';
950
+ // Ensure system_state table exists (schema may not have been applied yet)
951
+ try {
952
+ libraryDb.prepare(`
953
+ CREATE TABLE IF NOT EXISTS system_state (
954
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
955
+ category TEXT NOT NULL,
956
+ key TEXT NOT NULL,
957
+ value TEXT,
958
+ updated_at TEXT NOT NULL,
959
+ updated_by TEXT,
960
+ ttl TEXT,
961
+ UNIQUE(category, key)
962
+ )
963
+ `).run();
964
+ }
965
+ catch {
966
+ // Table already exists — safe to ignore
967
+ }
968
+ // Check if backfill already completed
969
+ const existing = libraryDb.prepare("SELECT value FROM system_state WHERE category = 'indexer' AND key = ?").get(BACKFILL_FLAG);
970
+ if (existing) {
971
+ // Already done
972
+ return;
973
+ }
974
+ console.log('[indexer] Starting episode vector backfill (sig>=0.5, not yet vectorized)...');
975
+ // Find episodes with sig>=0.5 that have no vec_index_map entry.
976
+ // We join against vec_index_map using a fallback: if the table is in a
977
+ // separate DB (vectors.db), we query it directly via the VectorStore.
978
+ let episodes;
979
+ try {
980
+ episodes = libraryDb.prepare(`
981
+ SELECT id, summary, event_type
982
+ FROM episodes
983
+ WHERE significance >= 0.5
984
+ ORDER BY created_at DESC
985
+ `).all();
986
+ }
987
+ catch {
988
+ console.warn('[indexer] Backfill: could not query episodes table');
989
+ return;
990
+ }
991
+ let queued = 0;
992
+ let skipped = 0;
993
+ for (const ep of episodes) {
994
+ // Check if already vectorized
995
+ if (this.vectorStore.hasItem('episodes', ep.id)) {
996
+ skipped++;
997
+ continue;
998
+ }
999
+ try {
1000
+ await this.vectorStore.indexItem('episodes', ep.id, ep.summary, ep.event_type);
1001
+ queued++;
1002
+ }
1003
+ catch {
1004
+ // Non-fatal — keep going
1005
+ }
1006
+ }
1007
+ // Mark backfill complete
1008
+ const now = new Date().toISOString();
1009
+ libraryDb.prepare(`
1010
+ INSERT INTO system_state (category, key, value, updated_at, updated_by)
1011
+ VALUES ('indexer', ?, ?, ?, 'indexer')
1012
+ ON CONFLICT(category, key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at
1013
+ `).run(BACKFILL_FLAG, JSON.stringify({ completedAt: now, queued, skipped }), now);
1014
+ console.log(`[indexer] Episode backfill complete: ${queued} queued, ${skipped} already vectorized`);
1015
+ }
1016
+ /**
1017
+ * Get current watermarks for all agents.
1018
+ */
1019
+ getWatermarks(libraryDb) {
1020
+ try {
1021
+ const rows = libraryDb.prepare('SELECT agent_id, last_message_id, last_run_at FROM indexer_watermarks ORDER BY agent_id').all();
1022
+ return rows.map(r => ({
1023
+ agentId: r.agent_id,
1024
+ lastMessageId: r.last_message_id,
1025
+ lastRunAt: r.last_run_at,
1026
+ }));
1027
+ }
1028
+ catch {
1029
+ return [];
1030
+ }
1031
+ }
1032
+ }
1033
+ // ─── Standalone runner ──────────────────────────────────────────
1034
+ /**
1035
+ * Create and start a background indexer connected to hypermem databases.
1036
+ * Used by the hook or a standalone daemon.
1037
+ */
1038
+ export function createIndexer(getMessageDb, getLibraryDb, listAgents, config, getCursor, vectorStore, dreamerConfig) {
1039
+ const indexer = new BackgroundIndexer(config, getMessageDb, getLibraryDb, listAgents, getCursor, dreamerConfig);
1040
+ if (vectorStore)
1041
+ indexer.setVectorStore(vectorStore);
1042
+ return indexer;
1043
+ }
1044
+ //# sourceMappingURL=background-indexer.js.map