@psiclawops/hypermem 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/background-indexer.d.ts +132 -0
- package/dist/background-indexer.d.ts.map +1 -0
- package/dist/background-indexer.js +1044 -0
- package/dist/cache.d.ts +110 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +495 -0
- package/dist/compaction-fence.d.ts +89 -0
- package/dist/compaction-fence.d.ts.map +1 -0
- package/dist/compaction-fence.js +153 -0
- package/dist/compositor.d.ts +226 -0
- package/dist/compositor.d.ts.map +1 -0
- package/dist/compositor.js +2558 -0
- package/dist/content-type-classifier.d.ts +41 -0
- package/dist/content-type-classifier.d.ts.map +1 -0
- package/dist/content-type-classifier.js +181 -0
- package/dist/cross-agent.d.ts +62 -0
- package/dist/cross-agent.d.ts.map +1 -0
- package/dist/cross-agent.js +259 -0
- package/dist/db.d.ts +131 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +402 -0
- package/dist/desired-state-store.d.ts +100 -0
- package/dist/desired-state-store.d.ts.map +1 -0
- package/dist/desired-state-store.js +222 -0
- package/dist/doc-chunk-store.d.ts +140 -0
- package/dist/doc-chunk-store.d.ts.map +1 -0
- package/dist/doc-chunk-store.js +391 -0
- package/dist/doc-chunker.d.ts +99 -0
- package/dist/doc-chunker.d.ts.map +1 -0
- package/dist/doc-chunker.js +324 -0
- package/dist/dreaming-promoter.d.ts +86 -0
- package/dist/dreaming-promoter.d.ts.map +1 -0
- package/dist/dreaming-promoter.js +381 -0
- package/dist/episode-store.d.ts +49 -0
- package/dist/episode-store.d.ts.map +1 -0
- package/dist/episode-store.js +135 -0
- package/dist/fact-store.d.ts +75 -0
- package/dist/fact-store.d.ts.map +1 -0
- package/dist/fact-store.js +236 -0
- package/dist/fleet-store.d.ts +144 -0
- package/dist/fleet-store.d.ts.map +1 -0
- package/dist/fleet-store.js +276 -0
- package/dist/fos-mod.d.ts +178 -0
- package/dist/fos-mod.d.ts.map +1 -0
- package/dist/fos-mod.js +416 -0
- package/dist/hybrid-retrieval.d.ts +64 -0
- package/dist/hybrid-retrieval.d.ts.map +1 -0
- package/dist/hybrid-retrieval.js +344 -0
- package/dist/image-eviction.d.ts +49 -0
- package/dist/image-eviction.d.ts.map +1 -0
- package/dist/image-eviction.js +251 -0
- package/dist/index.d.ts +650 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1072 -0
- package/dist/keystone-scorer.d.ts +51 -0
- package/dist/keystone-scorer.d.ts.map +1 -0
- package/dist/keystone-scorer.js +52 -0
- package/dist/knowledge-graph.d.ts +110 -0
- package/dist/knowledge-graph.d.ts.map +1 -0
- package/dist/knowledge-graph.js +305 -0
- package/dist/knowledge-lint.d.ts +29 -0
- package/dist/knowledge-lint.d.ts.map +1 -0
- package/dist/knowledge-lint.js +116 -0
- package/dist/knowledge-store.d.ts +72 -0
- package/dist/knowledge-store.d.ts.map +1 -0
- package/dist/knowledge-store.js +247 -0
- package/dist/library-schema.d.ts +22 -0
- package/dist/library-schema.d.ts.map +1 -0
- package/dist/library-schema.js +1038 -0
- package/dist/message-store.d.ts +89 -0
- package/dist/message-store.d.ts.map +1 -0
- package/dist/message-store.js +323 -0
- package/dist/metrics-dashboard.d.ts +114 -0
- package/dist/metrics-dashboard.d.ts.map +1 -0
- package/dist/metrics-dashboard.js +260 -0
- package/dist/obsidian-exporter.d.ts +57 -0
- package/dist/obsidian-exporter.d.ts.map +1 -0
- package/dist/obsidian-exporter.js +274 -0
- package/dist/obsidian-watcher.d.ts +147 -0
- package/dist/obsidian-watcher.d.ts.map +1 -0
- package/dist/obsidian-watcher.js +403 -0
- package/dist/open-domain.d.ts +46 -0
- package/dist/open-domain.d.ts.map +1 -0
- package/dist/open-domain.js +125 -0
- package/dist/preference-store.d.ts +54 -0
- package/dist/preference-store.d.ts.map +1 -0
- package/dist/preference-store.js +109 -0
- package/dist/preservation-gate.d.ts +82 -0
- package/dist/preservation-gate.d.ts.map +1 -0
- package/dist/preservation-gate.js +150 -0
- package/dist/proactive-pass.d.ts +63 -0
- package/dist/proactive-pass.d.ts.map +1 -0
- package/dist/proactive-pass.js +239 -0
- package/dist/profiles.d.ts +44 -0
- package/dist/profiles.d.ts.map +1 -0
- package/dist/profiles.js +227 -0
- package/dist/provider-translator.d.ts +50 -0
- package/dist/provider-translator.d.ts.map +1 -0
- package/dist/provider-translator.js +403 -0
- package/dist/rate-limiter.d.ts +76 -0
- package/dist/rate-limiter.d.ts.map +1 -0
- package/dist/rate-limiter.js +179 -0
- package/dist/repair-tool-pairs.d.ts +38 -0
- package/dist/repair-tool-pairs.d.ts.map +1 -0
- package/dist/repair-tool-pairs.js +138 -0
- package/dist/retrieval-policy.d.ts +51 -0
- package/dist/retrieval-policy.d.ts.map +1 -0
- package/dist/retrieval-policy.js +77 -0
- package/dist/schema.d.ts +15 -0
- package/dist/schema.d.ts.map +1 -0
- package/dist/schema.js +229 -0
- package/dist/secret-scanner.d.ts +51 -0
- package/dist/secret-scanner.d.ts.map +1 -0
- package/dist/secret-scanner.js +248 -0
- package/dist/seed.d.ts +108 -0
- package/dist/seed.d.ts.map +1 -0
- package/dist/seed.js +177 -0
- package/dist/session-flusher.d.ts +53 -0
- package/dist/session-flusher.d.ts.map +1 -0
- package/dist/session-flusher.js +69 -0
- package/dist/session-topic-map.d.ts +41 -0
- package/dist/session-topic-map.d.ts.map +1 -0
- package/dist/session-topic-map.js +77 -0
- package/dist/spawn-context.d.ts +54 -0
- package/dist/spawn-context.d.ts.map +1 -0
- package/dist/spawn-context.js +159 -0
- package/dist/system-store.d.ts +73 -0
- package/dist/system-store.d.ts.map +1 -0
- package/dist/system-store.js +182 -0
- package/dist/temporal-store.d.ts +80 -0
- package/dist/temporal-store.d.ts.map +1 -0
- package/dist/temporal-store.js +149 -0
- package/dist/topic-detector.d.ts +35 -0
- package/dist/topic-detector.d.ts.map +1 -0
- package/dist/topic-detector.js +249 -0
- package/dist/topic-store.d.ts +45 -0
- package/dist/topic-store.d.ts.map +1 -0
- package/dist/topic-store.js +136 -0
- package/dist/topic-synthesizer.d.ts +51 -0
- package/dist/topic-synthesizer.d.ts.map +1 -0
- package/dist/topic-synthesizer.js +315 -0
- package/dist/trigger-registry.d.ts +63 -0
- package/dist/trigger-registry.d.ts.map +1 -0
- package/dist/trigger-registry.js +163 -0
- package/dist/types.d.ts +533 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +9 -0
- package/dist/vector-store.d.ts +170 -0
- package/dist/vector-store.d.ts.map +1 -0
- package/dist/vector-store.js +677 -0
- package/dist/version.d.ts +34 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +34 -0
- package/dist/wiki-page-emitter.d.ts +65 -0
- package/dist/wiki-page-emitter.d.ts.map +1 -0
- package/dist/wiki-page-emitter.js +258 -0
- package/dist/work-store.d.ts +112 -0
- package/dist/work-store.d.ts.map +1 -0
- package/dist/work-store.js +273 -0
- package/package.json +1 -1
|
@@ -0,0 +1,1044 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* hypermem Background Indexer
|
|
3
|
+
*
|
|
4
|
+
* Processes message history to extract structured knowledge:
|
|
5
|
+
* - Facts: atomic pieces of learned information
|
|
6
|
+
* - Episodes: significant events worth remembering
|
|
7
|
+
* - Topics: conversation threads and their lifecycle
|
|
8
|
+
* - Knowledge: durable structured entries (domain + key)
|
|
9
|
+
*
|
|
10
|
+
* Runs as a periodic background task, processing unindexed messages
|
|
11
|
+
* in batches. Each batch is scored, classified, and stored in L4 (library.db).
|
|
12
|
+
*
|
|
13
|
+
* Design principles:
|
|
14
|
+
* - No LLM dependency: extraction uses pattern matching + heuristics
|
|
15
|
+
* - Idempotent: tracks watermarks per agent to avoid reprocessing
|
|
16
|
+
* - Bounded: processes N messages per tick to avoid blocking
|
|
17
|
+
* - Observable: logs extraction stats for monitoring
|
|
18
|
+
*/
|
|
19
|
+
import { lintKnowledge } from './knowledge-lint.js';
|
|
20
|
+
import { MessageStore } from './message-store.js';
|
|
21
|
+
import { runNoiseSweep, runToolDecay } from './proactive-pass.js';
|
|
22
|
+
import { TopicSynthesizer } from './topic-synthesizer.js';
|
|
23
|
+
import { runDreamingPassForFleet } from './dreaming-promoter.js';
|
|
24
|
+
import { FactStore } from './fact-store.js';
|
|
25
|
+
import { EpisodeStore } from './episode-store.js';
|
|
26
|
+
import { TopicStore } from './topic-store.js';
|
|
27
|
+
import { KnowledgeStore } from './knowledge-store.js';
|
|
28
|
+
import { TemporalStore } from './temporal-store.js';
|
|
29
|
+
import { isSafeForSharedVisibility } from './secret-scanner.js';
|
|
30
|
+
// ─── Agent-to-Domain Map ────────────────────────────────────────
// Associates well-known agent IDs with the primary domain used to tag
// extracted facts, so that domain-scoped retrieval
// (e.g. getActiveFacts({ domain: 'infrastructure' })) returns results.
// Agents not listed here fall back to 'general'.
const AGENT_DOMAIN_MAP = Object.fromEntries([
  // infrastructure seats
  ['forge', 'infrastructure'],
  ['vigil', 'infrastructure'],
  ['pylon', 'infrastructure'],
  ['plane', 'infrastructure'],
  // product seats
  ['compass', 'product'],
  ['helm', 'product'],
  ['chisel', 'product'],
  ['facet', 'product'],
  // security seats
  ['sentinel', 'security'],
  ['bastion', 'security'],
  ['gauge', 'security'],
  // single-seat domains
  ['clarity', 'ux'],
  ['anvil', 'governance'],
  ['vanguard', 'strategy'],
  ['crucible', 'development'],
  ['relay', 'communications'],
  // catch-all channels
  ['main', 'general'],
  ['channel-mini', 'general'],
]);
/**
 * Resolve the domain label used to tag an agent's extracted facts.
 * Unknown agent IDs map to 'general'.
 */
function domainForAgent(agentId) {
  const mapped = AGENT_DOMAIN_MAP[agentId];
  return mapped ?? 'general';
}
|
|
62
|
+
/**
 * Scan message text for fact-like statements using pattern heuristics.
 *
 * Each pattern group carries a confidence score reflecting how reliably
 * that phrasing indicates a durable fact. Every raw match is passed
 * through isQualityFact() to drop code fragments, questions, table rows,
 * and operational boilerplate.
 *
 * @param {string} content - raw message text
 * @returns {Array<{content: string, confidence: number}>} fact candidates
 */
function extractFactCandidates(content) {
  const candidates = [];
  if (!content || content.length < 20) {
    return candidates;
  }
  // Pattern groups, each with the confidence assigned to its matches.
  const patternGroups = [
    {
      // Decisions: "decided to", "agreed on", "going with" — high confidence
      confidence: 0.75,
      patterns: [
        /(?:we |I |they )?(?:decided|agreed|chose|selected|committed) (?:to |on |that )(.{20,200})/gi,
        /(?:going|went) with (.{10,150})/gi,
        /decision:\s*(.{10,200})/gi,
      ],
    },
    {
      // Learned/discovered statements — medium-high confidence
      confidence: 0.65,
      patterns: [
        /(?:learned|discovered|found out|realized|noticed) (?:that |)(.{20,200})/gi,
        /turns out (?:that |)(.{20,200})/gi,
        /(?:TIL|FYI|note to self)[:\s]+(.{10,200})/gi,
      ],
    },
    {
      // Config/setting changes — medium confidence; matches more promiscuously
      confidence: 0.60,
      patterns: [
        /(?:set|changed|updated|configured) (\S+ to .{5,150})/gi,
        /(?:model|config|setting)[:\s]+(\S+\s*(?:→|->|=|is)\s*.{5,100})/gi,
      ],
    },
    {
      // Preferences — medium confidence
      confidence: 0.60,
      patterns: [
        /(?:prefer|always use|never use|don't use|avoid) (.{10,150})/gi,
        /(?:ragesaq|operator) (?:wants|prefers|likes|hates|dislikes) (.{10,150})/gi,
      ],
    },
    {
      // Operational events: deployments, incidents, fixes — high confidence
      confidence: 0.70,
      patterns: [
        /(?:deployed|shipped|released|rolled back|reverted) (.{10,200})/gi,
        /(?:outage|incident|failure|broke|broken|crashed)(?:: | — | - )(.{10,200})/gi,
        /(?:fixed|resolved|patched|hotfixed) (.{10,200})/gi,
      ],
    },
  ];
  for (const { patterns, confidence } of patternGroups) {
    for (const pattern of patterns) {
      // matchAll iterates /g patterns without manual lastIndex bookkeeping;
      // the regexes are created fresh each call, so no stale state exists.
      for (const match of content.matchAll(pattern)) {
        const candidate = match[1].trim();
        // Quality gate: reject noise that matched a pattern but isn't a real fact
        if (isQualityFact(candidate)) {
          candidates.push({ content: candidate, confidence });
        }
      }
    }
  }
  return candidates;
}
|
|
117
|
+
/**
 * TUNE-011: Quality gate for fact extraction.
 * Rejects pattern matches that are code, table fragments, questions,
 * or too short to be meaningful facts.
 */
/**
 * Operational boilerplate phrases that appear frequently across sessions
 * but carry zero signal value. High knn similarity makes them *worse*
 * retrieval candidates — they match everything and contaminate episodes.
 */
const OPERATIONAL_BOILERPLATE = [
  /timed?\s*out\s*waiting/i,
  /message\s*was\s*delivered/i,
  /no\s*reply\s*(back\s*)?yet/i,
  /picked?\s*it\s*up\s*on\s*(next\s*)?heartbeat/i,
  /session\s*not\s*found/i,
  /\bretrying\b/i,
  /tool\s*call\s*failed/i,
  /exec\s*completed/i,
  /no\s*reply\s*needed/i,
  /still\s*waiting/i,
  /will\s*pick\s*(it\s*)?up\s*(on\s*(next|the))?/i,
  /message\s*is\s*in\s*(his|her|their|the)\s*queue/i,
  /sent\s+to\s+(anvil|compass|clarity|sentinel|vanguard|forge)/i,
  /dispatched\s+(it\s+)?to/i,
  /timed\s*out\s*after/i,
  /\bNO_REPLY\b/,
];
function isQualityFact(content) {
  // Length gates: under 40 chars is a fragment, over 300 is a captured
  // paragraph rather than an atomic fact.
  if (content.length < 40 || content.length > 300) {
    return false;
  }
  // Fewer than 5 words — fragment.
  const words = content.split(/\s+/).filter((w) => w.length > 0);
  if (words.length < 5) {
    return false;
  }
  // Questions are not assertions of fact.
  if (content.trimEnd().endsWith('?')) {
    return false;
  }
  // Code indicators: structural openers, paired braces, keywords, arrows, SQL.
  if (/^[\s{}\[\]|`]/.test(content)) {
    return false; // starts with a structural character
  }
  if (/[{}].*[{}]/.test(content)) {
    return false; // paired braces — looks like a code block
  }
  if (/^\s*(import|export|const|let|var|function|class|interface|type|return|if|for|while|switch)\s/i.test(content)) {
    return false; // opens with a code keyword
  }
  if (/=>\s*[{(]/.test(content)) {
    return false; // arrow function body
  }
  if (/SELECT\s|INSERT\s|UPDATE\s|DELETE\s|CREATE\s/i.test(content)) {
    return false; // SQL statement
  }
  // Table cell fragments: pipe-delimited cells.
  if (/\|.*\|.*\|/.test(content)) {
    return false;
  }
  // Regex literals leaked from source code.
  if (/\/[^/]+\/[gimsuvy]*[,;]/.test(content)) {
    return false;
  }
  // Raw file paths without context (tool output, not facts).
  if (/^\/[\w/.-]+$/.test(content.trim())) {
    return false;
  }
  // Markdown formatting artifacts.
  if (content.startsWith('```') || content.startsWith('---') || content.startsWith('===')) {
    return false;
  }
  // Git output: commit hashes, diff headers, commit summaries.
  if (/^[a-f0-9]{7,40}\s/.test(content) || /^\+\+\+|^---\s[ab]\//.test(content)) {
    return false;
  }
  if (/^\d+ files? changed/.test(content)) {
    return false;
  }
  // Stack trace frames.
  if (/^\s*at\s+\S+\s+\(/.test(content) || /node:internal/.test(content)) {
    return false;
  }
  // Natural language is mostly letters; a low alpha ratio means code/data.
  const letterCount = (content.match(/[a-zA-Z]/g) || []).length;
  if (letterCount / content.length < 0.5) {
    return false;
  }
  // TUNE-013: External/untrusted content markers — web search excerpts,
  // external doc pulls, and injected context blocks should never become facts.
  if (/<<<\s*(END_EXTERNAL|BEGIN_EXTERNAL|EXTERNAL_UNTRUSTED|UNTRUSTED_CONTENT)/i.test(content)) {
    return false;
  }
  if (/EXTERNAL_UNTRUSTED_CONTENT\s+id=/.test(content)) {
    return false;
  }
  // TUNE-013: Multi-paragraph content — real extracted facts are single
  // sentences; more than 2 newlines means a paragraph or structured block.
  const newlines = (content.match(/\n/g) || []).length;
  if (newlines > 2) {
    return false;
  }
  // TUNE-013: URL-heavy content — one URL in a fact is ok; multiple means
  // an external source snippet, not an actionable fact.
  const urls = content.match(/https?:\/\/\S+/g) || [];
  if (urls.length >= 2) {
    return false;
  }
  // TUNE-013: Content starting with a markdown heading is section text.
  if (/^#{1,4}\s/.test(content.trim())) {
    return false;
  }
  // TUNE-014: Operational boilerplate — phrases common across sessions that
  // produce high knn similarity but carry zero signal; they cross-contaminate
  // episodes, so any match disqualifies the candidate.
  return !OPERATIONAL_BOILERPLATE.some((pattern) => pattern.test(content));
}
|
|
220
|
+
/**
 * Classify a message for episode significance.
 *
 * Applies noise gates first (length, heartbeat, code-like content), then a
 * cascade of event detectors ordered from most to least significant.
 *
 * @param {{textContent?: string, isHeartbeat?: boolean}} msg
 * @returns {{type: string, significance: number, summary: string} | null}
 *   episode classification, or null when the message is not significant
 */
function classifyEpisode(msg) {
  const content = msg.textContent || '';
  if (!content || content.length < 50) {
    return null; // Raised from 30
  }
  // Heartbeats are never significant.
  if (msg.isHeartbeat) {
    return null;
  }
  // Messages that are primarily code/data output (tool results, logs).
  const letterCount = (content.match(/[a-zA-Z]/g) || []).length;
  if (letterCount / content.length < 0.4) {
    return null;
  }
  // Short messages opening with structural output indicators are data dumps.
  if (/^[\s]*[{[\d|#=+\-]/.test(content) && content.length < 200) {
    return null;
  }
  const lower = content.toLowerCase();
  const summary = content.slice(0, 200);
  // ── Negation-aware incident detection ──────────────────────
  // Only trigger on actual incidents, not "zero failures" or "no crashes".
  const incidentTerms = ['outage', 'incident', 'failure', 'crash', 'broke', 'broken', 'emergency'];
  const negationPrefixes = ['no ', 'zero ', 'without ', '0 ', 'never ', 'fixed ', 'resolved '];
  const mentionsIncident = incidentTerms.some((term) => lower.includes(term));
  const negated = mentionsIncident && incidentTerms.some((term) => {
    const at = lower.indexOf(term);
    if (at < 0) {
      return false;
    }
    // Inspect the 15 characters before the term for a negating word.
    const before = lower.substring(Math.max(0, at - 15), at).toLowerCase();
    return negationPrefixes.some((neg) => before.includes(neg.trimEnd()));
  });
  if (mentionsIncident && !negated && content.length > 100 &&
      !/^\s*(\/\/|#|\*|\/\*|```|import|const|function)/.test(content)) {
    // Genuine incident — describing a problem, not a code block analyzing one.
    return { type: 'incident', significance: 0.9, summary };
  }
  // Deployment events (high significance)
  if (content.length > 60 &&
      /(?:deployed|shipped|released|went live|now live|go live)/i.test(content)) {
    return { type: 'deployment', significance: 0.8, summary };
  }
  // Architecture decisions (high significance)
  if (content.length > 80 &&
      /(?:decided on|chose|committed to|architecture|design decision)/i.test(content)) {
    return { type: 'decision', significance: 0.7, summary };
  }
  // Discovery/insight (medium significance)
  if (content.length > 80 &&
      /(?:discovered|found|realized|root cause|turns out)/i.test(content)) {
    return { type: 'discovery', significance: 0.5, summary };
  }
  // Config changes (medium significance) — TUNE-004: raised to 0.5
  if (content.length > 60 &&
      /(?:changed|updated|migrated|switched|model.*(?:→|->|to))/i.test(content)) {
    // Plain tool output confirmations are not episodes.
    if (/^Successfully replaced|^\[main [a-f0-9]|^ok \d+ -/.test(content)) {
      return null;
    }
    return { type: 'config_change', significance: 0.5, summary };
  }
  // Milestone/completion (medium significance)
  if (content.length > 60 &&
      /(?:completed|finished|done|milestone|all tests pass|all green)/i.test(content)) {
    // Tool output that happens to contain "done" is not a milestone.
    if (/^Successfully|^\[main|^ok \d+/.test(content)) {
      return null;
    }
    return { type: 'milestone', significance: 0.5, summary };
  }
  return null;
}
|
|
294
|
+
/**
 * Extract knowledge candidates — structured (domain, key, value) tuples.
 *
 * Scans message text for filesystem paths, service/port bindings, and
 * fleet identity statements, normalizing each into a { domain, key, value }
 * entry. Results are deduplicated by domain+key, keeping the first hit.
 *
 * @param {string} content - raw message text
 * @param {string} agentId - originating agent; currently unused, kept for
 *   interface stability (callers pass it; future extractors may scope by it)
 * @returns {Array<{domain: string, key: string, value: string}>}
 */
function extractKnowledgeCandidates(content, agentId) {
  const results = [];
  if (!content || content.length < 30) {
    return results;
  }
  // TUNE-012: Broadened path extraction.
  // Real messages use paths inline without explicit prefixes like "located at".
  // Match any absolute path that's at least 3 segments deep (filters /tmp, /etc noise).
  const pathMatches = content.matchAll(/(?:`([/][\w./-]{10,})`|(?:^|[\s:=])(\/home\/[\w./-]{10,}|\/opt\/[\w./-]{10,}|\/var\/[\w./-]{10,}))/gm);
  for (const match of pathMatches) {
    const value = (match[1] || match[2]).replace(/[`'".,;:)]+$/, '').trim();
    if (value.length > 10 && value.split('/').length >= 4) {
      const segments = value.split('/').filter((s) => s.length > 0);
      const lastSeg = segments[segments.length - 1] || '';
      // Reject truncated paths (last segment < 3 chars unless it's a known ext)
      if (lastSeg.length < 3 && !lastSeg.includes('.')) {
        continue;
      }
      const key = lastSeg || segments[segments.length - 2] || 'unknown';
      results.push({ domain: 'paths', key, value });
    }
  }
  // Explicit location references (original patterns, kept for completeness)
  const locationPatterns = [
    /(?:path|located at|lives at|stored at|found at|repo at|running at)[:\s]+(`[^`]+`|\/\S+)/gi,
    /(?:workspace|directory|repo|project)[:\s]+(`[^`]+`|\/\S+)/gi,
  ];
  for (const pattern of locationPatterns) {
    for (const match of content.matchAll(pattern)) {
      // BUGFIX: the previous cleanup used a *global* replace, stripping
      // punctuation everywhere and mangling interior dots in filenames
      // (e.g. /opt/app/config.json → /opt/app/configjson). Strip only
      // wrapping backticks/quotes and trailing punctuation, matching the
      // trailing-only cleanup used in the inline-path branch above.
      const value = match[1].replace(/^[`'"]+/, '').replace(/[`'".,;:)]+$/, '').trim();
      if (value.startsWith('/') && value.length > 10 && !results.some((r) => r.value === value)) {
        const key = value.split('/').pop() || 'unknown';
        results.push({ domain: 'paths', key, value });
      }
    }
  }
  // Service/port patterns — broadened to catch "port NNNN" and "on :NNNN"
  const servicePatterns = [
    /(\S+)\s+(?:runs on|listening on|port|on port)\s+(\d{2,5})/gi,
    /(?:service|server|daemon)\s+(\S+)\s+(?:on |at |: )(\S+)/gi,
    /(?:localhost|127\.0\.0\.1):(\d{2,5})\b/gi,
  ];
  for (const pattern of servicePatterns) {
    const isHostPort = pattern.source.includes('localhost');
    for (const match of content.matchAll(pattern)) {
      if (isHostPort) {
        // localhost:PORT pattern — key is the port, value is the full host:port text
        results.push({ domain: 'services', key: `port:${match[1]}`, value: match[0] });
      } else {
        results.push({ domain: 'services', key: match[1], value: match[2] });
      }
    }
  }
  // Agent identity patterns — broadened
  const identityPatterns = [
    /(\w+)\s+(?:is|was)\s+(?:the\s+)?(\w+)\s+(?:seat|director|specialist|council)/gi,
    /(\w+)\s+(?:reports to|owned by|managed by)\s+(\w+)/gi,
    /(?:agents?|directors?|seats?)[:\s]+(\w+)(?:\s*[,/]\s*(\w+))+/gi,
  ];
  for (const pattern of identityPatterns) {
    for (const match of content.matchAll(pattern)) {
      // The third pattern's trailing group may be absent; skip those matches.
      if (match[2]) {
        results.push({ domain: 'fleet', key: match[1].toLowerCase(), value: `${match[1]} ${match[2]}` });
      }
    }
  }
  // Dedup by domain+key, keeping the first occurrence.
  const seen = new Set();
  return results.filter((r) => {
    const k = `${r.domain}:${r.key}`;
    if (seen.has(k)) {
      return false;
    }
    seen.add(k);
    return true;
  });
}
|
|
377
|
+
/**
 * Detect a conversation topic from message content.
 *
 * Known product names take priority (the capture preserves the casing used
 * in the message); otherwise broad infrastructure or security vocabulary is
 * checked. Returns the topic name, or null when no topic can be inferred
 * (including for messages under 50 characters).
 */
function detectTopic(content) {
  if (!content || content.length < 50) {
    return null;
  }
  // Product/project name detection
  const product = /\b(HyperMem|ClawText|ClawDash|ClawCanvas|ClawCouncil|ClawTomation|OpenClaw|ClawDispatch)\b/i.exec(content);
  if (product) {
    return product[1];
  }
  // Infrastructure topic detection
  if (/\b(?:redis|sqlite|database|migration|deployment|docker|nginx)\b/i.test(content)) {
    return 'infrastructure';
  }
  // Security topic detection
  if (/\b(?:security|auth|permission|access|token|credential)\b/i.test(content)) {
    return 'security';
  }
  return null;
}
|
|
398
|
+
// ─── Background Indexer ─────────────────────────────────────────
export class BackgroundIndexer {
    // Accessor returning a per-agent message database (passed to the
    // TopicSynthesizer's lazy resolver in the constructor).
    getMessageDb;
    // Accessor returning the shared library database (L4, library.db).
    getLibraryDb;
    // Returns the list of agent IDs to process each tick.
    listAgents;
    // Cursor/watermark accessor — stored in the constructor; usage is not
    // visible in this chunk (presumably read by the indexing pass; verify).
    getCursor;
    // Resolved indexer configuration (defaults applied in the constructor).
    config;
    // Configuration for the dreaming promotion pass (defaults to {}).
    dreamerConfig;
    // setInterval handle while started; null when stopped.
    intervalHandle = null;
    // Re-entrancy guard: true while a tick() is in flight.
    running = false;
    // Optional vector store for embedding at index time; when null the
    // indexer runs in FTS5-only mode (see setVectorStore).
    vectorStore = null;
    // TopicSynthesizer instance; null when no library DB is available.
    synthesizer = null;
    // Ticks completed — drives the every-10-ticks knowledge lint pass.
    tickCount = 0;
|
|
411
|
+
constructor(config, getMessageDb, getLibraryDb, listAgents, getCursor, dreamerConfig) {
|
|
412
|
+
this.getMessageDb = getMessageDb;
|
|
413
|
+
this.getLibraryDb = getLibraryDb;
|
|
414
|
+
this.listAgents = listAgents;
|
|
415
|
+
this.getCursor = getCursor;
|
|
416
|
+
// Initialize synthesizer if libraryDb accessor is available
|
|
417
|
+
if (getLibraryDb) {
|
|
418
|
+
const libDb = getLibraryDb();
|
|
419
|
+
if (libDb) {
|
|
420
|
+
this.synthesizer = new TopicSynthesizer(libDb, (agentId) => {
|
|
421
|
+
if (!getMessageDb)
|
|
422
|
+
return null;
|
|
423
|
+
try {
|
|
424
|
+
return getMessageDb(agentId);
|
|
425
|
+
}
|
|
426
|
+
catch {
|
|
427
|
+
return null;
|
|
428
|
+
}
|
|
429
|
+
});
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
this.config = {
|
|
433
|
+
enabled: config?.enabled ?? true,
|
|
434
|
+
factExtractionMode: config?.factExtractionMode ?? 'tiered',
|
|
435
|
+
topicDormantAfter: config?.topicDormantAfter ?? '24h',
|
|
436
|
+
topicClosedAfter: config?.topicClosedAfter ?? '7d',
|
|
437
|
+
factDecayRate: config?.factDecayRate ?? 0.01,
|
|
438
|
+
episodeSignificanceThreshold: config?.episodeSignificanceThreshold ?? 0.5,
|
|
439
|
+
periodicInterval: config?.periodicInterval ?? 60000, // 1 minute
|
|
440
|
+
batchSize: config?.batchSize ?? 128,
|
|
441
|
+
maxMessagesPerTick: config?.maxMessagesPerTick ?? 500,
|
|
442
|
+
};
|
|
443
|
+
this.dreamerConfig = dreamerConfig ?? {};
|
|
444
|
+
}
|
|
445
|
+
/**
|
|
446
|
+
* Set the vector store for embedding new facts/episodes at index time.
|
|
447
|
+
* Optional — if not set, indexer runs without embedding (FTS5-only mode).
|
|
448
|
+
*/
|
|
449
|
+
setVectorStore(vs) {
|
|
450
|
+
this.vectorStore = vs;
|
|
451
|
+
}
|
|
452
|
+
/**
|
|
453
|
+
* Start periodic indexing.
|
|
454
|
+
*/
|
|
455
|
+
start() {
|
|
456
|
+
if (!this.config.enabled)
|
|
457
|
+
return;
|
|
458
|
+
if (this.intervalHandle)
|
|
459
|
+
return;
|
|
460
|
+
// Run once immediately
|
|
461
|
+
this.tick().catch(err => {
|
|
462
|
+
console.error('[indexer] Initial tick failed:', err);
|
|
463
|
+
});
|
|
464
|
+
// Run episode vector backfill once at startup (no-op if already done)
|
|
465
|
+
if (this.vectorStore && this.getLibraryDb) {
|
|
466
|
+
this.backfillEpisodeVectors().catch(err => {
|
|
467
|
+
console.error('[indexer] Episode backfill failed:', err);
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
// Then periodically
|
|
471
|
+
this.intervalHandle = setInterval(() => {
|
|
472
|
+
this.tick().catch(err => {
|
|
473
|
+
console.error('[indexer] Periodic tick failed:', err);
|
|
474
|
+
});
|
|
475
|
+
}, this.config.periodicInterval);
|
|
476
|
+
console.log(`[indexer] Started with interval ${this.config.periodicInterval}ms, batchSize ${this.config.batchSize}, maxPerTick ${this.config.maxMessagesPerTick}`);
|
|
477
|
+
}
|
|
478
|
+
/**
|
|
479
|
+
* Stop periodic indexing.
|
|
480
|
+
*/
|
|
481
|
+
stop() {
|
|
482
|
+
if (this.intervalHandle) {
|
|
483
|
+
clearInterval(this.intervalHandle);
|
|
484
|
+
this.intervalHandle = null;
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
/**
 * Run one indexing pass across all agents.
 *
 * Pipeline per tick: per-agent message indexing (budgeted by
 * config.maxMessagesPerTick), decay, optional topic synthesis, a periodic
 * knowledge lint, an optional dreaming promotion pass, and finally
 * proactive noise-sweep / tool-decay passes over active conversations.
 *
 * @returns {Promise<Array>} Per-agent stats for agents that produced work
 *   this tick; empty when skipped, or when DB accessors are missing.
 */
async tick() {
    // Re-entrancy guard: a slow tick must not overlap the next interval fire.
    if (this.running) {
        console.log('[indexer] Skipping tick — previous run still active');
        return [];
    }
    this.running = true;
    const results = [];
    try {
        if (!this.listAgents || !this.getMessageDb || !this.getLibraryDb) {
            console.warn('[indexer] Missing database accessors — skipping');
            return [];
        }
        const agents = this.listAgents();
        const libraryDb = this.getLibraryDb();
        // Global per-tick message budget shared across all agents.
        let tickTotal = 0;
        for (const agentId of agents) {
            if (tickTotal >= this.config.maxMessagesPerTick) {
                console.log(`[indexer] maxMessagesPerTick (${this.config.maxMessagesPerTick}) reached — deferring remaining agents`);
                break;
            }
            try {
                const stats = await this.processAgent(agentId, libraryDb);
                tickTotal += stats.messagesProcessed;
                // Only surface agents that actually did something.
                if (stats.messagesProcessed > 0 || stats.tombstoned > 0) {
                    results.push(stats);
                }
            }
            catch (err) {
                // One agent failing must not abort the rest of the fleet.
                const msg = err instanceof Error ? err.message : String(err);
                console.error(`[indexer] Failed to process ${agentId}: ${msg}`);
            }
        }
        if (results.length > 0) {
            const totalMessages = results.reduce((s, r) => s + r.messagesProcessed, 0);
            const totalFacts = results.reduce((s, r) => s + r.factsExtracted, 0);
            const totalEpisodes = results.reduce((s, r) => s + r.episodesRecorded, 0);
            const totalTombstoned = results.reduce((s, r) => s + r.tombstoned, 0);
            const tombstonedPart = totalTombstoned > 0 ? `, ${totalTombstoned} tombstoned` : '';
            console.log(`[indexer] Tick complete: ${totalMessages} messages → ${totalFacts} facts, ${totalEpisodes} episodes${tombstonedPart}`);
        }
        // Run decay on every tick
        this.applyDecay(libraryDb);
        // Topic synthesis — run for each agent after main indexer tick
        if (this.synthesizer) {
            for (const agentId of agents) {
                try {
                    const synthResult = this.synthesizer.tick(agentId);
                    if (synthResult.topicsSynthesized > 0) {
                        console.log(`[indexer] Synthesized ${synthResult.topicsSynthesized} topics for ${agentId}, ${synthResult.knowledgeEntriesWritten} knowledge entries`);
                    }
                }
                catch {
                    // Non-fatal
                }
            }
        }
        // Knowledge lint — every LINT_FREQUENCY ticks
        // NOTE(review): the frequency is the hard-coded literal 10 below, not a
        // named LINT_FREQUENCY constant — keep this comment and the code in sync.
        this.tickCount++;
        if (this.tickCount % 10 === 0 && this.getLibraryDb) {
            try {
                const libDb = this.getLibraryDb();
                if (libDb) {
                    const lint = lintKnowledge(libDb);
                    if (lint.staleDecayed > 0 || lint.coverageGaps.length > 0) {
                        console.log(`[indexer] Lint: ${lint.staleDecayed} stale decayed, ${lint.orphansFound} orphans, ${lint.coverageGaps.length} coverage gaps`);
                    }
                }
            }
            catch {
                // Non-fatal
            }
        }
        // Dreaming promotion pass — every tickInterval ticks (default 12 = ~1hr)
        const dreamerEnabled = this.dreamerConfig.enabled ?? false;
        const dreamerTickInterval = this.dreamerConfig.tickInterval ?? 12;
        if (dreamerEnabled && this.tickCount % dreamerTickInterval === 0 && this.getLibraryDb) {
            try {
                const libDb = this.getLibraryDb();
                if (libDb) {
                    const dreamResults = await runDreamingPassForFleet(agents, libDb, this.dreamerConfig);
                    const totalPromoted = dreamResults.reduce((s, r) => s + r.promoted, 0);
                    if (totalPromoted > 0) {
                        console.log(`[indexer] Dreaming: promoted ${totalPromoted} facts across ${dreamResults.length} agents`);
                    }
                }
            }
            catch (err) {
                // Non-fatal — dreaming failures never block indexing
                // NOTE(review): err may not be an Error instance here, so
                // err.message can be undefined — consider the instanceof guard
                // used in the per-agent catch above.
                console.warn('[indexer] Dreaming pass failed (non-fatal):', err.message);
            }
        }
        // Run proactive passes on each agent's message DB
        for (const agentId of agents) {
            const messageDb = this.getMessageDb(agentId);
            if (!messageDb)
                continue;
            // Get active conversations for this agent
            let convRows;
            try {
                convRows = messageDb.prepare(`SELECT id FROM conversations WHERE agent_id = ? AND status = 'active' ORDER BY updated_at DESC LIMIT 10`).all(agentId);
            }
            catch {
                // Query failed (e.g. schema missing) — skip this agent's proactive pass.
                continue;
            }
            for (const conv of convRows) {
                const noiseSweepResult = runNoiseSweep(messageDb, conv.id);
                const toolDecayResult = runToolDecay(messageDb, conv.id);
                // Log only if something changed
                if (noiseSweepResult.messagesDeleted > 0 || toolDecayResult.messagesUpdated > 0) {
                    console.log(`[indexer] Proactive pass (conv ${conv.id}): swept ${noiseSweepResult.messagesDeleted} noise msgs, ` +
                        `decayed ${toolDecayResult.messagesUpdated} tool results (${toolDecayResult.bytesFreed} bytes freed)`);
                }
            }
        }
    }
    finally {
        // Always release the re-entrancy guard, even on throw.
        this.running = false;
    }
    return results;
}
|
|
610
|
+
/**
 * Process a single agent's unindexed messages.
 *
 * When a cursor fetcher is available, messages are split into two tiers:
 * - Post-cursor (id > cursor.lastSentId): "unseen" by the model, high-signal priority
 * - Pre-cursor (id <= cursor.lastSentId): already in the model's context window, lower priority
 * Post-cursor messages are processed first. This ensures the indexer prioritizes
 * content the model hasn't seen yet — decisions, incidents, and discoveries that
 * happened between context windows.
 *
 * @param {string} agentId - Agent whose message backlog to index.
 * @param {*} libraryDb - Shared library database handle (facts/episodes/topics/knowledge).
 * @returns {Promise<object>} Per-agent stats: messagesProcessed, factsExtracted,
 *   episodesRecorded, topicsUpdated, knowledgeUpserted, tombstoned,
 *   postCursorMessages, elapsedMs.
 */
async processAgent(agentId, libraryDb) {
    const start = Date.now();
    const messageDb = this.getMessageDb(agentId);
    // NOTE(review): messageStore is constructed but never used below — candidate
    // for removal unless the constructor has required side effects; confirm.
    const messageStore = new MessageStore(messageDb);
    const factStore = new FactStore(libraryDb);
    const episodeStore = new EpisodeStore(libraryDb);
    const topicStore = new TopicStore(libraryDb);
    const knowledgeStore = new KnowledgeStore(libraryDb);
    const temporalStore = new TemporalStore(libraryDb);
    // Get watermark — last processed message ID for this agent
    const watermark = this.getWatermark(libraryDb, agentId);
    const lastProcessedId = watermark?.lastMessageId ?? 0;
    // Fetch unindexed messages (batch size from config)
    const messages = this.getUnindexedMessages(messageDb, agentId, lastProcessedId, this.config.batchSize);
    if (messages.length === 0) {
        // Even with no new messages, run tombstone cleanup in case supersedes
        // were written externally (e.g. via FactStore.markSuperseded()).
        let tombstoned = 0;
        if (this.vectorStore) {
            tombstoned = this.vectorStore.tombstoneSuperseded();
        }
        return {
            agentId,
            messagesProcessed: 0,
            factsExtracted: 0,
            episodesRecorded: 0,
            topicsUpdated: 0,
            knowledgeUpserted: 0,
            tombstoned,
            postCursorMessages: 0,
            elapsedMs: Date.now() - start,
        };
    }
    // ── Cursor-aware prioritization ──────────────────────────────
    // Fetch the cursor boundary to split messages into post-cursor (unseen)
    // and pre-cursor (already in context). Post-cursor messages are processed
    // first — they're the highest signal for fact/episode extraction.
    let cursorBoundary = 0;
    if (this.getCursor) {
        try {
            // Get session key from the first message's conversation
            // (messages is non-empty here — the early return above guarantees it).
            const sessionKey = this.getSessionKeyForMessage(messageDb, messages[0].conversationId);
            if (sessionKey) {
                const cursor = await this.getCursor(agentId, sessionKey);
                if (cursor) {
                    cursorBoundary = cursor.lastSentId;
                }
            }
        }
        catch {
            // Cursor fetch is best-effort — fall through to default ordering
        }
    }
    // Sort: post-cursor messages first (highest signal), then pre-cursor.
    // Within each tier, maintain original (ascending) order.
    const postCursor = messages.filter(m => m.id > cursorBoundary);
    const preCursor = messages.filter(m => m.id <= cursorBoundary);
    const ordered = [...postCursor, ...preCursor];
    let factsExtracted = 0;
    let episodesRecorded = 0;
    let topicsUpdated = 0;
    let knowledgeUpserted = 0;
    // NOTE(review): supersededFacts is tracked below but never returned in the
    // stats object — add it to the return value or drop the counter.
    let supersededFacts = 0;
    let maxMessageId = lastProcessedId;
    for (const msg of ordered) {
        const content = msg.textContent || '';
        // Track the highest id seen so the watermark advances even past
        // messages we skip as noise.
        if (msg.id > maxMessageId)
            maxMessageId = msg.id;
        // Skip heartbeats and very short messages
        if (msg.isHeartbeat || content.length < 30)
            continue;
        // 1. Extract facts (TUNE-003: confidence varies by extraction pattern type)
        const factCandidates = extractFactCandidates(content);
        for (const { content: factContent, confidence: factConfidence } of factCandidates) {
            try {
                const fact = factStore.addFact(agentId, factContent, {
                    scope: 'agent',
                    domain: domainForAgent(agentId),
                    confidence: factConfidence,
                    sourceType: 'indexer',
                    sourceSessionKey: this.getSessionKeyForMessage(messageDb, msg.conversationId),
                    sourceRef: `msg:${msg.id}`,
                });
                factsExtracted++;
                // ── Supersedes detection ─────────────────────────────────
                // Check if the newly extracted fact supersedes an existing one.
                // A supersede is detected when an existing active fact shares the
                // same 60-char prefix (same topic, different phrasing/update).
                if (fact.id) {
                    // Index into temporal store (ingest_at as proxy, confidence=0.5)
                    temporalStore.indexFact(fact.id, agentId, fact.createdAt);
                    const oldFactId = factStore.findSupersedableByContent(agentId, factContent);
                    if (oldFactId !== null && oldFactId !== fact.id) {
                        const didSupersede = factStore.markSuperseded(oldFactId, fact.id);
                        if (didSupersede) {
                            supersededFacts++;
                            // Immediately remove the stale vector so it can't surface in KNN recall
                            if (this.vectorStore) {
                                this.vectorStore.removeItem('facts', oldFactId);
                            }
                        }
                    }
                }
                // Embed new fact for semantic recall (best-effort, non-blocking)
                if (this.vectorStore && fact.id) {
                    this.vectorStore.indexItem('facts', fact.id, factContent, fact.domain || undefined)
                        .catch(() => { });
                }
            }
            catch {
                // Duplicate or constraint violation — skip
            }
        }
        // 2. Classify episodes
        const episode = classifyEpisode(msg);
        if (episode && episode.significance >= this.config.episodeSignificanceThreshold) {
            // Secret gate: shared visibility requires clean content.
            // Downgrade to 'private' rather than drop, so we don't lose the episode.
            const episodeVisibility = isSafeForSharedVisibility(episode.summary) ? 'org' : 'private';
            try {
                const recorded = episodeStore.record(agentId, episode.type, episode.summary, {
                    significance: episode.significance,
                    visibility: episodeVisibility,
                    sessionKey: this.getSessionKeyForMessage(messageDb, msg.conversationId),
                    sourceMessageId: msg.id,
                });
                episodesRecorded++;
                // Embed episodes at sig>=0.5 (lowered from 0.7 — discovery/config_change events
                // at sig=0.5 are real operational events, not noise).
                if (this.vectorStore && recorded?.id && episode.significance >= 0.5) {
                    this.vectorStore.indexItem('episodes', recorded.id, episode.summary, episode.type)
                        .catch(() => { });
                }
            }
            catch {
                // Skip duplicate episodes
            }
        }
        // 3. Detect and update topics
        const topicName = detectTopic(content);
        if (topicName) {
            try {
                // Case-insensitive dedupe against currently-active topics only.
                const existingTopics = topicStore.getActive(agentId, 100);
                const existingTopic = existingTopics.find((t) => t.name.toLowerCase() === topicName.toLowerCase());
                if (!existingTopic) {
                    topicStore.create(agentId, topicName, `Auto-detected from conversation`);
                    topicsUpdated++;
                }
            }
            catch {
                // Skip topic creation errors
            }
        }
        // 4. Extract knowledge candidates
        const knowledgeCandidates = extractKnowledgeCandidates(content, agentId);
        for (const { domain, key, value } of knowledgeCandidates) {
            try {
                knowledgeStore.upsert(agentId, domain, key, value, {
                    sourceType: 'indexer',
                    sourceRef: `msg:${msg.id}`,
                });
                knowledgeUpserted++;
            }
            catch {
                // Skip duplicates
            }
        }
    }
    // Update watermark
    this.setWatermark(libraryDb, agentId, maxMessageId);
    // Run tombstone pass: remove vector entries for any facts marked superseded
    // (covers both the supersedes detected above and external markSuperseded calls).
    let tombstoned = 0;
    if (this.vectorStore) {
        tombstoned = this.vectorStore.tombstoneSuperseded();
    }
    return {
        agentId,
        messagesProcessed: messages.length,
        factsExtracted,
        episodesRecorded,
        topicsUpdated,
        knowledgeUpserted,
        tombstoned,
        postCursorMessages: postCursor.length,
        elapsedMs: Date.now() - start,
    };
}
|
|
808
|
+
/**
|
|
809
|
+
* Fetch unindexed messages for an agent.
|
|
810
|
+
*/
|
|
811
|
+
getUnindexedMessages(db, agentId, afterId, limit) {
|
|
812
|
+
const rows = db.prepare(`
|
|
813
|
+
SELECT m.*, c.session_key
|
|
814
|
+
FROM messages m
|
|
815
|
+
JOIN conversations c ON m.conversation_id = c.id
|
|
816
|
+
WHERE m.agent_id = ? AND m.id > ?
|
|
817
|
+
ORDER BY m.id ASC
|
|
818
|
+
LIMIT ?
|
|
819
|
+
`).all(agentId, afterId, limit);
|
|
820
|
+
return rows.map(row => ({
|
|
821
|
+
id: row.id,
|
|
822
|
+
conversationId: row.conversation_id,
|
|
823
|
+
agentId: row.agent_id,
|
|
824
|
+
role: row.role,
|
|
825
|
+
textContent: row.text_content || null,
|
|
826
|
+
toolCalls: row.tool_calls ? JSON.parse(row.tool_calls) : null,
|
|
827
|
+
toolResults: row.tool_results ? JSON.parse(row.tool_results) : null,
|
|
828
|
+
metadata: row.metadata ? JSON.parse(row.metadata) : undefined,
|
|
829
|
+
messageIndex: row.message_index,
|
|
830
|
+
tokenCount: row.token_count || null,
|
|
831
|
+
isHeartbeat: row.is_heartbeat === 1,
|
|
832
|
+
createdAt: row.created_at,
|
|
833
|
+
}));
|
|
834
|
+
}
|
|
835
|
+
/**
|
|
836
|
+
* Get the session key for a conversation ID.
|
|
837
|
+
*/
|
|
838
|
+
getSessionKeyForMessage(db, conversationId) {
|
|
839
|
+
const row = db.prepare('SELECT session_key FROM conversations WHERE id = ?').get(conversationId);
|
|
840
|
+
return row?.session_key;
|
|
841
|
+
}
|
|
842
|
+
/**
|
|
843
|
+
* Get the indexing watermark for an agent.
|
|
844
|
+
*/
|
|
845
|
+
getWatermark(libraryDb, agentId) {
|
|
846
|
+
// Ensure watermarks table exists
|
|
847
|
+
libraryDb.prepare(`
|
|
848
|
+
CREATE TABLE IF NOT EXISTS indexer_watermarks (
|
|
849
|
+
agent_id TEXT PRIMARY KEY,
|
|
850
|
+
last_message_id INTEGER NOT NULL DEFAULT 0,
|
|
851
|
+
last_run_at TEXT NOT NULL
|
|
852
|
+
)
|
|
853
|
+
`).run();
|
|
854
|
+
const row = libraryDb.prepare('SELECT agent_id, last_message_id, last_run_at FROM indexer_watermarks WHERE agent_id = ?').get(agentId);
|
|
855
|
+
if (!row)
|
|
856
|
+
return null;
|
|
857
|
+
return {
|
|
858
|
+
agentId: row.agent_id,
|
|
859
|
+
lastMessageId: row.last_message_id,
|
|
860
|
+
lastRunAt: row.last_run_at,
|
|
861
|
+
};
|
|
862
|
+
}
|
|
863
|
+
/**
|
|
864
|
+
* Set the indexing watermark for an agent.
|
|
865
|
+
*/
|
|
866
|
+
setWatermark(libraryDb, agentId, lastMessageId) {
|
|
867
|
+
const now = new Date().toISOString();
|
|
868
|
+
libraryDb.prepare(`
|
|
869
|
+
INSERT INTO indexer_watermarks (agent_id, last_message_id, last_run_at)
|
|
870
|
+
VALUES (?, ?, ?)
|
|
871
|
+
ON CONFLICT(agent_id) DO UPDATE SET
|
|
872
|
+
last_message_id = excluded.last_message_id,
|
|
873
|
+
last_run_at = excluded.last_run_at
|
|
874
|
+
`).run(agentId, lastMessageId, now);
|
|
875
|
+
}
|
|
876
|
+
/**
 * Apply time-based decay to facts.
 * Increases decay_score for older facts, making them less relevant.
 *
 * Also decays episodes (at half the fact rate) and transitions topic
 * lifecycle: active → dormant → closed, based on configured durations.
 *
 * @param {*} libraryDb - Shared library database handle.
 */
applyDecay(libraryDb) {
    const rate = this.config.factDecayRate;
    // Decay facts that haven't been referenced recently
    // (decay_score is clamped to 1.0; superseded facts are left untouched).
    libraryDb.prepare(`
      UPDATE facts
      SET decay_score = MIN(1.0, decay_score + ?)
      WHERE superseded_by IS NULL
        AND decay_score < 1.0
        AND updated_at < datetime('now', '-1 day')
    `).run(rate);
    // Decay episodes older than 7 days
    // (half the fact rate — episodes age more slowly).
    libraryDb.prepare(`
      UPDATE episodes
      SET decay_score = MIN(1.0, decay_score + ?)
      WHERE decay_score < 1.0
        AND created_at < datetime('now', '-7 days')
    `).run(rate * 0.5);
    // Mark dormant topics
    const dormantThreshold = this.parseDuration(this.config.topicDormantAfter);
    if (dormantThreshold > 0) {
        // Compute threshold timestamp in JS and pass as parameter — avoids SQL template interpolation.
        // NOTE(review): this compares updated_at lexicographically against an
        // ISO-8601 string (with 'T'/'Z') — assumes updated_at is stored in the
        // same ISO format; confirm against the writer side.
        const dormantBefore = new Date(Date.now() - dormantThreshold * 1000).toISOString();
        libraryDb.prepare(`
          UPDATE topics
          SET status = 'dormant'
          WHERE status = 'active'
            AND updated_at < ?
        `).run(dormantBefore);
    }
    // Close old dormant topics
    const closedThreshold = this.parseDuration(this.config.topicClosedAfter);
    if (closedThreshold > 0) {
        const closedBefore = new Date(Date.now() - closedThreshold * 1000).toISOString();
        libraryDb.prepare(`
          UPDATE topics
          SET status = 'closed'
          WHERE status = 'dormant'
            AND updated_at < ?
        `).run(closedBefore);
    }
}
|
|
921
|
+
/**
|
|
922
|
+
* Parse a duration string like "24h", "7d" into seconds.
|
|
923
|
+
*/
|
|
924
|
+
parseDuration(dur) {
|
|
925
|
+
const match = dur.match(/^(\d+)\s*(h|d|m|s)$/);
|
|
926
|
+
if (!match)
|
|
927
|
+
return 0;
|
|
928
|
+
const val = parseInt(match[1]);
|
|
929
|
+
switch (match[2]) {
|
|
930
|
+
case 's': return val;
|
|
931
|
+
case 'm': return val * 60;
|
|
932
|
+
case 'h': return val * 3600;
|
|
933
|
+
case 'd': return val * 86400;
|
|
934
|
+
default: return 0;
|
|
935
|
+
}
|
|
936
|
+
}
|
|
937
|
+
/**
 * One-time backfill: embed episodes with sig>=0.5 that were missed by the
 * old >=0.7 vectorization threshold.
 *
 * Gated by a system_state flag 'indexer:episode_backfill_v1' so it runs
 * exactly once even across gateway restarts. Safe to re-run manually
 * (delete the flag row first) if re-backfill is ever needed.
 *
 * @returns {Promise<void>} Resolves once the backfill has run or been
 *   skipped (no vector store / library DB, or already completed).
 */
async backfillEpisodeVectors() {
    // Both a vector store and a library DB accessor are required.
    if (!this.vectorStore || !this.getLibraryDb)
        return;
    const libraryDb = this.getLibraryDb();
    const BACKFILL_FLAG = 'episode_backfill_v1';
    // Ensure system_state table exists (schema may not have been applied yet)
    try {
        libraryDb.prepare(`
      CREATE TABLE IF NOT EXISTS system_state (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        category TEXT NOT NULL,
        key TEXT NOT NULL,
        value TEXT,
        updated_at TEXT NOT NULL,
        updated_by TEXT,
        ttl TEXT,
        UNIQUE(category, key)
      )
    `).run();
    }
    catch {
        // Table already exists — safe to ignore
    }
    // Check if backfill already completed
    const existing = libraryDb.prepare("SELECT value FROM system_state WHERE category = 'indexer' AND key = ?").get(BACKFILL_FLAG);
    if (existing) {
        // Already done
        return;
    }
    console.log('[indexer] Starting episode vector backfill (sig>=0.5, not yet vectorized)...');
    // Find episodes with sig>=0.5 that have no vec_index_map entry.
    // We join against vec_index_map using a fallback: if the table is in a
    // separate DB (vectors.db), we query it directly via the VectorStore.
    let episodes;
    try {
        episodes = libraryDb.prepare(`
      SELECT id, summary, event_type
      FROM episodes
      WHERE significance >= 0.5
      ORDER BY created_at DESC
    `).all();
    }
    catch {
        // Episodes table absent or schema mismatch — nothing to backfill.
        console.warn('[indexer] Backfill: could not query episodes table');
        return;
    }
    let queued = 0;
    let skipped = 0;
    for (const ep of episodes) {
        // Check if already vectorized
        if (this.vectorStore.hasItem('episodes', ep.id)) {
            skipped++;
            continue;
        }
        try {
            // NOTE(review): each indexItem call is awaited sequentially, so
            // "queued" effectively means "embedded" — a large backlog will take
            // correspondingly long before the completion flag is written.
            await this.vectorStore.indexItem('episodes', ep.id, ep.summary, ep.event_type);
            queued++;
        }
        catch {
            // Non-fatal — keep going
        }
    }
    // Mark backfill complete
    const now = new Date().toISOString();
    libraryDb.prepare(`
      INSERT INTO system_state (category, key, value, updated_at, updated_by)
      VALUES ('indexer', ?, ?, ?, 'indexer')
      ON CONFLICT(category, key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at
    `).run(BACKFILL_FLAG, JSON.stringify({ completedAt: now, queued, skipped }), now);
    console.log(`[indexer] Episode backfill complete: ${queued} queued, ${skipped} already vectorized`);
}
|
|
1016
|
+
/**
|
|
1017
|
+
* Get current watermarks for all agents.
|
|
1018
|
+
*/
|
|
1019
|
+
getWatermarks(libraryDb) {
|
|
1020
|
+
try {
|
|
1021
|
+
const rows = libraryDb.prepare('SELECT agent_id, last_message_id, last_run_at FROM indexer_watermarks ORDER BY agent_id').all();
|
|
1022
|
+
return rows.map(r => ({
|
|
1023
|
+
agentId: r.agent_id,
|
|
1024
|
+
lastMessageId: r.last_message_id,
|
|
1025
|
+
lastRunAt: r.last_run_at,
|
|
1026
|
+
}));
|
|
1027
|
+
}
|
|
1028
|
+
catch {
|
|
1029
|
+
return [];
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
1033
|
+
// ─── Standalone runner ──────────────────────────────────────────
|
|
1034
|
+
/**
 * Construct a background indexer wired to the hypermem database accessors,
 * attaching the vector store when one is provided. Used by the hook or a
 * standalone daemon; the caller is responsible for invoking start().
 */
export function createIndexer(getMessageDb, getLibraryDb, listAgents, config, getCursor, vectorStore, dreamerConfig) {
    const indexer = new BackgroundIndexer(config, getMessageDb, getLibraryDb, listAgents, getCursor, dreamerConfig);
    if (vectorStore) {
        indexer.setVectorStore(vectorStore);
    }
    return indexer;
}
|
|
1044
|
+
//# sourceMappingURL=background-indexer.js.map
|