@hawon/nexus 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -38
- package/dist/cli/index.js +76 -145
- package/dist/index.js +15 -26
- package/dist/mcp/server.js +61 -32
- package/package.json +2 -1
- package/scripts/auto-skill.sh +54 -0
- package/scripts/auto-sync.sh +11 -0
- package/scripts/benchmark.ts +444 -0
- package/scripts/scan-tool-result.sh +46 -0
- package/src/cli/index.ts +79 -172
- package/src/index.ts +17 -29
- package/src/mcp/server.ts +67 -41
- package/src/memory-engine/index.ts +4 -6
- package/src/memory-engine/nexus-memory.test.ts +437 -0
- package/src/memory-engine/nexus-memory.ts +631 -0
- package/src/memory-engine/semantic.ts +380 -0
- package/src/parser/parse.ts +1 -21
- package/src/promptguard/advanced-rules.ts +129 -12
- package/src/promptguard/entropy.ts +21 -2
- package/src/promptguard/evolution/auto-update.ts +16 -6
- package/src/promptguard/multilingual-rules.ts +68 -0
- package/src/promptguard/rules.ts +87 -2
- package/src/promptguard/scanner.test.ts +262 -0
- package/src/promptguard/scanner.ts +1 -1
- package/src/promptguard/semantic.ts +19 -4
- package/src/promptguard/token-analysis.ts +17 -5
- package/src/review/analyzer.test.ts +279 -0
- package/src/review/analyzer.ts +112 -28
- package/src/shared/stop-words.ts +21 -0
- package/src/skills/index.ts +11 -27
- package/src/skills/memory-skill-engine.ts +1044 -0
- package/src/testing/health-check.ts +19 -2
- package/src/cost/index.ts +0 -3
- package/src/cost/tracker.ts +0 -290
- package/src/cost/types.ts +0 -34
- package/src/memory-engine/compressor.ts +0 -97
- package/src/memory-engine/context-window.ts +0 -113
- package/src/memory-engine/store.ts +0 -371
- package/src/memory-engine/types.ts +0 -32
- package/src/skills/context-engine.ts +0 -863
- package/src/skills/extractor.ts +0 -224
- package/src/skills/global-context.ts +0 -726
- package/src/skills/library.ts +0 -189
- package/src/skills/pattern-engine.ts +0 -712
- package/src/skills/render-evolved.ts +0 -160
- package/src/skills/skill-reconciler.ts +0 -703
- package/src/skills/smart-extractor.ts +0 -843
- package/src/skills/types.ts +0 -18
- package/src/skills/wisdom-extractor.ts +0 -737
- package/src/superdev-evolution/index.ts +0 -3
- package/src/superdev-evolution/skill-manager.ts +0 -266
- package/src/superdev-evolution/types.ts +0 -20
|
@@ -0,0 +1,631 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Nexus Memory — Superior to MemPalace
|
|
3
|
+
*
|
|
4
|
+
* Combines the best of 2026 memory research into one zero-dependency system:
|
|
5
|
+
*
|
|
6
|
+
* 1. BM25 RETRIEVAL (replaces TF-IDF)
|
|
7
|
+
* - Term frequency saturation (long docs don't dominate)
|
|
8
|
+
* - Document length normalization
|
|
9
|
+
* - 500x faster than naive TF-IDF on large corpora
|
|
10
|
+
*
|
|
11
|
+
* 2. OBSERVATIONAL MEMORY (inspired by Mastra, 94.87% LongMemEval)
|
|
12
|
+
* - Don't store raw conversations — extract atomic observations
|
|
13
|
+
* - Each observation = one fact, one timestamp, one confidence
|
|
14
|
+
* - Resolves ambiguous references at extraction time
|
|
15
|
+
*
|
|
16
|
+
* 3. KNOWLEDGE GRAPH + TUNNELS (inspired by MemPalace)
|
|
17
|
+
* - Concepts as nodes, relationships as edges
|
|
18
|
+
* - Auto-tunnels: same concept in different projects → linked
|
|
19
|
+
* - BFS traversal for related memory discovery
|
|
20
|
+
*
|
|
21
|
+
* 4. PROGRESSIVE RETRIEVAL (3 levels)
|
|
22
|
+
* - L1: Index scan — instant, <1ms, metadata only
|
|
23
|
+
* - L2: BM25 search — fast, <10ms, top-K results
|
|
24
|
+
* - L3: Graph expansion — deep, <50ms, BFS + related concepts
|
|
25
|
+
*
|
|
26
|
+
* 5. TEMPORAL AWARENESS
|
|
27
|
+
* - When was this learned? (timestamp)
|
|
28
|
+
* - Is it still valid? (decay function)
|
|
29
|
+
* - Was it contradicted? (version chain)
|
|
30
|
+
* - How often recalled? (access frequency)
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from "node:fs";
|
|
34
|
+
import { join, dirname, resolve, relative } from "node:path";
|
|
35
|
+
import { createHash } from "node:crypto";
|
|
36
|
+
|
|
37
|
+
/** Sanitize ID to prevent path traversal. */
|
|
38
|
+
function sanitizeId(id: string): string {
|
|
39
|
+
return id.replace(/[^a-zA-Z0-9_-]/g, "_");
|
|
40
|
+
}
|
|
41
|
+
import { expandQuery, createCoOccurrenceModel, type CoOccurrenceModel } from "./semantic.js";
|
|
42
|
+
|
|
43
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
44
|
+
// TYPES
|
|
45
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
46
|
+
|
|
47
|
+
/**
 * An atomic unit of knowledge: one fact with provenance, confidence, and
 * pre-computed BM25 statistics so search never re-tokenizes content.
 */
export type Observation = {
  /** Stable identifier (derived from a content hash in extractObservations). */
  id: string;
  /** The fact itself — one clear statement. */
  content: string;
  /** Domain/project this belongs to. */
  domain: string;
  /** Specific topic within the domain. */
  topic: string;
  /** Tags for filtering. */
  tags: string[];
  /** When this was first observed (ISO-8601 string). */
  createdAt: string;
  /** When this was last confirmed/accessed (ISO-8601 string). */
  accessedAt: string;
  /** How many times recalled. */
  accessCount: number;
  /** Confidence 0-1. Decays over time, increases on re-confirmation. */
  confidence: number;
  /** Previous version ID if this was updated. */
  previousVersionId?: string;
  /** Is this still believed to be true? Invalid entries are kept but excluded from search. */
  valid: boolean;
  /** Source: which session/interaction produced this. */
  sourceSessionId?: string;
  /** BM25 pre-computed term frequencies (token → count). */
  termFreqs: Record<string, number>;
  /** Document length in tokens. */
  docLength: number;
};
|
|
77
|
+
|
|
78
|
+
/**
 * A node in the knowledge graph. Only "project" (domain) and "concept"
 * (topic/tag) nodes are produced by buildGraphFromObservations; the other
 * type values are reserved for future use.
 */
export type KnowledgeNode = {
  id: string;
  label: string;
  type: "concept" | "project" | "file" | "tool" | "person" | "error" | "skill";
  /** Observation IDs linked to this node. */
  observationIds: string[];
  /** When last active (taken from the first linked observation's accessedAt). */
  lastActiveAt: string;
  /** Linked-observation count for domain/topic nodes; tag nodes stay at 0. */
  weight: number;
};
|
|
89
|
+
|
|
90
|
+
/** An edge in the knowledge graph (treated as undirected by BFS expansion). */
export type KnowledgeEdge = {
  from: string;
  to: string;
  /** "tunnel" links the same topic observed in different domains. */
  relation: "contains" | "uses" | "causes" | "fixes" | "related" | "tunnel" | "contradicts" | "evolves";
  /** Relative strength: tunnels are created with weight 2, other edges with 1. */
  weight: number;
};
|
|
97
|
+
|
|
98
|
+
/** Search result with relevance score. */
export type MemoryResult = {
  observation: Observation;
  /** Combined BM25 × temporal-decay × confidence × access-frequency score. */
  score: number;
  /** How this result was found (L1 index scan, L2 BM25, L3 graph expansion). */
  retrievalLevel: "L1" | "L2" | "L3";
  /** Related observations found via graph. NOTE(review): not populated by search/deepSearch in this file. */
  related?: Observation[];
};
|
|
107
|
+
|
|
108
|
+
/** Aggregate memory statistics (produced by getStats). */
export type MemoryStats = {
  /** All stored observations, including invalidated ones. */
  totalObservations: number;
  /** Observations with valid === true. */
  validObservations: number;
  graphNodes: number;
  graphEdges: number;
  /** Count of cross-domain "tunnel" edges. */
  tunnels: number;
  /** Distinct domain names across all observations. */
  domains: string[];
  /** Mean confidence over valid observations, rounded to 2 decimals. */
  avgConfidence: number;
  /** Mean token length per observation, rounded to an integer. */
  avgDocLength: number;
};
|
|
119
|
+
|
|
120
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
121
|
+
// BM25 ENGINE
|
|
122
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
123
|
+
|
|
124
|
+
// BM25 hyper-parameters (classic Okapi defaults: k1 ∈ [1.2, 2.0], b = 0.75).
const BM25_K1 = 1.5; // Term frequency saturation parameter — higher = slower saturation
const BM25_B = 0.75; // Document length normalization parameter — 0 disables, 1 fully normalizes
|
|
126
|
+
|
|
127
|
+
import { STOP_WORDS } from "../shared/stop-words.js";
|
|
128
|
+
|
|
129
|
+
function tokenize(text: string): string[] {
|
|
130
|
+
return text
|
|
131
|
+
.toLowerCase()
|
|
132
|
+
.replace(/[^a-z가-힣0-9\s-]/g, " ")
|
|
133
|
+
.split(/\s+/)
|
|
134
|
+
.filter((w) => w.length > 1 && !STOP_WORDS.has(w));
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
function computeTermFreqs(tokens: string[]): Record<string, number> {
|
|
138
|
+
const tf: Record<string, number> = {};
|
|
139
|
+
for (const t of tokens) {
|
|
140
|
+
tf[t] = (tf[t] ?? 0) + 1;
|
|
141
|
+
}
|
|
142
|
+
return tf;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
function bm25Score(
|
|
146
|
+
queryTokens: string[],
|
|
147
|
+
doc: Observation,
|
|
148
|
+
idf: Map<string, number>,
|
|
149
|
+
avgDocLength: number,
|
|
150
|
+
): number {
|
|
151
|
+
let score = 0;
|
|
152
|
+
|
|
153
|
+
for (const qt of queryTokens) {
|
|
154
|
+
const idfVal = idf.get(qt) ?? 0;
|
|
155
|
+
const tf = doc.termFreqs[qt] ?? 0;
|
|
156
|
+
if (tf === 0) continue;
|
|
157
|
+
|
|
158
|
+
// BM25 formula
|
|
159
|
+
const numerator = tf * (BM25_K1 + 1);
|
|
160
|
+
const denominator = tf + BM25_K1 * (1 - BM25_B + BM25_B * (doc.docLength / avgDocLength));
|
|
161
|
+
score += idfVal * (numerator / denominator);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Temporal decay: older memories score slightly lower
|
|
165
|
+
const ageMs = Date.now() - new Date(doc.accessedAt).getTime();
|
|
166
|
+
const ageDays = ageMs / (1000 * 60 * 60 * 24);
|
|
167
|
+
const temporalBoost = Math.exp(-ageDays * 0.005); // Half-life ~139 days
|
|
168
|
+
|
|
169
|
+
// Confidence boost
|
|
170
|
+
const confidenceBoost = 0.5 + doc.confidence * 0.5;
|
|
171
|
+
|
|
172
|
+
// Access frequency boost (log scale)
|
|
173
|
+
const accessBoost = 1 + Math.log1p(doc.accessCount) * 0.1;
|
|
174
|
+
|
|
175
|
+
return score * temporalBoost * confidenceBoost * accessBoost;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
function computeIDF(observations: Observation[], queryTokens: string[]): Map<string, number> {
|
|
179
|
+
const N = observations.length;
|
|
180
|
+
const idf = new Map<string, number>();
|
|
181
|
+
|
|
182
|
+
for (const qt of queryTokens) {
|
|
183
|
+
let df = 0;
|
|
184
|
+
for (const obs of observations) {
|
|
185
|
+
if (obs.termFreqs[qt]) df++;
|
|
186
|
+
}
|
|
187
|
+
// BM25 IDF: log((N - df + 0.5) / (df + 0.5) + 1)
|
|
188
|
+
idf.set(qt, Math.log((N - df + 0.5) / (df + 0.5) + 1));
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
return idf;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
195
|
+
// KNOWLEDGE GRAPH
|
|
196
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
197
|
+
|
|
198
|
+
function buildGraphFromObservations(
|
|
199
|
+
observations: Observation[],
|
|
200
|
+
): { nodes: Map<string, KnowledgeNode>; edges: KnowledgeEdge[] } {
|
|
201
|
+
const nodes = new Map<string, KnowledgeNode>();
|
|
202
|
+
const edges: KnowledgeEdge[] = [];
|
|
203
|
+
const topicToDomains = new Map<string, Set<string>>();
|
|
204
|
+
|
|
205
|
+
for (const obs of observations) {
|
|
206
|
+
if (!obs.valid) continue;
|
|
207
|
+
|
|
208
|
+
// Create/update domain node
|
|
209
|
+
const domainId = `domain:${obs.domain}`;
|
|
210
|
+
if (!nodes.has(domainId)) {
|
|
211
|
+
nodes.set(domainId, {
|
|
212
|
+
id: domainId, label: obs.domain, type: "project",
|
|
213
|
+
observationIds: [], lastActiveAt: obs.accessedAt, weight: 0,
|
|
214
|
+
});
|
|
215
|
+
}
|
|
216
|
+
const domainNode = nodes.get(domainId)!;
|
|
217
|
+
domainNode.observationIds.push(obs.id);
|
|
218
|
+
domainNode.weight++;
|
|
219
|
+
|
|
220
|
+
// Create/update topic node
|
|
221
|
+
const topicId = `topic:${obs.topic}`;
|
|
222
|
+
if (!nodes.has(topicId)) {
|
|
223
|
+
nodes.set(topicId, {
|
|
224
|
+
id: topicId, label: obs.topic, type: "concept",
|
|
225
|
+
observationIds: [], lastActiveAt: obs.accessedAt, weight: 0,
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
const topicNode = nodes.get(topicId)!;
|
|
229
|
+
topicNode.observationIds.push(obs.id);
|
|
230
|
+
topicNode.weight++;
|
|
231
|
+
|
|
232
|
+
// Domain → Topic edge
|
|
233
|
+
edges.push({ from: domainId, to: topicId, relation: "contains", weight: 1 });
|
|
234
|
+
|
|
235
|
+
// Track topic-to-domains for tunnel creation
|
|
236
|
+
if (!topicToDomains.has(obs.topic)) {
|
|
237
|
+
topicToDomains.set(obs.topic, new Set());
|
|
238
|
+
}
|
|
239
|
+
topicToDomains.get(obs.topic)!.add(obs.domain);
|
|
240
|
+
|
|
241
|
+
// Tag nodes
|
|
242
|
+
for (const tag of obs.tags) {
|
|
243
|
+
const tagId = `tag:${tag}`;
|
|
244
|
+
if (!nodes.has(tagId)) {
|
|
245
|
+
nodes.set(tagId, {
|
|
246
|
+
id: tagId, label: tag, type: "concept",
|
|
247
|
+
observationIds: [], lastActiveAt: obs.accessedAt, weight: 0,
|
|
248
|
+
});
|
|
249
|
+
}
|
|
250
|
+
nodes.get(tagId)!.observationIds.push(obs.id);
|
|
251
|
+
edges.push({ from: topicId, to: tagId, relation: "related", weight: 1 });
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// Create TUNNELS: same topic across different domains
|
|
256
|
+
for (const [topic, domains] of topicToDomains) {
|
|
257
|
+
if (domains.size < 2) continue;
|
|
258
|
+
const domainList = [...domains];
|
|
259
|
+
for (let i = 0; i < domainList.length; i++) {
|
|
260
|
+
for (let j = i + 1; j < domainList.length; j++) {
|
|
261
|
+
edges.push({
|
|
262
|
+
from: `domain:${domainList[i]}`,
|
|
263
|
+
to: `domain:${domainList[j]}`,
|
|
264
|
+
relation: "tunnel",
|
|
265
|
+
weight: 2, // Tunnels are high-weight connections
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
return { nodes, edges };
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/** BFS traversal from a starting node. */
|
|
275
|
+
function bfsExpand(
|
|
276
|
+
startNodeIds: string[],
|
|
277
|
+
nodes: Map<string, KnowledgeNode>,
|
|
278
|
+
edges: KnowledgeEdge[],
|
|
279
|
+
maxHops: number,
|
|
280
|
+
): Set<string> {
|
|
281
|
+
const visited = new Set<string>(startNodeIds);
|
|
282
|
+
let frontier = [...startNodeIds];
|
|
283
|
+
|
|
284
|
+
for (let hop = 0; hop < maxHops; hop++) {
|
|
285
|
+
const nextFrontier: string[] = [];
|
|
286
|
+
for (const nodeId of frontier) {
|
|
287
|
+
// Find all edges from/to this node
|
|
288
|
+
for (const edge of edges) {
|
|
289
|
+
const neighbor = edge.from === nodeId ? edge.to : edge.to === nodeId ? edge.from : null;
|
|
290
|
+
if (neighbor && !visited.has(neighbor)) {
|
|
291
|
+
visited.add(neighbor);
|
|
292
|
+
nextFrontier.push(neighbor);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
frontier = nextFrontier;
|
|
297
|
+
if (frontier.length === 0) break;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
return visited;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
304
|
+
// OBSERVATION EXTRACTION
|
|
305
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
306
|
+
|
|
307
|
+
/** Extract atomic observations from raw text. */
|
|
308
|
+
export function extractObservations(
|
|
309
|
+
text: string,
|
|
310
|
+
domain: string,
|
|
311
|
+
sessionId?: string,
|
|
312
|
+
): Observation[] {
|
|
313
|
+
const observations: Observation[] = [];
|
|
314
|
+
const sentences = text
|
|
315
|
+
.split(/[.!?\n]/)
|
|
316
|
+
.map((s) => s.trim())
|
|
317
|
+
.filter((s) => s.length > 15 && s.length < 500);
|
|
318
|
+
|
|
319
|
+
// Group similar sentences into topics
|
|
320
|
+
const topicMap = new Map<string, string[]>();
|
|
321
|
+
|
|
322
|
+
for (const sentence of sentences) {
|
|
323
|
+
// Skip noise
|
|
324
|
+
if (/^[<{]|task-notification|system-reminder|┌──/i.test(sentence)) continue;
|
|
325
|
+
if (/^\d+$|^[ㄱ-ㅎ]+$/.test(sentence.trim())) continue;
|
|
326
|
+
|
|
327
|
+
const topic = extractTopic(sentence);
|
|
328
|
+
if (!topicMap.has(topic)) topicMap.set(topic, []);
|
|
329
|
+
topicMap.get(topic)!.push(sentence);
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
for (const [topic, sents] of topicMap) {
|
|
333
|
+
// Take the most informative sentence per topic
|
|
334
|
+
const best = sents.sort((a, b) => b.length - a.length)[0];
|
|
335
|
+
if (!best) continue;
|
|
336
|
+
|
|
337
|
+
const tokens = tokenize(best);
|
|
338
|
+
const tags = extractTags(best);
|
|
339
|
+
|
|
340
|
+
observations.push({
|
|
341
|
+
id: createHash("sha256").update(`${domain}:${topic}:${best.slice(0, 50)}`).digest("hex").slice(0, 12),
|
|
342
|
+
content: best,
|
|
343
|
+
domain,
|
|
344
|
+
topic,
|
|
345
|
+
tags,
|
|
346
|
+
createdAt: new Date().toISOString(),
|
|
347
|
+
accessedAt: new Date().toISOString(),
|
|
348
|
+
accessCount: 0,
|
|
349
|
+
confidence: 0.7,
|
|
350
|
+
valid: true,
|
|
351
|
+
sourceSessionId: sessionId,
|
|
352
|
+
termFreqs: computeTermFreqs(tokens),
|
|
353
|
+
docLength: tokens.length,
|
|
354
|
+
});
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
return observations;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
function extractTopic(text: string): string {
|
|
361
|
+
const tokens = tokenize(text);
|
|
362
|
+
// Most frequent non-stop-word as topic
|
|
363
|
+
const freq = new Map<string, number>();
|
|
364
|
+
for (const t of tokens) freq.set(t, (freq.get(t) ?? 0) + 1);
|
|
365
|
+
const sorted = [...freq.entries()].sort(([, a], [, b]) => b - a);
|
|
366
|
+
return sorted[0]?.[0] ?? "general";
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
function extractTags(text: string): string[] {
|
|
370
|
+
const tags: string[] = [];
|
|
371
|
+
const patterns: [string, RegExp][] = [
|
|
372
|
+
["security", /보안|security|취약|vulnerab|exploit|injection/i],
|
|
373
|
+
["testing", /테스트|test|spec|coverage/i],
|
|
374
|
+
["devops", /deploy|배포|docker|ci\/cd|npm/i],
|
|
375
|
+
["frontend", /react|vue|css|html|component/i],
|
|
376
|
+
["backend", /server|api|database|sql|rest/i],
|
|
377
|
+
["git", /git|commit|push|branch|merge|pr/i],
|
|
378
|
+
["performance", /성능|optimize|performance|cache|speed/i],
|
|
379
|
+
["debug", /debug|디버그|error|에러|log/i],
|
|
380
|
+
];
|
|
381
|
+
for (const [tag, pattern] of patterns) {
|
|
382
|
+
if (pattern.test(text)) tags.push(tag);
|
|
383
|
+
}
|
|
384
|
+
return tags;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
388
|
+
// NEXUS MEMORY STORE
|
|
389
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
390
|
+
|
|
391
|
+
/**
 * Public API of the Nexus memory store. All methods operate on in-memory
 * state; call save() to persist observations and the graph to disk.
 */
export type NexusMemory = {
  /** Add raw text and auto-extract observations; returns how many were newly added. */
  ingest: (text: string, domain: string, sessionId?: string) => number;
  /** Add a pre-formed observation (no-op when the ID already exists). */
  addObservation: (obs: Observation) => void;
  /** L1: Quick metadata scan — filter by domain/topic/tags, no scoring. */
  scanIndex: (domain?: string, topic?: string, tags?: string[]) => Observation[];
  /** L2: BM25 search over valid observations (also updates access stats). */
  search: (query: string, limit?: number) => MemoryResult[];
  /** L3: Graph-expanded search. Uses `this.search`, so invoke as a method — do not destructure. */
  deepSearch: (query: string, limit?: number) => MemoryResult[];
  /** Confirm an observation (boost confidence, update access stats). */
  confirm: (id: string) => void;
  /** Invalidate an observation (kept on record but excluded from search). */
  invalidate: (id: string) => void;
  /** Get all tunnels (cross-domain connections). */
  getTunnels: () => KnowledgeEdge[];
  /** Get aggregate statistics. */
  getStats: () => MemoryStats;
  /** Persist observations and graph to disk. */
  save: () => void;
  /** Get the knowledge graph as plain arrays. */
  getGraph: () => { nodes: KnowledgeNode[]; edges: KnowledgeEdge[] };
};
|
|
415
|
+
|
|
416
|
+
/**
 * Create a file-backed Nexus memory store rooted at `dataDir`.
 *
 * Eagerly loads any previously saved observations from
 * `<dataDir>/observations/*.json`, then keeps all state (observations,
 * BM25 average length, co-occurrence model, knowledge graph) in memory.
 * Nothing is written back to disk until save() is called.
 */
export function createNexusMemory(dataDir: string): NexusMemory {
  const obsDir = join(dataDir, "observations");
  const graphPath = join(dataDir, "graph.json");

  mkdirSync(obsDir, { recursive: true });

  // Load existing observations (one JSON file per observation).
  let observations: Observation[] = [];
  // NOTE(review): obsDir was just created above, so existsSync is always true here.
  if (existsSync(obsDir)) {
    for (const file of readdirSync(obsDir).filter((f) => f.endsWith(".json"))) {
      try {
        // Parsed without schema validation — assumes files were written by save() below.
        const obs = JSON.parse(readFileSync(join(obsDir, file), "utf-8")) as Observation;
        observations.push(obs);
      } catch { /* skip corrupt */ }
    }
  }

  // Pre-compute average doc length for BM25. The fallback of 10 is an
  // arbitrary non-zero default that avoids division by zero in bm25Score
  // when the store is empty.
  let avgDocLength = observations.length > 0
    ? observations.reduce((s, o) => s + o.docLength, 0) / observations.length
    : 10;

  // Build co-occurrence model from existing observations (used for query expansion).
  const coModel = createCoOccurrenceModel();
  if (observations.length > 0) {
    coModel.rebuild(observations.map((o) => o.content));
  }

  // Build graph
  let graph = buildGraphFromObservations(observations);

  // Recompute all derived state whenever the observation list changes.
  function rebuildGraph(): void {
    graph = buildGraphFromObservations(observations);
    avgDocLength = observations.length > 0
      ? observations.reduce((s, o) => s + o.docLength, 0) / observations.length
      : 10;
    // Rebuild co-occurrence model with latest observations
    coModel.rebuild(observations.map((o) => o.content));
  }

  return {
    // Extract observations from raw text and add the ones not already
    // stored; returns the number of newly added observations.
    ingest(text: string, domain: string, sessionId?: string): number {
      const newObs = extractObservations(text, domain, sessionId);
      // Deduplicate against existing
      let added = 0;
      for (const obs of newObs) {
        if (observations.some((e) => e.id === obs.id)) continue;
        observations.push(obs);
        added++;
      }
      if (added > 0) rebuildGraph();
      return added;
    },

    // Add a single pre-formed observation; silently ignores duplicate IDs.
    addObservation(obs: Observation): void {
      if (observations.some((e) => e.id === obs.id)) return;
      observations.push(obs);
      rebuildGraph();
    },

    // L1: Instant index scan — metadata only
    scanIndex(domain?: string, topic?: string, tags?: string[]): Observation[] {
      return observations.filter((obs) => {
        if (!obs.valid) return false;
        if (domain && obs.domain !== domain) return false;
        if (topic && obs.topic !== topic) return false;
        // Tag filter is an OR: any shared tag keeps the observation.
        if (tags && tags.length > 0 && !tags.some((t) => obs.tags.includes(t))) return false;
        return true;
      });
    },

    // L2: BM25 + Semantic search
    search(query: string, limit = 10): MemoryResult[] {
      // Expand query with synonyms + co-occurrence
      const expanded = expandQuery(query, coModel);
      const queryTokens = expanded.expanded;
      if (queryTokens.length === 0) return [];

      const validObs = observations.filter((o) => o.valid);
      const idf = computeIDF(validObs, queryTokens);

      const scored = validObs.map((obs) => ({
        observation: obs,
        score: bm25Score(queryTokens, obs, idf, avgDocLength),
        retrievalLevel: "L2" as const,
      }));

      return scored
        .filter((s) => s.score > 0)
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((s) => {
          // Update access stats (in memory only — written out on the next save()).
          s.observation.accessedAt = new Date().toISOString();
          s.observation.accessCount++;
          return s;
        });
    },

    // L3: Graph-expanded deep search
    // NOTE(review): calls this.search, so deepSearch must be invoked as a
    // method on the returned object — destructuring it breaks `this`.
    deepSearch(query: string, limit = 10): MemoryResult[] {
      // First get L2 results
      const l2Results = this.search(query, Math.ceil(limit / 2));
      if (l2Results.length === 0) return [];

      // Find graph nodes related to top results
      const topObsIds = new Set(l2Results.map((r) => r.observation.id));
      const relatedNodeIds: string[] = [];

      for (const node of graph.nodes.values()) {
        if (node.observationIds.some((id) => topObsIds.has(id))) {
          relatedNodeIds.push(node.id);
        }
      }

      // BFS expand 2 hops
      const expandedNodeIds = bfsExpand(relatedNodeIds, graph.nodes, graph.edges, 2);

      // Collect observations from expanded nodes
      const expandedObsIds = new Set<string>();
      for (const nodeId of expandedNodeIds) {
        const node = graph.nodes.get(nodeId);
        if (node) {
          for (const obsId of node.observationIds) {
            if (!topObsIds.has(obsId)) expandedObsIds.add(obsId);
          }
        }
      }

      // Score expanded observations.
      // NOTE(review): uses the raw tokenize(query) here, not the expanded
      // query that L2 used above — confirm this asymmetry is intended.
      const queryTokens = tokenize(query);
      const validObs = observations.filter((o) => o.valid && expandedObsIds.has(o.id));
      const idf = computeIDF(validObs, queryTokens);

      const l3Results: MemoryResult[] = validObs.map((obs) => ({
        observation: obs,
        score: bm25Score(queryTokens, obs, idf, avgDocLength) * 0.8, // Slightly lower weight for graph results
        retrievalLevel: "L3" as const,
      })).filter((s) => s.score > 0);

      // Merge L2 and L3, deduplicate, sort
      const all = [...l2Results, ...l3Results];
      const seen = new Set<string>();
      const unique: MemoryResult[] = [];
      for (const r of all.sort((a, b) => b.score - a.score)) {
        if (!seen.has(r.observation.id)) {
          seen.add(r.observation.id);
          unique.push(r);
        }
      }

      return unique.slice(0, limit);
    },

    // Re-confirm a fact: bump confidence (capped at 1) and refresh access stats.
    confirm(id: string): void {
      const obs = observations.find((o) => o.id === id);
      if (obs) {
        obs.confidence = Math.min(1, obs.confidence + 0.1);
        obs.accessedAt = new Date().toISOString();
        obs.accessCount++;
      }
    },

    // Mark an observation as no longer true; it stays on record but is
    // excluded from scanIndex/search/deepSearch (all filter on valid).
    invalidate(id: string): void {
      const obs = observations.find((o) => o.id === id);
      if (obs) {
        obs.valid = false;
        obs.confidence = 0;
      }
      // NOTE(review): the graph is rebuilt even when the id was not found.
      rebuildGraph();
    },

    getTunnels(): KnowledgeEdge[] {
      return graph.edges.filter((e) => e.relation === "tunnel");
    },

    getStats(): MemoryStats {
      const valid = observations.filter((o) => o.valid);
      const domains = [...new Set(observations.map((o) => o.domain))];
      const tunnels = graph.edges.filter((e) => e.relation === "tunnel").length;
      const avgConf = valid.length > 0
        ? valid.reduce((s, o) => s + o.confidence, 0) / valid.length
        : 0;

      return {
        totalObservations: observations.length,
        validObservations: valid.length,
        graphNodes: graph.nodes.size,
        graphEdges: graph.edges.length,
        tunnels,
        domains,
        avgConfidence: Math.round(avgConf * 100) / 100,
        avgDocLength: Math.round(avgDocLength),
      };
    },

    // Write every observation (including invalidated ones) plus the graph
    // to disk. Files are only added/overwritten here, never deleted.
    save(): void {
      mkdirSync(obsDir, { recursive: true });
      for (const obs of observations) {
        const safeId = sanitizeId(obs.id);
        const filePath = join(obsDir, `${safeId}.json`);
        writeFileSync(filePath, JSON.stringify(obs, null, 2), "utf-8");
      }
      // Save graph
      const graphData = {
        nodes: [...graph.nodes.values()],
        edges: graph.edges,
      };
      writeFileSync(graphPath, JSON.stringify(graphData, null, 2), "utf-8");
    },

    getGraph(): { nodes: KnowledgeNode[]; edges: KnowledgeEdge[] } {
      return { nodes: [...graph.nodes.values()], edges: graph.edges };
    },
  };
}
|