vektor-slipstream 1.4.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +67 -306
  2. package/package.json +14 -146
  3. package/CHANGELOG.md +0 -139
  4. package/LICENSE +0 -33
  5. package/TENETS.md +0 -189
  6. package/audn-log.js +0 -143
  7. package/axon.js +0 -389
  8. package/boot-patch.js +0 -33
  9. package/boot-screen.html +0 -210
  10. package/briefing.js +0 -150
  11. package/cerebellum.js +0 -439
  12. package/cloak-behaviour.js +0 -596
  13. package/cloak-captcha.js +0 -541
  14. package/cloak-core.js +0 -499
  15. package/cloak-identity.js +0 -484
  16. package/cloak-index.js +0 -261
  17. package/cloak-llms.js +0 -163
  18. package/cloak-pattern-store.js +0 -471
  19. package/cloak-recorder-auto.js +0 -297
  20. package/cloak-recorder-snippet.js +0 -119
  21. package/cloak-turbo-quant.js +0 -357
  22. package/cloak-warmup.js +0 -240
  23. package/cortex.js +0 -221
  24. package/detect-hardware.js +0 -181
  25. package/entity-resolver.js +0 -298
  26. package/errors.js +0 -66
  27. package/examples/example-claude-mcp.js +0 -220
  28. package/examples/example-langchain-researcher.js +0 -82
  29. package/examples/example-openai-assistant.js +0 -84
  30. package/examples/examples-README.md +0 -161
  31. package/export-import.js +0 -221
  32. package/forget.js +0 -148
  33. package/inspect.js +0 -199
  34. package/mistral/README-mistral.md +0 -123
  35. package/mistral/mistral-bridge.js +0 -218
  36. package/mistral/mistral-setup.js +0 -220
  37. package/mistral/vektor-tool-manifest.json +0 -41
  38. package/models/model_quantized.onnx +0 -0
  39. package/models/vocab.json +0 -1
  40. package/namespace.js +0 -186
  41. package/pin.js +0 -91
  42. package/slipstream-core-extended.js +0 -134
  43. package/slipstream-core.js +0 -1
  44. package/slipstream-db.js +0 -140
  45. package/slipstream-embedder.js +0 -338
  46. package/sovereign.js +0 -142
  47. package/token.js +0 -322
  48. package/types/index.d.ts +0 -269
  49. package/vektor-banner-loader.js +0 -109
  50. package/vektor-cli.js +0 -259
  51. package/vektor-licence-prompt.js +0 -128
  52. package/vektor-licence.js +0 -192
  53. package/vektor-setup.js +0 -270
  54. package/vektor-slipstream.dxt +0 -0
  55. package/vektor-tui.js +0 -373
  56. package/visualize.js +0 -235
package/cortex.js DELETED
@@ -1,221 +0,0 @@
1
- /**
2
- * cloak_cortex.js
3
- * File anatomy scanner — builds a token-aware project index in Vektor's entity graph.
4
- * Called once on init, then on significant file changes (not per-session).
5
- *
6
- * Writes to Vektor as entity nodes:
7
- * "[CLOAK_CORTEX] project:my-app | file:src/server.ts | tokens:~520 | size:1820b | last_modified:<iso>"
8
- *
9
- * Architecture: CLOAK layer → Vektor entity graph (MAGMA §3.2)
10
- * Research: MAGMA arXiv:2601.03236, OpenWolf anatomy pattern
11
- */
12
-
13
- 'use strict';
14
-
15
- const fs = require('fs');
16
- const path = require('path');
17
-
18
- // ---------------------------------------------------------------------------
19
- // Token estimation (OpenWolf ratios — accurate to ±15%)
20
- // ---------------------------------------------------------------------------
21
// Approximate bytes-per-token ratio for each broad content category.
const TOKEN_RATIOS = {
  code: 3.5, // .js .ts .py .go .rs etc.
  prose: 4.0, // .md .txt .rst
  mixed: 3.75, // .json .yaml .toml .html — also the fallback for unknown extensions
};

const CODE_EXTS = new Set(['.js','.ts','.jsx','.tsx','.py','.go','.rs','.rb','.java','.c','.cpp','.cs','.php','.swift','.kt']);
const PROSE_EXTS = new Set(['.md','.txt','.rst','.mdx']);
const SKIP_DIRS = new Set(['node_modules','.git','dist','build','.next','__pycache__','.cache','coverage','.wolf']);
const SKIP_FILES = new Set(['.DS_Store','package-lock.json','yarn.lock','pnpm-lock.yaml']);

/**
 * Estimate the token count of a file from its size on disk.
 *
 * The extension (case-insensitive) selects a bytes-per-token ratio from
 * TOKEN_RATIOS; extensions that are neither code nor prose use the "mixed" ratio.
 *
 * @param {string} filePath - Path or file name; only its extension is inspected
 * @param {number} bytes    - File size in bytes
 * @returns {number} Rounded token estimate
 */
function estimateTokens(filePath, bytes) {
  const ext = path.extname(filePath).toLowerCase();
  if (CODE_EXTS.has(ext)) return Math.round(bytes / TOKEN_RATIOS.code);
  if (PROSE_EXTS.has(ext)) return Math.round(bytes / TOKEN_RATIOS.prose);
  return Math.round(bytes / TOKEN_RATIOS.mixed);
}

// ---------------------------------------------------------------------------
// File scanner
// ---------------------------------------------------------------------------

/**
 * True when `candidate` is `ancestor` itself or lies somewhere beneath it.
 * Both arguments are expected to be resolved (real) absolute paths.
 */
function isSameOrDescendant(candidate, ancestor) {
  if (candidate === ancestor) return true;
  const prefix = ancestor.endsWith(path.sep) ? ancestor : ancestor + path.sep;
  return candidate.startsWith(prefix);
}

/**
 * Recursively list the files under `dirPath`.
 *
 * Directories in SKIP_DIRS and files in SKIP_FILES are ignored. Unreadable
 * directories and entries that cannot be stat'ed are silently skipped.
 * Symlinks are followed, with two guards against cycles:
 *   - a link whose target was already followed (or is the scan root) is skipped;
 *   - a link whose target is the current directory or one of its ancestors is skipped.
 * Real paths are only resolved when a symlink is encountered, so plain
 * directory trees pay no realpath cost beyond one call for the root.
 *
 * @param {string}      dirPath  - Directory to scan
 * @param {object[]}    results  - Accumulator (appended to and returned)
 * @param {string}      rootPath - Base used to compute each `relPath`
 * @param {Set<string>} visited  - Real paths of symlink targets already followed
 * @returns {{relPath: string, ext: string, tokens: number, mtime: number, bytes: number}[]}
 */
function scanDirectory(dirPath, results = [], rootPath = dirPath, visited = new Set()) {
  // Top-level call: remember the root so a symlink pointing back at it is a no-op.
  if (dirPath === rootPath) {
    try {
      visited.add(fs.realpathSync(rootPath));
    } catch {
      // An unreadable root is reported by readdirSync just below.
    }
  }

  let entries;
  try {
    entries = fs.readdirSync(dirPath, { withFileTypes: true });
  } catch {
    return results;
  }

  // Real path of dirPath, resolved lazily: undefined = not yet tried, null = failed.
  let dirRealPath;

  const pushFile = (relPath, name, stat) => {
    const ext = path.extname(name).toLowerCase();
    const tokens = estimateTokens(name, stat.size);
    results.push({ relPath, ext, tokens, mtime: stat.mtimeMs, bytes: stat.size });
  };

  for (const entry of entries) {
    if (SKIP_FILES.has(entry.name)) continue;

    const fullPath = path.join(dirPath, entry.name);
    const relPath = path.relative(rootPath, fullPath);

    if (entry.isSymbolicLink()) {
      let realPath;
      try { realPath = fs.realpathSync(fullPath); } catch { continue; } // dangling link

      if (visited.has(realPath)) continue; // target already scanned through another link

      let stat;
      try { stat = fs.statSync(fullPath); } catch { continue; }

      if (stat.isDirectory()) {
        if (SKIP_DIRS.has(entry.name)) continue;

        if (dirRealPath === undefined) {
          try { dirRealPath = fs.realpathSync(dirPath); } catch { dirRealPath = null; }
        }
        // A link to the current directory or one of its ancestors is a cycle:
        // following it would list the same subtree a second time.
        if (dirRealPath !== null && isSameOrDescendant(dirRealPath, realPath)) continue;

        visited.add(realPath); // mark before recursing
        scanDirectory(fullPath, results, rootPath, visited);
      } else if (stat.isFile()) {
        pushFile(relPath, entry.name, stat);
      }
      continue;
    }

    if (entry.isDirectory()) {
      if (!SKIP_DIRS.has(entry.name)) {
        scanDirectory(fullPath, results, rootPath, visited);
      }
      continue;
    }

    if (!entry.isFile()) continue; // sockets, FIFOs, devices

    let stat;
    try { stat = fs.statSync(fullPath); } catch { continue; }

    pushFile(relPath, entry.name, stat);
  }

  return results;
}
106
-
107
- // isSymlinkDir removed — logic inlined above for clarity
108
-
109
- // ---------------------------------------------------------------------------
110
- // Build Vektor memory strings for entity graph
111
- // ---------------------------------------------------------------------------
112
/**
 * Format one scanned file as a pipe-delimited memory string.
 *
 * @param {{relPath: string, tokens: number, bytes: number, mtime: number}} file - Scan record
 * @param {string} projectName - Project label placed in the leading tag
 * @returns {string} e.g. "[CLOAK_CORTEX] project:app | file:a.js | tokens:~10 | size:35b | last_modified:<iso>"
 */
function buildEntityString(file, projectName) {
  const modifiedIso = new Date(file.mtime).toISOString();
  const fields = [];
  fields.push(`[CLOAK_CORTEX] project:${projectName}`);
  fields.push(`file:${file.relPath}`);
  fields.push(`tokens:~${file.tokens}`);
  fields.push(`size:${file.bytes}b`);
  fields.push(`last_modified:${modifiedIso}`);
  return fields.join(' | ');
}
121
-
122
- // ---------------------------------------------------------------------------
123
- // Load/save scan cache (.wolf/cortex-cache.json) to avoid rescanning unchanged files
124
- // ---------------------------------------------------------------------------
125
/**
 * Read the scan cache stored at <projectPath>/.wolf/cortex-cache.json.
 *
 * Best-effort: a missing, unreadable or malformed cache yields an empty
 * object. A file that parses as JSON but is not a plain object (null, array,
 * number, string) is also treated as empty, so callers can index the result
 * safely.
 *
 * @param {string} projectPath - Project root containing the .wolf directory
 * @returns {object} Map of relPath -> { mtime, tokens }, or {} when unavailable
 */
function loadCache(projectPath) {
  const cachePath = path.join(projectPath, '.wolf', 'cortex-cache.json');
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
  } catch {
    return {};
  }
  const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
133
-
134
/**
 * Persist the scan cache as pretty-printed JSON in <projectPath>/.wolf/cortex-cache.json,
 * creating the .wolf directory first when it does not exist.
 *
 * @param {string} projectPath - Project root
 * @param {object} cache       - Cache object to serialise
 */
function saveCache(projectPath, cache) {
  const cacheDir = path.join(projectPath, '.wolf');
  const cacheFile = path.join(cacheDir, 'cortex-cache.json');

  const dirMissing = !fs.existsSync(cacheDir);
  if (dirMissing) {
    fs.mkdirSync(cacheDir, { recursive: true });
  }

  const serialised = JSON.stringify(cache, null, 2);
  fs.writeFileSync(cacheFile, serialised);
}
142
-
143
- // ---------------------------------------------------------------------------
144
- // Main export
145
- // ---------------------------------------------------------------------------
146
-
147
/**
 * runCortex({ projectPath, memory, force })
 *
 * Scans the project tree, writes one memory string per new or changed file via
 * memory.remember(), writes a project-level summary string, and saves the
 * scan cache. Files whose cached mtime matches the current mtime are skipped
 * unless `force` is set.
 *
 * The saved cache only contains files seen in this scan, so entries for
 * deleted or renamed files are dropped instead of accumulating.
 *
 * @param {object}  options
 * @param {string}  options.projectPath - Path to project root
 * @param {object}  options.memory      - Object exposing an async remember(string) method
 * @param {boolean} [options.force]     - Re-write every file even if unchanged
 * @returns {Promise<object>} { scanned, skipped, written, totalTokens, anatomy }
 * @throws {Error} When projectPath or memory is missing, or the cache cannot be saved
 */
async function runCortex({ projectPath, memory, force = false } = {}) {
  if (!projectPath) throw new Error('cloak_cortex: projectPath is required');
  if (!memory) throw new Error('cloak_cortex: memory instance is required');

  const projectName = path.basename(projectPath);
  const previousCache = force ? {} : loadCache(projectPath);
  const files = scanDirectory(projectPath);

  const nextCache = {};
  const stats = { scanned: 0, skipped: 0, written: 0, totalTokens: 0 };
  const anatomy = [];

  for (const file of files) {
    stats.totalTokens += file.tokens;
    anatomy.push({ path: file.relPath, tokens: file.tokens });

    const cacheKey = file.relPath;
    // Own-property check so a file named e.g. "constructor" is not mistaken for a cache hit.
    const cached = Object.prototype.hasOwnProperty.call(previousCache, cacheKey)
      ? previousCache[cacheKey]
      : undefined;

    // Skip if file hasn't changed since last scan
    if (!force && cached && cached.mtime === file.mtime) {
      nextCache[cacheKey] = cached;
      stats.skipped++;
      continue;
    }

    stats.scanned++;

    const entityStr = buildEntityString(file, projectName);

    try {
      await memory.remember(entityStr);
      nextCache[cacheKey] = { mtime: file.mtime, tokens: file.tokens };
      stats.written++;
    } catch (err) {
      // Keep the stale entry (if any): its mtime mismatch makes the next run retry this file.
      if (cached) nextCache[cacheKey] = cached;
      console.error(`[cloak_cortex] Failed to write entity for ${file.relPath}:`, err.message);
    }
  }

  // Write project-level summary node
  const summaryStr = [
    `[CLOAK_CORTEX_SUMMARY] project:${projectName}`,
    `total_files:${files.length}`,
    `total_tokens:~${stats.totalTokens}`,
    `scanned_at:${new Date().toISOString()}`,
  ].join(' | ');

  try {
    await memory.remember(summaryStr);
  } catch (err) {
    console.error('[cloak_cortex] Failed to write summary node:', err.message);
  }

  saveCache(projectPath, nextCache);

  return { ...stats, anatomy };
}
208
-
209
/**
 * getAnatomy(projectPath)
 *
 * Builds the anatomy list from the on-disk scan cache only; it does not
 * rescan the tree or call the memory instance.
 *
 * @param {string} projectPath - Project root containing the .wolf cache
 * @returns {{path: string, tokens: number}[]} Entries ordered by token count, largest first
 */
function getAnatomy(projectPath) {
  const cache = loadCache(projectPath);
  const anatomy = [];
  for (const [relPath, data] of Object.entries(cache)) {
    anatomy.push({ path: relPath, tokens: data.tokens });
  }
  anatomy.sort((left, right) => right.tokens - left.tokens);
  return anatomy;
}
220
-
221
- module.exports = { runCortex, getAnatomy, scanDirectory, estimateTokens };
@@ -1,181 +0,0 @@
1
- 'use strict';
2
-
3
- /**
4
- * VEKTOR SLIPSTREAM
5
- * detect-hardware.js — Execution Provider Probe
6
- * ─────────────────────────────────────────────────────────────────────────────
7
- * Probes the host system for hardware acceleration capabilities WITHOUT
8
- * attempting to load any native binaries. Uses only OS-level CLI tools that
9
- * ship with the relevant drivers/OS — so this file itself has zero native deps
10
- * and zero risk of crashing the terminal.
11
- *
12
- * Returns: 'cuda' | 'coreml' | 'cpu'
13
- * Execution time: < 10ms (CLI probe only, no driver load)
14
- *
15
- * Consumed by:
16
- * - postinstall.js → to decide which onnxruntime variant to install
17
- * - slipstream.js → to decide which EP to pass to InferenceSession.create()
18
- * ─────────────────────────────────────────────────────────────────────────────
19
- */
20
-
21
- const os = require('os');
22
- const { execSync } = require('child_process');
23
-
24
- // ─── Constants ───────────────────────────────────────────────────────────────
25
-
26
// Execution-provider identifiers returned by the probes. Frozen so consumers
// can share the object without being able to alter it.
const EP = Object.freeze({ CUDA: 'cuda', COREML: 'coreml', CPU: 'cpu' });
31
-
32
- // ─── Probe Helpers ───────────────────────────────────────────────────────────
33
-
34
/**
 * macOS probe: reads the CPU brand string through `sysctl`.
 *
 * Returns EP.COREML when the brand string contains "Apple", otherwise EP.CPU.
 * Any failure of the command (missing binary, non-zero exit, 2s timeout)
 * also yields EP.CPU.
 *
 * @returns {'coreml'|'cpu'}
 */
function probeDarwin() {
  let brand;
  try {
    const output = execSync(
      'sysctl -n machdep.cpu.brand_string 2>/dev/null',
      { stdio: 'pipe', timeout: 2000 }
    );
    brand = output.toString().trim();
  } catch (_) {
    // sysctl unavailable or returned non-zero — safe fallback
    return EP.CPU;
  }
  return brand.includes('Apple') ? EP.COREML : EP.CPU;
}
54
-
55
/**
 * Linux / Windows probe: runs `nvidia-smi` and looks only at whether it succeeds.
 *
 * A zero exit status yields EP.CUDA. A missing binary, a non-zero exit or
 * the 3s timeout makes execSync throw, which yields EP.CPU. Output is
 * discarded (`stdio: 'ignore'`).
 *
 * @returns {'cuda'|'cpu'}
 */
function probeNvidia() {
  let succeeded = true;
  try {
    execSync('nvidia-smi', { stdio: 'ignore', timeout: 3000 });
  } catch (_) {
    // nvidia-smi not found, failed, or timed out
    succeeded = false;
  }
  return succeeded ? EP.CUDA : EP.CPU;
}
71
-
72
- // ─── Main Export ─────────────────────────────────────────────────────────────
73
-
74
/**
 * detectHardware()
 *
 * Synchronous hardware probe that picks an execution provider from the
 * current platform:
 *
 *   darwin          → result of probeDarwin()  ('coreml' or 'cpu')
 *   linux / win32   → result of probeNvidia()  ('cuda' or 'cpu')
 *   anything else   → 'cpu'
 *
 * @returns {'cuda'|'coreml'|'cpu'}
 */
function detectHardware() {
  const platform = os.platform();

  if (platform === 'darwin') {
    return probeDarwin();
  }
  if (platform === 'linux' || platform === 'win32') {
    return probeNvidia();
  }
  // FreeBSD, Android, unknown — CPU is always safe
  return EP.CPU;
}
104
-
105
- /**
106
- * detectHardwareAsync()
107
- *
108
- * Non-blocking wrapper. Runs the probe in a setImmediate tick so it doesn't
109
- * stall the event loop during module initialisation. Preferred for use inside
110
- * slipstream.js createMemory() boot sequence.
111
- *
112
- * @returns {Promise<'cuda'|'coreml'|'cpu'>}
113
- */
114
- function detectHardwareAsync() {
115
- return new Promise((resolve) => {
116
- setImmediate(() => resolve(detectHardware()));
117
- });
118
- }
119
-
120
- /**
121
- * getEPLabel()
122
- * Human-readable label for the audit log banner.
123
- *
124
- * @param {'cuda'|'coreml'|'cpu'} ep
125
- * @returns {string}
126
- */
127
- function getEPLabel(ep) {
128
- const labels = {
129
- [EP.CUDA] : 'CUDA (NVIDIA GPU)',
130
- [EP.COREML] : 'CoreML (Apple Neural Engine)',
131
- [EP.CPU] : 'CPU (WASM SIMD)',
132
- };
133
- return labels[ep] ?? 'CPU (WASM SIMD)';
134
- }
135
-
136
- module.exports = {
137
- detectHardware,
138
- detectHardwareAsync,
139
- getEPLabel,
140
- EP, // export constants so consumers don't hardcode strings
141
- };
142
-
143
- // ─── CLI Self-Test ───────────────────────────────────────────────────────────
144
- // Run directly to validate the probe on any target machine:
145
- // node detect-hardware.js
146
- //
147
- // Expected outputs:
148
- // Apple M-series → [SLIPSTREAM PROBE] EP: CoreML (Apple Neural Engine) ✓
149
- // NVIDIA machine → [SLIPSTREAM PROBE] EP: CUDA (NVIDIA GPU) ✓
150
- // Any other → [SLIPSTREAM PROBE] EP: CPU (WASM SIMD) ✓
151
-
152
// Runs only when this file is executed directly: times the probe and prints a
// short report (provider, probe duration, platform/arch, total RAM).
if (require.main === module) {
  const startedAt = Date.now();
  const ep = detectHardware();
  const elapsedMs = Date.now() - startedAt;

  const icon = ep === EP.CPU ? '⚙️ ' : '🚀';

  const report = [
    '',
    ' ╔══════════════════════════════════════════╗',
    ' ║ VEKTOR SLIPSTREAM — PROBE ║',
    ' ╚══════════════════════════════════════════╝',
    '',
    ` ${icon} EP: ${getEPLabel(ep)}`,
    ` ⏱ Probe: ${elapsedMs}ms`,
    ` 🖥 Platform: ${os.platform()} / ${os.arch()}`,
    ` 💾 RAM: ${(os.totalmem() / 1024 ** 3).toFixed(1)} GB`,
    '',
  ];

  if (ep === EP.CPU) {
    report.push(
      ' ℹ️ No GPU acceleration detected.',
      ' Slipstream will run on optimised CPU (WASM SIMD).',
      ' For CUDA: ensure nvidia-smi is accessible in PATH.',
      ' For CoreML: Apple Silicon Mac required.'
    );
  } else {
    report.push(
      ' ✓ Hardware acceleration confirmed. Slipstream will',
      ` engage ${getEPLabel(ep)} for sub-12ms embeddings.`
    );
  }

  report.push('');

  for (const line of report) {
    console.log(line);
  }
}
@@ -1,298 +0,0 @@
1
- /**
2
- * VEKTOR EntityResolver
3
- * Canonical entity resolution layer for vektor-slipstream.
4
- * Sits between your app and memory.remember() — normalises
5
- * entity references before they hit the MAGMA graph.
6
- *
7
- * Usage:
8
- * const resolver = new EntityResolver(memory, db);
9
- * await resolver.remember('the CFO approved the budget');
10
- * // → stored as "Sarah Chen approved the budget" (entity_0042)
11
- */
12
-
13
- import Database from 'better-sqlite3';
14
-
15
/**
 * Canonical entity resolution backed by two SQLite tables: `entities`
 * (canonical records) and `aliases` (alias text -> entity id).
 */
export class EntityResolver {
  /**
   * @param {object} memory - object exposing async remember(text, opts) and recall(query, k)
   * @param {string} dbPath - path of the SQLite database file to open
   */
  constructor(memory, dbPath = './vektor-entities.db') {
    this.memory = memory;
    this.db = new Database(dbPath);
    this._init();
  }

  // ─── Schema ────────────────────────────────────────────────────────────────

  /** Create the tables and index on first use (no-op when they already exist). */
  _init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS entities (
        id TEXT PRIMARY KEY,
        canonical TEXT NOT NULL,
        type TEXT DEFAULT 'unknown',
        confidence REAL DEFAULT 1.0,
        created_at INTEGER DEFAULT (unixepoch()),
        updated_at INTEGER DEFAULT (unixepoch())
      );

      CREATE TABLE IF NOT EXISTS aliases (
        alias TEXT PRIMARY KEY,
        entity_id TEXT NOT NULL,
        source TEXT DEFAULT 'manual',
        created_at INTEGER DEFAULT (unixepoch()),
        FOREIGN KEY (entity_id) REFERENCES entities(id)
      );

      CREATE INDEX IF NOT EXISTS idx_aliases_entity ON aliases(entity_id);
    `);
  }

  // ─── Public API ────────────────────────────────────────────────────────────

  /**
   * Drop-in replacement for memory.remember().
   * Replaces known aliases in `text` with canonical names, then stores the
   * result with the resolved entity ids added to `opts.metadata.entityIds`.
   *
   * @param {string} text - raw input text
   * @param {object} opts - passed through to memory.remember()
   * @returns {Promise<{normalised: string, entityIds: string[], result: *}>}
   */
  async remember(text, opts = {}) {
    const { normalised, entityIds } = await this._resolveText(text);
    const result = await this.memory.remember(normalised, {
      ...opts,
      metadata: { ...opts.metadata, entityIds }
    });
    return { normalised, entityIds, result };
  }

  /**
   * Resolve a single reference string to an entity row.
   * Tries, in order: case-insensitive exact alias match, fuzzy alias match,
   * then memory.recall() candidates scoring at least `threshold`. A recall
   * hit is stored as a new alias with source 'auto'.
   *
   * @param {string} ref - e.g. "the CFO", "Sarah", "S. Chen"
   * @param {number} threshold - minimum recall score (default 0.88)
   * @returns {Promise<object|null>} entity row, or null when nothing matches
   */
  async resolve(ref, threshold = 0.88) {
    const normalised = ref.trim().toLowerCase();

    // 1. Exact alias lookup (fast path)
    const exact = this.db
      .prepare('SELECT e.* FROM entities e JOIN aliases a ON a.entity_id = e.id WHERE lower(a.alias) = ?')
      .get(normalised);
    if (exact) return exact;

    // 2. Fuzzy alias lookup (handles minor typos/punctuation)
    const fuzzy = this._fuzzyAliasLookup(normalised);
    if (fuzzy) return fuzzy;

    // 3. Similarity via memory.recall()
    const candidates = await this.memory.recall(`entity: ${ref}`, 5);
    for (const c of candidates) {
      if (c.score >= threshold && c.metadata?.entityId) {
        const entity = this.getEntity(c.metadata.entityId);
        if (entity) {
          // Auto-learn this alias
          this.addAlias(ref, entity.id, 'auto');
          return entity;
        }
      }
    }

    return null;
  }

  /**
   * Register a new canonical entity. The canonical name is always stored as
   * an alias too.
   *
   * @param {string} canonical - e.g. "Sarah Chen"
   * @param {string} type - e.g. person | org | project | concept
   * @param {string[]} aliases - additional aliases
   * @returns {string} generated entity id
   */
  addEntity(canonical, type = 'unknown', aliases = []) {
    const id = `entity_${Date.now()}_${Math.random().toString(36).slice(2, 7)}`;

    this.db.prepare(`
      INSERT INTO entities (id, canonical, type)
      VALUES (?, ?, ?)
      ON CONFLICT(id) DO NOTHING
    `).run(id, canonical, type);

    for (const alias of [canonical, ...aliases]) {
      this.addAlias(alias, id, 'manual');
    }

    return id;
  }

  /**
   * Add an alias (trimmed) for an entity. An alias that already exists is
   * re-pointed at `entityId`.
   */
  addAlias(alias, entityId, source = 'manual') {
    this.db.prepare(`
      INSERT INTO aliases (alias, entity_id, source)
      VALUES (?, ?, ?)
      ON CONFLICT(alias) DO UPDATE SET entity_id = excluded.entity_id
    `).run(alias.trim(), entityId, source);
  }

  /**
   * Get entity by id.
   * @returns {object|null}
   */
  getEntity(id) {
    return this.db.prepare('SELECT * FROM entities WHERE id = ?').get(id) || null;
  }

  /**
   * List all entities ordered by canonical name, each with its alias strings.
   */
  listEntities() {
    const entities = this.db.prepare('SELECT * FROM entities ORDER BY canonical').all();
    // Prepared once and reused for every entity.
    const aliasesFor = this.db.prepare('SELECT alias, source FROM aliases WHERE entity_id = ?');
    return entities.map(e => ({
      ...e,
      aliases: aliasesFor.all(e.id).map(a => a.alias)
    }));
  }

  /**
   * Merge two entities — keeps targetId, moves all aliases of sourceId to it
   * and deletes sourceId. Both statements run in one transaction. Merging an
   * entity into itself is a no-op (previously it deleted the entity).
   *
   * @returns {object|null} the target entity row
   */
  mergeEntities(sourceId, targetId) {
    if (sourceId === targetId) return this.getEntity(targetId);

    const merge = this.db.transaction(() => {
      this.db.prepare('UPDATE aliases SET entity_id = ? WHERE entity_id = ?').run(targetId, sourceId);
      this.db.prepare('DELETE FROM entities WHERE id = ?').run(sourceId);
    });
    merge();

    return this.getEntity(targetId);
  }

  /**
   * Find likely duplicate entities: for each entity, recall similar memories
   * and collect those tagged with a different, existing entity id and scoring
   * at least `threshold`. Each unordered pair is reported once, with the
   * highest score seen for it.
   *
   * @param {number} threshold - minimum recall score (default 0.88)
   * @returns {Promise<{entity_a: object, entity_b: object, score: number}[]>}
   */
  async findDuplicateCandidates(threshold = 0.88) {
    const entities = this.listEntities();
    const bestByPair = new Map();

    for (const entity of entities) {
      const results = await this.memory.recall(`entity: ${entity.canonical}`, 5);
      for (const r of results) {
        const otherId = r.metadata?.entityId;
        if (!(r.score >= threshold) || !otherId || otherId === entity.id) continue;

        const other = this.getEntity(otherId);
        if (!other) continue; // memory references an entity that no longer exists

        // Order-independent key so (A,B) and (B,A) collapse into one candidate.
        const pairKey = [entity.id, other.id].sort().join('\u0000');
        const known = bestByPair.get(pairKey);
        if (!known || r.score > known.score) {
          bestByPair.set(pairKey, { entity_a: entity, entity_b: other, score: r.score });
        }
      }
    }

    return [...bestByPair.values()];
  }

  // ─── Private ───────────────────────────────────────────────────────────────

  /**
   * Replace known aliases in `text` with canonical names in a single pass.
   *
   * One pass over the original text matters: replacing alias by alias lets a
   * shorter alias re-match text that an earlier replacement inserted
   * ("the CFO" → "Sarah Chen" → "Sarah Chen Chen"). Longer aliases take
   * priority, matching is case-insensitive, and a match must not be adjacent
   * to a word character. Aliases whose entity row is missing are ignored.
   *
   * @param {string} text
   * @returns {Promise<{normalised: string, entityIds: string[]}>} entityIds in order of first appearance
   */
  async _resolveText(text) {
    const rows = this.db
      .prepare(
        'SELECT a.alias, a.entity_id, e.canonical FROM aliases a ' +
        'JOIN entities e ON e.id = a.entity_id ORDER BY length(a.alias) DESC'
      )
      .all();

    const byAlias = new Map();
    for (const row of rows) {
      const key = row.alias.toLowerCase();
      // A blank alias would match at every word boundary; skip it.
      if (key.trim() === '' || byAlias.has(key)) continue;
      byAlias.set(key, row);
    }

    if (byAlias.size === 0) return { normalised: text, entityIds: [] };

    // Longest first so the alternation prefers "Sarah Chen" over "Sarah".
    const alternation = [...byAlias.keys()]
      .sort((a, b) => b.length - a.length)
      .map(key => this._escapeRegex(key))
      .join('|');
    // Lookarounds instead of \b so aliases that start or end with punctuation ("S.") still match.
    const pattern = new RegExp(`(?<!\\w)(?:${alternation})(?!\\w)`, 'gi');

    const entityIds = new Set();
    // Function replacer: canonical names are inserted literally, "$&" and friends are not expanded.
    const normalised = text.replace(pattern, (match) => {
      const hit = byAlias.get(match.toLowerCase());
      if (!hit) return match;
      entityIds.add(hit.entity_id);
      return hit.canonical;
    });

    return { normalised, entityIds: [...entityIds] };
  }

  /**
   * Find the alias closest to `input` (expected lower-cased) by edit distance.
   *
   * The allowed distance grows with the shorter string's length (0 below 3
   * characters, 1 below 6, otherwise 2); a flat limit of 2 would let any
   * two-character input match any two-character alias.
   *
   * @param {string} input
   * @returns {object|null} entity row of the closest alias, or null
   */
  _fuzzyAliasLookup(input) {
    const rows = this.db
      .prepare('SELECT alias, entity_id FROM aliases')
      .all();

    let bestId = null;
    let bestDistance = Infinity;

    for (const { alias, entity_id } of rows) {
      const candidate = alias.toLowerCase();
      const shorter = Math.min(input.length, candidate.length);
      const maxDistance = Math.min(2, Math.floor(shorter / 3));

      // The length gap is a lower bound on the edit distance: skip hopeless pairs cheaply.
      if (Math.abs(input.length - candidate.length) > maxDistance) continue;

      const distance = this._levenshtein(input, candidate);
      if (distance <= maxDistance && distance < bestDistance) {
        bestDistance = distance;
        bestId = entity_id;
      }
    }

    return bestId === null ? null : this.getEntity(bestId);
  }

  /**
   * Levenshtein edit distance between two strings (insert / delete / substitute, cost 1 each).
   */
  _levenshtein(a, b) {
    const m = a.length, n = b.length;
    // Two rolling rows of the DP table are enough.
    let prev = Array.from({ length: n + 1 }, (_, j) => j);
    for (let i = 1; i <= m; i++) {
      const curr = [i];
      for (let j = 1; j <= n; j++) {
        curr[j] = a[i - 1] === b[j - 1]
          ? prev[j - 1]
          : 1 + Math.min(prev[j], curr[j - 1], prev[j - 1]);
      }
      prev = curr;
    }
    return prev[n];
  }

  /** Escape regex metacharacters so `str` matches literally inside a RegExp. */
  _escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}
269
-
270
-
271
- // ─── Example usage ───────────────────────────────────────────────────────────
272
- //
273
- // import { createMemory } from 'vektor-slipstream';
274
- // import { EntityResolver } from './entity-resolver.js';
275
- //
276
- // const memory = await createMemory({ provider: 'gemini', apiKey, agentId: 'my-agent' });
277
- // const resolver = new EntityResolver(memory);
278
- //
279
- // // Register known entities once (or load from a config file)
280
- // resolver.addEntity('Sarah Chen', 'person', ['Sarah', 'the CFO', 'S. Chen', 'Ms Chen']);
281
- // resolver.addEntity('VEKTOR Memory', 'org', ['Vektor', 'VEKTOR', 'the company']);
282
- // resolver.addEntity('Project Atlas', 'project', ['Atlas', 'the project', 'our roadmap']);
283
- //
284
- // // Drop-in for memory.remember() — resolves aliases automatically
285
- // await resolver.remember('the CFO approved Project Atlas budget');
286
- // // → stored as "Sarah Chen approved Project Atlas budget"
287
- // // → entityIds: ['entity_...sarah', 'entity_...atlas']
288
- //
289
- // // Resolve a reference without storing
290
- // const entity = await resolver.resolve('S. Chen');
291
- // console.log(entity.canonical); // → "Sarah Chen"
292
- //
293
- // // Find likely duplicate nodes for manual review
294
- // const dupes = await resolver.findDuplicateCandidates(0.88);
295
- // console.log(dupes); // → [{ entity_a, entity_b, score }]
296
- //
297
- // // Merge duplicates once confirmed
298
- // resolver.mergeEntities('entity_old_id', 'entity_keep_id');