@icex-labs/openclaw-memory-engine 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/backup.js ADDED
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Export/import for backup and migration.
3
+ */
4
+
5
+ import { readFileSync, writeFileSync, existsSync } from "node:fs";
6
+ import { join } from "node:path";
7
+ import { readCore, writeCore } from "./core.js";
8
+ import { loadArchival, appendRecord, rewriteArchival } from "./archival.js";
9
+ import { loadEmbeddingCache, saveEmbeddingCache } from "./embedding.js";
10
+
11
const FORMAT_TAG = "openclaw-memory-engine";
const FORMAT_VERSION = "1.2.0";

/**
 * Write a JSON snapshot of a workspace's memory to disk.
 *
 * The snapshot bundles the core block, all archival records and the embedding
 * cache, plus a `_meta` header (format tag, format version, export timestamp,
 * source workspace) and a few size counters.
 *
 * @param {string} ws - Workspace root directory.
 * @param {string} [outputPath] - Destination file. When falsy, the snapshot is
 *   written to `<ws>/memory/export-YYYY-MM-DD.json` (date taken from the UTC
 *   ISO timestamp).
 * @returns {{ path: string, stats: object }} The file that was written and the
 *   counters stored inside it.
 */
export function exportMemory(ws, outputPath) {
  const day = new Date().toISOString().slice(0, 10);
  const target = outputPath || join(ws, "memory", `export-${day}.json`);

  const core = readCore(ws);
  const archival = loadArchival(ws);
  const embeddings = loadEmbeddingCache(ws);

  const _meta = {
    format: FORMAT_TAG,
    version: FORMAT_VERSION,
    exported_at: new Date().toISOString(),
    workspace: ws,
  };
  const stats = {
    core_size: JSON.stringify(core).length,
    archival_count: archival.length,
    embedding_count: Object.keys(embeddings).length,
  };

  const snapshot = { _meta, core, archival, embeddings, stats };
  writeFileSync(target, JSON.stringify(snapshot, null, 2), "utf-8");
  return { path: target, stats };
}
46
+
47
/**
 * Import a snapshot file carrying this module's format tag.
 *
 * Modes:
 *  - "replace": overwrite the core block, the archival store and the embedding
 *    cache with the file's contents (each part only when present in the file).
 *  - any other value (default "merge"): append archival records whose
 *    `content` is not already stored, and add embeddings whose id is not in
 *    the cache yet. The core block is left untouched in this mode.
 *
 * @param {string} ws - Workspace root directory.
 * @param {string} inputPath - Path of the snapshot file to read.
 * @param {string} [mode="merge"] - "replace" or "merge".
 * @returns {string} Human-readable summary of what was imported.
 * @throws {Error} When the file is missing, is not valid JSON, or does not
 *   carry the expected format tag.
 */
export function importMemory(ws, inputPath, mode = "merge") {
  if (!existsSync(inputPath)) throw new Error(`File not found: ${inputPath}`);

  const raw = readFileSync(inputPath, "utf-8");
  let data;
  try {
    data = JSON.parse(raw);
  } catch (e) {
    throw new Error(`Invalid JSON: ${e.message}`);
  }

  // `data?.`: valid JSON may be `null` or a primitive, which must be rejected
  // with the format error rather than a TypeError.
  if (data?._meta?.format !== FORMAT_TAG) {
    throw new Error("Not a memory-engine export file.");
  }

  const hasArchival = Array.isArray(data.archival);
  const importRecords = hasArchival ? data.archival : [];
  const embeddings =
    data.embeddings !== null && typeof data.embeddings === "object" && !Array.isArray(data.embeddings)
      ? data.embeddings
      : null;

  if (mode === "replace") {
    if (data.core) writeCore(ws, data.core);
    if (hasArchival) rewriteArchival(ws, importRecords);
    if (embeddings) {
      // The object returned by loadEmbeddingCache is the one saveEmbeddingCache
      // persists, so it is emptied and refilled in place. Without the clearing
      // step, vectors of records that no longer exist would survive a replace.
      const cache = loadEmbeddingCache(ws);
      for (const id of Object.keys(cache)) delete cache[id];
      Object.assign(cache, embeddings);
      saveEmbeddingCache(ws);
    }
    return `REPLACED: core + ${importRecords.length} archival + ${Object.keys(embeddings || {}).length} embeddings`;
  }

  // Merge mode: records are de-duplicated on exact `content`.
  const existing = loadArchival(ws);
  const existingContents = new Set(existing.map((r) => r.content));

  let added = 0;
  for (const r of importRecords) {
    // Malformed entries (null, primitives) are counted as skipped.
    if (r === null || typeof r !== "object") continue;
    if (!existingContents.has(r.content)) {
      appendRecord(ws, { content: r.content, entity: r.entity, tags: r.tags, source: "import" });
      existingContents.add(r.content);
      added++;
    }
  }

  let embAdded = 0;
  if (embeddings) {
    const embCache = loadEmbeddingCache(ws);
    for (const [id, emb] of Object.entries(embeddings)) {
      if (!embCache[id]) {
        embCache[id] = emb;
        embAdded++;
      }
    }
    saveEmbeddingCache(ws);
  }

  return `MERGED: ${added} new records (${importRecords.length - added} skipped), ${embAdded} new embeddings`;
}
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Auto-extract structured facts from text blocks.
3
+ * Splits by sentence boundaries (Chinese + English), infers entity, deduplicates.
4
+ */
5
+
6
+ import { loadArchival, appendRecord } from "./archival.js";
7
+ import { indexEmbedding } from "./embedding.js";
8
+
9
/**
 * Generic entity inference patterns (no personal data).
 *
 * Each entry is `[pattern, entityName]`; the first pattern that matches wins.
 * English keywords are wrapped in `\b…\b` so they only match whole words.
 * Chinese keywords are kept OUTSIDE that group: `\b` only matches next to an
 * ASCII word character, so a CJK keyword placed between word boundaries can
 * never match inside Chinese text.
 */
const ENTITY_PATTERNS = [
  [/\b(IBKR|Interactive Brokers)\b/i, "IBKR"],
  [/\b(immigration|PR|IRCC|CBSA|visa)\b/i, "immigration"],
  [/\b(quant|trading|backtest|portfolio)\b/i, "trading"],
  [/\b(doctor|hospital|clinic)\b|医生|医院/i, "health"],
  [/\b(car|vehicle|SUV|sedan|truck|Tesla|Toyota|Lexus|BMW)\b/i, "vehicles"],
  [/\b(house|home|mortgage|rent|property)\b/i, "property"],
  [/\b(school|university|college)\b|学校/i, "education"],
  [/\b(insurance)\b|保险/i, "insurance"],
  [/\b(lawyer|attorney|legal)\b|律师/i, "legal"],
];

/**
 * Guess the entity a piece of text is about.
 *
 * @param {string} text - Text to classify.
 * @param {string} fallback - Value returned when no pattern matches.
 * @returns {string} Name of the first matching pattern, or `fallback`.
 */
function inferEntity(text, fallback) {
  for (const [pat, name] of ENTITY_PATTERNS) {
    if (pat.test(text)) return name;
  }
  return fallback;
}
28
+
29
/**
 * Split text into sentence-level fact candidates.
 *
 * Processing steps:
 *  1. Split into non-empty trimmed lines.
 *  2. Split each line at sentence boundaries.
 *  3. Drop markdown headings / rules and anything shorter than 10 characters.
 *  4. Strip leading bullet or "1." list markers and re-check the length.
 *
 * @param {string} text - Raw text block.
 * @returns {string[]} Candidate facts in document order.
 */
function extractCandidates(text) {
  // Sentence boundary:
  //  - after full-width 。!?; (written as \u escapes so the pattern survives
  //    re-encoding) with optional whitespace, or
  //  - after ASCII . ! ? ; unless an ASCII letter or digit follows directly,
  //    so decimals, versions and domains ("3.50", "v1.2.0", "example.com")
  //    stay in one piece.
  const sentenceBoundary = /(?<=[\u3002\uFF01\uFF1F\uFF1B])\s*|(?<=[.!?;])(?![A-Za-z0-9])\s*/;

  const segments = [];
  for (const rawLine of text.split(/\n/)) {
    const line = rawLine.trim();
    if (!line) continue;
    const sentences = line
      .split(sentenceBoundary)
      .map((s) => s.trim())
      .filter(Boolean);
    segments.push(...(sentences.length > 1 ? sentences : [line]));
  }

  return segments
    .filter((seg) => {
      if (seg.startsWith("#") || seg.length < 10) return false;
      if (/^(##|===|---|\*\*\*)/.test(seg)) return false;
      return true;
    })
    .map((seg) => seg.replace(/^[-*•]\s*/, "").replace(/^\d+\.\s*/, "").trim())
    .filter((s) => s.length >= 10);
}
51
+
52
/**
 * Check whether a fact is a near-duplicate of existing content.
 *
 * A fact counts as a duplicate when more than 70% of its keywords
 * (whitespace-separated tokens longer than 2 characters) also occur in one
 * single existing text. When the fact has no keywords at all, only an exact
 * text match counts, so identical short-word facts are still caught.
 *
 * @param {string} factLower - Candidate fact, already lower-cased.
 * @param {string[]} existingTexts - Existing texts, already lower-cased.
 * @returns {boolean} True when the fact should be skipped as a duplicate.
 */
function isDuplicate(factLower, existingTexts) {
  const keywordsOf = (s) => new Set(s.split(/\s+/).filter((w) => w.length > 2));

  const factWords = keywordsOf(factLower);
  if (factWords.size === 0) {
    // No keywords to compare: the overlap ratio is undefined, so fall back to
    // an exact match instead of always answering "not a duplicate".
    return existingTexts.includes(factLower);
  }

  for (const ex of existingTexts) {
    const exWords = keywordsOf(ex);
    let overlap = 0;
    for (const w of factWords) {
      if (exWords.has(w)) overlap++;
    }
    if (overlap / factWords.size > 0.7) return true;
  }
  return false;
}
67
+
68
/**
 * Turn a block of text into archival records.
 *
 * The text is split into candidate facts. Each candidate that is not a
 * near-duplicate of stored content (or of a fact inserted earlier in the same
 * call) is appended to archival with an inferred entity, and its embedding is
 * indexed in the background.
 *
 * @param {string} ws - Workspace root directory.
 * @param {string} text - Text to mine for facts.
 * @param {string} [defaultEntity=""] - Entity used when none can be inferred.
 * @param {string[]} [defaultTags=[]] - Tags attached to every inserted record.
 * @returns {Promise<{ inserted: string[], skipped: string[], total: number }>}
 *   `inserted` holds new record ids, `skipped` holds the first 60 characters
 *   of each rejected fact, `total` is the number of candidates examined.
 */
export async function consolidateText(ws, text, defaultEntity = "", defaultTags = []) {
  const candidates = extractCandidates(text);
  if (candidates.length === 0) {
    return { inserted: [], skipped: [], total: 0 };
  }

  const knownTexts = loadArchival(ws).map((rec) => (rec.content || "").toLowerCase());
  const inserted = [];
  const skipped = [];

  for (const fact of candidates) {
    const lowered = fact.toLowerCase();

    if (!isDuplicate(lowered, knownTexts)) {
      const record = appendRecord(ws, {
        content: fact,
        entity: inferEntity(fact, defaultEntity),
        tags: defaultTags,
        source: "consolidate",
      });
      // Best effort: an embedding failure must not fail the consolidation.
      indexEmbedding(ws, record).catch(() => {});
      inserted.push(record.id);
      knownTexts.push(lowered);
    } else {
      skipped.push(fact.slice(0, 60));
    }
  }

  return { inserted, skipped, total: candidates.length };
}
package/lib/core.js ADDED
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Core memory: small structured identity block (~500 tokens).
3
+ * Stored as memory/core.json. Agent reads at session start, updates atomically.
4
+ */
5
+
6
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
7
+ import { join } from "node:path";
8
+ import { corePath } from "./paths.js";
9
+
10
/** Template for a brand-new core memory block. Never hand this object out directly. */
const DEFAULT_CORE = {
  _meta: { version: 1, updated_at: "", description: "Core memory block." },
  user: {},
  relationship: {},
  preferences: {},
  current_focus: [],
};

/**
 * Read the core memory block of a workspace, creating it on first use.
 *
 * When the core file does not exist, the `memory` directory is created, a
 * fresh copy of the default block stamped with the current time is written,
 * and that copy is returned.
 *
 * @param {string} ws - Workspace root directory.
 * @returns {object} The parsed core memory block.
 * @throws {SyntaxError} When an existing core file does not contain valid JSON.
 */
export function readCore(ws) {
  const p = corePath(ws);
  if (existsSync(p)) {
    return JSON.parse(readFileSync(p, "utf-8"));
  }

  mkdirSync(join(ws, "memory"), { recursive: true });
  // Deep copy: a shallow spread would share the nested `user`, `preferences`,
  // … objects with DEFAULT_CORE, so a caller mutating the returned core would
  // corrupt the template for every workspace initialised later.
  const init = JSON.parse(JSON.stringify(DEFAULT_CORE));
  init._meta.updated_at = new Date().toISOString();
  writeFileSync(p, JSON.stringify(init, null, 2), "utf-8");
  return init;
}
31
+
32
/**
 * Persist a core memory block.
 *
 * Stamps `data._meta.updated_at` with the current time (creating `_meta` when
 * it is missing) on the object passed in, then writes it as pretty-printed
 * JSON to the workspace's core file.
 *
 * @param {string} ws - Workspace root directory.
 * @param {object} data - Core block to store; it is modified in place.
 */
export function writeCore(ws, data) {
  if (!data._meta) {
    data._meta = {};
  }
  data._meta.updated_at = new Date().toISOString();

  const target = corePath(ws);
  const serialized = JSON.stringify(data, null, 2);
  writeFileSync(target, serialized, "utf-8");
}
37
+
38
/**
 * Read a nested value addressed by a dot-separated path.
 *
 * @param {*} obj - Root value to start from.
 * @param {string} path - Keys joined with ".", e.g. "user.name".
 * @returns {*} The value at the path, or `undefined` as soon as a step would
 *   have to descend into something that is not a non-null object.
 */
export function dotGet(obj, path) {
  return path
    .split(".")
    .reduce(
      (node, key) => (node !== null && typeof node === "object" ? node[key] : undefined),
      obj,
    );
}
48
+
49
/**
 * Set a nested value addressed by a dot-separated path.
 *
 * Missing (undefined or null) intermediate levels are created as empty
 * objects. Path segments that would reach into the prototype chain are
 * rejected so a crafted path cannot modify `Object.prototype`.
 *
 * @param {object} obj - Root object; it is modified in place.
 * @param {string} path - Keys joined with ".", e.g. "user.name".
 * @param {*} value - Value to store at the path.
 * @returns {*} The value previously stored at the path.
 * @throws {Error} When the path contains `__proto__`, `constructor` or
 *   `prototype`.
 */
export function dotSet(obj, path, value) {
  const parts = path.split(".");
  for (const key of parts) {
    if (key === "__proto__" || key === "constructor" || key === "prototype") {
      throw new Error(`Unsafe path segment: ${key}`);
    }
  }

  let cur = obj;
  for (const key of parts.slice(0, -1)) {
    // `== null` also replaces an explicit null, which cannot hold children.
    if (cur[key] == null) cur[key] = {};
    cur = cur[key];
  }

  const last = parts[parts.length - 1];
  const old = cur[last];
  cur[last] = value;
  return old;
}
61
+
62
/**
 * Convert a JSON-looking string into the value it encodes.
 *
 * Works around LLMs passing '["a","b"]' as a string instead of a real array.
 * Only strings that, once trimmed, are wrapped in `[...]` or `{...}` are
 * parsed. Anything else, including text that fails to parse, is returned
 * exactly as received.
 *
 * @param {*} value - Value to normalise.
 * @returns {*} The parsed array/object, or `value` unchanged.
 */
export function autoParse(value) {
  if (typeof value !== "string") {
    return value;
  }

  const text = value.trim();
  const first = text[0];
  const last = text[text.length - 1];
  const looksLikeArray = first === "[" && last === "]";
  const looksLikeObject = first === "{" && last === "}";
  if (!looksLikeArray && !looksLikeObject) {
    return value;
  }

  try {
    return JSON.parse(text);
  } catch {
    // Not valid JSON after all: keep the caller's original string.
    return value;
  }
}
@@ -0,0 +1,235 @@
1
+ /**
2
+ * Dashboard: generates a self-contained HTML file for browsing memory.
3
+ * Timeline view, graph visualization, search explorer, stats.
4
+ */
5
+
6
+ import { readFileSync, writeFileSync, existsSync } from "node:fs";
7
+ import { join } from "node:path";
8
+ import { readCore } from "./core.js";
9
+ import { loadArchival } from "./archival.js";
10
+ import { loadGraph } from "./graph.js";
11
+ import { loadEpisodes } from "./episodes.js";
12
+ import { loadEmbeddingCache } from "./embedding.js";
13
+ import { analyzePatterns } from "./reflection.js";
14
+
15
/**
 * Build the memory dashboard and write it to disk as a single HTML file.
 *
 * Collects the core block, archival records, graph triples, episodes, the
 * embedding cache and a 30-day pattern analysis, trims the lists to their
 * most recent entries (50 facts, 20 episodes, 100 triples) and renders them.
 *
 * @param {string} ws - Workspace root directory.
 * @param {string|null} [outputPath=null] - Destination file; when falsy the
 *   page is written to `<ws>/memory/dashboard.html`.
 * @returns {string} Path of the file that was written.
 */
export function generateDashboard(ws, outputPath = null) {
  const target = outputPath || join(ws, "memory", "dashboard.html");

  const core = readCore(ws);
  const facts = loadArchival(ws);
  const triples = loadGraph(ws);
  const episodes = loadEpisodes(ws);
  const embCache = loadEmbeddingCache(ws);
  const reflection = analyzePatterns(ws, 30);

  const stats = {
    archival: facts.length,
    graph: triples.length,
    episodes: episodes.length,
    embeddings: Object.keys(embCache).length,
  };

  // Unique entity names: fact entities first, then triple subjects, then
  // triple objects; sorted with the default string ordering.
  const entityNames = new Set();
  for (const fact of facts) {
    if (fact.entity) entityNames.add(fact.entity);
  }
  for (const triple of triples) entityNames.add(triple.s);
  for (const triple of triples) entityNames.add(triple.o);

  const page = renderHtml({
    generatedAt: new Date().toISOString(),
    core,
    stats,
    reflection,
    // Only the newest entries are embedded, to keep the HTML small.
    recentArchival: facts.slice(-50).reverse(),
    recentEpisodes: episodes.slice(-20).reverse(),
    graphTriples: triples.slice(-100),
    entities: [...entityNames].sort(),
  });

  writeFileSync(target, page, "utf-8");
  return target;
}
57
+
58
/**
 * Render the dashboard as one self-contained HTML document.
 *
 * Stored memory is treated as untrusted text. Data embedded in the inline
 * script is serialised with `toScriptJson` so it cannot close the script
 * element, and every value the page script inserts through `innerHTML` goes
 * through the page's own `escapeHtml`.
 *
 * @param {object} data - Dashboard data: `generatedAt` (ISO string), `core`,
 *   `stats` ({ archival, graph, episodes, embeddings }), `reflection`,
 *   `recentArchival`, `recentEpisodes`, `graphTriples`. `entities` is accepted
 *   but not rendered.
 * @returns {string} Complete HTML page.
 */
function renderHtml(data) {
  const coreJson = JSON.stringify(data.core, null, 2);
  const archivalJson = toScriptJson(data.recentArchival);
  const episodesJson = toScriptJson(data.recentEpisodes);
  const graphJson = toScriptJson(data.graphTriples);
  const reflectionJson = toScriptJson(data.reflection);

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Memory Engine Dashboard</title>
<style>
:root { --bg: #0d1117; --card: #161b22; --border: #30363d; --text: #c9d1d9; --accent: #58a6ff; --green: #3fb950; --yellow: #d29922; --red: #f85149; }
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; background: var(--bg); color: var(--text); padding: 20px; max-width: 1200px; margin: 0 auto; }
h1 { color: var(--accent); margin-bottom: 8px; font-size: 24px; }
h2 { color: var(--accent); margin: 24px 0 12px; font-size: 18px; border-bottom: 1px solid var(--border); padding-bottom: 8px; }
h3 { color: var(--text); margin: 16px 0 8px; font-size: 15px; }
.subtitle { color: #8b949e; font-size: 13px; margin-bottom: 20px; }
.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 12px; margin-bottom: 20px; }
.stat-card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 16px; text-align: center; }
.stat-num { font-size: 28px; font-weight: 700; color: var(--accent); }
.stat-label { font-size: 12px; color: #8b949e; margin-top: 4px; }
.card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 16px; margin-bottom: 12px; }
.card-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px; }
.card-date { color: #8b949e; font-size: 12px; }
.card-entity { background: var(--accent); color: #000; padding: 2px 8px; border-radius: 12px; font-size: 11px; font-weight: 600; }
.card-content { font-size: 14px; line-height: 1.5; }
.tag { display: inline-block; background: #21262d; border: 1px solid var(--border); color: #8b949e; padding: 1px 6px; border-radius: 4px; font-size: 11px; margin: 2px; }
.importance { display: inline-block; width: 8px; height: 8px; border-radius: 50%; margin-right: 6px; }
.importance.high { background: var(--red); }
.importance.mid { background: var(--yellow); }
.importance.low { background: var(--green); }
.triple { font-family: monospace; font-size: 13px; padding: 6px 0; border-bottom: 1px solid var(--border); }
.triple:last-child { border-bottom: none; }
.episode { border-left: 3px solid var(--accent); padding-left: 12px; margin-bottom: 16px; }
.episode-mood { color: var(--yellow); font-size: 12px; }
.episode-decisions { color: var(--green); font-size: 13px; margin-top: 4px; }
pre { background: #0d1117; border: 1px solid var(--border); border-radius: 6px; padding: 12px; overflow-x: auto; font-size: 12px; color: var(--green); }
.tabs { display: flex; gap: 0; margin-bottom: 0; border-bottom: 1px solid var(--border); }
.tab { padding: 8px 16px; cursor: pointer; color: #8b949e; border-bottom: 2px solid transparent; font-size: 14px; }
.tab.active { color: var(--accent); border-bottom-color: var(--accent); }
.tab-content { display: none; padding-top: 16px; }
.tab-content.active { display: block; }
.search-box { width: 100%; padding: 8px 12px; background: var(--bg); border: 1px solid var(--border); border-radius: 6px; color: var(--text); font-size: 14px; margin-bottom: 12px; }
.bar { display: inline-block; height: 16px; background: var(--accent); border-radius: 2px; margin-right: 4px; vertical-align: middle; }
</style>
</head>
<body>

<h1>Memory Engine Dashboard</h1>
<div class="subtitle">Generated: ${data.generatedAt.slice(0, 19).replace("T", " ")} UTC</div>

<div class="grid">
<div class="stat-card"><div class="stat-num">${data.stats.archival}</div><div class="stat-label">Facts</div></div>
<div class="stat-card"><div class="stat-num">${data.stats.graph}</div><div class="stat-label">Graph Triples</div></div>
<div class="stat-card"><div class="stat-num">${data.stats.episodes}</div><div class="stat-label">Episodes</div></div>
<div class="stat-card"><div class="stat-num">${data.stats.embeddings}</div><div class="stat-label">Embeddings</div></div>
</div>

<div class="tabs">
<div class="tab active" onclick="switchTab('facts')">Facts</div>
<div class="tab" onclick="switchTab('graph')">Graph</div>
<div class="tab" onclick="switchTab('episodes')">Episodes</div>
<div class="tab" onclick="switchTab('core')">Core Memory</div>
<div class="tab" onclick="switchTab('reflection')">Reflection</div>
</div>

<div id="tab-facts" class="tab-content active">
<input class="search-box" id="fact-search" placeholder="Search facts..." oninput="filterFacts()">
<div id="fact-list"></div>
</div>

<div id="tab-graph" class="tab-content">
<h3>Knowledge Graph (${data.stats.graph} triples)</h3>
<div id="graph-list"></div>
</div>

<div id="tab-episodes" class="tab-content">
<h3>Conversation Episodes (${data.stats.episodes})</h3>
<div id="episode-list"></div>
</div>

<div id="tab-core" class="tab-content">
<h3>Core Memory Block</h3>
<pre>${escapeHtml(coreJson)}</pre>
</div>

<div id="tab-reflection" class="tab-content">
<h3>30-Day Reflection</h3>
<div id="reflection-content"></div>
</div>

<script>
const archival = ${archivalJson} || [];
const episodes = ${episodesJson} || [];
const graph = ${graphJson} || [];
const reflection = ${reflectionJson};
const coreJson = ${toScriptJson(coreJson)};

function escapeHtml(s) { return String(s == null ? '' : s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;'); }

function switchTab(name) {
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));
event.target.classList.add('active');
document.getElementById('tab-' + name).classList.add('active');
}

function impClass(v) { return v >= 7 ? 'high' : v <= 3 ? 'low' : 'mid'; }

function renderFacts(facts) {
return facts.map(r => \`
<div class="card">
<div class="card-header">
<span><span class="importance \${impClass(r.importance||5)}"></span>\${r.entity ? '<span class="card-entity">'+escapeHtml(r.entity)+'</span>' : ''}</span>
<span class="card-date">\${escapeHtml(String(r.ts||'').slice(0,10))} · imp=\${escapeHtml(r.importance||5)} · accessed=\${escapeHtml(r.access_count||0)}</span>
</div>
<div class="card-content">\${escapeHtml(r.content)}</div>
<div>\${(r.tags||[]).map(t => '<span class="tag">#'+escapeHtml(t)+'</span>').join('')}</div>
</div>
\`).join('');
}

function filterFacts() {
const q = document.getElementById('fact-search').value.toLowerCase();
const filtered = q ? archival.filter(r =>
String(r.content||'').toLowerCase().includes(q) ||
String(r.entity||'').toLowerCase().includes(q) ||
(r.tags||[]).some(t => String(t).toLowerCase().includes(q))
) : archival;
document.getElementById('fact-list').innerHTML = renderFacts(filtered);
}

// Init facts
document.getElementById('fact-list').innerHTML = renderFacts(archival);

// Init graph
document.getElementById('graph-list').innerHTML = graph.map(t =>
'<div class="triple">(' + escapeHtml(t.s) + ') <span style="color:var(--accent)">—' + escapeHtml(t.r) + '→</span> (' + escapeHtml(t.o) + ')</div>'
).join('') || '<p style="color:#8b949e">No graph triples yet.</p>';

// Init episodes
document.getElementById('episode-list').innerHTML = episodes.map(ep => \`
<div class="episode">
<div class="card-date">\${escapeHtml(String(ep.ts||'').slice(0,10))} \${ep.mood ? '<span class="episode-mood">['+escapeHtml(ep.mood)+']</span>' : ''}</div>
<div class="card-content" style="margin:4px 0">\${escapeHtml(ep.summary||'')}</div>
\${(ep.decisions||[]).length ? '<div class="episode-decisions">Decisions: '+(ep.decisions||[]).map(d => escapeHtml(d)).join('; ')+'</div>' : ''}
<div>\${(ep.topics||[]).map(t => '<span class="tag">'+escapeHtml(t)+'</span>').join('')}</div>
</div>
\`).join('') || '<p style="color:#8b949e">No episodes yet.</p>';

// Init reflection
const ref = reflection || {};
const topTopics = (ref.top_topics||[]).map(([t,c]) => '<div>'+escapeHtml(t)+': <span class="bar" style="width:'+Math.min((Number(c)||0)*20,200)+'px"></span> '+escapeHtml(c)+'</div>').join('');
document.getElementById('reflection-content').innerHTML = \`
<div class="grid">
<div class="stat-card"><div class="stat-num">\${escapeHtml(ref.activity?.new_facts||0)}</div><div class="stat-label">New Facts (30d)</div></div>
<div class="stat-card"><div class="stat-num">\${escapeHtml(ref.activity?.new_episodes||0)}</div><div class="stat-label">New Episodes</div></div>
<div class="stat-card"><div class="stat-num">\${escapeHtml(ref.activity?.total_decisions||0)}</div><div class="stat-label">Decisions</div></div>
<div class="stat-card"><div class="stat-num">\${escapeHtml(ref.health?.forgetting_candidates||0)}</div><div class="stat-label">Forgetting Candidates</div></div>
</div>
<h3>Top Topics</h3>\${topTopics||'<p style="color:#8b949e">No data</p>'}
<h3>Time Distribution</h3>
<div>Morning: \${escapeHtml(ref.time_distribution?.morning||0)} · Afternoon: \${escapeHtml(ref.time_distribution?.afternoon||0)} · Evening: \${escapeHtml(ref.time_distribution?.evening||0)} · Night: \${escapeHtml(ref.time_distribution?.night||0)}</div>
\${ref.health?.neglected_entities?.length ? '<h3>Neglected Entities</h3><div>'+(ref.health.neglected_entities||[]).map(e => escapeHtml(e)).join(', ')+'</div>' : ''}
\`;
</script>
</body>
</html>`;
}

/**
 * Escape text for use in HTML element content or a double-quoted attribute.
 *
 * @param {*} s - Value to escape; null/undefined become an empty string and
 *   other non-strings are converted with `String()`.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtml(s) {
  return String(s ?? "")
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}

/**
 * Serialise a value as JSON that is safe to paste into an inline script.
 *
 * `<` is written as a unicode escape so the data can never contain a closing
 * script tag or an HTML comment opener; U+2028/U+2029 are escaped because
 * older JavaScript engines treat them as line terminators inside strings.
 *
 * @param {*} value - JSON-serialisable value; undefined is written as null.
 * @returns {string} JavaScript-literal-compatible JSON text.
 */
function toScriptJson(value) {
  return JSON.stringify(value ?? null)
    .replace(/</g, "\\u003c")
    .replace(/\u2028/g, "\\u2028")
    .replace(/\u2029/g, "\\u2029");
}
package/lib/dedup.js ADDED
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Deduplication via embedding cosine similarity.
3
+ */
4
+
5
+ import { loadArchival, rewriteArchival } from "./archival.js";
6
+ import {
7
+ loadEmbeddingCache, saveEmbeddingCache, getEmbedding, cosineSimilarity,
8
+ } from "./embedding.js";
9
+ import { DEDUP_SIMILARITY_THRESHOLD } from "./paths.js";
10
+
11
/**
 * Find near-duplicate pairs in archival memory.
 *
 * Embeddings missing from the cache are fetched first (one request per
 * record) and the cache is saved. Records are then compared pairwise by
 * cosine similarity; pairs at or above DEDUP_SIMILARITY_THRESHOLD are
 * reported. Within a pair the record with the higher `access_count` is kept
 * (the later record wins ties).
 *
 * A record that has already been marked for dropping takes no further part
 * in comparisons, so each record is reported as `drop` at most once and no
 * record is dropped on the strength of a match that is itself being dropped.
 *
 * @param {string} ws - Workspace root directory.
 * @returns {Promise<Array<{ keep: object, drop: object, similarity: number }>>}
 *   Duplicate pairs; `similarity` is rounded to three decimals.
 */
export async function findDuplicates(ws) {
  const records = loadArchival(ws);
  const embCache = loadEmbeddingCache(ws);

  // Build missing embeddings
  for (const r of records) {
    if (embCache[r.id]) continue;
    const emb = await getEmbedding(
      [r.content, r.entity, ...(r.tags || [])].filter(Boolean).join(" "),
    );
    if (emb) embCache[r.id] = emb;
  }
  saveEmbeddingCache(ws);

  // O(n²) pairwise comparison
  const dupes = [];
  const droppedIds = new Set();
  for (let i = 0; i < records.length; i++) {
    const a = records[i];
    const embA = embCache[a.id];
    if (!embA || droppedIds.has(a.id)) continue;

    for (let j = i + 1; j < records.length; j++) {
      const b = records[j];
      const embB = embCache[b.id];
      if (!embB || droppedIds.has(b.id)) continue;

      const sim = cosineSimilarity(embA, embB);
      // Written as a negated >= so a NaN similarity is never reported.
      if (!(sim >= DEDUP_SIMILARITY_THRESHOLD)) continue;

      const keepLater = (b.access_count || 0) >= (a.access_count || 0);
      const keep = keepLater ? b : a;
      const drop = keepLater ? a : b;
      droppedIds.add(drop.id);
      dupes.push({ keep, drop, similarity: Math.round(sim * 1000) / 1000 });

      // Once `a` itself is dropped there is nothing left to compare it with.
      if (drop === a) break;
    }
  }
  return dupes;
}
52
+
53
/**
 * Remove the `drop` side of each duplicate pair from archival memory and
 * delete the matching embeddings from the cache.
 *
 * @param {string} ws - Workspace root directory.
 * @param {Array<{ drop: { id: string } }>} dupes - Duplicate pairs whose
 *   `drop.id` identifies the record to remove.
 * @returns {{ removed: number, remaining: number }} Number of records actually
 *   removed from archival and the number left afterwards.
 */
export function applyDedup(ws, dupes) {
  const records = loadArchival(ws);
  const dropIds = new Set(dupes.map((d) => d.drop.id));
  const cleaned = records.filter((r) => !dropIds.has(r.id));
  rewriteArchival(ws, cleaned);

  const embCache = loadEmbeddingCache(ws);
  for (const id of dropIds) delete embCache[id];
  saveEmbeddingCache(ws);

  // Count records, not pairs: the same record can appear in several pairs,
  // and a listed id may no longer exist in archival.
  return { removed: records.length - cleaned.length, remaining: cleaned.length };
}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * OpenAI embedding API + local file cache.
3
+ */
4
+
5
+ import { readFileSync, writeFileSync, existsSync } from "node:fs";
6
+ import { embeddingCachePath, EMBEDDING_MODEL, EMBEDDING_DIM } from "./paths.js";
7
+
8
/** In-memory embedding cache keyed by workspace path. */
const cacheMap = new Map();

/**
 * Return the embedding cache of a workspace, loading it from disk on first
 * access.
 *
 * The returned object is the live in-memory cache: entries added to it are
 * what `saveEmbeddingCache` later writes out. A missing, unreadable or
 * malformed cache file yields an empty cache.
 *
 * @param {string} ws - Workspace root directory.
 * @returns {object} Cache object mapping ids to embedding vectors.
 */
export function loadEmbeddingCache(ws) {
  if (cacheMap.has(ws)) return cacheMap.get(ws);

  const p = embeddingCachePath(ws);
  let data = {};
  if (existsSync(p)) {
    try {
      const parsed = JSON.parse(readFileSync(p, "utf-8"));
      // Only a plain object is usable as a cache. `null` or an array would
      // break later property writes or be silently skipped on save.
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        data = parsed;
      }
    } catch {
      /* corrupt cache file: start over with an empty cache */
    }
  }
  cacheMap.set(ws, data);
  return data;
}
21
+
22
/**
 * Write the in-memory embedding cache of a workspace to its cache file.
 * Does nothing when no cache has been loaded for that workspace.
 *
 * @param {string} ws - Workspace root directory.
 */
export function saveEmbeddingCache(ws) {
  const cached = cacheMap.get(ws);
  if (cached) {
    writeFileSync(embeddingCachePath(ws), JSON.stringify(cached), "utf-8");
  }
}
27
+
28
/**
 * Look up the OpenAI API key in the environment.
 *
 * @returns {string|null} Value of `OPENAI_API_KEY`, or null when it is unset
 *   or empty.
 */
export function resolveApiKey() {
  const key = process.env.OPENAI_API_KEY;
  return key ? key : null;
}
31
+
32
/**
 * Request an embedding vector for a text from the OpenAI embeddings endpoint.
 *
 * Every failure is reported as null: no API key configured, a network or
 * parsing error, a non-OK HTTP status, or a response without an embedding.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} The embedding, or null when unavailable.
 */
export async function getEmbedding(text) {
  const apiKey = resolveApiKey();
  if (!apiKey) {
    return null;
  }

  try {
    const response = await fetch("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
      body: JSON.stringify({ input: text, model: EMBEDDING_MODEL, dimensions: EMBEDDING_DIM }),
    });
    if (!response.ok) {
      return null;
    }
    const payload = await response.json();
    const vector = payload?.data?.[0]?.embedding;
    return vector || null;
  } catch {
    // Best effort by design: callers treat null as "no embedding available".
    return null;
  }
}
47
+
48
/**
 * Cosine similarity of two equally long numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector is missing,
 *   the lengths differ, or either vector has zero magnitude.
 */
export function cosineSimilarity(a, b) {
  if (!a || !b) return 0;
  if (a.length !== b.length) return 0;

  let dot = 0;
  let squaresA = 0;
  let squaresB = 0;
  for (let k = 0; k < a.length; k += 1) {
    const x = a[k];
    const y = b[k];
    dot += x * y;
    squaresA += x * x;
    squaresB += y * y;
  }

  const magnitude = Math.sqrt(squaresA) * Math.sqrt(squaresB);
  if (magnitude === 0) return 0;
  return dot / magnitude;
}
59
+
60
/**
 * Compute the embedding of a record and store it in the workspace cache.
 *
 * The embedded text is the record's content, entity and tags joined with
 * spaces (empty parts omitted). Nothing happens when no API key is configured
 * or when no embedding could be obtained.
 *
 * @param {string} ws - Workspace root directory.
 * @param {{ id: string, content?: string, entity?: string, tags?: string[] }} record
 *   Record to index; the embedding is cached under `record.id`.
 * @returns {Promise<void>}
 */
export async function indexEmbedding(ws, record) {
  if (!resolveApiKey()) return;

  const parts = [record.content, record.entity, ...(record.tags || [])];
  const vector = await getEmbedding(parts.filter(Boolean).join(" "));
  if (!vector) return;

  const cache = loadEmbeddingCache(ws);
  cache[record.id] = vector;
  saveEmbeddingCache(ws);
}