@onenomad/engram-mcp 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -32
- package/dist/auth/login.d.ts +107 -68
- package/dist/auth/login.js +227 -216
- package/dist/auth/login.js.map +1 -1
- package/dist/consolidator.js +519 -519
- package/dist/context-pressure.js +91 -91
- package/dist/handoff.d.ts +53 -53
- package/dist/handoff.js +156 -156
- package/dist/server.js +204 -49
- package/dist/server.js.map +1 -1
- package/dist/source-dedup.d.ts +86 -86
- package/dist/source-dedup.js +147 -147
- package/dist/update-metadata.d.ts +29 -29
- package/dist/update-metadata.js +51 -51
- package/dist/wal.d.ts +95 -95
- package/dist/wal.js +295 -295
- package/package.json +1 -1
package/dist/wal.js
CHANGED
|
@@ -1,296 +1,296 @@
|
|
|
1
|
-
import { randomUUID } from 'node:crypto';
|
|
2
|
-
import { embed } from './llm.js';
|
|
3
|
-
import { buildContextPrefix } from './utils.js';
|
|
4
|
-
import { chunkContent } from './chunker.js';
|
|
5
|
-
import { extractAndPersistTriples } from './kg-extractor.js';
|
|
6
|
-
import { sourceDedup } from './source-dedup.js';
|
|
7
|
-
// Lightweight poisoning patterns checked at ingest time (no LLM, no search)
|
|
8
|
-
const POISON_PATTERNS = [
|
|
9
|
-
/\b(ignore previous instructions|ignore all instructions|disregard|forget everything)\b/i,
|
|
10
|
-
/^(system|SYSTEM)\s*:/m,
|
|
11
|
-
/\b(act as|you are now|pretend to be|new persona|new identity)\b/i,
|
|
12
|
-
];
|
|
13
|
-
function checkContentPoisoning(content) {
|
|
14
|
-
for (const pattern of POISON_PATTERNS) {
|
|
15
|
-
if (pattern.test(content))
|
|
16
|
-
return 'Suspicious content pattern detected — flagged for review';
|
|
17
|
-
}
|
|
18
|
-
return null;
|
|
19
|
-
}
|
|
20
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
21
|
-
// Background side-effect tracking
|
|
22
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
23
|
-
const pendingSideEffects = new Set();
|
|
24
|
-
/**
|
|
25
|
-
* Wait for all in-flight background side-effects (KG extraction +
|
|
26
|
-
* daily-entry append fired with `awaitSideEffects: false`) to
|
|
27
|
-
* complete. No-op when nothing is pending.
|
|
28
|
-
*
|
|
29
|
-
* Tests should call this between ingest and assert; shutdown code
|
|
30
|
-
* should call before process exit to avoid losing KG writes.
|
|
31
|
-
*/
|
|
32
|
-
export async function flushPendingSideEffects() {
|
|
33
|
-
// Snapshot — new promises added during await won't be drained by
|
|
34
|
-
// this call (they get the next one). Loop until empty in case of
|
|
35
|
-
// long-running chains.
|
|
36
|
-
let attempts = 0;
|
|
37
|
-
while (pendingSideEffects.size > 0 && attempts < 100) {
|
|
38
|
-
const snapshot = Array.from(pendingSideEffects);
|
|
39
|
-
await Promise.allSettled(snapshot);
|
|
40
|
-
attempts++;
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
/** Pending count — for tests + telemetry. */
|
|
44
|
-
export function pendingSideEffectCount() {
|
|
45
|
-
return pendingSideEffects.size;
|
|
46
|
-
}
|
|
47
|
-
/**
|
|
48
|
-
* Immediately persist one or more memory entries.
|
|
49
|
-
* Designed to be called mid-conversation, before the agent responds.
|
|
50
|
-
*/
|
|
51
|
-
export async function ingest(config, storage, entries) {
|
|
52
|
-
const chunks = [];
|
|
53
|
-
// Freshly-minted chunks that still need persisting. Cached-source
|
|
54
|
-
// stubs are added to `chunks` (so callers get them back) but skipped
|
|
55
|
-
// here, since the underlying rows are already on disk from a prior
|
|
56
|
-
// ingest. Flushed via storage.saveChunks() in one shot after the
|
|
57
|
-
// entries loop — replaces N round-trips with 1 against the backend.
|
|
58
|
-
const newChunks = [];
|
|
59
|
-
for (const entry of entries) {
|
|
60
|
-
if (!entry.content || entry.content.trim().length < 5)
|
|
61
|
-
continue;
|
|
62
|
-
const trimmedContent = entry.content.trim();
|
|
63
|
-
// Advisory poisoning check — log warning but never block
|
|
64
|
-
const poisonFlag = checkContentPoisoning(trimmedContent);
|
|
65
|
-
if (poisonFlag) {
|
|
66
|
-
console.error(`Engram governance: ${poisonFlag} in "${trimmedContent.slice(0, 80)}..."`);
|
|
67
|
-
}
|
|
68
|
-
// Same-source ingest dedup. When the agent re-reads a stable file
|
|
69
|
-
// or re-polls an unchanged endpoint within the same Engram process,
|
|
70
|
-
// we've already chunked + embedded + saved this content. Look up
|
|
71
|
-
// the (source, content-hash) pair in the in-memory cache and short-
|
|
72
|
-
// circuit the rest of the pipeline on a hit. Reuses the prior
|
|
73
|
-
// chunk(s) rather than writing duplicates.
|
|
74
|
-
//
|
|
75
|
-
// Bounded session-scoped cache (max 64 sources × 8 hashes); see
|
|
76
|
-
// source-dedup.ts. Persistence layer doesn't change.
|
|
77
|
-
const cached = sourceDedup.lookup(entry.source, trimmedContent);
|
|
78
|
-
if (cached) {
|
|
79
|
-
// Materialize a chunk reference for the caller from the cached
|
|
80
|
-
// metadata. We don't re-fetch the actual StoredChunk from disk —
|
|
81
|
-
// the caller's response only needs id + content + minimal meta,
|
|
82
|
-
// and the agent's history is keyed off `id`.
|
|
83
|
-
const stub = {
|
|
84
|
-
id: cached.chunkId,
|
|
85
|
-
tier: entry.tier ?? 'short-term',
|
|
86
|
-
type: entry.type ?? 'context',
|
|
87
|
-
cognitiveLayer: entry.layer ?? 'episodic',
|
|
88
|
-
tags: entry.tags ?? [],
|
|
89
|
-
domain: entry.domain ?? '',
|
|
90
|
-
topic: entry.topic ?? '',
|
|
91
|
-
source: entry.source ?? '',
|
|
92
|
-
importance: entry.importance ?? 0.5,
|
|
93
|
-
sentiment: entry.sentiment ?? 'neutral',
|
|
94
|
-
createdAt: new Date().toISOString(),
|
|
95
|
-
lastRecalledAt: null,
|
|
96
|
-
recallCount: 0,
|
|
97
|
-
relatedMemories: [],
|
|
98
|
-
recallOutcomes: [],
|
|
99
|
-
origin: entry.origin ?? 'derived',
|
|
100
|
-
content: trimmedContent,
|
|
101
|
-
};
|
|
102
|
-
chunks.push(stub);
|
|
103
|
-
continue;
|
|
104
|
-
}
|
|
105
|
-
const baseType = entry.type ?? inferType(trimmedContent);
|
|
106
|
-
const baseLayer = entry.layer ?? inferLayer(trimmedContent);
|
|
107
|
-
// Emotion-weighted importance: high-arousal events get stronger encoding
|
|
108
|
-
// Matches amygdala research — negative high-arousal memories form faster (0.8 LR)
|
|
109
|
-
// than positive ones (0.2 LR). Neutral emotions don't modify importance.
|
|
110
|
-
let effectiveImportance = entry.importance ?? 0.5;
|
|
111
|
-
if (entry.emotionalArousal !== undefined && entry.emotionalArousal > 0.3) {
|
|
112
|
-
const valence = entry.emotionalValence ?? 0;
|
|
113
|
-
// Negative-biased boost: negative emotions boost more than positive
|
|
114
|
-
const emotionBoost = entry.emotionalArousal * (valence < 0 ? 0.3 : 0.15);
|
|
115
|
-
effectiveImportance = Math.min(1, effectiveImportance + emotionBoost);
|
|
116
|
-
}
|
|
117
|
-
const baseMeta = {
|
|
118
|
-
tier: entry.tier ?? 'short-term',
|
|
119
|
-
type: baseType,
|
|
120
|
-
cognitiveLayer: baseLayer,
|
|
121
|
-
tags: entry.tags ?? [],
|
|
122
|
-
domain: entry.domain ?? '',
|
|
123
|
-
topic: entry.topic ?? '',
|
|
124
|
-
source: entry.source ?? `wal:${Date.now()}`,
|
|
125
|
-
importance: effectiveImportance,
|
|
126
|
-
sentiment: entry.sentiment ?? 'neutral',
|
|
127
|
-
// Honor caller-provided createdAt (for backfilled memories with
|
|
128
|
-
// a known original time) — defaults to "now" when omitted.
|
|
129
|
-
createdAt: entry.createdAt ?? new Date().toISOString(),
|
|
130
|
-
lastRecalledAt: null,
|
|
131
|
-
recallCount: 0,
|
|
132
|
-
relatedMemories: [],
|
|
133
|
-
recallOutcomes: [],
|
|
134
|
-
origin: entry.origin ?? 'derived',
|
|
135
|
-
};
|
|
136
|
-
// Check if content should be split into sub-chunks
|
|
137
|
-
const splitResult = config.enableChunking ? chunkContent(trimmedContent) : { chunks: [trimmedContent], needsSplit: false };
|
|
138
|
-
if (splitResult.needsSplit) {
|
|
139
|
-
// Save parent chunk (no embedding, used for keyword search)
|
|
140
|
-
const parentChunk = {
|
|
141
|
-
id: randomUUID(),
|
|
142
|
-
...baseMeta,
|
|
143
|
-
content: trimmedContent,
|
|
144
|
-
consolidationLevel: -1, // Sentinel: parent container
|
|
145
|
-
};
|
|
146
|
-
newChunks.push(parentChunk);
|
|
147
|
-
chunks.push(parentChunk);
|
|
148
|
-
// Remember the parent chunk id keyed by source so a re-ingest
|
|
149
|
-
// of the identical content within the same process returns this
|
|
150
|
-
// same id and skips chunk+embed+save entirely.
|
|
151
|
-
sourceDedup.remember(entry.source, trimmedContent, parentChunk.id);
|
|
152
|
-
// Save sub-chunks with embeddings
|
|
153
|
-
for (const subContent of splitResult.chunks) {
|
|
154
|
-
const subChunk = {
|
|
155
|
-
id: randomUUID(),
|
|
156
|
-
...baseMeta,
|
|
157
|
-
content: subContent,
|
|
158
|
-
parentChunkId: parentChunk.id,
|
|
159
|
-
};
|
|
160
|
-
// Detect temporal anchor
|
|
161
|
-
const dateMatch = subContent.match(/\b(\d{4})-(\d{2})-(\d{2})\b/) ??
|
|
162
|
-
subContent.match(/\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+(\d{1,2})[,.]?\s+(\d{4})\b/i);
|
|
163
|
-
if (dateMatch) {
|
|
164
|
-
try {
|
|
165
|
-
const parsed = new Date(dateMatch[0]);
|
|
166
|
-
if (!isNaN(parsed.getTime()))
|
|
167
|
-
subChunk.temporalAnchor = parsed.getTime();
|
|
168
|
-
}
|
|
169
|
-
catch { /* skip */ }
|
|
170
|
-
}
|
|
171
|
-
try {
|
|
172
|
-
const prefix = buildContextPrefix(subChunk);
|
|
173
|
-
subChunk.embedding = await embed(config, subContent, prefix);
|
|
174
|
-
subChunk.embeddingVersion = 1;
|
|
175
|
-
}
|
|
176
|
-
catch { /* skip */ }
|
|
177
|
-
newChunks.push(subChunk);
|
|
178
|
-
chunks.push(subChunk);
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
else {
|
|
182
|
-
// Single chunk path (original behavior)
|
|
183
|
-
const chunk = {
|
|
184
|
-
id: randomUUID(),
|
|
185
|
-
...baseMeta,
|
|
186
|
-
content: trimmedContent,
|
|
187
|
-
};
|
|
188
|
-
const dateMatch = chunk.content.match(/\b(\d{4})-(\d{2})-(\d{2})\b/) ??
|
|
189
|
-
chunk.content.match(/\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+(\d{1,2})[,.]?\s+(\d{4})\b/i);
|
|
190
|
-
if (dateMatch) {
|
|
191
|
-
try {
|
|
192
|
-
const parsed = new Date(dateMatch[0]);
|
|
193
|
-
if (!isNaN(parsed.getTime()))
|
|
194
|
-
chunk.temporalAnchor = parsed.getTime();
|
|
195
|
-
}
|
|
196
|
-
catch { /* skip */ }
|
|
197
|
-
}
|
|
198
|
-
try {
|
|
199
|
-
const prefix = buildContextPrefix(chunk);
|
|
200
|
-
chunk.embedding = await embed(config, chunk.content, prefix);
|
|
201
|
-
chunk.embeddingVersion = 1;
|
|
202
|
-
}
|
|
203
|
-
catch { /* skip */ }
|
|
204
|
-
newChunks.push(chunk);
|
|
205
|
-
chunks.push(chunk);
|
|
206
|
-
// Single-chunk path: cache the chunk id keyed by source.
|
|
207
|
-
sourceDedup.remember(entry.source, trimmedContent, chunk.id);
|
|
208
|
-
}
|
|
209
|
-
}
|
|
210
|
-
// One batched write for every new chunk in the call.
|
|
211
|
-
if (newChunks.length > 0) {
|
|
212
|
-
await storage.saveChunks(newChunks);
|
|
213
|
-
}
|
|
214
|
-
// Per-batch side effects. Both opt-out via flags on any entry in
|
|
215
|
-
// the batch (typical: engram-ingest calls ingest() with one entry,
|
|
216
|
-
// so a single flag controls the path). Benchmark harnesses set
|
|
217
|
-
// these to match what engram/benchmarks/locomo.ts does — its
|
|
218
|
-
// direct-saveChunk path skips both, which is the source of the
|
|
219
|
-
// ~50× wall-clock gap between standalone and MCP-boundary benches.
|
|
220
|
-
const skipDaily = entries.some(e => e.skipDailyEntry);
|
|
221
|
-
const skipKg = entries.some(e => e.skipKgExtraction);
|
|
222
|
-
// awaitSideEffects defaults TRUE — only flip to async when EVERY
|
|
223
|
-
// entry in the batch opts out, to avoid surprising a sync caller
|
|
224
|
-
// batched with an async one.
|
|
225
|
-
const runAsync = entries.length > 0 && entries.every(e => e.awaitSideEffects === false);
|
|
226
|
-
if (chunks.length > 0) {
|
|
227
|
-
const sideEffectsTask = async () => {
|
|
228
|
-
if (!skipDaily) {
|
|
229
|
-
const date = new Date().toISOString().split('T')[0];
|
|
230
|
-
try {
|
|
231
|
-
await storage.appendDailyEntry(date, {
|
|
232
|
-
timestamp: new Date().toISOString(),
|
|
233
|
-
conversationId: chunks[0].source,
|
|
234
|
-
summary: `WAL ingest: ${chunks.length} entries`,
|
|
235
|
-
extractedFacts: chunks.map(c => c.content),
|
|
236
|
-
});
|
|
237
|
-
}
|
|
238
|
-
catch {
|
|
239
|
-
// best-effort: a daily-entry append failure must not break
|
|
240
|
-
// the rest of the side-effects task
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
if (!skipKg) {
|
|
244
|
-
// Auto-populate knowledge graph from ingested content
|
|
245
|
-
for (const chunk of chunks) {
|
|
246
|
-
if (chunk.consolidationLevel === -1)
|
|
247
|
-
continue; // skip parent containers
|
|
248
|
-
try {
|
|
249
|
-
await extractAndPersistTriples(storage, chunk.content, {
|
|
250
|
-
domain: chunk.domain,
|
|
251
|
-
topic: chunk.topic,
|
|
252
|
-
source: chunk.source,
|
|
253
|
-
});
|
|
254
|
-
}
|
|
255
|
-
catch {
|
|
256
|
-
// KG extraction is best-effort — never block ingestion
|
|
257
|
-
}
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
};
|
|
261
|
-
if (runAsync) {
|
|
262
|
-
// Fire and forget — track in pendingSideEffects so tests or
|
|
263
|
-
// shutdown code can drain via flushPendingSideEffects().
|
|
264
|
-
const p = sideEffectsTask()
|
|
265
|
-
.catch(() => { })
|
|
266
|
-
.finally(() => { pendingSideEffects.delete(p); });
|
|
267
|
-
pendingSideEffects.add(p);
|
|
268
|
-
}
|
|
269
|
-
else {
|
|
270
|
-
await sideEffectsTask();
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
return chunks;
|
|
274
|
-
}
|
|
275
|
-
// ── Type/Layer inference heuristics ──────────────────────────────────
|
|
276
|
-
function inferType(content) {
|
|
277
|
-
const lower = content.toLowerCase();
|
|
278
|
-
if (lower.includes('prefer') || lower.includes('like') || lower.includes('want'))
|
|
279
|
-
return 'preference';
|
|
280
|
-
if (lower.includes('decided') || lower.includes('going with') || lower.includes('chose') || lower.includes('use '))
|
|
281
|
-
return 'decision';
|
|
282
|
-
if (lower.includes('not ') || lower.includes('wrong') || lower.includes('correct') || lower.includes('instead'))
|
|
283
|
-
return 'correction';
|
|
284
|
-
if (lower.includes('working on') || lower.includes('currently') || lower.includes('right now'))
|
|
285
|
-
return 'context';
|
|
286
|
-
return 'fact';
|
|
287
|
-
}
|
|
288
|
-
function inferLayer(content) {
|
|
289
|
-
const lower = content.toLowerCase();
|
|
290
|
-
if (lower.includes('always') || lower.includes('never') || lower.includes('rule') || lower.includes('should'))
|
|
291
|
-
return 'procedural';
|
|
292
|
-
if (lower.includes('today') || lower.includes('yesterday') || lower.includes('just ') || lower.includes('session'))
|
|
293
|
-
return 'episodic';
|
|
294
|
-
return 'semantic';
|
|
295
|
-
}
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
2
|
+
import { embed } from './llm.js';
|
|
3
|
+
import { buildContextPrefix } from './utils.js';
|
|
4
|
+
import { chunkContent } from './chunker.js';
|
|
5
|
+
import { extractAndPersistTriples } from './kg-extractor.js';
|
|
6
|
+
import { sourceDedup } from './source-dedup.js';
|
|
7
|
+
// Lightweight poisoning patterns checked at ingest time (no LLM, no search)
|
|
8
|
+
const POISON_PATTERNS = [
|
|
9
|
+
/\b(ignore previous instructions|ignore all instructions|disregard|forget everything)\b/i,
|
|
10
|
+
/^(system|SYSTEM)\s*:/m,
|
|
11
|
+
/\b(act as|you are now|pretend to be|new persona|new identity)\b/i,
|
|
12
|
+
];
|
|
13
|
+
function checkContentPoisoning(content) {
|
|
14
|
+
for (const pattern of POISON_PATTERNS) {
|
|
15
|
+
if (pattern.test(content))
|
|
16
|
+
return 'Suspicious content pattern detected — flagged for review';
|
|
17
|
+
}
|
|
18
|
+
return null;
|
|
19
|
+
}
|
|
20
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
21
|
+
// Background side-effect tracking
|
|
22
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
23
|
+
const pendingSideEffects = new Set();
|
|
24
|
+
/**
|
|
25
|
+
* Wait for all in-flight background side-effects (KG extraction +
|
|
26
|
+
* daily-entry append fired with `awaitSideEffects: false`) to
|
|
27
|
+
* complete. No-op when nothing is pending.
|
|
28
|
+
*
|
|
29
|
+
* Tests should call this between ingest and assert; shutdown code
|
|
30
|
+
* should call before process exit to avoid losing KG writes.
|
|
31
|
+
*/
|
|
32
|
+
export async function flushPendingSideEffects() {
|
|
33
|
+
// Snapshot — new promises added during await won't be drained by
|
|
34
|
+
// this call (they get the next one). Loop until empty in case of
|
|
35
|
+
// long-running chains.
|
|
36
|
+
let attempts = 0;
|
|
37
|
+
while (pendingSideEffects.size > 0 && attempts < 100) {
|
|
38
|
+
const snapshot = Array.from(pendingSideEffects);
|
|
39
|
+
await Promise.allSettled(snapshot);
|
|
40
|
+
attempts++;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
/** Pending count — for tests + telemetry. */
|
|
44
|
+
export function pendingSideEffectCount() {
|
|
45
|
+
return pendingSideEffects.size;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Immediately persist one or more memory entries.
|
|
49
|
+
* Designed to be called mid-conversation, before the agent responds.
|
|
50
|
+
*/
|
|
51
|
+
export async function ingest(config, storage, entries) {
|
|
52
|
+
const chunks = [];
|
|
53
|
+
// Freshly-minted chunks that still need persisting. Cached-source
|
|
54
|
+
// stubs are added to `chunks` (so callers get them back) but skipped
|
|
55
|
+
// here, since the underlying rows are already on disk from a prior
|
|
56
|
+
// ingest. Flushed via storage.saveChunks() in one shot after the
|
|
57
|
+
// entries loop — replaces N round-trips with 1 against the backend.
|
|
58
|
+
const newChunks = [];
|
|
59
|
+
for (const entry of entries) {
|
|
60
|
+
if (!entry.content || entry.content.trim().length < 5)
|
|
61
|
+
continue;
|
|
62
|
+
const trimmedContent = entry.content.trim();
|
|
63
|
+
// Advisory poisoning check — log warning but never block
|
|
64
|
+
const poisonFlag = checkContentPoisoning(trimmedContent);
|
|
65
|
+
if (poisonFlag) {
|
|
66
|
+
console.error(`Engram governance: ${poisonFlag} in "${trimmedContent.slice(0, 80)}..."`);
|
|
67
|
+
}
|
|
68
|
+
// Same-source ingest dedup. When the agent re-reads a stable file
|
|
69
|
+
// or re-polls an unchanged endpoint within the same Engram process,
|
|
70
|
+
// we've already chunked + embedded + saved this content. Look up
|
|
71
|
+
// the (source, content-hash) pair in the in-memory cache and short-
|
|
72
|
+
// circuit the rest of the pipeline on a hit. Reuses the prior
|
|
73
|
+
// chunk(s) rather than writing duplicates.
|
|
74
|
+
//
|
|
75
|
+
// Bounded session-scoped cache (max 64 sources × 8 hashes); see
|
|
76
|
+
// source-dedup.ts. Persistence layer doesn't change.
|
|
77
|
+
const cached = sourceDedup.lookup(entry.source, trimmedContent);
|
|
78
|
+
if (cached) {
|
|
79
|
+
// Materialize a chunk reference for the caller from the cached
|
|
80
|
+
// metadata. We don't re-fetch the actual StoredChunk from disk —
|
|
81
|
+
// the caller's response only needs id + content + minimal meta,
|
|
82
|
+
// and the agent's history is keyed off `id`.
|
|
83
|
+
const stub = {
|
|
84
|
+
id: cached.chunkId,
|
|
85
|
+
tier: entry.tier ?? 'short-term',
|
|
86
|
+
type: entry.type ?? 'context',
|
|
87
|
+
cognitiveLayer: entry.layer ?? 'episodic',
|
|
88
|
+
tags: entry.tags ?? [],
|
|
89
|
+
domain: entry.domain ?? '',
|
|
90
|
+
topic: entry.topic ?? '',
|
|
91
|
+
source: entry.source ?? '',
|
|
92
|
+
importance: entry.importance ?? 0.5,
|
|
93
|
+
sentiment: entry.sentiment ?? 'neutral',
|
|
94
|
+
createdAt: new Date().toISOString(),
|
|
95
|
+
lastRecalledAt: null,
|
|
96
|
+
recallCount: 0,
|
|
97
|
+
relatedMemories: [],
|
|
98
|
+
recallOutcomes: [],
|
|
99
|
+
origin: entry.origin ?? 'derived',
|
|
100
|
+
content: trimmedContent,
|
|
101
|
+
};
|
|
102
|
+
chunks.push(stub);
|
|
103
|
+
continue;
|
|
104
|
+
}
|
|
105
|
+
const baseType = entry.type ?? inferType(trimmedContent);
|
|
106
|
+
const baseLayer = entry.layer ?? inferLayer(trimmedContent);
|
|
107
|
+
// Emotion-weighted importance: high-arousal events get stronger encoding
|
|
108
|
+
// Matches amygdala research — negative high-arousal memories form faster (0.8 LR)
|
|
109
|
+
// than positive ones (0.2 LR). Neutral emotions don't modify importance.
|
|
110
|
+
let effectiveImportance = entry.importance ?? 0.5;
|
|
111
|
+
if (entry.emotionalArousal !== undefined && entry.emotionalArousal > 0.3) {
|
|
112
|
+
const valence = entry.emotionalValence ?? 0;
|
|
113
|
+
// Negative-biased boost: negative emotions boost more than positive
|
|
114
|
+
const emotionBoost = entry.emotionalArousal * (valence < 0 ? 0.3 : 0.15);
|
|
115
|
+
effectiveImportance = Math.min(1, effectiveImportance + emotionBoost);
|
|
116
|
+
}
|
|
117
|
+
const baseMeta = {
|
|
118
|
+
tier: entry.tier ?? 'short-term',
|
|
119
|
+
type: baseType,
|
|
120
|
+
cognitiveLayer: baseLayer,
|
|
121
|
+
tags: entry.tags ?? [],
|
|
122
|
+
domain: entry.domain ?? '',
|
|
123
|
+
topic: entry.topic ?? '',
|
|
124
|
+
source: entry.source ?? `wal:${Date.now()}`,
|
|
125
|
+
importance: effectiveImportance,
|
|
126
|
+
sentiment: entry.sentiment ?? 'neutral',
|
|
127
|
+
// Honor caller-provided createdAt (for backfilled memories with
|
|
128
|
+
// a known original time) — defaults to "now" when omitted.
|
|
129
|
+
createdAt: entry.createdAt ?? new Date().toISOString(),
|
|
130
|
+
lastRecalledAt: null,
|
|
131
|
+
recallCount: 0,
|
|
132
|
+
relatedMemories: [],
|
|
133
|
+
recallOutcomes: [],
|
|
134
|
+
origin: entry.origin ?? 'derived',
|
|
135
|
+
};
|
|
136
|
+
// Check if content should be split into sub-chunks
|
|
137
|
+
const splitResult = config.enableChunking ? chunkContent(trimmedContent) : { chunks: [trimmedContent], needsSplit: false };
|
|
138
|
+
if (splitResult.needsSplit) {
|
|
139
|
+
// Save parent chunk (no embedding, used for keyword search)
|
|
140
|
+
const parentChunk = {
|
|
141
|
+
id: randomUUID(),
|
|
142
|
+
...baseMeta,
|
|
143
|
+
content: trimmedContent,
|
|
144
|
+
consolidationLevel: -1, // Sentinel: parent container
|
|
145
|
+
};
|
|
146
|
+
newChunks.push(parentChunk);
|
|
147
|
+
chunks.push(parentChunk);
|
|
148
|
+
// Remember the parent chunk id keyed by source so a re-ingest
|
|
149
|
+
// of the identical content within the same process returns this
|
|
150
|
+
// same id and skips chunk+embed+save entirely.
|
|
151
|
+
sourceDedup.remember(entry.source, trimmedContent, parentChunk.id);
|
|
152
|
+
// Save sub-chunks with embeddings
|
|
153
|
+
for (const subContent of splitResult.chunks) {
|
|
154
|
+
const subChunk = {
|
|
155
|
+
id: randomUUID(),
|
|
156
|
+
...baseMeta,
|
|
157
|
+
content: subContent,
|
|
158
|
+
parentChunkId: parentChunk.id,
|
|
159
|
+
};
|
|
160
|
+
// Detect temporal anchor
|
|
161
|
+
const dateMatch = subContent.match(/\b(\d{4})-(\d{2})-(\d{2})\b/) ??
|
|
162
|
+
subContent.match(/\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+(\d{1,2})[,.]?\s+(\d{4})\b/i);
|
|
163
|
+
if (dateMatch) {
|
|
164
|
+
try {
|
|
165
|
+
const parsed = new Date(dateMatch[0]);
|
|
166
|
+
if (!isNaN(parsed.getTime()))
|
|
167
|
+
subChunk.temporalAnchor = parsed.getTime();
|
|
168
|
+
}
|
|
169
|
+
catch { /* skip */ }
|
|
170
|
+
}
|
|
171
|
+
try {
|
|
172
|
+
const prefix = buildContextPrefix(subChunk);
|
|
173
|
+
subChunk.embedding = await embed(config, subContent, prefix);
|
|
174
|
+
subChunk.embeddingVersion = 1;
|
|
175
|
+
}
|
|
176
|
+
catch { /* skip */ }
|
|
177
|
+
newChunks.push(subChunk);
|
|
178
|
+
chunks.push(subChunk);
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
else {
|
|
182
|
+
// Single chunk path (original behavior)
|
|
183
|
+
const chunk = {
|
|
184
|
+
id: randomUUID(),
|
|
185
|
+
...baseMeta,
|
|
186
|
+
content: trimmedContent,
|
|
187
|
+
};
|
|
188
|
+
const dateMatch = chunk.content.match(/\b(\d{4})-(\d{2})-(\d{2})\b/) ??
|
|
189
|
+
chunk.content.match(/\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+(\d{1,2})[,.]?\s+(\d{4})\b/i);
|
|
190
|
+
if (dateMatch) {
|
|
191
|
+
try {
|
|
192
|
+
const parsed = new Date(dateMatch[0]);
|
|
193
|
+
if (!isNaN(parsed.getTime()))
|
|
194
|
+
chunk.temporalAnchor = parsed.getTime();
|
|
195
|
+
}
|
|
196
|
+
catch { /* skip */ }
|
|
197
|
+
}
|
|
198
|
+
try {
|
|
199
|
+
const prefix = buildContextPrefix(chunk);
|
|
200
|
+
chunk.embedding = await embed(config, chunk.content, prefix);
|
|
201
|
+
chunk.embeddingVersion = 1;
|
|
202
|
+
}
|
|
203
|
+
catch { /* skip */ }
|
|
204
|
+
newChunks.push(chunk);
|
|
205
|
+
chunks.push(chunk);
|
|
206
|
+
// Single-chunk path: cache the chunk id keyed by source.
|
|
207
|
+
sourceDedup.remember(entry.source, trimmedContent, chunk.id);
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
// One batched write for every new chunk in the call.
|
|
211
|
+
if (newChunks.length > 0) {
|
|
212
|
+
await storage.saveChunks(newChunks);
|
|
213
|
+
}
|
|
214
|
+
// Per-batch side effects. Both opt-out via flags on any entry in
|
|
215
|
+
// the batch (typical: engram-ingest calls ingest() with one entry,
|
|
216
|
+
// so a single flag controls the path). Benchmark harnesses set
|
|
217
|
+
// these to match what engram/benchmarks/locomo.ts does — its
|
|
218
|
+
// direct-saveChunk path skips both, which is the source of the
|
|
219
|
+
// ~50× wall-clock gap between standalone and MCP-boundary benches.
|
|
220
|
+
const skipDaily = entries.some(e => e.skipDailyEntry);
|
|
221
|
+
const skipKg = entries.some(e => e.skipKgExtraction);
|
|
222
|
+
// awaitSideEffects defaults TRUE — only flip to async when EVERY
|
|
223
|
+
// entry in the batch opts out, to avoid surprising a sync caller
|
|
224
|
+
// batched with an async one.
|
|
225
|
+
const runAsync = entries.length > 0 && entries.every(e => e.awaitSideEffects === false);
|
|
226
|
+
if (chunks.length > 0) {
|
|
227
|
+
const sideEffectsTask = async () => {
|
|
228
|
+
if (!skipDaily) {
|
|
229
|
+
const date = new Date().toISOString().split('T')[0];
|
|
230
|
+
try {
|
|
231
|
+
await storage.appendDailyEntry(date, {
|
|
232
|
+
timestamp: new Date().toISOString(),
|
|
233
|
+
conversationId: chunks[0].source,
|
|
234
|
+
summary: `WAL ingest: ${chunks.length} entries`,
|
|
235
|
+
extractedFacts: chunks.map(c => c.content),
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
catch {
|
|
239
|
+
// best-effort: a daily-entry append failure must not break
|
|
240
|
+
// the rest of the side-effects task
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
if (!skipKg) {
|
|
244
|
+
// Auto-populate knowledge graph from ingested content
|
|
245
|
+
for (const chunk of chunks) {
|
|
246
|
+
if (chunk.consolidationLevel === -1)
|
|
247
|
+
continue; // skip parent containers
|
|
248
|
+
try {
|
|
249
|
+
await extractAndPersistTriples(storage, chunk.content, {
|
|
250
|
+
domain: chunk.domain,
|
|
251
|
+
topic: chunk.topic,
|
|
252
|
+
source: chunk.source,
|
|
253
|
+
});
|
|
254
|
+
}
|
|
255
|
+
catch {
|
|
256
|
+
// KG extraction is best-effort — never block ingestion
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
};
|
|
261
|
+
if (runAsync) {
|
|
262
|
+
// Fire and forget — track in pendingSideEffects so tests or
|
|
263
|
+
// shutdown code can drain via flushPendingSideEffects().
|
|
264
|
+
const p = sideEffectsTask()
|
|
265
|
+
.catch(() => { })
|
|
266
|
+
.finally(() => { pendingSideEffects.delete(p); });
|
|
267
|
+
pendingSideEffects.add(p);
|
|
268
|
+
}
|
|
269
|
+
else {
|
|
270
|
+
await sideEffectsTask();
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
return chunks;
|
|
274
|
+
}
|
|
275
|
+
// ── Type/Layer inference heuristics ──────────────────────────────────
|
|
276
|
+
function inferType(content) {
|
|
277
|
+
const lower = content.toLowerCase();
|
|
278
|
+
if (lower.includes('prefer') || lower.includes('like') || lower.includes('want'))
|
|
279
|
+
return 'preference';
|
|
280
|
+
if (lower.includes('decided') || lower.includes('going with') || lower.includes('chose') || lower.includes('use '))
|
|
281
|
+
return 'decision';
|
|
282
|
+
if (lower.includes('not ') || lower.includes('wrong') || lower.includes('correct') || lower.includes('instead'))
|
|
283
|
+
return 'correction';
|
|
284
|
+
if (lower.includes('working on') || lower.includes('currently') || lower.includes('right now'))
|
|
285
|
+
return 'context';
|
|
286
|
+
return 'fact';
|
|
287
|
+
}
|
|
288
|
+
function inferLayer(content) {
|
|
289
|
+
const lower = content.toLowerCase();
|
|
290
|
+
if (lower.includes('always') || lower.includes('never') || lower.includes('rule') || lower.includes('should'))
|
|
291
|
+
return 'procedural';
|
|
292
|
+
if (lower.includes('today') || lower.includes('yesterday') || lower.includes('just ') || lower.includes('session'))
|
|
293
|
+
return 'episodic';
|
|
294
|
+
return 'semantic';
|
|
295
|
+
}
|
|
296
296
|
//# sourceMappingURL=wal.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@onenomad/engram-mcp",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"mcpName": "io.github.onenomad-llc/engram-mcp",
|
|
5
5
|
"description": "Engram — memory MCP server for Claude Code, plus a library API for direct in-process use. LLM-powered extraction, hybrid ANN search, tier lifecycle, spreading activation, procedural rules, real-time ingest, Mem0, and session-state.",
|
|
6
6
|
"type": "module",
|