npm - mcp-super-memory - Versions diffs - 0.10.2 → 0.11.0 - Mend

mcp-super-memory 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/memoryGraph.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { Mutex } from "async-mutex";
 import MiniSearch from "minisearch";
 import { embedTextAsync, EMBEDDING_BACKEND, embeddingFingerprint, getThresholdProfile, isShortConcept, inContradictionBand } from "./embedding.js";
 import { rerankEnabled, rerankScores } from "./reranker.js";
+import { RecallBuffer, decidePromotion, AUTOKEY_ENABLED, AUTOKEY_BUFFER_CAPACITY, AUTOKEY_BUFFER_TTL_SECONDS, AUTOKEY_PROMOTE_N, AUTOKEY_MAX_ALIASES, AUTOKEY_PRUNE_AGE_SECONDS, } from "./autokey.js";
 const DATA_DIR = process.env.SUPER_MEMORY_DATA_DIR ?? join(homedir(), ".super-memory");
 const GRAPH_FILE = join(DATA_DIR, "graph.json");
 const CONVERSATIONS_DIR = join(DATA_DIR, "conversations");
@@ -33,10 +34,34 @@ const DENSE_RESULT_DEPTH = 50;
 // key (shared by many) can't flood the chain. Keeps recall→related→related navigable.
 const RELATED_LIMIT = Number(process.env.SUPER_MEMORY_RELATED_LIMIT ?? 20);
 const RELATED_EXPLICIT_BONUS = 1.0; // an explicit link is the strongest connection signal
+const _hubMinLinks = Number(process.env.SUPER_MEMORY_KEY_HUB_MIN_LINKS ?? 3); // raw env override; NaN when the value is not numeric
+const KEY_HUB_MIN_LINKS = Number.isFinite(_hubMinLinks) // active-memory count at which a key is reported with is_hub = true
+    ? Math.max(2, Math.floor(_hubMinLinks)) // whole number, never below 2
+    : 3; // non-finite override falls back to the default
 // When the cross-encoder reranker is on (SUPER_MEMORY_RERANK), re-score this many of the
 // top fused candidates by joint (query, memory) relevance, then keep the requested top_k.
 // A wider pool than top_k lets the reranker rescue a right answer the fused score buried.
 const RERANK_POOL = Number(process.env.SUPER_MEMORY_RERANK_POOL ?? 30);
+// Rerank-based not-found gate (opt-in). The cross-encoder's absolute relevance logit is a
+// stronger "does this memory actually answer the query" signal than bi-encoder cosine, so a
+// low top logit means the query is unanswerable → return []. Unset = disabled. A definite
+// key anchor (literal name/proper-noun match) bypasses it. NOTE: reliable for SAME-LANGUAGE
+// queries only — cross-lingual relevance logits run low even when relevant, so cross-lingual
+// not-found must lean on key anchors, not this floor.
+const RERANK_MIN_SCORE = process.env.SUPER_MEMORY_RERANK_MIN_SCORE !== undefined ? Number(process.env.SUPER_MEMORY_RERANK_MIN_SCORE) : null; // null when the variable is unset; a set but non-numeric value becomes NaN
+// KR↔Latin script check. The rerank not-found gate only trusts its logit when the query and
+// the top candidate share script — cross-lingual (e.g. Korean query ↔ English memory) logits
+// run low even when relevant, so a script mismatch means "don't trust the low logit, keep it".
+const hasHangul = (s) => /[\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uAC00-\uD7A3\uD7B0-\uD7FF]/.test(s); // true if `s` has any Hangul: syllables, compatibility Jamo, and conjoining Jamo (so NFD-decomposed Korean is detected too)
+// Literal key match must land on a word boundary (unicode-aware) so a short common-noun key
+// like "name" does not spuriously match inside a longer word ("namespace") and spike to the
+// top. Terms shorter than 2 chars never match literally.
+function literalKeyMatch(queryLower, term) { // true when `term` (lowercased here) appears in `queryLower` delimited by non-letter/non-digit neighbours
+    if (!term || term.length < 2) { return false; } // empty and single-character terms never match
+    const escaped = term.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // neutralise regex metacharacters in the term
+    const bounded = new RegExp(`(^|[^\\p{L}\\p{N}])${escaped}($|[^\\p{L}\\p{N}])`, "u"); // unicode-aware boundary on both sides
+    return bounded.test(queryLower);
+}
 const LINK_WEIGHT_DEFAULT = 1.0;
 const LINK_WEIGHT_MIN = 0.1;
 const LINK_WEIGHT_MAX = 3.0;
@@ -148,8 +173,18 @@ export class MemoryGraph {
     // provenance). Drives re-embed on a same-dimension model swap; see embeddingFingerprint.
     _storedFingerprint = null;
     _lock = new Mutex();
+    // Serializes disk writes independently of _lock so a flush() done OUTSIDE _lock
+    // (recall's tail) can never race another save on the temp file or interleave
+    // renames. Lock order is always _lock → _saveLock (writes) or _saveLock alone
+    // (recall flush); nothing acquires _saveLock then _lock, so no deadlock.
+    _saveLock = new Mutex(); // held by save() around mkdir → writeFile → rename
+    _saveSeq = 0; // incremented on every save() so each temp file gets a distinct name
     _dirty = false; // reset to false at the end of save()
     _bm25; // MiniSearch index, created in the constructor
+    _recallBuffer = new RecallBuffer({ // recent searchKeys() queries with their semantic-only key matches; read back by _maybeLearnAlias
+        capacity: AUTOKEY_BUFFER_CAPACITY,
+        ttlSeconds: AUTOKEY_BUFFER_TTL_SECONDS,
+    });
     constructor() {
         this._bm25 = new MiniSearch({
             fields: ["content"],
@@ -340,6 +375,44 @@ export class MemoryGraph {
             idf *= 0.5;
         return idf;
     }
+    _recordKeyAlias(keyId, alias) {
+        const target = this.keys[keyId];
+        if (!target)
+            return; // unknown key: nothing to annotate
+        const trimmed = alias.trim();
+        const wanted = trimmed.toLowerCase();
+        // Skip aliases that are too short or only a case variant of the concept itself.
+        if (trimmed.length < 2 || target.concept.toLowerCase() === wanted)
+            return;
+        const known = (target.aliases ??= []).some((existing) => existing.toLowerCase() === wanted);
+        if (!known) { target.aliases.push(trimmed); } // stored trimmed, original casing kept
+    }
+    _activeMemoryIdsForKey(keyId, namespace) {
+        // Ids of the memories linked to `keyId` that are still servable: present,
+        // unexpired, not superseded and, if `namespace` is truthy, inside it.
+        const linked = this._keyToMems[keyId]?.keys() ?? [];
+        return [...linked].filter((mid) => {
+            const mem = this.memories[mid];
+            const gone = !mem || this._isExpired(mem) || mid in this._supersededBy;
+            if (gone)
+                return false;
+            return !namespace || mem.namespace === namespace;
+        });
+    }
+    _keyView(keyId, namespace) {
+        // Summary of one key for tool output; counts only active memories, scoped to `namespace` when given.
+        const entry = this.keys[keyId];
+        const live = this._activeMemoryIdsForKey(keyId, namespace).length;
+        const view = { key_id: keyId, concept: entry.concept, aliases: entry.aliases ?? [] };
+        const learned = entry.learnedAliases ?? [];
+        view.learned_aliases = learned.map((item) => item.alias);
+        view.key_type = entry.key_type;
+        view.memory_count = live;
+        view.is_hub = live >= KEY_HUB_MIN_LINKS; // hub = at least KEY_HUB_MIN_LINKS active members
+        // Specificity is 1/n to three decimals; a key without active members scores 0.
+        view.specificity = live > 0 ? Math.round((1 / live) * 1000) / 1000 : 0;
+        return view;
+    }
     _findDuplicate(embedding) {
         const activeMems = Object.entries(this.memories).filter(([mid]) => !(mid in this._supersededBy));
         if (activeMems.length === 0)
@@ -422,7 +495,30 @@ export class MemoryGraph {
         }
         this._storedFingerprint = raw.meta?.embeddingFingerprint ?? null;
         for (const [kid, k] of Object.entries(raw.keys ?? {})) {
-            this.keys[kid] = k;
+            const seen = new Set();
+            const aliases = (Array.isArray(k.aliases) ? k.aliases : []).filter((alias) => {
+                if (typeof alias !== "string" || alias.trim().length < 2)
+                    return false;
+                const normalized = alias.trim().toLowerCase();
+                if (normalized === k.concept.toLowerCase() || seen.has(normalized))
+                    return false;
+                seen.add(normalized);
+                return true;
+            });
+            const aliasCandidates = k.aliasCandidates && typeof k.aliasCandidates === "object" && !Array.isArray(k.aliasCandidates)
+                ? Object.fromEntries(Object.entries(k.aliasCandidates).filter((entry) => {
+                    const v = entry[1];
+                    return (!!v &&
+                        typeof v === "object" &&
+                        typeof v.count === "number" &&
+                        typeof v.lastSeen === "number" &&
+                        typeof v.queryText === "string");
+                }))
+                : undefined;
+            const learnedAliases = Array.isArray(k.learnedAliases)
+                ? k.learnedAliases.filter((l) => !!l && typeof l.alias === "string" && typeof l.addedAt === "number" && typeof l.hits === "number")
+                : undefined;
+            this.keys[kid] = { ...k, aliases, aliasCandidates, learnedAliases };
         }
         for (const [mid, m] of Object.entries(raw.memories ?? {})) {
             const defaults = {
@@ -469,7 +565,6 @@ export class MemoryGraph {
             `${Object.keys(this.memories).length} memories, ${this.linkCount} links`);
     }
     async save() {
-        await mkdir(DATA_DIR, { recursive: true });
         const links = [];
         for (const [kid, mids] of Object.entries(this._keyToMems)) {
             for (const [mid, weight] of mids) {
@@ -486,9 +581,17 @@ export class MemoryGraph {
             links,
             meta: { embeddingFingerprint: fingerprint },
         };
-        const tmp = GRAPH_FILE + ".tmp";
-        await writeFile(tmp, JSON.stringify(data, null, 2), "utf-8");
-        await rename(tmp, GRAPH_FILE);
+        // Snapshot is built synchronously above (callers mutate under _lock without
+        // awaiting mid-mutation, so this read is consistent). Serialize the actual I/O
+        // so concurrent saves can't collide: a per-write unique temp name + single-flight
+        // _saveLock together guarantee one clean writeFile→rename at a time.
+        const json = JSON.stringify(data, null, 2);
+        await this._saveLock.runExclusive(async () => {
+            await mkdir(DATA_DIR, { recursive: true });
+            const tmp = `${GRAPH_FILE}.${process.pid}.${++this._saveSeq}.tmp`;
+            await writeFile(tmp, json, "utf-8");
+            await rename(tmp, GRAPH_FILE);
+        });
         this._dirty = false;
     }
     markDirty() {
@@ -509,19 +612,25 @@ export class MemoryGraph {
             this.keys[kid] = {
                 id: kid,
                 concept,
+                aliases: [],
                 embedding: await embedTextAsync(concept),
                 key_type: keyType,
             };
             return kid;
         }
+        const normalizedConcept = concept.toLowerCase();
+        for (const [kid, key] of Object.entries(this.keys)) {
+            if (key.key_type !== "concept")
+                continue;
+            const terms = [key.concept, ...(key.aliases ?? [])];
+            if (terms.some((term) => term.toLowerCase() === normalizedConcept)) {
+                this._recordKeyAlias(kid, concept);
+                return kid;
+            }
+        }
         // Short concept keys merge only on exact (case-insensitive) string match, so
         // near-identical-but-distinct short keys ("Agent A" vs "Agent B") stay separate.
         if (isShortConcept(concept)) {
-            const lc = concept.toLowerCase();
-            for (const [kid, k] of Object.entries(this.keys)) {
-                if (k.key_type === "concept" && k.concept.toLowerCase() === lc)
-                    return kid;
-            }
             const emb = await embedTextAsync(concept);
             // Conservative semantic merge: fold an incoming short key into an existing concept
             // key only at high cosine (clear synonym). Reconciles state-blind LLM key choices
@@ -536,12 +645,15 @@ export class MemoryGraph {
                             bestSim = sims[i];
                             bestIdx = i;
                         }
-                    if (bestSim >= SHORT_KEY_MERGE_THRESHOLD)
-                        return conceptKeys[bestIdx][0];
+                    if (bestSim >= SHORT_KEY_MERGE_THRESHOLD) {
+                        const existingId = conceptKeys[bestIdx][0];
+                        this._recordKeyAlias(existingId, concept);
+                        return existingId;
+                    }
                 }
             }
             const kid = uid();
-            this.keys[kid] = { id: kid, concept, embedding: emb, key_type: "concept" };
+            this.keys[kid] = { id: kid, concept, aliases: [], embedding: emb, key_type: "concept" };
             return kid;
         }
         const emb = await embedTextAsync(concept);
@@ -556,33 +668,30 @@ export class MemoryGraph {
                     bestIdx = i;
                 }
             }
-            if (bestSim >= KEY_MERGE_THRESHOLD)
-                return conceptKeys[bestIdx][0];
+            if (bestSim >= KEY_MERGE_THRESHOLD) {
+                const existingId = conceptKeys[bestIdx][0];
+                this._recordKeyAlias(existingId, concept);
+                return existingId;
+            }
         }
         const kid = uid();
-        this.keys[kid] = { id: kid, concept, embedding: emb, key_type: "concept" };
+        this.keys[kid] = { id: kid, concept, aliases: [], embedding: emb, key_type: "concept" };
         return kid;
     }
     // ── Add ──
     async add(content, keyConcepts, options = {}) {
         const embedding = await embedTextAsync(content); // outside lock
+        // Duplicate detection and insertion run under a SINGLE lock acquisition so they are
+        // atomic: two concurrent identical adds serialize, and the second observes the first's
+        // memory as a duplicate instead of both clearing the check and inserting twice. The dup
+        // path defers to supersede() only AFTER releasing the lock (the mutex is non-reentrant).
         let dupId = null;
+        let resultMid = "";
         await this._lock.runExclusive(async () => {
             this._checkDim(embedding);
             dupId = this._findDuplicate(embedding);
-        });
-        if (dupId !== null) {
-            const newId = await this.supersede(dupId, content, {
-                keyConcepts,
-                keyTypes: options.keyTypes ?? undefined,
-                source: options.source,
-                namespace: options.namespace,
-                relatedTo: options.relatedTo,
-            });
-            return [newId, true];
-        }
-        let resultMid = "";
-        await this._lock.runExclusive(async () => {
+            if (dupId !== null)
+                return; // defer to supersede() once the lock is released
             const mid = uid();
             resultMid = mid;
             const now = Date.now() / 1000;
@@ -625,6 +734,16 @@ export class MemoryGraph {
             this._bm25.add({ id: mid, content });
             await this.save();
         });
+        if (dupId !== null) {
+            const newId = await this.supersede(dupId, content, {
+                keyConcepts,
+                keyTypes: options.keyTypes ?? undefined,
+                source: options.source,
+                namespace: options.namespace,
+                relatedTo: options.relatedTo,
+            });
+            return [newId, true];
+        }
         return [resultMid, false];
     }
     // ── Supersede ──
@@ -632,8 +751,21 @@ export class MemoryGraph {
         const newEmbedding = await embedTextAsync(newContent); // outside lock
         let resultMid = "";
         await this._lock.runExclusive(async () => {
+            // Follow the supersession chain to the current live head. Normally oldId is already
+            // live (callers pass an id from _findDuplicate, which skips superseded memories) so
+            // this is a no-op. Under concurrency it serializes multiple supersedes of the same
+            // target into one linear chain instead of forking parallel successors.
+            while (oldId in this._supersededBy)
+                oldId = this._supersededBy[oldId];
             if (!(oldId in this.memories)) {
-                throw new Error(`Memory ${oldId} not found`);
+                // The head was superseded and pruned by a concurrent supersede (grandparent cleanup
+                // deletes it). Re-resolve against the current live state so concurrent supersedes of
+                // the same content collapse into one chain instead of erroring or forking successors.
+                const reResolved = this._findDuplicate(newEmbedding);
+                if (reResolved === null) {
+                    throw new Error(`Memory ${oldId} not found`);
+                }
+                oldId = reResolved;
             }
             const old = this.memories[oldId];
             // Chain cleanup: keep depth max 1 (new -> old; grandparent deleted)
@@ -723,7 +855,246 @@ export class MemoryGraph {
         });
         return resultMid;
     }
-    // ── Recall ──
+    // ── Agent-driven key navigation ──
+    async searchKeys(query, topK = 8, namespace) { // rank keys against `query`; resolves to at most topK index-only candidates (no memory content)
+        const cleanQuery = query.trim();
+        if (!cleanQuery || Object.keys(this.keys).length === 0)
+            return [];
+        const qEmb = await embedTextAsync(cleanQuery, "query");
+        this._checkDim(qEmb);
+        topK = Number.isNaN(Number(topK)) ? 8 : Math.max(1, Math.min(20, Math.floor(topK))); // clamp to 1..20; a non-numeric value falls back to the default 8 (it used to become NaN and empty the result)
+        // Content signal: max cosine of a key's member memories to the query. Lets a key whose
+        // CONTENT matches surface even when its coined concept does not lexically/semantically hit
+        // the query — the cure for key-coining dependence. Computed OUTSIDE the lock (read-only
+        // cosine over a synchronous snapshot) to keep the lock hold short, matching the rerank/flush
+        // off-lock design. A memory added after this snapshot simply scores 0 for this query.
+        const memIds = Object.keys(this.memories);
+        const memSimArr = batchCosineSim(qEmb, memIds.map((mid) => this.memories[mid].embedding));
+        const memSim = new Map();
+        for (let j = 0; j < memIds.length; j++)
+            memSim.set(memIds[j], memSimArr[j]);
+        return this._lock.runExclusive(async () => {
+            const queryLower = cleanQuery.toLowerCase();
+            const keyIds = Object.keys(this.keys);
+            const sims = batchCosineSim(qEmb, keyIds.map((kid) => this.keys[kid].embedding));
+            const candidates = [];
+            for (let i = 0; i < keyIds.length; i++) {
+                const kid = keyIds[i];
+                const key = this.keys[kid];
+                const activeIds = this._activeMemoryIdsForKey(kid, namespace);
+                if (activeIds.length === 0)
+                    continue; // keys with no active memory in scope are never returned
+                const aliases = key.aliases ?? [];
+                const conceptLiteral = literalKeyMatch(queryLower, key.concept);
+                const matchedAlias = aliases.find((alias) => literalKeyMatch(queryLower, alias));
+                if (matchedAlias && key.learnedAliases) {
+                    const la = key.learnedAliases.find((l) => l.alias.toLowerCase() === matchedAlias.toLowerCase());
+                    if (la)
+                        la.hits += 1; // NOTE(review): no save()/markDirty() follows in this method — confirm the counter is persisted elsewhere
+                }
+                const literal = conceptLiteral || matchedAlias !== undefined;
+                let contentSim = 0;
+                let contentMid = "";
+                for (const mid of activeIds) {
+                    const s = memSim.get(mid) ?? 0; // memories missing from the pre-lock snapshot score 0
+                    if (s > contentSim) {
+                        contentSim = s;
+                        contentMid = mid;
+                    }
+                }
+                const keySim = sims[i];
+                if ((key.key_type === "name" || key.key_type === "proper_noun")
+                    ? !literal
+                    : !literal && keySim < KEY_RECALL_THRESHOLD && contentSim < CONTENT_RECALL_THRESHOLD) {
+                    continue; // name/proper-noun keys need a literal hit; other keys need a literal hit or one similarity at/above its threshold
+                }
+                const relevance = literal ? 1 : Math.max(keySim, contentSim);
+                const memoryCount = activeIds.length;
+                candidates.push({
+                    key_id: kid,
+                    concept: key.concept,
+                    aliases,
+                    key_type: key.key_type,
+                    score: Math.round(relevance * 1000) / 1000,
+                    match_type: matchedAlias ? "alias" : conceptLiteral ? "concept" : contentSim > keySim ? "content" : "semantic",
+                    memory_count: memoryCount,
+                    is_hub: memoryCount >= KEY_HUB_MIN_LINKS,
+                    specificity: Math.round((1 / memoryCount) * 1000) / 1000, // memoryCount >= 1 here, so no division by zero
+                    cluster_size: 1 + aliases.length,
+                    evidence: "index_only",
+                    suggested_tool: "read_key",
+                    _literal: literal, // internal sort/dedupe fields, stripped before returning
+                    _contentMid: contentMid,
+                });
+            }
+            const claimedContentMids = new Set();
+            const result = candidates
+                .sort((a, b) => Number(b._literal) - Number(a._literal) || b.score - a.score || b.specificity - a.specificity)
+                .filter((c) => {
+                // Collapse synonym keys that surface only because they share the same content-matched
+                // memory: keep the highest-ranked one so one memory's aliases can't flood the results.
+                if (c.match_type !== "content")
+                    return true;
+                if (claimedContentMids.has(c._contentMid))
+                    return false;
+                claimedContentMids.add(c._contentMid);
+                return true;
+            })
+                .slice(0, topK)
+                .map(({ _literal, _contentMid, ...candidate }) => candidate);
+            if (AUTOKEY_ENABLED) {
+                const weak = result.filter((c) => c.match_type === "semantic"); // semantic-only hits are the ones buffered for alias learning
+                if (weak.length > 0) {
+                    this._recallBuffer.push({
+                        queryText: cleanQuery,
+                        weakKeyScores: new Map(weak.map((c) => [c.key_id, c.score])),
+                    });
+                }
+            }
+            return result;
+        });
+    }
+    async readKey(keyId, options = {}) { // one page of a key's active memories (ids and scores only) plus the key summary; throws if the key is unknown
+        if (!(keyId in this.keys))
+            throw new Error(`Key ${keyId} not found`);
+        const namespace = options.namespace ?? null;
+        const limit = Number.isNaN(Number(options.limit ?? 10)) ? 10 : Math.max(1, Math.min(50, Math.floor(options.limit ?? 10))); // clamp to 1..50; a non-numeric limit falls back to 10 instead of producing an empty page
+        const offset = Number.isNaN(Number(options.offset ?? 0)) ? 0 : Math.max(0, Math.floor(options.offset ?? 0)); // a non-numeric offset falls back to the first page
+        // Query-aware ranking: when a query is supplied, order this key's memories by content
+        // relevance to it (not only by link weight). This is what makes a generic hub key usable —
+        // the target rises to the top instead of being buried among the hub's other members.
+        // Omitted query reproduces the prior link-weight ordering exactly (rel = 1).
+        const cleanQuery = options.query?.trim();
+        const qEmb = cleanQuery ? await embedTextAsync(cleanQuery, "query") : null;
+        if (qEmb)
+            this._checkDim(qEmb);
+        const ranked = this._activeMemoryIdsForKey(keyId, namespace)
+            .map((mid) => {
+            const mem = this.memories[mid];
+            const linkWeight = this._getLinkWeight(keyId, mid);
+            const rel = qEmb ? cosineSim(qEmb, mem.embedding) : 1;
+            const score = rel * linkWeight * (0.9 + mem.depth * 0.1) * this._timeFactor(mem);
+            return { mid, mem, linkWeight, score };
+        })
+            .sort((a, b) => b.score - a.score || b.mem.created_at - a.mem.created_at); // ties broken by newest first
+        const page = ranked.slice(offset, offset + limit).map(({ mid, mem, linkWeight, score }) => ({
+            memory_id: mid,
+            evidence: "unread",
+            suggested_tool: "read_memory",
+            depth: Math.round(mem.depth * 1000) / 1000,
+            created_at: mem.created_at,
+            namespace: mem.namespace,
+            link_weight: Math.round(linkWeight * 1000) / 1000,
+            score: Math.round(score * 1000) / 1000,
+        }));
+        return {
+            key: this._keyView(keyId, namespace),
+            memories: page,
+            total: ranked.length,
+            next_offset: offset + limit < ranked.length ? offset + limit : null, // null once the last page has been served
+        };
+    }
+    // Auto-key self-healing: a memory was just confirmed (read) via viaKeyId. If that key
+    // was a recent WEAK (semantic) recall match, the originating query is candidate
+    // vocabulary the key is missing. Accumulate heat; promote at threshold. Runs inside
+    // readMemory's lock; readMemory's unconditional save() persists any mutation.
+    async _maybeLearnAlias(keyId, memoryId) { // keyId: key the read came through; memoryId: the memory that was just read
+        const entry = this._recallBuffer.consumeWeakMatch(keyId); // presumably removes and returns the buffered query that weakly matched this key — confirm in autokey.js
+        if (!entry)
+            return;
+        const key = this.keys[keyId];
+        if (!key)
+            return;
+        const q = entry.queryText.trim();
+        if (q.length < 2)
+            return;
+        const norm = q.toLowerCase(); // case-insensitive identity used for the comparisons and as the candidate map key
+        if (key.concept.toLowerCase() === norm)
+            return; // the query is the concept itself: nothing to learn
+        if ((key.aliases ?? []).some((a) => a.toLowerCase() === norm))
+            return; // already a known alias
+        key.aliasCandidates ??= {};
+        const prev = key.aliasCandidates[norm];
+        const candidate = { count: (prev?.count ?? 0) + 1, lastSeen: Date.now() / 1000, queryText: q }; // lastSeen is in epoch seconds
+        key.aliasCandidates[norm] = candidate;
+        const decision = decidePromotion({ // compared below against "alias" and "newKey"; any other value leaves the candidate in place
+            count: candidate.count,
+            query: q,
+            cosine: entry.weakKeyScores.get(keyId) ?? 0, // score recorded for this key when the query was buffered
+            learnedAliasCount: key.learnedAliases?.length ?? 0,
+            aliasThreshold: KEY_MERGE_THRESHOLD,
+            newKeyThreshold: KEY_AUTO_LINK_THRESHOLD,
+            promoteN: AUTOKEY_PROMOTE_N,
+            maxAliases: AUTOKEY_MAX_ALIASES,
+        });
+        if (decision === "alias") {
+            this._recordKeyAlias(keyId, q);
+            key.learnedAliases ??= [];
+            key.learnedAliases.push({ alias: q, addedAt: Date.now() / 1000, hits: 0 }); // tracked separately from key.aliases, with a hit counter
+            delete key.aliasCandidates[norm];
+        }
+        else if (decision === "newKey") {
+            const newKid = await this.findOrCreateKey(q, "concept"); // may return an existing key id rather than create one
+            this._link(newKid, memoryId);
+            delete key.aliasCandidates[norm];
+        }
+    }
+    async readMemory(memoryId, viaKeyId, namespace) { // returns one memory's content and connected keys; runs entirely under _lock and saves before returning
+        return this._lock.runExclusive(async () => {
+            const mem = this.memories[memoryId];
+            if (!mem || this._isExpired(mem))
+                throw new Error(`Memory ${memoryId} not found`);
+            if (namespace && mem.namespace !== namespace)
+                throw new Error(`Memory ${memoryId} not found`); // same message as a missing memory
+            if (memoryId in this._supersededBy) {
+                throw new Error(`Memory ${memoryId} was superseded by ${this._supersededBy[memoryId]}`);
+            }
+            if (viaKeyId && !this._hasLink(viaKeyId, memoryId)) {
+                throw new Error(`Key ${viaKeyId} is not linked to memory ${memoryId}`);
+            }
+            mem.depth = Math.min(mem.depth + DEPTH_INCREMENT, DEPTH_MAX); // every read deepens the memory, capped at DEPTH_MAX
+            mem.access_count += 1;
+            mem.last_accessed = Date.now() / 1000; // epoch seconds
+            if (viaKeyId) {
+                this._setLinkWeight(viaKeyId, memoryId, this._getLinkWeight(viaKeyId, memoryId) + LINK_REINFORCE_AMOUNT); // reinforce the link that was traversed
+            }
+            if (AUTOKEY_ENABLED && viaKeyId) {
+                await this._maybeLearnAlias(viaKeyId, memoryId);
+            }
+            const connectedKeys = [...(this._memToKeys[memoryId] ?? new Map())]
+                .filter(([kid]) => kid in this.keys) // drop links whose key record no longer exists
+                .map(([kid, weight]) => ({
+                ...this._keyView(kid, mem.namespace),
+                link_weight: Math.round(weight * 1000) / 1000,
+                traversed_from: kid === viaKeyId,
+            }))
+                .sort((a, b) => b.link_weight - a.link_weight);
+            await this.save(); // unconditional: persists the depth/access/link updates made above
+            return {
+                evidence: "read",
+                grounded: true,
+                suggested_tool: null,
+                memory: {
+                    id: memoryId,
+                    content: mem.content,
+                    depth: Math.round(mem.depth * 1000) / 1000,
+                    access_count: mem.access_count,
+                    last_accessed: mem.last_accessed,
+                    created_at: mem.created_at,
+                    source: mem.source,
+                    namespace: mem.namespace,
+                    expires_at: mem.ttl, // the stored ttl field, exposed under the name expires_at
+                    supersedes: mem.supersedes,
+                    superseded_by: this._supersededBy[memoryId] ?? null, // expected to be null: a superseded id already threw above
+                    related_to: mem.links,
+                    contradicts: mem.contradicts ?? [],
+                },
+                keys: connectedKeys,
+                via_key_id: viaKeyId ?? null,
+            };
+        });
+    }
+    // ── Direct memory recall (internal / compatibility mode) ──
     async recall(query, topK = 5, namespace, expand = false, maxHops = 2, minRelScore = 0, minScore = MIN_SCORE_THRESHOLD, minZ = GATE_Z_THRESHOLD, minKeyGate = KEY_GATE_THRESHOLD, minDepth = 0) {
         if (Object.keys(this.memories).length === 0)
             return [];
@@ -742,28 +1113,37 @@ export class MemoryGraph {
         const qEmb = await embedTextAsync(query, "query"); // outside lock
         this._checkDim(qEmb);
         const results = [];
+        const queryLower = query.toLowerCase().trim();
+        const memMatchedKeys = {};
+        const memHop = {};
+        let keyScores = [];
+        // Hoisted to method scope so Phase 3 (a separate locked section) can reuse it.
+        const skip = (mid) => {
+            if (!(mid in this.memories))
+                return true;
+            const mem = this.memories[mid];
+            if (this._isExpired(mem))
+                return true;
+            if (namespace && mem.namespace !== namespace)
+                return true;
+            if (mid in this._supersededBy)
+                return true;
+            return false;
+        };
+        // Phase-1 outputs, consumed by the unlocked rerank (Phase 2) + commit (Phase 3).
+        let gated = [];
+        let definiteAnchor = false;
+        const actualTopK = expand ? topK * 2 : topK;
+        // ── Phase 1 (locked, fully synchronous) ── retrieve + fuse + gate. No await runs
+        // inside this section, so the lock is held only for fast in-memory work, never
+        // across model inference or disk I/O.
         await this._lock.runExclusive(async () => {
-            const queryLower = query.toLowerCase().trim();
-            const memMatchedKeys = {};
-            const memHop = {};
             const memRawSim = {};
             const allContentSims = [];
             const bumpRaw = (mid, sim) => {
                 if (sim > (memRawSim[mid] ?? -Infinity))
                     memRawSim[mid] = sim;
             };
-            const skip = (mid) => {
-                if (!(mid in this.memories))
-                    return true;
-                const mem = this.memories[mid];
-                if (this._isExpired(mem))
-                    return true;
-                if (namespace && mem.namespace !== namespace)
-                    return true;
-                if (mid in this._supersededBy)
-                    return true;
-                return false;
-            };
             // ── BM25 sparse search ──
             const bm25Ranked = [];
             const bm25Results = this._bm25.search(query, { fuzzy: 0.2, prefix: true });
@@ -786,12 +1166,12 @@ export class MemoryGraph {
                     maxConceptKeySim = keySims[i];
                 }
             }
-            const keyScores = [];
+            keyScores = [];
             for (let i = 0; i < keyIds.length; i++) {
                 const kid = keyIds[i];
                 const key = this.keys[kid];
                 if (key.key_type === "name" || key.key_type === "proper_noun") {
-                    if (queryLower.includes(key.concept.toLowerCase())) {
+                    if (literalKeyMatch(queryLower, key.concept)) {
                         keyScores.push([1.0, kid]);
                     }
                 }
@@ -878,7 +1258,7 @@ export class MemoryGraph {
                 const concept = this.keys[kid]?.concept;
                 if (!concept || concept.length < 2)
                     continue;
-                if (!queryLower.includes(concept.toLowerCase()))
+                if (!literalKeyMatch(queryLower, concept))
                     continue;
                 const bonus = (1 / (RRF_K + 1)) * this._keyIdf(kid);
                 for (const memId of this._keyToMems[kid]?.keys() ?? []) {
@@ -957,7 +1337,6 @@ export class MemoryGraph {
                         memScores[mid] *= 0.7;
                 }
             }
-            const actualTopK = expand ? topK * 2 : topK;
             const sorted = Object.entries(memScores).sort(([, a], [, b]) => b - a);
             // Absolute score gate (anchor-based): the query counts as "found" only if at
             // least one candidate has a direct dense similarity >= minScore. With no such
@@ -972,7 +1351,7 @@ export class MemoryGraph {
             // absolute gate false-positives. minZ (gateZ) = 0 disables it, leaving the
             // 0.7.0 absolute-only behavior unchanged for bge-m3 and other profiles.
             const candidateIds = Object.keys(memScores);
-            const definiteAnchor = candidateIds.some((mid) => (memRawSim[mid] ?? 0) >= 0.999);
+            definiteAnchor = candidateIds.some((mid) => (memRawSim[mid] ?? 0) >= 0.999);
             const absoluteAnchor = candidateIds.some((mid) => passesAbsoluteGate(memRawSim[mid] ?? 0, minScore));
             let maxContentSim = 0;
             for (const s of allContentSims)
@@ -996,26 +1375,44 @@ export class MemoryGraph {
             // (e.g. 0.05) trims that flood while keeping genuine associations (~15%+).
             // Default 0 = keep everything (no behavior change).
             const floor = sorted.length ? sorted[0][1] * minRelScore : 0;
-            const gated = (hasAnchor ? sorted : [])
+            gated = (hasAnchor ? sorted : [])
                 .filter(([, score]) => score >= floor)
                 .filter(([mid]) => minDepth <= 0 || (this.memories[mid]?.depth ?? 0) >= minDepth);
-            let ranked = gated.slice(0, actualTopK);
-            // ── Cross-encoder rerank (opt-in) ── Re-score a wider pool of gated candidates by
-            // joint (query, memory) relevance and reorder, then keep top_k. Pure precision pass:
-            // it only reorders memories that already passed the gate, so it never turns a
-            // not-found into a found. Falls back to the fused order if the model is unavailable.
-            if (rerankEnabled() && gated.length > 1) {
-                const pool = gated.slice(0, Math.max(actualTopK, RERANK_POOL));
-                const scores = await rerankScores(query, pool.map(([mid]) => this.memories[mid]?.content ?? ""));
-                if (scores) {
-                    ranked = pool
-                        .map((entry, i) => ({ entry, s: scores[i] }))
-                        .sort((a, b) => b.s - a.s)
-                        .map((x) => x.entry)
-                        .slice(0, actualTopK);
+        });
+        // ── Phase 2 (UNLOCKED) ── cross-encoder rerank (opt-in). Model inference is the
+        // only slow, I/O-like await in recall; running it outside the lock lets other
+        // recalls and writes proceed meanwhile. It only READS immutable memory content
+        // (all reads happen synchronously before the await) and mutates nothing shared.
+        let ranked = gated.slice(0, actualTopK);
+        if (rerankEnabled() && gated.length > 0) {
+            const pool = gated.slice(0, Math.max(actualTopK, RERANK_POOL));
+            const scores = await rerankScores(query, pool.map(([mid]) => this.memories[mid]?.content ?? ""));
+            if (scores) {
+                const reordered = pool
+                    .map((entry, i) => ({ entry, s: scores[i] }))
+                    .sort((a, b) => b.s - a.s);
+                // Not-found gate (opt-in): a low top relevance logit means nothing answers the
+                // query → []. Trusted only when the query and the top candidate share script —
+                // cross-lingual logits run low even when relevant, so on a script mismatch we keep
+                // the result (the cosine/key gate already vouched). This catches same-language
+                // distractors; cross-lingual not-found stays a known limitation (use bilingual keys).
+                const topContent = this.memories[reordered[0]?.entry[0]]?.content ?? "";
+                const sameScript = hasHangul(query) === hasHangul(topContent);
+                if (RERANK_MIN_SCORE !== null && sameScript && reordered[0].s < RERANK_MIN_SCORE) {
+                    ranked = [];
+                }
+                else {
+                    ranked = reordered.map((x) => x.entry).slice(0, actualTopK);
                 }
             }
+        }
+        // ── Phase 3 (locked, fully synchronous) ── commit reinforcement + assemble the
+        // result payload. Re-validate every id with skip(): a concurrent forget/supersede/
+        // expiry may have landed during the unlocked rerank above.
+        await this._lock.runExclusive(async () => {
             for (const [mid, score] of ranked) {
+                if (skip(mid))
+                    continue;
                 const mem = this.memories[mid];
                 mem.depth = Math.min(mem.depth + DEPTH_INCREMENT, DEPTH_MAX);
                 mem.access_count += 1;
@@ -1047,6 +1444,8 @@ export class MemoryGraph {
             // for a different key, slowly polluting the graph. This mirrors the decay
             // side, which is already scoped to matched keys.
             for (const [mid] of ranked) {
+                if (skip(mid))
+                    continue;
                 for (const kid of this._memToKeys[mid]?.keys() ?? []) {
                     if (!matchedKeyIds.has(kid))
                         continue;
@@ -1065,7 +1464,7 @@ export class MemoryGraph {
             }
             this.markDirty();
         });
-        await this.flush(); // outside lock
+        await this.flush(); // outside lock; save() is serialized + atomic (see _saveLock)
         return results;
     }
     // ── Related ──
@@ -1225,7 +1624,35 @@ export class MemoryGraph {
             }
             this._removeMemoryReferences(expired);
             this._pruneOrphanKeys();
-            if (expired.length > 0)
+            let pruned = false;
+            const now = Date.now() / 1000;
+            for (const key of Object.values(this.keys)) {
+                if (!key.learnedAliases?.length)
+                    continue;
+                const keep = key.learnedAliases.filter((l) => l.hits > 0 || now - l.addedAt < AUTOKEY_PRUNE_AGE_SECONDS);
+                if (keep.length === key.learnedAliases.length)
+                    continue;
+                const dropped = new Set(key.learnedAliases.filter((l) => !keep.includes(l)).map((l) => l.alias.toLowerCase()));
+                key.learnedAliases = keep;
+                key.aliases = (key.aliases ?? []).filter((a) => !dropped.has(a.toLowerCase()));
+                pruned = true;
+            }
+            // Drop stale alias candidates — heat that never reached promotion (e.g. long
+            // non-promotable queries that fail isShortConcept) — so the persisted ledger
+            // cannot grow without bound on a long-lived key.
+            for (const key of Object.values(this.keys)) {
+                if (!key.aliasCandidates)
+                    continue;
+                for (const [norm, cand] of Object.entries(key.aliasCandidates)) {
+                    if (now - cand.lastSeen >= AUTOKEY_PRUNE_AGE_SECONDS) {
+                        delete key.aliasCandidates[norm];
+                        pruned = true;
+                    }
+                }
+                if (Object.keys(key.aliasCandidates).length === 0)
+                    delete key.aliasCandidates;
+            }
+            if (expired.length > 0 || pruned)
                 await this.save();
             return expired.length;
         });