mcp-super-memory 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ import { Mutex } from "async-mutex";
6
6
  import MiniSearch from "minisearch";
7
7
  import { embedTextAsync, EMBEDDING_BACKEND, embeddingFingerprint, getThresholdProfile, isShortConcept, inContradictionBand } from "./embedding.js";
8
8
  import { rerankEnabled, rerankScores } from "./reranker.js";
9
+ import { RecallBuffer, decidePromotion, AUTOKEY_ENABLED, AUTOKEY_BUFFER_CAPACITY, AUTOKEY_BUFFER_TTL_SECONDS, AUTOKEY_PROMOTE_N, AUTOKEY_MAX_ALIASES, AUTOKEY_PRUNE_AGE_SECONDS, } from "./autokey.js";
9
10
  const DATA_DIR = process.env.SUPER_MEMORY_DATA_DIR ?? join(homedir(), ".super-memory");
10
11
  const GRAPH_FILE = join(DATA_DIR, "graph.json");
11
12
  const CONVERSATIONS_DIR = join(DATA_DIR, "conversations");
@@ -33,10 +34,34 @@ const DENSE_RESULT_DEPTH = 50;
33
34
  // key (shared by many) can't flood the chain. Keeps recall→related→related navigable.
34
35
  const RELATED_LIMIT = Number(process.env.SUPER_MEMORY_RELATED_LIMIT ?? 20);
35
36
  const RELATED_EXPLICIT_BONUS = 1.0; // an explicit link is the strongest connection signal
37
/**
 * Resolve the hub threshold from its raw environment value.
 *
 * A key is reported as a hub (`is_hub`) once it has at least this many active member
 * memories. The value is floored and clamped to a minimum of 2. An unset, blank, or
 * non-numeric value falls back to the default of 3 — a blank value is treated as "unset"
 * rather than being coerced to 0 and silently clamped to 2.
 *
 * @param {string | undefined | null} raw - Raw value of SUPER_MEMORY_KEY_HUB_MIN_LINKS.
 * @returns {number} Integer threshold, always >= 2.
 */
function resolveKeyHubMinLinks(raw) {
    const text = raw === undefined || raw === null ? "" : String(raw).trim();
    // Number("") is 0, so a blank value must be rejected before numeric coercion.
    const parsed = text === "" ? Number.NaN : Number(text);
    return Number.isFinite(parsed) ? Math.max(2, Math.floor(parsed)) : 3;
}
const KEY_HUB_MIN_LINKS = resolveKeyHubMinLinks(process.env.SUPER_MEMORY_KEY_HUB_MIN_LINKS);
36
41
  // When the cross-encoder reranker is on (SUPER_MEMORY_RERANK), re-score this many of the
37
42
  // top fused candidates by joint (query, memory) relevance, then keep the requested top_k.
38
43
  // A wider pool than top_k lets the reranker rescue a right answer the fused score buried.
39
44
  const RERANK_POOL = Number(process.env.SUPER_MEMORY_RERANK_POOL ?? 30);
45
// Rerank-based not-found gate (opt-in). The cross-encoder's absolute relevance logit is a
// stronger "does this memory actually answer the query" signal than bi-encoder cosine, so a
// low top logit means the query is unanswerable → return []. Unset = disabled. A definite
// key anchor (literal name/proper-noun match) bypasses it. NOTE: reliable for SAME-LANGUAGE
// queries only — cross-lingual relevance logits run low even when relevant, so cross-lingual
// not-found must lean on key anchors, not this floor.
/**
 * Parse the optional rerank floor from its raw environment value.
 *
 * Returns null (gate disabled) when the variable is unset, blank, or not a finite number.
 * A blank value must not enable the gate: Number("") is 0, which would otherwise switch
 * the gate on at threshold 0.
 *
 * @param {string | undefined | null} raw - Raw value of SUPER_MEMORY_RERANK_MIN_SCORE.
 * @returns {number | null} Finite threshold, or null when the gate is disabled.
 */
function parseRerankMinScore(raw) {
    if (raw === undefined || raw === null)
        return null;
    const text = String(raw).trim();
    if (text === "")
        return null;
    const value = Number(text);
    return Number.isFinite(value) ? value : null;
}
const RERANK_MIN_SCORE = parseRerankMinScore(process.env.SUPER_MEMORY_RERANK_MIN_SCORE);
52
// KR↔Latin script check. The rerank not-found gate only trusts its logit when the query and
// the top candidate share script — cross-lingual (e.g. Korean query ↔ English memory) logits
// run low even when relevant, so a script mismatch means "don't trust the low logit, keep it".
// Detection uses the Unicode Script property rather than hard-coded ranges, so decomposed
// (NFD) text made of conjoining jamo is recognised as Hangul as well as precomposed
// syllables and compatibility jamo.
const hasHangul = (s) => /\p{Script=Hangul}/u.test(s);
56
/**
 * Case-insensitive literal match of a key term inside an already-lowercased query.
 *
 * The match must land on a unicode-aware word boundary so a short common-noun key like
 * "name" does not spuriously match inside a longer word ("namespace") and spike to the top.
 * The boundary is relaxed where the script does not delimit words with spaces:
 *   - a term that starts or ends with a Han/Hiragana/Katakana character needs no boundary
 *     on that side (those scripts are written without word separators);
 *   - a term that ends with a Hangul character needs no trailing boundary, because Korean
 *     attaches particles directly to the noun ("민수" + "는").
 * Trade-off: for those scripts the relaxed side behaves like a plain substring match.
 *
 * @param {string} queryLower - The query, already lower-cased by the caller.
 * @param {string} term - Key concept or alias to look for.
 * @returns {boolean} True on a literal hit. Terms that are not strings or are shorter
 *   than 2 chars never match.
 */
function literalKeyMatch(queryLower, term) {
    if (typeof term !== "string" || term.length < 2)
        return false;
    const needle = term.toLowerCase();
    const esc = needle.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Iterate by code point so astral characters are not split into surrogate halves.
    const glyphs = [...needle];
    const firstGlyph = glyphs[0];
    const lastGlyph = glyphs[glyphs.length - 1];
    const unsegmented = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}]/u;
    const hangul = /\p{Script=Hangul}/u;
    const boundaryChar = "[^\\p{L}\\p{N}]";
    const lead = unsegmented.test(firstGlyph) ? "" : `(^|${boundaryChar})`;
    const trail = unsegmented.test(lastGlyph) || hangul.test(lastGlyph) ? "" : `($|${boundaryChar})`;
    return new RegExp(`${lead}${esc}${trail}`, "u").test(queryLower);
}
40
65
  const LINK_WEIGHT_DEFAULT = 1.0;
41
66
  const LINK_WEIGHT_MIN = 0.1;
42
67
  const LINK_WEIGHT_MAX = 3.0;
@@ -148,8 +173,18 @@ export class MemoryGraph {
148
173
  // provenance). Drives re-embed on a same-dimension model swap; see embeddingFingerprint.
149
174
  _storedFingerprint = null;
150
175
  _lock = new Mutex();
176
+ // Serializes disk writes independently of _lock so a flush() done OUTSIDE _lock
177
+ // (recall's tail) can never race another save on the temp file or interleave
178
+ // renames. Lock order is always _lock → _saveLock (writes) or _saveLock alone
179
+ // (recall flush); nothing acquires _saveLock then _lock, so no deadlock.
180
+ _saveLock = new Mutex();
181
+ _saveSeq = 0;
151
182
  _dirty = false;
152
183
  _bm25;
184
+ _recallBuffer = new RecallBuffer({
185
+ capacity: AUTOKEY_BUFFER_CAPACITY,
186
+ ttlSeconds: AUTOKEY_BUFFER_TTL_SECONDS,
187
+ });
153
188
  constructor() {
154
189
  this._bm25 = new MiniSearch({
155
190
  fields: ["content"],
@@ -340,6 +375,44 @@ export class MemoryGraph {
340
375
  idf *= 0.5;
341
376
  return idf;
342
377
  }
378
+ _recordKeyAlias(keyId, alias) {
379
+ const key = this.keys[keyId];
380
+ if (!key)
381
+ return;
382
+ const clean = alias.trim();
383
+ if (clean.length < 2 || key.concept.toLowerCase() === clean.toLowerCase())
384
+ return;
385
+ key.aliases ??= [];
386
+ if (!key.aliases.some((existing) => existing.toLowerCase() === clean.toLowerCase())) {
387
+ key.aliases.push(clean);
388
+ }
389
+ }
390
+ _activeMemoryIdsForKey(keyId, namespace) {
391
+ const active = [];
392
+ for (const mid of this._keyToMems[keyId]?.keys() ?? []) {
393
+ const mem = this.memories[mid];
394
+ if (!mem || this._isExpired(mem) || mid in this._supersededBy)
395
+ continue;
396
+ if (namespace && mem.namespace !== namespace)
397
+ continue;
398
+ active.push(mid);
399
+ }
400
+ return active;
401
+ }
402
+ _keyView(keyId, namespace) {
403
+ const key = this.keys[keyId];
404
+ const memoryCount = this._activeMemoryIdsForKey(keyId, namespace).length;
405
+ return {
406
+ key_id: keyId,
407
+ concept: key.concept,
408
+ aliases: key.aliases ?? [],
409
+ learned_aliases: (key.learnedAliases ?? []).map((l) => l.alias),
410
+ key_type: key.key_type,
411
+ memory_count: memoryCount,
412
+ is_hub: memoryCount >= KEY_HUB_MIN_LINKS,
413
+ specificity: memoryCount > 0 ? Math.round((1 / memoryCount) * 1000) / 1000 : 0,
414
+ };
415
+ }
343
416
  _findDuplicate(embedding) {
344
417
  const activeMems = Object.entries(this.memories).filter(([mid]) => !(mid in this._supersededBy));
345
418
  if (activeMems.length === 0)
@@ -422,7 +495,30 @@ export class MemoryGraph {
422
495
  }
423
496
  this._storedFingerprint = raw.meta?.embeddingFingerprint ?? null;
424
497
  for (const [kid, k] of Object.entries(raw.keys ?? {})) {
425
- this.keys[kid] = k;
498
+ const seen = new Set();
499
+ const aliases = (Array.isArray(k.aliases) ? k.aliases : []).filter((alias) => {
500
+ if (typeof alias !== "string" || alias.trim().length < 2)
501
+ return false;
502
+ const normalized = alias.trim().toLowerCase();
503
+ if (normalized === k.concept.toLowerCase() || seen.has(normalized))
504
+ return false;
505
+ seen.add(normalized);
506
+ return true;
507
+ });
508
+ const aliasCandidates = k.aliasCandidates && typeof k.aliasCandidates === "object" && !Array.isArray(k.aliasCandidates)
509
+ ? Object.fromEntries(Object.entries(k.aliasCandidates).filter((entry) => {
510
+ const v = entry[1];
511
+ return (!!v &&
512
+ typeof v === "object" &&
513
+ typeof v.count === "number" &&
514
+ typeof v.lastSeen === "number" &&
515
+ typeof v.queryText === "string");
516
+ }))
517
+ : undefined;
518
+ const learnedAliases = Array.isArray(k.learnedAliases)
519
+ ? k.learnedAliases.filter((l) => !!l && typeof l.alias === "string" && typeof l.addedAt === "number" && typeof l.hits === "number")
520
+ : undefined;
521
+ this.keys[kid] = { ...k, aliases, aliasCandidates, learnedAliases };
426
522
  }
427
523
  for (const [mid, m] of Object.entries(raw.memories ?? {})) {
428
524
  const defaults = {
@@ -469,7 +565,6 @@ export class MemoryGraph {
469
565
  `${Object.keys(this.memories).length} memories, ${this.linkCount} links`);
470
566
  }
471
567
  async save() {
472
- await mkdir(DATA_DIR, { recursive: true });
473
568
  const links = [];
474
569
  for (const [kid, mids] of Object.entries(this._keyToMems)) {
475
570
  for (const [mid, weight] of mids) {
@@ -486,9 +581,17 @@ export class MemoryGraph {
486
581
  links,
487
582
  meta: { embeddingFingerprint: fingerprint },
488
583
  };
489
- const tmp = GRAPH_FILE + ".tmp";
490
- await writeFile(tmp, JSON.stringify(data, null, 2), "utf-8");
491
- await rename(tmp, GRAPH_FILE);
584
+ // Snapshot is built synchronously above (callers mutate under _lock without
585
+ // awaiting mid-mutation, so this read is consistent). Serialize the actual I/O
586
+ // so concurrent saves can't collide: a per-write unique temp name + single-flight
587
+ // _saveLock together guarantee one clean writeFile→rename at a time.
588
+ const json = JSON.stringify(data, null, 2);
589
+ await this._saveLock.runExclusive(async () => {
590
+ await mkdir(DATA_DIR, { recursive: true });
591
+ const tmp = `${GRAPH_FILE}.${process.pid}.${++this._saveSeq}.tmp`;
592
+ await writeFile(tmp, json, "utf-8");
593
+ await rename(tmp, GRAPH_FILE);
594
+ });
492
595
  this._dirty = false;
493
596
  }
494
597
  markDirty() {
@@ -509,19 +612,25 @@ export class MemoryGraph {
509
612
  this.keys[kid] = {
510
613
  id: kid,
511
614
  concept,
615
+ aliases: [],
512
616
  embedding: await embedTextAsync(concept),
513
617
  key_type: keyType,
514
618
  };
515
619
  return kid;
516
620
  }
621
+ const normalizedConcept = concept.toLowerCase();
622
+ for (const [kid, key] of Object.entries(this.keys)) {
623
+ if (key.key_type !== "concept")
624
+ continue;
625
+ const terms = [key.concept, ...(key.aliases ?? [])];
626
+ if (terms.some((term) => term.toLowerCase() === normalizedConcept)) {
627
+ this._recordKeyAlias(kid, concept);
628
+ return kid;
629
+ }
630
+ }
517
631
  // Short concept keys merge only on exact (case-insensitive) string match, so
518
632
  // near-identical-but-distinct short keys ("Agent A" vs "Agent B") stay separate.
519
633
  if (isShortConcept(concept)) {
520
- const lc = concept.toLowerCase();
521
- for (const [kid, k] of Object.entries(this.keys)) {
522
- if (k.key_type === "concept" && k.concept.toLowerCase() === lc)
523
- return kid;
524
- }
525
634
  const emb = await embedTextAsync(concept);
526
635
  // Conservative semantic merge: fold an incoming short key into an existing concept
527
636
  // key only at high cosine (clear synonym). Reconciles state-blind LLM key choices
@@ -536,12 +645,15 @@ export class MemoryGraph {
536
645
  bestSim = sims[i];
537
646
  bestIdx = i;
538
647
  }
539
- if (bestSim >= SHORT_KEY_MERGE_THRESHOLD)
540
- return conceptKeys[bestIdx][0];
648
+ if (bestSim >= SHORT_KEY_MERGE_THRESHOLD) {
649
+ const existingId = conceptKeys[bestIdx][0];
650
+ this._recordKeyAlias(existingId, concept);
651
+ return existingId;
652
+ }
541
653
  }
542
654
  }
543
655
  const kid = uid();
544
- this.keys[kid] = { id: kid, concept, embedding: emb, key_type: "concept" };
656
+ this.keys[kid] = { id: kid, concept, aliases: [], embedding: emb, key_type: "concept" };
545
657
  return kid;
546
658
  }
547
659
  const emb = await embedTextAsync(concept);
@@ -556,33 +668,30 @@ export class MemoryGraph {
556
668
  bestIdx = i;
557
669
  }
558
670
  }
559
- if (bestSim >= KEY_MERGE_THRESHOLD)
560
- return conceptKeys[bestIdx][0];
671
+ if (bestSim >= KEY_MERGE_THRESHOLD) {
672
+ const existingId = conceptKeys[bestIdx][0];
673
+ this._recordKeyAlias(existingId, concept);
674
+ return existingId;
675
+ }
561
676
  }
562
677
  const kid = uid();
563
- this.keys[kid] = { id: kid, concept, embedding: emb, key_type: "concept" };
678
+ this.keys[kid] = { id: kid, concept, aliases: [], embedding: emb, key_type: "concept" };
564
679
  return kid;
565
680
  }
566
681
  // ── Add ──
567
682
  async add(content, keyConcepts, options = {}) {
568
683
  const embedding = await embedTextAsync(content); // outside lock
684
+ // Duplicate detection and insertion run under a SINGLE lock acquisition so they are
685
+ // atomic: two concurrent identical adds serialize, and the second observes the first's
686
+ // memory as a duplicate instead of both clearing the check and inserting twice. The dup
687
+ // path defers to supersede() only AFTER releasing the lock (the mutex is non-reentrant).
569
688
  let dupId = null;
689
+ let resultMid = "";
570
690
  await this._lock.runExclusive(async () => {
571
691
  this._checkDim(embedding);
572
692
  dupId = this._findDuplicate(embedding);
573
- });
574
- if (dupId !== null) {
575
- const newId = await this.supersede(dupId, content, {
576
- keyConcepts,
577
- keyTypes: options.keyTypes ?? undefined,
578
- source: options.source,
579
- namespace: options.namespace,
580
- relatedTo: options.relatedTo,
581
- });
582
- return [newId, true];
583
- }
584
- let resultMid = "";
585
- await this._lock.runExclusive(async () => {
693
+ if (dupId !== null)
694
+ return; // defer to supersede() once the lock is released
586
695
  const mid = uid();
587
696
  resultMid = mid;
588
697
  const now = Date.now() / 1000;
@@ -625,6 +734,16 @@ export class MemoryGraph {
625
734
  this._bm25.add({ id: mid, content });
626
735
  await this.save();
627
736
  });
737
+ if (dupId !== null) {
738
+ const newId = await this.supersede(dupId, content, {
739
+ keyConcepts,
740
+ keyTypes: options.keyTypes ?? undefined,
741
+ source: options.source,
742
+ namespace: options.namespace,
743
+ relatedTo: options.relatedTo,
744
+ });
745
+ return [newId, true];
746
+ }
628
747
  return [resultMid, false];
629
748
  }
630
749
  // ── Supersede ──
@@ -632,8 +751,21 @@ export class MemoryGraph {
632
751
  const newEmbedding = await embedTextAsync(newContent); // outside lock
633
752
  let resultMid = "";
634
753
  await this._lock.runExclusive(async () => {
754
+ // Follow the supersession chain to the current live head. Normally oldId is already
755
+ // live (callers pass an id from _findDuplicate, which skips superseded memories) so
756
+ // this is a no-op. Under concurrency it serializes multiple supersedes of the same
757
+ // target into one linear chain instead of forking parallel successors.
758
+ while (oldId in this._supersededBy)
759
+ oldId = this._supersededBy[oldId];
635
760
  if (!(oldId in this.memories)) {
636
- throw new Error(`Memory ${oldId} not found`);
761
+ // The head was superseded and pruned by a concurrent supersede (grandparent cleanup
762
+ // deletes it). Re-resolve against the current live state so concurrent supersedes of
763
+ // the same content collapse into one chain instead of erroring or forking successors.
764
+ const reResolved = this._findDuplicate(newEmbedding);
765
+ if (reResolved === null) {
766
+ throw new Error(`Memory ${oldId} not found`);
767
+ }
768
+ oldId = reResolved;
637
769
  }
638
770
  const old = this.memories[oldId];
639
771
  // Chain cleanup: keep depth max 1 (new -> old; grandparent deleted)
@@ -723,7 +855,246 @@ export class MemoryGraph {
723
855
  });
724
856
  return resultMid;
725
857
  }
726
- // ── Recall ──
858
+ // ── Agent-driven key navigation ──
859
+ async searchKeys(query, topK = 8, namespace) {
860
+ const cleanQuery = query.trim();
861
+ if (!cleanQuery || Object.keys(this.keys).length === 0)
862
+ return [];
863
+ const qEmb = await embedTextAsync(cleanQuery, "query");
864
+ this._checkDim(qEmb);
865
+ topK = Math.max(1, Math.min(20, Math.floor(topK)));
866
+ // Content signal: max cosine of a key's member memories to the query. Lets a key whose
867
+ // CONTENT matches surface even when its coined concept does not lexically/semantically hit
868
+ // the query — the cure for key-coining dependence. Computed OUTSIDE the lock (read-only
869
+ // cosine over a synchronous snapshot) to keep the lock hold short, matching the rerank/flush
870
+ // off-lock design. A memory added after this snapshot simply scores 0 for this query.
871
+ const memIds = Object.keys(this.memories);
872
+ const memSimArr = batchCosineSim(qEmb, memIds.map((mid) => this.memories[mid].embedding));
873
+ const memSim = new Map();
874
+ for (let j = 0; j < memIds.length; j++)
875
+ memSim.set(memIds[j], memSimArr[j]);
876
+ return this._lock.runExclusive(async () => {
877
+ const queryLower = cleanQuery.toLowerCase();
878
+ const keyIds = Object.keys(this.keys);
879
+ const sims = batchCosineSim(qEmb, keyIds.map((kid) => this.keys[kid].embedding));
880
+ const candidates = [];
881
+ for (let i = 0; i < keyIds.length; i++) {
882
+ const kid = keyIds[i];
883
+ const key = this.keys[kid];
884
+ const activeIds = this._activeMemoryIdsForKey(kid, namespace);
885
+ if (activeIds.length === 0)
886
+ continue;
887
+ const aliases = key.aliases ?? [];
888
+ const conceptLiteral = literalKeyMatch(queryLower, key.concept);
889
+ const matchedAlias = aliases.find((alias) => literalKeyMatch(queryLower, alias));
890
+ if (matchedAlias && key.learnedAliases) {
891
+ const la = key.learnedAliases.find((l) => l.alias.toLowerCase() === matchedAlias.toLowerCase());
892
+ if (la)
893
+ la.hits += 1;
894
+ }
895
+ const literal = conceptLiteral || matchedAlias !== undefined;
896
+ let contentSim = 0;
897
+ let contentMid = "";
898
+ for (const mid of activeIds) {
899
+ const s = memSim.get(mid) ?? 0;
900
+ if (s > contentSim) {
901
+ contentSim = s;
902
+ contentMid = mid;
903
+ }
904
+ }
905
+ const keySim = sims[i];
906
+ if ((key.key_type === "name" || key.key_type === "proper_noun")
907
+ ? !literal
908
+ : !literal && keySim < KEY_RECALL_THRESHOLD && contentSim < CONTENT_RECALL_THRESHOLD) {
909
+ continue;
910
+ }
911
+ const relevance = literal ? 1 : Math.max(keySim, contentSim);
912
+ const memoryCount = activeIds.length;
913
+ candidates.push({
914
+ key_id: kid,
915
+ concept: key.concept,
916
+ aliases,
917
+ key_type: key.key_type,
918
+ score: Math.round(relevance * 1000) / 1000,
919
+ match_type: matchedAlias ? "alias" : conceptLiteral ? "concept" : contentSim > keySim ? "content" : "semantic",
920
+ memory_count: memoryCount,
921
+ is_hub: memoryCount >= KEY_HUB_MIN_LINKS,
922
+ specificity: Math.round((1 / memoryCount) * 1000) / 1000,
923
+ cluster_size: 1 + aliases.length,
924
+ evidence: "index_only",
925
+ suggested_tool: "read_key",
926
+ _literal: literal,
927
+ _contentMid: contentMid,
928
+ });
929
+ }
930
+ const claimedContentMids = new Set();
931
+ const result = candidates
932
+ .sort((a, b) => Number(b._literal) - Number(a._literal) || b.score - a.score || b.specificity - a.specificity)
933
+ .filter((c) => {
934
+ // Collapse synonym keys that surface only because they share the same content-matched
935
+ // memory: keep the highest-ranked one so one memory's aliases can't flood the results.
936
+ if (c.match_type !== "content")
937
+ return true;
938
+ if (claimedContentMids.has(c._contentMid))
939
+ return false;
940
+ claimedContentMids.add(c._contentMid);
941
+ return true;
942
+ })
943
+ .slice(0, topK)
944
+ .map(({ _literal, _contentMid, ...candidate }) => candidate);
945
+ if (AUTOKEY_ENABLED) {
946
+ const weak = result.filter((c) => c.match_type === "semantic");
947
+ if (weak.length > 0) {
948
+ this._recallBuffer.push({
949
+ queryText: cleanQuery,
950
+ weakKeyScores: new Map(weak.map((c) => [c.key_id, c.score])),
951
+ });
952
+ }
953
+ }
954
+ return result;
955
+ });
956
+ }
957
+ async readKey(keyId, options = {}) {
958
+ if (!(keyId in this.keys))
959
+ throw new Error(`Key ${keyId} not found`);
960
+ const namespace = options.namespace ?? null;
961
+ const limit = Math.max(1, Math.min(50, Math.floor(options.limit ?? 10)));
962
+ const offset = Math.max(0, Math.floor(options.offset ?? 0));
963
+ // Query-aware ranking: when a query is supplied, order this key's memories by content
964
+ // relevance to it (not only by link weight). This is what makes a generic hub key usable —
965
+ // the target rises to the top instead of being buried among the hub's other members.
966
+ // Omitted query reproduces the prior link-weight ordering exactly (rel = 1).
967
+ const cleanQuery = options.query?.trim();
968
+ const qEmb = cleanQuery ? await embedTextAsync(cleanQuery, "query") : null;
969
+ if (qEmb)
970
+ this._checkDim(qEmb);
971
+ const ranked = this._activeMemoryIdsForKey(keyId, namespace)
972
+ .map((mid) => {
973
+ const mem = this.memories[mid];
974
+ const linkWeight = this._getLinkWeight(keyId, mid);
975
+ const rel = qEmb ? cosineSim(qEmb, mem.embedding) : 1;
976
+ const score = rel * linkWeight * (0.9 + mem.depth * 0.1) * this._timeFactor(mem);
977
+ return { mid, mem, linkWeight, score };
978
+ })
979
+ .sort((a, b) => b.score - a.score || b.mem.created_at - a.mem.created_at);
980
+ const page = ranked.slice(offset, offset + limit).map(({ mid, mem, linkWeight, score }) => ({
981
+ memory_id: mid,
982
+ evidence: "unread",
983
+ suggested_tool: "read_memory",
984
+ depth: Math.round(mem.depth * 1000) / 1000,
985
+ created_at: mem.created_at,
986
+ namespace: mem.namespace,
987
+ link_weight: Math.round(linkWeight * 1000) / 1000,
988
+ score: Math.round(score * 1000) / 1000,
989
+ }));
990
+ return {
991
+ key: this._keyView(keyId, namespace),
992
+ memories: page,
993
+ total: ranked.length,
994
+ next_offset: offset + limit < ranked.length ? offset + limit : null,
995
+ };
996
+ }
997
+ // Auto-key self-healing: a memory was just confirmed (read) via viaKeyId. If that key
998
+ // was a recent WEAK (semantic) recall match, the originating query is candidate
999
+ // vocabulary the key is missing. Accumulate heat; promote at threshold. Runs inside
1000
+ // readMemory's lock; readMemory's unconditional save() persists any mutation.
1001
+ async _maybeLearnAlias(keyId, memoryId) {
1002
+ const entry = this._recallBuffer.consumeWeakMatch(keyId);
1003
+ if (!entry)
1004
+ return;
1005
+ const key = this.keys[keyId];
1006
+ if (!key)
1007
+ return;
1008
+ const q = entry.queryText.trim();
1009
+ if (q.length < 2)
1010
+ return;
1011
+ const norm = q.toLowerCase();
1012
+ if (key.concept.toLowerCase() === norm)
1013
+ return;
1014
+ if ((key.aliases ?? []).some((a) => a.toLowerCase() === norm))
1015
+ return;
1016
+ key.aliasCandidates ??= {};
1017
+ const prev = key.aliasCandidates[norm];
1018
+ const candidate = { count: (prev?.count ?? 0) + 1, lastSeen: Date.now() / 1000, queryText: q };
1019
+ key.aliasCandidates[norm] = candidate;
1020
+ const decision = decidePromotion({
1021
+ count: candidate.count,
1022
+ query: q,
1023
+ cosine: entry.weakKeyScores.get(keyId) ?? 0,
1024
+ learnedAliasCount: key.learnedAliases?.length ?? 0,
1025
+ aliasThreshold: KEY_MERGE_THRESHOLD,
1026
+ newKeyThreshold: KEY_AUTO_LINK_THRESHOLD,
1027
+ promoteN: AUTOKEY_PROMOTE_N,
1028
+ maxAliases: AUTOKEY_MAX_ALIASES,
1029
+ });
1030
+ if (decision === "alias") {
1031
+ this._recordKeyAlias(keyId, q);
1032
+ key.learnedAliases ??= [];
1033
+ key.learnedAliases.push({ alias: q, addedAt: Date.now() / 1000, hits: 0 });
1034
+ delete key.aliasCandidates[norm];
1035
+ }
1036
+ else if (decision === "newKey") {
1037
+ const newKid = await this.findOrCreateKey(q, "concept");
1038
+ this._link(newKid, memoryId);
1039
+ delete key.aliasCandidates[norm];
1040
+ }
1041
+ }
1042
+ async readMemory(memoryId, viaKeyId, namespace) {
1043
+ return this._lock.runExclusive(async () => {
1044
+ const mem = this.memories[memoryId];
1045
+ if (!mem || this._isExpired(mem))
1046
+ throw new Error(`Memory ${memoryId} not found`);
1047
+ if (namespace && mem.namespace !== namespace)
1048
+ throw new Error(`Memory ${memoryId} not found`);
1049
+ if (memoryId in this._supersededBy) {
1050
+ throw new Error(`Memory ${memoryId} was superseded by ${this._supersededBy[memoryId]}`);
1051
+ }
1052
+ if (viaKeyId && !this._hasLink(viaKeyId, memoryId)) {
1053
+ throw new Error(`Key ${viaKeyId} is not linked to memory ${memoryId}`);
1054
+ }
1055
+ mem.depth = Math.min(mem.depth + DEPTH_INCREMENT, DEPTH_MAX);
1056
+ mem.access_count += 1;
1057
+ mem.last_accessed = Date.now() / 1000;
1058
+ if (viaKeyId) {
1059
+ this._setLinkWeight(viaKeyId, memoryId, this._getLinkWeight(viaKeyId, memoryId) + LINK_REINFORCE_AMOUNT);
1060
+ }
1061
+ if (AUTOKEY_ENABLED && viaKeyId) {
1062
+ await this._maybeLearnAlias(viaKeyId, memoryId);
1063
+ }
1064
+ const connectedKeys = [...(this._memToKeys[memoryId] ?? new Map())]
1065
+ .filter(([kid]) => kid in this.keys)
1066
+ .map(([kid, weight]) => ({
1067
+ ...this._keyView(kid, mem.namespace),
1068
+ link_weight: Math.round(weight * 1000) / 1000,
1069
+ traversed_from: kid === viaKeyId,
1070
+ }))
1071
+ .sort((a, b) => b.link_weight - a.link_weight);
1072
+ await this.save();
1073
+ return {
1074
+ evidence: "read",
1075
+ grounded: true,
1076
+ suggested_tool: null,
1077
+ memory: {
1078
+ id: memoryId,
1079
+ content: mem.content,
1080
+ depth: Math.round(mem.depth * 1000) / 1000,
1081
+ access_count: mem.access_count,
1082
+ last_accessed: mem.last_accessed,
1083
+ created_at: mem.created_at,
1084
+ source: mem.source,
1085
+ namespace: mem.namespace,
1086
+ expires_at: mem.ttl,
1087
+ supersedes: mem.supersedes,
1088
+ superseded_by: this._supersededBy[memoryId] ?? null,
1089
+ related_to: mem.links,
1090
+ contradicts: mem.contradicts ?? [],
1091
+ },
1092
+ keys: connectedKeys,
1093
+ via_key_id: viaKeyId ?? null,
1094
+ };
1095
+ });
1096
+ }
1097
+ // ── Direct memory recall (internal / compatibility mode) ──
727
1098
  async recall(query, topK = 5, namespace, expand = false, maxHops = 2, minRelScore = 0, minScore = MIN_SCORE_THRESHOLD, minZ = GATE_Z_THRESHOLD, minKeyGate = KEY_GATE_THRESHOLD, minDepth = 0) {
728
1099
  if (Object.keys(this.memories).length === 0)
729
1100
  return [];
@@ -742,28 +1113,37 @@ export class MemoryGraph {
742
1113
  const qEmb = await embedTextAsync(query, "query"); // outside lock
743
1114
  this._checkDim(qEmb);
744
1115
  const results = [];
1116
+ const queryLower = query.toLowerCase().trim();
1117
+ const memMatchedKeys = {};
1118
+ const memHop = {};
1119
+ let keyScores = [];
1120
+ // Hoisted to method scope so Phase 3 (a separate locked section) can reuse it.
1121
+ const skip = (mid) => {
1122
+ if (!(mid in this.memories))
1123
+ return true;
1124
+ const mem = this.memories[mid];
1125
+ if (this._isExpired(mem))
1126
+ return true;
1127
+ if (namespace && mem.namespace !== namespace)
1128
+ return true;
1129
+ if (mid in this._supersededBy)
1130
+ return true;
1131
+ return false;
1132
+ };
1133
+ // Phase-1 outputs, consumed by the unlocked rerank (Phase 2) + commit (Phase 3).
1134
+ let gated = [];
1135
+ let definiteAnchor = false;
1136
+ const actualTopK = expand ? topK * 2 : topK;
1137
+ // ── Phase 1 (locked, fully synchronous) ── retrieve + fuse + gate. No await runs
1138
+ // inside this section, so the lock is held only for fast in-memory work, never
1139
+ // across model inference or disk I/O.
745
1140
  await this._lock.runExclusive(async () => {
746
- const queryLower = query.toLowerCase().trim();
747
- const memMatchedKeys = {};
748
- const memHop = {};
749
1141
  const memRawSim = {};
750
1142
  const allContentSims = [];
751
1143
  const bumpRaw = (mid, sim) => {
752
1144
  if (sim > (memRawSim[mid] ?? -Infinity))
753
1145
  memRawSim[mid] = sim;
754
1146
  };
755
- const skip = (mid) => {
756
- if (!(mid in this.memories))
757
- return true;
758
- const mem = this.memories[mid];
759
- if (this._isExpired(mem))
760
- return true;
761
- if (namespace && mem.namespace !== namespace)
762
- return true;
763
- if (mid in this._supersededBy)
764
- return true;
765
- return false;
766
- };
767
1147
  // ── BM25 sparse search ──
768
1148
  const bm25Ranked = [];
769
1149
  const bm25Results = this._bm25.search(query, { fuzzy: 0.2, prefix: true });
@@ -786,12 +1166,12 @@ export class MemoryGraph {
786
1166
  maxConceptKeySim = keySims[i];
787
1167
  }
788
1168
  }
789
- const keyScores = [];
1169
+ keyScores = [];
790
1170
  for (let i = 0; i < keyIds.length; i++) {
791
1171
  const kid = keyIds[i];
792
1172
  const key = this.keys[kid];
793
1173
  if (key.key_type === "name" || key.key_type === "proper_noun") {
794
- if (queryLower.includes(key.concept.toLowerCase())) {
1174
+ if (literalKeyMatch(queryLower, key.concept)) {
795
1175
  keyScores.push([1.0, kid]);
796
1176
  }
797
1177
  }
@@ -878,7 +1258,7 @@ export class MemoryGraph {
878
1258
  const concept = this.keys[kid]?.concept;
879
1259
  if (!concept || concept.length < 2)
880
1260
  continue;
881
- if (!queryLower.includes(concept.toLowerCase()))
1261
+ if (!literalKeyMatch(queryLower, concept))
882
1262
  continue;
883
1263
  const bonus = (1 / (RRF_K + 1)) * this._keyIdf(kid);
884
1264
  for (const memId of this._keyToMems[kid]?.keys() ?? []) {
@@ -957,7 +1337,6 @@ export class MemoryGraph {
957
1337
  memScores[mid] *= 0.7;
958
1338
  }
959
1339
  }
960
- const actualTopK = expand ? topK * 2 : topK;
961
1340
  const sorted = Object.entries(memScores).sort(([, a], [, b]) => b - a);
962
1341
  // Absolute score gate (anchor-based): the query counts as "found" only if at
963
1342
  // least one candidate has a direct dense similarity >= minScore. With no such
@@ -972,7 +1351,7 @@ export class MemoryGraph {
972
1351
  // absolute gate false-positives. minZ (gateZ) = 0 disables it, leaving the
973
1352
  // 0.7.0 absolute-only behavior unchanged for bge-m3 and other profiles.
974
1353
  const candidateIds = Object.keys(memScores);
975
- const definiteAnchor = candidateIds.some((mid) => (memRawSim[mid] ?? 0) >= 0.999);
1354
+ definiteAnchor = candidateIds.some((mid) => (memRawSim[mid] ?? 0) >= 0.999);
976
1355
  const absoluteAnchor = candidateIds.some((mid) => passesAbsoluteGate(memRawSim[mid] ?? 0, minScore));
977
1356
  let maxContentSim = 0;
978
1357
  for (const s of allContentSims)
@@ -996,26 +1375,44 @@ export class MemoryGraph {
996
1375
  // (e.g. 0.05) trims that flood while keeping genuine associations (~15%+).
997
1376
  // Default 0 = keep everything (no behavior change).
998
1377
  const floor = sorted.length ? sorted[0][1] * minRelScore : 0;
999
- const gated = (hasAnchor ? sorted : [])
1378
+ gated = (hasAnchor ? sorted : [])
1000
1379
  .filter(([, score]) => score >= floor)
1001
1380
  .filter(([mid]) => minDepth <= 0 || (this.memories[mid]?.depth ?? 0) >= minDepth);
1002
- let ranked = gated.slice(0, actualTopK);
1003
- // ── Cross-encoder rerank (opt-in) ── Re-score a wider pool of gated candidates by
1004
- // joint (query, memory) relevance and reorder, then keep top_k. Pure precision pass:
1005
- // it only reorders memories that already passed the gate, so it never turns a
1006
- // not-found into a found. Falls back to the fused order if the model is unavailable.
1007
- if (rerankEnabled() && gated.length > 1) {
1008
- const pool = gated.slice(0, Math.max(actualTopK, RERANK_POOL));
1009
- const scores = await rerankScores(query, pool.map(([mid]) => this.memories[mid]?.content ?? ""));
1010
- if (scores) {
1011
- ranked = pool
1012
- .map((entry, i) => ({ entry, s: scores[i] }))
1013
- .sort((a, b) => b.s - a.s)
1014
- .map((x) => x.entry)
1015
- .slice(0, actualTopK);
1381
+ });
1382
+ // ── Phase 2 (UNLOCKED) ── cross-encoder rerank (opt-in). Model inference is the
1383
+ // only slow, I/O-like await in recall; running it outside the lock lets other
1384
+ // recalls and writes proceed meanwhile. It only READS immutable memory content
1385
+ // (all reads happen synchronously before the await) and mutates nothing shared.
1386
+ let ranked = gated.slice(0, actualTopK);
1387
+ if (rerankEnabled() && gated.length > 0) {
1388
+ const pool = gated.slice(0, Math.max(actualTopK, RERANK_POOL));
1389
+ const scores = await rerankScores(query, pool.map(([mid]) => this.memories[mid]?.content ?? ""));
1390
+ if (scores) {
1391
+ const reordered = pool
1392
+ .map((entry, i) => ({ entry, s: scores[i] }))
1393
+ .sort((a, b) => b.s - a.s);
1394
+ // Not-found gate (opt-in): a low top relevance logit means nothing answers the
1395
+ // query → []. Trusted only when the query and the top candidate share script —
1396
+ // cross-lingual logits run low even when relevant, so on a script mismatch we keep
1397
+ // the result (the cosine/key gate already vouched). This catches same-language
1398
+ // distractors; cross-lingual not-found stays a known limitation (use bilingual keys).
1399
+ const topContent = this.memories[reordered[0]?.entry[0]]?.content ?? "";
1400
+ const sameScript = hasHangul(query) === hasHangul(topContent);
1401
+ if (RERANK_MIN_SCORE !== null && sameScript && reordered[0].s < RERANK_MIN_SCORE) {
1402
+ ranked = [];
1403
+ }
1404
+ else {
1405
+ ranked = reordered.map((x) => x.entry).slice(0, actualTopK);
1016
1406
  }
1017
1407
  }
1408
+ }
1409
+ // ── Phase 3 (locked, fully synchronous) ── commit reinforcement + assemble the
1410
+ // result payload. Re-validate every id with skip(): a concurrent forget/supersede/
1411
+ // expiry may have landed during the unlocked rerank above.
1412
+ await this._lock.runExclusive(async () => {
1018
1413
  for (const [mid, score] of ranked) {
1414
+ if (skip(mid))
1415
+ continue;
1019
1416
  const mem = this.memories[mid];
1020
1417
  mem.depth = Math.min(mem.depth + DEPTH_INCREMENT, DEPTH_MAX);
1021
1418
  mem.access_count += 1;
@@ -1047,6 +1444,8 @@ export class MemoryGraph {
1047
1444
  // for a different key, slowly polluting the graph. This mirrors the decay
1048
1445
  // side, which is already scoped to matched keys.
1049
1446
  for (const [mid] of ranked) {
1447
+ if (skip(mid))
1448
+ continue;
1050
1449
  for (const kid of this._memToKeys[mid]?.keys() ?? []) {
1051
1450
  if (!matchedKeyIds.has(kid))
1052
1451
  continue;
@@ -1065,7 +1464,7 @@ export class MemoryGraph {
1065
1464
  }
1066
1465
  this.markDirty();
1067
1466
  });
1068
- await this.flush(); // outside lock
1467
+ await this.flush(); // outside lock; save() is serialized + atomic (see _saveLock)
1069
1468
  return results;
1070
1469
  }
1071
1470
  // ── Related ──
@@ -1225,7 +1624,35 @@ export class MemoryGraph {
1225
1624
  }
1226
1625
  this._removeMemoryReferences(expired);
1227
1626
  this._pruneOrphanKeys();
1228
- if (expired.length > 0)
1627
+ let pruned = false;
1628
+ const now = Date.now() / 1000;
1629
+ for (const key of Object.values(this.keys)) {
1630
+ if (!key.learnedAliases?.length)
1631
+ continue;
1632
+ const keep = key.learnedAliases.filter((l) => l.hits > 0 || now - l.addedAt < AUTOKEY_PRUNE_AGE_SECONDS);
1633
+ if (keep.length === key.learnedAliases.length)
1634
+ continue;
1635
+ const dropped = new Set(key.learnedAliases.filter((l) => !keep.includes(l)).map((l) => l.alias.toLowerCase()));
1636
+ key.learnedAliases = keep;
1637
+ key.aliases = (key.aliases ?? []).filter((a) => !dropped.has(a.toLowerCase()));
1638
+ pruned = true;
1639
+ }
1640
+ // Drop stale alias candidates — heat that never reached promotion (e.g. long
1641
+ // non-promotable queries that fail isShortConcept) — so the persisted ledger
1642
+ // cannot grow without bound on a long-lived key.
1643
+ for (const key of Object.values(this.keys)) {
1644
+ if (!key.aliasCandidates)
1645
+ continue;
1646
+ for (const [norm, cand] of Object.entries(key.aliasCandidates)) {
1647
+ if (now - cand.lastSeen >= AUTOKEY_PRUNE_AGE_SECONDS) {
1648
+ delete key.aliasCandidates[norm];
1649
+ pruned = true;
1650
+ }
1651
+ }
1652
+ if (Object.keys(key.aliasCandidates).length === 0)
1653
+ delete key.aliasCandidates;
1654
+ }
1655
+ if (expired.length > 0 || pruned)
1229
1656
  await this.save();
1230
1657
  return expired.length;
1231
1658
  });