mcp-super-memory 0.10.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,10 +33,25 @@ const DENSE_RESULT_DEPTH = 50;
33
33
  // key (shared by many) can't flood the chain. Keeps recall→related→related navigable.
34
34
  const RELATED_LIMIT = Number(process.env.SUPER_MEMORY_RELATED_LIMIT ?? 20);
35
35
  const RELATED_EXPLICIT_BONUS = 1.0; // an explicit link is the strongest connection signal
36
/**
 * Convert the raw SUPER_MEMORY_KEY_HUB_MIN_LINKS value to a number.
 *
 * An unset or blank value yields the default (3). Without the blank check,
 * `Number("")` is 0, which the clamp below would turn into 2 — so an empty
 * `VAR=` line would quietly lower the hub threshold instead of using the default.
 *
 * @param {string | undefined} raw - Raw environment value.
 * @returns {number} Parsed number (NaN when the text is not numeric).
 */
const _parseHubMinLinks = (raw) => {
    const text = String(raw ?? "").trim();
    return text === "" ? 3 : Number(text);
};
const _hubMinLinks = _parseHubMinLinks(process.env.SUPER_MEMORY_KEY_HUB_MIN_LINKS);
// A key is flagged `is_hub` once it has at least this many active memories.
// Floored to an integer and never below 2; non-numeric input falls back to 3.
const KEY_HUB_MIN_LINKS = Number.isFinite(_hubMinLinks)
    ? Math.max(2, Math.floor(_hubMinLinks))
    : 3;
36
40
  // When the cross-encoder reranker is on (SUPER_MEMORY_RERANK), re-score this many of the
37
41
  // top fused candidates by joint (query, memory) relevance, then keep the requested top_k.
38
42
  // A wider pool than top_k lets the reranker rescue a right answer the fused score buried.
39
43
  const RERANK_POOL = Number(process.env.SUPER_MEMORY_RERANK_POOL ?? 30);
44
+ // Rerank-based not-found gate (opt-in). The cross-encoder's absolute relevance logit is a
45
+ // stronger "does this memory actually answer the query" signal than bi-encoder cosine, so a
46
+ // low top logit means the query is unanswerable → return []. Unset = disabled. A definite
47
+ // key anchor (literal name/proper-noun match) bypasses it. NOTE: reliable for SAME-LANGUAGE
48
+ // queries only — cross-lingual relevance logits run low even when relevant, so cross-lingual
49
+ // not-found must lean on key anchors, not this floor.
50
/**
 * Parse the raw SUPER_MEMORY_RERANK_MIN_SCORE value.
 *
 * Returns null (gate disabled) when the variable is unset, blank, or not a
 * number. A blank value must not coerce to 0: that would silently switch the
 * opt-in gate on with a threshold of 0. A non-numeric value must not leave NaN
 * in the constant, since every `score < NaN` comparison is false.
 *
 * @param {string | undefined} raw - Raw environment value.
 * @returns {number | null} Threshold, or null when the gate is disabled.
 */
const _parseRerankMinScore = (raw) => {
    if (raw === undefined) {
        return null;
    }
    const text = String(raw).trim();
    if (text === "") {
        return null;
    }
    const parsed = Number(text);
    return Number.isNaN(parsed) ? null : parsed;
};
// Minimum top rerank score for a query to count as "found"; null = gate off.
const RERANK_MIN_SCORE = _parseRerankMinScore(process.env.SUPER_MEMORY_RERANK_MIN_SCORE);
51
+ // KR↔Latin script check. The rerank not-found gate only trusts its logit when the query and
52
+ // the top candidate share script — cross-lingual (e.g. Korean query ↔ English memory) logits
53
+ // run low even when relevant, so a script mismatch means "don't trust the low logit, keep it".
54
/**
 * Report whether `s` contains at least one Hangul character.
 *
 * Uses the Unicode Script property rather than hand-picked ranges so that
 * conjoining Jamo (U+1100–U+11FF, what NFD-normalized Korean text consists of)
 * are recognised alongside precomposed syllables and compatibility Jamo.
 *
 * @param {string} s - Text to inspect.
 * @returns {boolean} True when any Hangul-script character is present.
 */
const hasHangul = (s) => /\p{Script=Hangul}/u.test(s);
40
55
  const LINK_WEIGHT_DEFAULT = 1.0;
41
56
  const LINK_WEIGHT_MIN = 0.1;
42
57
  const LINK_WEIGHT_MAX = 3.0;
@@ -148,6 +163,12 @@ export class MemoryGraph {
148
163
  // provenance). Drives re-embed on a same-dimension model swap; see embeddingFingerprint.
149
164
  _storedFingerprint = null;
150
165
  _lock = new Mutex();
166
+ // Serializes disk writes independently of _lock so a flush() done OUTSIDE _lock
167
+ // (recall's tail) can never race another save on the temp file or interleave
168
+ // renames. Lock order is always _lock → _saveLock (writes) or _saveLock alone
169
+ // (recall flush); nothing acquires _saveLock then _lock, so no deadlock.
170
+ _saveLock = new Mutex();
171
+ _saveSeq = 0;
151
172
  _dirty = false;
152
173
  _bm25;
153
174
  constructor() {
@@ -340,6 +361,43 @@ export class MemoryGraph {
340
361
  idf *= 0.5;
341
362
  return idf;
342
363
  }
364
+ _recordKeyAlias(keyId, alias) {
365
+ const key = this.keys[keyId];
366
+ if (!key)
367
+ return;
368
+ const clean = alias.trim();
369
+ if (clean.length < 2 || key.concept.toLowerCase() === clean.toLowerCase())
370
+ return;
371
+ key.aliases ??= [];
372
+ if (!key.aliases.some((existing) => existing.toLowerCase() === clean.toLowerCase())) {
373
+ key.aliases.push(clean);
374
+ }
375
+ }
376
+ _activeMemoryIdsForKey(keyId, namespace) {
377
+ const active = [];
378
+ for (const mid of this._keyToMems[keyId]?.keys() ?? []) {
379
+ const mem = this.memories[mid];
380
+ if (!mem || this._isExpired(mem) || mid in this._supersededBy)
381
+ continue;
382
+ if (namespace && mem.namespace !== namespace)
383
+ continue;
384
+ active.push(mid);
385
+ }
386
+ return active;
387
+ }
388
+ _keyView(keyId, namespace) {
389
+ const key = this.keys[keyId];
390
+ const memoryCount = this._activeMemoryIdsForKey(keyId, namespace).length;
391
+ return {
392
+ key_id: keyId,
393
+ concept: key.concept,
394
+ aliases: key.aliases ?? [],
395
+ key_type: key.key_type,
396
+ memory_count: memoryCount,
397
+ is_hub: memoryCount >= KEY_HUB_MIN_LINKS,
398
+ specificity: memoryCount > 0 ? Math.round((1 / memoryCount) * 1000) / 1000 : 0,
399
+ };
400
+ }
343
401
  _findDuplicate(embedding) {
344
402
  const activeMems = Object.entries(this.memories).filter(([mid]) => !(mid in this._supersededBy));
345
403
  if (activeMems.length === 0)
@@ -422,7 +480,17 @@ export class MemoryGraph {
422
480
  }
423
481
  this._storedFingerprint = raw.meta?.embeddingFingerprint ?? null;
424
482
  for (const [kid, k] of Object.entries(raw.keys ?? {})) {
425
- this.keys[kid] = k;
483
+ const seen = new Set();
484
+ const aliases = (Array.isArray(k.aliases) ? k.aliases : []).filter((alias) => {
485
+ if (typeof alias !== "string" || alias.trim().length < 2)
486
+ return false;
487
+ const normalized = alias.trim().toLowerCase();
488
+ if (normalized === k.concept.toLowerCase() || seen.has(normalized))
489
+ return false;
490
+ seen.add(normalized);
491
+ return true;
492
+ });
493
+ this.keys[kid] = { ...k, aliases };
426
494
  }
427
495
  for (const [mid, m] of Object.entries(raw.memories ?? {})) {
428
496
  const defaults = {
@@ -469,7 +537,6 @@ export class MemoryGraph {
469
537
  `${Object.keys(this.memories).length} memories, ${this.linkCount} links`);
470
538
  }
471
539
  async save() {
472
- await mkdir(DATA_DIR, { recursive: true });
473
540
  const links = [];
474
541
  for (const [kid, mids] of Object.entries(this._keyToMems)) {
475
542
  for (const [mid, weight] of mids) {
@@ -486,9 +553,17 @@ export class MemoryGraph {
486
553
  links,
487
554
  meta: { embeddingFingerprint: fingerprint },
488
555
  };
489
- const tmp = GRAPH_FILE + ".tmp";
490
- await writeFile(tmp, JSON.stringify(data, null, 2), "utf-8");
491
- await rename(tmp, GRAPH_FILE);
556
+ // Snapshot is built synchronously above (callers mutate under _lock without
557
+ // awaiting mid-mutation, so this read is consistent). Serialize the actual I/O
558
+ // so concurrent saves can't collide: a per-write unique temp name + single-flight
559
+ // _saveLock together guarantee one clean writeFile→rename at a time.
560
+ const json = JSON.stringify(data, null, 2);
561
+ await this._saveLock.runExclusive(async () => {
562
+ await mkdir(DATA_DIR, { recursive: true });
563
+ const tmp = `${GRAPH_FILE}.${process.pid}.${++this._saveSeq}.tmp`;
564
+ await writeFile(tmp, json, "utf-8");
565
+ await rename(tmp, GRAPH_FILE);
566
+ });
492
567
  this._dirty = false;
493
568
  }
494
569
  markDirty() {
@@ -509,19 +584,25 @@ export class MemoryGraph {
509
584
  this.keys[kid] = {
510
585
  id: kid,
511
586
  concept,
587
+ aliases: [],
512
588
  embedding: await embedTextAsync(concept),
513
589
  key_type: keyType,
514
590
  };
515
591
  return kid;
516
592
  }
593
+ const normalizedConcept = concept.toLowerCase();
594
+ for (const [kid, key] of Object.entries(this.keys)) {
595
+ if (key.key_type !== "concept")
596
+ continue;
597
+ const terms = [key.concept, ...(key.aliases ?? [])];
598
+ if (terms.some((term) => term.toLowerCase() === normalizedConcept)) {
599
+ this._recordKeyAlias(kid, concept);
600
+ return kid;
601
+ }
602
+ }
517
603
  // Short concept keys merge only on exact (case-insensitive) string match, so
518
604
  // near-identical-but-distinct short keys ("Agent A" vs "Agent B") stay separate.
519
605
  if (isShortConcept(concept)) {
520
- const lc = concept.toLowerCase();
521
- for (const [kid, k] of Object.entries(this.keys)) {
522
- if (k.key_type === "concept" && k.concept.toLowerCase() === lc)
523
- return kid;
524
- }
525
606
  const emb = await embedTextAsync(concept);
526
607
  // Conservative semantic merge: fold an incoming short key into an existing concept
527
608
  // key only at high cosine (clear synonym). Reconciles state-blind LLM key choices
@@ -536,12 +617,15 @@ export class MemoryGraph {
536
617
  bestSim = sims[i];
537
618
  bestIdx = i;
538
619
  }
539
- if (bestSim >= SHORT_KEY_MERGE_THRESHOLD)
540
- return conceptKeys[bestIdx][0];
620
+ if (bestSim >= SHORT_KEY_MERGE_THRESHOLD) {
621
+ const existingId = conceptKeys[bestIdx][0];
622
+ this._recordKeyAlias(existingId, concept);
623
+ return existingId;
624
+ }
541
625
  }
542
626
  }
543
627
  const kid = uid();
544
- this.keys[kid] = { id: kid, concept, embedding: emb, key_type: "concept" };
628
+ this.keys[kid] = { id: kid, concept, aliases: [], embedding: emb, key_type: "concept" };
545
629
  return kid;
546
630
  }
547
631
  const emb = await embedTextAsync(concept);
@@ -556,33 +640,30 @@ export class MemoryGraph {
556
640
  bestIdx = i;
557
641
  }
558
642
  }
559
- if (bestSim >= KEY_MERGE_THRESHOLD)
560
- return conceptKeys[bestIdx][0];
643
+ if (bestSim >= KEY_MERGE_THRESHOLD) {
644
+ const existingId = conceptKeys[bestIdx][0];
645
+ this._recordKeyAlias(existingId, concept);
646
+ return existingId;
647
+ }
561
648
  }
562
649
  const kid = uid();
563
- this.keys[kid] = { id: kid, concept, embedding: emb, key_type: "concept" };
650
+ this.keys[kid] = { id: kid, concept, aliases: [], embedding: emb, key_type: "concept" };
564
651
  return kid;
565
652
  }
566
653
  // ── Add ──
567
654
  async add(content, keyConcepts, options = {}) {
568
655
  const embedding = await embedTextAsync(content); // outside lock
656
+ // Duplicate detection and insertion run under a SINGLE lock acquisition so they are
657
+ // atomic: two concurrent identical adds serialize, and the second observes the first's
658
+ // memory as a duplicate instead of both clearing the check and inserting twice. The dup
659
+ // path defers to supersede() only AFTER releasing the lock (the mutex is non-reentrant).
569
660
  let dupId = null;
661
+ let resultMid = "";
570
662
  await this._lock.runExclusive(async () => {
571
663
  this._checkDim(embedding);
572
664
  dupId = this._findDuplicate(embedding);
573
- });
574
- if (dupId !== null) {
575
- const newId = await this.supersede(dupId, content, {
576
- keyConcepts,
577
- keyTypes: options.keyTypes ?? undefined,
578
- source: options.source,
579
- namespace: options.namespace,
580
- relatedTo: options.relatedTo,
581
- });
582
- return [newId, true];
583
- }
584
- let resultMid = "";
585
- await this._lock.runExclusive(async () => {
665
+ if (dupId !== null)
666
+ return; // defer to supersede() once the lock is released
586
667
  const mid = uid();
587
668
  resultMid = mid;
588
669
  const now = Date.now() / 1000;
@@ -625,6 +706,16 @@ export class MemoryGraph {
625
706
  this._bm25.add({ id: mid, content });
626
707
  await this.save();
627
708
  });
709
+ if (dupId !== null) {
710
+ const newId = await this.supersede(dupId, content, {
711
+ keyConcepts,
712
+ keyTypes: options.keyTypes ?? undefined,
713
+ source: options.source,
714
+ namespace: options.namespace,
715
+ relatedTo: options.relatedTo,
716
+ });
717
+ return [newId, true];
718
+ }
628
719
  return [resultMid, false];
629
720
  }
630
721
  // ── Supersede ──
@@ -632,8 +723,21 @@ export class MemoryGraph {
632
723
  const newEmbedding = await embedTextAsync(newContent); // outside lock
633
724
  let resultMid = "";
634
725
  await this._lock.runExclusive(async () => {
726
+ // Follow the supersession chain to the current live head. Normally oldId is already
727
+ // live (callers pass an id from _findDuplicate, which skips superseded memories) so
728
+ // this is a no-op. Under concurrency it serializes multiple supersedes of the same
729
+ // target into one linear chain instead of forking parallel successors.
730
+ while (oldId in this._supersededBy)
731
+ oldId = this._supersededBy[oldId];
635
732
  if (!(oldId in this.memories)) {
636
- throw new Error(`Memory ${oldId} not found`);
733
+ // The head was superseded and pruned by a concurrent supersede (grandparent cleanup
734
+ // deletes it). Re-resolve against the current live state so concurrent supersedes of
735
+ // the same content collapse into one chain instead of erroring or forking successors.
736
+ const reResolved = this._findDuplicate(newEmbedding);
737
+ if (reResolved === null) {
738
+ throw new Error(`Memory ${oldId} not found`);
739
+ }
740
+ oldId = reResolved;
637
741
  }
638
742
  const old = this.memories[oldId];
639
743
  // Chain cleanup: keep depth max 1 (new -> old; grandparent deleted)
@@ -723,7 +827,141 @@ export class MemoryGraph {
723
827
  });
724
828
  return resultMid;
725
829
  }
726
- // ── Recall ──
830
+ // ── Agent-driven key navigation ──
831
+ async searchKeys(query, topK = 8, namespace) {
832
+ const cleanQuery = query.trim();
833
+ if (!cleanQuery || Object.keys(this.keys).length === 0)
834
+ return [];
835
+ const qEmb = await embedTextAsync(cleanQuery, "query");
836
+ this._checkDim(qEmb);
837
+ topK = Math.max(1, Math.min(20, Math.floor(topK)));
838
+ return this._lock.runExclusive(async () => {
839
+ const queryLower = cleanQuery.toLowerCase();
840
+ const keyIds = Object.keys(this.keys);
841
+ const sims = batchCosineSim(qEmb, keyIds.map((kid) => this.keys[kid].embedding));
842
+ const candidates = [];
843
+ for (let i = 0; i < keyIds.length; i++) {
844
+ const kid = keyIds[i];
845
+ const key = this.keys[kid];
846
+ const activeIds = this._activeMemoryIdsForKey(kid, namespace);
847
+ if (activeIds.length === 0)
848
+ continue;
849
+ const aliases = key.aliases ?? [];
850
+ const conceptLiteral = key.concept.length >= 2 && queryLower.includes(key.concept.toLowerCase());
851
+ const matchedAlias = aliases.find((alias) => alias.length >= 2 && queryLower.includes(alias.toLowerCase()));
852
+ const literal = conceptLiteral || matchedAlias !== undefined;
853
+ if ((key.key_type === "name" || key.key_type === "proper_noun")
854
+ ? !literal
855
+ : !literal && sims[i] < KEY_RECALL_THRESHOLD) {
856
+ continue;
857
+ }
858
+ const memoryCount = activeIds.length;
859
+ candidates.push({
860
+ key_id: kid,
861
+ concept: key.concept,
862
+ aliases,
863
+ key_type: key.key_type,
864
+ score: Math.round((literal ? 1 : sims[i]) * 1000) / 1000,
865
+ match_type: matchedAlias ? "alias" : conceptLiteral ? "concept" : "semantic",
866
+ memory_count: memoryCount,
867
+ is_hub: memoryCount >= KEY_HUB_MIN_LINKS,
868
+ specificity: Math.round((1 / memoryCount) * 1000) / 1000,
869
+ cluster_size: 1 + aliases.length,
870
+ evidence: "index_only",
871
+ suggested_tool: "read_key",
872
+ _literal: literal,
873
+ });
874
+ }
875
+ return candidates
876
+ .sort((a, b) => Number(b._literal) - Number(a._literal) || b.score - a.score || b.specificity - a.specificity)
877
+ .slice(0, topK)
878
+ .map(({ _literal, ...candidate }) => candidate);
879
+ });
880
+ }
881
+ readKey(keyId, options = {}) {
882
+ if (!(keyId in this.keys))
883
+ throw new Error(`Key ${keyId} not found`);
884
+ const namespace = options.namespace ?? null;
885
+ const limit = Math.max(1, Math.min(50, Math.floor(options.limit ?? 10)));
886
+ const offset = Math.max(0, Math.floor(options.offset ?? 0));
887
+ const ranked = this._activeMemoryIdsForKey(keyId, namespace)
888
+ .map((mid) => {
889
+ const mem = this.memories[mid];
890
+ const linkWeight = this._getLinkWeight(keyId, mid);
891
+ const score = linkWeight * (0.9 + mem.depth * 0.1) * this._timeFactor(mem);
892
+ return { mid, mem, linkWeight, score };
893
+ })
894
+ .sort((a, b) => b.score - a.score || b.mem.created_at - a.mem.created_at);
895
+ const page = ranked.slice(offset, offset + limit).map(({ mid, mem, linkWeight, score }) => ({
896
+ memory_id: mid,
897
+ evidence: "unread",
898
+ suggested_tool: "read_memory",
899
+ depth: Math.round(mem.depth * 1000) / 1000,
900
+ created_at: mem.created_at,
901
+ namespace: mem.namespace,
902
+ link_weight: Math.round(linkWeight * 1000) / 1000,
903
+ score: Math.round(score * 1000) / 1000,
904
+ }));
905
+ return {
906
+ key: this._keyView(keyId, namespace),
907
+ memories: page,
908
+ total: ranked.length,
909
+ next_offset: offset + limit < ranked.length ? offset + limit : null,
910
+ };
911
+ }
912
+ async readMemory(memoryId, viaKeyId, namespace) {
913
+ return this._lock.runExclusive(async () => {
914
+ const mem = this.memories[memoryId];
915
+ if (!mem || this._isExpired(mem))
916
+ throw new Error(`Memory ${memoryId} not found`);
917
+ if (namespace && mem.namespace !== namespace)
918
+ throw new Error(`Memory ${memoryId} not found`);
919
+ if (memoryId in this._supersededBy) {
920
+ throw new Error(`Memory ${memoryId} was superseded by ${this._supersededBy[memoryId]}`);
921
+ }
922
+ if (viaKeyId && !this._hasLink(viaKeyId, memoryId)) {
923
+ throw new Error(`Key ${viaKeyId} is not linked to memory ${memoryId}`);
924
+ }
925
+ mem.depth = Math.min(mem.depth + DEPTH_INCREMENT, DEPTH_MAX);
926
+ mem.access_count += 1;
927
+ mem.last_accessed = Date.now() / 1000;
928
+ if (viaKeyId) {
929
+ this._setLinkWeight(viaKeyId, memoryId, this._getLinkWeight(viaKeyId, memoryId) + LINK_REINFORCE_AMOUNT);
930
+ }
931
+ const connectedKeys = [...(this._memToKeys[memoryId] ?? new Map())]
932
+ .filter(([kid]) => kid in this.keys)
933
+ .map(([kid, weight]) => ({
934
+ ...this._keyView(kid, mem.namespace),
935
+ link_weight: Math.round(weight * 1000) / 1000,
936
+ traversed_from: kid === viaKeyId,
937
+ }))
938
+ .sort((a, b) => b.link_weight - a.link_weight);
939
+ await this.save();
940
+ return {
941
+ evidence: "read",
942
+ grounded: true,
943
+ suggested_tool: null,
944
+ memory: {
945
+ id: memoryId,
946
+ content: mem.content,
947
+ depth: Math.round(mem.depth * 1000) / 1000,
948
+ access_count: mem.access_count,
949
+ last_accessed: mem.last_accessed,
950
+ created_at: mem.created_at,
951
+ source: mem.source,
952
+ namespace: mem.namespace,
953
+ expires_at: mem.ttl,
954
+ supersedes: mem.supersedes,
955
+ superseded_by: this._supersededBy[memoryId] ?? null,
956
+ related_to: mem.links,
957
+ contradicts: mem.contradicts ?? [],
958
+ },
959
+ keys: connectedKeys,
960
+ via_key_id: viaKeyId ?? null,
961
+ };
962
+ });
963
+ }
964
+ // ── Direct memory recall (internal / compatibility mode) ──
727
965
  async recall(query, topK = 5, namespace, expand = false, maxHops = 2, minRelScore = 0, minScore = MIN_SCORE_THRESHOLD, minZ = GATE_Z_THRESHOLD, minKeyGate = KEY_GATE_THRESHOLD, minDepth = 0) {
728
966
  if (Object.keys(this.memories).length === 0)
729
967
  return [];
@@ -742,28 +980,37 @@ export class MemoryGraph {
742
980
  const qEmb = await embedTextAsync(query, "query"); // outside lock
743
981
  this._checkDim(qEmb);
744
982
  const results = [];
983
+ const queryLower = query.toLowerCase().trim();
984
+ const memMatchedKeys = {};
985
+ const memHop = {};
986
+ let keyScores = [];
987
+ // Hoisted to method scope so Phase 3 (a separate locked section) can reuse it.
988
+ const skip = (mid) => {
989
+ if (!(mid in this.memories))
990
+ return true;
991
+ const mem = this.memories[mid];
992
+ if (this._isExpired(mem))
993
+ return true;
994
+ if (namespace && mem.namespace !== namespace)
995
+ return true;
996
+ if (mid in this._supersededBy)
997
+ return true;
998
+ return false;
999
+ };
1000
+ // Phase-1 outputs, consumed by the unlocked rerank (Phase 2) + commit (Phase 3).
1001
+ let gated = [];
1002
+ let definiteAnchor = false;
1003
+ const actualTopK = expand ? topK * 2 : topK;
1004
+ // ── Phase 1 (locked, fully synchronous) ── retrieve + fuse + gate. No await runs
1005
+ // inside this section, so the lock is held only for fast in-memory work, never
1006
+ // across model inference or disk I/O.
745
1007
  await this._lock.runExclusive(async () => {
746
- const queryLower = query.toLowerCase().trim();
747
- const memMatchedKeys = {};
748
- const memHop = {};
749
1008
  const memRawSim = {};
750
1009
  const allContentSims = [];
751
1010
  const bumpRaw = (mid, sim) => {
752
1011
  if (sim > (memRawSim[mid] ?? -Infinity))
753
1012
  memRawSim[mid] = sim;
754
1013
  };
755
- const skip = (mid) => {
756
- if (!(mid in this.memories))
757
- return true;
758
- const mem = this.memories[mid];
759
- if (this._isExpired(mem))
760
- return true;
761
- if (namespace && mem.namespace !== namespace)
762
- return true;
763
- if (mid in this._supersededBy)
764
- return true;
765
- return false;
766
- };
767
1014
  // ── BM25 sparse search ──
768
1015
  const bm25Ranked = [];
769
1016
  const bm25Results = this._bm25.search(query, { fuzzy: 0.2, prefix: true });
@@ -786,7 +1033,7 @@ export class MemoryGraph {
786
1033
  maxConceptKeySim = keySims[i];
787
1034
  }
788
1035
  }
789
- const keyScores = [];
1036
+ keyScores = [];
790
1037
  for (let i = 0; i < keyIds.length; i++) {
791
1038
  const kid = keyIds[i];
792
1039
  const key = this.keys[kid];
@@ -957,7 +1204,6 @@ export class MemoryGraph {
957
1204
  memScores[mid] *= 0.7;
958
1205
  }
959
1206
  }
960
- const actualTopK = expand ? topK * 2 : topK;
961
1207
  const sorted = Object.entries(memScores).sort(([, a], [, b]) => b - a);
962
1208
  // Absolute score gate (anchor-based): the query counts as "found" only if at
963
1209
  // least one candidate has a direct dense similarity >= minScore. With no such
@@ -972,7 +1218,7 @@ export class MemoryGraph {
972
1218
  // absolute gate false-positives. minZ (gateZ) = 0 disables it, leaving the
973
1219
  // 0.7.0 absolute-only behavior unchanged for bge-m3 and other profiles.
974
1220
  const candidateIds = Object.keys(memScores);
975
- const definiteAnchor = candidateIds.some((mid) => (memRawSim[mid] ?? 0) >= 0.999);
1221
+ definiteAnchor = candidateIds.some((mid) => (memRawSim[mid] ?? 0) >= 0.999);
976
1222
  const absoluteAnchor = candidateIds.some((mid) => passesAbsoluteGate(memRawSim[mid] ?? 0, minScore));
977
1223
  let maxContentSim = 0;
978
1224
  for (const s of allContentSims)
@@ -996,26 +1242,44 @@ export class MemoryGraph {
996
1242
  // (e.g. 0.05) trims that flood while keeping genuine associations (~15%+).
997
1243
  // Default 0 = keep everything (no behavior change).
998
1244
  const floor = sorted.length ? sorted[0][1] * minRelScore : 0;
999
- const gated = (hasAnchor ? sorted : [])
1245
+ gated = (hasAnchor ? sorted : [])
1000
1246
  .filter(([, score]) => score >= floor)
1001
1247
  .filter(([mid]) => minDepth <= 0 || (this.memories[mid]?.depth ?? 0) >= minDepth);
1002
- let ranked = gated.slice(0, actualTopK);
1003
- // ── Cross-encoder rerank (opt-in) ── Re-score a wider pool of gated candidates by
1004
- // joint (query, memory) relevance and reorder, then keep top_k. Pure precision pass:
1005
- // it only reorders memories that already passed the gate, so it never turns a
1006
- // not-found into a found. Falls back to the fused order if the model is unavailable.
1007
- if (rerankEnabled() && gated.length > 1) {
1008
- const pool = gated.slice(0, Math.max(actualTopK, RERANK_POOL));
1009
- const scores = await rerankScores(query, pool.map(([mid]) => this.memories[mid]?.content ?? ""));
1010
- if (scores) {
1011
- ranked = pool
1012
- .map((entry, i) => ({ entry, s: scores[i] }))
1013
- .sort((a, b) => b.s - a.s)
1014
- .map((x) => x.entry)
1015
- .slice(0, actualTopK);
1248
+ });
1249
+ // ── Phase 2 (UNLOCKED) ── cross-encoder rerank (opt-in). Model inference is the
1250
+ // only slow, I/O-like await in recall; running it outside the lock lets other
1251
+ // recalls and writes proceed meanwhile. It only READS immutable memory content
1252
+ // (all reads happen synchronously before the await) and mutates nothing shared.
1253
+ let ranked = gated.slice(0, actualTopK);
1254
+ if (rerankEnabled() && gated.length > 0) {
1255
+ const pool = gated.slice(0, Math.max(actualTopK, RERANK_POOL));
1256
+ const scores = await rerankScores(query, pool.map(([mid]) => this.memories[mid]?.content ?? ""));
1257
+ if (scores) {
1258
+ const reordered = pool
1259
+ .map((entry, i) => ({ entry, s: scores[i] }))
1260
+ .sort((a, b) => b.s - a.s);
1261
+ // Not-found gate (opt-in): a low top relevance logit means nothing answers the
1262
+ // query → []. Trusted only when the query and the top candidate share script —
1263
+ // cross-lingual logits run low even when relevant, so on a script mismatch we keep
1264
+ // the result (the cosine/key gate already vouched). This catches same-language
1265
+ // distractors; cross-lingual not-found stays a known limitation (use bilingual keys).
1266
+ const topContent = this.memories[reordered[0]?.entry[0]]?.content ?? "";
1267
+ const sameScript = hasHangul(query) === hasHangul(topContent);
1268
+ if (RERANK_MIN_SCORE !== null && sameScript && reordered[0].s < RERANK_MIN_SCORE) {
1269
+ ranked = [];
1270
+ }
1271
+ else {
1272
+ ranked = reordered.map((x) => x.entry).slice(0, actualTopK);
1016
1273
  }
1017
1274
  }
1275
+ }
1276
+ // ── Phase 3 (locked, fully synchronous) ── commit reinforcement + assemble the
1277
+ // result payload. Re-validate every id with skip(): a concurrent forget/supersede/
1278
+ // expiry may have landed during the unlocked rerank above.
1279
+ await this._lock.runExclusive(async () => {
1018
1280
  for (const [mid, score] of ranked) {
1281
+ if (skip(mid))
1282
+ continue;
1019
1283
  const mem = this.memories[mid];
1020
1284
  mem.depth = Math.min(mem.depth + DEPTH_INCREMENT, DEPTH_MAX);
1021
1285
  mem.access_count += 1;
@@ -1047,6 +1311,8 @@ export class MemoryGraph {
1047
1311
  // for a different key, slowly polluting the graph. This mirrors the decay
1048
1312
  // side, which is already scoped to matched keys.
1049
1313
  for (const [mid] of ranked) {
1314
+ if (skip(mid))
1315
+ continue;
1050
1316
  for (const kid of this._memToKeys[mid]?.keys() ?? []) {
1051
1317
  if (!matchedKeyIds.has(kid))
1052
1318
  continue;
@@ -1065,7 +1331,7 @@ export class MemoryGraph {
1065
1331
  }
1066
1332
  this.markDirty();
1067
1333
  });
1068
- await this.flush(); // outside lock
1334
+ await this.flush(); // outside lock; save() is serialized + atomic (see _saveLock)
1069
1335
  return results;
1070
1336
  }
1071
1337
  // ── Related ──