@psiclawops/hypermem 0.9.7 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/CHANGELOG.md +16 -0
  2. package/INSTALL.md +29 -9
  3. package/README.md +5 -1
  4. package/assets/default-config.json +20 -5
  5. package/assets/runtime-validation-fixture.json +123 -0
  6. package/bin/hypermem-cleanup.mjs +334 -0
  7. package/bin/hypermem-doctor.mjs +71 -0
  8. package/bin/hypermem-validate-runtime.mjs +282 -0
  9. package/dist/compositor.d.ts +43 -5
  10. package/dist/compositor.d.ts.map +1 -1
  11. package/dist/compositor.js +802 -30
  12. package/dist/entity-bridge-backfill.d.ts +66 -0
  13. package/dist/entity-bridge-backfill.d.ts.map +1 -0
  14. package/dist/entity-bridge-backfill.js +145 -0
  15. package/dist/entity-bridge-store.d.ts +164 -0
  16. package/dist/entity-bridge-store.d.ts.map +1 -0
  17. package/dist/entity-bridge-store.js +488 -0
  18. package/dist/entity-extractor.d.ts +124 -0
  19. package/dist/entity-extractor.d.ts.map +1 -0
  20. package/dist/entity-extractor.js +382 -0
  21. package/dist/entity-ppr.d.ts +55 -0
  22. package/dist/entity-ppr.d.ts.map +1 -0
  23. package/dist/entity-ppr.js +180 -0
  24. package/dist/hybrid-retrieval.d.ts +27 -0
  25. package/dist/hybrid-retrieval.d.ts.map +1 -1
  26. package/dist/hybrid-retrieval.js +26 -1
  27. package/dist/index.d.ts +19 -0
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +63 -13
  30. package/dist/message-store.d.ts +36 -0
  31. package/dist/message-store.d.ts.map +1 -1
  32. package/dist/message-store.js +155 -1
  33. package/dist/open-domain.d.ts +13 -4
  34. package/dist/open-domain.d.ts.map +1 -1
  35. package/dist/open-domain.js +222 -20
  36. package/dist/profiles.js +13 -13
  37. package/dist/question-shape.d.ts +73 -0
  38. package/dist/question-shape.d.ts.map +1 -0
  39. package/dist/question-shape.js +230 -0
  40. package/dist/schema.d.ts +1 -1
  41. package/dist/schema.d.ts.map +1 -1
  42. package/dist/schema.js +92 -1
  43. package/dist/topic-detector.d.ts.map +1 -1
  44. package/dist/topic-detector.js +22 -9
  45. package/dist/types.d.ts +176 -2
  46. package/dist/types.d.ts.map +1 -1
  47. package/dist/vector-store.d.ts +6 -0
  48. package/dist/vector-store.d.ts.map +1 -1
  49. package/dist/vector-store.js +3 -0
  50. package/docs/DIAGNOSTICS.md +32 -0
  51. package/docs/INTEGRATION_VALIDATION.md +9 -4
  52. package/docs/TUNING.md +21 -21
  53. package/memory-plugin/dist/index.js +3 -1
  54. package/memory-plugin/package.json +8 -7
  55. package/package.json +10 -4
  56. package/plugin/dist/index.d.ts.map +1 -1
  57. package/plugin/dist/index.js +114 -11
  58. package/plugin/dist/index.js.map +1 -1
  59. package/plugin/package.json +9 -8
  60. package/scripts/install-runtime.mjs +4 -1
@@ -17,7 +17,7 @@ import { MessageStore } from './message-store.js';
17
17
  import { SessionTopicMap } from './session-topic-map.js';
18
18
  import { toProviderFormat, detectProvider as s4DetectProvider } from './provider-translator.js';
19
19
  import { DocChunkStore } from './doc-chunk-store.js';
20
- import { hybridSearch } from './hybrid-retrieval.js';
20
+ import { buildFtsQuery, hybridSearch, reciprocalRankFuse } from './hybrid-retrieval.js';
21
21
  import { ensureCompactionFenceSchema, updateCompactionFence, getCompactionFence, getCompactionEligibility } from './compaction-fence.js';
22
22
  import { getActiveContext, getOrCreateActiveContext } from './context-store.js';
23
23
  import { rankKeystones, scoreKeystone } from './keystone-scorer.js';
@@ -25,7 +25,11 @@ import { buildOrgRegistryFromDb, defaultOrgRegistry } from './cross-agent.js';
25
25
  import { getActiveFOS, matchMOD, renderFOS, renderMOD, renderLightFOS, resolveOutputTier, buildActionVerificationSummary } from './fos-mod.js';
26
26
  import { KnowledgeStore } from './knowledge-store.js';
27
27
  import { TemporalStore, hasTemporalSignals } from './temporal-store.js';
28
- import { isOpenDomainQuery, searchOpenDomain } from './open-domain.js';
28
+ import { buildOpenDomainFtsQueries, expandOpenDomainQueryTerms, isOpenDomainQuery, scoreOpenDomainEvidence, searchOpenDomain } from './open-domain.js';
29
+ import { detectQuestionShape, extractQueryEntities, extractQueryFacets } from './question-shape.js';
30
+ import { annotateRecallGroups, formatStructuredHandoffBlock, buildStructuredHandoffInstruction, normalizeEntityKey, normalizeFacetKey } from './entity-extractor.js';
31
+ import { EntityBridgeStore } from './entity-bridge-store.js';
32
+ import { runPersonalizedPageRank } from './entity-ppr.js';
29
33
  import { TRIM_BUDGET_POLICY, resolveTrimBudgets } from './budget-policy.js';
30
34
  import { resolveAdaptiveLifecyclePolicy, countTopicBearingTurns } from './adaptive-lifecycle.js';
31
35
  import { formatToolChainStub, parseToolChainStub, formatArtifactRef, isArtifactRef } from './degradation.js';
@@ -350,11 +354,16 @@ export function computeUnifiedPressure(usedTokens, budgetTokens, source) {
350
354
  * and a /new surge does not blow up hybrid search cost.
351
355
  */
352
356
  export const RECALL_BREADTH_BASE = Object.freeze({
353
- mainBudgetFraction: 0.12,
354
- fallbackBudgetFraction: 0.10,
355
- candidateLimit: 10,
356
- candidateLimitMin: 6,
357
- candidateLimitMax: 16,
357
+ // 0.9.8 LoCoMo tuning: Mem0-class runs spend ~7k tokens/query on
358
+ // add-only, entity-linked long-horizon recall. Our prior 12% memory slice
359
+ // under-filled benchmark turns on 128k-class models and then starved exact
360
+ // single-hop/temporal evidence. Raise the steady-state recall envelope while
361
+ // preserving pressure clamps for normal production sessions.
362
+ mainBudgetFraction: 0.18,
363
+ fallbackBudgetFraction: 0.14,
364
+ candidateLimit: 18,
365
+ candidateLimitMin: 8,
366
+ candidateLimitMax: 32,
358
367
  });
359
368
  /**
360
369
  * Apply the adaptive lifecycle smartRecallMultiplier to recall breadth.
@@ -370,9 +379,96 @@ export function scaleRecallBreadth(remainingTokens, multiplier) {
370
379
  const candidateLimit = Math.min(RECALL_BREADTH_BASE.candidateLimitMax, Math.max(RECALL_BREADTH_BASE.candidateLimitMin, limitRaw));
371
380
  return { mainBudgetTokens, fallbackBudgetTokens, candidateLimit, multiplier: safeMultiplier };
372
381
  }
382
+ const LOCOMO_LONG_HORIZON_SIGNALS = [
383
+ 'when', 'where', 'what', 'which', 'who', 'how long', 'how many', 'both',
384
+ 'relationship', 'activities', 'activity', 'identity', 'career', 'planning', 'likely',
385
+ 'before', 'after', 'since', 'first', 'last', 'year', 'month',
386
+ 'common', 'share', 'shared', 'same', 'places', 'events', 'items', 'bought',
387
+ ];
388
+ const MULTI_HOP_RECALL_FACETS = [
389
+ { pattern: /\b(martial art|martial arts|kickbox|taekwondo|karate|boxing|mma)\b/i, terms: ['martial', 'arts', 'kickboxing', 'taekwondo', 'karate', 'boxing', 'mma', 'training'] },
390
+ { pattern: /\b(indoor|activities|activity|girlfriend|boyfriend|partner|dog|dogs)\b/i, terms: ['indoor', 'activity', 'activities', 'girlfriend', 'boyfriend', 'partner', 'cooking', 'cook', 'baking', 'bake', 'games', 'boardgames', 'board', 'volunteering', 'volunteer', 'shelter', 'wine', 'tasting', 'growing', 'flowers', 'movie', 'movies', 'dog', 'dogs', 'treats'] },
391
+ { pattern: /\b(destress|de-stress|relax|stress|dance|dancing)\b/i, terms: ['destress', 'relax', 'stress', 'dance', 'dancing', 'music', 'studio'] },
392
+ { pattern: /\b(common|both|share|shared|same|interests|like|likes)\b/i, terms: ['both', 'common', 'share', 'shared', 'same', 'lost', 'job', 'jobs', 'business', 'businesses', 'interests', 'movies', 'desserts', 'dessert', 'baking', 'music', 'games'] },
393
+ { pattern: /\b(place|places|event|events|meet|met|planned|planning)\b/i, terms: ['place', 'places', 'event', 'events', 'meet', 'met', 'planned', 'planning', 'starbucks', 'mcgee', 'pub', 'vr', 'club', 'baseball', 'game', 'restaurant', 'bar'] },
394
+ { pattern: /\b(item|items|buy|bought|purchase|purchased|march)\b/i, terms: ['item', 'items', 'buy', 'bought', 'purchase', 'purchased', 'march', 'car', 'mansion', 'house'] },
395
+ { pattern: /\b(family|friend|friends|passed away|died|death|mother|father|pet|pets)\b/i, terms: ['family', 'friend', 'friends', 'passed', 'away', 'died', 'death', 'mother', 'father', 'karlie', 'pet', 'pets', 'dog', 'cat'] },
396
+ { pattern: /\b(goal|goals|career|basketball|team|position|championship)\b/i, terms: ['goal', 'goals', 'career', 'basketball', 'team', 'position', 'championship', 'shooting', 'endorsement', 'brand'] },
397
+ ];
398
+ function expandMultiHopQueryTerms(query, terms) {
399
+ const expanded = [...terms, ...matchedMultiHopFacetTerms(query)];
400
+ return [...new Set(expanded)].slice(0, 48);
401
+ }
402
+ function matchedMultiHopFacetTerms(query) {
403
+ const expanded = [];
404
+ for (const facet of MULTI_HOP_RECALL_FACETS) {
405
+ if (facet.pattern.test(query))
406
+ expanded.push(...facet.terms);
407
+ }
408
+ return [...new Set(expanded)];
409
+ }
410
+ const RECALL_QUERY_STOP_WORDS = new Set([
411
+ 'what', 'when', 'where', 'which', 'would', 'could', 'should', 'about', 'with',
412
+ 'from', 'have', 'has', 'had', 'does', 'did', 'were', 'was', 'are', 'the',
413
+ 'and', 'for', 'that', 'this', 'there', 'their', 'them', 'they', 'your', 'you',
414
+ 'his', 'her', 'she', 'him', 'how', 'many', 'long', 'likely', 'still', 'current',
415
+ ]);
416
+ function baseRecallQueryTerms(query) {
417
+ const words = query
418
+ .toLowerCase()
419
+ .replace(/[^a-z0-9\s-]/g, ' ')
420
+ .replace(/-/g, ' ')
421
+ .split(/\s+/)
422
+ .map(w => w.trim())
423
+ .filter(w => w.length >= 3 && !RECALL_QUERY_STOP_WORDS.has(w));
424
+ return [...new Set(words)];
425
+ }
426
+ function recallQueryTerms(query, opts = {}) {
427
+ const unique = baseRecallQueryTerms(query);
428
+ const openExpanded = expandOpenDomainQueryTerms(query, unique);
429
+ return opts.expandMultiHop === false
430
+ ? openExpanded.slice(0, 48)
431
+ : expandMultiHopQueryTerms(query, openExpanded).slice(0, 80);
432
+ }
433
+ function toRecallFtsQuery(terms, limit) {
434
+ const unique = [...new Set(terms)]
435
+ .map(w => w.replace(/"/g, '').trim())
436
+ .filter(Boolean)
437
+ .slice(0, limit);
438
+ if (unique.length === 0)
439
+ return null;
440
+ return unique.map(w => `"${w}"*`).join(' OR ');
441
+ }
442
+ function buildQueryMessageFtsQueries(query, opts = {}) {
443
+ const primary = buildFtsQuery(query);
444
+ const terms = recallQueryTerms(query, opts);
445
+ const naturalLimit = opts.naturalTermLimit ?? (opts.expandMultiHop === false ? 16 : 32);
446
+ const specificityLimit = opts.specificityTermLimit ?? (opts.expandMultiHop === false ? 12 : 24);
447
+ const naturalOrder = toRecallFtsQuery(terms, naturalLimit);
448
+ const specificityOrder = toRecallFtsQuery([...terms].sort((a, b) => b.length - a.length), specificityLimit);
449
+ const openDomainQueries = isOpenDomainQuery(query) ? buildOpenDomainFtsQueries(query) : [];
450
+ return [...openDomainQueries, primary, naturalOrder, specificityOrder].filter((q) => Boolean(q));
451
+ }
452
+ function isLongHorizonRecallQuery(query) {
453
+ const lower = query.toLowerCase();
454
+ if (hasTemporalSignals(query))
455
+ return true;
456
+ return LOCOMO_LONG_HORIZON_SIGNALS.some(signal => lower.includes(signal));
457
+ }
458
+ function scoreRecallTermOverlap(content, terms) {
459
+ if (terms.length === 0)
460
+ return 0;
461
+ const lower = content.toLowerCase();
462
+ let score = 0;
463
+ for (const term of terms) {
464
+ if (lower.includes(term))
465
+ score += term.length >= 6 ? 2 : 1;
466
+ }
467
+ return score;
468
+ }
373
469
  const DEFAULT_CONFIG = {
374
470
  // Primary budget controls
375
- budgetFraction: 0.703,
471
+ budgetFraction: 0.6,
376
472
  reserveFraction: 0.25,
377
473
  historyFraction: 0.40,
378
474
  memoryFraction: 0.40,
@@ -380,13 +476,13 @@ const DEFAULT_CONFIG = {
380
476
  defaultTokenBudget: 90000,
381
477
  // History internals
382
478
  maxHistoryMessages: 250,
383
- warmHistoryBudgetFraction: 0.4,
384
- keystoneHistoryFraction: 0.2,
385
- keystoneMaxMessages: 15,
479
+ warmHistoryBudgetFraction: 0.27,
480
+ keystoneHistoryFraction: 0.15,
481
+ keystoneMaxMessages: 12,
386
482
  keystoneMinSignificance: 0.5,
387
483
  // Memory internals
388
- maxFacts: 28,
389
- maxCrossSessionContext: 6000,
484
+ maxFacts: 25,
485
+ maxCrossSessionContext: 0,
390
486
  // Tool gradient (internal)
391
487
  maxRecentToolPairs: 3,
392
488
  maxProseToolPairs: 10,
@@ -395,6 +491,35 @@ const DEFAULT_CONFIG = {
395
491
  dynamicReserveMax: 0.50,
396
492
  dynamicReserveEnabled: true,
397
493
  };
494
+ const DEFAULT_QUERY_MESSAGE_RECALL_CONFIG = {
495
+ openDomainRemainingFraction: 0.34,
496
+ temporalRemainingFraction: 0.22,
497
+ multiHopRemainingFraction: 0.44,
498
+ openDomainMaxTokens: 4200,
499
+ temporalMaxTokens: 2400,
500
+ multiHopMaxTokens: 6500,
501
+ openDomainHitLimit: 24,
502
+ temporalHitLimit: 12,
503
+ multiHopHitLimit: 48,
504
+ openDomainNeighborWindow: 4,
505
+ temporalNeighborWindow: 2,
506
+ multiHopNeighborWindow: 8,
507
+ openDomainLineCharLimit: 420,
508
+ temporalLineCharLimit: 420,
509
+ multiHopLineCharLimit: 760,
510
+ temporalFtsNaturalTermLimit: 16,
511
+ temporalFtsSpecificTermLimit: 12,
512
+ multiHopFtsNaturalTermLimit: 32,
513
+ multiHopFtsSpecificTermLimit: 24,
514
+ multiHopScopedFacetTermLimit: 24,
515
+ multiHopSpecificFacetTermLimit: 16,
516
+ multiHopRareFacetFanoutLimit: 12,
517
+ multiHopRareFacetPerTermLimit: 3,
518
+ multiHopSameConversationDirectFirst: false,
519
+ };
520
+ function resolveQueryMessageRecallConfig(config) {
521
+ return { ...DEFAULT_QUERY_MESSAGE_RECALL_CONFIG, ...(config ?? {}) };
522
+ }
398
523
  // Tool gradient thresholds — controls how aggressively tool results are
399
524
  // truncated as they age out of the recent window.
400
525
  // Recent-turn policy (2026-04-07): protect turn 0 + turn 1, budget against a
@@ -2281,6 +2406,9 @@ export class Compositor {
2281
2406
  let diagTriggerFallbackUsed = false;
2282
2407
  let diagFactsIncluded = 0;
2283
2408
  let diagSemanticResults = 0;
2409
+ let diagQueryMessageRecallHits = 0;
2410
+ let diagQueryMessageRecallExpanded = 0;
2411
+ let diagQueryMessageRecallIncluded = 0;
2284
2412
  let diagDocChunkCollections = 0;
2285
2413
  let diagScopeFiltered = 0;
2286
2414
  let diagFingerprintDedups = 0;
@@ -2290,6 +2418,12 @@ export class Compositor {
2290
2418
  let diagRerankerStatus;
2291
2419
  let diagRerankerCandidates;
2292
2420
  let diagRerankerProvider;
2421
+ // Sprint A: Multi-hop structured handoff diagnostics
2422
+ let diagQuestionShape;
2423
+ let diagStructuredHandoffApplied;
2424
+ let diagStructuredHandoffEntityGroups;
2425
+ let diagStructuredHandoffFacetGroups;
2426
+ let diagEntityBridgeRecall;
2293
2427
  function normalizeFingerprintText(text) {
2294
2428
  return text.toLowerCase().replace(/\s+/g, ' ').trim();
2295
2429
  }
@@ -2411,14 +2545,27 @@ export class Compositor {
2411
2545
  if (request.includeSemanticRecall !== false && queryText && hasTemporalSignals(queryText) && libDb && remaining > 300) {
2412
2546
  try {
2413
2547
  const temporalStore = new TemporalStore(libDb);
2548
+ const temporalTerms = recallQueryTerms(queryText, { expandMultiHop: false });
2414
2549
  const temporalFacts = temporalStore.timeRangeQuery({
2415
2550
  agentId: request.agentId,
2416
- limit: 15,
2551
+ // 0.9.8: over-fetch then query-shape in memory. The old path took
2552
+ // the latest 15 temporal rows, which is exactly how a "when did X"
2553
+ // benchmark question gets a plausible but wrong recent date.
2554
+ limit: 80,
2417
2555
  order: 'DESC',
2418
2556
  });
2419
2557
  if (temporalFacts.length > 0) {
2420
2558
  const beforeCount = temporalFacts.length;
2421
- const novel = temporalFacts.filter(f => !isDuplicate(f.content));
2559
+ const novel = temporalFacts
2560
+ .filter(f => !isDuplicate(f.content))
2561
+ .map(f => ({ ...f, recallScore: scoreRecallTermOverlap(f.content, temporalTerms) }))
2562
+ .filter(f => f.recallScore > 0)
2563
+ .sort((a, b) => {
2564
+ if (b.recallScore !== a.recallScore)
2565
+ return b.recallScore - a.recallScore;
2566
+ return b.occurredAt - a.occurredAt;
2567
+ })
2568
+ .slice(0, 24);
2422
2569
  diagFingerprintDedups += beforeCount - novel.length;
2423
2570
  if (novel.length > 0) {
2424
2571
  const temporalBlock = novel
@@ -2431,7 +2578,7 @@ export class Compositor {
2431
2578
  .join('\n');
2432
2579
  const temporalSection = `## Temporal Context\n${temporalBlock}`;
2433
2580
  const tempTokens = estimateTokens(temporalSection);
2434
- const tempBudget = Math.floor(remaining * 0.20);
2581
+ const tempBudget = Math.floor(remaining * 0.30);
2435
2582
  if (tempTokens <= tempBudget) {
2436
2583
  volatileContextParts.push(temporalSection);
2437
2584
  contextTokens += tempTokens;
@@ -2556,6 +2703,148 @@ export class Compositor {
2556
2703
  }
2557
2704
  }
2558
2705
  }
2706
+ // ── Query-matched message recall (L2 rescue lane) ───────────
2707
+ // Long-horizon and LoCoMo-style questions often need raw dialogue turns that
2708
+ // have not been promoted into facts/knowledge/episodes yet. Semantic recall
2709
+ // searches the library/vector surfaces; this bounded FTS lane searches the
2710
+ // message transcript directly and expands around matching turns so multi-hop
2711
+ // questions can see neighboring supporting evidence. It uses only the user
2712
+ // prompt, never benchmark gold evidence.
2713
+ let openDomainRawRecallStrong = false;
2714
+ if (request.includeSemanticRecall !== false && remaining > 400) {
2715
+ const queryText = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
2716
+ const longHorizonRecall = isLongHorizonRecallQuery(queryText);
2717
+ const openDomainRecall = isOpenDomainQuery(queryText);
2718
+ const temporalRecall = hasTemporalSignals(queryText);
2719
+ const temporalStrictRecall = temporalRecall && !openDomainRecall;
2720
+ if (queryText && (longHorizonRecall || openDomainRecall)) {
2721
+ // Sprint A: detect question shape for structured handoff gate
2722
+ const questionShape = !openDomainRecall && !temporalStrictRecall
2723
+ ? detectQuestionShape(queryText)
2724
+ : null;
2725
+ if (questionShape) {
2726
+ diagQuestionShape = {
2727
+ kind: questionShape.kind,
2728
+ entities: questionShape.entities,
2729
+ facets: questionShape.facets,
2730
+ confidence: questionShape.confidence,
2731
+ };
2732
+ }
2733
+ // Sprint A: structured handoff fires when question is multi-hop AND flag is on
2734
+ const entityBridgeCfg = this.config.entityBridge;
2735
+ const structuredHandoffEnabled = Boolean(entityBridgeCfg?.structuredHandoff);
2736
+ const useStructuredHandoff = structuredHandoffEnabled &&
2737
+ questionShape?.kind === 'multi-hop';
2738
+ try {
2739
+ const qmr = resolveQueryMessageRecallConfig(this.config.queryMessageRecall);
2740
+ const recallMode = openDomainRecall ? 'openDomain' : temporalStrictRecall ? 'temporal' : 'multiHop';
2741
+ const recall = this.buildQueryMessageRecall(db, queryText, {
2742
+ maxTokens: Math.min(Math.floor(remaining * (recallMode === 'openDomain' ? qmr.openDomainRemainingFraction : recallMode === 'temporal' ? qmr.temporalRemainingFraction : qmr.multiHopRemainingFraction)), recallMode === 'openDomain' ? qmr.openDomainMaxTokens : recallMode === 'temporal' ? qmr.temporalMaxTokens : qmr.multiHopMaxTokens),
2743
+ hitLimit: recallMode === 'openDomain' ? qmr.openDomainHitLimit : recallMode === 'temporal' ? qmr.temporalHitLimit : qmr.multiHopHitLimit,
2744
+ neighborWindow: recallMode === 'openDomain' ? qmr.openDomainNeighborWindow : recallMode === 'temporal' ? qmr.temporalNeighborWindow : qmr.multiHopNeighborWindow,
2745
+ lineCharLimit: recallMode === 'openDomain' ? qmr.openDomainLineCharLimit : recallMode === 'temporal' ? qmr.temporalLineCharLimit : qmr.multiHopLineCharLimit,
2746
+ ftsNaturalTermLimit: temporalStrictRecall ? qmr.temporalFtsNaturalTermLimit : qmr.multiHopFtsNaturalTermLimit,
2747
+ ftsSpecificTermLimit: temporalStrictRecall ? qmr.temporalFtsSpecificTermLimit : qmr.multiHopFtsSpecificTermLimit,
2748
+ scopedFacetTermLimit: qmr.multiHopScopedFacetTermLimit,
2749
+ specificFacetTermLimit: qmr.multiHopSpecificFacetTermLimit,
2750
+ rareFacetFanoutLimit: qmr.multiHopRareFacetFanoutLimit,
2751
+ rareFacetPerTermLimit: qmr.multiHopRareFacetPerTermLimit,
2752
+ sameConversationDirectFirst: qmr.multiHopSameConversationDirectFirst,
2753
+ openDomain: openDomainRecall,
2754
+ expandMultiHop: !temporalStrictRecall,
2755
+ isDuplicate,
2756
+ addFingerprint,
2757
+ });
2758
+ if (recall) {
2759
+ let recallBlock;
2760
+ if (openDomainRecall) {
2761
+ recallBlock = `## Query-Matched Conversation Memory\nUse these raw transcript lines as primary evidence for the current question. If a line supports an answer, do not answer that no information is available.\n${recall.content}`;
2762
+ }
2763
+ else if (temporalStrictRecall) {
2764
+ recallBlock = `## Query-Matched Conversation Memory\nUse these date-ordered raw transcript lines as primary evidence for the current time-sensitive question. Prefer the latest directly supported answer when the question asks current, recent, before, after, first, last, or when. Do not broaden the answer with unrelated anchors from the same group.\n${recall.content}`;
2765
+ }
2766
+ else if (useStructuredHandoff && questionShape) {
2767
+ // Sprint A: structured handoff — annotate groups with entity/facet tags
2768
+ try {
2769
+ const annotated = annotateRecallGroups(recall.content, questionShape.entities, questionShape.facets);
2770
+ const formatted = formatStructuredHandoffBlock(annotated, questionShape.entities, questionShape.facets);
2771
+ const instruction = buildStructuredHandoffInstruction(questionShape.entities, questionShape.facets);
2772
+ recallBlock = `${instruction}\n${formatted.content}`;
2773
+ diagStructuredHandoffApplied = true;
2774
+ diagStructuredHandoffEntityGroups = formatted.entityGroupCount;
2775
+ diagStructuredHandoffFacetGroups = formatted.facetGroupCount;
2776
+ }
2777
+ catch {
2778
+ // Structured handoff is best-effort — fall back to flat format
2779
+ recallBlock = `## Query-Matched Conversation Memory\nUse these grouped raw transcript lines as primary evidence. For multi-part questions, collect every relevant item across the group before answering. If the question asks what people share, have in common, bought, planned, pursued, or lost, scan the whole group and include every matching item before summarizing. Prefer the shortest complete list of supported items; do not add unsupported extras. For names, places, events, purchases, deaths, goals, or activities, preserve each distinct transcript anchor you find instead of collapsing to a generic category. Do not answer that no information is available when the group contains supporting evidence.\n${recall.content}`;
2780
+ diagStructuredHandoffApplied = false;
2781
+ }
2782
+ }
2783
+ else {
2784
+ recallBlock = `## Query-Matched Conversation Memory\nUse these grouped raw transcript lines as primary evidence. For multi-part questions, collect every relevant item across the group before answering. If the question asks what people share, have in common, bought, planned, pursued, or lost, scan the whole group and include every matching item before summarizing. Prefer the shortest complete list of supported items; do not add unsupported extras. For names, places, events, purchases, deaths, goals, or activities, preserve each distinct transcript anchor you find instead of collapsing to a generic category. Do not answer that no information is available when the group contains supporting evidence.\n${recall.content}`;
2785
+ }
2786
+ volatileContextParts.push(recallBlock);
2787
+ contextTokens += recall.tokens;
2788
+ remaining -= recall.tokens;
2789
+ slots.context += recall.tokens;
2790
+ diagQueryMessageRecallHits = recall.hitCount;
2791
+ diagQueryMessageRecallExpanded = recall.expandedCount;
2792
+ diagQueryMessageRecallIncluded = recall.includedCount;
2793
+ if (openDomainRecall) {
2794
+ diagRetrievalMode = 'open_domain_raw';
2795
+ openDomainRawRecallStrong = recall.includedCount >= 8;
2796
+ }
2797
+ else {
2798
+ if (diagRetrievalMode === 'none')
2799
+ diagRetrievalMode = 'raw_message_fts';
2800
+ openDomainRawRecallStrong = !temporalStrictRecall && recall.includedCount >= 10;
2801
+ }
2802
+ }
2803
+ }
2804
+ catch {
2805
+ // Raw message recall is best-effort — never fail composition.
2806
+ }
2807
+ }
2808
+ }
2809
+ // ── Sprint B: Entity-Bridge Conversation Memory (PPR lane) ────────────
2810
+ // Inserted after the query-matched raw message recall and before semantic
2811
+ // recall. Disabled by default. Requires `entityBridge.enabled` AND
2812
+ // `entityBridge.pprEnabled` AND a query with at least one seed entity or
2813
+ // facet AND the v12 bridge tables to exist. On any failure the lane
2814
+ // degrades to a metadata-only diagnostic and the existing semantic recall
2815
+ // path runs unchanged.
2816
+ const entityBridgeDiagnostics = { attempted: false, applied: false };
2817
+ {
2818
+ const cfg = this.config.entityBridge;
2819
+ const queryText = request.prompt?.trim() || this.getLastUserMessage(messages) || '';
2820
+ if (cfg?.enabled && cfg?.pprEnabled && queryText && remaining > 200) {
2821
+ entityBridgeDiagnostics.attempted = true;
2822
+ try {
2823
+ const recall = this.buildEntityBridgeRecall(db, queryText, cfg, { isDuplicate, addFingerprint, remaining, agentId: request.agentId }, entityBridgeDiagnostics);
2824
+ if (recall && recall.tokens > 0) {
2825
+ volatileContextParts.push(recall.content);
2826
+ contextTokens += recall.tokens;
2827
+ remaining -= recall.tokens;
2828
+ slots.context += recall.tokens;
2829
+ entityBridgeDiagnostics.applied = true;
2830
+ entityBridgeDiagnostics.tokensEmitted = recall.tokens;
2831
+ }
2832
+ }
2833
+ catch (err) {
2834
+ entityBridgeDiagnostics.applied = false;
2835
+ entityBridgeDiagnostics.reason = 'failed';
2836
+ // Best-effort lane: never fail composition.
2837
+ void err;
2838
+ }
2839
+ }
2840
+ else if (cfg?.enabled && cfg?.pprEnabled) {
2841
+ entityBridgeDiagnostics.attempted = true;
2842
+ entityBridgeDiagnostics.reason = queryText ? 'no_seeds' : 'no_seeds';
2843
+ }
2844
+ }
2845
+ if (entityBridgeDiagnostics.attempted) {
2846
+ diagEntityBridgeRecall = entityBridgeDiagnostics;
2847
+ }
2559
2848
  // ── Semantic Recall (L3: Hybrid FTS5+KNN) ───────────────
2560
2849
  // scope: agent — buildSemanticRecall filters by agentId internally
2561
2850
  // Fires when either vector store or library DB is available.
@@ -2588,7 +2877,7 @@ export class Compositor {
2588
2877
  let diagAdaptiveRecallCandidateLimit;
2589
2878
  let diagComposeAdjacencyBoosted = 0;
2590
2879
  let diagComposeAdjacencyDeltaTotalMs = 0;
2591
- if (request.includeSemanticRecall !== false && remaining > 500 && (this.vectorStore || libDb)) {
2880
+ if (request.includeSemanticRecall !== false && remaining > 500 && (this.vectorStore || libDb) && !openDomainRawRecallStrong) {
2592
2881
  const lastUserMsg = request.prompt?.trim() || this.getLastUserMessage(messages);
2593
2882
  if (lastUserMsg) {
2594
2883
  try {
@@ -2604,8 +2893,9 @@ export class Compositor {
2604
2893
  }
2605
2894
  diagAdaptiveRecallBudgetTokens = recallBreadth.mainBudgetTokens;
2606
2895
  diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
2896
+ const longHorizonRecall = isLongHorizonRecallQuery(lastUserMsg);
2607
2897
  const semanticContent = await this.buildSemanticRecall(lastUserMsg, request.agentId,
2608
- // 0.9.0: recall token budget = base 0.12 of remaining * lifecycle multiplier.
2898
+ // 0.9.0: recall token budget = base fraction of remaining * lifecycle multiplier.
2609
2899
  recallBreadth.mainBudgetTokens, libDb || undefined, precomputedEmbedding, contextFingerprints, // C2: skip results already in Active Facts
2610
2900
  // Sprint 1: capture reranker telemetry at assemble level
2611
2901
  (ev) => {
@@ -2615,7 +2905,7 @@ export class Compositor {
2615
2905
  }, (ev) => {
2616
2906
  diagComposeAdjacencyBoosted += ev.boostedCount;
2617
2907
  diagComposeAdjacencyDeltaTotalMs += ev.averageDeltaMs * ev.boostedCount;
2618
- }, recallBreadth.candidateLimit);
2908
+ }, recallBreadth.candidateLimit, longHorizonRecall);
2619
2909
  if (semanticContent) {
2620
2910
  const tokens = estimateTokens(semanticContent);
2621
2911
  volatileContextParts.push(`## Related Memory\n${semanticContent}`);
@@ -2751,7 +3041,7 @@ export class Compositor {
2751
3041
  volatileContextParts.push(docParts.join('\n\n'));
2752
3042
  }
2753
3043
  }
2754
- else if (request.includeSemanticRecall !== false && remaining > 400 && (this.vectorStore || libDb)) {
3044
+ else if (request.includeSemanticRecall !== false && remaining > 400 && (this.vectorStore || libDb) && !openDomainRawRecallStrong) {
2755
3045
  // Trigger-miss fallback: no trigger fired — attempt bounded semantic retrieval
2756
3046
  // so there is never a silent zero-memory path on doc chunks.
2757
3047
  // INVARIANT: this block is mutually exclusive with triggered-retrieval above.
@@ -2763,9 +3053,10 @@ export class Compositor {
2763
3053
  diagAdaptiveRecallBudgetTokens = recallBreadth.fallbackBudgetTokens;
2764
3054
  diagAdaptiveRecallCandidateLimit = recallBreadth.candidateLimit;
2765
3055
  }
3056
+ const fallbackLongHorizonRecall = isLongHorizonRecallQuery(lastMsg);
2766
3057
  const fallbackContent = await Promise.race([
2767
3058
  this.buildSemanticRecall(lastMsg, request.agentId, recallBreadth.fallbackBudgetTokens, libDb || undefined, undefined, contextFingerprints, // C2: skip results already in Active Facts
2768
- undefined, undefined, recallBreadth.candidateLimit),
3059
+ undefined, undefined, recallBreadth.candidateLimit, fallbackLongHorizonRecall),
2769
3060
  new Promise((_, reject) => setTimeout(() => reject(new Error('fallback_knn_timeout')), 3000)),
2770
3061
  ]);
2771
3062
  if (fallbackContent) {
@@ -3181,6 +3472,15 @@ export class Compositor {
3181
3472
  triggerFallbackUsed: diagTriggerFallbackUsed,
3182
3473
  factsIncluded: diagFactsIncluded,
3183
3474
  semanticResultsIncluded: diagSemanticResults,
3475
+ queryMessageRecallHits: diagQueryMessageRecallHits,
3476
+ queryMessageRecallExpanded: diagQueryMessageRecallExpanded,
3477
+ queryMessageRecallIncluded: diagQueryMessageRecallIncluded,
3478
+ // Sprint A: Multi-hop structured handoff diagnostics
3479
+ questionShape: diagQuestionShape,
3480
+ structuredHandoffApplied: diagStructuredHandoffApplied,
3481
+ structuredHandoffEntityGroups: diagStructuredHandoffEntityGroups,
3482
+ structuredHandoffFacetGroups: diagStructuredHandoffFacetGroups,
3483
+ entityBridgeRecall: diagEntityBridgeRecall,
3184
3484
  docChunksCollections: diagDocChunkCollections,
3185
3485
  scopeFiltered: diagScopeFiltered,
3186
3486
  zeroResultReason,
@@ -3920,7 +4220,8 @@ export class Compositor {
3920
4220
  */
3921
4221
  async buildSemanticRecall(userMessage, agentId, maxTokens, libraryDb, precomputedEmbedding, existingFingerprints, // C2: skip results already in Active Facts
3922
4222
  onRerankerTelemetry, // Sprint 1: surface reranker status at assemble level
3923
- onAdjacencyTelemetry, resultLimit) {
4223
+ onAdjacencyTelemetry, resultLimit, // 0.9.0: lifecycle-scaled candidate limit for hybrid + KNN-only fallback
4224
+ longHorizonRecall) {
3924
4225
  const libDb = libraryDb || this.libraryDb;
3925
4226
  if (!libDb && !this.vectorStore)
3926
4227
  return null;
@@ -3944,10 +4245,23 @@ export class Compositor {
3944
4245
  agentId,
3945
4246
  maxKnnDistance: 1.2,
3946
4247
  precomputedEmbedding,
4248
+ allowInlineQueryEmbedding: false,
4249
+ // 0.9.8 LoCoMo tuning: MemPal/MemPalace-style benchmark gains came
4250
+ // from broad candidate pools plus exact keyword/person/date boosts.
4251
+ // Keep normal production fusion unchanged, but for long-horizon QA use
4252
+ // a lower RRF k so top exact matches separate, and a modest FTS weight
4253
+ // so exact names/dates/objects can beat semantically-near old chatter.
4254
+ rrfK: longHorizonRecall ? 20 : undefined,
4255
+ ftsWeight: longHorizonRecall ? 1.35 : undefined,
4256
+ knnWeight: 1.0,
3947
4257
  reranker: this.reranker,
3948
4258
  rerankerMinCandidates: this.rerankerMinCandidates,
3949
- rerankerMaxDocuments: this.rerankerMaxDocuments,
3950
- rerankerTopK: this.rerankerTopK,
4259
+ rerankerMaxDocuments: longHorizonRecall
4260
+ ? Math.max(this.rerankerMaxDocuments ?? 0, Math.min(hybridLimit, 32))
4261
+ : this.rerankerMaxDocuments,
4262
+ rerankerTopK: longHorizonRecall
4263
+ ? Math.max(this.rerankerTopK ?? 0, Math.min(hybridLimit, 18))
4264
+ : this.rerankerTopK,
3951
4265
  // Sprint 1: thread reranker telemetry into compose diagnostics
3952
4266
  onRerankerTelemetry,
3953
4267
  onAdjacencyTelemetry,
@@ -3966,6 +4280,8 @@ export class Compositor {
3966
4280
  // >72h: multiply by 0.5
3967
4281
  const now = Date.now();
3968
4282
  const decayedResults = results.map(result => {
4283
+ if (longHorizonRecall)
4284
+ return result;
3969
4285
  if (!result.createdAt)
3970
4286
  return result;
3971
4287
  const ageMs = now - new Date(result.createdAt).getTime();
@@ -3991,13 +4307,16 @@ export class Compositor {
3991
4307
  // TUNE-001: drop very-low-relevance results (RRF scores below 0.008 are noise)
3992
4308
  if (result.score < 0.008)
3993
4309
  continue;
3994
- // TUNE-016: FTS-only results require higher floor — low-score FTS hits are noise
3995
- if (result.sources.length === 1 && result.sources[0] === 'fts' && result.score < 0.05)
4310
+ // TUNE-016: FTS-only results require higher floor — low-score FTS hits are noise.
4311
+ // 0.9.8: for long-horizon QA, exact FTS-only episode hits are often the
4312
+ // only evidence path when embeddings are unavailable or too broad. Relax
4313
+ // the floor there, but keep the production floor for ordinary turns.
4314
+ if (result.sources.length === 1 && result.sources[0] === 'fts' && result.score < (longHorizonRecall ? 0.015 : 0.05))
3996
4315
  continue;
3997
- // TUNE-014: episodes require higher confidence score:2 episodes bleed adjacent
3998
- // session context and contaminate current session. Require fts+knn agreement
3999
- // (score >= 0.04) for episodes to make it into assembled context.
4000
- if (result.sourceTable === 'episodes' && result.score < 0.04)
4316
+ // TUNE-014: episodes require higher confidence in normal production turns.
4317
+ // Long-horizon LoCoMo-style recall is explicitly asking for old episodic
4318
+ // evidence, so do not drop relevant old episodes just because they are old.
4319
+ if (!longHorizonRecall && result.sourceTable === 'episodes' && result.score < 0.04)
4001
4320
  continue;
4002
4321
  // C2: Skip results whose content is already fingerprinted (e.g. in Active Facts)
4003
4322
  // Dedup count is not tracked separately here — compose-level counter covers the other paths.
@@ -4035,6 +4354,459 @@ export class Compositor {
4035
4354
  }
4036
4355
  return lines.length > 0 ? lines.join('\n') : null;
4037
4356
  }
4357
+ /**
4358
+ * Documents buildQueryMessageRecall (defined further below): bounded prompt-only FTS recall over raw message history.
4359
+ *
4360
+ * This is intentionally separate from benchmark evidence tracing. The only
4361
+ * input is the user query, so it is safe for product compose and for LoCoMo
4362
+ * evaluation. Neighbor expansion gives the reader local dialogue context for
4363
+ * multi-hop questions where the first FTS hit is only one side of the answer.
4364
+ */
4365
+ /**
4366
+ * Sprint B: build the entity-bridge conversation memory block.
4367
+ *
4368
+ * Pipeline:
4369
+ * 1. Detect question shape → seed entity/facet keys.
4370
+ * 2. Use EntityBridgeStore to build a capped graph snapshot.
4371
+ * 3. Run sparse personalized PageRank.
4372
+ * 4. Pull top-K candidate messages, hydrate text, emit a capped block.
4373
+ *
4374
+ * Degrades safely:
4375
+ * - Tables missing: returns null with reason=tables_missing.
4376
+ * - No seeds: returns null with reason=no_seeds.
4377
+ * - Empty graph or no candidates: returns null with reason set.
4378
+ * - PPR or DB error: thrown to caller, which records reason=failed.
4379
+ */
4380
+ buildEntityBridgeRecall(db, query, cfg, opts, diag) {
4381
+ const store = new EntityBridgeStore(db);
4382
+ if (!store.tablesExist()) {
4383
+ diag.reason = 'tables_missing';
4384
+ return null;
4385
+ }
4386
+ // Cheap question-shape extraction. Normalize seeds to the same keys used
4387
+ // by the ingest path so query lookup and message indexing join correctly.
4388
+ const shape = {
4389
+ entities: extractQueryEntities(query),
4390
+ facets: extractQueryFacets(query),
4391
+ };
4392
+ const maxSeeds = Math.max(1, Math.min(16, cfg.maxSeedEntities ?? 4));
4393
+ const maxFacets = Math.max(1, Math.min(16, cfg.maxSeedFacets ?? 4));
4394
+ const seedEntityKeys = [...new Set(shape.entities.map(normalizeEntityKey).filter(Boolean))].slice(0, maxSeeds);
4395
+ const seedFacetKeys = [...new Set(shape.facets.map(normalizeFacetKey).filter(Boolean))].slice(0, maxFacets);
4396
+ diag.seedEntityCount = seedEntityKeys.length;
4397
+ diag.seedFacetCount = seedFacetKeys.length;
4398
+ if (seedEntityKeys.length === 0 && seedFacetKeys.length === 0) {
4399
+ diag.reason = 'no_seeds';
4400
+ return null;
4401
+ }
4402
+ const agentId = opts.agentId
4403
+ ?? this._activeAgentId
4404
+ ?? this.getCurrentAgentIdForBridge(db);
4405
+ if (!agentId) {
4406
+ diag.reason = 'no_seeds';
4407
+ return null;
4408
+ }
4409
+ const snapshot = store.buildGraphSnapshot({
4410
+ agentId,
4411
+ seedEntityKeys,
4412
+ seedFacetKeys,
4413
+ maxNodes: cfg.maxGraphNodes ?? 2000,
4414
+ maxEdges: cfg.maxGraphEdges ?? 5000,
4415
+ perSeedMessageLimit: cfg.perSeedMessageLimit ?? 200,
4416
+ });
4417
+ const graphMessageIds = new Set([
4418
+ ...snapshot.messageEntities.keys(),
4419
+ ...snapshot.messageFacets.keys(),
4420
+ ]);
4421
+ diag.graphNodeCount = snapshot.diagnostics.nodeCount;
4422
+ diag.graphMessageCount = graphMessageIds.size;
4423
+ diag.graphEdgeCount = snapshot.diagnostics.edgeCount;
4424
+ diag.graphNodesCapped = snapshot.diagnostics.nodesCapped || undefined;
4425
+ diag.graphEdgesCapped = snapshot.diagnostics.edgesCapped || undefined;
4426
+ diag.capFired = [
4427
+ snapshot.diagnostics.nodesCapped ? 'node_cap' : '',
4428
+ snapshot.diagnostics.edgesCapped ? 'edge_cap' : '',
4429
+ ].filter(Boolean);
4430
+ if (snapshot.messageEntities.size === 0 && snapshot.messageFacets.size === 0) {
4431
+ diag.reason = 'empty_graph';
4432
+ return null;
4433
+ }
4434
+ const ppr = runPersonalizedPageRank(snapshot, seedEntityKeys, seedFacetKeys, {
4435
+ teleportProbability: cfg.pprTeleportProbability,
4436
+ maxIterations: cfg.pprMaxIterations,
4437
+ convergenceTolerance: cfg.pprConvergenceTolerance,
4438
+ topK: cfg.pprTopK ?? 20,
4439
+ });
4440
+ diag.pprIterations = ppr.diagnostics.iterations;
4441
+ diag.pprConverged = ppr.diagnostics.converged;
4442
+ if (ppr.ranked.length === 0) {
4443
+ diag.reason = 'no_candidates';
4444
+ return null;
4445
+ }
4446
+ const pprCandidateIds = ppr.ranked.map(r => r.messageId);
4447
+ const ftsCandidateIds = this.rankBridgeCandidatesByFts(db, query, pprCandidateIds);
4448
+ diag.ftsCandidates = ftsCandidateIds.length;
4449
+ const fused = reciprocalRankFuse([
4450
+ {
4451
+ ranked: pprCandidateIds.map(id => ({ key: String(id), item: id })),
4452
+ weight: 1.25,
4453
+ },
4454
+ ...(ftsCandidateIds.length > 0
4455
+ ? [{ ranked: ftsCandidateIds.map(id => ({ key: String(id), item: id })), weight: 1.0 }]
4456
+ : []),
4457
+ ]);
4458
+ diag.rrfCandidates = fused.length;
4459
+ const candidateIds = fused.slice(0, cfg.pprTopK ?? 20).map(r => r.item);
4460
+ const candidates = store.fetchCandidates({ agentId, messageIds: candidateIds });
4461
+ if (candidates.length === 0) {
4462
+ diag.reason = 'no_candidates';
4463
+ return null;
4464
+ }
4465
+ // Hydrate text from `messages`. The store deliberately does not load
4466
+ // message text; we do the join here so payloads stay scoped to compose.
4467
+ const placeholders = candidates.map(() => '?').join(',');
4468
+ const rows = db.prepare(`SELECT id, role, text_content, conversation_id, created_at
4469
+ FROM messages WHERE id IN (${placeholders}) AND COALESCE(text_content,'') != ''`).all(...candidates.map(c => c.messageId));
4470
+ const rowById = new Map(rows.map(r => [r.id, r]));
4471
+ const maxTokens = Math.max(120, Math.min(cfg.maxTokens ?? 1200, Math.floor(opts.remaining * 0.4)));
4472
+ const lines = ['## Entity-Bridge Conversation Memory'];
4473
+ const subjectParts = [];
4474
+ if (seedEntityKeys.length)
4475
+ subjectParts.push(`entities: ${seedEntityKeys.slice(0, 4).join(', ')}`);
4476
+ if (seedFacetKeys.length)
4477
+ subjectParts.push(`facets: ${seedFacetKeys.slice(0, 4).join(', ')}`);
4478
+ if (subjectParts.length)
4479
+ lines.push(`Bridge subjects — ${subjectParts.join('; ')}.`);
4480
+ lines.push('Use these PPR-ranked transcript anchors as supplementary evidence.');
4481
+ let used = estimateTokens(lines.join('\n'));
4482
+ let emitted = 0;
4483
+ for (const cand of candidates) {
4484
+ const row = rowById.get(cand.messageId);
4485
+ if (!row)
4486
+ continue;
4487
+ const snippet = row.text_content.slice(0, 320).replace(/\s+/g, ' ').trim();
4488
+ if (!snippet)
4489
+ continue;
4490
+ if (opts.isDuplicate(snippet))
4491
+ continue;
4492
+ const annot = [];
4493
+ if (cand.matchedEntities.length)
4494
+ annot.push(`e: ${cand.matchedEntities.slice(0, 3).join(', ')}`);
4495
+ if (cand.matchedFacets.length)
4496
+ annot.push(`f: ${cand.matchedFacets.slice(0, 3).join(', ')}`);
4497
+ const header = annot.length
4498
+ ? `### Bridge message ${row.id} [${annot.join('; ')}]`
4499
+ : `### Bridge message ${row.id}`;
4500
+ const block = `${header}\n- [${row.role}] ${snippet}`;
4501
+ const cost = estimateTokens(block);
4502
+ if (used + cost > maxTokens)
4503
+ break;
4504
+ lines.push(block);
4505
+ opts.addFingerprint(snippet);
4506
+ used += cost;
4507
+ emitted++;
4508
+ if (emitted >= (cfg.pprTopK ?? 20))
4509
+ break;
4510
+ }
4511
+ if (emitted === 0) {
4512
+ diag.reason = 'no_candidates';
4513
+ return null;
4514
+ }
4515
+ diag.candidatesEmitted = emitted;
4516
+ return { content: lines.join('\n'), tokens: used };
4517
+ }
4518
+ /**
4519
+ * Metadata-only FTS rank over the PPR candidate set. This lets the Sprint B
4520
+ * bridge lane use the same generic RRF math for message-FTS + PPR ordering
4521
+ * without changing the existing raw recall block or hybridSearch() semantics.
4522
+ */
4523
+ rankBridgeCandidatesByFts(db, query, candidateIds) {
4524
+ if (candidateIds.length === 0)
4525
+ return [];
4526
+ const ftsQuery = buildFtsQuery(query);
4527
+ if (!ftsQuery)
4528
+ return [];
4529
+ try {
4530
+ const placeholders = candidateIds.map(() => '?').join(',');
4531
+ const rows = db.prepare(`SELECT m.id AS id, messages_fts.rank AS rank
4532
+ FROM messages_fts
4533
+ JOIN messages m ON m.id = messages_fts.rowid
4534
+ WHERE messages_fts MATCH ?
4535
+ AND m.id IN (${placeholders})
4536
+ ORDER BY messages_fts.rank
4537
+ LIMIT ?`).all(ftsQuery, ...candidateIds, candidateIds.length);
4538
+ return rows.map(r => r.id);
4539
+ }
4540
+ catch {
4541
+ return [];
4542
+ }
4543
+ }
4544
+ /**
4545
+ * Best-effort lookup for an agent id usable by the entity-bridge lane.
4546
+ * The bridge index is per-agent, so we need to resolve which agent's
4547
+ * messages belong to this DB. Falls back to the most recent conversation
4548
+ * row's `agent_id`.
4549
+ */
4550
+ getCurrentAgentIdForBridge(db) {
4551
+ try {
4552
+ const row = db.prepare('SELECT agent_id FROM conversations ORDER BY id DESC LIMIT 1').get();
4553
+ return row?.agent_id ?? null;
4554
+ }
4555
+ catch {
4556
+ return null;
4557
+ }
4558
+ }
4559
+ buildQueryMessageRecall(db, query, opts) {
4560
+ const ftsQueries = [...new Set(buildQueryMessageFtsQueries(query, {
4561
+ expandMultiHop: opts.expandMultiHop,
4562
+ naturalTermLimit: opts.ftsNaturalTermLimit,
4563
+ specificityTermLimit: opts.ftsSpecificTermLimit,
4564
+ }))];
4565
+ if (ftsQueries.length === 0)
4566
+ return null;
4567
+ const queryTerms = recallQueryTerms(query, { expandMultiHop: opts.expandMultiHop });
4568
+ const openDomainRecall = Boolean(opts.openDomain || isOpenDomainQuery(query));
4569
+ const lineCharLimit = Math.max(240, Math.min(1000, Math.floor(opts.lineCharLimit || 420)));
4570
+ const hitLimit = Math.max(1, Math.min(60, Math.floor(opts.hitLimit || 5)));
4571
+ const neighborWindow = Math.max(0, Math.min(8, Math.floor(opts.neighborWindow || 0)));
4572
+ const hitStmt = db.prepare(`
4573
+ WITH fts_matches AS (
4574
+ SELECT rowid, rank
4575
+ FROM messages_fts
4576
+ WHERE messages_fts MATCH ?
4577
+ ORDER BY rank
4578
+ LIMIT ?
4579
+ )
4580
+ SELECT
4581
+ m.id,
4582
+ m.conversation_id,
4583
+ m.role,
4584
+ m.text_content,
4585
+ m.message_index,
4586
+ m.created_at,
4587
+ fts_matches.rank AS rank
4588
+ FROM messages m
4589
+ JOIN fts_matches ON m.id = fts_matches.rowid
4590
+ WHERE m.role IN ('user', 'assistant')
4591
+ AND m.text_content IS NOT NULL
4592
+ AND trim(m.text_content) != ''
4593
+ AND m.is_heartbeat = 0
4594
+ ORDER BY fts_matches.rank
4595
+ `);
4596
+ const rowsById = new Map();
4597
+ const seenHitIds = new Set();
4598
+ const hits = [];
4599
+ let expandedCount = 0;
4600
+ for (const ftsQuery of ftsQueries) {
4601
+ const queryHits = hitStmt.all(ftsQuery, hitLimit);
4602
+ for (const hit of queryHits) {
4603
+ if (seenHitIds.has(hit.id))
4604
+ continue;
4605
+ seenHitIds.add(hit.id);
4606
+ hit.overlap = openDomainRecall
4607
+ ? scoreOpenDomainEvidence(hit.text_content ?? '', query, queryTerms)
4608
+ : scoreRecallTermOverlap(hit.text_content ?? '', queryTerms);
4609
+ hits.push(hit);
4610
+ }
4611
+ }
4612
+ if (hits.length === 0)
4613
+ return null;
4614
+ if (!openDomainRecall && opts.expandMultiHop !== false) {
4615
+ const genericFacetTerms = new Set([
4616
+ 'activity', 'activities', 'place', 'places', 'event', 'events', 'meet', 'met',
4617
+ 'planned', 'planning', 'item', 'items', 'buy', 'bought', 'purchase', 'purchased',
4618
+ 'family', 'friend', 'friends', 'goal', 'goals', 'career', 'common', 'both',
4619
+ 'share', 'shared', 'same', 'like', 'likes', 'team', 'game', 'games',
4620
+ ]);
4621
+ const facetTerms = matchedMultiHopFacetTerms(query)
4622
+ .filter(term => term.length >= 4 || /^[a-z]{2,3}$/i.test(term))
4623
+ .sort((a, b) => b.length - a.length);
4624
+ const specificFacetTerms = facetTerms.filter(term => !genericFacetTerms.has(term.toLowerCase()));
4625
+ const addFacetHits = (facetHits) => {
4626
+ for (const hit of facetHits) {
4627
+ if (seenHitIds.has(hit.id))
4628
+ continue;
4629
+ seenHitIds.add(hit.id);
4630
+ hit.overlap = scoreRecallTermOverlap(hit.text_content ?? '', queryTerms);
4631
+ hits.push(hit);
4632
+ }
4633
+ };
4634
+ const scopedFacetQuery = toRecallFtsQuery(facetTerms, opts.scopedFacetTermLimit ?? 24);
4635
+ if (scopedFacetQuery) {
4636
+ const scopedFacetStmt = db.prepare(`
4637
+ SELECT
4638
+ m.id,
4639
+ m.conversation_id,
4640
+ m.role,
4641
+ m.text_content,
4642
+ m.message_index,
4643
+ m.created_at,
4644
+ messages_fts.rank AS rank
4645
+ FROM messages_fts
4646
+ JOIN messages m ON m.id = messages_fts.rowid
4647
+ WHERE messages_fts MATCH ?
4648
+ AND m.conversation_id = ?
4649
+ AND m.role IN ('user', 'assistant')
4650
+ AND m.text_content IS NOT NULL
4651
+ AND trim(m.text_content) != ''
4652
+ AND m.is_heartbeat = 0
4653
+ ORDER BY messages_fts.rank
4654
+ LIMIT ?
4655
+ `);
4656
+ const hitConversationIds = [...new Set(hits.map(hit => hit.conversation_id))];
4657
+ for (const conversationId of hitConversationIds) {
4658
+ addFacetHits(scopedFacetStmt.all(scopedFacetQuery, conversationId, Math.min(12, hitLimit)));
4659
+ }
4660
+ }
4661
+ const specificFacetQuery = toRecallFtsQuery(specificFacetTerms, opts.specificFacetTermLimit ?? 16);
4662
+ if (specificFacetQuery) {
4663
+ const specificFacetStmt = db.prepare(`
4664
+ SELECT
4665
+ m.id,
4666
+ m.conversation_id,
4667
+ m.role,
4668
+ m.text_content,
4669
+ m.message_index,
4670
+ m.created_at,
4671
+ messages_fts.rank AS rank
4672
+ FROM messages_fts
4673
+ JOIN messages m ON m.id = messages_fts.rowid
4674
+ WHERE messages_fts MATCH ?
4675
+ AND m.role IN ('user', 'assistant')
4676
+ AND m.text_content IS NOT NULL
4677
+ AND trim(m.text_content) != ''
4678
+ AND m.is_heartbeat = 0
4679
+ ORDER BY messages_fts.rank
4680
+ LIMIT ?
4681
+ `);
4682
+ addFacetHits(specificFacetStmt.all(specificFacetQuery, Math.min(10, hitLimit)));
4683
+ // LoCoMo-style multi-hop failures often hinge on one rare anchor
4684
+ // (for example a named friend, venue acronym, or uncommon activity)
4685
+ // that loses the combined OR-query rank contest to common facet terms.
4686
+ // Add a tiny per-term fanout for specific facet terms so rare anchors
4687
+ // are admitted without reopening the broad rank-packing blast radius.
4688
+ let rareFacetFanout = 0;
4689
+ for (const term of specificFacetTerms) {
4690
+ if (rareFacetFanout >= (opts.rareFacetFanoutLimit ?? 12))
4691
+ break;
4692
+ const perTermQuery = toRecallFtsQuery([term], 1);
4693
+ if (!perTermQuery)
4694
+ continue;
4695
+ const before = hits.length;
4696
+ addFacetHits(specificFacetStmt.all(perTermQuery, opts.rareFacetPerTermLimit ?? 3));
4697
+ rareFacetFanout += Math.max(0, hits.length - before);
4698
+ }
4699
+ }
4700
+ }
4701
+ for (const hit of hits) {
4702
+ if (!rowsById.has(hit.id))
4703
+ rowsById.set(hit.id, hit);
4704
+ if (neighborWindow === 0)
4705
+ continue;
4706
+ const neighbors = db.prepare(`
4707
+ SELECT
4708
+ id,
4709
+ conversation_id,
4710
+ role,
4711
+ text_content,
4712
+ message_index,
4713
+ created_at
4714
+ FROM messages
4715
+ WHERE conversation_id = ?
4716
+ AND message_index BETWEEN ? AND ?
4717
+ AND role IN ('user', 'assistant')
4718
+ AND text_content IS NOT NULL
4719
+ AND trim(text_content) != ''
4720
+ AND is_heartbeat = 0
4721
+ ORDER BY message_index ASC
4722
+ `).all(hit.conversation_id, hit.message_index - neighborWindow, hit.message_index + neighborWindow);
4723
+ for (const neighbor of neighbors) {
4724
+ if (!rowsById.has(neighbor.id)) {
4725
+ rowsById.set(neighbor.id, neighbor);
4726
+ expandedCount += 1;
4727
+ }
4728
+ }
4729
+ }
4730
+ const bestRankByConversation = new Map();
4731
+ const bestOverlapByConversation = new Map();
4732
+ for (const hit of hits) {
4733
+ const rank = hit.rank ?? Number.MAX_SAFE_INTEGER;
4734
+ const prevRank = bestRankByConversation.get(hit.conversation_id) ?? Number.MAX_SAFE_INTEGER;
4735
+ if (rank < prevRank)
4736
+ bestRankByConversation.set(hit.conversation_id, rank);
4737
+ const overlap = hit.overlap ?? (openDomainRecall
4738
+ ? scoreOpenDomainEvidence(hit.text_content ?? '', query, queryTerms)
4739
+ : scoreRecallTermOverlap(hit.text_content ?? '', queryTerms));
4740
+ const prevOverlap = bestOverlapByConversation.get(hit.conversation_id) ?? 0;
4741
+ if (overlap > prevOverlap)
4742
+ bestOverlapByConversation.set(hit.conversation_id, overlap);
4743
+ }
4744
+ for (const row of rowsById.values()) {
4745
+ row.overlap = openDomainRecall
4746
+ ? scoreOpenDomainEvidence(row.text_content ?? '', query, queryTerms)
4747
+ : scoreRecallTermOverlap(row.text_content ?? '', queryTerms);
4748
+ }
4749
+ const rows = [...rowsById.values()]
4750
+ .sort((a, b) => {
4751
+ const convOverlapA = bestOverlapByConversation.get(a.conversation_id) ?? 0;
4752
+ const convOverlapB = bestOverlapByConversation.get(b.conversation_id) ?? 0;
4753
+ if (convOverlapA !== convOverlapB)
4754
+ return convOverlapB - convOverlapA;
4755
+ const rankA = bestRankByConversation.get(a.conversation_id) ?? a.rank ?? Number.MAX_SAFE_INTEGER;
4756
+ const rankB = bestRankByConversation.get(b.conversation_id) ?? b.rank ?? Number.MAX_SAFE_INTEGER;
4757
+ if (rankA !== rankB)
4758
+ return rankA - rankB;
4759
+ const rowOverlapA = a.overlap ?? 0;
4760
+ const rowOverlapB = b.overlap ?? 0;
4761
+ if (a.conversation_id === b.conversation_id) {
4762
+ if (!openDomainRecall && opts.expandMultiHop !== false && opts.sameConversationDirectFirst === true) {
4763
+ const directA = seenHitIds.has(a.id) ? 1 : 0;
4764
+ const directB = seenHitIds.has(b.id) ? 1 : 0;
4765
+ if (directA !== directB)
4766
+ return directB - directA;
4767
+ }
4768
+ return a.message_index - b.message_index;
4769
+ }
4770
+ if (rowOverlapA !== rowOverlapB)
4771
+ return rowOverlapB - rowOverlapA;
4772
+ if (a.conversation_id !== b.conversation_id)
4773
+ return a.conversation_id - b.conversation_id;
4774
+ return a.message_index - b.message_index;
4775
+ });
4776
+ const lines = [];
4777
+ let tokens = 0;
4778
+ let currentConversationId = null;
4779
+ let includedCount = 0;
4780
+ for (const row of rows) {
4781
+ const text = String(row.text_content || '').trim();
4782
+ if (!text || text.length < 8)
4783
+ continue;
4784
+ if (opts.isDuplicate(text))
4785
+ continue;
4786
+ if (currentConversationId !== row.conversation_id) {
4787
+ const header = `### Raw transcript group ${row.conversation_id}`;
4788
+ const headerTokens = estimateTokens(header);
4789
+ if (tokens + headerTokens > opts.maxTokens)
4790
+ break;
4791
+ lines.push(header);
4792
+ tokens += headerTokens;
4793
+ currentConversationId = row.conversation_id;
4794
+ }
4795
+ const date = row.created_at ? new Date(row.created_at).toISOString().slice(0, 10) : '';
4796
+ const prefix = date ? `[${date}] ` : '';
4797
+ const line = `- ${prefix}${row.role}: ${text.length > lineCharLimit ? `${text.slice(0, lineCharLimit)}…` : text}`;
4798
+ const lineTokens = estimateTokens(line);
4799
+ if (tokens + lineTokens > opts.maxTokens)
4800
+ break;
4801
+ lines.push(line);
4802
+ tokens += lineTokens;
4803
+ includedCount += 1;
4804
+ opts.addFingerprint(text);
4805
+ }
4806
+ return includedCount > 0
4807
+ ? { content: lines.join('\n'), tokens, hitCount: hits.length, expandedCount, includedCount }
4808
+ : null;
4809
+ }
4038
4810
  /**
4039
4811
  * Format a hybrid search result for injection into context.
4040
4812
  * Shows retrieval source(s) and relevance score.