hippo-memory 0.36.0 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/dist/api.d.ts +20 -0
- package/dist/api.d.ts.map +1 -1
- package/dist/api.js +23 -3
- package/dist/api.js.map +1 -1
- package/dist/benchmarks/e1.3/incident-recall-eval.js +74 -0
- package/dist/benchmarks/e1.3/incident-recall-eval.js.map +1 -0
- package/dist/benchmarks/e1.3/scenarios.json +2587 -0
- package/dist/benchmarks/e1.3/slack-1000-event-smoke.js +102 -0
- package/dist/benchmarks/e1.3/slack-1000-event-smoke.js.map +1 -0
- package/dist/cli.js +82 -0
- package/dist/cli.js.map +1 -1
- package/dist/connectors/slack/backfill.d.ts +42 -0
- package/dist/connectors/slack/backfill.d.ts.map +1 -0
- package/dist/connectors/slack/backfill.js +76 -0
- package/dist/connectors/slack/backfill.js.map +1 -0
- package/dist/connectors/slack/deletion.d.ts +14 -0
- package/dist/connectors/slack/deletion.d.ts.map +1 -0
- package/dist/connectors/slack/deletion.js +46 -0
- package/dist/connectors/slack/deletion.js.map +1 -0
- package/dist/connectors/slack/dlq.d.ts +21 -0
- package/dist/connectors/slack/dlq.d.ts.map +1 -0
- package/dist/connectors/slack/dlq.js +23 -0
- package/dist/connectors/slack/dlq.js.map +1 -0
- package/dist/connectors/slack/idempotency.d.ts +5 -0
- package/dist/connectors/slack/idempotency.d.ts.map +1 -0
- package/dist/connectors/slack/idempotency.js +13 -0
- package/dist/connectors/slack/idempotency.js.map +1 -0
- package/dist/connectors/slack/ingest.d.ts +27 -0
- package/dist/connectors/slack/ingest.d.ts.map +1 -0
- package/dist/connectors/slack/ingest.js +48 -0
- package/dist/connectors/slack/ingest.js.map +1 -0
- package/dist/connectors/slack/ratelimit.d.ts +9 -0
- package/dist/connectors/slack/ratelimit.d.ts.map +1 -0
- package/dist/connectors/slack/ratelimit.js +18 -0
- package/dist/connectors/slack/ratelimit.js.map +1 -0
- package/dist/connectors/slack/scope.d.ts +16 -0
- package/dist/connectors/slack/scope.d.ts.map +1 -0
- package/dist/connectors/slack/scope.js +13 -0
- package/dist/connectors/slack/scope.js.map +1 -0
- package/dist/connectors/slack/signature.d.ts +12 -0
- package/dist/connectors/slack/signature.d.ts.map +1 -0
- package/dist/connectors/slack/signature.js +20 -0
- package/dist/connectors/slack/signature.js.map +1 -0
- package/dist/connectors/slack/tenant-routing.d.ts +13 -0
- package/dist/connectors/slack/tenant-routing.d.ts.map +1 -0
- package/dist/connectors/slack/tenant-routing.js +17 -0
- package/dist/connectors/slack/tenant-routing.js.map +1 -0
- package/dist/connectors/slack/transform.d.ts +20 -0
- package/dist/connectors/slack/transform.d.ts.map +1 -0
- package/dist/connectors/slack/transform.js +31 -0
- package/dist/connectors/slack/transform.js.map +1 -0
- package/dist/connectors/slack/types.d.ts +35 -0
- package/dist/connectors/slack/types.d.ts.map +1 -0
- package/dist/connectors/slack/types.js +23 -0
- package/dist/connectors/slack/types.js.map +1 -0
- package/dist/connectors/slack/web-client.d.ts +12 -0
- package/dist/connectors/slack/web-client.d.ts.map +1 -0
- package/dist/connectors/slack/web-client.js +43 -0
- package/dist/connectors/slack/web-client.js.map +1 -0
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +46 -1
- package/dist/db.js.map +1 -1
- package/dist/importers.js +3 -3
- package/dist/importers.js.map +1 -1
- package/dist/mcp/server.js +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +174 -2
- package/dist/server.js.map +1 -1
- package/dist/src/ambient.js +147 -0
- package/dist/src/ambient.js.map +1 -0
- package/dist/src/api.js +343 -0
- package/dist/src/api.js.map +1 -0
- package/dist/src/audit.js +152 -0
- package/dist/src/audit.js.map +1 -0
- package/dist/src/auth.js +65 -0
- package/dist/src/auth.js.map +1 -0
- package/dist/src/autolearn.js +143 -0
- package/dist/src/autolearn.js.map +1 -0
- package/dist/src/capture.js +512 -0
- package/dist/src/capture.js.map +1 -0
- package/dist/src/cli.js +4971 -0
- package/dist/src/cli.js.map +1 -0
- package/dist/src/client.js +181 -0
- package/dist/src/client.js.map +1 -0
- package/dist/src/config.js +108 -0
- package/dist/src/config.js.map +1 -0
- package/dist/src/connectors/slack/backfill.js +76 -0
- package/dist/src/connectors/slack/backfill.js.map +1 -0
- package/dist/src/connectors/slack/deletion.js +46 -0
- package/dist/src/connectors/slack/deletion.js.map +1 -0
- package/dist/src/connectors/slack/dlq.js +23 -0
- package/dist/src/connectors/slack/dlq.js.map +1 -0
- package/dist/src/connectors/slack/idempotency.js +13 -0
- package/dist/src/connectors/slack/idempotency.js.map +1 -0
- package/dist/src/connectors/slack/ingest.js +48 -0
- package/dist/src/connectors/slack/ingest.js.map +1 -0
- package/dist/src/connectors/slack/ratelimit.js +18 -0
- package/dist/src/connectors/slack/ratelimit.js.map +1 -0
- package/dist/src/connectors/slack/scope.js +13 -0
- package/dist/src/connectors/slack/scope.js.map +1 -0
- package/dist/src/connectors/slack/signature.js +20 -0
- package/dist/src/connectors/slack/signature.js.map +1 -0
- package/dist/src/connectors/slack/tenant-routing.js +17 -0
- package/dist/src/connectors/slack/tenant-routing.js.map +1 -0
- package/dist/src/connectors/slack/transform.js +31 -0
- package/dist/src/connectors/slack/transform.js.map +1 -0
- package/dist/src/connectors/slack/types.js +23 -0
- package/dist/src/connectors/slack/types.js.map +1 -0
- package/dist/src/connectors/slack/web-client.js +43 -0
- package/dist/src/connectors/slack/web-client.js.map +1 -0
- package/dist/src/consolidate.js +517 -0
- package/dist/src/consolidate.js.map +1 -0
- package/dist/src/dag.js +104 -0
- package/dist/src/dag.js.map +1 -0
- package/dist/src/dashboard.js +409 -0
- package/dist/src/dashboard.js.map +1 -0
- package/dist/src/db.js +584 -0
- package/dist/src/db.js.map +1 -0
- package/dist/src/embeddings.js +344 -0
- package/dist/src/embeddings.js.map +1 -0
- package/dist/src/eval-suite.js +289 -0
- package/dist/src/eval-suite.js.map +1 -0
- package/dist/src/eval.js +187 -0
- package/dist/src/eval.js.map +1 -0
- package/dist/src/extract.js +87 -0
- package/dist/src/extract.js.map +1 -0
- package/dist/src/handoff.js +30 -0
- package/dist/src/handoff.js.map +1 -0
- package/dist/src/hooks.js +582 -0
- package/dist/src/hooks.js.map +1 -0
- package/dist/src/importers.js +399 -0
- package/dist/src/importers.js.map +1 -0
- package/dist/src/index.js +25 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/invalidation.js +94 -0
- package/dist/src/invalidation.js.map +1 -0
- package/dist/src/mcp/framing.js +45 -0
- package/dist/src/mcp/framing.js.map +1 -0
- package/dist/src/mcp/server.js +510 -0
- package/dist/src/mcp/server.js.map +1 -0
- package/dist/src/memory.js +280 -0
- package/dist/src/memory.js.map +1 -0
- package/dist/src/multihop.js +32 -0
- package/dist/src/multihop.js.map +1 -0
- package/dist/src/path-context.js +32 -0
- package/dist/src/path-context.js.map +1 -0
- package/dist/src/physics-config.js +26 -0
- package/dist/src/physics-config.js.map +1 -0
- package/dist/src/physics-state.js +163 -0
- package/dist/src/physics-state.js.map +1 -0
- package/dist/src/physics.js +361 -0
- package/dist/src/physics.js.map +1 -0
- package/dist/src/postinstall.js +68 -0
- package/dist/src/postinstall.js.map +1 -0
- package/dist/src/raw-archive.js +72 -0
- package/dist/src/raw-archive.js.map +1 -0
- package/dist/src/refine-llm.js +147 -0
- package/dist/src/refine-llm.js.map +1 -0
- package/dist/src/replay.js +117 -0
- package/dist/src/replay.js.map +1 -0
- package/dist/src/salience.js +74 -0
- package/dist/src/salience.js.map +1 -0
- package/dist/src/scheduler.js +67 -0
- package/dist/src/scheduler.js.map +1 -0
- package/dist/src/scope.js +35 -0
- package/dist/src/scope.js.map +1 -0
- package/dist/src/search.js +801 -0
- package/dist/src/search.js.map +1 -0
- package/dist/src/server-detect.js +70 -0
- package/dist/src/server-detect.js.map +1 -0
- package/dist/src/server.js +784 -0
- package/dist/src/server.js.map +1 -0
- package/dist/src/shared.js +309 -0
- package/dist/src/shared.js.map +1 -0
- package/dist/src/sso.js +22 -0
- package/dist/src/sso.js.map +1 -0
- package/dist/src/store.js +1390 -0
- package/dist/src/store.js.map +1 -0
- package/dist/src/tenant.js +17 -0
- package/dist/src/tenant.js.map +1 -0
- package/dist/src/trace.js +64 -0
- package/dist/src/trace.js.map +1 -0
- package/dist/src/working-memory.js +149 -0
- package/dist/src/working-memory.js.map +1 -0
- package/dist/src/yaml.js +98 -0
- package/dist/src/yaml.js.map +1 -0
- package/dist/store.d.ts +9 -1
- package/dist/store.d.ts.map +1 -1
- package/dist/store.js +30 -2
- package/dist/store.js.map +1 -1
- package/extensions/openclaw-plugin/openclaw.plugin.json +1 -1
- package/extensions/openclaw-plugin/package.json +1 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -2
- package/dist/import.d.ts +0 -31
- package/dist/import.d.ts.map +0 -1
- package/dist/import.js +0 -307
- package/dist/import.js.map +0 -1
|
@@ -0,0 +1,801 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 search + optional embedding hybrid search for Hippo.
|
|
3
|
+
* Zero external dependencies when embeddings are not available.
|
|
4
|
+
*/
|
|
5
|
+
import { calculateStrength } from './memory.js';
|
|
6
|
+
import { extractPathTags, pathOverlapScore } from './path-context.js';
|
|
7
|
+
import { detectScope, scopeMatch } from './scope.js';
|
|
8
|
+
import { isEmbeddingAvailable, getEmbedding, cosineSimilarity, embeddingModelRequiresReindex, loadEmbeddingIndex, resolveEmbeddingModel, } from './embeddings.js';
|
|
9
|
+
import { physicsScore as computePhysicsScores } from './physics.js';
|
|
10
|
+
import { DEFAULT_PHYSICS_CONFIG } from './physics-config.js';
|
|
11
|
+
import { loadPhysicsState } from './physics-state.js';
|
|
12
|
+
import { openHippoDb, closeHippoDb } from './db.js';
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Tokenizer
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
 * Split text into lowercase search tokens.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is replaced by a space, the result is split on whitespace, and
 * tokens shorter than two characters are dropped.
 *
 * Unicode property escapes are used instead of `\w` because `\w` only matches
 * ASCII `[A-Za-z0-9_]`: with it, accented words were cut at the accent and
 * non-Latin text produced no tokens at all. Output for pure-ASCII input is
 * unchanged.
 *
 * @param {string} text - Raw text (document content or query).
 * @returns {string[]} Lowercased tokens in order of appearance.
 */
export function tokenize(text) {
    return text
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
        .split(/\s+/)
        .filter((t) => t.length > 1);
}
|
|
23
|
+
// BM25 k1 parameter: scales the term-frequency numerator (f * (k1 + 1)) and the
// length-normalised part of the denominator in bm25Score.
const BM25_K1 = 1.5;
|
|
24
|
+
// BM25 b parameter: weight given to the docLen / avgLen ratio in bm25Score's
// denominator (0 would ignore document length entirely).
const BM25_B = 0.75;
|
|
25
|
+
/**
 * Tokenize a list of texts and gather the statistics BM25 scoring needs.
 *
 * @param {string[]} texts - One string per document.
 * @returns {{docs: string[][], avgLen: number, df: Map<string, number>, N: number}}
 *   `docs` holds the token list of each document, `N` the document count,
 *   `df` the number of documents each term occurs in, and `avgLen` the mean
 *   token count per document (1 when there are no documents).
 */
export function buildCorpus(texts) {
    const docs = texts.map(tokenize);
    const N = docs.length;
    const df = new Map();
    // A Set collapses repeats so each term is counted once per document.
    docs.forEach((tokens) => {
        new Set(tokens).forEach((term) => {
            df.set(term, (df.get(term) ?? 0) + 1);
        });
    });
    const totalLen = docs.reduce((sum, tokens) => sum + tokens.length, 0);
    const avgLen = N > 0 ? totalLen / N : 1;
    return { docs, avgLen, df, N };
}
|
|
43
|
+
/**
 * BM25 score of one corpus document against a list of query terms.
 *
 * @param {{docs: string[][], avgLen: number, df: Map<string, number>, N: number}} corpus
 *   Corpus statistics as returned by buildCorpus.
 * @param {number} docIdx - Index of the document inside `corpus.docs`.
 * @param {string[]} queryTerms - Tokenized query; a term listed twice contributes twice.
 * @returns {number} Sum of the per-term contributions; 0 when no query term occurs.
 */
function bm25Score(corpus, docIdx, queryTerms) {
    const tokens = corpus.docs[docIdx];
    // Occurrences of every term in this document.
    const counts = new Map();
    tokens.forEach((token) => {
        counts.set(token, (counts.get(token) ?? 0) + 1);
    });
    // Length-normalisation part of the denominator; it is the same for every term.
    const lengthPenalty = BM25_K1 * (1 - BM25_B + BM25_B * (tokens.length / corpus.avgLen));
    return queryTerms.reduce((total, term) => {
        const freq = counts.get(term) ?? 0;
        if (freq === 0) {
            return total;
        }
        const docFreq = corpus.df.get(term) ?? 0;
        const idf = Math.log((corpus.N - docFreq + 0.5) / (docFreq + 0.5) + 1);
        const saturation = (freq * (BM25_K1 + 1)) / (freq + lengthPenalty);
        return total + idf * saturation;
    }, 0);
}
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Token budget estimation
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
/**
 * Cheap token-count estimate: one token per four characters, rounded up.
 * Length is measured with `String.prototype.length` (UTF-16 code units).
 *
 * @param {string} text - Text to size.
 * @returns {number} Estimated token count (0 for an empty string).
 */
export function estimateTokens(text) {
    const charsPerToken = 4;
    const charCount = text.length;
    return Math.ceil(charCount / charsPerToken);
}
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
// Recency boost
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
/**
 * Recency factor for a memory: exp(-ageDays / 30), where age is the time
 * between `entry.created` and `now`.
 *
 * Age is clamped to zero so an entry dated after `now` (clock skew, or a
 * caller-supplied `now` in the past) gets the maximum boost of 1 instead of
 * an exponentially growing value. An unparseable `created` returns 0 rather
 * than NaN, so the result can always be multiplied into a score safely.
 *
 * @param {{created: string|number|Date}} entry - Memory entry; `created` is passed to `new Date`.
 * @param {Date} now - Reference time.
 * @returns {number} Value in [0, 1]; 1 for brand-new entries.
 */
function recencyBoost(entry, now) {
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const created = new Date(entry.created);
    const rawAgeDays = (now.getTime() - created.getTime()) / MS_PER_DAY;
    if (!Number.isFinite(rawAgeDays)) {
        // Invalid date on either side: give no recency credit instead of NaN.
        return 0;
    }
    const ageDays = Math.max(0, rawAgeDays);
    // Exponential decay: memories < 1 day get boost ~1.0, older get less
    return Math.exp(-ageDays / 30);
}
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// Temporal-aware scoring
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
// Query words that signal the caller wants the newest matching memories.
const TEMPORAL_RECENT_CUES = new Set(['recently', 'latest', 'last', 'newest', 'current', 'today']);
// Query words that signal the caller wants the oldest matching memories.
const TEMPORAL_OLDEST_CUES = new Set(['first', 'earliest', 'oldest', 'initially', 'originally']);
/**
 * Detect whether a query asks for recent or old memories.
 *
 * The query is lowercased and split on whitespace; leading and trailing
 * punctuation is stripped from each word before it is compared with the cue
 * sets, so "recently?" or "(first)" are recognised. The first cue word found
 * decides the direction.
 *
 * @param {string} query - Raw user query.
 * @returns {'recent'|'oldest'|null} Detected direction, or null when no cue word is present.
 */
export function detectTemporalDirection(query) {
    const words = query.toLowerCase().split(/\s+/);
    for (const raw of words) {
        // Trim punctuation at the edges only; inner characters are left alone.
        const w = raw.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, '');
        if (TEMPORAL_RECENT_CUES.has(w))
            return 'recent';
        if (TEMPORAL_OLDEST_CUES.has(w))
            return 'oldest';
    }
    return null;
}
|
|
96
|
+
/**
 * Find the earliest and latest `created` timestamps among the entries.
 *
 * Entries whose `created` does not parse are ignored, because NaN never
 * compares as smaller or larger. With no usable entries the sentinels
 * `Infinity` / `-Infinity` are returned unchanged.
 *
 * @param {Iterable<{created: string|number|Date}>} entries - Memory entries.
 * @returns {{minTime: number, maxTime: number}} Epoch-millisecond bounds.
 */
export function computeTemporalRange(entries) {
    const range = { minTime: Infinity, maxTime: -Infinity };
    for (const { created } of entries) {
        const stamp = new Date(created).getTime();
        range.minTime = stamp < range.minTime ? stamp : range.minTime;
        range.maxTime = stamp > range.maxTime ? stamp : range.maxTime;
    }
    return range;
}
|
|
108
|
+
/**
 * Score multiplier that favours newer or older entries, depending on the
 * temporal direction detected in the query.
 *
 * The entry's `created` time is normalised to its position inside `range`.
 * For 'recent' the multiplier rises linearly from 0.8 at `range.minTime` to
 * 1.2 at `range.maxTime`; for any other truthy direction it falls from 1.2 to
 * 0.8.
 *
 * The neutral value 1.0 is returned when there is no direction, when the
 * range has zero or non-finite width (for example the Infinity sentinels
 * computeTemporalRange returns for an empty list), or when `entry.created`
 * does not parse. Without those guards the result would be NaN, which would
 * then be multiplied into the composite score.
 *
 * @param {{created: string|number|Date}} entry - Memory entry.
 * @param {'recent'|'oldest'|null} direction - Result of detectTemporalDirection.
 * @param {{minTime: number, maxTime: number}} range - Epoch-millisecond bounds.
 * @returns {number} Multiplier; 1.0 when no temporal preference applies.
 */
export function temporalBoost(entry, direction, range) {
    if (!direction)
        return 1.0;
    const span = range.maxTime - range.minTime;
    if (span === 0 || !Number.isFinite(span))
        return 1.0;
    const entryTime = new Date(entry.created).getTime();
    if (!Number.isFinite(entryTime))
        return 1.0;
    const normalized = (entryTime - range.minTime) / span;
    if (direction === 'recent') {
        return 0.8 + 0.4 * normalized;
    }
    return 0.8 + 0.4 * (1 - normalized);
}
|
|
123
|
+
/**
 * Hybrid search: BM25 + cosine similarity (when embeddings are available).
 *   score = 0.4 * bm25_norm + 0.6 * cosine_sim   (with embeddings, 'blend' scoring)
 *   score = weighted reciprocal-rank fusion      (with embeddings, 'rrf' scoring)
 *   score = bm25 / queryTerms.length             (BM25-only fallback)
 * The base score is then multiplied by strength, recency, decision, path,
 * outcome, scope, extraction and temporal boosts.
 *
 * Pipeline: bi-temporal / superseded filtering -> BM25 scoring -> optional
 * embedding scoring -> per-entry composite score -> fact/source dedup ->
 * DAG child injection -> optional MMR re-ranking -> token-budget cut.
 *
 * Options read by this function:
 *   now, budget (default 4000), minResults (default 1),
 *   embeddingWeight (default 0.6; weight for the cosine component, 0.0 to 1.0),
 *   scoring ('blend' | 'rrf', default 'blend'), explain, mmr (default true),
 *   mmrLambda (default 0.7), asOf, includeSuperseded, preparedCorpus,
 *   hippoRoot, scope.
 *
 * NOTE(review): `preparedCorpus` is indexed by position against `entries`
 * AFTER the asOf / superseded filter has run - confirm callers build it from
 * the same filtered list.
 *
 * @param {string} query - Raw user query.
 * @param {object[]} entries - Candidate memory entries.
 * @param {object} [options] - See the list above.
 * @returns {Promise<object[]>} Results `{ entry, score, bm25, cosine, tokens, breakdown? }`
 *   in final rank order, limited by the token budget.
 */
export async function hybridSearch(query, entries, options = {}) {
    const now = options.now ?? new Date();
    const budget = options.budget ?? 4000;
    const minResults = options.minResults ?? 1;
    const embeddingWeight = options.embeddingWeight ?? 0.6;
    const bm25Weight = 1 - embeddingWeight;
    const scoringMode = options.scoring ?? 'blend';
    const explain = options.explain ?? false;
    const mmrEnabled = options.mmr ?? true;
    const mmrLambda = options.mmrLambda ?? 0.7;
    // Bi-temporal filtering
    if (options.asOf) {
        const asOfDate = new Date(options.asOf);
        // Index entries by id once (first occurrence wins, like Array.find) so
        // resolving each successor is a Map lookup instead of a linear scan.
        const firstById = new Map();
        for (const e of entries) {
            if (!firstById.has(e.id))
                firstById.set(e.id, e);
        }
        const successorValidFrom = new Map();
        for (const e of entries) {
            if (e.superseded_by) {
                const successor = firstById.get(e.superseded_by);
                if (successor)
                    successorValidFrom.set(e.id, successor.valid_from);
            }
        }
        entries = entries.filter(e => {
            // Not yet valid at the as-of date.
            if (new Date(e.valid_from) > asOfDate)
                return false;
            if (!e.superseded_by)
                return true;
            // A superseded entry stays visible until its successor became valid
            // (or when the successor is unknown / has no valid_from).
            const succVf = successorValidFrom.get(e.id);
            return succVf ? new Date(succVf) > asOfDate : true;
        });
    }
    else if (!options.includeSuperseded) {
        entries = entries.filter(e => !e.superseded_by);
    }
    if (entries.length === 0)
        return [];
    const queryTerms = tokenize(query);
    if (queryTerms.length === 0)
        return [];
    // Build BM25 corpus (or reuse one the caller already built).
    const corpus = options.preparedCorpus
        ?? buildCorpus(entries.map((e) => `${e.content} ${e.tags.join(' ')}`));
    // Score all entries with BM25
    const bm25Scores = entries.map((_, i) => bm25Score(corpus, i, queryTerms));
    // The 1e-9 floor keeps the normalisation below from dividing by zero.
    const maxBm25 = bm25Scores.reduce((a, b) => Math.max(a, b), 1e-9);
    // Try to get embedding scores if available
    let useEmbeddings = false;
    let embeddingIndex = {};
    let queryVector = [];
    if (isEmbeddingAvailable() && options.hippoRoot) {
        try {
            const model = resolveEmbeddingModel(options.hippoRoot);
            if (!embeddingModelRequiresReindex(options.hippoRoot, model)) {
                queryVector = await getEmbedding(query, model);
                if (queryVector.length > 0) {
                    embeddingIndex = loadEmbeddingIndex(options.hippoRoot);
                    useEmbeddings = true;
                }
            }
        }
        catch {
            // Fall through to BM25-only
        }
    }
    // Compute cosine similarities for RRF ranking (need all before scoring)
    const cosineScores = new Array(entries.length).fill(0);
    const hadCachedVecs = new Array(entries.length).fill(false);
    if (useEmbeddings) {
        for (let i = 0; i < entries.length; i++) {
            const cached = embeddingIndex[entries[i].id];
            hadCachedVecs[i] = Boolean(cached && queryVector.length > 0);
            cosineScores[i] = hadCachedVecs[i]
                ? Math.max(0, cosineSimilarity(queryVector, cached))
                : 0;
        }
    }
    // For RRF: build rank maps from BM25 and cosine orderings
    let rrfScores = null;
    if (useEmbeddings && scoringMode === 'rrf') {
        const RRF_K = 60;
        const bm25Ranked = entries.map((_, i) => i).filter(i => bm25Scores[i] > 0 || cosineScores[i] > 0);
        bm25Ranked.sort((a, b) => bm25Scores[b] - bm25Scores[a]);
        const cosineRanked = entries.map((_, i) => i).filter(i => bm25Scores[i] > 0 || cosineScores[i] > 0);
        cosineRanked.sort((a, b) => cosineScores[b] - cosineScores[a]);
        const bm25RankMap = new Map();
        bm25Ranked.forEach((idx, rank) => bm25RankMap.set(idx, rank + 1));
        const cosineRankMap = new Map();
        cosineRanked.forEach((idx, rank) => cosineRankMap.set(idx, rank + 1));
        rrfScores = new Map();
        const allCandidates = new Set([...bm25Ranked, ...cosineRanked]);
        for (const idx of allCandidates) {
            const bm25Rank = bm25RankMap.get(idx) ?? (entries.length + 1);
            const cosineRank = cosineRankMap.get(idx) ?? (entries.length + 1);
            rrfScores.set(idx, bm25Weight / (RRF_K + bm25Rank) + embeddingWeight / (RRF_K + cosineRank));
        }
    }
    // Score each entry
    const scored = [];
    const currentPathTags = extractPathTags(process.cwd());
    const activeScope = options.scope !== undefined ? options.scope : detectScope();
    const queryTermSet = new Set(queryTerms);
    const temporalDirAsync = detectTemporalDirection(query);
    const temporalRangeAsync = temporalDirAsync ? computeTemporalRange(entries) : { minTime: 0, maxTime: 0 };
    for (let i = 0; i < entries.length; i++) {
        const rawBm25 = bm25Scores[i];
        const cosineScore = cosineScores[i];
        const hadCachedVec = hadCachedVecs[i];
        // Without embeddings, an entry with no lexical match cannot score.
        if (!useEmbeddings && rawBm25 <= 0)
            continue;
        const normBm25 = rawBm25 / maxBm25;
        const strength = calculateStrength(entries[i], now);
        const recency = recencyBoost(entries[i], now);
        const strengthMultiplier = 0.5 + 0.5 * strength;
        const recencyMultiplier = 0.8 + 0.2 * recency;
        let compositeScore;
        let base;
        let modeLabel;
        if (useEmbeddings) {
            if (rrfScores) {
                base = rrfScores.get(i) ?? 0;
            }
            else {
                base = bm25Weight * normBm25 + embeddingWeight * cosineScore;
            }
            compositeScore = base * strengthMultiplier * recencyMultiplier;
            modeLabel = hadCachedVec ? 'hybrid' : 'hybrid-no-vec';
        }
        else {
            base = queryTerms.length > 0 ? rawBm25 / queryTerms.length : rawBm25;
            compositeScore = base * strengthMultiplier * recencyMultiplier;
            modeLabel = 'bm25-only';
        }
        // Decision-tagged memories get a 1.2x recall boost
        const decisionBoost = entries[i].tags.includes('decision') ? 1.2 : 1.0;
        compositeScore *= decisionBoost;
        // Path-based boost: memories tagged with matching path segments get up to 1.3x
        const memPathTags = entries[i].tags.filter(t => t.startsWith('path:'));
        const pathScore = pathOverlapScore(memPathTags, currentPathTags);
        const pathBoost = 1.0 + (pathScore * 0.3);
        compositeScore *= pathBoost;
        // Retrieval-time outcome personalization: nudge up/down from user feedback.
        // Distinct from reward-factor-via-strength (slow); this is immediate.
        const pos = entries[i].outcome_positive ?? 0;
        const neg = entries[i].outcome_negative ?? 0;
        const outcomeBoost = pos === 0 && neg === 0
            ? 1.0
            : Math.max(0.85, Math.min(1.15, 1 + 0.15 * Math.tanh((pos - neg) / 2)));
        compositeScore *= outcomeBoost;
        // Scope boost: memories tagged with the active scope get 1.5x; mismatching scopes get 0.5x
        const scopeSignal = scopeMatch(entries[i].tags, activeScope);
        const scopeBoost = scopeSignal === 1 ? 1.5 : scopeSignal === -1 ? 0.5 : 1.0;
        compositeScore *= scopeBoost;
        // Entries tagged 'extracted' get a 1.3x boost.
        const extractionBoost = entries[i].tags.includes('extracted') ? 1.3 : 1.0;
        compositeScore *= extractionBoost;
        compositeScore *= temporalBoost(entries[i], temporalDirAsync, temporalRangeAsync);
        if (compositeScore <= 0)
            continue;
        const tokens = estimateTokens(entries[i].content);
        const result = {
            entry: entries[i],
            score: compositeScore,
            bm25: rawBm25,
            cosine: cosineScore,
            tokens,
        };
        if (explain) {
            const docTerms = new Set(tokenize(`${entries[i].content} ${entries[i].tags.join(' ')}`));
            const matchedTerms = [];
            for (const t of queryTermSet)
                if (docTerms.has(t))
                    matchedTerms.push(t);
            const ageDays = Math.max(0, Math.floor((now.getTime() - new Date(entries[i].created).getTime()) / 86_400_000));
            result.breakdown = {
                mode: modeLabel,
                normBm25,
                bm25Weight: useEmbeddings ? bm25Weight : 1,
                embeddingWeight: useEmbeddings ? embeddingWeight : 0,
                cosine: cosineScore,
                base,
                strengthMultiplier,
                recencyMultiplier,
                decisionBoost,
                pathBoost,
                scopeBoost,
                sourceBump: 1,
                outcomeBoost,
                matchedTerms,
                final: compositeScore,
                ageDays,
            };
        }
        scored.push(result);
    }
    // Sort by composite score descending
    scored.sort((a, b) => b.score - a.score);
    // Deduplicate: when an extracted fact and its source both appear, only the
    // fact is kept. A source that was already added is removed when its fact
    // shows up; a source that shows up after its fact is skipped.
    const seenExtractedFrom = new Set();
    const deduped = [];
    for (const result of scored) {
        const entry = result.entry;
        if (entry.extracted_from) {
            seenExtractedFrom.add(entry.extracted_from);
            const sourceIdx = deduped.findIndex((d) => d.entry.id === entry.extracted_from);
            if (sourceIdx >= 0)
                deduped.splice(sourceIdx, 1);
            deduped.push(result);
        }
        else if (seenExtractedFrom.has(entry.id)) {
            continue;
        }
        else {
            deduped.push(result);
        }
    }
    const scoredDeduped = deduped;
    // DAG drill-down: when a summary node matches, inject its children
    const summaryIdsAsync = scoredDeduped
        .filter((r) => r.entry.tags.includes('dag-summary'))
        .map((r) => r.entry.id);
    if (summaryIdsAsync.length > 0) {
        // Set / Map lookups replace the repeated includes / some / find scans.
        const summaryIdSet = new Set(summaryIdsAsync);
        const presentIds = new Set();
        const firstResultById = new Map();
        for (const r of scoredDeduped) {
            presentIds.add(r.entry.id);
            if (!firstResultById.has(r.entry.id))
                firstResultById.set(r.entry.id, r);
        }
        const childEntriesAsync = entries.filter((e) => e.dag_parent_id && summaryIdSet.has(e.dag_parent_id));
        for (const child of childEntriesAsync) {
            if (!presentIds.has(child.id)) {
                const parentResult = firstResultById.get(child.dag_parent_id);
                // Injected children rank just below their summary parent.
                const childScore = parentResult ? parentResult.score * 0.9 : 0;
                scoredDeduped.push({
                    entry: child,
                    score: childScore,
                    bm25: 0,
                    cosine: 0,
                    tokens: estimateTokens(child.content),
                });
                presentIds.add(child.id);
            }
        }
        scoredDeduped.sort((a, b) => b.score - a.score);
    }
    // MMR re-ranking: de-cluster near-duplicates by trading relevance for
    // diversity. Only applies when embeddings are loaded (doc-to-doc similarity
    // is via cosine of cached vectors); otherwise we return the pure-relevance
    // ordering unchanged.
    //
    // MMR cost grows quickly with the candidate count, which on large corpora
    // (1000+ candidates) dominates query time. Cap the re-ranking window to the
    // top relevance-scored candidates; the rest keep their relevance order
    // after the re-ranked head.
    const MMR_CANDIDATE_CAP = 100;
    const applyMmr = mmrEnabled && useEmbeddings && scoredDeduped.length > 1 && mmrLambda < 1;
    let ordered;
    if (applyMmr) {
        const head = scoredDeduped.slice(0, MMR_CANDIDATE_CAP);
        const tail = scoredDeduped.slice(MMR_CANDIDATE_CAP);
        ordered = [...mmrRerank(head, embeddingIndex, mmrLambda, explain), ...tail];
    }
    else {
        ordered = scoredDeduped;
    }
    // Apply token budget (guarantee at least minResults items). An item that
    // does not fit is skipped rather than ending the loop, so smaller items
    // further down can still use the remaining budget.
    const results = [];
    let usedTokens = 0;
    for (let i = 0; i < ordered.length; i++) {
        const tokens = ordered[i].tokens;
        if (results.length >= minResults && usedTokens + tokens > budget)
            continue;
        usedTokens += tokens;
        results.push(ordered[i]);
    }
    return results;
}
|
|
398
|
+
/**
 * MMR (Maximal Marginal Relevance) re-ranking.
 *
 * Iteratively picks the candidate that maximises
 *   lambda * relevance - (1 - lambda) * max(cos(cand, picked))
 * where relevance is the candidate's score divided by the first (highest)
 * score, and negative cosine values are clamped to 0.
 *
 * Each candidate's maximum similarity to the picked set is kept as a running
 * value and updated only against the newest pick, so a full re-rank needs
 * O(K^2) cosine computations instead of re-scanning every picked item on
 * every round. The resulting order is the same as recomputing from scratch.
 *
 * Candidates without a cached vector, or whose vector length differs from the
 * picked item's, add no similarity penalty.
 *
 * Inputs must already be sorted by relevance descending. When `explain` is
 * true, attaches `preMmrRank` / `postMmrRank` to each result's breakdown
 * (only for results that already carry a `breakdown` object).
 * Exported for unit tests; production callers go through hybridSearch.
 *
 * @param {object[]} scored - Results `{ entry: { id }, score, breakdown? }`, best first.
 * @param {Object<string, number[]>} embeddingIndex - Cached vectors keyed by entry id.
 * @param {number} lambda - Relevance / diversity trade-off (1 = pure relevance).
 * @param {boolean} explain - Whether to record pre/post ranks in breakdowns.
 * @returns {object[]} The same result objects in MMR order; the input array
 *   itself when it is empty.
 */
export function mmrRerank(scored, embeddingIndex, lambda, explain) {
    if (scored.length === 0)
        return scored;
    // `|| 1` avoids dividing by zero when the top score is 0.
    const maxScore = scored[0].score || 1;
    const normScore = scored.map((r) => r.score / maxScore);
    const vectors = scored.map((r) => embeddingIndex[r.entry.id] ?? null);
    // Running max similarity between each candidate and anything picked so far.
    const maxSimToPicked = new Array(scored.length).fill(0);
    const picked = [];
    const remaining = new Set(scored.map((_, i) => i));
    while (remaining.size > 0) {
        let bestIdx = -1;
        let bestMmr = -Infinity;
        for (const i of remaining) {
            const mmr = lambda * normScore[i] - (1 - lambda) * maxSimToPicked[i];
            if (mmr > bestMmr) {
                bestMmr = mmr;
                bestIdx = i;
            }
        }
        // Only reachable when every remaining MMR value is NaN.
        if (bestIdx === -1)
            break;
        remaining.delete(bestIdx);
        picked.push(scored[bestIdx]);
        // Fold the newly picked vector into the running maxima.
        const vp = vectors[bestIdx];
        if (!vp)
            continue;
        for (const i of remaining) {
            const vi = vectors[i];
            if (!vi || vp.length !== vi.length)
                continue;
            const sim = Math.max(0, cosineSimilarity(vi, vp));
            if (sim > maxSimToPicked[i])
                maxSimToPicked[i] = sim;
        }
    }
    if (explain) {
        const preRank = new Map();
        scored.forEach((r, i) => preRank.set(r.entry.id, i + 1));
        picked.forEach((r, i) => {
            if (r.breakdown) {
                r.breakdown.preMmrRank = preRank.get(r.entry.id);
                r.breakdown.postMmrRank = i + 1;
            }
        });
    }
    return picked;
}
|
|
456
|
+
/**
 * Physics-aware search.
 *
 * Entries that have a stored particle whose position and velocity vectors
 * match the query embedding's dimensionality are scored through
 * `computePhysicsScores`; every other entry is scored through `hybridSearch`.
 * Each pool is rescaled by `mergeScorePools`, the pools are combined, sorted
 * by score (descending) and trimmed to the token budget.
 *
 * The whole request is delegated to `hybridSearch` when no query embedding
 * can be obtained, when the embedding model requires a reindex, or when the
 * physics state cannot be loaded.
 *
 * @param {string} query - Search text.
 * @param {Array<object>} entries - Candidate memory entries.
 * @param {object} [options] - Reads `now`, `budget` (default 4000),
 *   `minResults` (default 1), `physicsConfig`, `explain`, `hippoRoot` and
 *   `queryEmbedding`; the object is also forwarded to `hybridSearch`.
 * @returns {Promise<Array<object>>} Selected results; empty when `entries`
 *   is empty or `options.hippoRoot` is not set.
 */
export async function physicsSearch(query, entries, options = {}) {
    const now = options.now ?? new Date();
    const tokenBudget = options.budget ?? 4000;
    const guaranteedResults = options.minResults ?? 1;
    const physicsConfig = options.physicsConfig ?? DEFAULT_PHYSICS_CONFIG;
    const wantBreakdown = options.explain ?? false;
    if (entries.length === 0 || !options.hippoRoot) {
        return [];
    }
    // Every bail-out below hands the untouched request to the classic path.
    const fallBackToHybrid = () => hybridSearch(query, entries, options);

    // Prefer a caller-supplied embedding; otherwise compute one.
    let queryVector = options.queryEmbedding ?? [];
    if (queryVector.length === 0) {
        if (!isEmbeddingAvailable()) {
            return fallBackToHybrid();
        }
        const model = resolveEmbeddingModel(options.hippoRoot);
        if (embeddingModelRequiresReindex(options.hippoRoot, model)) {
            return fallBackToHybrid();
        }
        queryVector = await getEmbedding(query, model);
        if (queryVector.length === 0) {
            return fallBackToHybrid();
        }
    }

    // Read the particle table; the handle is always closed, and any failure
    // (open or load) degrades to the classic path.
    let particleById;
    try {
        const db = openHippoDb(options.hippoRoot);
        try {
            particleById = loadPhysicsState(db);
        }
        finally {
            closeHippoDb(db);
        }
    }
    catch {
        return fallBackToHybrid();
    }

    // Partition: an entry is physics-scored only when its particle vectors
    // are non-empty and dimensionally compatible with the query vector.
    const dims = queryVector.length;
    const withParticle = [];
    const particles = [];
    const withoutParticle = [];
    for (const candidate of entries) {
        const particle = particleById.get(candidate.id);
        const usable = Boolean(particle)
            && particle.position.length > 0
            && particle.position.length === dims
            && particle.velocity.length === dims;
        if (!usable) {
            withoutParticle.push(candidate);
            continue;
        }
        withParticle.push(candidate);
        particles.push(particle);
    }

    // Physics pool.
    const MS_PER_DAY = 86_400_000;
    const physicsPool = [];
    if (particles.length > 0) {
        const byId = new Map(withParticle.map((e) => [e.id, e]));
        for (const scoredParticle of computePhysicsScores(particles, queryVector, physicsConfig)) {
            if (scoredParticle.finalScore <= 0) {
                continue;
            }
            const entry = byId.get(scoredParticle.memoryId);
            if (!entry) {
                continue;
            }
            const hit = {
                entry,
                score: scoredParticle.finalScore,
                bm25: 0,
                cosine: scoredParticle.baseScore,
                tokens: estimateTokens(entry.content),
            };
            if (wantBreakdown) {
                const elapsedMs = now.getTime() - new Date(entry.created).getTime();
                const ageDays = Math.max(0, Math.floor(elapsedMs / MS_PER_DAY));
                // Classic-path multipliers are reported as neutral values here.
                hit.breakdown = {
                    mode: 'physics',
                    normBm25: 0,
                    bm25Weight: 0,
                    embeddingWeight: 1,
                    cosine: scoredParticle.baseScore,
                    base: scoredParticle.baseScore,
                    strengthMultiplier: 1,
                    recencyMultiplier: 1,
                    decisionBoost: 1,
                    pathBoost: 1,
                    scopeBoost: 1,
                    sourceBump: 1,
                    outcomeBoost: 1,
                    matchedTerms: [],
                    final: scoredParticle.finalScore,
                    ageDays,
                };
            }
            physicsPool.push(hit);
        }
    }

    // Classic pool: budget is lifted here because the budget is applied once,
    // after both pools are merged.
    let classicPool = [];
    if (withoutParticle.length > 0) {
        classicPool = await hybridSearch(query, withoutParticle, {
            ...options,
            budget: Infinity,
            explain: wantBreakdown,
        });
    }

    // Merge, rank, then spend the token budget greedily. Items that do not
    // fit are skipped (not a hard stop) once the minimum count is reached.
    const ranked = mergeScorePools(physicsPool, classicPool);
    ranked.sort((a, b) => b.score - a.score);
    const selected = [];
    let spentTokens = 0;
    for (const candidate of ranked) {
        const wouldOverflow = spentTokens + candidate.tokens > tokenBudget;
        if (selected.length >= guaranteedResults && wouldOverflow) {
            continue;
        }
        spentTokens += candidate.tokens;
        selected.push(candidate);
    }
    return selected;
}
|
|
576
|
+
/**
 * Rescale each pool by its own highest score and concatenate them
 * (all of `poolA` first, then all of `poolB`).
 *
 * The divisor is floored at 1e-9 so an empty or all-zero pool never divides
 * by zero. Results are shallow copies; the input objects are not modified.
 */
function mergeScorePools(poolA, poolB) {
    const rescale = (pool) => {
        let peak = 1e-9;
        for (const item of pool) {
            peak = Math.max(peak, item.score);
        }
        return pool.map((item) => ({ ...item, score: item.score / peak }));
    };
    return [...rescale(poolA), ...rescale(poolB)];
}
|
|
589
|
+
/**
 * Synchronous, BM25-only search over `entries`.
 *
 * Pipeline:
 *  1. Temporal filter: with `options.asOf`, keep entries whose `valid_from`
 *     is not after that instant and that were not yet replaced at that
 *     instant (a superseded entry is kept when its successor is not in
 *     `entries` or became valid later). Without `asOf`, superseded entries
 *     are dropped unless `options.includeSuperseded` is set.
 *  2. Score every remaining entry with BM25 (divided by the number of query
 *     terms) multiplied by strength, recency, decision/path/scope/extraction
 *     and temporal factors. Entries with no BM25 hit are discarded.
 *  3. De-duplicate extracted entries against the entry they were extracted
 *     from (the extracted one wins).
 *  4. For every matching `dag-summary` entry, inject its children that did
 *     not match on their own, at 0.9x the parent's score.
 *  5. Apply the token budget greedily.
 *
 * This function does not use embeddings and does not modify the entries;
 * use `markRetrieved` on the returned entries to update retrieval metadata.
 *
 * @param {string} query - Search text.
 * @param {Array<object>} entries - Candidate memory entries (not mutated).
 * @param {object} [options] - Reads `now`, `budget` (default 4000),
 *   `minResults` (default 1), `asOf` and `includeSuperseded`.
 * @returns {Array<{entry: object, score: number, bm25: number, cosine: number, tokens: number}>}
 *   Results sorted by score (descending) and limited by the token budget.
 */
export function search(query, entries, options = {}) {
    const now = options.now ?? new Date();
    const budget = options.budget ?? 4000;
    const minResults = options.minResults ?? 1;

    // Bi-temporal filtering. Work on a local so the parameter is never reassigned.
    let candidates = entries;
    if (options.asOf) {
        const asOfDate = new Date(options.asOf);
        // Index entries by id once (first occurrence wins, like Array#find)
        // instead of scanning the whole list for every superseded entry.
        const firstEntryById = new Map();
        for (const e of entries) {
            if (!firstEntryById.has(e.id)) {
                firstEntryById.set(e.id, e);
            }
        }
        const successorValidFrom = new Map();
        for (const e of entries) {
            if (!e.superseded_by) {
                continue;
            }
            const successor = firstEntryById.get(e.superseded_by);
            if (successor) {
                successorValidFrom.set(e.id, successor.valid_from);
            }
        }
        candidates = entries.filter((e) => {
            if (new Date(e.valid_from) > asOfDate) {
                return false;
            }
            if (!e.superseded_by) {
                return true;
            }
            // Unknown successor (or one without valid_from): keep the entry.
            const succVf = successorValidFrom.get(e.id);
            return succVf ? new Date(succVf) > asOfDate : true;
        });
    }
    else if (!options.includeSuperseded) {
        candidates = entries.filter((e) => !e.superseded_by);
    }
    if (candidates.length === 0) {
        return [];
    }
    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) {
        return [];
    }

    // Build corpus from all candidates (content + tags joined).
    const texts = candidates.map((e) => `${e.content} ${e.tags.join(' ')}`);
    const corpus = buildCorpus(texts);

    // Score each entry.
    const scored = [];
    const currentPathTags = extractPathTags(process.cwd());
    const activeScope = detectScope();
    const temporalDir = detectTemporalDirection(query);
    const temporalRange = temporalDir ? computeTemporalRange(candidates) : { minTime: 0, maxTime: 0 };
    for (let i = 0; i < candidates.length; i++) {
        const entry = candidates[i];
        const bm25 = bm25Score(corpus, i, queryTerms);
        if (bm25 <= 0) {
            continue;
        }
        const strength = calculateStrength(entry, now);
        const recency = recencyBoost(entry, now);
        // Normalise BM25 against query term count to keep scale consistent
        // (queryTerms is known to be non-empty at this point).
        const normBm25 = bm25 / queryTerms.length;
        let composite = normBm25 * (0.5 + 0.5 * strength) * (0.8 + 0.2 * recency);
        // Decision-tagged memories get a 1.2x recall boost.
        composite *= entry.tags.includes('decision') ? 1.2 : 1.0;
        // Path-based boost: matching path segments give up to 1.3x.
        const memPathTags = entry.tags.filter((t) => t.startsWith('path:'));
        composite *= 1.0 + (pathOverlapScore(memPathTags, currentPathTags) * 0.3);
        // Scope boost: 1.5x on match (signal 1), 0.5x on mismatch (signal -1).
        const scopeSignal = scopeMatch(entry.tags, activeScope);
        composite *= scopeSignal === 1 ? 1.5 : scopeSignal === -1 ? 0.5 : 1.0;
        // Extracted memories get a 1.3x boost.
        composite *= entry.tags.includes('extracted') ? 1.3 : 1.0;
        composite *= temporalBoost(entry, temporalDir, temporalRange);
        const tokens = estimateTokens(entry.content);
        scored.push({ entry, score: composite, bm25, cosine: 0, tokens });
    }

    // Sort by composite score descending.
    scored.sort((a, b) => b.score - a.score);

    // An extracted entry replaces the entry it was extracted from, whether
    // the source ranked above it (removed here) or below it (skipped later).
    const seenExtractedFrom = new Set();
    const deduped = [];
    for (const result of scored) {
        const entry = result.entry;
        if (entry.extracted_from) {
            seenExtractedFrom.add(entry.extracted_from);
            const sourceIdx = deduped.findIndex((d) => d.entry.id === entry.extracted_from);
            if (sourceIdx >= 0) {
                deduped.splice(sourceIdx, 1);
            }
            deduped.push(result);
        }
        else if (seenExtractedFrom.has(entry.id)) {
            continue;
        }
        else {
            deduped.push(result);
        }
    }

    // DAG drill-down: when a summary node matches, inject its children.
    const summaryIds = new Set(deduped
        .filter((r) => r.entry.tags.includes('dag-summary'))
        .map((r) => r.entry.id));
    if (summaryIds.size > 0) {
        // id -> first result with that id; doubles as the "already present"
        // check and the parent lookup, replacing repeated linear scans.
        const resultById = new Map();
        for (const r of deduped) {
            if (!resultById.has(r.entry.id)) {
                resultById.set(r.entry.id, r);
            }
        }
        for (const child of candidates) {
            if (!child.dag_parent_id || !summaryIds.has(child.dag_parent_id)) {
                continue;
            }
            if (resultById.has(child.id)) {
                continue;
            }
            const parentResult = resultById.get(child.dag_parent_id);
            const injected = {
                entry: child,
                score: parentResult ? parentResult.score * 0.9 : 0,
                bm25: 0,
                cosine: 0,
                tokens: estimateTokens(child.content),
            };
            deduped.push(injected);
            resultById.set(child.id, injected);
        }
        deduped.sort((a, b) => b.score - a.score);
    }

    // Apply token budget: once minResults is reached, results that would
    // overflow the budget are skipped, but smaller later ones may still fit.
    const results = [];
    let usedTokens = 0;
    for (const candidate of deduped) {
        if (results.length >= minResults && usedTokens + candidate.tokens > budget) {
            continue;
        }
        usedTokens += candidate.tokens;
        results.push(candidate);
    }
    return results;
}
|
|
720
|
+
/**
 * Produce retrieval-bumped versions of entries that a search returned.
 *
 * Superseded entries are passed through untouched (same object). Every other
 * entry is shallow-copied with its retrieval counter incremented, its
 * last-retrieved timestamp set to `now`, its half-life extended and its
 * strength recomputed. The input objects are not modified; the caller is
 * responsible for persisting the returned copies.
 *
 * @param {Array<object>} entries - Entries returned by a search.
 * @param {Date} [now] - Retrieval time; defaults to the current time.
 * @returns {Array<object>} One element per input entry, in the same order.
 */
export function markRetrieved(entries, now = new Date()) {
    const bump = (memory) => {
        // A stale memory that gets used again becomes live context.
        let confidence = memory.confidence;
        if (confidence === 'stale') {
            confidence = 'observed';
        }
        const refreshed = {
            ...memory,
            retrieval_count: memory.retrieval_count + 1,
            last_retrieved: now.toISOString(),
            // Each retrieval extends the half-life by two days.
            half_life_days: memory.half_life_days + 2,
            confidence,
        };
        refreshed.strength = calculateStrength(refreshed, now);
        return refreshed;
    };
    return entries.map((memory) => (memory.superseded_by ? memory : bump(memory)));
}
|
|
741
|
+
/**
 * Describe why a search result matched a query.
 *
 * Reports the distinct query terms that also occur in the entry's content or
 * tags, whether the result carries a positive BM25 and/or cosine score, a
 * human-readable reason string, and an envelope of entry metadata with
 * defaults for missing fields.
 *
 * @param {string} query - The original search text.
 * @param {{entry: object, bm25: number, cosine: number}} result - A search result.
 * @returns {{reason: string, matchedTerms: string[], hasBm25: boolean,
 *   hasEmbedding: boolean, cosineSimilarity: number, envelope: object}}
 */
export function explainMatch(query, result) {
    const distinctQueryTerms = [...new Set(tokenize(query))];
    const { entry } = result;
    const entryVocabulary = new Set(tokenize(`${entry.content} ${entry.tags.join(' ')}`));
    const matchedTerms = distinctQueryTerms.filter((term) => entryVocabulary.has(term));

    const hasBm25 = result.bm25 > 0;
    const hasEmbedding = result.cosine > 0;

    // One fragment per contributing signal, in a fixed order.
    const fragments = [
        hasBm25 ? `BM25: matched terms [${matchedTerms.join(', ')}]` : null,
        hasEmbedding ? `embedding similarity: ${result.cosine.toFixed(3)}` : null,
    ].filter((fragment) => fragment !== null);
    const reason = fragments.length > 0
        ? fragments.join('; ')
        : 'no direct term or embedding match';

    const envelope = {
        kind: entry.kind ?? 'distilled',
        scope: entry.scope ?? null,
        owner: entry.owner ?? null,
        artifact_ref: entry.artifact_ref ?? null,
        session_id: entry.source_session_id ?? null,
        confidence: entry.confidence ?? 'observed',
    };
    return {
        reason,
        matchedTerms,
        hasBm25,
        hasEmbedding,
        cosineSimilarity: result.cosine,
        envelope,
    };
}
|
|
783
|
+
/**
 * Jaccard similarity of the token sets of two strings.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} |A ∩ B| / |A ∪ B|; 1 when both token sets are empty and
 *   0 when exactly one of them is.
 */
export function textOverlap(a, b) {
    const left = new Set(tokenize(a));
    const right = new Set(tokenize(b));
    if (left.size === 0 || right.size === 0) {
        // Two empty sets count as identical; one empty set shares nothing.
        return left.size === right.size ? 1 : 0;
    }
    const shared = [...left].filter((token) => right.has(token)).length;
    return shared / (left.size + right.size - shared);
}
|
|
801
|
+
//# sourceMappingURL=search.js.map
|