hippo-memory 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/dist/cli.js +377 -2
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +8 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +5 -0
- package/dist/config.js.map +1 -1
- package/dist/eval.d.ts +68 -0
- package/dist/eval.d.ts.map +1 -0
- package/dist/eval.js +127 -0
- package/dist/eval.js.map +1 -0
- package/dist/search.d.ts +65 -0
- package/dist/search.d.ts.map +1 -1
- package/dist/search.js +155 -13
- package/dist/search.js.map +1 -1
- package/dist/shared.d.ts +3 -0
- package/dist/shared.d.ts.map +1 -1
- package/dist/shared.js +23 -7
- package/dist/shared.js.map +1 -1
- package/extensions/openclaw-plugin/openclaw.plugin.json +1 -1
- package/extensions/openclaw-plugin/package.json +1 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/dist/config.js
CHANGED
|
@@ -28,6 +28,10 @@ const DEFAULT_CONFIG = {
|
|
|
28
28
|
'refactor', 'perf', 'chore', 'breaking', 'deprecate',
|
|
29
29
|
],
|
|
30
30
|
physics: { ...DEFAULT_PHYSICS_CONFIG },
|
|
31
|
+
mmr: {
|
|
32
|
+
enabled: true,
|
|
33
|
+
lambda: 0.7,
|
|
34
|
+
},
|
|
31
35
|
};
|
|
32
36
|
export function loadConfig(hippoRoot) {
|
|
33
37
|
const configPath = path.join(hippoRoot, 'config.json');
|
|
@@ -49,6 +53,7 @@ export function loadConfig(hippoRoot) {
|
|
|
49
53
|
global: { ...DEFAULT_CONFIG.global, ...(raw.global ?? {}) },
|
|
50
54
|
gitLearnPatterns: raw.gitLearnPatterns ?? DEFAULT_CONFIG.gitLearnPatterns,
|
|
51
55
|
physics: mergePhysicsConfig(raw.physics),
|
|
56
|
+
mmr: { ...DEFAULT_CONFIG.mmr, ...(raw.mmr ?? {}) },
|
|
52
57
|
};
|
|
53
58
|
}
|
|
54
59
|
catch (err) {
|
package/dist/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAsB,sBAAsB,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAsB,sBAAsB,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAmCrG,MAAM,cAAc,GAAgB;IAClC,mBAAmB,EAAE,CAAC;IACtB,aAAa,EAAE,IAAI;IACnB,oBAAoB,EAAE,IAAI;IAC1B,UAAU,EAAE,UAAU;IACtB,gBAAgB,EAAE,IAAI;IACtB,gBAAgB,EAAE,IAAI;IACtB,SAAS,EAAE;QACT,OAAO,EAAE,IAAI;QACb,SAAS,EAAE,EAAE;KACd;IACD,UAAU,EAAE;QACV,OAAO,EAAE,MAAM;QACf,KAAK,EAAE,yBAAyB;QAChC,YAAY,EAAE,GAAG;KAClB;IACD,MAAM,EAAE;QACN,OAAO,EAAE,IAAI;KACd;IACD,gBAAgB,EAAE;QAChB,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ;QACnD,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,WAAW;KACrD;IACD,OAAO,EAAE,EAAE,GAAG,sBAAsB,EAAE;IACtC,GAAG,EAAE;QACH,OAAO,EAAE,IAAI;QACb,MAAM,EAAE,GAAG;KACZ;CACF,CAAC;AAEF,MAAM,UAAU,UAAU,CAAC,SAAiB;IAC1C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IACvD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,EAAE,GAAG,cAAc,EAAE,CAAC;IAC7D,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,MAAM,CAAC,CAAyB,CAAC;QACpF,MAAM,KAAK,GAAG,GAAG,CAAC,UAAU,CAAC;QAC7B,MAAM,UAAU,GAAG,KAAK,KAAK,OAAO,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,UAAU,CAAC;QACpF,OAAO;YACL,mBAAmB,EAAE,GAAG,CAAC,mBAAmB,IAAI,cAAc,CAAC,mBAAmB;YAClF,aAAa,EAAE,GAAG,CAAC,aAAa,IAAI,cAAc,CAAC,aAAa;YAChE,oBAAoB,EAAE,GAAG,CAAC,oBAAoB,IAAI,cAAc,CAAC,oBAAoB;YACrF,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,cAAc,CAAC,UAAU;YAC1D,gBAAgB,EAAE,GAAG,CAAC,gBAAgB,IAAI,cAAc,CAAC,gBAAgB;YACzE,gBAAgB,EAAE,GAAG,CAAC,gBAAgB,IAAI,cAAc,CAAC,gBAAgB;YACzE,SAAS,EAAE,EAAE,GAAG,cAAc,CAAC,SAAS,EAAE,GAAG,CAAC,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC,EAAE;YACpE,UAAU,EAAE,EAAE,GAAG,cAAc,CAAC,UAAU,EAAE,GAAG,CAAC,GAAG,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE;YACvE,MAAM,EAAE,EAAE,GAAG,cAAc,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE;YAC3D,gBAAgB,EAAE,GAAG,CAAC,gBAAgB,IAAI,cAAc,CAAC,gBAAgB;YACzE,OAAO,EAAE,kBAAkB,CAAC,GAAG,CAAC,OAA6C,CAAC;YAC9E,GAAG,EAAE,EAAE,GAAG,c
AAc,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE;SACnD,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,OAAO,CAAC,KAAK,CAAC,4BAA4B,UAAU,KAAK,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QACvG,CAAC;QACD,OAAO,EAAE,GAAG,cAAc,EAAE,CAAC;IAC/B,CAAC;AACH,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,SAAiB,EAAE,MAAmB;IAC/D,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IACvD,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;AACxE,CAAC"}
|
package/dist/eval.d.ts
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recall eval harness.
|
|
3
|
+
*
|
|
4
|
+
* Given a corpus of (query, expected_memory_ids) cases, run recall against
|
|
5
|
+
* the store and report ranking-quality metrics: MRR, Recall@K, NDCG@K.
|
|
6
|
+
*
|
|
7
|
+
* The goal is to make recall quality measurable so MMR lambda, embedding
|
|
8
|
+
* weights, and future scoring tweaks can be tuned against evidence instead
|
|
9
|
+
* of intuition.
|
|
10
|
+
*/
|
|
11
|
+
import type { MemoryEntry } from './memory.js';
|
|
12
|
+
export interface EvalCase {
|
|
13
|
+
/** Free-form ID for humans to reference the case. */
|
|
14
|
+
id: string;
|
|
15
|
+
/** The query text to run through recall. */
|
|
16
|
+
query: string;
|
|
17
|
+
/** Memory IDs considered relevant. At least one required. */
|
|
18
|
+
expectedIds: string[];
|
|
19
|
+
/** Optional short description so a failure report is self-explaining. */
|
|
20
|
+
description?: string;
|
|
21
|
+
}
|
|
22
|
+
export interface EvalCaseResult {
|
|
23
|
+
case: EvalCase;
|
|
24
|
+
returnedIds: string[];
|
|
25
|
+
/** 1 / rank of the first expected id, else 0. */
|
|
26
|
+
mrr: number;
|
|
27
|
+
/** |expected ∩ returned[0..K]| / |expected|, 0 when expected is empty. */
|
|
28
|
+
recallAt5: number;
|
|
29
|
+
recallAt10: number;
|
|
30
|
+
/** Normalized DCG at 10 using binary relevance (expected = 1, else 0). */
|
|
31
|
+
ndcgAt10: number;
|
|
32
|
+
}
|
|
33
|
+
export interface EvalSummary {
|
|
34
|
+
cases: EvalCaseResult[];
|
|
35
|
+
/** Simple arithmetic means across cases. */
|
|
36
|
+
meanMrr: number;
|
|
37
|
+
meanRecallAt5: number;
|
|
38
|
+
meanRecallAt10: number;
|
|
39
|
+
meanNdcgAt10: number;
|
|
40
|
+
/** Wall-clock runtime in ms for the whole eval. */
|
|
41
|
+
durationMs: number;
|
|
42
|
+
}
|
|
43
|
+
export interface RunEvalOptions {
|
|
44
|
+
mmr?: boolean;
|
|
45
|
+
mmrLambda?: number;
|
|
46
|
+
embeddingWeight?: number;
|
|
47
|
+
/** Max returned results per case. Larger than K-at-10 so metrics stay honest. */
|
|
48
|
+
budget?: number;
|
|
49
|
+
hippoRoot?: string;
|
|
50
|
+
now?: Date;
|
|
51
|
+
}
|
|
52
|
+
/** Mean Reciprocal Rank for a single ranking given expected ids. */
|
|
53
|
+
export declare function mrr(returned: string[], expected: string[]): number;
|
|
54
|
+
/** Recall@K — fraction of expected items found in the top-K. */
|
|
55
|
+
export declare function recallAtK(returned: string[], expected: string[], k: number): number;
|
|
56
|
+
/**
|
|
57
|
+
* Normalized Discounted Cumulative Gain at K with binary relevance.
|
|
58
|
+
* gain_i = 1 if returned[i] ∈ expected else 0. discount = log2(i + 2).
|
|
59
|
+
*/
|
|
60
|
+
export declare function ndcgAtK(returned: string[], expected: string[], k: number): number;
|
|
61
|
+
export declare function runEval(cases: EvalCase[], entries: MemoryEntry[], options?: RunEvalOptions): Promise<EvalSummary>;
|
|
62
|
+
/**
|
|
63
|
+
* For each memory, take its first 8 content words as a trivial query and
|
|
64
|
+
* expect that memory back. Useful as a smoke test: if recall can't find a
|
|
65
|
+
* memory by its own opening words, something is broken.
|
|
66
|
+
*/
|
|
67
|
+
export declare function bootstrapCorpus(entries: MemoryEntry[], maxCases?: number): EvalCase[];
|
|
68
|
+
//# sourceMappingURL=eval.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAO/C,MAAM,WAAW,QAAQ;IACvB,qDAAqD;IACrD,EAAE,EAAE,MAAM,CAAC;IACX,4CAA4C;IAC5C,KAAK,EAAE,MAAM,CAAC;IACd,6DAA6D;IAC7D,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,yEAAyE;IACzE,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,QAAQ,CAAC;IACf,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,iDAAiD;IACjD,GAAG,EAAE,MAAM,CAAC;IACZ,0EAA0E;IAC1E,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,0EAA0E;IAC1E,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,cAAc,EAAE,CAAC;IACxB,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,mDAAmD;IACnD,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,iFAAiF;IACjF,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,GAAG,CAAC,EAAE,IAAI,CAAC;CACZ;AAMD,oEAAoE;AACpE,wBAAgB,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,MAAM,CAOlE;AAED,gEAAgE;AAChE,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CASnF;AAED;;;GAGG;AACH,wBAAgB,OAAO,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAgBjF;AAMD,wBAAsB,OAAO,CAC3B,KAAK,EAAE,QAAQ,EAAE,EACjB,OAAO,EAAE,WAAW,EAAE,EACtB,OAAO,GAAE,cAAmB,GAC3B,OAAO,CAAC,WAAW,CAAC,CAuCtB;AAMD;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,QAAQ,SAAK,GAAG,QAAQ,EAAE,CAejF"}
|
package/dist/eval.js
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recall eval harness.
|
|
3
|
+
*
|
|
4
|
+
* Given a corpus of (query, expected_memory_ids) cases, run recall against
|
|
5
|
+
* the store and report ranking-quality metrics: MRR, Recall@K, NDCG@K.
|
|
6
|
+
*
|
|
7
|
+
* The goal is to make recall quality measurable so MMR lambda, embedding
|
|
8
|
+
* weights, and future scoring tweaks can be tuned against evidence instead
|
|
9
|
+
* of intuition.
|
|
10
|
+
*/
|
|
11
|
+
import { hybridSearch } from './search.js';
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Metrics — pure functions. K is inclusive of position K.
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/**
 * Reciprocal rank of a single ranking.
 *
 * Returns 1 / (1-based position of the first returned id that is also in
 * `expected`). Returns 0 when no returned id is expected, or when `expected`
 * is empty.
 *
 * @param returned Ranked ids, best first.
 * @param expected Ids considered relevant.
 * @returns A value in [0, 1].
 */
export function mrr(returned, expected) {
    if (expected.length === 0) {
        return 0;
    }
    const relevant = new Set(expected);
    const firstHit = returned.findIndex((id) => relevant.has(id));
    return firstHit < 0 ? 0 : 1 / (firstHit + 1);
}
|
|
26
|
+
/**
 * Recall@K — fraction of the distinct expected ids that appear in the top-K.
 *
 * Each expected id is counted at most once, so a ranking that repeats an id
 * cannot push recall above 1, and repeated ids in `expected` do not inflate
 * the denominator.
 *
 * @param returned Ranked ids, best first.
 * @param expected Ids considered relevant.
 * @param k Cut-off depth; fractional values are floored. Zero, negative or
 *   NaN values yield 0.
 * @returns A value in [0, 1]; 0 when `expected` is empty.
 */
export function recallAtK(returned, expected, k) {
    const expectedSet = new Set(expected);
    // `!(k > 0)` also rejects NaN. A negative k must not reach slice(), which
    // would drop items from the end instead of producing an empty top-K.
    if (expectedSet.size === 0 || !(k > 0)) {
        return 0;
    }
    const cutoff = Math.min(Math.floor(k), returned.length);
    const found = new Set();
    for (let i = 0; i < cutoff; i++) {
        const id = returned[i];
        if (expectedSet.has(id)) {
            found.add(id);
        }
    }
    return found.size / expectedSet.size;
}
|
|
39
|
+
/**
 * Normalized Discounted Cumulative Gain at K with binary relevance.
 * gain_i = 1 if returned[i] ∈ expected else 0. discount = log2(i + 2).
 *
 * Each expected id earns gain only at its first occurrence in `returned`, and
 * the ideal ranking is built from the number of distinct expected ids, so the
 * result stays within [0, 1] even when either list contains repeats.
 *
 * @param returned Ranked ids, best first.
 * @param expected Ids considered relevant.
 * @param k Cut-off depth; fractional values are floored. Zero, negative or
 *   NaN values yield 0.
 * @returns DCG / ideal DCG, or 0 when `expected` is empty.
 */
export function ndcgAtK(returned, expected, k) {
    const expectedSet = new Set(expected);
    // `!(k > 0)` also rejects NaN.
    if (expectedSet.size === 0 || !(k > 0)) {
        return 0;
    }
    const depth = Math.floor(k);
    const cutoff = Math.min(depth, returned.length);
    const credited = new Set();
    let dcg = 0;
    for (let i = 0; i < cutoff; i++) {
        const id = returned[i];
        if (expectedSet.has(id) && !credited.has(id)) {
            credited.add(id);
            dcg += 1 / Math.log2(i + 2);
        }
    }
    // Ideal DCG: all distinct relevant items at the top positions.
    const idealHits = Math.min(depth, expectedSet.size);
    let idcg = 0;
    for (let i = 0; i < idealHits; i++) {
        idcg += 1 / Math.log2(i + 2);
    }
    return idcg === 0 ? 0 : dcg / idcg;
}
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Runner
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
/**
 * Runs every eval case through `hybridSearch` and scores the returned ranking.
 *
 * Cases are evaluated one at a time, in the order given. For each case the
 * returned entry ids are scored with MRR, Recall@5, Recall@10 and NDCG@10
 * against the case's `expectedIds`. The summary holds the per-case results,
 * the arithmetic mean of each metric (0 when there are no cases), and the
 * elapsed wall-clock time in milliseconds.
 *
 * @param cases Eval cases to run.
 * @param entries Memory entries to search over.
 * @param options Search overrides forwarded to `hybridSearch`; `budget`
 *   defaults to 100_000.
 * @returns The per-case results and aggregate means.
 */
export async function runEval(cases, entries, options = {}) {
    // Generous default so metrics aren't truncated by the token budget.
    const budget = options.budget ?? 100_000;
    const startedAt = Date.now();
    const perCase = [];
    for (const evalCase of cases) {
        const hits = await hybridSearch(evalCase.query, entries, {
            budget,
            now: options.now,
            hippoRoot: options.hippoRoot,
            embeddingWeight: options.embeddingWeight,
            mmr: options.mmr,
            mmrLambda: options.mmrLambda,
        });
        const returnedIds = hits.map(({ entry }) => entry.id);
        perCase.push({
            case: evalCase,
            returnedIds,
            mrr: mrr(returnedIds, evalCase.expectedIds),
            recallAt5: recallAtK(returnedIds, evalCase.expectedIds, 5),
            recallAt10: recallAtK(returnedIds, evalCase.expectedIds, 10),
            ndcgAt10: ndcgAtK(returnedIds, evalCase.expectedIds, 10),
        });
    }
    // Divide by at least 1 so an empty run reports 0 instead of NaN.
    const denominator = Math.max(1, perCase.length);
    const meanOf = (metric) => perCase.reduce((sum, result) => sum + result[metric], 0) / denominator;
    return {
        cases: perCase,
        meanMrr: meanOf('mrr'),
        meanRecallAt5: meanOf('recallAt5'),
        meanRecallAt10: meanOf('recallAt10'),
        meanNdcgAt10: meanOf('ndcgAt10'),
        durationMs: Date.now() - startedAt,
    };
}
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
// Bootstrap — generate a synthetic corpus from current memories
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
/**
 * Builds a synthetic smoke-test corpus from existing memories.
 *
 * For each memory, the content is split on whitespace and words of two
 * characters or fewer are dropped. Memories left with fewer than three words
 * are skipped. The first eight remaining words become the query, and the
 * memory's own id is the single expected result. Generation stops once
 * `maxCases` cases have been produced.
 *
 * @param entries Memories to derive cases from.
 * @param maxCases Upper bound on the number of generated cases (default 50).
 * @returns The generated eval cases, in entry order.
 */
export function bootstrapCorpus(entries, maxCases = 50) {
    const corpus = [];
    for (const memory of entries) {
        if (corpus.length >= maxCases) {
            break;
        }
        const contentWords = memory.content
            .trim()
            .split(/\s+/)
            .filter((word) => word.length > 2);
        if (contentWords.length >= 3) {
            corpus.push({
                id: `bootstrap_${memory.id}`,
                query: contentWords.slice(0, 8).join(' '),
                expectedIds: [memory.id],
                description: `trivial self-query on memory ${memory.id}`,
            });
        }
    }
    return corpus;
}
|
|
127
|
+
//# sourceMappingURL=eval.js.map
|
package/dist/eval.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAkD3C,8EAA8E;AAC9E,0DAA0D;AAC1D,8EAA8E;AAE9E,oEAAoE;AACpE,MAAM,UAAU,GAAG,CAAC,QAAkB,EAAE,QAAkB;IACxD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACpC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,gEAAgE;AAChE,MAAM,UAAU,SAAS,CAAC,QAAkB,EAAE,QAAkB,EAAE,CAAS;IACzE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACpC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAClC,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YAAE,IAAI,EAAE,CAAC;IAClC,CAAC;IACD,OAAO,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,OAAO,CAAC,QAAkB,EAAE,QAAkB,EAAE,CAAS;IACvE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACpC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACjC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IACD,kDAAkD;IAClD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC/C,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,IAAI,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC/B,CAAC;IACD,OAAO,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC;AACrC,CAAC;AAED,8EAA8E;AAC9E,SAAS;AACT,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,KAAiB,EACjB,OAAsB,EACtB,UAA0B,EAAE;IAE5B,MAAM,MAAM,GAAG,O
AAO,CAAC,MAAM,IAAI,OAAO,CAAC,CAAG,uCAAuC;IACnF,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,OAAO,GAAqB,EAAE,CAAC;IAErC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE;YAClD,MAAM;YACN,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,OAAO,CAAC,SAAS;SAC7B,CAAC,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,CAAC;YACP,WAAW;YACX,GAAG,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,WAAW,CAAC;YACpC,SAAS,EAAE,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YACnD,UAAU,EAAE,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;YACrD,QAAQ,EAAE,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;SAClD,CAAC,CAAC;IACL,CAAC;IAED,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IAC3D,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IACvE,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IACzE,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IAErE,OAAO;QACL,KAAK,EAAE,OAAO;QACd,OAAO;QACP,aAAa;QACb,cAAc;QACd,YAAY;QACZ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC/B,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,gEAAgE;AAChE,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,OAAsB,EAAE,QAAQ,GAAG,EAAE;IACnE,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ;YAAE,MAAM;QACpC,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAC/
B,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1C,KAAK,CAAC,IAAI,CAAC;YACT,EAAE,EAAE,aAAa,CAAC,CAAC,EAAE,EAAE;YACvB,KAAK;YACL,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACnB,WAAW,EAAE,gCAAgC,CAAC,CAAC,EAAE,EAAE;SACpD,CAAC,CAAC;IACL,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/search.d.ts
CHANGED
|
@@ -15,6 +15,54 @@ export interface SearchResult {
|
|
|
15
15
|
bm25: number;
|
|
16
16
|
cosine: number;
|
|
17
17
|
tokens: number;
|
|
18
|
+
/** Populated when search is called with options.explain === true. */
|
|
19
|
+
breakdown?: ScoreBreakdown;
|
|
20
|
+
}
|
|
21
|
+
export interface ScoreBreakdown {
|
|
22
|
+
/**
|
|
23
|
+
* - `hybrid`: BM25 blended with a non-zero cosine from a cached doc vector.
|
|
24
|
+
* - `hybrid-no-vec`: Query was embedded but this doc had no cached vector,
|
|
25
|
+
* so the effective score came from BM25 alone even though weights say
|
|
26
|
+
* otherwise. Usually means `hippo embed` hasn't run on this memory.
|
|
27
|
+
* - `bm25-only`: Embedding pipeline unavailable or the model requires re-index.
|
|
28
|
+
* - `physics`: Scored by the physics engine (gravity + momentum + cluster).
|
|
29
|
+
*/
|
|
30
|
+
mode: 'hybrid' | 'hybrid-no-vec' | 'bm25-only' | 'physics';
|
|
31
|
+
/** BM25 score after normalization by max-in-corpus (0..1). */
|
|
32
|
+
normBm25: number;
|
|
33
|
+
/** Weight applied to BM25 in the hybrid blend. */
|
|
34
|
+
bm25Weight: number;
|
|
35
|
+
/** Weight applied to cosine in the hybrid blend. */
|
|
36
|
+
embeddingWeight: number;
|
|
37
|
+
/** Cosine similarity (0 when embeddings not used). */
|
|
38
|
+
cosine: number;
|
|
39
|
+
/** Blended base score before multipliers. */
|
|
40
|
+
base: number;
|
|
41
|
+
/** Multiplier from memory strength: 0.5 + 0.5*strength. */
|
|
42
|
+
strengthMultiplier: number;
|
|
43
|
+
/** Multiplier from age: 0.8 + 0.2*recencyBoost. */
|
|
44
|
+
recencyMultiplier: number;
|
|
45
|
+
/** 1.2 if tagged 'decision', else 1.0. */
|
|
46
|
+
decisionBoost: number;
|
|
47
|
+
/** 1.0..1.3 based on cwd path tag overlap. */
|
|
48
|
+
pathBoost: number;
|
|
49
|
+
/** Extra multiplier applied post-hybrid (e.g. 1.2x for local hits in a
|
|
50
|
+
* local+global merged search). 1.0 when not applicable. */
|
|
51
|
+
sourceBump: number;
|
|
52
|
+
/** Retrieval-time outcome personalization: 1 + 0.15*tanh((pos - neg) / 2), clipped
|
|
53
|
+
* to [0.85, 1.15]. Immediate nudge from `hippo outcome --good/--bad`.
|
|
54
|
+
* Separate from the slow strength-via-reward-factor path. */
|
|
55
|
+
outcomeBoost: number;
|
|
56
|
+
/** Pre-MMR rank (1-indexed). Only set when MMR re-ranking ran. */
|
|
57
|
+
preMmrRank?: number;
|
|
58
|
+
/** Post-MMR rank (1-indexed). Only set when MMR re-ranking ran. */
|
|
59
|
+
postMmrRank?: number;
|
|
60
|
+
/** Query terms that appeared verbatim in the doc. */
|
|
61
|
+
matchedTerms: string[];
|
|
62
|
+
/** Final composite score (= base * multipliers). */
|
|
63
|
+
final: number;
|
|
64
|
+
/** Age of the memory in whole days, at scoring time. */
|
|
65
|
+
ageDays: number;
|
|
18
66
|
}
|
|
19
67
|
/**
|
|
20
68
|
* Hybrid search: BM25 + cosine similarity (when embeddings are available).
|
|
@@ -28,7 +76,23 @@ export declare function hybridSearch(query: string, entries: MemoryEntry[], opti
|
|
|
28
76
|
now?: Date;
|
|
29
77
|
hippoRoot?: string;
|
|
30
78
|
embeddingWeight?: number;
|
|
79
|
+
explain?: boolean;
|
|
80
|
+
/** Set to `false` to disable MMR re-ranking even when embeddings are available. Defaults to `true`. */
|
|
81
|
+
mmr?: boolean;
|
|
82
|
+
/** MMR balance: 1.0 = pure relevance, 0.0 = pure diversity. Default 0.7. */
|
|
83
|
+
mmrLambda?: number;
|
|
31
84
|
}): Promise<SearchResult[]>;
|
|
85
|
+
/**
|
|
86
|
+
* MMR (Maximal Marginal Relevance) re-ranking.
|
|
87
|
+
*
|
|
88
|
+
* Iteratively picks the candidate that maximises
|
|
89
|
+
* lambda * relevance - (1 - lambda) * max(cos(cand, picked))
|
|
90
|
+
*
|
|
91
|
+
* Inputs must already be sorted by relevance descending. When `explain` is
|
|
92
|
+
* true, attaches `preMmrRank` / `postMmrRank` to each result's breakdown.
|
|
93
|
+
* Exported for unit tests; production callers go through hybridSearch.
|
|
94
|
+
*/
|
|
95
|
+
export declare function mmrRerank(scored: SearchResult[], embeddingIndex: Record<string, number[]>, lambda: number, explain: boolean): SearchResult[];
|
|
32
96
|
/**
|
|
33
97
|
* Physics-based search: scores memories using gravitational force, momentum,
|
|
34
98
|
* and cluster amplification. Falls back to classic hybrid for memories
|
|
@@ -40,6 +104,7 @@ export declare function physicsSearch(query: string, entries: MemoryEntry[], opt
|
|
|
40
104
|
hippoRoot?: string;
|
|
41
105
|
physicsConfig?: PhysicsConfig;
|
|
42
106
|
queryEmbedding?: number[];
|
|
107
|
+
explain?: boolean;
|
|
43
108
|
}): Promise<SearchResult[]>;
|
|
44
109
|
/**
|
|
45
110
|
* Search entries using BM25 + strength + recency composite score.
|
package/dist/search.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAqB,MAAM,aAAa,CAAC;AAY7D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AASzD,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAM/C;AAgED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAiBD,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,WAAW,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAqB,MAAM,aAAa,CAAC;AAY7D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AASzD,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAM/C;AAgED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAiBD,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,WAAW,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,qEAAqE;IACrE,SAAS,CAAC,EAAE,cAAc,CAAC;CAC5B;AAED,MAAM,WAAW,cAAc;IAC7B;;;;;;;OAOG;IACH,IAAI,EAAE,QAAQ,GAAG,eAAe,GAAG,WAAW,GAAG,SAAS,CAAC;IAC3D,8DAA8D;IAC9D,QAAQ,EAAE,MAAM,CAAC;IACjB,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,eAAe,EAAE,MAAM,CAAC;IACxB,sDAAsD;IACtD,MAAM,EAAE,MAAM,CAAC;IACf,6CAA6C;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,2DAA2D;IAC3D,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mDAAmD;IACnD,iBAAiB,EAAE,MAAM,CAAC;IAC1B,0CAA0C;IAC1C,aAAa,EAAE,MAAM,CAAC;IACtB,8CAA8C;IAC9C,SAAS,EAAE,MAAM,CAAC;IAClB;gEAC4D;IAC5D,UAAU,EAAE,MAAM,CAAC;IACnB;;kEAE8D;IAC9D,YAAY,EAAE,MAAM,CAAC;IACrB,kEAAkE;IAClE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qDAAqD;IACrD,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,oDAAoD;IACpD,KAAK,EAAE,MAAM,CAAC;IACd,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;GAMG;AACH,wBAAsB,YAAY,CAChC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,WAAW,EAAE,EACtB,OAAO,GAAE;IACP,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,CAAC,EAAE,IAAI,CAAC;IACX,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,iEAAiE;IACjE,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAC;CACf,GACL,OAAO,CAAC,YAAY,EAAE,CAAC,CAwKzB;AAED;;;;;;;;;GASG;AACH,wBAAgB,SAAS,CACvB,MAAM,EAAE,YAAY,EAAE,EACtB,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,EACxC,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,OAAO,GACf,YAAY,EAAE,CAgDhB;AAED;;;;GAIG;AACH,wBAAsB,aAAa,CACjC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,WAAW,EAAE,EACtB,OAAO,GAAE;IACP,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,CAAC,EAAE,IAAI,CAAC;IACX,SAAS,CAAC,EAAE,MAAM,CAAC;I
ACnB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,OAAO,CAAC,EAAE,OAAO,CAAC;CACd,GACL,OAAO,CAAC,YAAY,EAAE,CAAC,CA0HzB;AAiBD;;;;;;;GAOG;AACH,wBAAgB,MAAM,CACpB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,WAAW,EAAE,EACtB,OAAO,GAAE;IAAE,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,IAAI,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAO,GAChE,YAAY,EAAE,CA2DhB;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,GAAG,GAAE,IAAiB,GAAG,WAAW,EAAE,CAc3F;AAMD,MAAM,WAAW,gBAAgB;IAC/B,mCAAmC;IACnC,MAAM,EAAE,MAAM,CAAC;IACf,iEAAiE;IACjE,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,4CAA4C;IAC5C,OAAO,EAAE,OAAO,CAAC;IACjB,4DAA4D;IAC5D,YAAY,EAAE,OAAO,CAAC;IACtB,yDAAyD;IACzD,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,gBAAgB,CAgClF;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAaxD"}
|
package/dist/search.js
CHANGED
|
@@ -89,6 +89,9 @@ export async function hybridSearch(query, entries, options = {}) {
|
|
|
89
89
|
const budget = options.budget ?? 4000;
|
|
90
90
|
const embeddingWeight = options.embeddingWeight ?? 0.6;
|
|
91
91
|
const bm25Weight = 1 - embeddingWeight;
|
|
92
|
+
const explain = options.explain ?? false;
|
|
93
|
+
const mmrEnabled = options.mmr ?? true;
|
|
94
|
+
const mmrLambda = options.mmrLambda ?? 0.7;
|
|
92
95
|
if (entries.length === 0)
|
|
93
96
|
return [];
|
|
94
97
|
const queryTerms = tokenize(query);
|
|
@@ -122,6 +125,7 @@ export async function hybridSearch(query, entries, options = {}) {
|
|
|
122
125
|
// Score each entry
|
|
123
126
|
const scored = [];
|
|
124
127
|
const currentPathTags = extractPathTags(process.cwd());
|
|
128
|
+
const queryTermSet = new Set(queryTerms);
|
|
125
129
|
for (let i = 0; i < entries.length; i++) {
|
|
126
130
|
const rawBm25 = bm25Scores[i];
|
|
127
131
|
if (!useEmbeddings && rawBm25 <= 0)
|
|
@@ -129,21 +133,31 @@ export async function hybridSearch(query, entries, options = {}) {
|
|
|
129
133
|
const normBm25 = rawBm25 / maxBm25;
|
|
130
134
|
const strength = calculateStrength(entries[i], now);
|
|
131
135
|
const recency = recencyBoost(entries[i], now);
|
|
136
|
+
const strengthMultiplier = 0.5 + 0.5 * strength;
|
|
137
|
+
const recencyMultiplier = 0.8 + 0.2 * recency;
|
|
132
138
|
let compositeScore;
|
|
133
139
|
let cosineScore = 0;
|
|
140
|
+
let base;
|
|
141
|
+
let modeLabel;
|
|
142
|
+
let hadCachedVec = false;
|
|
134
143
|
if (useEmbeddings) {
|
|
135
144
|
const cached = embeddingIndex[entries[i].id];
|
|
136
|
-
|
|
145
|
+
hadCachedVec = Boolean(cached && queryVector.length > 0);
|
|
146
|
+
cosineScore = hadCachedVec
|
|
137
147
|
? Math.max(0, cosineSimilarity(queryVector, cached))
|
|
138
148
|
: 0;
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
149
|
+
base = bm25Weight * normBm25 + embeddingWeight * cosineScore;
|
|
150
|
+
compositeScore = base * strengthMultiplier * recencyMultiplier;
|
|
151
|
+
// Distinguish "real hybrid" from "query embedded but doc has no cached
|
|
152
|
+
// vector" so explain can tell users their index is stale without lying
|
|
153
|
+
// that the search actually used embeddings.
|
|
154
|
+
modeLabel = hadCachedVec ? 'hybrid' : 'hybrid-no-vec';
|
|
142
155
|
}
|
|
143
156
|
else {
|
|
144
157
|
// Pure BM25 path: identical to original behavior
|
|
145
|
-
|
|
146
|
-
compositeScore =
|
|
158
|
+
base = queryTerms.length > 0 ? rawBm25 / queryTerms.length : rawBm25;
|
|
159
|
+
compositeScore = base * strengthMultiplier * recencyMultiplier;
|
|
160
|
+
modeLabel = 'bm25-only';
|
|
147
161
|
}
|
|
148
162
|
// Decision-tagged memories get a 1.2x recall boost
|
|
149
163
|
const decisionBoost = entries[i].tags.includes('decision') ? 1.2 : 1.0;
|
|
@@ -153,25 +167,131 @@ export async function hybridSearch(query, entries, options = {}) {
|
|
|
153
167
|
const pathScore = pathOverlapScore(memPathTags, currentPathTags);
|
|
154
168
|
const pathBoost = 1.0 + (pathScore * 0.3);
|
|
155
169
|
compositeScore *= pathBoost;
|
|
170
|
+
// Retrieval-time outcome personalization: nudge up/down from user feedback.
|
|
171
|
+
// Distinct from reward-factor-via-strength (slow); this is immediate.
|
|
172
|
+
const pos = entries[i].outcome_positive ?? 0;
|
|
173
|
+
const neg = entries[i].outcome_negative ?? 0;
|
|
174
|
+
const outcomeBoost = pos === 0 && neg === 0
|
|
175
|
+
? 1.0
|
|
176
|
+
: Math.max(0.85, Math.min(1.15, 1 + 0.15 * Math.tanh((pos - neg) / 2)));
|
|
177
|
+
compositeScore *= outcomeBoost;
|
|
156
178
|
if (compositeScore <= 0)
|
|
157
179
|
continue;
|
|
158
180
|
const tokens = estimateTokens(entries[i].content);
|
|
159
|
-
|
|
181
|
+
const result = {
|
|
182
|
+
entry: entries[i],
|
|
183
|
+
score: compositeScore,
|
|
184
|
+
bm25: rawBm25,
|
|
185
|
+
cosine: cosineScore,
|
|
186
|
+
tokens,
|
|
187
|
+
};
|
|
188
|
+
if (explain) {
|
|
189
|
+
const docTerms = new Set(tokenize(`${entries[i].content} ${entries[i].tags.join(' ')}`));
|
|
190
|
+
const matchedTerms = [];
|
|
191
|
+
for (const t of queryTermSet)
|
|
192
|
+
if (docTerms.has(t))
|
|
193
|
+
matchedTerms.push(t);
|
|
194
|
+
const ageDays = Math.max(0, Math.floor((now.getTime() - new Date(entries[i].created).getTime()) / 86_400_000));
|
|
195
|
+
result.breakdown = {
|
|
196
|
+
mode: modeLabel,
|
|
197
|
+
normBm25,
|
|
198
|
+
bm25Weight: useEmbeddings ? bm25Weight : 1,
|
|
199
|
+
embeddingWeight: useEmbeddings ? embeddingWeight : 0,
|
|
200
|
+
cosine: cosineScore,
|
|
201
|
+
base,
|
|
202
|
+
strengthMultiplier,
|
|
203
|
+
recencyMultiplier,
|
|
204
|
+
decisionBoost,
|
|
205
|
+
pathBoost,
|
|
206
|
+
sourceBump: 1,
|
|
207
|
+
outcomeBoost,
|
|
208
|
+
matchedTerms,
|
|
209
|
+
final: compositeScore,
|
|
210
|
+
ageDays,
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
scored.push(result);
|
|
160
214
|
}
|
|
161
215
|
// Sort by composite score descending
|
|
162
216
|
scored.sort((a, b) => b.score - a.score);
|
|
217
|
+
// MMR re-ranking: de-cluster near-duplicates by trading relevance for
|
|
218
|
+
// diversity. Only applies when embeddings are loaded (doc-to-doc similarity
|
|
219
|
+
// is via cosine of cached vectors); otherwise we return the pure-relevance
|
|
220
|
+
// ordering unchanged.
|
|
221
|
+
const applyMmr = mmrEnabled && useEmbeddings && scored.length > 1 && mmrLambda < 1;
|
|
222
|
+
const ordered = applyMmr
|
|
223
|
+
? mmrRerank(scored, embeddingIndex, mmrLambda, explain)
|
|
224
|
+
: scored;
|
|
163
225
|
// Apply token budget
|
|
164
226
|
const results = [];
|
|
165
227
|
let usedTokens = 0;
|
|
166
|
-
for (let i = 0; i <
|
|
167
|
-
const tokens =
|
|
228
|
+
for (let i = 0; i < ordered.length; i++) {
|
|
229
|
+
const tokens = ordered[i].tokens;
|
|
168
230
|
if (i > 0 && usedTokens + tokens > budget)
|
|
169
231
|
continue; // always include first result
|
|
170
232
|
usedTokens += tokens;
|
|
171
|
-
results.push(
|
|
233
|
+
results.push(ordered[i]);
|
|
172
234
|
}
|
|
173
235
|
return results;
|
|
174
236
|
}
|
|
237
|
+
/**
|
|
238
|
+
* MMR (Maximal Marginal Relevance) re-ranking.
|
|
239
|
+
*
|
|
240
|
+
* Iteratively picks the candidate that maximises
|
|
241
|
+
* lambda * relevance - (1 - lambda) * max(cos(cand, picked))
|
|
242
|
+
*
|
|
243
|
+
* Inputs must already be sorted by relevance descending. When `explain` is
|
|
244
|
+
* true, attaches `preMmrRank` / `postMmrRank` to each result's breakdown.
|
|
245
|
+
* Exported for unit tests; production callers go through hybridSearch.
|
|
246
|
+
*/
|
|
247
|
+
export function mmrRerank(scored, embeddingIndex, lambda, explain) {
|
|
248
|
+
if (scored.length === 0)
|
|
249
|
+
return scored;
|
|
250
|
+
const maxScore = scored[0].score || 1;
|
|
251
|
+
const normScore = scored.map((r) => r.score / maxScore);
|
|
252
|
+
const vectors = scored.map((r) => embeddingIndex[r.entry.id] ?? null);
|
|
253
|
+
const picked = [];
|
|
254
|
+
const remaining = new Set(scored.map((_, i) => i));
|
|
255
|
+
while (remaining.size > 0) {
|
|
256
|
+
let bestIdx = -1;
|
|
257
|
+
let bestMmr = -Infinity;
|
|
258
|
+
for (const i of remaining) {
|
|
259
|
+
const rel = normScore[i];
|
|
260
|
+
let maxSim = 0;
|
|
261
|
+
const vi = vectors[i];
|
|
262
|
+
if (vi) {
|
|
263
|
+
for (const p of picked) {
|
|
264
|
+
const vp = embeddingIndex[p.entry.id];
|
|
265
|
+
if (!vp || vp.length !== vi.length)
|
|
266
|
+
continue;
|
|
267
|
+
const sim = Math.max(0, cosineSimilarity(vi, vp));
|
|
268
|
+
if (sim > maxSim)
|
|
269
|
+
maxSim = sim;
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
const mmr = lambda * rel - (1 - lambda) * maxSim;
|
|
273
|
+
if (mmr > bestMmr) {
|
|
274
|
+
bestMmr = mmr;
|
|
275
|
+
bestIdx = i;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
if (bestIdx === -1)
|
|
279
|
+
break;
|
|
280
|
+
remaining.delete(bestIdx);
|
|
281
|
+
picked.push(scored[bestIdx]);
|
|
282
|
+
}
|
|
283
|
+
if (explain) {
|
|
284
|
+
const preRank = new Map();
|
|
285
|
+
scored.forEach((r, i) => preRank.set(r.entry.id, i + 1));
|
|
286
|
+
picked.forEach((r, i) => {
|
|
287
|
+
if (r.breakdown) {
|
|
288
|
+
r.breakdown.preMmrRank = preRank.get(r.entry.id);
|
|
289
|
+
r.breakdown.postMmrRank = i + 1;
|
|
290
|
+
}
|
|
291
|
+
});
|
|
292
|
+
}
|
|
293
|
+
return picked;
|
|
294
|
+
}
|
|
175
295
|
/**
|
|
176
296
|
* Physics-based search: scores memories using gravitational force, momentum,
|
|
177
297
|
* and cluster amplification. Falls back to classic hybrid for memories
|
|
@@ -181,6 +301,7 @@ export async function physicsSearch(query, entries, options = {}) {
|
|
|
181
301
|
const now = options.now ?? new Date();
|
|
182
302
|
const budget = options.budget ?? 4000;
|
|
183
303
|
const config = options.physicsConfig ?? DEFAULT_PHYSICS_CONFIG;
|
|
304
|
+
const explain = options.explain ?? false;
|
|
184
305
|
if (entries.length === 0 || !options.hippoRoot)
|
|
185
306
|
return [];
|
|
186
307
|
// Get query embedding (use pre-computed if provided)
|
|
@@ -240,18 +361,39 @@ export async function physicsSearch(query, entries, options = {}) {
|
|
|
240
361
|
const entry = entryMap.get(s.memoryId);
|
|
241
362
|
if (!entry)
|
|
242
363
|
continue;
|
|
243
|
-
|
|
364
|
+
const result = {
|
|
244
365
|
entry,
|
|
245
366
|
score: s.finalScore,
|
|
246
367
|
bm25: 0,
|
|
247
368
|
cosine: s.baseScore,
|
|
248
369
|
tokens: estimateTokens(entry.content),
|
|
249
|
-
}
|
|
370
|
+
};
|
|
371
|
+
if (explain) {
|
|
372
|
+
const ageDays = Math.max(0, Math.floor((now.getTime() - new Date(entry.created).getTime()) / 86_400_000));
|
|
373
|
+
result.breakdown = {
|
|
374
|
+
mode: 'physics',
|
|
375
|
+
normBm25: 0,
|
|
376
|
+
bm25Weight: 0,
|
|
377
|
+
embeddingWeight: 1,
|
|
378
|
+
cosine: s.baseScore,
|
|
379
|
+
base: s.baseScore,
|
|
380
|
+
strengthMultiplier: 1,
|
|
381
|
+
recencyMultiplier: 1,
|
|
382
|
+
decisionBoost: 1,
|
|
383
|
+
pathBoost: 1,
|
|
384
|
+
sourceBump: 1,
|
|
385
|
+
outcomeBoost: 1,
|
|
386
|
+
matchedTerms: [],
|
|
387
|
+
final: s.finalScore,
|
|
388
|
+
ageDays,
|
|
389
|
+
};
|
|
390
|
+
}
|
|
391
|
+
physicsResults.push(result);
|
|
250
392
|
}
|
|
251
393
|
}
|
|
252
394
|
// Score classic memories (no physics state)
|
|
253
395
|
const classicResults = classicEntries.length > 0
|
|
254
|
-
? await hybridSearch(query, classicEntries, { ...options, budget: Infinity })
|
|
396
|
+
? await hybridSearch(query, classicEntries, { ...options, budget: Infinity, explain })
|
|
255
397
|
: [];
|
|
256
398
|
// Normalize both pools to [0, 1] and merge
|
|
257
399
|
const merged = mergeScorePools(physicsResults, classicResults);
|