@seanhogg/builderforce-memory 2026.6.27 → 2026.6.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/memory/MemoryStore.d.ts +12 -0
- package/dist/memory/MemoryStore.d.ts.map +1 -1
- package/dist/memory/MemoryStore.js +28 -0
- package/dist/memory/MemoryStore.js.map +1 -1
- package/dist/retrieval/HybridRetriever.d.ts +56 -0
- package/dist/retrieval/HybridRetriever.d.ts.map +1 -0
- package/dist/retrieval/HybridRetriever.js +75 -0
- package/dist/retrieval/HybridRetriever.js.map +1 -0
- package/dist/retrieval/bm25.d.ts +32 -0
- package/dist/retrieval/bm25.d.ts.map +1 -0
- package/dist/retrieval/bm25.js +66 -0
- package/dist/retrieval/bm25.js.map +1 -0
- package/dist/retrieval/chunk.d.ts +33 -0
- package/dist/retrieval/chunk.d.ts.map +1 -0
- package/dist/retrieval/chunk.js +83 -0
- package/dist/retrieval/chunk.js.map +1 -0
- package/dist/retrieval/fusion.d.ts +40 -0
- package/dist/retrieval/fusion.d.ts.map +1 -0
- package/dist/retrieval/fusion.js +64 -0
- package/dist/retrieval/fusion.js.map +1 -0
- package/dist/retrieval/index.d.ts +16 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +12 -0
- package/dist/retrieval/index.js.map +1 -0
- package/package.json +8 -4
- package/src/index.ts +48 -0
- package/src/memory/MemoryStore.ts +36 -0
- package/src/retrieval/HybridRetriever.ts +122 -0
- package/src/retrieval/bm25.ts +83 -0
- package/src/retrieval/chunk.ts +101 -0
- package/src/retrieval/fusion.ts +84 -0
- package/src/retrieval/index.ts +24 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rank fusion + diversity reranking.
|
|
3
|
+
*
|
|
4
|
+
* • Reciprocal Rank Fusion (RRF) merges the dense (vector) and sparse (BM25)
|
|
5
|
+
* rankings into one list without needing the two score scales to be
|
|
6
|
+
* commensurable — it fuses on RANK, not raw score. This is the standard,
|
|
7
|
+
* parameter-light way to combine hybrid retrieval signals.
|
|
8
|
+
* • Maximal Marginal Relevance (MMR) reranks the fused list to trade off
|
|
9
|
+
* relevance against novelty, so the top-k isn't five near-duplicate chunks.
|
|
10
|
+
*
|
|
11
|
+
* Pure and zero-dependency.
|
|
12
|
+
*/
|
|
13
|
+
/** One ranking to be merged by rank fusion. */
export interface RankedList {
    /** Ids in rank order; index 0 is the most relevant. */
    ids: string[];
    /** Multiplier applied to this list's contribution in the fusion. Treated as 1 when omitted. */
    weight?: number;
}
|
|
19
|
+
/** A single entry of a fused ranking. */
export interface FusedHit {
    /** Id of the fused item, as it appeared in the input lists. */
    id: string;
    /** Accumulated fusion score; results are ordered by this value, highest first. */
    score: number;
}
|
|
23
|
+
/**
 * Merges any number of ranked lists with Reciprocal Rank Fusion.
 *
 * Each id scores `Σ_lists weight / (k + rank)`, where `rank` is the id's
 * 1-based position in a list. A larger `k` (default 60, the canonical TREC
 * value) flattens the advantage of top-ranked items.
 *
 * @param lists Rankings to merge, each ordered most relevant first.
 * @param k     Damping constant. Default 60.
 * @returns Fused hits ordered by descending score.
 */
export declare function reciprocalRankFusion(lists: RankedList[], k?: number): FusedHit[];
|
|
29
|
+
/** A candidate that can be scored by Maximal Marginal Relevance. */
export interface MmrCandidate {
    /** Id returned in the reranked output. */
    id: string;
    /** Embedding compared against the query and against already-selected candidates. */
    vector: Float32Array;
}
|
|
33
|
+
/**
 * Reranks candidates with Maximal Marginal Relevance.
 *
 * Candidates are picked greedily, up to `topK` of them; each pick maximises
 * `λ·sim(query, d) − (1−λ)·max sim(d, already-selected)`. With λ=1 only
 * relevance counts; smaller values favour diversity. Every candidate must carry
 * a vector, so callers should filter out vectorless items before calling.
 *
 * @param queryVec   Query embedding.
 * @param candidates Candidates to choose from.
 * @param topK       Maximum number of ids to return.
 * @param lambda     Relevance/diversity trade-off. Default 0.7.
 * @returns Selected ids, in selection order.
 */
export declare function maximalMarginalRelevance(queryVec: Float32Array, candidates: MmrCandidate[], topK: number, lambda?: number): string[];
|
|
40
|
+
//# sourceMappingURL=fusion.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fusion.d.ts","sourceRoot":"","sources":["../../src/retrieval/fusion.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAIH,MAAM,WAAW,UAAU;IACvB,wCAAwC;IACxC,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,+DAA+D;IAC/D,MAAM,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,QAAQ;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;CACjB;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,CAAC,SAAK,GAAG,QAAQ,EAAE,CAW5E;AAED,MAAM,WAAW,YAAY;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,YAAY,CAAC;CACxB;AAED;;;;;GAKG;AACH,wBAAgB,wBAAwB,CACpC,QAAQ,EAAE,YAAY,EACtB,UAAU,EAAE,YAAY,EAAE,EAC1B,IAAI,EAAE,MAAM,EACZ,MAAM,SAAM,GACb,MAAM,EAAE,CAsBV"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rank fusion + diversity reranking.
|
|
3
|
+
*
|
|
4
|
+
* • Reciprocal Rank Fusion (RRF) merges the dense (vector) and sparse (BM25)
|
|
5
|
+
* rankings into one list without needing the two score scales to be
|
|
6
|
+
* commensurable — it fuses on RANK, not raw score. This is the standard,
|
|
7
|
+
* parameter-light way to combine hybrid retrieval signals.
|
|
8
|
+
* • Maximal Marginal Relevance (MMR) reranks the fused list to trade off
|
|
9
|
+
* relevance against novelty, so the top-k isn't five near-duplicate chunks.
|
|
10
|
+
*
|
|
11
|
+
* Pure and zero-dependency.
|
|
12
|
+
*/
|
|
13
|
+
import { cosineSimilarity } from '../similarity/index.js';
|
|
14
|
+
/**
 * Reciprocal Rank Fusion over any number of ranked lists.
 *
 * score(d) = Σ_lists weight / (k + rank(d)), with rank(d) the 1-based position
 * of d in a list. `k` (default 60, the canonical TREC value) damps the
 * contribution of low-ranked items.
 *
 * Each list contributes at most once per id: if an id is repeated inside one
 * list, only its first (best-ranked) occurrence is counted, so a duplicated id
 * cannot inflate its own score.
 *
 * @param lists Ranked lists (`{ ids, weight? }`), each ordered most relevant first.
 * @param k     Damping constant. Default 60.
 * @returns `{ id, score }` hits sorted by descending score.
 */
export function reciprocalRankFusion(lists, k = 60) {
    const totals = new Map();
    for (const list of lists) {
        const weight = list.weight ?? 1;
        // Ids already credited for THIS list; later repeats are ignored.
        const counted = new Set();
        list.ids.forEach((id, position) => {
            if (counted.has(id))
                return;
            counted.add(id);
            // `position` is 0-based, hence the +1 to obtain the 1-based rank.
            totals.set(id, (totals.get(id) ?? 0) + weight / (k + position + 1));
        });
    }
    return [...totals.entries()]
        .map(([id, score]) => ({ id, score }))
        .sort((a, b) => b.score - a.score);
}
|
|
31
|
+
/**
 * Maximal Marginal Relevance rerank.
 *
 * Greedily picks up to `topK` candidates. At every step the candidate with the
 * highest `λ·sim(query, d) − (1−λ)·max sim(d, already-selected)` wins; ties go
 * to the candidate that appears first. λ=1 is pure relevance, lower λ injects
 * diversity. The redundancy term never drops below 0, so negative similarity
 * to the selected set earns no bonus.
 *
 * Candidates without vectors must be filtered out by the caller (they cannot
 * be MMR-scored). The input array is not modified.
 *
 * @returns Ids of the selected candidates, in selection order.
 */
export function maximalMarginalRelevance(queryVec, candidates, topK, lambda = 0.7) {
    // Work on a copy so the caller's array is left untouched.
    const pool = candidates.slice();
    const picked = [];

    // Query relevance is fixed for the whole run, so compute it once per id.
    const queryRelevance = new Map();
    pool.forEach(cand => {
        queryRelevance.set(cand.id, cosineSimilarity(queryVec, cand.vector));
    });

    while (picked.length < topK && pool.length > 0) {
        let winner = 0;
        let winnerScore = -Infinity;

        pool.forEach((cand, idx) => {
            // Highest similarity between this candidate and anything already chosen.
            const redundancy = picked.reduce((highest, chosen) => {
                const sim = cosineSimilarity(cand.vector, chosen.vector);
                return sim > highest ? sim : highest;
            }, 0);

            const mmr = lambda * queryRelevance.get(cand.id) - (1 - lambda) * redundancy;
            if (mmr > winnerScore) {
                winnerScore = mmr;
                winner = idx;
            }
        });

        const [chosen] = pool.splice(winner, 1);
        picked.push(chosen);
    }

    return picked.map(cand => cand.id);
}
|
|
64
|
+
//# sourceMappingURL=fusion.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fusion.js","sourceRoot":"","sources":["../../src/retrieval/fusion.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAc1D;;;;GAIG;AACH,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,CAAC,GAAG,EAAE;IAC5D,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE;YAC1B,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;QAC9D,CAAC,CAAC,CAAC;IACP,CAAC;IACD,OAAO,CAAC,GAAG,GAAG,CAAC,OAAO,EAAE,CAAC;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;SACrC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAC3C,CAAC;AAOD;;;;;GAKG;AACH,MAAM,UAAU,wBAAwB,CACpC,QAAsB,EACtB,UAA0B,EAC1B,IAAY,EACZ,MAAM,GAAG,GAAG;IAEZ,MAAM,SAAS,GAAG,CAAC,GAAG,UAAU,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC5C,KAAK,MAAM,CAAC,IAAI,SAAS;QAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,gBAAgB,CAAC,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAErF,OAAO,QAAQ,CAAC,MAAM,GAAG,IAAI,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,SAAS,GAAG,CAAC,QAAQ,CAAC;QAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC;YACxB,IAAI,gBAAgB,GAAG,CAAC,CAAC;YACzB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;gBACvB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC;gBACjD,IAAI,GAAG,GAAG,gBAAgB;oBAAE,gBAAgB,GAAG,GAAG,CAAC;YACvD,CAAC;YACD,MAAM,GAAG,GAAG,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAE,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,gBAAgB,CAAC;YAC5E,IAAI,GAAG,GAAG,SAAS,EAAE,CAAC;gBAAC,SAAS,GAAG,GAAG,CAAC;gBAAC,OAAO,GAAG,CAAC,CAAC;YAAC,CAAC;QAC1D,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC
,CAAC,CAAC,CAAE,CAAC,CAAC;IACpD,CAAC;IACD,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACnC,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval layer — chunking, BM25, rank fusion, and the HybridRetriever.
|
|
3
|
+
*
|
|
4
|
+
* The classic RAG pieces the memory stack previously lacked (chunking, hybrid
|
|
5
|
+
* dense+sparse search, reranking), implemented zero-dependency so they run in the
|
|
6
|
+
* browser, Node, and the SSM runtime alike.
|
|
7
|
+
*/
|
|
8
|
+
export { chunkText } from './chunk.js';
|
|
9
|
+
export type { Chunk, ChunkOptions } from './chunk.js';
|
|
10
|
+
export { bm25Search } from './bm25.js';
|
|
11
|
+
export type { Bm25Doc, Bm25Hit, Bm25Options } from './bm25.js';
|
|
12
|
+
export { reciprocalRankFusion, maximalMarginalRelevance } from './fusion.js';
|
|
13
|
+
export type { RankedList, FusedHit, MmrCandidate } from './fusion.js';
|
|
14
|
+
export { hybridRetrieve } from './HybridRetriever.js';
|
|
15
|
+
export type { RetrievalCandidate, HybridQuery, HybridRetrieveOptions, HybridHit, } from './HybridRetriever.js';
|
|
16
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/retrieval/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,YAAY,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAEtD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,YAAY,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAE/D,OAAO,EAAE,oBAAoB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AAC7E,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEtE,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACR,kBAAkB,EAClB,WAAW,EACX,qBAAqB,EACrB,SAAS,GACZ,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval layer — chunking, BM25, rank fusion, and the HybridRetriever.
|
|
3
|
+
*
|
|
4
|
+
* The classic RAG pieces the memory stack previously lacked (chunking, hybrid
|
|
5
|
+
* dense+sparse search, reranking), implemented zero-dependency so they run in the
|
|
6
|
+
* browser, Node, and the SSM runtime alike.
|
|
7
|
+
*/
|
|
8
|
+
export { chunkText } from './chunk.js';
|
|
9
|
+
export { bm25Search } from './bm25.js';
|
|
10
|
+
export { reciprocalRankFusion, maximalMarginalRelevance } from './fusion.js';
|
|
11
|
+
export { hybridRetrieve } from './HybridRetriever.js';
|
|
12
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/retrieval/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAGvC,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAGvC,OAAO,EAAE,oBAAoB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AAG7E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@seanhogg/builderforce-memory",
|
|
3
|
-
"version": "2026.6.27",
|
|
4
|
-
"description": "BuilderForce Agent Memory — runtime layer. SSM execution, Transformer orchestration, online distillation, and persistent agent memory for BuilderForce.ai agents.",
|
|
3
|
+
"version": "2026.6.28",
|
|
4
|
+
"description": "BuilderForce Agent Memory — runtime layer. SSM execution, Transformer orchestration, online distillation, hybrid RAG retrieval, and persistent agent memory for BuilderForce.ai agents.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -9,6 +9,10 @@
|
|
|
9
9
|
".": {
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
11
|
"import": "./dist/index.js"
|
|
12
|
+
},
|
|
13
|
+
"./retrieval": {
|
|
14
|
+
"types": "./dist/retrieval/index.d.ts",
|
|
15
|
+
"import": "./dist/retrieval/index.js"
|
|
12
16
|
}
|
|
13
17
|
},
|
|
14
18
|
"type": "module",
|
|
@@ -49,7 +53,7 @@
|
|
|
49
53
|
},
|
|
50
54
|
"homepage": "https://github.com/SeanHogg/builderforce-memory/tree/main/packages/memory#readme",
|
|
51
55
|
"peerDependencies": {
|
|
52
|
-
"@seanhogg/builderforce-memory-engine": "^2026.6.
|
|
56
|
+
"@seanhogg/builderforce-memory-engine": "^2026.6.28"
|
|
53
57
|
},
|
|
54
58
|
"devDependencies": {
|
|
55
59
|
"@jest/globals": "^29.7.0",
|
|
@@ -62,7 +66,7 @@
|
|
|
62
66
|
"jest": "^29.7.0",
|
|
63
67
|
"ts-jest": "^29.2.0",
|
|
64
68
|
"typescript": "^5.0.0",
|
|
65
|
-
"@seanhogg/builderforce-memory-engine": "2026.6.
|
|
69
|
+
"@seanhogg/builderforce-memory-engine": "2026.6.28"
|
|
66
70
|
},
|
|
67
71
|
"jest": {
|
|
68
72
|
"preset": "ts-jest/presets/default-esm",
|
package/src/index.ts
CHANGED
|
@@ -68,6 +68,31 @@ export type {
|
|
|
68
68
|
Region,
|
|
69
69
|
} from '@seanhogg/builderforce-memory-engine';
|
|
70
70
|
|
|
71
|
+
// ── Mixture-of-Experts (shared-expert hybrid — the Evermind generator's sparsity) ──
|
|
72
|
+
// Re-exported from the engine so consumers reach it from @seanhogg/builderforce-memory.
|
|
73
|
+
export {
|
|
74
|
+
SharedExpertMoE,
|
|
75
|
+
LoadBalanceAccumulator,
|
|
76
|
+
DEFAULT_MOE_CONFIG,
|
|
77
|
+
DEFAULT_MOE_SEED,
|
|
78
|
+
MoETrainer,
|
|
79
|
+
EvermindModelPackage,
|
|
80
|
+
} from '@seanhogg/builderforce-memory-engine';
|
|
81
|
+
export type {
|
|
82
|
+
MoEConfig,
|
|
83
|
+
MoEParam,
|
|
84
|
+
RouteResult,
|
|
85
|
+
MoESample,
|
|
86
|
+
MoETrainOptions,
|
|
87
|
+
MoEEpochResult,
|
|
88
|
+
EvermindModelManifest,
|
|
89
|
+
EvermindModelCard,
|
|
90
|
+
PackageMeta,
|
|
91
|
+
ValidationResult,
|
|
92
|
+
} from '@seanhogg/builderforce-memory-engine';
|
|
93
|
+
export { EvermindLM, EvermindLMTrainer } from '@seanhogg/builderforce-memory-engine';
|
|
94
|
+
export type { EvermindLMConfig, LMGenerateOptions, TextCodec } from '@seanhogg/builderforce-memory-engine';
|
|
95
|
+
|
|
71
96
|
// ── Runtime ───────────────────────────────────────────────────────────────────
|
|
72
97
|
export { SSMRuntime } from './runtime/SSMRuntime.js';
|
|
73
98
|
export type { SSMRuntimeOptions, GenerateOptions } from './runtime/SSMRuntime.js';
|
|
@@ -101,6 +126,29 @@ export type {
|
|
|
101
126
|
// ── Similarity primitives ──────────────────────────────────────────────────────
|
|
102
127
|
export { cosineSimilarity, jaccardSimilarity, tokenize } from './similarity/index.js';
|
|
103
128
|
|
|
129
|
+
// ── Retrieval (chunking, BM25, rank fusion, hybrid RAG) ────────────────────────
|
|
130
|
+
export {
|
|
131
|
+
chunkText,
|
|
132
|
+
bm25Search,
|
|
133
|
+
reciprocalRankFusion,
|
|
134
|
+
maximalMarginalRelevance,
|
|
135
|
+
hybridRetrieve,
|
|
136
|
+
} from './retrieval/index.js';
|
|
137
|
+
export type {
|
|
138
|
+
Chunk,
|
|
139
|
+
ChunkOptions,
|
|
140
|
+
Bm25Doc,
|
|
141
|
+
Bm25Hit,
|
|
142
|
+
Bm25Options,
|
|
143
|
+
RankedList,
|
|
144
|
+
FusedHit,
|
|
145
|
+
MmrCandidate,
|
|
146
|
+
RetrievalCandidate,
|
|
147
|
+
HybridQuery,
|
|
148
|
+
HybridRetrieveOptions,
|
|
149
|
+
HybridHit,
|
|
150
|
+
} from './retrieval/index.js';
|
|
151
|
+
|
|
104
152
|
// ── Router ────────────────────────────────────────────────────────────────────
|
|
105
153
|
export { InferenceRouter } from './router/InferenceRouter.js';
|
|
106
154
|
export type {
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import { SSMError } from '../errors/SSMError.js';
|
|
12
12
|
import { tokenize, jaccardSimilarity, cosineSimilarity } from '../similarity/index.js';
|
|
13
|
+
import { hybridRetrieve, type RetrievalCandidate, type HybridRetrieveOptions } from '../retrieval/index.js';
|
|
13
14
|
|
|
14
15
|
export type FactType = 'text' | 'json' | 'number' | 'boolean';
|
|
15
16
|
|
|
@@ -265,6 +266,41 @@ export class MemoryStore {
|
|
|
265
266
|
return scored.slice(0, topK).map(s => s.entry);
|
|
266
267
|
}
|
|
267
268
|
|
|
269
|
+
/**
 * Hybrid recall: fuses dense (SSM-embedding cosine) and sparse (BM25 lexical)
 * rankings via Reciprocal Rank Fusion, then applies an MMR diversity rerank.
 *
 * This is the production RAG retrieval path — it catches both semantic matches
 * (embeddings) and exact-token matches (BM25 — identifiers, codes, rare names)
 * that cosine-only `recallSimilar` misses, and avoids returning near-duplicate
 * facts. Degrades to BM25-only when no embedding-capable runtime is available.
 *
 * @param query   Free-text query.
 * @param topK    Maximum number of entries to return. Used unless `opts.topK`
 *                supplies a number, in which case that value wins.
 * @param runtime Optional runtime; embeddings are only requested when it
 *                exposes an `embed` function.
 * @param opts    Extra tuning forwarded to `hybridRetrieve`.
 * @returns Matching entries, best first.
 */
async recallHybrid(
    query: string,
    topK: number,
    runtime?: SSMRuntimeRef,
    opts?: HybridRetrieveOptions,
): Promise<MemoryEntry[]> {
    const all = await this.recallAll();
    if (all.length === 0) return [];

    // Embed candidates + query where a runtime is available; null vectors are
    // fine — hybridRetrieve degrades that candidate to BM25-only.
    const canEmbed = runtime != null && typeof runtime.embed === 'function';
    const queryVec = canEmbed ? (await this._embedWithCache(runtime, query)) ?? undefined : undefined;

    const candidates: RetrievalCandidate[] = [];
    for (const entry of all) {
        const vector = canEmbed ? (await this._embedWithCache(runtime, entry.content)) ?? undefined : undefined;
        candidates.push({ id: entry.key, text: entry.content, vector });
    }

    // Spread `opts` first and resolve topK explicitly: with `{ topK, ...opts }`
    // an `opts` object carrying `topK: undefined` would erase the caller's topK
    // and silently fall back to hybridRetrieve's default.
    const hits = hybridRetrieve(
        { text: query, vector: queryVec },
        candidates,
        { ...opts, topK: opts?.topK ?? topK },
    );
    const byKey = new Map(all.map(e => [e.key, e]));
    return hits.map(h => byKey.get(h.id)).filter((e): e is MemoryEntry => !!e);
}
|
|
303
|
+
|
|
268
304
|
/**
|
|
269
305
|
* Returns a cached embedding for `text`, computing it via `runtime.embed()`
|
|
270
306
|
* on a cache miss. Returns `null` (never throws) when embedding is
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HybridRetriever — dense + sparse retrieval with rank fusion and diversity rerank.
|
|
3
|
+
*
|
|
4
|
+
* This is the piece that takes the memory layer from "cosine-only similarity" to a
|
|
5
|
+
* full hybrid RAG retriever:
|
|
6
|
+
*
|
|
7
|
+
* 1. Dense: cosine over embeddings (SSM hidden-state vectors, or any embedder).
|
|
8
|
+
* 2. Sparse: BM25 lexical scoring (catches exact tokens dense search misses).
|
|
9
|
+
* 3. Fuse: Reciprocal Rank Fusion combines the two rankings.
|
|
10
|
+
* 4. Rerank: optional MMR pass for relevance/novelty trade-off (diversity).
|
|
11
|
+
*
|
|
12
|
+
* It is storage-agnostic — give it candidates (id + text + optional vector) and a
|
|
13
|
+
* query (text + optional vector). It degrades gracefully: no query vector / no
|
|
14
|
+
* candidate vectors → BM25-only; no overlap → dense-only.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { cosineSimilarity } from '../similarity/index.js';
|
|
18
|
+
import { bm25Search, type Bm25Options } from './bm25.js';
|
|
19
|
+
import { reciprocalRankFusion, maximalMarginalRelevance, type MmrCandidate } from './fusion.js';
|
|
20
|
+
|
|
21
|
+
/** A document that hybrid retrieval may return. */
export interface RetrievalCandidate {
    /** Id reported back on the matching hit. */
    id: string;
    /** Text scored by the sparse (BM25) pass and echoed on the hit. */
    text: string;
    /**
     * Precomputed embedding. When omitted (or empty) the candidate is left out
     * of the dense pass and can only be found lexically.
     */
    vector?: Float32Array;
}
|
|
27
|
+
|
|
28
|
+
/** The query handed to hybrid retrieval. */
export interface HybridQuery {
    /** Query text used for the sparse (BM25) pass. */
    text: string;
    /** Query embedding. Leave unset to run BM25-only retrieval. */
    vector?: Float32Array;
}
|
|
33
|
+
|
|
34
|
+
/** Tuning knobs for hybrid retrieval. Every field is optional. */
export interface HybridRetrieveOptions {
    /** How many results to return. Default 5. */
    topK?: number;
    /** Damping constant passed to Reciprocal Rank Fusion. Default 60. */
    rrfK?: number;
    /** Fusion weight of the dense (cosine) ranking. Default 1. */
    denseWeight?: number;
    /** Fusion weight of the sparse (BM25) ranking. Default 1. */
    sparseWeight?: number;
    /** Whether to run the MMR diversity rerank over the fused top results. Default true. */
    rerank?: boolean;
    /** MMR relevance/diversity balance; 1 means pure relevance. Default 0.7. */
    mmrLambda?: number;
    /** Parameters forwarded to the BM25 scorer. */
    bm25?: Bm25Options;
}
|
|
50
|
+
|
|
51
|
+
/** One result of hybrid retrieval. */
export interface HybridHit {
    /** Id of the matched candidate. */
    id: string;
    /** Text of the matched candidate. */
    text: string;
    /** Fused RRF score, computed before any MMR rerank changes the order. */
    score: number;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Runs the full hybrid pipeline over `candidates` and returns the top-K hits.
 * Pure given its inputs (embeddings are supplied by the caller) so it is directly
 * unit-testable without a model or vector DB.
 *
 * Pipeline: dense cosine ranking (only when the query has a vector) → BM25
 * ranking → Reciprocal Rank Fusion of the non-empty rankings → optional MMR
 * rerank of the fused window.
 *
 * Note: when the MMR rerank runs, the candidates it selects are placed ahead of
 * every other fused id (including ids that have no vector), so vectorless hits
 * only surface when fewer than `topK` vectored candidates are selected.
 *
 * @param query      Query text plus optional embedding.
 * @param candidates Documents to search.
 * @param opts       Tuning options; see {@link HybridRetrieveOptions}.
 * @returns Up to `topK` hits. A non-positive (or NaN) `topK` yields `[]`.
 */
export function hybridRetrieve(
    query: HybridQuery,
    candidates: RetrievalCandidate[],
    opts: HybridRetrieveOptions = {},
): HybridHit[] {
    // `slice(0, n)` with a negative n returns "all but the last |n|" items, so
    // a non-positive or NaN request must short-circuit instead of reaching slice.
    const requestedTopK = opts.topK ?? 5;
    const topK = requestedTopK > 0 ? requestedTopK : 0;
    if (topK === 0 || candidates.length === 0) return [];

    const byId = new Map(candidates.map(c => [c.id, c]));

    // ── Dense ranking (cosine) ────────────────────────────────────────────────
    let denseIds: string[] = [];
    if (query.vector) {
        denseIds = candidates
            .filter(c => c.vector && c.vector.length > 0)
            .map(c => ({ id: c.id, score: cosineSimilarity(query.vector!, c.vector!) }))
            .sort((a, b) => b.score - a.score)
            .map(h => h.id);
    }

    // ── Sparse ranking (BM25) ─────────────────────────────────────────────────
    const sparseIds = bm25Search(query.text, candidates, opts.bm25).map(h => h.id);

    // ── Fuse ──────────────────────────────────────────────────────────────────
    // Empty rankings are dropped so a missing signal does not take part in fusion.
    const fused = reciprocalRankFusion(
        [
            { ids: denseIds, weight: opts.denseWeight ?? 1 },
            { ids: sparseIds, weight: opts.sparseWeight ?? 1 },
        ].filter(l => l.ids.length > 0),
        opts.rrfK ?? 60,
    );
    if (fused.length === 0) return [];

    // ── Rerank (MMR over fused top, using whatever vectors we have) ────────────
    const rerank = opts.rerank ?? true;
    let orderedIds: string[];
    if (rerank && query.vector) {
        // Consider a generous fused window so MMR has room to diversify.
        const window = fused.slice(0, topK * 4);
        const mmrCands: MmrCandidate[] = window
            .map(f => byId.get(f.id))
            .filter((c): c is RetrievalCandidate => !!c && !!c.vector && c.vector.length > 0)
            .map(c => ({ id: c.id, vector: c.vector! }));
        if (mmrCands.length > 0) {
            const reranked = maximalMarginalRelevance(query.vector, mmrCands, topK, opts.mmrLambda ?? 0.7);
            // MMR only ranks the vectored subset; append any remaining fused ids after.
            const seen = new Set(reranked);
            orderedIds = [...reranked, ...fused.map(f => f.id).filter(id => !seen.has(id))];
        } else {
            orderedIds = fused.map(f => f.id);
        }
    } else {
        orderedIds = fused.map(f => f.id);
    }

    // Hits always report the fused (pre-rerank) score, whatever the final order.
    const fusedScore = new Map(fused.map(f => [f.id, f.score]));
    return orderedIds
        .slice(0, topK)
        .map(id => ({ id, text: byId.get(id)!.text, score: fusedScore.get(id)! }));
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 (Okapi) lexical ranking.
|
|
3
|
+
*
|
|
4
|
+
* The keyword half of hybrid retrieval. Dense vector search matches meaning but
|
|
5
|
+
* misses exact tokens (identifiers, error codes, rare names); BM25 catches those.
|
|
6
|
+
* Fusing the two (see {@link ./fusion}) is what lifts the memory layer from
|
|
7
|
+
* "cosine only" to a hybrid retriever on par with Weaviate-style search.
|
|
8
|
+
*
|
|
9
|
+
* Pure and zero-dependency — reuses the shared `tokenize` from ../similarity.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { tokenize } from '../similarity/index.js';
|
|
13
|
+
|
|
14
|
+
/** Okapi BM25 tuning parameters. */
export interface Bm25Options {
    /** Term-frequency saturation; larger values let repeated terms count for more. Default 1.5. */
    k1?: number;
    /** Document-length normalisation in 0..1; larger values penalise long documents more. Default 0.75. */
    b?: number;
}
|
|
20
|
+
|
|
21
|
+
/** A document to be scored by BM25. */
export interface Bm25Doc {
    /** Id reported on the resulting hit. */
    id: string;
    /** Text that is tokenised and matched against the query. */
    text: string;
}
|
|
25
|
+
|
|
26
|
+
/** A document that matched at least one query term. */
export interface Bm25Hit {
    /** Id of the matching document. */
    id: string;
    /** BM25 score; always greater than 0 for a returned hit. */
    score: number;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Ranks `docs` against `query` using Okapi BM25.
 *
 * The index (term frequencies, document frequencies, average length) is built
 * on the fly from `docs`, so nothing needs to be persisted between calls.
 * Documents that share no term with the query score 0 and are left out.
 *
 * @param query Free-text query; repeated query terms count once.
 * @param docs  Documents to score.
 * @param opts  Optional `k1` / `b` tuning.
 * @returns Hits with a positive score, sorted by descending score. Empty when
 *          there are no documents or the query yields no tokens.
 */
export function bm25Search(query: string, docs: Bm25Doc[], opts: Bm25Options = {}): Bm25Hit[] {
    const saturation = opts.k1 ?? 1.5;
    const lengthWeight = opts.b ?? 0.75;
    const docCount = docs.length;
    if (docCount === 0) return [];

    const terms = [...new Set(tokenize(query))];
    if (terms.length === 0) return [];

    // Term counts and token length for every document.
    const stats = docs.map(doc => {
        const tokens = tokenize(doc.text);
        const counts = new Map<string, number>();
        tokens.forEach(tok => counts.set(tok, (counts.get(tok) ?? 0) + 1));
        return { id: doc.id, counts, length: tokens.length };
    });

    const totalLength = stats.reduce((sum, s) => sum + s.length, 0);
    // Falls back to 1 when every document is empty, avoiding a division by zero.
    const meanLength = totalLength / docCount || 1;

    // idf with the +1 smoothing variant (always non-negative).
    const idfByTerm = new Map<string, number>();
    for (const term of terms) {
        const containing = stats.filter(s => s.counts.has(term)).length;
        idfByTerm.set(term, Math.log(1 + (docCount - containing + 0.5) / (containing + 0.5)));
    }

    const ranked: Bm25Hit[] = [];
    for (const s of stats) {
        // Length-normalisation part of the denominator; identical for every term of this doc.
        const lengthNorm = saturation * (1 - lengthWeight + lengthWeight * (s.length / meanLength));
        const score = terms.reduce((total, term) => {
            const freq = s.counts.get(term);
            if (!freq) return total;
            const numer = freq * (saturation + 1);
            const denom = freq + lengthNorm;
            return total + idfByTerm.get(term)! * (numer / denom);
        }, 0);
        if (score > 0) ranked.push({ id: s.id, score });
    }

    ranked.sort((x, y) => y.score - x.score);
    return ranked;
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document chunking — recursive character text splitter with overlap.
|
|
3
|
+
*
|
|
4
|
+
* The classic RAG ingestion step the memory layer was missing: large documents
|
|
5
|
+
* are split into smaller, semantically-coherent chunks before embedding so that
|
|
6
|
+
* retrieval returns precise passages rather than whole files. Mirrors the
|
|
7
|
+
* behaviour of LangChain's RecursiveCharacterTextSplitter (split on the largest
|
|
8
|
+
* natural boundary that fits, fall back to finer ones) but is zero-dependency.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
export interface ChunkOptions {
    /** Target maximum chunk size in characters. Default 1000; values below 1 are raised to 1. */
    chunkSize?: number;
    /**
     * Characters of overlap carried from the end of one chunk into the next,
     * preserving context across boundaries. Default 200. Clamped to the range
     * 0..chunkSize-1.
     */
    chunkOverlap?: number;
    /** Separators tried in order, largest natural boundary first. */
    separators?: string[];
}

export interface Chunk {
    /** The chunk text (whitespace-trimmed). */
    text: string;
    /** 0-based ordinal of this chunk within its source document. */
    index: number;
    /**
     * Offset (in UTF-16 code units, as used by `String.prototype.slice`) of this
     * chunk's first character within the ORIGINAL, untrimmed document.
     */
    start: number;
}

const DEFAULT_SEPARATORS = ['\n\n', '\n', '. ', ' ', ''];

/**
 * Text paired with the document offset of each of its characters.
 * Invariant: `text.length === offsets.length`.
 */
interface OffsetSpan {
    text: string;
    offsets: number[];
}

/** Builds a span for `text` that sits contiguously in the document at `start`. */
function contiguousSpan(text: string, start: number): OffsetSpan {
    const offsets: number[] = [];
    for (let i = 0; i < text.length; i++) offsets.push(start + i);
    return { text, offsets };
}

/** Appends `extra` to `target` in place, keeping text and offsets aligned. */
function appendSpan(target: OffsetSpan, extra: OffsetSpan): void {
    target.text += extra.text;
    for (const offset of extra.offsets) target.offsets.push(offset);
}

/** `slice` applied identically to the text and its offsets. */
function sliceSpan(span: OffsetSpan, from: number, to?: number): OffsetSpan {
    return { text: span.text.slice(from, to), offsets: span.offsets.slice(from, to) };
}

/** `trim` that also drops the offsets of the removed whitespace. */
function trimSpan(span: OffsetSpan): OffsetSpan {
    const lead = span.text.length - span.text.trimStart().length;
    const text = span.text.trim();
    return { text, offsets: span.offsets.slice(lead, lead + text.length) };
}

/**
 * Splits `text` into overlapping chunks no larger than `chunkSize` characters,
 * preferring the largest natural separator that keeps a piece under the limit.
 * Returns `[]` for empty/whitespace input. Deterministic and pure.
 *
 * Every chunk's `start` is the position, in the original `text`, of the
 * character the chunk begins with. Offsets are tracked per character while the
 * chunks are assembled, so they remain correct for repeated passages, for
 * hard-split chunks and for chunks that begin with overlap carried over from
 * the previous chunk.
 *
 * @param text Document to split.
 * @param opts Size, overlap and separator settings.
 * @returns Chunks in document order.
 */
export function chunkText(text: string, opts: ChunkOptions = {}): Chunk[] {
    const chunkSize = Math.max(1, opts.chunkSize ?? 1000);
    const overlap = Math.min(Math.max(0, opts.chunkOverlap ?? 200), chunkSize - 1);
    const separators = opts.separators ?? DEFAULT_SEPARATORS;

    const trimmed = text.trim();
    if (trimmed.length === 0) return [];
    // Offset of the trimmed text inside the original document.
    const base = text.length - text.trimStart().length;
    if (trimmed.length <= chunkSize) return [{ text: trimmed, index: 0, start: base }];

    // The pieces concatenate back to `trimmed` exactly, so a running cursor
    // gives each piece's document offset.
    const pieces = splitRecursive(trimmed, chunkSize, separators);

    const chunks: Chunk[] = [];
    let lastEmitted: OffsetSpan = { text: '', offsets: [] };
    let buf: OffsetSpan = { text: '', offsets: [] };

    const emit = (span: OffsetSpan, fallbackStart: number): void => {
        chunks.push({ text: span.text, index: chunks.length, start: span.offsets[0] ?? fallbackStart });
        lastEmitted = span;
    };
    const flush = (): void => {
        const body = trimSpan(buf);
        if (body.text.length > 0) emit(body, base);
        buf = { text: '', offsets: [] };
    };
    // Overlap carried into the next buffer; `slice(-0)` would return everything,
    // hence the explicit guard.
    const tailOf = (span: OffsetSpan): OffsetSpan =>
        overlap > 0 ? sliceSpan(span, -overlap) : { text: '', offsets: [] };

    let cursor = base;
    for (const piece of pieces) {
        const pieceSpan = contiguousSpan(piece, cursor);
        cursor += piece.length;

        if (buf.text.length + piece.length <= chunkSize) {
            appendSpan(buf, pieceSpan);
            continue;
        }

        flush();
        // Carry overlap from the previous chunk's tail.
        buf = tailOf(lastEmitted);
        appendSpan(buf, pieceSpan);
        // Overlap + piece (or an unsplittable piece) may exceed chunkSize: hard-split.
        while (buf.text.length > chunkSize) {
            const head = sliceSpan(buf, 0, chunkSize);
            const rest = sliceSpan(buf, chunkSize);
            emit(trimSpan(head), head.offsets[0] ?? base);
            buf = tailOf(head);
            appendSpan(buf, rest);
        }
    }
    flush();

    return chunks;
}

/**
 * Recursively splits `text` on the first separator in `separators`, keeping the
 * separator attached to the end of each part, and recursing with the remaining
 * separators into any part still longer than `chunkSize`. An empty-string
 * separator triggers a plain fixed-width split. If the separators run out, the
 * oversized text is returned as a single piece.
 *
 * The returned pieces always concatenate back to `text`.
 */
function splitRecursive(text: string, chunkSize: number, separators: string[]): string[] {
    /* istanbul ignore next -- defensive base case; callers only recurse on parts > chunkSize */
    if (text.length <= chunkSize) return [text];
    const [sep, ...rest] = separators;
    if (sep === undefined) return [text];
    if (sep === '') {
        // Last resort: hard character split.
        const slices: string[] = [];
        for (let i = 0; i < text.length; i += chunkSize) slices.push(text.slice(i, i + chunkSize));
        return slices;
    }
    const parts = text.split(sep);
    return parts.flatMap((raw, i) => {
        // Re-attach the separator to every part except the last.
        const part = i < parts.length - 1 ? raw + sep : raw;
        return part.length > chunkSize ? splitRecursive(part, chunkSize, rest) : [part];
    });
}
|