@seanhogg/builderforce-memory 2026.6.20 → 2026.6.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/dist/cognition/EvermindCognition.d.ts +61 -0
  2. package/dist/cognition/EvermindCognition.d.ts.map +1 -0
  3. package/dist/cognition/EvermindCognition.js +109 -0
  4. package/dist/cognition/EvermindCognition.js.map +1 -0
  5. package/dist/cognition/gatherers.d.ts +22 -0
  6. package/dist/cognition/gatherers.d.ts.map +1 -0
  7. package/dist/cognition/gatherers.js +29 -0
  8. package/dist/cognition/gatherers.js.map +1 -0
  9. package/dist/cognition/index.d.ts +12 -0
  10. package/dist/cognition/index.d.ts.map +1 -0
  11. package/dist/cognition/index.js +9 -0
  12. package/dist/cognition/index.js.map +1 -0
  13. package/dist/cognition/types.d.ts +84 -0
  14. package/dist/cognition/types.d.ts.map +1 -0
  15. package/dist/cognition/types.js +12 -0
  16. package/dist/cognition/types.js.map +1 -0
  17. package/dist/index.d.ts +12 -0
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +13 -0
  20. package/dist/index.js.map +1 -1
  21. package/dist/limbic/LimbicSession.d.ts +63 -0
  22. package/dist/limbic/LimbicSession.d.ts.map +1 -0
  23. package/dist/limbic/LimbicSession.js +188 -0
  24. package/dist/limbic/LimbicSession.js.map +1 -0
  25. package/dist/memory/MemoryStore.d.ts +12 -0
  26. package/dist/memory/MemoryStore.d.ts.map +1 -1
  27. package/dist/memory/MemoryStore.js +28 -0
  28. package/dist/memory/MemoryStore.js.map +1 -1
  29. package/dist/retrieval/HybridRetriever.d.ts +56 -0
  30. package/dist/retrieval/HybridRetriever.d.ts.map +1 -0
  31. package/dist/retrieval/HybridRetriever.js +75 -0
  32. package/dist/retrieval/HybridRetriever.js.map +1 -0
  33. package/dist/retrieval/bm25.d.ts +32 -0
  34. package/dist/retrieval/bm25.d.ts.map +1 -0
  35. package/dist/retrieval/bm25.js +66 -0
  36. package/dist/retrieval/bm25.js.map +1 -0
  37. package/dist/retrieval/chunk.d.ts +33 -0
  38. package/dist/retrieval/chunk.d.ts.map +1 -0
  39. package/dist/retrieval/chunk.js +83 -0
  40. package/dist/retrieval/chunk.js.map +1 -0
  41. package/dist/retrieval/fusion.d.ts +40 -0
  42. package/dist/retrieval/fusion.d.ts.map +1 -0
  43. package/dist/retrieval/fusion.js +64 -0
  44. package/dist/retrieval/fusion.js.map +1 -0
  45. package/dist/retrieval/index.d.ts +16 -0
  46. package/dist/retrieval/index.d.ts.map +1 -0
  47. package/dist/retrieval/index.js +12 -0
  48. package/dist/retrieval/index.js.map +1 -0
  49. package/package.json +8 -4
  50. package/src/cognition/EvermindCognition.ts +156 -0
  51. package/src/cognition/gatherers.ts +40 -0
  52. package/src/cognition/index.ts +20 -0
  53. package/src/cognition/types.ts +88 -0
  54. package/src/index.ts +90 -0
  55. package/src/limbic/LimbicSession.ts +253 -0
  56. package/src/memory/MemoryStore.ts +36 -0
  57. package/src/retrieval/HybridRetriever.ts +122 -0
  58. package/src/retrieval/bm25.ts +83 -0
  59. package/src/retrieval/chunk.ts +101 -0
  60. package/src/retrieval/fusion.ts +84 -0
  61. package/src/retrieval/index.ts +24 -0
package/src/index.ts CHANGED
@@ -40,6 +40,59 @@ export type {
40
40
  Tokenizer,
41
41
  } from './session/index.js';
42
42
 
43
+ // ── Limbic system (trainable affective dynamics) ──────────────────────────────
44
+ export { LimbicSession } from './limbic/LimbicSession.js';
45
+ export type { LimbicSessionOptions, LimbicGpuMode } from './limbic/LimbicSession.js';
46
+ // Re-export the limbic engine primitives so consumers can use the model/trainer
47
+ // and the region schema directly from @seanhogg/builderforce-memory.
48
+ export {
49
+ LimbicModel,
50
+ LimbicTrainer,
51
+ LIMBIC_DIM,
52
+ LIMBIC_DIM_NAMES,
53
+ LIMBIC_STATE_DIM,
54
+ LIMBIC_BOUNDS,
55
+ NEUTRAL_STATE,
56
+ REGION,
57
+ clampState,
58
+ neutralState,
59
+ stateToRecord,
60
+ recordToState,
61
+ } from '@seanhogg/builderforce-memory-engine';
62
+ export type {
63
+ LimbicModelConfig,
64
+ LimbicForward,
65
+ LimbicSample,
66
+ LimbicTrainOptions,
67
+ LimbicDimName,
68
+ Region,
69
+ } from '@seanhogg/builderforce-memory-engine';
70
+
71
+ // ── Mixture-of-Experts (shared-expert hybrid — the Evermind generator's sparsity) ──
72
+ // Re-exported from the engine so consumers reach it from @seanhogg/builderforce-memory.
73
+ export {
74
+ SharedExpertMoE,
75
+ LoadBalanceAccumulator,
76
+ DEFAULT_MOE_CONFIG,
77
+ DEFAULT_MOE_SEED,
78
+ MoETrainer,
79
+ EvermindModelPackage,
80
+ } from '@seanhogg/builderforce-memory-engine';
81
+ export type {
82
+ MoEConfig,
83
+ MoEParam,
84
+ RouteResult,
85
+ MoESample,
86
+ MoETrainOptions,
87
+ MoEEpochResult,
88
+ EvermindModelManifest,
89
+ EvermindModelCard,
90
+ PackageMeta,
91
+ ValidationResult,
92
+ } from '@seanhogg/builderforce-memory-engine';
93
+ export { EvermindLM, EvermindLMTrainer } from '@seanhogg/builderforce-memory-engine';
94
+ export type { EvermindLMConfig, LMGenerateOptions, TextCodec } from '@seanhogg/builderforce-memory-engine';
95
+
43
96
  // ── Runtime ───────────────────────────────────────────────────────────────────
44
97
  export { SSMRuntime } from './runtime/SSMRuntime.js';
45
98
  export type { SSMRuntimeOptions, GenerateOptions } from './runtime/SSMRuntime.js';
@@ -73,6 +126,29 @@ export type {
73
126
  // ── Similarity primitives ──────────────────────────────────────────────────────
74
127
  export { cosineSimilarity, jaccardSimilarity, tokenize } from './similarity/index.js';
75
128
 
129
+ // ── Retrieval (chunking, BM25, rank fusion, hybrid RAG) ────────────────────────
130
+ export {
131
+ chunkText,
132
+ bm25Search,
133
+ reciprocalRankFusion,
134
+ maximalMarginalRelevance,
135
+ hybridRetrieve,
136
+ } from './retrieval/index.js';
137
+ export type {
138
+ Chunk,
139
+ ChunkOptions,
140
+ Bm25Doc,
141
+ Bm25Hit,
142
+ Bm25Options,
143
+ RankedList,
144
+ FusedHit,
145
+ MmrCandidate,
146
+ RetrievalCandidate,
147
+ HybridQuery,
148
+ HybridRetrieveOptions,
149
+ HybridHit,
150
+ } from './retrieval/index.js';
151
+
76
152
  // ── Router ────────────────────────────────────────────────────────────────────
77
153
  export { InferenceRouter } from './router/InferenceRouter.js';
78
154
  export type {
@@ -92,6 +168,20 @@ export type {
92
168
  FactType,
93
169
  } from './memory/MemoryStore.js';
94
170
 
171
+ // ── Cognition (Evermind — Write-Through Cognition) ────────────────────────────
172
+ export { EvermindCognition, workspacePresenceGatherer } from './cognition/index.js';
173
+ export type {
174
+ EvermindCognitionOptions,
175
+ WorkspacePresenceRule,
176
+ Claim,
177
+ CognitionFactStore,
178
+ CommitResult,
179
+ EvidenceContext,
180
+ EvidenceGatherer,
181
+ EvidenceResult,
182
+ Verdict,
183
+ } from './cognition/index.js';
184
+
95
185
  // ── Distillation ──────────────────────────────────────────────────────────────
96
186
  export { DistillationEngine } from './distillation/DistillationEngine.js';
97
187
  export type {
@@ -0,0 +1,253 @@
1
+ /**
2
+ * LimbicSession.ts – high-level facade over the limbic affect model.
3
+ *
4
+ * Mirrors {@link MambaSession}: collapses GPU acquisition, model construction,
5
+ * checkpoint load, and the trainer into a single `LimbicSession.create()` call,
6
+ * with the same WebGPU-or-CPU-fallback contract. The agent runtime consumes
7
+ * this through `@seanhogg/builderforce-memory` to run the limbic system on a
8
+ * self-hosted node (GPU via @webgpu/node when present, CPU otherwise).
9
+ *
10
+ * const limbic = await LimbicSession.create({ gpuAdapter, checkpointBuffer });
11
+ * const { delta, reward } = await limbic.step(experienceEmbedding, state);
12
+ * await limbic.train(samples, { epochs: 30 });
13
+ * const bin = limbic.exportWeights({ fp16: true });
14
+ */
15
+
16
+ import {
17
+ LimbicModel,
18
+ LimbicTrainer,
19
+ LIMBIC_AFFECT_WGSL,
20
+ createStorageBuffer,
21
+ createEmptyStorageBuffer,
22
+ createUniformBuffer,
23
+ createComputePipeline,
24
+ createBindGroup,
25
+ dispatchKernel,
26
+ readBuffer,
27
+ type LimbicModelConfig,
28
+ type LimbicForward,
29
+ type LimbicSample,
30
+ type LimbicTrainOptions,
31
+ } from "@seanhogg/builderforce-memory-engine";
32
+
33
+ import { saveToIndexedDB, loadFromIndexedDB } from "../session/persistence.js";
34
+
35
/**
 * How a session ended up executing:
 *  - "webgpu":       a device was obtained from a regular adapter,
 *  - "cpu-fallback": a device was obtained from a forced fallback adapter,
 *  - "cpu":          no device was obtained; the model's own forward pass is used.
 */
export type LimbicGpuMode = "webgpu" | "cpu-fallback" | "cpu";

/** Options accepted by `LimbicSession.create()`. Every field is optional. */
export interface LimbicSessionOptions {
  /**
   * Adapter supplied by the caller (e.g. from @webgpu/node). When present it is
   * used directly and `navigator.gpu` is never consulted.
   */
  gpuAdapter?: GPUAdapter;
  /**
   * When no regular device could be obtained through `navigator.gpu`, also try
   * a forced fallback adapter. Treated as false when omitted.
   */
  allowCpuFallback?: boolean;
  /** Checkpoint bytes to load into the model right after construction. */
  checkpointBuffer?: ArrayBuffer;
  /** Session name used to build the IndexedDB key for save()/load(). Default 'limbic-default'. */
  name?: string;
  /** IDBFactory forwarded to the persistence helpers (e.g. fake-indexeddb in Node). */
  idbFactory?: IDBFactory;
  /** Partial model configuration passed to the LimbicModel constructor. */
  modelConfig?: Partial<LimbicModelConfig>;
  /** Seed passed to the LimbicModel constructor. */
  seed?: number;
}

/** GPU-resident copies of the model parameters bound by the affect-step kernel. */
interface StepBuffers {
  win: GPUBuffer;
  ws: GPUBuffer;
  aLogit: GPUBuffer;
  woutState: GPUBuffer;
  boutState: GPUBuffer;
}
61
+
62
/**
 * High-level facade over the limbic affect model.
 *
 * Owns the model, its trainer and — when one could be acquired — the GPU device
 * used by the single-step affect kernel. Instances are built exclusively through
 * {@link LimbicSession.create}; the constructor is private.
 */
export class LimbicSession {
  readonly model: LimbicModel;
  readonly trainer: LimbicTrainer;
  /** Device used by the GPU step path, or `null` when stepping on the CPU. */
  readonly device: GPUDevice | null;
  /** Records which acquisition path produced `device` (or "cpu" when there is none). */
  readonly gpuMode: LimbicGpuMode;
  private readonly _name: string;
  private readonly _idbFactory: IDBFactory | undefined;

  // GPU step pipeline + buffers, allocated lazily on first GPU step.
  private _pipeline: GPUComputePipeline | null = null;
  private _dimsBuf: GPUBuffer | null = null;
  private _paramBufs: StepBuffers | null = null;
  /** True whenever the model weights may differ from what `_paramBufs` holds. */
  private _paramsDirty = true;

  private constructor(
    model: LimbicModel,
    trainer: LimbicTrainer,
    device: GPUDevice | null,
    gpuMode: LimbicGpuMode,
    name: string,
    idbFactory: IDBFactory | undefined,
  ) {
    this.model = model;
    this.trainer = trainer;
    this.device = device;
    this.gpuMode = gpuMode;
    this._name = name;
    this._idbFactory = idbFactory;
  }

  /**
   * Builds a session: acquires a device if possible, constructs the model,
   * optionally loads a checkpoint, and wires up the trainer.
   *
   * Device acquisition is best-effort and never throws; on any failure the
   * session is created with `device === null` and `gpuMode === "cpu"`.
   *
   * @param options See {@link LimbicSessionOptions}.
   * @returns The ready-to-use session.
   */
  static async create(options: LimbicSessionOptions = {}): Promise<LimbicSession> {
    const { device, gpuMode } = await LimbicSession._acquireDevice(options);

    // Forward `seed` only when it was actually supplied: spreading
    // `seed: undefined` would overwrite a seed provided via `modelConfig`.
    const model = new LimbicModel({
      ...options.modelConfig,
      ...(options.seed !== undefined ? { seed: options.seed } : {}),
    });
    if (options.checkpointBuffer) {
      model.loadWeights(options.checkpointBuffer);
    }
    const trainer = new LimbicTrainer(model, device);
    return new LimbicSession(
      model,
      trainer,
      device,
      gpuMode,
      options.name ?? "limbic-default",
      options.idbFactory,
    );
  }

  /**
   * Resolves the device and mode for `create()`.
   *
   * Order: caller-supplied adapter → `navigator.gpu` high-performance adapter →
   * (only with `allowCpuFallback`) `navigator.gpu` forced fallback adapter → none.
   * Errors are swallowed on purpose so that callers always get a usable session.
   */
  private static async _acquireDevice(
    options: LimbicSessionOptions,
  ): Promise<{ device: GPUDevice | null; gpuMode: LimbicGpuMode }> {
    const none = { device: null, gpuMode: "cpu" as LimbicGpuMode };

    if (options.gpuAdapter != null) {
      try {
        return { device: await options.gpuAdapter.requestDevice(), gpuMode: "webgpu" };
      } catch {
        // A supplied adapter that cannot produce a device means CPU execution.
        return none;
      }
    }

    if (typeof navigator === "undefined" || !navigator.gpu) return none;
    const gpu = navigator.gpu;

    try {
      const adapter = await gpu.requestAdapter({ powerPreference: "high-performance" });
      if (adapter) {
        return { device: await adapter.requestDevice(), gpuMode: "webgpu" };
      }
    } catch {
      // Fall through to the optional fallback adapter below.
    }

    if (options.allowCpuFallback) {
      try {
        const fallback = await gpu.requestAdapter({ forceFallbackAdapter: true });
        if (fallback) {
          return { device: await fallback.requestDevice(), gpuMode: "cpu-fallback" };
        }
      } catch {
        // No fallback device either; run without one.
      }
    }
    return none;
  }

  /**
   * One affect step. Uses the GPU kernel when a device is available, else the
   * model's own forward pass.
   *
   * @param input  Input vector for this step.
   * @param state  Current state vector.
   * @param hidden Previous hidden vector; defaults to `model.initHidden()`.
   */
  async step(
    input: ArrayLike<number>,
    state: ArrayLike<number>,
    hidden?: ArrayLike<number>,
  ): Promise<LimbicForward> {
    const h = hidden ?? this.model.initHidden();
    const device = this.device;
    if (!device) return this.model.forward(input, h, state);
    return this._stepGpu(device, input, h, state);
  }

  /**
   * Lazily creates the compute pipeline and dims uniform, and (re)uploads the
   * parameter buffers whenever the weights were marked dirty.
   */
  private _ensureGpuStep(
    device: GPUDevice,
  ): { pipeline: GPUComputePipeline; params: StepBuffers; dims: GPUBuffer } {
    let pipeline = this._pipeline;
    let dims = this._dimsBuf;
    if (!pipeline || !dims) {
      dims?.destroy();
      pipeline = createComputePipeline(device, LIMBIC_AFFECT_WGSL, "affect_step");
      const { inputDim, hiddenDim, stateDim } = this.model.config;
      // Four u32 slots (16 bytes); the last one is written as 0.
      const raw = new ArrayBuffer(16);
      new Uint32Array(raw).set([inputDim, hiddenDim, stateDim, 0]);
      dims = createUniformBuffer(device, raw);
      this._pipeline = pipeline;
      this._dimsBuf = dims;
    }

    let params = this._paramBufs;
    if (this._paramsDirty || !params) {
      this._destroyParamBufs();
      params = {
        win: createStorageBuffer(device, this.model.win, false),
        ws: createStorageBuffer(device, this.model.ws, false),
        aLogit: createStorageBuffer(device, this.model.aLogit, false),
        woutState: createStorageBuffer(device, this.model.woutState, false),
        boutState: createStorageBuffer(device, this.model.boutState, false),
      };
      this._paramBufs = params;
      this._paramsDirty = false;
    }
    return { pipeline, params, dims };
  }

  /**
   * Runs the affect kernel for a single step and reads back hidden + delta.
   * All per-step buffers are released in a `finally`, so a failed dispatch or
   * readback does not leak GPU memory.
   */
  private async _stepGpu(
    device: GPUDevice,
    input: ArrayLike<number>,
    hPrev: ArrayLike<number>,
    sPrev: ArrayLike<number>,
  ): Promise<LimbicForward> {
    const { inputDim, hiddenDim, stateDim } = this.model.config;
    const { pipeline, params, dims } = this._ensureGpuStep(device);

    const scratch: GPUBuffer[] = [];
    const track = (buf: GPUBuffer): GPUBuffer => {
      scratch.push(buf);
      return buf;
    };
    // Copies exactly `length` values, padding missing entries with 0.
    const padded = (src: ArrayLike<number>, length: number): Float32Array =>
      Float32Array.from({ length }, (_, i) => src[i] ?? 0);

    try {
      const xBuf = track(createStorageBuffer(device, padded(input, inputDim), false));
      const hBuf = track(createStorageBuffer(device, padded(hPrev, hiddenDim), false));
      const sBuf = track(createStorageBuffer(device, padded(sPrev, stateDim), false));
      const hOut = track(createEmptyStorageBuffer(device, hiddenDim * 4, true));
      const dOut = track(createEmptyStorageBuffer(device, stateDim * 4, true));

      const bindGroup = createBindGroup(device, pipeline, [
        dims,
        params.win,
        params.ws,
        params.aLogit,
        params.woutState,
        params.boutState,
        xBuf,
        hBuf,
        sBuf,
        hOut,
        dOut,
      ]);
      dispatchKernel(device, pipeline, bindGroup, [1, 1, 1]);

      const hidden = (await readBuffer(device, hOut, hiddenDim * 4)).subarray(0, hiddenDim);
      const delta = (await readBuffer(device, dOut, stateDim * 4)).subarray(0, stateDim);

      // Reward head is small — compute on CPU from the GPU-produced hidden state.
      let reward = this.model.boutReward[0]!;
      for (let j = 0; j < hiddenDim; j++) reward += this.model.woutReward[j]! * hidden[j]!;

      return { hidden: Float32Array.from(hidden), delta: Float32Array.from(delta), reward };
    } finally {
      for (const buf of scratch) buf.destroy();
    }
  }

  /**
   * Trains the affect model on observed experiences.
   *
   * The GPU parameter buffers are marked dirty even when training throws,
   * because the trainer may already have changed some weights by then.
   *
   * @returns The values returned by the trainer's `train()` call.
   */
  async train(samples: LimbicSample[], opts?: LimbicTrainOptions): Promise<number[]> {
    try {
      return await this.trainer.train(samples, opts);
    } finally {
      this._paramsDirty = true; // weights changed → GPU step buffers must be re-uploaded
    }
  }

  /** Delegates to the trainer's `evaluate()` and returns its result. */
  evaluate(samples: LimbicSample[]): number {
    return this.trainer.evaluate(samples);
  }

  /** Delegates to the model's `exportWeights()`, forwarding `opts` unchanged. */
  exportWeights(opts?: { fp16?: boolean }): ArrayBuffer {
    return this.model.exportWeights(opts);
  }

  /** Persist weights (exported with `fp16: true`) to IndexedDB under the session name. */
  async save(): Promise<void> {
    await saveToIndexedDB(
      `limbic_${this._name}`,
      this.model.exportWeights({ fp16: true }),
      this._idbFactory,
    );
  }

  /** Load weights from IndexedDB; returns true if a checkpoint was found. */
  async load(): Promise<boolean> {
    const buf = await loadFromIndexedDB(`limbic_${this._name}`, this._idbFactory);
    if (!buf) return false;
    // Flag first: if loadWeights throws part-way the GPU copy is still refreshed.
    this._paramsDirty = true;
    this.model.loadWeights(buf);
    return true;
  }

  /** Releases the uploaded parameter buffers, if any. */
  private _destroyParamBufs(): void {
    const bufs = this._paramBufs;
    if (!bufs) return;
    bufs.win.destroy();
    bufs.ws.destroy();
    bufs.aLogit.destroy();
    bufs.woutState.destroy();
    bufs.boutState.destroy();
    this._paramBufs = null;
  }

  /**
   * Releases the GPU buffers held by the session and drops the cached pipeline.
   * The device itself is left untouched.
   */
  destroy(): void {
    this._destroyParamBufs();
    this._dimsBuf?.destroy();
    this._dimsBuf = null;
    this._pipeline = null;
  }
}
@@ -10,6 +10,7 @@
10
10
 
11
11
  import { SSMError } from '../errors/SSMError.js';
12
12
  import { tokenize, jaccardSimilarity, cosineSimilarity } from '../similarity/index.js';
13
+ import { hybridRetrieve, type RetrievalCandidate, type HybridRetrieveOptions } from '../retrieval/index.js';
13
14
 
14
15
  export type FactType = 'text' | 'json' | 'number' | 'boolean';
15
16
 
@@ -265,6 +266,41 @@ export class MemoryStore {
265
266
  return scored.slice(0, topK).map(s => s.entry);
266
267
  }
267
268
 
269
/**
 * Hybrid recall: hands every stored entry to `hybridRetrieve`, which fuses a
 * dense (embedding cosine) ranking with a sparse (BM25 lexical) ranking via
 * Reciprocal Rank Fusion and then applies an MMR diversity rerank.
 *
 * Compared with cosine-only `recallSimilar`, this also surfaces exact-token
 * matches (identifiers, codes, rare names) and avoids near-duplicate facts.
 * When `runtime` is missing or has no `embed` function, no vectors are
 * produced and retrieval runs on BM25 alone.
 *
 * @param query   Free-text query.
 * @param topK    Maximum number of entries to return.
 * @param runtime Optional runtime used to embed the query and the entries.
 * @param opts    Extra retrieval options. A defined `opts.topK` takes precedence
 *                over `topK`; an explicit `opts.topK: undefined` does not.
 * @returns Matching entries in retrieval order.
 */
async recallHybrid(
  query: string,
  topK: number,
  runtime?: SSMRuntimeRef,
  opts?: HybridRetrieveOptions,
): Promise<MemoryEntry[]> {
  const all = await this.recallAll();
  if (all.length === 0) return [];

  // Embed candidates + query where a runtime is available; a missing vector is
  // fine — hybridRetrieve leaves that candidate out of the dense pass.
  const canEmbed = runtime != null && typeof runtime.embed === 'function';
  const queryVec = canEmbed ? (await this._embedWithCache(runtime, query)) ?? undefined : undefined;

  const candidates: RetrievalCandidate[] = [];
  for (const entry of all) {
    const vector = canEmbed ? (await this._embedWithCache(runtime, entry.content)) ?? undefined : undefined;
    candidates.push({ id: entry.key, text: entry.content, vector });
  }

  // `{ topK, ...opts }` would let an explicit `opts.topK: undefined` erase the
  // positional argument; resolve the effective value explicitly instead.
  const hits = hybridRetrieve(
    { text: query, vector: queryVec },
    candidates,
    { ...opts, topK: opts?.topK ?? topK },
  );
  const byKey = new Map(all.map(e => [e.key, e]));
  return hits.map(h => byKey.get(h.id)).filter((e): e is MemoryEntry => !!e);
}
303
+
268
304
  /**
269
305
  * Returns a cached embedding for `text`, computing it via `runtime.embed()`
270
306
  * on a cache miss. Returns `null` (never throws) when embedding is
@@ -0,0 +1,122 @@
1
+ /**
2
+ * HybridRetriever — dense + sparse retrieval with rank fusion and diversity rerank.
3
+ *
4
+ * This is the piece that takes the memory layer from "cosine-only similarity" to a
5
+ * full hybrid RAG retriever:
6
+ *
7
+ * 1. Dense: cosine over embeddings (SSM hidden-state vectors, or any embedder).
8
+ * 2. Sparse: BM25 lexical scoring (catches exact tokens dense search misses).
9
+ * 3. Fuse: Reciprocal Rank Fusion combines the two rankings.
10
+ * 4. Rerank: optional MMR pass for relevance/novelty trade-off (diversity).
11
+ *
12
+ * It is storage-agnostic — give it candidates (id + text + optional vector) and a
13
+ * query (text + optional vector). It degrades gracefully: no query vector / no
14
+ * candidate vectors → BM25-only; no overlap → dense-only.
15
+ */
16
+
17
+ import { cosineSimilarity } from '../similarity/index.js';
18
+ import { bm25Search, type Bm25Options } from './bm25.js';
19
+ import { reciprocalRankFusion, maximalMarginalRelevance, type MmrCandidate } from './fusion.js';
20
+
21
/** A document offered to the retriever. */
export interface RetrievalCandidate {
  id: string;
  text: string;
  /** Embedding of `text`. Candidates without one are skipped by the dense pass. */
  vector?: Float32Array;
}

/** The query side of a retrieval call. */
export interface HybridQuery {
  text: string;
  /** Embedding of the query. Without it there is no dense pass and no MMR rerank. */
  vector?: Float32Array;
}

/** Tuning knobs for `hybridRetrieve`; every field falls back to the stated default. */
export interface HybridRetrieveOptions {
  /** How many hits to return. Default 5. */
  topK?: number;
  /** Constant handed to Reciprocal Rank Fusion. Default 60. */
  rrfK?: number;
  /** Weight given to the dense ranking during fusion. Default 1. */
  denseWeight?: number;
  /** Weight given to the sparse (BM25) ranking during fusion. Default 1. */
  sparseWeight?: number;
  /** Whether to run the MMR diversity rerank on the fused results. Default true. */
  rerank?: boolean;
  /** Lambda handed to MMR (1 = pure relevance). Default 0.7. */
  mmrLambda?: number;
  /** Options forwarded to the BM25 scorer. */
  bm25?: Bm25Options;
}

/** One result returned by `hybridRetrieve`. */
export interface HybridHit {
  id: string;
  text: string;
  /** Fused RRF score; reranking changes the order but not this value. */
  score: number;
}
57
+
58
/**
 * Runs the full hybrid pipeline over `candidates` and returns the top-K hits.
 *
 * Steps: cosine ranking over candidates that carry a vector (only when the query
 * has one) → BM25 ranking over all candidates → Reciprocal Rank Fusion of the
 * non-empty rankings → optional MMR rerank of the fused head.
 *
 * Pure given its inputs (embeddings are supplied by the caller), so it is
 * directly unit-testable without a model or vector DB.
 *
 * @param query      Query text plus optional embedding.
 * @param candidates Documents to rank.
 * @param opts       See {@link HybridRetrieveOptions}. `topK` is floored to an
 *                   integer; a zero, negative or NaN `topK` yields no hits.
 * @returns At most `topK` hits, each carrying its fused (pre-rerank) score.
 */
export function hybridRetrieve(
  query: HybridQuery,
  candidates: RetrievalCandidate[],
  opts: HybridRetrieveOptions = {},
): HybridHit[] {
  // Sanitise topK up front: `slice(0, topK)` interprets a negative end as an
  // offset from the tail, which would otherwise return almost every candidate.
  const requestedTopK = opts.topK ?? 5;
  const topK = Number.isNaN(requestedTopK) ? 0 : Math.max(0, Math.floor(requestedTopK));
  if (topK === 0 || candidates.length === 0) return [];

  const byId = new Map(candidates.map(c => [c.id, c]));
  const queryVector = query.vector;

  // ── Dense ranking (cosine) ────────────────────────────────────────────────
  let denseIds: string[] = [];
  if (queryVector) {
    const scored: { id: string; score: number }[] = [];
    for (const c of candidates) {
      if (c.vector && c.vector.length > 0) {
        scored.push({ id: c.id, score: cosineSimilarity(queryVector, c.vector) });
      }
    }
    denseIds = scored.sort((a, b) => b.score - a.score).map(h => h.id);
  }

  // ── Sparse ranking (BM25) ─────────────────────────────────────────────────
  const sparseIds = bm25Search(query.text, candidates, opts.bm25).map(h => h.id);

  // ── Fuse ──────────────────────────────────────────────────────────────────
  // Empty rankings are dropped so they cannot influence the fusion.
  const fused = reciprocalRankFusion(
    [
      { ids: denseIds, weight: opts.denseWeight ?? 1 },
      { ids: sparseIds, weight: opts.sparseWeight ?? 1 },
    ].filter(l => l.ids.length > 0),
    opts.rrfK ?? 60,
  );
  if (fused.length === 0) return [];

  // ── Rerank (MMR over fused top, using whatever vectors we have) ────────────
  const fusedIds = fused.map(f => f.id);
  let orderedIds = fusedIds;
  if ((opts.rerank ?? true) && queryVector) {
    // Consider a generous fused window (4× topK) so MMR has room to diversify.
    const mmrCands: MmrCandidate[] = [];
    for (const f of fused.slice(0, topK * 4)) {
      const c = byId.get(f.id);
      if (c?.vector && c.vector.length > 0) mmrCands.push({ id: c.id, vector: c.vector });
    }
    if (mmrCands.length > 0) {
      const reranked = maximalMarginalRelevance(queryVector, mmrCands, topK, opts.mmrLambda ?? 0.7);
      // MMR only ranks the vectored subset; append any remaining fused ids after.
      const seen = new Set(reranked);
      orderedIds = [...reranked, ...fusedIds.filter(id => !seen.has(id))];
    }
  }

  const fusedScore = new Map(fused.map(f => [f.id, f.score]));
  const hits: HybridHit[] = [];
  for (const id of orderedIds.slice(0, topK)) {
    const candidate = byId.get(id);
    const score = fusedScore.get(id);
    // Every ordered id originates from `candidates` via `fused`; the guard only
    // protects against a ranking helper returning an unknown id.
    if (candidate === undefined || score === undefined) continue;
    hits.push({ id, text: candidate.text, score });
  }
  return hits;
}
@@ -0,0 +1,83 @@
1
+ /**
2
+ * BM25 (Okapi) lexical ranking.
3
+ *
4
+ * The keyword half of hybrid retrieval. Dense vector search matches meaning but
5
+ * misses exact tokens (identifiers, error codes, rare names); BM25 catches those.
6
+ * Fusing the two (see {@link ./fusion}) is what lifts the memory layer from
7
+ * "cosine only" to a hybrid retriever on par with Weaviate-style search.
8
+ *
9
+ * Pure and zero-dependency — reuses the shared `tokenize` from ../similarity.
10
+ */
11
+
12
+ import { tokenize } from '../similarity/index.js';
13
+
14
/** Tuning parameters for `bm25Search`. */
export interface Bm25Options {
  /** Term-frequency saturation; larger values let repeated terms count for more. Default 1.5. */
  k1?: number;
  /** Document-length normalisation in 0..1; larger values penalise long documents more. Default 0.75. */
  b?: number;
}

/** A document to be scored. */
export interface Bm25Doc {
  id: string;
  text: string;
}

/** A scored document id. */
export interface Bm25Hit {
  id: string;
  score: number;
}
30
+
31
/**
 * Okapi BM25 over an in-memory document list.
 *
 * Every document is tokenised and scored against the distinct terms of `query`;
 * documents whose score is not positive (no query-term overlap) are omitted and
 * the rest are returned by descending score. The statistics are rebuilt on every
 * call, so nothing needs to be persisted between searches.
 *
 * @param query Free-text query.
 * @param docs  Documents to score.
 * @param opts  `k1` (default 1.5) and `b` (default 0.75).
 * @returns Hits ordered from highest to lowest score.
 */
export function bm25Search(query: string, docs: Bm25Doc[], opts: Bm25Options = {}): Bm25Hit[] {
  const saturation = opts.k1 ?? 1.5;
  const lengthNorm = opts.b ?? 0.75;
  const docCount = docs.length;
  if (docCount === 0) return [];

  const terms = new Set(tokenize(query));
  if (terms.size === 0) return [];

  // Pass 1: per-document term counts, document frequency of each query term,
  // and the running token total used for the average length.
  const docFreq = new Map<string, number>();
  let tokenTotal = 0;
  const stats = docs.map(doc => {
    const tokens = tokenize(doc.text);
    const counts = new Map<string, number>();
    for (const tok of tokens) counts.set(tok, (counts.get(tok) ?? 0) + 1);
    for (const term of terms) {
      if (counts.has(term)) docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    }
    tokenTotal += tokens.length;
    return { id: doc.id, counts, length: tokens.length };
  });
  // Guard against an all-empty corpus, where the mean length would be 0.
  const avgLength = tokenTotal / docCount || 1;

  // idf with the +1 smoothing variant (always non-negative).
  const weightedTerms = [...terms].map(term => {
    const containing = docFreq.get(term) ?? 0;
    return { term, idf: Math.log(1 + (docCount - containing + 0.5) / (containing + 0.5)) };
  });

  // Pass 2: accumulate the per-term contributions for each document.
  const results: Bm25Hit[] = [];
  for (const { id, counts, length } of stats) {
    const lengthPenalty = saturation * (1 - lengthNorm + lengthNorm * (length / avgLength));
    let total = 0;
    for (const { term, idf } of weightedTerms) {
      const freq = counts.get(term);
      if (!freq) continue;
      total += idf * ((freq * (saturation + 1)) / (freq + lengthPenalty));
    }
    if (total > 0) results.push({ id, score: total });
  }

  results.sort((left, right) => right.score - left.score);
  return results;
}