@seanhogg/builderforce-memory 2026.6.20 → 2026.6.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cognition/EvermindCognition.d.ts +61 -0
- package/dist/cognition/EvermindCognition.d.ts.map +1 -0
- package/dist/cognition/EvermindCognition.js +109 -0
- package/dist/cognition/EvermindCognition.js.map +1 -0
- package/dist/cognition/gatherers.d.ts +22 -0
- package/dist/cognition/gatherers.d.ts.map +1 -0
- package/dist/cognition/gatherers.js +29 -0
- package/dist/cognition/gatherers.js.map +1 -0
- package/dist/cognition/index.d.ts +12 -0
- package/dist/cognition/index.d.ts.map +1 -0
- package/dist/cognition/index.js +9 -0
- package/dist/cognition/index.js.map +1 -0
- package/dist/cognition/types.d.ts +84 -0
- package/dist/cognition/types.d.ts.map +1 -0
- package/dist/cognition/types.js +12 -0
- package/dist/cognition/types.js.map +1 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -1
- package/dist/limbic/LimbicSession.d.ts +63 -0
- package/dist/limbic/LimbicSession.d.ts.map +1 -0
- package/dist/limbic/LimbicSession.js +188 -0
- package/dist/limbic/LimbicSession.js.map +1 -0
- package/dist/memory/MemoryStore.d.ts +12 -0
- package/dist/memory/MemoryStore.d.ts.map +1 -1
- package/dist/memory/MemoryStore.js +28 -0
- package/dist/memory/MemoryStore.js.map +1 -1
- package/dist/retrieval/HybridRetriever.d.ts +56 -0
- package/dist/retrieval/HybridRetriever.d.ts.map +1 -0
- package/dist/retrieval/HybridRetriever.js +75 -0
- package/dist/retrieval/HybridRetriever.js.map +1 -0
- package/dist/retrieval/bm25.d.ts +32 -0
- package/dist/retrieval/bm25.d.ts.map +1 -0
- package/dist/retrieval/bm25.js +66 -0
- package/dist/retrieval/bm25.js.map +1 -0
- package/dist/retrieval/chunk.d.ts +33 -0
- package/dist/retrieval/chunk.d.ts.map +1 -0
- package/dist/retrieval/chunk.js +83 -0
- package/dist/retrieval/chunk.js.map +1 -0
- package/dist/retrieval/fusion.d.ts +40 -0
- package/dist/retrieval/fusion.d.ts.map +1 -0
- package/dist/retrieval/fusion.js +64 -0
- package/dist/retrieval/fusion.js.map +1 -0
- package/dist/retrieval/index.d.ts +16 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +12 -0
- package/dist/retrieval/index.js.map +1 -0
- package/package.json +8 -4
- package/src/cognition/EvermindCognition.ts +156 -0
- package/src/cognition/gatherers.ts +40 -0
- package/src/cognition/index.ts +20 -0
- package/src/cognition/types.ts +88 -0
- package/src/index.ts +90 -0
- package/src/limbic/LimbicSession.ts +253 -0
- package/src/memory/MemoryStore.ts +36 -0
- package/src/retrieval/HybridRetriever.ts +122 -0
- package/src/retrieval/bm25.ts +83 -0
- package/src/retrieval/chunk.ts +101 -0
- package/src/retrieval/fusion.ts +84 -0
- package/src/retrieval/index.ts +24 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document chunking — recursive character text splitter with overlap.
|
|
3
|
+
*
|
|
4
|
+
* The classic RAG ingestion step the memory layer was missing: large documents
|
|
5
|
+
* are split into smaller, semantically-coherent chunks before embedding so that
|
|
6
|
+
* retrieval returns precise passages rather than whole files. Mirrors the
|
|
7
|
+
* behaviour of LangChain's RecursiveCharacterTextSplitter (split on the largest
|
|
8
|
+
* natural boundary that fits, fall back to finer ones) but is zero-dependency.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
export interface ChunkOptions {
  /**
   * Target maximum chunk size in characters. Default 1000.
   * Values below 1 are raised to 1.
   */
  chunkSize?: number;
  /**
   * Characters of overlap carried from the end of one chunk into the next,
   * preserving context across boundaries. Default 200. Negative values are
   * treated as 0 and the value is clamped to at most `chunkSize - 1`.
   */
  chunkOverlap?: number;
  /**
   * Separators tried in order, largest natural boundary first. Defaults to
   * paragraph break, line break, sentence end (`'. '`), space, and finally
   * `''`, which means a hard character split.
   */
  separators?: string[];
}
|
|
20
|
+
|
|
21
|
+
/** One piece of a chunked document, as returned by `chunkText`. */
export interface Chunk {
  /** The chunk text, with surrounding whitespace trimmed. */
  text: string;
  /** 0-based ordinal of this chunk within its source document. */
  index: number;
  /** Character offset of this chunk's start within the original document. */
  start: number;
}
|
|
29
|
+
|
|
30
|
+
/** Default separator ladder: paragraph, line, sentence, word, then hard character split. */
const DEFAULT_SEPARATORS = ['\n\n', '\n', '. ', ' ', ''];

/**
 * Splits `text` into overlapping chunks no larger than `chunkSize` characters,
 * preferring the largest natural separator that keeps a piece under the limit.
 * Returns `[]` for empty/whitespace input. Deterministic and pure.
 *
 * Every chunk is a verbatim, whitespace-trimmed window of the input, so
 * `text.slice(c.start, c.start + c.text.length) === c.text` holds for each
 * returned chunk `c`. Overlap is taken from the document itself (the characters
 * immediately preceding the next piece), which keeps the separator between the
 * carried-over tail and the new piece instead of gluing words together.
 *
 * @param text Document to split.
 * @param opts Chunk size, overlap and separator overrides. Non-integer sizes
 *   are floored; `NaN` falls back to the default.
 * @returns Chunks in document order with sequential 0-based `index` values.
 */
export function chunkText(text: string, opts: ChunkOptions = {}): Chunk[] {
  const requestedSize = opts.chunkSize ?? 1000;
  const requestedOverlap = opts.chunkOverlap ?? 200;
  // Offsets below are integer arithmetic, so normalise the numeric options first.
  const chunkSize = Math.max(1, Math.floor(Number.isNaN(requestedSize) ? 1000 : requestedSize));
  const overlap = Math.min(
    Math.max(0, Math.floor(Number.isNaN(requestedOverlap) ? 200 : requestedOverlap)),
    chunkSize - 1,
  );
  const separators = opts.separators ?? DEFAULT_SEPARATORS;

  const trimmed = text.trim();
  if (trimmed.length === 0) return [];
  // Offset of `trimmed` inside the caller's original string.
  const leading = text.length - text.trimStart().length;
  if (trimmed.length <= chunkSize) return [{ text: trimmed, index: 0, start: leading }];

  // Pieces are contiguous: concatenated in order they reproduce `trimmed` exactly,
  // so a running cursor gives each piece's offset.
  const pieces = splitRecursive(trimmed, chunkSize, separators);
  const chunks: Chunk[] = [];

  // Emits the window [from, to) of `trimmed` as a chunk; whitespace-only windows are skipped.
  const emit = (from: number, to: number): void => {
    const raw = trimmed.slice(from, to);
    const body = raw.trim();
    if (body.length === 0) return;
    const skipped = raw.length - raw.trimStart().length;
    chunks.push({ text: body, index: chunks.length, start: leading + from + skipped });
  };

  // Current window [winStart, winEnd) into `trimmed`; winEnd always equals the
  // offset of the next piece to be consumed.
  let winStart = 0;
  let winEnd = 0;
  let cursor = 0;
  for (const piece of pieces) {
    const pieceStart = cursor;
    cursor += piece.length;

    if (winEnd - winStart + piece.length <= chunkSize) {
      winEnd = cursor;
      continue;
    }

    emit(winStart, winEnd);
    // Start the next window `overlap` characters back, but never before the
    // start of the previous chunk (and with no overlap if nothing was emitted yet).
    const previous = chunks[chunks.length - 1];
    const earliest = previous === undefined ? pieceStart : previous.start - leading;
    winStart = Math.max(earliest, pieceStart - overlap);
    winEnd = cursor;
    // Overlap plus piece (or an unsplittable piece) may exceed the limit: hard-split.
    // Terminates because overlap < chunkSize, so winStart advances by at least 1.
    while (winEnd - winStart > chunkSize) {
      emit(winStart, winStart + chunkSize);
      winStart += chunkSize - overlap;
    }
  }
  emit(winStart, winEnd);

  return chunks;
}

/**
 * Splits `text` on the first separator in `separators`, keeping the separator
 * attached to the end of each part, and recurses with the remaining (finer)
 * separators only for parts that are still longer than `chunkSize`. An empty
 * separator means a hard split every `chunkSize` characters. The returned
 * pieces concatenate back to `text`; a piece can exceed `chunkSize` only when
 * the separator list runs out without reaching `''`.
 */
function splitRecursive(text: string, chunkSize: number, separators: string[]): string[] {
  /* istanbul ignore next -- defensive base case; callers only recurse on parts > chunkSize */
  if (text.length <= chunkSize) return [text];
  const [sep, ...rest] = separators;
  if (sep === undefined) return [text];
  if (sep === '') {
    // Last resort: hard character split.
    const out: string[] = [];
    for (let i = 0; i < text.length; i += chunkSize) out.push(text.slice(i, i + chunkSize));
    return out;
  }
  const parts = text.split(sep);
  const lastIdx = parts.length - 1;
  const out: string[] = [];
  parts.forEach((part, i) => {
    // Re-attach the separator to every part except the last so no characters are lost.
    const piece = i < lastIdx ? part + sep : part;
    if (piece.length > chunkSize) out.push(...splitRecursive(piece, chunkSize, rest));
    else out.push(piece);
  });
  return out;
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rank fusion + diversity reranking.
|
|
3
|
+
*
|
|
4
|
+
* • Reciprocal Rank Fusion (RRF) merges the dense (vector) and sparse (BM25)
|
|
5
|
+
* rankings into one list without needing the two score scales to be
|
|
6
|
+
* commensurable — it fuses on RANK, not raw score. This is the standard,
|
|
7
|
+
* parameter-light way to combine hybrid retrieval signals.
|
|
8
|
+
* • Maximal Marginal Relevance (MMR) reranks the fused list to trade off
|
|
9
|
+
* relevance against novelty, so the top-k isn't five near-duplicate chunks.
|
|
10
|
+
*
|
|
11
|
+
* Pure and zero-dependency.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { cosineSimilarity } from '../similarity/index.js';
|
|
15
|
+
|
|
16
|
+
/** One input ranking for `reciprocalRankFusion`. */
export interface RankedList {
  /** Ordered ids, most relevant first. */
  ids: string[];
  /** Optional weight for this list in the fusion (default 1). */
  weight?: number;
}
|
|
22
|
+
|
|
23
|
+
/** One entry of the fused ranking produced by `reciprocalRankFusion`. */
export interface FusedHit {
  /** Id of the ranked item. */
  id: string;
  /** Fused score summed across the input lists; results are sorted by it, highest first. */
  score: number;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Reciprocal Rank Fusion over any number of ranked lists.
 *   score(d) = Σ_lists weight / (k + rank(d)), with rank(d) 1-based.
 * `k` (default 60) damps the contribution of low-ranked items; the canonical
 * TREC value.
 *
 * Each id contributes at most once per list, at its first (best) position.
 * Repeats later in the same list are ignored and do not consume a rank, so a
 * list with accidental duplicates fuses exactly like its de-duplicated form.
 *
 * @param lists Rankings to fuse; a list's `weight` defaults to 1.
 * @param k Rank damping constant.
 * @returns One hit per distinct id, sorted by descending fused score. Ties keep
 *   the order in which the ids were first encountered.
 */
export function reciprocalRankFusion(lists: RankedList[], k = 60): FusedHit[] {
  const totals = new Map<string, number>();
  for (const list of lists) {
    const weight = list.weight ?? 1;
    const seen = new Set<string>();
    let position = 0; // 0-based rank among the distinct ids of this list
    for (const id of list.ids) {
      if (seen.has(id)) continue;
      seen.add(id);
      totals.set(id, (totals.get(id) ?? 0) + weight / (k + position + 1));
      position += 1;
    }
  }
  return Array.from(totals, ([id, score]) => ({ id, score })).sort((a, b) => b.score - a.score);
}
|
|
45
|
+
|
|
46
|
+
/** A candidate passed to `maximalMarginalRelevance`. */
export interface MmrCandidate {
  /** Id returned in the reranked output. */
  id: string;
  /** Vector compared (via `cosineSimilarity`) against the query and against already-selected candidates. */
  vector: Float32Array;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Maximal Marginal Relevance rerank. Greedily selects up to `topK` candidates,
 * each step maximising `λ·sim(query, d) − (1−λ)·max sim(d, already-selected)`.
 * λ=1 is pure relevance; lower λ injects diversity. Candidates without vectors
 * should be filtered out by the caller (they cannot be MMR-scored).
 *
 * The redundancy term starts at 0 and only ever grows, so a negative similarity
 * to the selected set never earns a bonus. Ties go to the candidate that
 * appears first in `candidates`. Each candidate is scored from its own vector,
 * even when several candidates share an id.
 *
 * @param queryVec Query vector.
 * @param candidates Items to rerank; the array is not mutated.
 * @param topK Maximum number of ids to return.
 * @param lambda Relevance/diversity trade-off (default 0.7).
 * @returns Ids of the selected candidates, in selection order.
 */
export function maximalMarginalRelevance(
  queryVec: Float32Array,
  candidates: MmrCandidate[],
  topK: number,
  lambda = 0.7,
): string[] {
  // Per-candidate state: query relevance is fixed; the running maximum similarity
  // to the selected set is updated after every pick instead of being recomputed
  // against all selected items on every round.
  const pool = candidates.map(candidate => ({
    candidate,
    relevance: cosineSimilarity(queryVec, candidate.vector),
    maxSimToSelected: 0,
  }));
  const picked: string[] = [];

  while (picked.length < topK && pool.length > 0) {
    let bestIdx = 0;
    let bestScore = -Infinity;
    pool.forEach((entry, i) => {
      const mmr = lambda * entry.relevance - (1 - lambda) * entry.maxSimToSelected;
      if (mmr > bestScore) {
        bestScore = mmr;
        bestIdx = i;
      }
    });

    const [winner] = pool.splice(bestIdx, 1);
    if (winner === undefined) break;
    picked.push(winner.candidate.id);

    // Fold the new pick into everyone's redundancy term, if another round will run.
    if (picked.length < topK) {
      for (const entry of pool) {
        const sim = cosineSimilarity(entry.candidate.vector, winner.candidate.vector);
        if (sim > entry.maxSimToSelected) entry.maxSimToSelected = sim;
      }
    }
  }
  return picked;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval layer — chunking, BM25, rank fusion, and the HybridRetriever.
|
|
3
|
+
*
|
|
4
|
+
* The classic RAG pieces the memory stack previously lacked (chunking, hybrid
|
|
5
|
+
* dense+sparse search, reranking), implemented zero-dependency so they run in the
|
|
6
|
+
* browser, Node, and the SSM runtime alike.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
export { chunkText } from './chunk.js';
|
|
10
|
+
export type { Chunk, ChunkOptions } from './chunk.js';
|
|
11
|
+
|
|
12
|
+
export { bm25Search } from './bm25.js';
|
|
13
|
+
export type { Bm25Doc, Bm25Hit, Bm25Options } from './bm25.js';
|
|
14
|
+
|
|
15
|
+
export { reciprocalRankFusion, maximalMarginalRelevance } from './fusion.js';
|
|
16
|
+
export type { RankedList, FusedHit, MmrCandidate } from './fusion.js';
|
|
17
|
+
|
|
18
|
+
export { hybridRetrieve } from './HybridRetriever.js';
|
|
19
|
+
export type {
|
|
20
|
+
RetrievalCandidate,
|
|
21
|
+
HybridQuery,
|
|
22
|
+
HybridRetrieveOptions,
|
|
23
|
+
HybridHit,
|
|
24
|
+
} from './HybridRetriever.js';
|