ralph-hero-knowledge-index 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-knowledge",
3
- "version": "0.1.19",
3
+ "version": "0.1.21",
4
4
  "description": "Knowledge graph for ralph-hero: semantic search, relationship traversal, and document indexing across thoughts/ documents. Optional companion to ralph-hero.",
5
5
  "author": {
6
6
  "name": "Chad Dubiel",
package/.mcp.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "mcpServers": {
3
3
  "ralph-knowledge": {
4
4
  "command": "npx",
5
- "args": ["-y", "ralph-hero-knowledge-index@0.1.19"]
5
+ "args": ["-y", "ralph-hero-knowledge-index@0.1.21"]
6
6
  }
7
7
  }
8
8
  }
@@ -0,0 +1,37 @@
1
+ /**
2
+ * RecursiveCharacterTextSplitter-style chunker.
3
+ *
4
+ * Splits long text into overlapping chunks while preserving the original
5
+ * character offsets (charStart, charEnd) so downstream code can reconstruct
6
+ * positions. Mirrors the semantics of LangChain's RecursiveCharacterTextSplitter:
7
+ * tries each separator in order, snapping chunk boundaries to the highest-priority
8
+ * separator that keeps pieces under chunkSize.
9
+ *
10
+ * Defaults correspond to 512-token chunks with 64-token overlap
11
+ * (approx: 1 token ~= 4 chars for English text).
12
+ */
13
+ export interface Chunk {
14
+ index: number;
15
+ content: string;
16
+ charStart: number;
17
+ charEnd: number;
18
+ }
19
+ export interface ChunkerOptions {
20
+ chunkSize?: number;
21
+ chunkOverlap?: number;
22
+ separators?: string[];
23
+ }
24
+ /**
25
+ * Split `text` into overlapping chunks.
26
+ *
27
+ * Semantics:
28
+ * - Empty input -> empty array.
29
+ * - Short input (<= chunkSize) -> single chunk covering the whole text.
30
+ * - For each chunk, `text.slice(charStart, charEnd) === content`.
31
+ * - `charStart` is monotonically non-decreasing across chunks.
32
+ * - Consecutive chunks overlap by ~`chunkOverlap` chars (snapped to atom
33
+ * boundaries; may differ by up to the largest atom size).
34
+ * - Each chunk's content length is bounded by chunkSize + a small slack for
35
+ * the separator that snapped the boundary.
36
+ */
37
+ export declare function chunkText(text: string, opts?: ChunkerOptions): Chunk[];
@@ -0,0 +1,219 @@
1
/**
 * RecursiveCharacterTextSplitter-style chunker.
 *
 * Splits long text into overlapping chunks while preserving the original
 * character offsets (charStart, charEnd) so downstream code can reconstruct
 * positions. Mirrors the semantics of LangChain's RecursiveCharacterTextSplitter:
 * tries each separator in order, snapping chunk boundaries to the highest-priority
 * separator that keeps pieces under chunkSize.
 *
 * Defaults correspond to 512-token chunks with 64-token overlap
 * (approx: 1 token ~= 4 chars for English text).
 */
const DEFAULT_CHUNK_SIZE = 2048;
const DEFAULT_CHUNK_OVERLAP = 256;
const DEFAULT_SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
/**
 * Pick the first separator from `separators` that occurs in `pieceText`.
 * Falls back to the last separator (typically `""`) if none match — this is
 * the sentinel that lets us character-split oversized pieces with no natural
 * boundary.
 *
 * @param {string} pieceText  Text being inspected for a split point.
 * @param {string[]} separators  Separators in priority order.
 * @returns {{separator: string, remaining: string[]}}  The chosen separator
 *   and the lower-priority separators left to try on sub-pieces.
 */
function pickSeparator(pieceText, separators) {
    for (let i = 0; i < separators.length; i++) {
        const sep = separators[i];
        // "" always matches (it is the character-split sentinel).
        if (sep === "" || pieceText.includes(sep)) {
            return { separator: sep, remaining: separators.slice(i + 1) };
        }
    }
    // Should be unreachable when DEFAULT_SEPARATORS ends with "".
    return { separator: "", remaining: [] };
}
/**
 * Split a piece on `separator` while retaining absolute char offsets.
 * When separator is empty, split into single-character pieces.
 *
 * The separator stays attached to the preceding piece so that reconstruction
 * via `text.slice(charStart, charEnd)` works bit-for-bit.
 *
 * @param {{text: string, start: number}} piece  Text plus its absolute offset.
 * @param {string} separator  Separator to split on ("" = per character).
 * @returns {{text: string, start: number}[]}  Sub-pieces with absolute offsets.
 */
function splitOnSeparator(piece, separator) {
    if (separator === "") {
        const out = [];
        for (let i = 0; i < piece.text.length; i++) {
            out.push({ text: piece.text[i], start: piece.start + i });
        }
        return out;
    }
    const out = [];
    let cursor = 0;
    let idx = piece.text.indexOf(separator, cursor);
    while (idx !== -1) {
        // Keep the separator attached to the preceding piece.
        const sliceEnd = idx + separator.length;
        out.push({
            text: piece.text.slice(cursor, sliceEnd),
            start: piece.start + cursor,
        });
        cursor = sliceEnd;
        idx = piece.text.indexOf(separator, cursor);
    }
    if (cursor < piece.text.length) {
        out.push({
            text: piece.text.slice(cursor),
            start: piece.start + cursor,
        });
    }
    return out;
}
/**
 * Recursively flatten a piece into "atoms" — pieces small enough to merge
 * greedily into chunks. Pieces larger than chunkSize are split with the next
 * separator in line; pieces that fit are returned as-is.
 *
 * @param {{text: string, start: number}} piece  Piece to flatten.
 * @param {string[]} separators  Remaining separators, highest priority first.
 * @param {number} chunkSize  Maximum atom length.
 * @returns {{text: string, start: number}[]}  Atoms, each <= chunkSize chars
 *   (except the degenerate no-separators-left case, which returns as-is).
 */
function flattenToAtoms(piece, separators, chunkSize) {
    if (piece.text.length <= chunkSize) {
        return [piece];
    }
    const { separator, remaining } = pickSeparator(piece.text, separators);
    const splits = splitOnSeparator(piece, separator);
    // If the separator didn't actually reduce the piece (e.g., no occurrence),
    // fall through to the next separator with the original piece.
    if (splits.length <= 1) {
        if (remaining.length === 0) {
            // No more separators — return whatever we have, even if oversized.
            return [piece];
        }
        return flattenToAtoms(piece, remaining, chunkSize);
    }
    const out = [];
    for (const sub of splits) {
        if (sub.text.length <= chunkSize) {
            out.push(sub);
        }
        else if (remaining.length > 0) {
            out.push(...flattenToAtoms(sub, remaining, chunkSize));
        }
        else {
            // Last-resort: character-split oversized atom so we never return a
            // single atom larger than chunkSize.
            out.push(...splitOnSeparator(sub, ""));
        }
    }
    return out;
}
/**
 * Build a chunk object from a contiguous run of atoms.
 * `charStart` is taken from the first atom, `charEnd` from the last atom's
 * end boundary, and `content` is `text.slice(start, end)` — this guarantees
 * `text.slice(charStart, charEnd) === content`.
 *
 * @param {string} originalText  The full source document.
 * @param {{text: string, start: number}[]} atoms  Non-empty contiguous run.
 * @param {number} index  Sequential chunk index.
 * @returns {{index: number, content: string, charStart: number, charEnd: number}}
 */
function buildChunk(originalText, atoms, index) {
    const first = atoms[0];
    const last = atoms[atoms.length - 1];
    const charStart = first.start;
    const charEnd = last.start + last.text.length;
    return {
        index,
        content: originalText.slice(charStart, charEnd),
        charStart,
        charEnd,
    };
}
/**
 * Compute the start position for the next chunk's atoms given the previous
 * chunk ended at `prevEnd`. We walk backward through the atom list to find
 * the earliest atom whose start >= prevEnd - chunkOverlap; that atom begins
 * the overlap region.
 *
 * NOTE: the returned index can be at or before the previous chunk's own
 * starting atom when that chunk's content is shorter than `chunkOverlap`.
 * The caller is responsible for clamping the result to guarantee forward
 * progress (see chunkText).
 *
 * @param {{text: string, start: number}[]} atoms  All atoms.
 * @param {number} lastEndAtomIndex  Index of the last atom in the previous chunk.
 * @param {number} prevEnd  charEnd of the previous chunk.
 * @param {number} chunkOverlap  Desired overlap in characters.
 * @returns {number}  Atom index at which the next chunk should start.
 */
function findOverlapStartIndex(atoms, lastEndAtomIndex, prevEnd, chunkOverlap) {
    if (chunkOverlap <= 0) {
        return lastEndAtomIndex + 1;
    }
    const targetStart = prevEnd - chunkOverlap;
    // Find the earliest atom in [0..lastEndAtomIndex] whose start >= targetStart.
    let overlapAtomIdx = lastEndAtomIndex + 1;
    for (let i = lastEndAtomIndex; i >= 0; i--) {
        if (atoms[i].start >= targetStart) {
            overlapAtomIdx = i;
        }
        else {
            break;
        }
    }
    // If overlap produced no progress (no atoms found), step forward to avoid
    // an infinite loop.
    if (overlapAtomIdx > lastEndAtomIndex) {
        overlapAtomIdx = lastEndAtomIndex + 1;
    }
    return overlapAtomIdx;
}
/**
 * Split `text` into overlapping chunks.
 *
 * Semantics:
 * - Empty input -> empty array.
 * - Short input (<= chunkSize) -> single chunk covering the whole text.
 * - For each chunk, `text.slice(charStart, charEnd) === content`.
 * - `charStart` is monotonically non-decreasing across chunks.
 * - Consecutive chunks overlap by ~`chunkOverlap` chars (snapped to atom
 *   boundaries; may differ by up to the largest atom size).
 * - Each chunk's content length is bounded by chunkSize + a small slack for
 *   the separator that snapped the boundary.
 *
 * @param {string} text  Document to split.
 * @param {{chunkSize?: number, chunkOverlap?: number, separators?: string[]}} [opts]
 * @returns {{index: number, content: string, charStart: number, charEnd: number}[]}
 * @throws {Error} When chunkOverlap >= chunkSize (the loop could not converge).
 */
export function chunkText(text, opts = {}) {
    const chunkSize = opts.chunkSize ?? DEFAULT_CHUNK_SIZE;
    const chunkOverlap = opts.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP;
    const separators = opts.separators ?? DEFAULT_SEPARATORS;
    if (text.length === 0) {
        return [];
    }
    if (chunkOverlap >= chunkSize) {
        throw new Error(`chunker: chunkOverlap (${chunkOverlap}) must be smaller than chunkSize (${chunkSize})`);
    }
    // Fast path for short docs — no need to walk separators.
    if (text.length <= chunkSize) {
        return [
            {
                index: 0,
                content: text,
                charStart: 0,
                charEnd: text.length,
            },
        ];
    }
    const atoms = flattenToAtoms({ text, start: 0 }, separators, chunkSize);
    const chunks = [];
    let chunkIdx = 0;
    let i = 0;
    while (i < atoms.length) {
        // Greedily pack atoms into this chunk until adding one more would push
        // the chunk content past chunkSize.
        let runLen = 0;
        let j = i;
        while (j < atoms.length) {
            const atomLen = atoms[j].text.length;
            // Always include at least one atom per chunk to ensure progress.
            if (j > i && runLen + atomLen > chunkSize) {
                break;
            }
            runLen += atomLen;
            j++;
        }
        const chunk = buildChunk(text, atoms.slice(i, j), chunkIdx);
        chunks.push(chunk);
        chunkIdx++;
        if (j >= atoms.length) {
            break;
        }
        // Compute overlap start for the next chunk.
        const nextStart = findOverlapStartIndex(atoms, j - 1, chunk.charEnd, chunkOverlap);
        // BUG FIX: when the current chunk's content is shorter than chunkOverlap
        // (e.g. a tiny atom followed by a near-chunkSize atom), the overlap walk
        // can land at or before `i`, which previously re-emitted the same chunk
        // forever. Clamp so every iteration advances by at least one atom.
        i = Math.max(nextStart, i + 1);
    }
    return chunks;
}
219
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAeH,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAChC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAClC,MAAM,kBAAkB,GAAa,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;AAWnE;;;;;GAKG;AACH,SAAS,aAAa,CAAC,SAAiB,EAAE,UAAoB;IAI5D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,GAAG,GAAG,UAAU,CAAC,CAAC,CAAE,CAAC;QAC3B,IAAI,GAAG,KAAK,EAAE,EAAE,CAAC;YACf,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,SAAS,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;QAChE,CAAC;QACD,IAAI,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE,SAAS,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;QAChE,CAAC;IACH,CAAC;IACD,8DAA8D;IAC9D,OAAO,EAAE,SAAS,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;AAC1C,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,KAAY,EAAE,SAAiB;IACvD,IAAI,SAAS,KAAK,EAAE,EAAE,CAAC;QACrB,MAAM,GAAG,GAAY,EAAE,CAAC;QACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAE,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,GAAG,GAAY,EAAE,CAAC;IACxB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAChD,OAAO,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QAClB,uEAAuE;QACvE,wDAAwD;QACxD,MAAM,QAAQ,GAAG,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC;QACxC,GAAG,CAAC,IAAI,CAAC;YACP,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,QAAQ,CAAC;YACxC,KAAK,EAAE,KAAK,CAAC,KAAK,GAAG,MAAM;SAC5B,CAAC,CAAC;QACH,MAAM,GAAG,QAAQ,CAAC;QAClB,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC9C,CAAC;IACD,IAAI,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QAC/B,GAAG,CAAC,IAAI,CAAC;YACP,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;YAC9B,KAAK,EAAE,KAAK,CAAC,KAAK,GAAG,MAAM;SAC5B,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,SAAS,cAAc
,CACrB,KAAY,EACZ,UAAoB,EACpB,SAAiB;IAEjB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QACnC,OAAO,CAAC,KAAK,CAAC,CAAC;IACjB,CAAC;IACD,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,GAAG,aAAa,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;IACvE,MAAM,MAAM,GAAG,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAElD,2EAA2E;IAC3E,8DAA8D;IAC9D,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,mEAAmE;YACnE,OAAO,CAAC,KAAK,CAAC,CAAC;QACjB,CAAC;QACD,OAAO,cAAc,CAAC,KAAK,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IACrD,CAAC;IAED,MAAM,GAAG,GAAY,EAAE,CAAC;IACxB,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YACjC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAChB,CAAC;aAAM,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChC,KAAK,MAAM,IAAI,IAAI,cAAc,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,CAAC,EAAE,CAAC;gBAC7D,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,mEAAmE;YACnE,qCAAqC;YACrC,GAAG,CAAC,IAAI,CAAC,GAAG,gBAAgB,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,SAAS,UAAU,CACjB,YAAoB,EACpB,KAAc,EACd,KAAa;IAEb,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC;IACxB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;IACtC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC;IAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;IAC9C,OAAO;QACL,KAAK;QACL,OAAO,EAAE,YAAY,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC;QAC/C,SAAS;QACT,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,SAAS,qBAAqB,CAC5B,KAAc,EACd,gBAAwB,EACxB,OAAe,EACf,YAAoB;IAEpB,IAAI,YAAY,IAAI,CAAC,EAAE,CAAC;QACtB,OAAO,gBAAgB,GAAG,CAAC,CAAC;IAC9B,CAAC;IACD,MAAM,WAAW,GAAG,OAAO,GAAG,YAAY,CAAC;IAC3C,8EAA8E;IAC9E,IAAI,cAAc,GAAG,gBAAgB,GAAG,CAAC,CAAC;IAC1C,KAAK,IAAI,CAAC,GAAG,gBAAgB,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,IAAI,KAAK,CAAC,CAAC,CAAE,CAAC,KAAK,IAAI,WAAW,EAAE,CAAC;YACnC,cAAc,GAAG,CAAC,CAAC;QACrB,CAAC;aAAM,CAAC;YACN,MAAM;QACR,CAAC;IACH,CAAC;IACD,0EAA0E;IAC1E,oBAAoB;IACpB,IAAI,cAAc,GAAG,gBAAgB,EAAE,CAAC;QACtC,cAAc,GAAG,gBAAgB,GAAG,CA
AC,CAAC;IACxC,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,OAAuB,EAAE;IAC/D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,kBAAkB,CAAC;IACvD,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,qBAAqB,CAAC;IAChE,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,kBAAkB,CAAC;IAEzD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,YAAY,IAAI,SAAS,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CACb,0BAA0B,YAAY,qCAAqC,SAAS,GAAG,CACxF,CAAC;IACJ,CAAC;IAED,yDAAyD;IACzD,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO;YACL;gBACE,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,SAAS,EAAE,CAAC;gBACZ,OAAO,EAAE,IAAI,CAAC,MAAM;aACrB;SACF,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,cAAc,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC;IAExE,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACxB,uEAAuE;QACvE,oCAAoC;QACpC,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,CAAC,GAAG,CAAC,CAAC;QACV,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,MAAM,CAAC;YACtC,iEAAiE;YACjE,IAAI,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,OAAO,GAAG,SAAS,EAAE,CAAC;gBAC1C,MAAM;YACR,CAAC;YACD,MAAM,IAAI,OAAO,CAAC;YAClB,CAAC,EAAE,CAAC;QACN,CAAC;QACD,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACnC,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACnD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnB,QAAQ,EAAE,CAAC;QAEX,IAAI,CAAC,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACtB,MAAM;QACR,CAAC;QAED,4CAA4C;QAC5C,MAAM,cAAc,GAAG,CAAC,GAAG,CAAC,CAAC;QAC7B,MAAM,SAAS,GAAG,qBAAqB,CACrC,KAAK,EACL,cAAc,EACd,KAAK,CAAC,OAAO,EACb,YAAY,CACb,CAAC;QACF,CAAC,GAAG,SAAS,CAAC;IAChB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
package/dist/db.js CHANGED
@@ -71,6 +71,18 @@ export class KnowledgeDB {
71
71
  key TEXT PRIMARY KEY,
72
72
  value TEXT
73
73
  );
74
+
75
+ CREATE TABLE IF NOT EXISTS chunks (
76
+ id TEXT PRIMARY KEY,
77
+ document_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
78
+ chunk_index INTEGER NOT NULL,
79
+ content TEXT NOT NULL,
80
+ char_start INTEGER NOT NULL,
81
+ char_end INTEGER NOT NULL,
82
+ context_prefix TEXT NOT NULL DEFAULT '',
83
+ UNIQUE(document_id, chunk_index)
84
+ );
85
+ CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id);
74
86
  `);
75
87
  // Migration: add is_stub column for databases created before it existed.
76
88
  // SQLite has no IF NOT EXISTS for ALTER TABLE ADD COLUMN, so we catch the
@@ -81,6 +93,16 @@ export class KnowledgeDB {
81
93
  catch {
82
94
  // Column already exists — expected for new databases
83
95
  }
96
+ // Migration: add memory_tier column (schema v3) for databases created before it existed.
97
+ // Uses the same try/catch pattern as is_stub. CHECK constraint restricts values to
98
+ // 'doc' (existing documents), 'raw' (dream-loop raw memories), 'reflection' (synthesized).
99
+ try {
100
+ this.db.exec("ALTER TABLE documents ADD COLUMN memory_tier TEXT NOT NULL DEFAULT 'doc' CHECK(memory_tier IN ('doc','raw','reflection'))");
101
+ }
102
+ catch {
103
+ // Column already exists — expected for new databases
104
+ }
105
+ this.db.exec("CREATE INDEX IF NOT EXISTS idx_documents_memory_tier ON documents(memory_tier)");
84
106
  // Migration: rebuild relationships table for databases created before the
85
107
  // context column, post_mortem/untyped CHECK types, and target_id FK were added.
86
108
  // SQLite cannot ALTER CHECK constraints, so a full table rebuild is required.
@@ -306,7 +328,7 @@ export class KnowledgeDB {
306
328
  }
307
329
  clearAll() {
308
330
  // outcome_events is intentionally NOT cleared — outcome data is preserved across rebuilds
309
- this.db.exec("DELETE FROM relationships; DELETE FROM tags; DELETE FROM documents; DELETE FROM sync;");
331
+ this.db.exec("DELETE FROM chunks; DELETE FROM relationships; DELETE FROM tags; DELETE FROM documents; DELETE FROM sync;");
310
332
  }
311
333
  close() {
312
334
  this.db.close();
package/dist/db.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"db.js","sourceRoot":"","sources":["../src/db.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAuFpC,MAAM,OAAO,WAAW;IACb,EAAE,CAAe;IAE1B,YAAY,MAAc;QACxB,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;QACrC,IAAI,CAAC,YAAY,EAAE,CAAC;IACtB,CAAC;IAEO,YAAY;QAClB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA4DZ,CAAC,CAAC;QAEH,yEAAyE;QACzE,0EAA0E;QAC1E,0CAA0C;QAC1C,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,4DAA4D,CAAC,CAAC;QAC7E,CAAC;QAAC,MAAM,CAAC;YACP,qDAAqD;QACvD,CAAC;QAED,0EAA0E;QAC1E,gFAAgF;QAChF,8EAA8E;QAC9E,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,2CAA2C,CAAC,CAAC,GAAG,EAAE,CAAC;QACrE,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;OAaZ,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,cAAc,CAAC,GAAsD;QACnE,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;;;KAMf,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,kBAAkB,CAAC,EAAU;QAC3B,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,WAAW,CAAC,EAAU;QACpB,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CACpB,iIAAiI,CAClI,CAAC,GAAG,CAAC,EAAE,CAA4B,CAAC;IACvC,CAAC;IAED,OAAO,CAAC,KAAa,EAAE,IAAc;QACnC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,mCAAmC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC;QAC/E,KAAK,MAAM,GAAG,IAAI,IAAI;YAAE,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,CAAC,KAAa;QACnB,OAAQ,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,oDAAoD,CAAC,CAAC,GAAG,CAAC,KAAK,CAA4B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACtI,CAAC;IAED,eAAe,CAAC,QAAgB,EAAE,QAAgB,EAAE,IAAY,EAAE,OAAgB;QAChF,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,+FAA+F,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,IAAI,IAAI,CAA
C,CAAC;IAClK,CAAC;IAED,oBAAoB,CAAC,QAAgB;QACnC,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,2GAA2G,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAsB,CAAC;IACzK,CAAC;IAED,kBAAkB,CAAC,QAAgB;QACjC,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,2GAA2G,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAsB,CAAC;IACzK,CAAC;IAED,kBAAkB,CAAC,KAAwB;QACzC,MAAM,EAAE,GAAG,UAAU,EAAE,CAAC;QACxB,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;QAEpD,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CACJ,EAAE,EACF,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,KAAK,CAAC,SAAS,IAAI,IAAI,EACvB,SAAS,EACT,KAAK,CAAC,UAAU,IAAI,IAAI,EACxB,KAAK,CAAC,OAAO,IAAI,IAAI,EACrB,KAAK,CAAC,aAAa,IAAI,IAAI,EAC3B,KAAK,CAAC,QAAQ,IAAI,IAAI,EACtB,KAAK,CAAC,UAAU,IAAI,IAAI,EACxB,KAAK,CAAC,KAAK,IAAI,IAAI,EACnB,KAAK,CAAC,SAAS,IAAI,IAAI,EACvB,KAAK,CAAC,cAAc,IAAI,IAAI,EAC5B,OAAO,CACR,CAAC;QAEF,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,WAAW,EAAE,KAAK,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC;IACvF,CAAC;IAED,kBAAkB,CAAC,SAA6B,EAAE;QAChD,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,MAAM,MAAM,GAAc,EAAE,CAAC;QAE7B,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACrC,UAAU,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;YACnC,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,MAAM,CAAC,aAAa,KAAK,SAAS,EAAE,CAAC;YACvC,UAAU,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,aAAa,GAAG,CAAC,CAAC;QAC1C,CAAC;QACD,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YAClC,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAChC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QACD,IAAI,MAAM,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;YACjC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAC/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC9B,CAAC;QACD,IAAI,MAAM,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;YACnC,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,KAAK
,SAAS,EAAE,CAAC;YAC/B,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC5B,CAAC;QAED,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC/E,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;QAEjC,MAAM,GAAG,GAAG;;;;;4BAKY,KAAK;;;KAG5B,CAAC;QAEF,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,EAAE,KAAK,CAAsB,CAAC;IACzE,CAAC;IAED,sBAAsB,CAAC,SAA6B,EAAE;QACpD,oFAAoF;QACpF,MAAM,IAAI,GAAG,IAAI,CAAC,kBAAkB,CAAC,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;QAEtE,MAAM,mBAAmB,GAA2B,EAAE,CAAC;QACvD,MAAM,qBAAqB,GAA2B,EAAE,CAAC;QACzD,MAAM,eAAe,GAA2B,EAAE,CAAC;QACnD,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBACzB,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACjF,CAAC;YACD,qBAAqB,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,qBAAqB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACvF,IAAI,GAAG,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;gBAC/B,eAAe,CAAC,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACrF,CAAC;YACD,IAAI,GAAG,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;gBAC5B,QAAQ,IAAI,GAAG,CAAC,UAAU,CAAC;gBAC3B,UAAU,EAAE,CAAC;YACf,CAAC;YACD,IAAI,GAAG,CAAC,cAAc,KAAK,IAAI,EAAE,CAAC;gBAChC,OAAO,IAAI,GAAG,CAAC,cAAc,CAAC;gBAC9B,SAAS,EAAE,CAAC;YACd,CAAC;QACH,CAAC;QAED,MAAM,iBAAiB,GAAG,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC;aACtD,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;aACzC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;aACjC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEhB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,MAAM;YAClB,aAAa,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI;YAC5D,iBAAiB,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS
,CAAC,CAAC,CAAC,IAAI;YAC7D,mBAAmB;YACnB,qBAAqB;YACrB,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAED,iBAAiB,CAAC,WAAmB;QACnC,MAAM,IAAI,GAAG,IAAI,CAAC,kBAAkB,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;QACpE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAEnC,MAAM,YAAY,GAA2B,EAAE,CAAC;QAChD,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,aAAa,GAAkB,IAAI,CAAC;QAExC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACrE,IAAI,GAAG,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;gBAC5B,UAAU,IAAI,GAAG,CAAC,UAAU,CAAC;YAC/B,CAAC;YACD,IAAI,GAAG,CAAC,SAAS,KAAK,kBAAkB,EAAE,CAAC;gBACzC,QAAQ,EAAE,CAAC;YACb,CAAC;QACH,CAAC;QAED,0EAA0E;QAC1E,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBACzB,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO;YACL,WAAW,EAAE,IAAI,CAAC,MAAM;YACxB,aAAa;YACb,UAAU;YACV,QAAQ;YACR,YAAY;SACb,CAAC;IACJ,CAAC;IAED,aAAa,CAAC,IAAY;QACxB,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CACpB,uEAAuE,CACxE,CAAC,GAAG,CAAC,IAAI,CAA2B,CAAC;IACxC,CAAC;IAED,gBAAgB,CAAC,IAAY,EAAE,KAAa;QAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;KAIf,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;IACnD,CAAC;IAED,gBAAgB,CAAC,IAAY;QAC3B,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,iCAAiC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/D,CAAC;IAED,eAAe;QACb,OAAQ,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC,GAAG,EAA8B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACtG,CAAC;IAED,gBAAgB;QACd,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,GAAG,EAAE,CAAC;IAC5C,CAAC;IAED,OAAO,CAAC,GAAW;QACjB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAkC,CAAC;QAC9G,OAAO,GAAG,EAAE,KAAK,CAAC;IACpB,CAAC;IAED,OAAO,CAAC,GAAW,EAAE,KAAa;QAChC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,mGAAmG,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IACvI,CAAC;IAED,cAAc,CAAC,EAAU;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,C
AAC,sCAAsC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC5E,OAAO,GAAG,KAAK,SAAS,CAAC;IAC3B,CAAC;IAED,cAAc,CAAC,EAAU;QACvB,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,QAAQ;QACN,0FAA0F;QAC1F,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,uFAAuF,CAAC,CAAC;IACxG,CAAC;IAED,KAAK;QACH,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;CACF"}
1
+ {"version":3,"file":"db.js","sourceRoot":"","sources":["../src/db.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAuFpC,MAAM,OAAO,WAAW;IACb,EAAE,CAAe;IAE1B,YAAY,MAAc;QACxB,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;QACrC,IAAI,CAAC,YAAY,EAAE,CAAC;IACtB,CAAC;IAEO,YAAY;QAClB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAwEZ,CAAC,CAAC;QAEH,yEAAyE;QACzE,0EAA0E;QAC1E,0CAA0C;QAC1C,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,4DAA4D,CAAC,CAAC;QAC7E,CAAC;QAAC,MAAM,CAAC;YACP,qDAAqD;QACvD,CAAC;QAED,yFAAyF;QACzF,mFAAmF;QACnF,2FAA2F;QAC3F,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,CAAC,IAAI,CACV,2HAA2H,CAC5H,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,qDAAqD;QACvD,CAAC;QACD,IAAI,CAAC,EAAE,CAAC,IAAI,CACV,gFAAgF,CACjF,CAAC;QAEF,0EAA0E;QAC1E,gFAAgF;QAChF,8EAA8E;QAC9E,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,2CAA2C,CAAC,CAAC,GAAG,EAAE,CAAC;QACrE,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;OAaZ,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,cAAc,CAAC,GAAsD;QACnE,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;;;KAMf,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,kBAAkB,CAAC,EAAU;QAC3B,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,WAAW,CAAC,EAAU;QACpB,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CACpB,iIAAiI,CAClI,CAAC,GAAG,CAAC,EAAE,CAA4B,CAAC;IACvC,CAAC;IAED,OAAO,CAAC,KAAa,EAAE,IAAc;QACnC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,mCAAmC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC;QAC/E,KAAK,MAAM,GAAG,IAAI,IAAI;YAAE,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,CAAC,KAAa;QACnB,OAAQ,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,oDAAoD,CAAC,CAAC,GAAG,CAAC,KAAK,CAA4B,CAAC,GAAG,CAAC,
CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACtI,CAAC;IAED,eAAe,CAAC,QAAgB,EAAE,QAAgB,EAAE,IAAY,EAAE,OAAgB;QAChF,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,+FAA+F,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC,CAAC;IAClK,CAAC;IAED,oBAAoB,CAAC,QAAgB;QACnC,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,2GAA2G,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAsB,CAAC;IACzK,CAAC;IAED,kBAAkB,CAAC,QAAgB;QACjC,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,2GAA2G,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAsB,CAAC;IACzK,CAAC;IAED,kBAAkB,CAAC,KAAwB;QACzC,MAAM,EAAE,GAAG,UAAU,EAAE,CAAC;QACxB,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;QAEpD,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAGf,CAAC,CAAC,GAAG,CACJ,EAAE,EACF,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,KAAK,CAAC,SAAS,IAAI,IAAI,EACvB,SAAS,EACT,KAAK,CAAC,UAAU,IAAI,IAAI,EACxB,KAAK,CAAC,OAAO,IAAI,IAAI,EACrB,KAAK,CAAC,aAAa,IAAI,IAAI,EAC3B,KAAK,CAAC,QAAQ,IAAI,IAAI,EACtB,KAAK,CAAC,UAAU,IAAI,IAAI,EACxB,KAAK,CAAC,KAAK,IAAI,IAAI,EACnB,KAAK,CAAC,SAAS,IAAI,IAAI,EACvB,KAAK,CAAC,cAAc,IAAI,IAAI,EAC5B,OAAO,CACR,CAAC;QAEF,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,WAAW,EAAE,KAAK,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC;IACvF,CAAC;IAED,kBAAkB,CAAC,SAA6B,EAAE;QAChD,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,MAAM,MAAM,GAAc,EAAE,CAAC;QAE7B,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACrC,UAAU,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAClC,CAAC;QACD,IAAI,MAAM,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;YACnC,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,MAAM,CAAC,aAAa,KAAK,SAAS,EAAE,CAAC;YACvC,UAAU,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,aAAa,GAAG,CAAC,CAAC;QAC1C,CAAC;QACD,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YAClC,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAChC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QACD,IAAI,MAAM,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;YACjC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAC/B,MAAM,CAAC,IAAI,
CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC9B,CAAC;QACD,IAAI,MAAM,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;YACnC,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YAC/B,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC5B,CAAC;QAED,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC/E,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;QAEjC,MAAM,GAAG,GAAG;;;;;4BAKY,KAAK;;;KAG5B,CAAC;QAEF,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,EAAE,KAAK,CAAsB,CAAC;IACzE,CAAC;IAED,sBAAsB,CAAC,SAA6B,EAAE;QACpD,oFAAoF;QACpF,MAAM,IAAI,GAAG,IAAI,CAAC,kBAAkB,CAAC,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;QAEtE,MAAM,mBAAmB,GAA2B,EAAE,CAAC;QACvD,MAAM,qBAAqB,GAA2B,EAAE,CAAC;QACzD,MAAM,eAAe,GAA2B,EAAE,CAAC;QACnD,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBACzB,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACjF,CAAC;YACD,qBAAqB,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,qBAAqB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACvF,IAAI,GAAG,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;gBAC/B,eAAe,CAAC,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACrF,CAAC;YACD,IAAI,GAAG,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;gBAC5B,QAAQ,IAAI,GAAG,CAAC,UAAU,CAAC;gBAC3B,UAAU,EAAE,CAAC;YACf,CAAC;YACD,IAAI,GAAG,CAAC,cAAc,KAAK,IAAI,EAAE,CAAC;gBAChC,OAAO,IAAI,GAAG,CAAC,cAAc,CAAC;gBAC9B,SAAS,EAAE,CAAC;YACd,CAAC;QACH,CAAC;QAED,MAAM,iBAAiB,GAAG,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC;aACtD,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;aACzC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;aACjC
,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEhB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,MAAM;YAClB,aAAa,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI;YAC5D,iBAAiB,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI;YAC7D,mBAAmB;YACnB,qBAAqB;YACrB,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAED,iBAAiB,CAAC,WAAmB;QACnC,MAAM,IAAI,GAAG,IAAI,CAAC,kBAAkB,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;QACpE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAEnC,MAAM,YAAY,GAA2B,EAAE,CAAC;QAChD,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,aAAa,GAAkB,IAAI,CAAC;QAExC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACrE,IAAI,GAAG,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;gBAC5B,UAAU,IAAI,GAAG,CAAC,UAAU,CAAC;YAC/B,CAAC;YACD,IAAI,GAAG,CAAC,SAAS,KAAK,kBAAkB,EAAE,CAAC;gBACzC,QAAQ,EAAE,CAAC;YACb,CAAC;QACH,CAAC;QAED,0EAA0E;QAC1E,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,GAAG,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBACzB,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO;YACL,WAAW,EAAE,IAAI,CAAC,MAAM;YACxB,aAAa;YACb,UAAU;YACV,QAAQ;YACR,YAAY;SACb,CAAC;IACJ,CAAC;IAED,aAAa,CAAC,IAAY;QACxB,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CACpB,uEAAuE,CACxE,CAAC,GAAG,CAAC,IAAI,CAA2B,CAAC;IACxC,CAAC;IAED,gBAAgB,CAAC,IAAY,EAAE,KAAa;QAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;KAIf,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;IACnD,CAAC;IAED,gBAAgB,CAAC,IAAY;QAC3B,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,iCAAiC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/D,CAAC;IAED,eAAe;QACb,OAAQ,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC,GAAG,EAA8B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACtG,CAAC;IAED,gBAAgB;QACd,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,GAAG,EAAE,CAAC;IAC5C,CAAC;IAED,OAAO,CAAC,GAAW;QACjB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAkC,CAAC;QAC9G,OAAO,GAAG,EA
AE,KAAK,CAAC;IACpB,CAAC;IAED,OAAO,CAAC,GAAW,EAAE,KAAa;QAChC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,mGAAmG,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IACvI,CAAC;IAED,cAAc,CAAC,EAAU;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC5E,OAAO,GAAG,KAAK,SAAS,CAAC;IAC3B,CAAC;IAED,cAAc,CAAC,EAAU;QACvB,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,QAAQ;QACN,0FAA0F;QAC1F,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,2GAA2G,CAAC,CAAC;IAC5H,CAAC;IAED,KAAK;QACH,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;CACF"}
package/dist/reindex.js CHANGED
@@ -16,7 +16,7 @@ export async function reindex(dirs, dbPath, generate = false) {
16
16
  const vec = new VectorSearch(db);
17
17
  vec.createIndex();
18
18
  // Schema version check — force full re-embed when embedding algorithm changes
19
- const SCHEMA_VERSION = "2";
19
+ const SCHEMA_VERSION = "3";
20
20
  const currentVersion = db.getMeta("schema_version");
21
21
  let needsFullFtsRebuild = false;
22
22
  if (currentVersion !== SCHEMA_VERSION) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-hero-knowledge-index",
3
- "version": "0.1.19",
3
+ "version": "0.1.21",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -0,0 +1,246 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { chunkText, type Chunk } from "../chunker.js";
3
+
4
+ /**
5
+ * Test suite for the recursive character text splitter.
6
+ *
7
+ * Invariants asserted (all must hold for every chunk returned):
8
+ * - text.slice(charStart, charEnd) === content
9
+ * - charStart is monotonically non-decreasing
10
+ * - content.length <= chunkSize + (longest separator length)
11
+ */
12
+
13
+ function assertCoreInvariants(
14
+ text: string,
15
+ chunks: Chunk[],
16
+ chunkSize: number,
17
+ separatorSlack = 2,
18
+ ): void {
19
+ let prevStart = -1;
20
+ for (let i = 0; i < chunks.length; i++) {
21
+ const c = chunks[i]!;
22
+ expect(c.index).toBe(i);
23
+ expect(text.slice(c.charStart, c.charEnd)).toBe(c.content);
24
+ expect(c.charStart).toBeGreaterThanOrEqual(prevStart);
25
+ expect(c.content.length).toBeLessThanOrEqual(chunkSize + separatorSlack);
26
+ prevStart = c.charStart;
27
+ }
28
+ }
29
+
30
+ describe("chunkText — empty and short inputs", () => {
31
+ it("returns [] for empty string", () => {
32
+ expect(chunkText("")).toEqual([]);
33
+ });
34
+
35
+ it("returns a single chunk for a short doc", () => {
36
+ const text = "short doc";
37
+ const chunks = chunkText(text);
38
+ expect(chunks).toHaveLength(1);
39
+ expect(chunks[0]!.charStart).toBe(0);
40
+ expect(chunks[0]!.charEnd).toBe(text.length);
41
+ expect(chunks[0]!.content).toBe(text);
42
+ expect(chunks[0]!.index).toBe(0);
43
+ });
44
+
45
+ it("returns a single chunk at exactly chunkSize", () => {
46
+ const text = "x".repeat(2048);
47
+ const chunks = chunkText(text, { chunkSize: 2048 });
48
+ expect(chunks).toHaveLength(1);
49
+ expect(chunks[0]!.content).toBe(text);
50
+ });
51
+
52
+ it("returns a single chunk when text.length < chunkSize even with unusual separators", () => {
53
+ const chunks = chunkText("hello world", {
54
+ chunkSize: 100,
55
+ chunkOverlap: 10,
56
+ separators: ["##", "\n"],
57
+ });
58
+ expect(chunks).toHaveLength(1);
59
+ expect(chunks[0]!.content).toBe("hello world");
60
+ });
61
+ });
62
+
63
+ describe("chunkText — long documents", () => {
64
+ it("produces >= 4 chunks for an 8K-char paragraph-rich doc with chunkSize=2048", () => {
65
+ const paragraph = "The quick brown fox jumps over the lazy dog. ".repeat(20);
66
+ const text = Array.from({ length: 10 }, () => paragraph).join("\n\n");
67
+ expect(text.length).toBeGreaterThan(8000);
68
+
69
+ const chunks = chunkText(text, { chunkSize: 2048, chunkOverlap: 256 });
70
+ expect(chunks.length).toBeGreaterThanOrEqual(4);
71
+ assertCoreInvariants(text, chunks, 2048);
72
+ });
73
+
74
+ it("bounds content.length at chunkSize + separator slack", () => {
75
+ const text = Array.from({ length: 400 }, (_, i) => `Sentence ${i}.`).join(" ");
76
+ const chunks = chunkText(text, { chunkSize: 512, chunkOverlap: 64 });
77
+ assertCoreInvariants(text, chunks, 512);
78
+ });
79
+
80
+ it("keeps charStart monotonically non-decreasing", () => {
81
+ const text = "x".repeat(10_000);
82
+ const chunks = chunkText(text, { chunkSize: 256, chunkOverlap: 32 });
83
+ for (let i = 1; i < chunks.length; i++) {
84
+ expect(chunks[i]!.charStart).toBeGreaterThanOrEqual(chunks[i - 1]!.charStart);
85
+ }
86
+ });
87
+
88
+ it("reconstructs content bit-for-bit from offsets for every chunk", () => {
89
+ const text = Array.from({ length: 500 }, (_, i) => `Para ${i}.\n\n`).join("");
90
+ const chunks = chunkText(text, { chunkSize: 1024, chunkOverlap: 128 });
91
+ for (const c of chunks) {
92
+ expect(text.slice(c.charStart, c.charEnd)).toBe(c.content);
93
+ }
94
+ });
95
+ });
96
+
97
+ describe("chunkText — overlap behavior", () => {
98
+ it("produces overlap of approximately chunkOverlap between consecutive chunks", () => {
99
+ const text = "abcdefghij ".repeat(500); // ~5500 chars
100
+ const chunkOverlap = 256;
101
+ const chunks = chunkText(text, { chunkSize: 1024, chunkOverlap });
102
+
103
+ expect(chunks.length).toBeGreaterThan(1);
104
+
105
+ for (let i = 1; i < chunks.length; i++) {
106
+ const prev = chunks[i - 1]!;
107
+ const curr = chunks[i]!;
108
+ // Overlap in characters = prev.charEnd - curr.charStart.
109
+ const overlap = prev.charEnd - curr.charStart;
110
+ expect(overlap).toBeGreaterThan(0);
111
+ // Tolerance: +/- 16 chars (allows for snap to atom boundary).
112
+ expect(Math.abs(overlap - chunkOverlap)).toBeLessThanOrEqual(16);
113
+ }
114
+ });
115
+
116
+ it("makes forward progress even when chunkOverlap is zero", () => {
117
+ const text = "abcdefghij ".repeat(300);
118
+ const chunks = chunkText(text, { chunkSize: 512, chunkOverlap: 0 });
119
+ for (let i = 1; i < chunks.length; i++) {
120
+ expect(chunks[i]!.charStart).toBeGreaterThanOrEqual(chunks[i - 1]!.charEnd);
121
+ }
122
+ });
123
+
124
+ it("rejects chunkOverlap >= chunkSize", () => {
125
+ // Only need enough text to trigger the chunking path past the fast return.
126
+ const text = "x".repeat(2048);
127
+ expect(() =>
128
+ chunkText(text, { chunkSize: 100, chunkOverlap: 100 }),
129
+ ).toThrow(/chunkOverlap/);
130
+ expect(() =>
131
+ chunkText(text, { chunkSize: 100, chunkOverlap: 200 }),
132
+ ).toThrow(/chunkOverlap/);
133
+ });
134
+ });
135
+
136
+ describe("chunkText — unicode correctness", () => {
137
+ it("preserves character boundaries with emoji + CJK", () => {
138
+ // Mix emoji (surrogate pairs), CJK, and ASCII; repeat enough to exceed chunkSize.
139
+ const segment =
140
+ "Hello. こんにちは。 你好。 안녕하세요。 Emojis: 🎉🚀✨. ";
141
+ const text = segment.repeat(100); // plenty of chunks
142
+ const chunks = chunkText(text, { chunkSize: 512, chunkOverlap: 64 });
143
+
144
+ assertCoreInvariants(text, chunks, 512);
145
+
146
+ // Slicing must produce the same string as content — if indices split a
147
+ // surrogate pair the JS string would still compare equal char-by-char,
148
+ // but we additionally verify no lone surrogate prefix/suffix on boundaries.
149
+ for (const c of chunks) {
150
+ expect(text.slice(c.charStart, c.charEnd)).toBe(c.content);
151
+ }
152
+ });
153
+
154
+ it("handles pure CJK doc (no ASCII separators beyond ideographic space)", () => {
155
+ const text = "这是一个很长的中文文档。".repeat(300);
156
+ const chunks = chunkText(text, { chunkSize: 256, chunkOverlap: 32 });
157
+ assertCoreInvariants(text, chunks, 256);
158
+ // Must cover the whole doc (last chunk ends at text.length).
159
+ expect(chunks[chunks.length - 1]!.charEnd).toBe(text.length);
160
+ });
161
+ });
162
+
163
+ describe("chunkText — separator priority", () => {
164
+ it("prefers \\n\\n over other separators so code fences stay intact", () => {
165
+ // A large markdown doc with a code fence that would exceed chunkSize if
166
+ // we naively split on every newline; ensure it's kept whole by preferring
167
+ // \n\n as the outer boundary.
168
+ const prefix = "# Heading\n\nSome text before the fence.\n\n";
169
+ const fence =
170
+ "```typescript\n" +
171
+ "const x = 1;\n".repeat(60) +
172
+ "```";
173
+ const suffix = "\n\nText after the fence.";
174
+ const text = prefix + fence + suffix;
175
+
176
+ const chunkSize = fence.length + 50; // Large enough to keep fence whole.
177
+ const chunks = chunkText(text, {
178
+ chunkSize,
179
+ chunkOverlap: 64,
180
+ separators: ["\n\n", "\n", ". ", " ", ""],
181
+ });
182
+
183
+ // Fence should appear intact in at least one chunk.
184
+ const anyChunkContainsWholeFence = chunks.some(c => c.content.includes(fence));
185
+ expect(anyChunkContainsWholeFence).toBe(true);
186
+
187
+ // And no chunk should contain just a fragment of the opening backticks
188
+ // separated from its closing ones — i.e., if a chunk starts with ``` it
189
+ // must also contain the matching closing ```.
190
+ for (const c of chunks) {
191
+ const opens = (c.content.match(/```/g) ?? []).length;
192
+ // Either zero fences (text-only chunk) or an even number (complete fences).
193
+ expect(opens % 2 === 0 || c.content.includes(fence)).toBe(true);
194
+ }
195
+ });
196
+
197
+ it("honors custom separators — '##' heading splitter", () => {
198
+ const text =
199
+ "Intro paragraph here.\n\n" +
200
+ "## Section A\nContent for A.\n\n" +
201
+ "## Section B\nContent for B with a bit more text.\n\n" +
202
+ "## Section C\nFinal content goes here.";
203
+ const chunks = chunkText(text, {
204
+ chunkSize: 40, // small -> forces splitting
205
+ chunkOverlap: 8,
206
+ separators: ["##", "\n", " ", ""],
207
+ });
208
+
209
+ // Custom separator must be honored: the chunker must split on '##'.
210
+ // Because the separator stays attached to the preceding piece (to keep
211
+ // text.slice(start,end) === content), chunk boundaries occur *just after*
212
+ // each '##' occurrence — verify that at least one chunk's charEnd aligns
213
+ // with a '##' position + 2.
214
+ expect(chunks.length).toBeGreaterThan(1);
215
+ const sectionStarts = [
216
+ text.indexOf("## Section A"),
217
+ text.indexOf("## Section B"),
218
+ text.indexOf("## Section C"),
219
+ ];
220
+ const endPositions = chunks.map(c => c.charEnd);
221
+ const boundariesHit = sectionStarts.filter(pos =>
222
+ endPositions.includes(pos + 2),
223
+ );
224
+ expect(boundariesHit.length).toBeGreaterThan(0);
225
+ });
226
+
227
+ it("character-splits as a last resort when no separator applies", () => {
228
+ // A single token longer than chunkSize with no separators; must still
229
+ // produce chunks (via the "" sentinel).
230
+ const text = "a".repeat(300);
231
+ const chunks = chunkText(text, {
232
+ chunkSize: 100,
233
+ chunkOverlap: 10,
234
+ separators: ["\n\n", "\n", ""],
235
+ });
236
+ expect(chunks.length).toBeGreaterThan(1);
237
+ for (const c of chunks) {
238
+ expect(c.content.length).toBeLessThanOrEqual(100);
239
+ }
240
+ // Overall coverage: concatenating (with overlap removed) must reconstruct.
241
+ const firstChar = chunks[0]!.charStart;
242
+ const lastEnd = chunks[chunks.length - 1]!.charEnd;
243
+ expect(firstChar).toBe(0);
244
+ expect(lastEnd).toBe(text.length);
245
+ });
246
+ });
@@ -543,3 +543,168 @@ describe("documentExists", () => {
543
543
  expect(db.documentExists("stub-1")).toBe(true);
544
544
  });
545
545
  });
546
+
547
+ describe("schema v3: chunks table", () => {
548
+ it("creates the chunks table with expected columns", () => {
549
+ // PRAGMA table_info(chunks) returns one row per column with name, type, notnull, dflt_value, pk
550
+ const cols = db.db
551
+ .prepare("PRAGMA table_info(chunks)")
552
+ .all() as Array<{ name: string; type: string; notnull: number; dflt_value: unknown; pk: number }>;
553
+ const byName = Object.fromEntries(cols.map(c => [c.name, c]));
554
+
555
+ expect(byName.id).toMatchObject({ type: "TEXT", pk: 1 });
556
+ expect(byName.document_id).toMatchObject({ type: "TEXT", notnull: 1 });
557
+ expect(byName.chunk_index).toMatchObject({ type: "INTEGER", notnull: 1 });
558
+ expect(byName.content).toMatchObject({ type: "TEXT", notnull: 1 });
559
+ expect(byName.char_start).toMatchObject({ type: "INTEGER", notnull: 1 });
560
+ expect(byName.char_end).toMatchObject({ type: "INTEGER", notnull: 1 });
561
+ expect(byName.context_prefix).toMatchObject({ type: "TEXT", notnull: 1 });
562
+ });
563
+
564
+ it("creates idx_chunks_document_id index", () => {
565
+ const indexes = db.db
566
+ .prepare("PRAGMA index_list(chunks)")
567
+ .all() as Array<{ name: string }>;
568
+ const names = indexes.map(i => i.name);
569
+ expect(names).toContain("idx_chunks_document_id");
570
+ });
571
+
572
+ it("enforces UNIQUE(document_id, chunk_index)", () => {
573
+ db.upsertDocument({ id: "doc-1", path: "p", title: "t", date: null, type: null, status: null, githubIssue: null, content: "" });
574
+ db.db.prepare(
575
+ "INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
576
+ ).run("doc-1#c0", "doc-1", 0, "hello", 0, 5);
577
+
578
+ expect(() => {
579
+ db.db.prepare(
580
+ "INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
581
+ ).run("doc-1#c0-dup", "doc-1", 0, "hello again", 0, 11);
582
+ }).toThrow();
583
+ });
584
+
585
+ it("cascades ON DELETE from documents", () => {
586
+ // better-sqlite3 needs foreign_keys pragma enabled to enforce FK constraints
587
+ db.db.pragma("foreign_keys = ON");
588
+ db.upsertDocument({ id: "doc-1", path: "p", title: "t", date: null, type: null, status: null, githubIssue: null, content: "" });
589
+ db.db.prepare(
590
+ "INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
591
+ ).run("doc-1#c0", "doc-1", 0, "hello", 0, 5);
592
+
593
+ db.deleteDocument("doc-1");
594
+
595
+ const remaining = db.db.prepare("SELECT COUNT(*) AS c FROM chunks WHERE document_id = ?").get("doc-1") as { c: number };
596
+ expect(remaining.c).toBe(0);
597
+ });
598
+
599
+ it("defaults context_prefix to empty string", () => {
600
+ db.upsertDocument({ id: "doc-1", path: "p", title: "t", date: null, type: null, status: null, githubIssue: null, content: "" });
601
+ db.db.prepare(
602
+ "INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
603
+ ).run("doc-1#c0", "doc-1", 0, "hello", 0, 5);
604
+ const row = db.db.prepare("SELECT context_prefix FROM chunks WHERE id = ?").get("doc-1#c0") as { context_prefix: string };
605
+ expect(row.context_prefix).toBe("");
606
+ });
607
+ });
608
+
609
+ describe("schema v3: memory_tier column", () => {
610
+ it("adds memory_tier column with default 'doc'", () => {
611
+ db.upsertDocument({ id: "doc-1", path: "p", title: "t", date: null, type: null, status: null, githubIssue: null, content: "" });
612
+ const row = db.db.prepare("SELECT memory_tier FROM documents WHERE id = ?").get("doc-1") as { memory_tier: string };
613
+ expect(row.memory_tier).toBe("doc");
614
+ });
615
+
616
+ it("accepts 'raw' memory_tier values", () => {
617
+ expect(() => {
618
+ db.db.prepare(
619
+ "INSERT INTO documents (id, path, title, date, type, status, github_issue, content, is_stub, memory_tier) VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, 'raw')"
620
+ ).run("raw-doc", "raw/path.md", "Raw Memory", null, null, null, null, "");
621
+ }).not.toThrow();
622
+ const row = db.db.prepare("SELECT memory_tier FROM documents WHERE id = ?").get("raw-doc") as { memory_tier: string };
623
+ expect(row.memory_tier).toBe("raw");
624
+ });
625
+
626
+ it("accepts 'reflection' memory_tier values", () => {
627
+ expect(() => {
628
+ db.db.prepare(
629
+ "INSERT INTO documents (id, path, title, date, type, status, github_issue, content, is_stub, memory_tier) VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, 'reflection')"
630
+ ).run("refl-doc", "refl/path.md", "Reflection", null, null, null, null, "");
631
+ }).not.toThrow();
632
+ const row = db.db.prepare("SELECT memory_tier FROM documents WHERE id = ?").get("refl-doc") as { memory_tier: string };
633
+ expect(row.memory_tier).toBe("reflection");
634
+ });
635
+
636
+ it("rejects invalid memory_tier values via CHECK constraint", () => {
637
+ expect(() => {
638
+ db.db.prepare(
639
+ "INSERT INTO documents (id, path, title, date, type, status, github_issue, content, is_stub, memory_tier) VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, 'garbage')"
640
+ ).run("bad-doc", "bad/path.md", "Bad", null, null, null, null, "");
641
+ }).toThrow();
642
+ });
643
+
644
+ it("creates idx_documents_memory_tier index", () => {
645
+ const indexes = db.db
646
+ .prepare("PRAGMA index_list(documents)")
647
+ .all() as Array<{ name: string }>;
648
+ const names = indexes.map(i => i.name);
649
+ expect(names).toContain("idx_documents_memory_tier");
650
+ });
651
+
652
+ it("preserves existing documents with default 'doc' when migrating from v2", () => {
653
+ // Simulate a v2 database without memory_tier column
654
+ const dir = mkdtempSync(join(tmpdir(), "knowledge-v3-migration-"));
655
+ const dbPath = join(dir, "legacy-v2.db");
656
+
657
+ const rawDb = new Database(dbPath);
658
+ rawDb.exec(`
659
+ CREATE TABLE documents (
660
+ id TEXT PRIMARY KEY, path TEXT, title TEXT, date TEXT, type TEXT,
661
+ status TEXT, github_issue INTEGER, content TEXT, is_stub INTEGER DEFAULT 0
662
+ );
663
+ CREATE TABLE tags (doc_id TEXT REFERENCES documents(id) ON DELETE CASCADE, tag TEXT, PRIMARY KEY (doc_id, tag));
664
+ CREATE TABLE relationships (
665
+ source_id TEXT REFERENCES documents(id) ON DELETE CASCADE,
666
+ target_id TEXT REFERENCES documents(id) ON DELETE CASCADE,
667
+ type TEXT CHECK(type IN ('builds_on', 'tensions', 'superseded_by', 'post_mortem', 'untyped')),
668
+ context TEXT,
669
+ PRIMARY KEY (source_id, target_id, type)
670
+ );
671
+ CREATE TABLE outcome_events (
672
+ id TEXT PRIMARY KEY, event_type TEXT NOT NULL, issue_number INTEGER NOT NULL,
673
+ session_id TEXT, timestamp TEXT NOT NULL, duration_ms INTEGER, verdict TEXT,
674
+ component_area TEXT, estimate TEXT, drift_count INTEGER, model TEXT,
675
+ agent_type TEXT, iteration_count INTEGER, payload TEXT DEFAULT '{}'
676
+ );
677
+ CREATE TABLE sync (path TEXT PRIMARY KEY, mtime INTEGER NOT NULL, indexed_at INTEGER NOT NULL);
678
+ CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT);
679
+ INSERT INTO documents (id, path, title, content) VALUES ('existing', 'existing.md', 'Existing Doc', 'content');
680
+ `);
681
+ rawDb.close();
682
+
683
+ const migrated = new KnowledgeDB(dbPath);
684
+ const row = migrated.db
685
+ .prepare("SELECT memory_tier FROM documents WHERE id = ?")
686
+ .get("existing") as { memory_tier: string };
687
+ expect(row.memory_tier).toBe("doc");
688
+ migrated.close();
689
+ });
690
+ });
691
+
692
+ describe("schema v3: clearAll includes chunks", () => {
693
+ it("deletes chunks along with documents", () => {
694
+ db.upsertDocument({ id: "doc-1", path: "p", title: "t", date: null, type: null, status: null, githubIssue: null, content: "" });
695
+ db.db.prepare(
696
+ "INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
697
+ ).run("doc-1#c0", "doc-1", 0, "hello", 0, 5);
698
+ db.db.prepare(
699
+ "INSERT INTO chunks (id, document_id, chunk_index, content, char_start, char_end) VALUES (?, ?, ?, ?, ?, ?)"
700
+ ).run("doc-1#c1", "doc-1", 1, "world", 5, 10);
701
+
702
+ const before = db.db.prepare("SELECT COUNT(*) AS c FROM chunks").get() as { c: number };
703
+ expect(before.c).toBe(2);
704
+
705
+ db.clearAll();
706
+
707
+ const after = db.db.prepare("SELECT COUNT(*) AS c FROM chunks").get() as { c: number };
708
+ expect(after.c).toBe(0);
709
+ });
710
+ });
@@ -188,7 +188,7 @@ describe("incremental reindex", () => {
188
188
 
189
189
  // Verify schema version is set
190
190
  const db1 = new KnowledgeDB(dbPath);
191
- expect(db1.getMeta("schema_version")).toBe("2");
191
+ expect(db1.getMeta("schema_version")).toBe("3");
192
192
  db1.close();
193
193
 
194
194
  mockedEmbed.mockClear();
@@ -210,7 +210,7 @@ describe("incremental reindex", () => {
210
210
 
211
211
  // Verify version was updated
212
212
  const db3 = new KnowledgeDB(dbPath);
213
- expect(db3.getMeta("schema_version")).toBe("2");
213
+ expect(db3.getMeta("schema_version")).toBe("3");
214
214
  db3.close();
215
215
  });
216
216
 
package/src/chunker.ts ADDED
@@ -0,0 +1,279 @@
1
+ /**
2
+ * RecursiveCharacterTextSplitter-style chunker.
3
+ *
4
+ * Splits long text into overlapping chunks while preserving the original
5
+ * character offsets (charStart, charEnd) so downstream code can reconstruct
6
+ * positions. Mirrors the semantics of LangChain's RecursiveCharacterTextSplitter:
7
+ * tries each separator in order, snapping chunk boundaries to the highest-priority
8
+ * separator that keeps pieces under chunkSize.
9
+ *
10
+ * Defaults correspond to 512-token chunks with 64-token overlap
11
+ * (approx: 1 token ~= 4 chars for English text).
12
+ */
13
+
14
+ export interface Chunk {
15
+ index: number;
16
+ content: string;
17
+ charStart: number;
18
+ charEnd: number;
19
+ }
20
+
21
+ export interface ChunkerOptions {
22
+ chunkSize?: number;
23
+ chunkOverlap?: number;
24
+ separators?: string[];
25
+ }
26
+
27
+ const DEFAULT_CHUNK_SIZE = 2048;
28
+ const DEFAULT_CHUNK_OVERLAP = 256;
29
+ const DEFAULT_SEPARATORS: string[] = ["\n\n", "\n", ". ", " ", ""];
30
+
31
+ /**
32
+ * A piece of the original text with an absolute offset back to the source.
33
+ * Used as the internal working type while recursing through separators.
34
+ */
35
+ interface Piece {
36
+ text: string;
37
+ start: number;
38
+ }
39
+
40
+ /**
41
+ * Pick the first separator from `separators` that occurs in `piece.text`.
42
+ * Falls back to the last separator (typically `""`) if none match — this is
43
+ * the sentinel that lets us character-split oversized pieces with no natural
44
+ * boundary.
45
+ */
46
+ function pickSeparator(pieceText: string, separators: string[]): {
47
+ separator: string;
48
+ remaining: string[];
49
+ } {
50
+ for (let i = 0; i < separators.length; i++) {
51
+ const sep = separators[i]!;
52
+ if (sep === "") {
53
+ return { separator: sep, remaining: separators.slice(i + 1) };
54
+ }
55
+ if (pieceText.includes(sep)) {
56
+ return { separator: sep, remaining: separators.slice(i + 1) };
57
+ }
58
+ }
59
+ // Should be unreachable when DEFAULT_SEPARATORS ends with "".
60
+ return { separator: "", remaining: [] };
61
+ }
62
+
63
+ /**
64
+ * Split a piece on `separator` while retaining absolute char offsets.
65
+ * When separator is empty, split into single-character pieces.
66
+ */
67
+ function splitOnSeparator(piece: Piece, separator: string): Piece[] {
68
+ if (separator === "") {
69
+ const out: Piece[] = [];
70
+ for (let i = 0; i < piece.text.length; i++) {
71
+ out.push({ text: piece.text[i]!, start: piece.start + i });
72
+ }
73
+ return out;
74
+ }
75
+
76
+ const out: Piece[] = [];
77
+ let cursor = 0;
78
+ let idx = piece.text.indexOf(separator, cursor);
79
+ while (idx !== -1) {
80
+ // Keep the separator attached to the preceding piece so reconstruction
81
+ // via text.slice(charStart, charEnd) works bit-for-bit.
82
+ const sliceEnd = idx + separator.length;
83
+ out.push({
84
+ text: piece.text.slice(cursor, sliceEnd),
85
+ start: piece.start + cursor,
86
+ });
87
+ cursor = sliceEnd;
88
+ idx = piece.text.indexOf(separator, cursor);
89
+ }
90
+ if (cursor < piece.text.length) {
91
+ out.push({
92
+ text: piece.text.slice(cursor),
93
+ start: piece.start + cursor,
94
+ });
95
+ }
96
+ return out;
97
+ }
98
+
99
+ /**
100
+ * Recursively flatten a piece into "atoms" — pieces small enough to merge
101
+ * greedily into chunks. Pieces larger than chunkSize are split with the next
102
+ * separator in line; pieces that fit are returned as-is.
103
+ */
104
+ function flattenToAtoms(
105
+ piece: Piece,
106
+ separators: string[],
107
+ chunkSize: number,
108
+ ): Piece[] {
109
+ if (piece.text.length <= chunkSize) {
110
+ return [piece];
111
+ }
112
+ const { separator, remaining } = pickSeparator(piece.text, separators);
113
+ const splits = splitOnSeparator(piece, separator);
114
+
115
+ // If the separator didn't actually reduce the piece (e.g., no occurrence),
116
+ // fall through to the next separator with the original piece.
117
+ if (splits.length <= 1) {
118
+ if (remaining.length === 0) {
119
+ // No more separators — return whatever we have, even if oversized.
120
+ return [piece];
121
+ }
122
+ return flattenToAtoms(piece, remaining, chunkSize);
123
+ }
124
+
125
+ const out: Piece[] = [];
126
+ for (const sub of splits) {
127
+ if (sub.text.length <= chunkSize) {
128
+ out.push(sub);
129
+ } else if (remaining.length > 0) {
130
+ for (const leaf of flattenToAtoms(sub, remaining, chunkSize)) {
131
+ out.push(leaf);
132
+ }
133
+ } else {
134
+ // Last-resort: character-split oversized atom so we never return a
135
+ // single atom larger than chunkSize.
136
+ out.push(...splitOnSeparator(sub, ""));
137
+ }
138
+ }
139
+ return out;
140
+ }
141
+
142
+ /**
143
+ * Build a chunk object from a contiguous run of atoms.
144
+ * `charStart` is taken from the first atom, `charEnd` from the last atom's
145
+ * end boundary, and `content` is `text.slice(start, end)` — this guarantees
146
+ * `text.slice(charStart, charEnd) === content`.
147
+ */
148
+ function buildChunk(
149
+ originalText: string,
150
+ atoms: Piece[],
151
+ index: number,
152
+ ): Chunk {
153
+ const first = atoms[0]!;
154
+ const last = atoms[atoms.length - 1]!;
155
+ const charStart = first.start;
156
+ const charEnd = last.start + last.text.length;
157
+ return {
158
+ index,
159
+ content: originalText.slice(charStart, charEnd),
160
+ charStart,
161
+ charEnd,
162
+ };
163
+ }
164
+
165
+ /**
166
+ * Compute the start position for the next chunk's atoms given the previous
167
+ * chunk ended at `prevEnd`. We walk backward through the atom list to find
168
+ * the atom whose start >= prevEnd - chunkOverlap; that atom begins the
169
+ * overlap region.
170
+ */
171
+ function findOverlapStartIndex(
172
+ atoms: Piece[],
173
+ lastEndAtomIndex: number,
174
+ prevEnd: number,
175
+ chunkOverlap: number,
176
+ ): number {
177
+ if (chunkOverlap <= 0) {
178
+ return lastEndAtomIndex + 1;
179
+ }
180
+ const targetStart = prevEnd - chunkOverlap;
181
+ // Find the earliest atom in [0..lastEndAtomIndex] whose start >= targetStart.
182
+ let overlapAtomIdx = lastEndAtomIndex + 1;
183
+ for (let i = lastEndAtomIndex; i >= 0; i--) {
184
+ if (atoms[i]!.start >= targetStart) {
185
+ overlapAtomIdx = i;
186
+ } else {
187
+ break;
188
+ }
189
+ }
190
+ // If overlap produced no progress (no atoms found), step forward to avoid
191
+ // an infinite loop.
192
+ if (overlapAtomIdx > lastEndAtomIndex) {
193
+ overlapAtomIdx = lastEndAtomIndex + 1;
194
+ }
195
+ return overlapAtomIdx;
196
+ }
197
+
198
+ /**
199
+ * Split `text` into overlapping chunks.
200
+ *
201
+ * Semantics:
202
+ * - Empty input -> empty array.
203
+ * - Short input (<= chunkSize) -> single chunk covering the whole text.
204
+ * - For each chunk, `text.slice(charStart, charEnd) === content`.
205
+ * - `charStart` is monotonically non-decreasing across chunks.
206
+ * - Consecutive chunks overlap by ~`chunkOverlap` chars (snapped to atom
207
+ * boundaries; may differ by up to the largest atom size).
208
+ * - Each chunk's content length is bounded by chunkSize + a small slack for
209
+ * the separator that snapped the boundary.
210
+ */
211
+ export function chunkText(text: string, opts: ChunkerOptions = {}): Chunk[] {
212
+ const chunkSize = opts.chunkSize ?? DEFAULT_CHUNK_SIZE;
213
+ const chunkOverlap = opts.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP;
214
+ const separators = opts.separators ?? DEFAULT_SEPARATORS;
215
+
216
+ if (text.length === 0) {
217
+ return [];
218
+ }
219
+
220
+ if (chunkOverlap >= chunkSize) {
221
+ throw new Error(
222
+ `chunker: chunkOverlap (${chunkOverlap}) must be smaller than chunkSize (${chunkSize})`,
223
+ );
224
+ }
225
+
226
+ // Fast path for short docs — no need to walk separators.
227
+ if (text.length <= chunkSize) {
228
+ return [
229
+ {
230
+ index: 0,
231
+ content: text,
232
+ charStart: 0,
233
+ charEnd: text.length,
234
+ },
235
+ ];
236
+ }
237
+
238
+ const atoms = flattenToAtoms({ text, start: 0 }, separators, chunkSize);
239
+
240
+ const chunks: Chunk[] = [];
241
+ let chunkIdx = 0;
242
+ let i = 0;
243
+
244
+ while (i < atoms.length) {
245
+ // Greedily pack atoms into this chunk until adding one more would push
246
+ // the chunk content past chunkSize.
247
+ let runLen = 0;
248
+ let j = i;
249
+ while (j < atoms.length) {
250
+ const atomLen = atoms[j]!.text.length;
251
+ // Always include at least one atom per chunk to ensure progress.
252
+ if (j > i && runLen + atomLen > chunkSize) {
253
+ break;
254
+ }
255
+ runLen += atomLen;
256
+ j++;
257
+ }
258
+ const runAtoms = atoms.slice(i, j);
259
+ const chunk = buildChunk(text, runAtoms, chunkIdx);
260
+ chunks.push(chunk);
261
+ chunkIdx++;
262
+
263
+ if (j >= atoms.length) {
264
+ break;
265
+ }
266
+
267
+ // Compute overlap start for the next chunk.
268
+ const lastEndAtomIdx = j - 1;
269
+ const nextStart = findOverlapStartIndex(
270
+ atoms,
271
+ lastEndAtomIdx,
272
+ chunk.charEnd,
273
+ chunkOverlap,
274
+ );
275
+ i = nextStart;
276
+ }
277
+
278
+ return chunks;
279
+ }
package/src/db.ts CHANGED
@@ -160,6 +160,18 @@ export class KnowledgeDB {
160
160
  key TEXT PRIMARY KEY,
161
161
  value TEXT
162
162
  );
163
+
164
+ CREATE TABLE IF NOT EXISTS chunks (
165
+ id TEXT PRIMARY KEY,
166
+ document_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
167
+ chunk_index INTEGER NOT NULL,
168
+ content TEXT NOT NULL,
169
+ char_start INTEGER NOT NULL,
170
+ char_end INTEGER NOT NULL,
171
+ context_prefix TEXT NOT NULL DEFAULT '',
172
+ UNIQUE(document_id, chunk_index)
173
+ );
174
+ CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id);
163
175
  `);
164
176
 
165
177
  // Migration: add is_stub column for databases created before it existed.
@@ -171,6 +183,20 @@ export class KnowledgeDB {
171
183
  // Column already exists — expected for new databases
172
184
  }
173
185
 
186
+ // Migration: add memory_tier column (schema v3) for databases created before it existed.
187
+ // Uses the same try/catch pattern as is_stub. CHECK constraint restricts values to
188
+ // 'doc' (existing documents), 'raw' (dream-loop raw memories), 'reflection' (synthesized).
189
+ try {
190
+ this.db.exec(
191
+ "ALTER TABLE documents ADD COLUMN memory_tier TEXT NOT NULL DEFAULT 'doc' CHECK(memory_tier IN ('doc','raw','reflection'))"
192
+ );
193
+ } catch {
194
+ // Column already exists — expected for new databases
195
+ }
196
+ this.db.exec(
197
+ "CREATE INDEX IF NOT EXISTS idx_documents_memory_tier ON documents(memory_tier)"
198
+ );
199
+
174
200
  // Migration: rebuild relationships table for databases created before the
175
201
  // context column, post_mortem/untyped CHECK types, and target_id FK were added.
176
202
  // SQLite cannot ALTER CHECK constraints, so a full table rebuild is required.
@@ -448,7 +474,7 @@ export class KnowledgeDB {
448
474
 
449
475
  clearAll(): void {
450
476
  // outcome_events is intentionally NOT cleared — outcome data is preserved across rebuilds
451
- this.db.exec("DELETE FROM relationships; DELETE FROM tags; DELETE FROM documents; DELETE FROM sync;");
477
+ this.db.exec("DELETE FROM chunks; DELETE FROM relationships; DELETE FROM tags; DELETE FROM documents; DELETE FROM sync;");
452
478
  }
453
479
 
454
480
  close(): void {
package/src/reindex.ts CHANGED
@@ -19,7 +19,7 @@ export async function reindex(dirs: string[], dbPath: string, generate: boolean
19
19
  vec.createIndex();
20
20
 
21
21
  // Schema version check — force full re-embed when embedding algorithm changes
22
- const SCHEMA_VERSION = "2";
22
+ const SCHEMA_VERSION = "3";
23
23
  const currentVersion = db.getMeta("schema_version");
24
24
  let needsFullFtsRebuild = false;
25
25
  if (currentVersion !== SCHEMA_VERSION) {
package/test_script.js ADDED
@@ -0,0 +1,50 @@
1
+ const Database = require("better-sqlite3");
2
+ const fs = require("fs");
3
+ const path = require("path");
4
+ const os = require("os");
5
+
6
+ // Create a fresh test DB
7
+ const testDir = fs.mkdtempSync(path.join(os.tmpdir(), "fts-bug-"));
8
+ const dbPath = path.join(testDir, "test.db");
9
+
10
+ // Create a fresh database
11
+ const db = new Database(dbPath);
12
+ db.pragma("journal_mode = WAL");
13
+
14
+ // Create basic schema like KnowledgeDB does
15
+ db.exec(`
16
+ CREATE TABLE IF NOT EXISTS documents (
17
+ id TEXT PRIMARY KEY,
18
+ path TEXT,
19
+ title TEXT,
20
+ date TEXT,
21
+ type TEXT,
22
+ status TEXT,
23
+ github_issue INTEGER,
24
+ content TEXT,
25
+ is_stub INTEGER DEFAULT 0
26
+ );
27
+ `);
28
+
29
+ // Insert a test document
30
+ db.prepare(`
31
+ INSERT INTO documents (id, path, title, date, type, status, content)
32
+ VALUES (?, ?, ?, ?, ?, ?, ?)
33
+ `).run("test-doc", "test.md", "Test Doc", "2026-04-03", "idea", "active", "searchable content");
34
+
35
+ // Now try to query the FTS table that doesn't exist
36
+ try {
37
+ const results = db.prepare(`
38
+ SELECT d.id FROM documents_fts
39
+ JOIN documents d ON d.rowid = documents_fts.rowid
40
+ WHERE documents_fts MATCH ?
41
+ `).all("test");
42
+ console.log("SUCCESS: Got results:", results);
43
+ } catch (err) {
44
+ console.log("ERROR (bug confirmed):");
45
+ console.log(" Message:", err.message);
46
+ console.log(" Code:", err.code);
47
+ }
48
+
49
+ db.close();
50
+ fs.rmSync(testDir, { recursive: true });