@ruso-0/nreki 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/CHANGELOG.md +648 -0
  2. package/LICENSE +21 -0
  3. package/README.md +425 -0
  4. package/dist/ast-navigator.d.ts +29 -0
  5. package/dist/ast-navigator.d.ts.map +1 -0
  6. package/dist/ast-navigator.js +279 -0
  7. package/dist/ast-navigator.js.map +1 -0
  8. package/dist/ast-sandbox.d.ts +74 -0
  9. package/dist/ast-sandbox.d.ts.map +1 -0
  10. package/dist/ast-sandbox.js +242 -0
  11. package/dist/ast-sandbox.js.map +1 -0
  12. package/dist/chronos-memory.d.ts +69 -0
  13. package/dist/chronos-memory.d.ts.map +1 -0
  14. package/dist/chronos-memory.js +247 -0
  15. package/dist/chronos-memory.js.map +1 -0
  16. package/dist/circuit-breaker.d.ts +107 -0
  17. package/dist/circuit-breaker.d.ts.map +1 -0
  18. package/dist/circuit-breaker.js +330 -0
  19. package/dist/circuit-breaker.js.map +1 -0
  20. package/dist/compressor-advanced.d.ts +80 -0
  21. package/dist/compressor-advanced.d.ts.map +1 -0
  22. package/dist/compressor-advanced.js +555 -0
  23. package/dist/compressor-advanced.js.map +1 -0
  24. package/dist/compressor.d.ts +81 -0
  25. package/dist/compressor.d.ts.map +1 -0
  26. package/dist/compressor.js +227 -0
  27. package/dist/compressor.js.map +1 -0
  28. package/dist/database.d.ts +169 -0
  29. package/dist/database.d.ts.map +1 -0
  30. package/dist/database.js +1029 -0
  31. package/dist/database.js.map +1 -0
  32. package/dist/embedder.d.ts +73 -0
  33. package/dist/embedder.d.ts.map +1 -0
  34. package/dist/embedder.js +165 -0
  35. package/dist/embedder.js.map +1 -0
  36. package/dist/engine.d.ts +224 -0
  37. package/dist/engine.d.ts.map +1 -0
  38. package/dist/engine.js +582 -0
  39. package/dist/engine.js.map +1 -0
  40. package/dist/hologram/harvester.d.ts +41 -0
  41. package/dist/hologram/harvester.d.ts.map +1 -0
  42. package/dist/hologram/harvester.js +129 -0
  43. package/dist/hologram/harvester.js.map +1 -0
  44. package/dist/hologram/shadow-cache.d.ts +49 -0
  45. package/dist/hologram/shadow-cache.d.ts.map +1 -0
  46. package/dist/hologram/shadow-cache.js +165 -0
  47. package/dist/hologram/shadow-cache.js.map +1 -0
  48. package/dist/hologram/shadow-generator.d.ts +32 -0
  49. package/dist/hologram/shadow-generator.d.ts.map +1 -0
  50. package/dist/hologram/shadow-generator.js +828 -0
  51. package/dist/hologram/shadow-generator.js.map +1 -0
  52. package/dist/hooks/preToolUse.d.ts +63 -0
  53. package/dist/hooks/preToolUse.d.ts.map +1 -0
  54. package/dist/hooks/preToolUse.js +103 -0
  55. package/dist/hooks/preToolUse.js.map +1 -0
  56. package/dist/index.d.ts +19 -0
  57. package/dist/index.d.ts.map +1 -0
  58. package/dist/index.js +367 -0
  59. package/dist/index.js.map +1 -0
  60. package/dist/kernel/kernel-manager.d.ts +52 -0
  61. package/dist/kernel/kernel-manager.d.ts.map +1 -0
  62. package/dist/kernel/kernel-manager.js +197 -0
  63. package/dist/kernel/kernel-manager.js.map +1 -0
  64. package/dist/kernel/kernel-worker.d.ts +9 -0
  65. package/dist/kernel/kernel-worker.d.ts.map +1 -0
  66. package/dist/kernel/kernel-worker.js +76 -0
  67. package/dist/kernel/kernel-worker.js.map +1 -0
  68. package/dist/kernel/nreki-kernel.d.ts +244 -0
  69. package/dist/kernel/nreki-kernel.d.ts.map +1 -0
  70. package/dist/kernel/nreki-kernel.js +1656 -0
  71. package/dist/kernel/nreki-kernel.js.map +1 -0
  72. package/dist/middleware/circuit-breaker.d.ts +32 -0
  73. package/dist/middleware/circuit-breaker.d.ts.map +1 -0
  74. package/dist/middleware/circuit-breaker.js +160 -0
  75. package/dist/middleware/circuit-breaker.js.map +1 -0
  76. package/dist/middleware/file-lock.d.ts +33 -0
  77. package/dist/middleware/file-lock.d.ts.map +1 -0
  78. package/dist/middleware/file-lock.js +55 -0
  79. package/dist/middleware/file-lock.js.map +1 -0
  80. package/dist/middleware/validator.d.ts +26 -0
  81. package/dist/middleware/validator.d.ts.map +1 -0
  82. package/dist/middleware/validator.js +39 -0
  83. package/dist/middleware/validator.js.map +1 -0
  84. package/dist/monitor.d.ts +94 -0
  85. package/dist/monitor.d.ts.map +1 -0
  86. package/dist/monitor.js +221 -0
  87. package/dist/monitor.js.map +1 -0
  88. package/dist/parser-pool.d.ts +28 -0
  89. package/dist/parser-pool.d.ts.map +1 -0
  90. package/dist/parser-pool.js +81 -0
  91. package/dist/parser-pool.js.map +1 -0
  92. package/dist/parser.d.ts +91 -0
  93. package/dist/parser.d.ts.map +1 -0
  94. package/dist/parser.js +311 -0
  95. package/dist/parser.js.map +1 -0
  96. package/dist/pin-memory.d.ts +35 -0
  97. package/dist/pin-memory.d.ts.map +1 -0
  98. package/dist/pin-memory.js +161 -0
  99. package/dist/pin-memory.js.map +1 -0
  100. package/dist/repo-map.d.ts +81 -0
  101. package/dist/repo-map.d.ts.map +1 -0
  102. package/dist/repo-map.js +550 -0
  103. package/dist/repo-map.js.map +1 -0
  104. package/dist/router.d.ts +102 -0
  105. package/dist/router.d.ts.map +1 -0
  106. package/dist/router.js +1989 -0
  107. package/dist/router.js.map +1 -0
  108. package/dist/semantic-edit.d.ts +82 -0
  109. package/dist/semantic-edit.d.ts.map +1 -0
  110. package/dist/semantic-edit.js +529 -0
  111. package/dist/semantic-edit.js.map +1 -0
  112. package/dist/terminal-filter.d.ts +27 -0
  113. package/dist/terminal-filter.d.ts.map +1 -0
  114. package/dist/terminal-filter.js +257 -0
  115. package/dist/terminal-filter.js.map +1 -0
  116. package/dist/undo.d.ts +21 -0
  117. package/dist/undo.d.ts.map +1 -0
  118. package/dist/undo.js +55 -0
  119. package/dist/undo.js.map +1 -0
  120. package/dist/utils/code-tokenizer.d.ts +25 -0
  121. package/dist/utils/code-tokenizer.d.ts.map +1 -0
  122. package/dist/utils/code-tokenizer.js +52 -0
  123. package/dist/utils/code-tokenizer.js.map +1 -0
  124. package/dist/utils/file-filter.d.ts +23 -0
  125. package/dist/utils/file-filter.d.ts.map +1 -0
  126. package/dist/utils/file-filter.js +48 -0
  127. package/dist/utils/file-filter.js.map +1 -0
  128. package/dist/utils/imports.d.ts +32 -0
  129. package/dist/utils/imports.d.ts.map +1 -0
  130. package/dist/utils/imports.js +155 -0
  131. package/dist/utils/imports.js.map +1 -0
  132. package/dist/utils/path-jail.d.ts +27 -0
  133. package/dist/utils/path-jail.d.ts.map +1 -0
  134. package/dist/utils/path-jail.js +95 -0
  135. package/dist/utils/path-jail.js.map +1 -0
  136. package/dist/utils/read-source.d.ts +18 -0
  137. package/dist/utils/read-source.d.ts.map +1 -0
  138. package/dist/utils/read-source.js +22 -0
  139. package/dist/utils/read-source.js.map +1 -0
  140. package/dist/utils/safe-parse.d.ts +20 -0
  141. package/dist/utils/safe-parse.d.ts.map +1 -0
  142. package/dist/utils/safe-parse.js +25 -0
  143. package/dist/utils/safe-parse.js.map +1 -0
  144. package/package.json +75 -0
  145. package/scripts/download-wasm.js +46 -0
  146. package/wasm/.gitkeep +0 -0
  147. package/wasm/tree-sitter-go.wasm +0 -0
  148. package/wasm/tree-sitter-javascript.wasm +0 -0
  149. package/wasm/tree-sitter-python.wasm +0 -0
  150. package/wasm/tree-sitter-typescript.wasm +0 -0
@@ -0,0 +1,1029 @@
1
+ /**
2
+ * database.ts - SQLite persistence layer for NREKI.
3
+ *
4
+ * Uses sql.js (SQLite compiled to WASM) for zero-native-dependency
5
+ * operation. Vector search AND keyword search are both implemented
6
+ * in pure JavaScript:
7
+ *
8
+ * - VectorIndex: brute-force cosine similarity on Float32Array
9
+ * - KeywordIndex: inverted index with Porter-inspired BM25 scoring
10
+ *
11
+ * This eliminates the need for FTS5, sqlite-vec, better-sqlite3,
12
+ * node-gyp, and Visual Studio Build Tools - making NREKI
13
+ * portable to any platform without native compilation.
14
+ */
15
+ import initSqlJs from "sql.js";
16
+ import crypto from "crypto";
17
+ import fs from "fs";
18
+ import path from "path";
19
+ import { codeTokenize } from "./utils/code-tokenizer.js";
20
+ // ─── In-Memory Vector Store ──────────────────────────────────────────
21
+ /**
22
+ * Fast dot-product similarity for L2-normalized vectors.
23
+ * Jina embeddings output L2-normalized vectors (magnitude = 1),
24
+ * so cosine_similarity = dot_product (no sqrt/division needed).
25
+ * This is ~3x faster than full cosine similarity.
26
+ */
27
+ function fastSimilarity(a, b) {
28
+ let dot = 0;
29
+ for (let i = 0; i < a.length; i++) {
30
+ dot += a[i] * b[i];
31
+ }
32
+ return dot;
33
+ }
34
+ // Fallback cosine similarity for non-normalized models:
35
+ // function cosineSimilarity(a: Float32Array, aNorm: number, b: Float32Array, bNorm: number): number {
36
+ // let dot = 0;
37
+ // for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
38
+ // return (aNorm > 0 && bNorm > 0) ? dot / (aNorm * bNorm) : 0;
39
+ // }
40
+ /**
41
+ * Pure JavaScript vector index using brute-force dot-product similarity.
42
+ * For L2-normalized embeddings (Jina), dot product = cosine similarity.
43
+ * For codebases up to ~50K chunks, brute-force is fast enough (<10ms)
44
+ * and avoids any native dependency.
45
+ */
46
+ class VectorIndex {
47
+ vectors = new Map();
48
+ insert(rowid, embedding) {
49
+ this.vectors.set(rowid, embedding);
50
+ }
51
+ delete(rowid) {
52
+ this.vectors.delete(rowid);
53
+ }
54
+ deleteBulk(rowids) {
55
+ for (const id of rowids) {
56
+ this.vectors.delete(id);
57
+ }
58
+ }
59
+ search(query, limit) {
60
+ const scored = [];
61
+ for (const [rowid, vec] of this.vectors) {
62
+ const sim = fastSimilarity(query, vec);
63
+ scored.push({ rowid, distance: 1 - sim });
64
+ }
65
+ scored.sort((a, b) => a.distance - b.distance);
66
+ return scored.slice(0, limit);
67
+ }
68
+ get size() {
69
+ return this.vectors.size;
70
+ }
71
+ /**
72
+ * Serialize the vector index to a binary buffer.
73
+ * Format: [count:u32] + ([rowid:u32][vec:f32×dim])×count
74
+ * Note: rowid uses UInt32 - max 4,294,967,295. Sufficient for practical
75
+ * codebases (would require billions of INSERT/DELETE cycles to overflow).
76
+ */
77
+ serialize() {
78
+ const entries = Array.from(this.vectors.entries());
79
+ const header = Buffer.alloc(4);
80
+ header.writeUInt32LE(entries.length);
81
+ const chunks = [header];
82
+ for (const [rowid, vec] of entries) {
83
+ const idBuf = Buffer.alloc(4);
84
+ idBuf.writeUInt32LE(rowid);
85
+ chunks.push(idBuf);
86
+ chunks.push(Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength));
87
+ }
88
+ return Buffer.concat(chunks);
89
+ }
90
+ static deserialize(buf, dim) {
91
+ const index = new VectorIndex();
92
+ if (buf.length < 4)
93
+ return index;
94
+ const count = buf.readUInt32LE(0);
95
+ let offset = 4;
96
+ const vecBytes = dim * 4;
97
+ for (let i = 0; i < count; i++) {
98
+ if (offset + 4 + vecBytes > buf.length)
99
+ break;
100
+ const rowid = buf.readUInt32LE(offset);
101
+ offset += 4;
102
+ const vec = new Float32Array(buf.buffer.slice(buf.byteOffset + offset, buf.byteOffset + offset + vecBytes));
103
+ index.vectors.set(rowid, vec);
104
+ offset += vecBytes;
105
+ }
106
+ return index;
107
+ }
108
+ }
109
+ // ─── Porter Stemmer ─────────────────────────────────────────────────
110
+ /**
111
+ * Full Porter stemming algorithm in pure TypeScript.
112
+ * Based on the original 1980 paper by Martin Porter.
113
+ */
114
+ class PorterStemmer {
115
+ static isConsonant(word, i) {
116
+ if (i < 0 || i >= word.length)
117
+ return false;
118
+ const c = word[i];
119
+ if (/[aeiou]/.test(c))
120
+ return false;
121
+ if (c === "y")
122
+ return i === 0 || !PorterStemmer.isConsonant(word, i - 1);
123
+ return true;
124
+ }
125
+ /** Measure: count VC sequences in the stem. */
126
+ static measure(stem) {
127
+ let m = 0;
128
+ let i = 0;
129
+ const len = stem.length;
130
+ // Skip leading consonants
131
+ while (i < len && PorterStemmer.isConsonant(stem, i))
132
+ i++;
133
+ while (i < len) {
134
+ // Count vowel sequence
135
+ while (i < len && !PorterStemmer.isConsonant(stem, i))
136
+ i++;
137
+ if (i >= len)
138
+ break;
139
+ // Count consonant sequence
140
+ while (i < len && PorterStemmer.isConsonant(stem, i))
141
+ i++;
142
+ m++;
143
+ }
144
+ return m;
145
+ }
146
+ static containsVowel(stem) {
147
+ for (let i = 0; i < stem.length; i++) {
148
+ if (!PorterStemmer.isConsonant(stem, i))
149
+ return true;
150
+ }
151
+ return false;
152
+ }
153
+ static endsWithDouble(word) {
154
+ if (word.length < 2)
155
+ return false;
156
+ return word[word.length - 1] === word[word.length - 2] &&
157
+ PorterStemmer.isConsonant(word, word.length - 1);
158
+ }
159
+ /** Ends with consonant-vowel-consonant where last C is not w, x, or y. */
160
+ static cvc(word) {
161
+ const len = word.length;
162
+ if (len < 3)
163
+ return false;
164
+ const last = word[len - 1];
165
+ if (!PorterStemmer.isConsonant(word, len - 1))
166
+ return false;
167
+ if (PorterStemmer.isConsonant(word, len - 2))
168
+ return false;
169
+ if (!PorterStemmer.isConsonant(word, len - 3))
170
+ return false;
171
+ return last !== "w" && last !== "x" && last !== "y";
172
+ }
173
+ static stem(word) {
174
+ if (!word || word.length <= 2)
175
+ return word || "";
176
+ let w = word.toLowerCase();
177
+ // Step 1a: Plurals
178
+ if (w.endsWith("sses"))
179
+ w = w.slice(0, -2);
180
+ else if (w.endsWith("ies"))
181
+ w = w.slice(0, -2);
182
+ else if (!w.endsWith("ss") && w.endsWith("s"))
183
+ w = w.slice(0, -1);
184
+ // Step 1b: Past participles / gerunds
185
+ let step1bFlag = false;
186
+ if (w.endsWith("eed")) {
187
+ const stem = w.slice(0, -3);
188
+ if (PorterStemmer.measure(stem) > 0)
189
+ w = w.slice(0, -1); // eed -> ee
190
+ }
191
+ else if (w.endsWith("ed")) {
192
+ const stem = w.slice(0, -2);
193
+ if (PorterStemmer.containsVowel(stem)) {
194
+ w = stem;
195
+ step1bFlag = true;
196
+ }
197
+ }
198
+ else if (w.endsWith("ing")) {
199
+ const stem = w.slice(0, -3);
200
+ if (PorterStemmer.containsVowel(stem)) {
201
+ w = stem;
202
+ step1bFlag = true;
203
+ }
204
+ }
205
+ if (step1bFlag) {
206
+ if (w.endsWith("at") || w.endsWith("bl") || w.endsWith("iz")) {
207
+ w += "e";
208
+ }
209
+ else if (PorterStemmer.endsWithDouble(w) &&
210
+ !/[lsz]$/.test(w)) {
211
+ w = w.slice(0, -1);
212
+ }
213
+ else if (PorterStemmer.measure(w) === 1 && PorterStemmer.cvc(w)) {
214
+ w += "e";
215
+ }
216
+ }
217
+ // Step 1c: y -> i
218
+ if (w.endsWith("y") && PorterStemmer.containsVowel(w.slice(0, -1))) {
219
+ w = w.slice(0, -1) + "i";
220
+ }
221
+ // Step 2: Double suffixes
222
+ const step2 = [
223
+ ["ational", "ate"], ["tional", "tion"], ["enci", "ence"],
224
+ ["anci", "ance"], ["izer", "ize"], ["abli", "able"],
225
+ ["alli", "al"], ["entli", "ent"], ["eli", "e"],
226
+ ["ousli", "ous"], ["ization", "ize"], ["ation", "ate"],
227
+ ["ator", "ate"], ["alism", "al"], ["iveness", "ive"],
228
+ ["fulness", "ful"], ["ousness", "ous"], ["aliti", "al"],
229
+ ["iviti", "ive"], ["biliti", "ble"],
230
+ ];
231
+ for (const [suffix, replacement] of step2) {
232
+ if (w.endsWith(suffix)) {
233
+ const stem = w.slice(0, -suffix.length);
234
+ if (PorterStemmer.measure(stem) > 0)
235
+ w = stem + replacement;
236
+ break;
237
+ }
238
+ }
239
+ // Step 3
240
+ const step3 = [
241
+ ["icate", "ic"], ["ative", ""], ["alize", "al"],
242
+ ["iciti", "ic"], ["ical", "ic"], ["ful", ""], ["ness", ""],
243
+ ];
244
+ for (const [suffix, replacement] of step3) {
245
+ if (w.endsWith(suffix)) {
246
+ const stem = w.slice(0, -suffix.length);
247
+ if (PorterStemmer.measure(stem) > 0)
248
+ w = stem + replacement;
249
+ break;
250
+ }
251
+ }
252
+ // Step 4: Remove suffixes
253
+ const step4 = [
254
+ "al", "ance", "ence", "er", "ic", "able", "ible", "ant",
255
+ "ement", "ment", "ent", "ion", "ou", "ism", "ate", "iti",
256
+ "ous", "ive", "ize",
257
+ ];
258
+ for (const suffix of step4) {
259
+ if (w.endsWith(suffix)) {
260
+ const stem = w.slice(0, -suffix.length);
261
+ if (PorterStemmer.measure(stem) > 1) {
262
+ if (suffix === "ion") {
263
+ if (stem.endsWith("s") || stem.endsWith("t"))
264
+ w = stem;
265
+ }
266
+ else {
267
+ w = stem;
268
+ }
269
+ }
270
+ break;
271
+ }
272
+ }
273
+ // Step 5a: Remove trailing e
274
+ if (w.endsWith("e")) {
275
+ const stem = w.slice(0, -1);
276
+ const m = PorterStemmer.measure(stem);
277
+ if (m > 1 || (m === 1 && !PorterStemmer.cvc(stem))) {
278
+ w = stem;
279
+ }
280
+ }
281
+ // Step 5b: Remove double l
282
+ if (w.endsWith("ll") && PorterStemmer.measure(w) > 1) {
283
+ w = w.slice(0, -1);
284
+ }
285
+ return w;
286
+ }
287
+ }
288
+ // ─── In-Memory Keyword Index ─────────────────────────────────────────
289
+ /**
290
+ * Pure JavaScript inverted index for BM25-style keyword search.
291
+ * Replaces FTS5 entirely - no native extensions needed.
292
+ *
293
+ * Tokenization: lowercases, splits on non-alphanumeric chars,
294
+ * filters stopwords, applies basic stemming (suffix removal).
295
+ */
296
+ class KeywordIndex {
297
+ /** Map from term → Map<rowid, TF> - unified inverted index + term frequency */
298
+ invertedIndex = new Map();
299
+ /** Map from bigram → Set of document rowids (for phrase search) */
300
+ bigramIndex = new Map();
301
+ /** Map from rowid → tokenized terms (for delete and avgDocLen) */
302
+ docTerms = new Map();
303
+ /** Total number of documents */
304
+ docCount = 0;
305
+ /** Average document length in terms */
306
+ avgDocLen = 0;
307
+ static STOPWORDS = new Set([
308
+ "a", "an", "the", "is", "are", "was", "were", "be", "been",
309
+ "being", "have", "has", "had", "do", "does", "did", "will",
310
+ "would", "could", "should", "may", "might", "shall", "can",
311
+ "to", "of", "in", "for", "on", "with", "at", "by", "from",
312
+ "as", "into", "through", "during", "before", "after", "above",
313
+ "below", "and", "but", "or", "not", "no", "if", "then",
314
+ "else", "this", "that", "it", "its", "new", "old",
315
+ ]);
316
+ /** Tokenize text into normalized terms with code-aware splitting. */
317
+ tokenize(text) {
318
+ // FIX 5: Apply code-aware tokenizer before stemming
319
+ const rawTokens = text
320
+ .replace(/[^a-zA-Z0-9_.]/g, " ")
321
+ .split(/\s+/)
322
+ .filter((t) => t.length > 1);
323
+ const allTerms = [];
324
+ for (const raw of rawTokens) {
325
+ // Code-aware tokenization: split identifiers
326
+ const subTokens = codeTokenize(raw);
327
+ if (subTokens.length > 0) {
328
+ for (const sub of subTokens) {
329
+ if (sub.length > 1 && !KeywordIndex.STOPWORDS.has(sub)) {
330
+ allTerms.push(sub);
331
+ }
332
+ }
333
+ }
334
+ else {
335
+ const lower = raw.toLowerCase();
336
+ if (!KeywordIndex.STOPWORDS.has(lower)) {
337
+ allTerms.push(this.stem(lower));
338
+ }
339
+ }
340
+ }
341
+ return allTerms;
342
+ }
343
+ /**
344
+ * Porter stemmer - full implementation of the Porter stemming algorithm.
345
+ * 5 steps with consonant-vowel pattern analysis for accurate English stemming.
346
+ */
347
+ stem(word) {
348
+ if (word.length <= 2)
349
+ return word;
350
+ return PorterStemmer.stem(word);
351
+ }
352
+ /** Add a document to the index. */
353
+ insert(rowid, text) {
354
+ const terms = this.tokenize(text);
355
+ this.docTerms.set(rowid, terms);
356
+ // Compute local TF
357
+ const tfMap = new Map();
358
+ for (const term of terms) {
359
+ tfMap.set(term, (tfMap.get(term) || 0) + 1);
360
+ }
361
+ // Store TF directly in inverted index for O(1) lookup
362
+ for (const [term, tf] of tfMap) {
363
+ let docMap = this.invertedIndex.get(term);
364
+ if (!docMap) {
365
+ docMap = new Map();
366
+ this.invertedIndex.set(term, docMap);
367
+ }
368
+ docMap.set(rowid, tf);
369
+ }
370
+ // Generate bigrams for phrase search
371
+ for (let i = 0; i < terms.length - 1; i++) {
372
+ const bigram = terms[i] + "_" + terms[i + 1];
373
+ if (!this.bigramIndex.has(bigram)) {
374
+ this.bigramIndex.set(bigram, new Set());
375
+ }
376
+ this.bigramIndex.get(bigram).add(rowid);
377
+ }
378
+ this.docCount++;
379
+ this.updateAvgDocLen();
380
+ }
381
+ /** Remove a document from the index. */
382
+ delete(rowid) {
383
+ const terms = this.docTerms.get(rowid);
384
+ if (!terms)
385
+ return;
386
+ for (const term of terms) {
387
+ const docMap = this.invertedIndex.get(term);
388
+ if (docMap) {
389
+ docMap.delete(rowid);
390
+ if (docMap.size === 0) {
391
+ this.invertedIndex.delete(term);
392
+ }
393
+ }
394
+ }
395
+ // Clean up bigram entries
396
+ for (let i = 0; i < terms.length - 1; i++) {
397
+ const bigram = terms[i] + "_" + terms[i + 1];
398
+ const docs = this.bigramIndex.get(bigram);
399
+ if (docs) {
400
+ docs.delete(rowid);
401
+ if (docs.size === 0) {
402
+ this.bigramIndex.delete(bigram);
403
+ }
404
+ }
405
+ }
406
+ this.docTerms.delete(rowid);
407
+ this.docCount = Math.max(0, this.docCount - 1);
408
+ this.updateAvgDocLen();
409
+ }
410
+ deleteBulk(rowids) {
411
+ for (const id of rowids) {
412
+ this.delete(id);
413
+ }
414
+ }
415
+ updateAvgDocLen() {
416
+ if (this.docCount === 0) {
417
+ this.avgDocLen = 0;
418
+ return;
419
+ }
420
+ let totalLen = 0;
421
+ for (const terms of this.docTerms.values()) {
422
+ totalLen += terms.length;
423
+ }
424
+ this.avgDocLen = totalLen / this.docCount;
425
+ }
426
+ /**
427
+ * BM25 search with bigram phrase boosting.
428
+ * Code-tuned parameters: k1 = 1.8, b = 0.35
429
+ * Multi-word queries get a 0.3 weight bigram boost.
430
+ */
431
+ search(queryText, limit) {
432
+ const queryTerms = this.tokenize(queryText);
433
+ if (queryTerms.length === 0)
434
+ return [];
435
+ const k1 = 1.8;
436
+ const b = 0.35;
437
+ const scores = new Map();
438
+ for (const term of queryTerms) {
439
+ const docMap = this.invertedIndex.get(term);
440
+ if (!docMap)
441
+ continue;
442
+ // IDF = log((N - df + 0.5) / (df + 0.5) + 1)
443
+ const df = docMap.size;
444
+ const idf = Math.log((this.docCount - df + 0.5) / (df + 0.5) + 1);
445
+ // TF read directly from inverted index - O(1)
446
+ for (const [rowid, tf] of docMap) {
447
+ const docLen = this.docTerms.get(rowid).length;
448
+ // BM25 formula
449
+ const tfNorm = (tf * (k1 + 1)) /
450
+ (tf + k1 * (1 - b + b * (docLen / (this.avgDocLen || 1))));
451
+ const score = idf * tfNorm;
452
+ scores.set(rowid, (scores.get(rowid) || 0) + score);
453
+ }
454
+ }
455
+ // Bigram phrase boost for multi-word queries
456
+ if (queryTerms.length >= 2) {
457
+ const bigramWeight = 0.3;
458
+ for (let i = 0; i < queryTerms.length - 1; i++) {
459
+ const bigram = queryTerms[i] + "_" + queryTerms[i + 1];
460
+ const docs = this.bigramIndex.get(bigram);
461
+ if (!docs)
462
+ continue;
463
+ for (const rowid of docs) {
464
+ const existing = scores.get(rowid) || 0;
465
+ scores.set(rowid, existing + bigramWeight);
466
+ }
467
+ }
468
+ }
469
+ return Array.from(scores.entries())
470
+ .map(([rowid, score]) => ({ rowid, score }))
471
+ .sort((a, b) => b.score - a.score)
472
+ .slice(0, limit);
473
+ }
474
+ }
475
+ // ─── Database Manager ────────────────────────────────────────────────
476
+ export class NrekiDB {
477
+ db;
478
+ vecIndex = new VectorIndex();
479
+ kwIndex = new KeywordIndex();
480
+ dbPath;
481
+ vecPath;
482
+ initPromise = null;
483
+ _ready = false;
484
+ constructor(dbPath = ".nreki.db") {
485
+ this.dbPath = dbPath;
486
+ this.vecPath = dbPath.replace(/\.db$/, ".vec");
487
+ }
488
+ /** Async initialization - must be called before any DB operation. */
489
+ async initialize() {
490
+ if (this._ready)
491
+ return;
492
+ if (!this.initPromise) {
493
+ this.initPromise = this._init();
494
+ }
495
+ await this.initPromise;
496
+ }
497
+ async _init() {
498
+ const SQL = await initSqlJs();
499
+ // Load existing database if it exists
500
+ if (fs.existsSync(this.dbPath)) {
501
+ const fileBuffer = fs.readFileSync(this.dbPath);
502
+ this.db = new SQL.Database(fileBuffer);
503
+ }
504
+ else {
505
+ this.db = new SQL.Database();
506
+ }
507
+ // Setup schema first (creates metadata table needed for dimension lookup)
508
+ this.setupSchema();
509
+ // Load vector index using stored dimension (default 512)
510
+ const storedDim = parseInt(this.getMetadata("embedding_dim") ?? "512", 10);
511
+ if (fs.existsSync(this.vecPath)) {
512
+ const vecBuffer = fs.readFileSync(this.vecPath);
513
+ this.vecIndex = VectorIndex.deserialize(vecBuffer, storedDim);
514
+ }
515
+ // Rebuild keyword index from existing data
516
+ this.rebuildKeywordIndex();
517
+ this._ready = true;
518
+ }
519
+ get ready() {
520
+ return this._ready;
521
+ }
522
+ // ─── Schema ──────────────────────────────────────────────────
523
+ setupSchema() {
524
+ this.db.run(`
525
+ -- Indexed files with content hashes for Merkle-style diffing
526
+ CREATE TABLE IF NOT EXISTS files (
527
+ path TEXT PRIMARY KEY,
528
+ hash TEXT NOT NULL,
529
+ indexed_at TEXT DEFAULT (datetime('now'))
530
+ );
531
+
532
+ -- AST chunks extracted from source files
533
+ CREATE TABLE IF NOT EXISTS chunks (
534
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
535
+ path TEXT NOT NULL,
536
+ shorthand TEXT NOT NULL,
537
+ raw_code TEXT NOT NULL,
538
+ node_type TEXT NOT NULL DEFAULT 'unknown',
539
+ start_line INTEGER NOT NULL DEFAULT 0,
540
+ end_line INTEGER NOT NULL DEFAULT 0,
541
+ start_index INTEGER NOT NULL DEFAULT 0,
542
+ end_index INTEGER NOT NULL DEFAULT 0,
543
+ symbol_name TEXT NOT NULL DEFAULT ''
544
+ );
545
+
546
+ -- Token usage tracking
547
+ CREATE TABLE IF NOT EXISTS usage_log (
548
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
549
+ timestamp TEXT DEFAULT (datetime('now')),
550
+ tool_name TEXT NOT NULL,
551
+ input_tokens INTEGER NOT NULL DEFAULT 0,
552
+ output_tokens INTEGER NOT NULL DEFAULT 0,
553
+ saved_tokens INTEGER NOT NULL DEFAULT 0
554
+ );
555
+
556
+ -- Indexes for common queries
557
+ CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);
558
+ CREATE INDEX IF NOT EXISTS idx_usage_timestamp ON usage_log(timestamp);
559
+
560
+ -- Metadata key-value store (embedding dimension, model name, etc.)
561
+ CREATE TABLE IF NOT EXISTS metadata (
562
+ key TEXT PRIMARY KEY,
563
+ value TEXT NOT NULL
564
+ );
565
+ `);
566
+ // Migration: add columns for existing DBs that lack them
567
+ const migrationColumns = [
568
+ "ALTER TABLE chunks ADD COLUMN start_index INTEGER NOT NULL DEFAULT 0",
569
+ "ALTER TABLE chunks ADD COLUMN end_index INTEGER NOT NULL DEFAULT 0",
570
+ "ALTER TABLE chunks ADD COLUMN symbol_name TEXT NOT NULL DEFAULT ''",
571
+ ];
572
+ for (const sql of migrationColumns) {
573
+ try {
574
+ this.db.run(sql);
575
+ }
576
+ catch { /* column already exists */ }
577
+ }
578
+ }
579
+ /** Rebuild the in-memory keyword index from all existing chunks. */
580
+ rebuildKeywordIndex() {
581
+ const rows = this.db.exec("SELECT id, shorthand FROM chunks");
582
+ if (rows.length === 0)
583
+ return;
584
+ for (const row of rows[0].values) {
585
+ const [id, shorthand] = row;
586
+ this.kwIndex.insert(id, shorthand);
587
+ }
588
+ }
589
+ // ─── Metadata ────────────────────────────────────────────────
590
+ /** Read a metadata value by key, or null if not set. */
591
+ getMetadata(key) {
592
+ const stmt = this.db.prepare("SELECT value FROM metadata WHERE key = ?");
593
+ stmt.bind([key]);
594
+ let result = null;
595
+ if (stmt.step()) {
596
+ result = stmt.getAsObject().value;
597
+ }
598
+ stmt.free();
599
+ return result;
600
+ }
601
+ /** Write a metadata key-value pair (upsert). */
602
+ setMetadata(key, value) {
603
+ this.db.run("INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)", [key, value]);
604
+ }
605
+ /**
606
+ * Check if the active embedding dimension matches what was stored.
607
+ * If they differ, clear all vectors and update the stored dimension.
608
+ * Returns true if a re-index is needed.
609
+ */
610
+ checkEmbeddingDimension(activeDim) {
611
+ const storedDim = this.getMetadata("embedding_dim");
612
+ if (storedDim && parseInt(storedDim, 10) !== activeDim) {
613
+ console.error(`[NREKI] Embedding dimension changed (${storedDim} -> ${activeDim}). Clearing index.`);
614
+ // Clear all vectors
615
+ this.vecIndex = new VectorIndex();
616
+ // Clear all chunks and files so they get re-indexed
617
+ this.db.run("DELETE FROM chunks");
618
+ this.db.run("DELETE FROM files");
619
+ this.kwIndex = new KeywordIndex();
620
+ this.setMetadata("embedding_dim", String(activeDim));
621
+ return true;
622
+ }
623
+ if (!storedDim) {
624
+ this.setMetadata("embedding_dim", String(activeDim));
625
+ }
626
+ return false;
627
+ }
628
+ // ─── Persistence ─────────────────────────────────────────────
629
+ /** Persist database and vector index to disk. */
630
+ save() {
631
+ // Save SQLite database
632
+ const data = this.db.export();
633
+ const buffer = Buffer.from(data);
634
+ const dir = path.dirname(this.dbPath);
635
+ if (dir && !fs.existsSync(dir)) {
636
+ fs.mkdirSync(dir, { recursive: true });
637
+ }
638
+ fs.writeFileSync(this.dbPath, buffer);
639
+ // Save vector index
640
+ const vecData = this.vecIndex.serialize();
641
+ fs.writeFileSync(this.vecPath, vecData);
642
+ }
643
+ // ─── File Operations ─────────────────────────────────────────
644
+ fileNeedsUpdate(filePath, content) {
645
+ const newHash = crypto.createHash("sha256").update(content).digest("hex");
646
+ const stmt = this.db.prepare("SELECT hash FROM files WHERE path = ?");
647
+ stmt.bind([filePath]);
648
+ if (stmt.step()) {
649
+ const row = stmt.getAsObject();
650
+ stmt.free();
651
+ return row.hash !== newHash;
652
+ }
653
+ stmt.free();
654
+ return true;
655
+ }
656
+ hashContent(content) {
657
+ return crypto.createHash("sha256").update(content).digest("hex");
658
+ }
659
+ upsertFile(filePath, hash) {
660
+ this.db.run("INSERT OR REPLACE INTO files (path, hash, indexed_at) VALUES (?, ?, datetime('now'))", [filePath, hash]);
661
+ }
662
+ clearChunks(filePath) {
663
+ const stmt = this.db.prepare("SELECT id FROM chunks WHERE path = ?");
664
+ stmt.bind([filePath]);
665
+ const ids = [];
666
+ while (stmt.step()) {
667
+ const row = stmt.getAsObject();
668
+ ids.push(row.id);
669
+ }
670
+ stmt.free();
671
+ if (ids.length > 0) {
672
+ this.vecIndex.deleteBulk(ids);
673
+ this.kwIndex.deleteBulk(ids);
674
+ this.db.run("DELETE FROM chunks WHERE path = ?", [filePath]);
675
+ }
676
+ }
677
+ // ─── Chunk Operations ────────────────────────────────────────
678
+ insertChunk(filePath, shorthand, rawCode, nodeType, startLine, endLine, embedding, startIndex = 0, endIndex = 0, symbolName = "") {
679
+ this.db.run(`INSERT INTO chunks (path, shorthand, raw_code, node_type, start_line, end_line, start_index, end_index, symbol_name)
680
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [filePath, shorthand, rawCode, nodeType, startLine, endLine, startIndex, endIndex, symbolName]);
681
+ const rowid = this.db.exec("SELECT last_insert_rowid() AS id")[0]
682
+ .values[0][0];
683
+ // A-04: Only insert non-empty vectors (Lite mode uses Float32Array(0))
684
+ if (embedding.length > 0) {
685
+ this.vecIndex.insert(rowid, embedding);
686
+ }
687
+ this.kwIndex.insert(rowid, shorthand);
688
+ return rowid;
689
+ }
690
+ insertChunksBatch(chunks) {
691
+ this.db.run("BEGIN TRANSACTION");
692
+ try {
693
+ for (const chunk of chunks) {
694
+ this.insertChunk(chunk.path, chunk.shorthand, chunk.rawCode, chunk.nodeType, chunk.startLine, chunk.endLine, chunk.embedding, chunk.startIndex ?? 0, chunk.endIndex ?? 0, chunk.symbolName ?? "");
695
+ }
696
+ this.db.run("COMMIT");
697
+ }
698
+ catch (err) {
699
+ this.db.run("ROLLBACK");
700
+ throw err;
701
+ }
702
+ }
703
+ // ─── Path Boosting ────────────────────────────────────────────
704
+ /** Apply path-based weighting: boost src/, penalize tests/node_modules/. */
705
+ getPathBoost(filePath) {
706
+ const normalized = filePath.replace(/\\/g, "/").toLowerCase();
707
+ if (normalized.includes("/node_modules/"))
708
+ return 0.3;
709
+ if (normalized.includes("/dist/") || normalized.includes("/build/"))
710
+ return 0.5;
711
+ if (normalized.includes("/test") || normalized.includes("/__test"))
712
+ return 0.7;
713
+ if (normalized.includes("/src/"))
714
+ return 1.2;
715
+ if (normalized.includes("/lib/") || normalized.includes("/core/"))
716
+ return 1.1;
717
+ return 1.0;
718
+ }
719
+ // ─── Batch Helpers ─────────────────────────────────────────────
720
+ /**
721
+ * Batch-fetch paths for an array of chunk IDs. Single SQL query.
722
+ * Used by RRF fusion to apply path boosting without N+1 queries.
723
+ */
724
+ fetchPathsBatch(ids) {
725
+ const result = new Map();
726
+ if (ids.length === 0)
727
+ return result;
728
+ const placeholders = ids.map(() => "?").join(",");
729
+ const stmt = this.db.prepare(`SELECT id, path FROM chunks WHERE id IN (${placeholders})`);
730
+ stmt.bind(ids);
731
+ while (stmt.step()) {
732
+ const row = stmt.getAsObject();
733
+ result.set(row.id, row.path);
734
+ }
735
+ stmt.free();
736
+ return result;
737
+ }
738
+ /**
739
+ * Batch-fetch full chunk data for an array of chunk IDs. Single SQL query.
740
+ * Used by all search methods to hydrate final results without N+1 queries.
741
+ */
742
+ fetchChunksBatch(ids) {
743
+ const result = new Map();
744
+ if (ids.length === 0)
745
+ return result;
746
+ const placeholders = ids.map(() => "?").join(",");
747
+ const stmt = this.db.prepare(`SELECT id, path, shorthand, raw_code, node_type, start_line, end_line, start_index, end_index, symbol_name
748
+ FROM chunks WHERE id IN (${placeholders})`);
749
+ stmt.bind(ids);
750
+ while (stmt.step()) {
751
+ const row = stmt.getAsObject();
752
+ result.set(row.id, {
753
+ id: row.id,
754
+ path: row.path,
755
+ shorthand: row.shorthand,
756
+ raw_code: row.raw_code,
757
+ node_type: row.node_type,
758
+ start_line: row.start_line,
759
+ end_line: row.end_line,
760
+ start_index: row.start_index ?? 0,
761
+ end_index: row.end_index ?? 0,
762
+ symbol_name: row.symbol_name ?? "",
763
+ });
764
+ }
765
+ stmt.free();
766
+ return result;
767
+ }
768
+ // ─── Search Operations ───────────────────────────────────────
769
+ /**
770
+ * Hybrid search using Reciprocal Rank Fusion (RRF).
771
+ * Combines:
772
+ * - Vector similarity (semantic, cosine distance)
773
+ * - BM25 keyword matching (in pure JS inverted index)
774
+ *
775
+ * RRF formula: score = Σ 1/(k + rank_i) where k=10
776
+ */
777
+ searchHybrid(queryEmbedding, queryText, limit = 10) {
778
+ // 1. Vector search - top 60 by cosine similarity
779
+ const vecResults = this.vecIndex.search(queryEmbedding, 60);
780
+ const vecRanks = new Map();
781
+ vecResults.forEach((r, i) => vecRanks.set(r.rowid, i + 1));
782
+ // 2. BM25 keyword search - top 60 by term relevance
783
+ const kwResults = this.kwIndex.search(queryText, 60);
784
+ const kwRanks = new Map();
785
+ kwResults.forEach((r, i) => kwRanks.set(r.rowid, i + 1));
786
+ // 3. RRF fusion with path boosting (batch query)
787
+ const allIds = new Set([...vecRanks.keys(), ...kwRanks.keys()]);
788
+ const pathMap = this.fetchPathsBatch([...allIds]);
789
+ const scored = [];
790
+ for (const id of allIds) {
791
+ const vecRank = vecRanks.get(id);
792
+ const kwRank = kwRanks.get(id);
793
+ let rrf = (vecRank ? 1.0 / (10 + vecRank) : 0) +
794
+ (kwRank ? 1.0 / (10 + kwRank) : 0);
795
+ const filePath = pathMap.get(id);
796
+ if (filePath) {
797
+ rrf *= this.getPathBoost(filePath);
798
+ }
799
+ scored.push({ id, rrf });
800
+ }
801
+ scored.sort((a, b) => b.rrf - a.rrf);
802
+ const topIds = scored.slice(0, limit);
803
+ // 4. Fetch full chunk data (batch query)
804
+ const chunkMap = this.fetchChunksBatch(topIds.map(t => t.id));
805
+ const results = [];
806
+ for (const { id, rrf } of topIds) {
807
+ const row = chunkMap.get(id);
808
+ if (row) {
809
+ results.push({
810
+ id: row.id, path: row.path, shorthand: row.shorthand,
811
+ raw_code: row.raw_code, node_type: row.node_type,
812
+ start_line: row.start_line, end_line: row.end_line,
813
+ start_index: row.start_index, end_index: row.end_index,
814
+ symbol_name: row.symbol_name,
815
+ rrf_score: rrf,
816
+ });
817
+ }
818
+ }
819
+ return results;
820
+ }
821
+ /**
822
+ * Keyword-only search using BM25 (for Lite mode - no embeddings needed).
823
+ * Uses the in-memory KeywordIndex with path boosting.
824
+ */
825
+ searchKeywordOnly(queryText, limit = 10) {
826
+ const kwResults = this.kwIndex.search(queryText, limit * 2);
827
+ if (kwResults.length === 0)
828
+ return [];
829
+ const chunkMap = this.fetchChunksBatch(kwResults.map(r => r.rowid));
830
+ const results = [];
831
+ for (const { rowid, score } of kwResults) {
832
+ const row = chunkMap.get(rowid);
833
+ if (row) {
834
+ const boostedScore = score * this.getPathBoost(row.path);
835
+ results.push({
836
+ id: row.id, path: row.path, shorthand: row.shorthand,
837
+ raw_code: row.raw_code, node_type: row.node_type,
838
+ start_line: row.start_line, end_line: row.end_line,
839
+ start_index: row.start_index, end_index: row.end_index,
840
+ symbol_name: row.symbol_name,
841
+ rrf_score: boostedScore,
842
+ });
843
+ }
844
+ }
845
+ results.sort((a, b) => b.rrf_score - a.rrf_score);
846
+ return results.slice(0, limit);
847
+ }
848
+ /**
849
+ * BM25-powered fast resolution for import-anchored auto-context.
850
+ * Searches "symbol pathHint" together to defeat homonyms.
851
+ * Enforces a 150ms hard timeout to prevent event loop blocking.
852
+ */
853
+ resolveImportSignatures(deps, maxTimeMs = 150) {
854
+ if (!this._ready || deps.length === 0)
855
+ return [];
856
+ const start = performance.now();
857
+ const results = [];
858
+ const seenSymbols = new Set();
859
+ const escapeRegex = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
860
+ for (const dep of deps) {
861
+ if (seenSymbols.has(dep.symbol))
862
+ continue;
863
+ seenSymbols.add(dep.symbol);
864
+ if (performance.now() - start > maxTimeMs)
865
+ break;
866
+ // BM25 with two terms: symbol + path hint defeats homonyms
867
+ const cleanHint = dep.pathHint.replace(/['"%_]/g, " ").trim();
868
+ const queryText = cleanHint
869
+ ? `${dep.symbol} ${cleanHint}`
870
+ : dep.symbol;
871
+ const hits = this.searchKeywordOnly(queryText, 3);
872
+ if (hits.length > 0) {
873
+ // Final validation: symbol must appear textually in the shorthand
874
+ // Uses safe boundaries (not \b) to handle $store etc.
875
+ const safeSym = escapeRegex(dep.symbol);
876
+ const exactRegex = new RegExp(`(^|[^a-zA-Z0-9_$])${safeSym}(?=[^a-zA-Z0-9_$]|$)`);
877
+ for (const hit of hits) {
878
+ if (exactRegex.test(hit.shorthand)) {
879
+ results.push({ raw: hit.shorthand, path: hit.path });
880
+ break;
881
+ }
882
+ }
883
+ }
884
+ }
885
+ return results;
886
+ }
887
+ searchVector(queryEmbedding, limit = 10) {
888
+ const vecResults = this.vecIndex.search(queryEmbedding, limit);
889
+ if (vecResults.length === 0)
890
+ return [];
891
+ const chunkMap = this.fetchChunksBatch(vecResults.map(r => r.rowid));
892
+ const results = [];
893
+ for (const { rowid, distance } of vecResults) {
894
+ const row = chunkMap.get(rowid);
895
+ if (row) {
896
+ results.push({
897
+ id: row.id, path: row.path, shorthand: row.shorthand,
898
+ raw_code: row.raw_code, node_type: row.node_type,
899
+ start_line: row.start_line, end_line: row.end_line,
900
+ start_index: row.start_index, end_index: row.end_index,
901
+ symbol_name: row.symbol_name,
902
+ rrf_score: 1 - distance,
903
+ });
904
+ }
905
+ }
906
+ return results;
907
+ }
908
+ // ─── Usage Tracking ──────────────────────────────────────────
909
+ logUsage(toolName, inputTokens, outputTokens, savedTokens) {
910
+ this.db.run(`INSERT INTO usage_log (tool_name, input_tokens, output_tokens, saved_tokens)
911
+ VALUES (?, ?, ?, ?)`, [toolName, inputTokens, outputTokens, savedTokens]);
912
+ }
913
+ getUsageStats(since) {
914
+ const whereClause = since ? "WHERE timestamp >= ?" : "";
915
+ const params = since ? [since] : [];
916
+ const stmt = this.db.prepare(`SELECT
917
+ COALESCE(SUM(input_tokens), 0) AS total_input,
918
+ COALESCE(SUM(output_tokens), 0) AS total_output,
919
+ COALESCE(SUM(saved_tokens), 0) AS total_saved,
920
+ COUNT(*) AS tool_calls
921
+ FROM usage_log ${whereClause}`);
922
+ if (params.length > 0)
923
+ stmt.bind(params);
924
+ let result = { total_input: 0, total_output: 0, total_saved: 0, tool_calls: 0 };
925
+ if (stmt.step()) {
926
+ const row = stmt.getAsObject();
927
+ result = {
928
+ total_input: row.total_input ?? 0,
929
+ total_output: row.total_output ?? 0,
930
+ total_saved: row.total_saved ?? 0,
931
+ tool_calls: row.tool_calls ?? 0,
932
+ };
933
+ }
934
+ stmt.free();
935
+ return result;
936
+ }
937
+ /**
938
+ * Find the heaviest files by total raw code size.
939
+ * Zero disk I/O - queries indexed data in SQLite.
940
+ */
941
+ getTopHeavyFiles(limit = 5) {
942
+ if (!this._ready)
943
+ return [];
944
+ const stmt = this.db.prepare(`
945
+ SELECT path, SUM(LENGTH(raw_code)) as total_chars
946
+ FROM chunks
947
+ GROUP BY path
948
+ ORDER BY total_chars DESC
949
+ LIMIT ?
950
+ `);
951
+ stmt.bind([limit]);
952
+ const results = [];
953
+ while (stmt.step()) {
954
+ const row = stmt.getAsObject();
955
+ results.push({
956
+ path: row.path,
957
+ estimated_tokens: Math.ceil(row.total_chars / 3.5),
958
+ });
959
+ }
960
+ stmt.free();
961
+ return results;
962
+ }
963
+ // ─── Statistics ──────────────────────────────────────────────
964
+ getStats() {
965
+ const rows = this.db.exec(`
966
+ SELECT
967
+ COUNT(*) AS total_chunks,
968
+ COUNT(DISTINCT path) AS total_files,
969
+ COALESCE(SUM(LENGTH(raw_code)), 0) AS total_raw_tokens,
970
+ COALESCE(SUM(LENGTH(shorthand)), 0) AS total_shorthand_tokens
971
+ FROM chunks
972
+ `);
973
+ if (rows.length === 0 || rows[0].values.length === 0) {
974
+ return {
975
+ total_chunks: 0,
976
+ total_files: 0,
977
+ total_raw_tokens: 0,
978
+ total_shorthand_tokens: 0,
979
+ compression_ratio: 0,
980
+ };
981
+ }
982
+ const [total_chunks, total_files, total_raw_tokens, total_shorthand_tokens] = rows[0].values[0];
983
+ return {
984
+ total_chunks,
985
+ total_files,
986
+ total_raw_tokens,
987
+ total_shorthand_tokens,
988
+ compression_ratio: total_raw_tokens > 0
989
+ ? 1 - total_shorthand_tokens / total_raw_tokens
990
+ : 0,
991
+ };
992
+ }
993
+ getFileCount() {
994
+ const rows = this.db.exec("SELECT COUNT(*) AS count FROM files");
995
+ if (rows.length === 0)
996
+ return 0;
997
+ return rows[0].values[0][0];
998
+ }
999
+ getVectorCount() {
1000
+ return this.vecIndex.size;
1001
+ }
1002
+ /**
1003
+ * Scan ALL chunks whose raw_code contains the given symbol name.
1004
+ * Returns distinct file paths. Used by prepare_refactor for 100% coverage.
1005
+ */
1006
+ searchRawCode(symbolName) {
1007
+ if (!this._ready)
1008
+ return [];
1009
+ // C-04 + A-07: Escape backslashes first, then LIKE wildcards
1010
+ const escaped = symbolName.replace(/\\/g, '\\\\').replace(/[%_]/g, '\\$&');
1011
+ const stmt = this.db.prepare(`SELECT DISTINCT path FROM chunks WHERE raw_code LIKE ? ESCAPE '\\'`);
1012
+ stmt.bind([`%${escaped}%`]);
1013
+ const paths = [];
1014
+ while (stmt.step()) {
1015
+ paths.push(stmt.getAsObject().path);
1016
+ }
1017
+ stmt.free();
1018
+ return paths;
1019
+ }
1020
+ close() {
1021
+ this.save();
1022
+ this.db.close();
1023
+ }
1024
+ }
1025
// Re-export the similarity helper so unit tests can exercise it directly.
export { fastSimilarity };
// Backward-compat alias: earlier releases exported the class as TokenGuardDB.
export { NrekiDB as TokenGuardDB };
//# sourceMappingURL=database.js.map