brainbank 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +149 -16
- package/dist/{types-Da_zLLOl.d.ts → base-9vfWRHCV.d.ts} +131 -31
- package/dist/{chunk-YGSEUWLV.js → chunk-6MFTQV3O.js} +911 -674
- package/dist/chunk-6MFTQV3O.js.map +1 -0
- package/dist/chunk-7JCEW7LT.js +266 -0
- package/dist/chunk-7JCEW7LT.js.map +1 -0
- package/dist/{chunk-GOUBW7UA.js → chunk-F6SJ3U4H.js} +98 -34
- package/dist/chunk-F6SJ3U4H.js.map +1 -0
- package/dist/{chunk-MJ3Y24H6.js → chunk-FJJY4H2Y.js} +11 -11
- package/dist/chunk-FJJY4H2Y.js.map +1 -0
- package/dist/{chunk-3GAIDXRW.js → chunk-GUT5MSJT.js} +5 -11
- package/dist/chunk-GUT5MSJT.js.map +1 -0
- package/dist/{chunk-2P3EGY6S.js → chunk-QNHBCOKB.js} +2 -2
- package/dist/chunk-QNHBCOKB.js.map +1 -0
- package/dist/{chunk-4ZKBQ33J.js → chunk-V4UJKXPK.js} +23 -5
- package/dist/chunk-V4UJKXPK.js.map +1 -0
- package/dist/chunk-WR4WXKJT.js +723 -0
- package/dist/chunk-WR4WXKJT.js.map +1 -0
- package/dist/{chunk-Z5SU54HP.js → chunk-X6645UVR.js} +3 -3
- package/dist/chunk-X6645UVR.js.map +1 -0
- package/dist/cli.js +150 -100
- package/dist/cli.js.map +1 -1
- package/dist/code.d.ts +5 -5
- package/dist/code.js +1 -1
- package/dist/docs.d.ts +4 -6
- package/dist/docs.js +1 -1
- package/dist/git.d.ts +5 -5
- package/dist/git.js +1 -1
- package/dist/index.d.ts +95 -104
- package/dist/index.js +13 -13
- package/dist/memory.d.ts +5 -7
- package/dist/memory.js +9 -12
- package/dist/memory.js.map +1 -1
- package/dist/notes.d.ts +4 -6
- package/dist/notes.js +7 -10
- package/dist/notes.js.map +1 -1
- package/dist/{openai-PCTYLOWI.js → openai-CYDMYX7X.js} +2 -2
- package/package.json +24 -4
- package/dist/chunk-2P3EGY6S.js.map +0 -1
- package/dist/chunk-3GAIDXRW.js.map +0 -1
- package/dist/chunk-4ZKBQ33J.js.map +0 -1
- package/dist/chunk-EDKSKLX4.js +0 -490
- package/dist/chunk-EDKSKLX4.js.map +0 -1
- package/dist/chunk-GOUBW7UA.js.map +0 -1
- package/dist/chunk-MJ3Y24H6.js.map +0 -1
- package/dist/chunk-N6ZMBFDE.js +0 -224
- package/dist/chunk-N6ZMBFDE.js.map +0 -1
- package/dist/chunk-YGSEUWLV.js.map +0 -1
- package/dist/chunk-Z5SU54HP.js.map +0 -1
- package/dist/{openai-PCTYLOWI.js.map → openai-CYDMYX7X.js.map} +0 -0
|
@@ -0,0 +1,723 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__name
|
|
3
|
+
} from "./chunk-7QVYU63E.js";
|
|
4
|
+
|
|
5
|
+
// src/indexers/code/code-indexer.ts
|
|
6
|
+
import fs from "fs";
|
|
7
|
+
import path2 from "path";
|
|
8
|
+
|
|
9
|
+
// src/indexers/code/code-chunker.ts
|
|
10
|
+
import { createRequire as createRequire2 } from "module";
|
|
11
|
+
|
|
12
|
+
// src/indexers/code/grammars.ts
|
|
13
|
+
import { createRequire } from "module";
|
|
14
|
+
var require2 = createRequire(import.meta.url);
|
|
15
|
+
/**
 * Build a lazy loader for an optional tree-sitter grammar package.
 *
 * The package is not required until the returned function is invoked, so a
 * grammar that is not installed costs nothing until its language is used.
 *
 * @param {string} pkg - Package name passed to `require2`.
 * @param {Record<string, string[]>} nodeTypes - Category → AST node type names.
 * @param {string} [accessor] - Property of the loaded module that holds the
 *   grammar, for packages that expose several grammars on one module.
 * @returns {() => ({ grammar: unknown, nodeTypes: Record<string, string[]> } | null)}
 *   Loader returning the grammar config, or `null` when the package cannot be loaded.
 */
function tryGrammar(pkg, nodeTypes, accessor) {
  return () => {
    try {
      const mod = require2(pkg);
      // If the requested property is absent, fall back to the module itself
      // instead of handing back `grammar: undefined`.
      const grammar = accessor ? mod[accessor] ?? mod : mod;
      return { grammar, nodeTypes };
    } catch {
      // Loading failed (e.g. the optional package is not installed): report "no grammar".
      return null;
    }
  };
}
|
|
25
|
+
__name(tryGrammar, "tryGrammar");
|
|
26
|
+
/**
 * Registry of lazy grammar loaders, keyed by language id.
 *
 * Each entry is the function returned by `tryGrammar`; calling it yields
 * `{ grammar, nodeTypes }` or `null`. `nodeTypes` maps a chunk category
 * (class, function, ...) to the AST node type names that belong to it.
 */
var GRAMMARS = {
  // ── Web ──────────────────────────────────────────
  typescript: tryGrammar(
    "tree-sitter-typescript",
    {
      class: ["class_declaration"],
      interface: ["interface_declaration", "type_alias_declaration"],
      function: ["function_declaration", "method_definition"],
      variable: ["lexical_declaration"],
    },
    "typescript"
  ),
  javascript: tryGrammar("tree-sitter-javascript", {
    class: ["class_declaration"],
    function: ["function_declaration", "method_definition"],
    variable: ["lexical_declaration"],
  }),
  // No node types configured for markup/stylesheets.
  html: tryGrammar("tree-sitter-html", {}),
  css: tryGrammar("tree-sitter-css", {}),

  // ── Systems ──────────────────────────────────────
  go: tryGrammar("tree-sitter-go", {
    function: ["function_declaration", "method_declaration"],
    struct: ["type_declaration"],
  }),
  rust: tryGrammar("tree-sitter-rust", {
    function: ["function_item"],
    struct: ["struct_item"],
    impl: ["impl_item"],
  }),
  c: tryGrammar("tree-sitter-c", {
    function: ["function_definition"],
    struct: ["struct_specifier"],
  }),
  cpp: tryGrammar("tree-sitter-cpp", {
    class: ["class_specifier"],
    function: ["function_definition"],
  }),
  swift: tryGrammar("tree-sitter-swift", {
    class: ["class_declaration"],
    function: ["function_declaration"],
    struct: ["struct_declaration"],
  }),

  // ── JVM ──────────────────────────────────────────
  java: tryGrammar("tree-sitter-java", {
    class: ["class_declaration"],
    interface: ["interface_declaration"],
    method: ["method_declaration"],
  }),
  kotlin: tryGrammar("tree-sitter-kotlin", {
    class: ["class_declaration"],
    function: ["function_declaration"],
  }),
  scala: tryGrammar("tree-sitter-scala", {
    class: ["class_definition"],
    function: ["function_definition"],
  }),

  // ── Scripting ────────────────────────────────────
  python: tryGrammar("tree-sitter-python", {
    class: ["class_definition"],
    function: ["function_definition"],
  }),
  ruby: tryGrammar("tree-sitter-ruby", {
    class: ["class"],
    method: ["method", "singleton_method"],
  }),
  php: tryGrammar(
    "tree-sitter-php",
    {
      class: ["class_declaration"],
      function: ["function_definition", "method_declaration"],
    },
    "php"
  ),
  lua: tryGrammar("tree-sitter-lua", {
    function: ["function_declaration"],
  }),
  bash: tryGrammar("tree-sitter-bash", {
    function: ["function_definition"],
  }),
  elixir: tryGrammar("tree-sitter-elixir", {
    function: ["call"],
    // defmodule, def, defp
  }),

  // ── .NET ─────────────────────────────────────────
  c_sharp: tryGrammar("tree-sitter-c-sharp", {
    class: ["class_declaration"],
    interface: ["interface_declaration"],
    method: ["method_declaration"],
  }),
};
|
|
108
|
+
|
|
109
|
+
// src/indexers/code/code-chunker.ts
|
|
110
|
+
var require3 = createRequire2(import.meta.url);
|
|
111
|
+
/**
 * Splits source files into chunks for indexing.
 *
 * Files that fit within `MAX` lines become a single "file" chunk. Larger
 * files are parsed with tree-sitter (when both the parser and a grammar for
 * the language can be loaded) and split along declarations; otherwise a
 * line-based sliding window is used.
 *
 * Chunk objects have the shape
 * `{ filePath, chunkType, name?, startLine, endLine, content, language }`
 * with 1-based, inclusive line numbers.
 */
var CodeChunker = class {
  static {
    __name(this, "CodeChunker");
  }
  /** Maximum number of lines in one chunk. */
  MAX;
  /** Minimum number of lines a declaration must span to be emitted. */
  MIN;
  /** Number of lines consecutive sliding-window chunks share. */
  OVERLAP;
  /** `null` = not attempted yet, `false` = tree-sitter unavailable, otherwise the parser instance. */
  _parser = null;
  /** language id → grammar config (or `null` when unavailable). */
  _langCache = /* @__PURE__ */ new Map();

  /**
   * @param {{ maxLines?: number, minLines?: number, overlap?: number }} [config]
   *   Defaults: maxLines 80, minLines 3, overlap 5.
   */
  constructor(config = {}) {
    this.MAX = config.maxLines ?? 80;
    this.MIN = config.minLines ?? 3;
    this.OVERLAP = config.overlap ?? 5;
  }

  /**
   * Lazy-init tree-sitter parser.
   *
   * The load is attempted only once: a failure is remembered as `false`, so
   * later calls return `null` immediately instead of retrying the require.
   *
   * @returns {object | null} The parser instance, or `null` when tree-sitter cannot be loaded.
   */
  _ensureParser() {
    // Compare against null explicitly: `!this._parser` would also be true for
    // the `false` failure marker and re-run the failing require on every call.
    if (this._parser === null) {
      try {
        const Parser = require3("tree-sitter");
        this._parser = new Parser();
      } catch {
        this._parser = false;
      }
    }
    return this._parser || null;
  }

  /**
   * Load a language grammar (cached, including negative results).
   *
   * @param {string} language - Language id used as key into `GRAMMARS`.
   * @returns {{ grammar: unknown, nodeTypes: object } | null}
   */
  _loadGrammar(language) {
    if (this._langCache.has(language)) return this._langCache.get(language);
    const factory = GRAMMARS[language];
    const grammar = factory ? factory() : null;
    this._langCache.set(language, grammar);
    return grammar;
  }

  /**
   * Split file content into semantic chunks using tree-sitter AST.
   * Falls back to sliding window if grammar isn't available.
   *
   * @param {string} filePath - Path recorded on every chunk.
   * @param {string} content - Full file text.
   * @param {string} language - Language id recorded on every chunk and used to pick a grammar.
   * @returns {Promise<object[]>} Chunks in the shape described on the class.
   */
  async chunk(filePath, content, language) {
    const lines = content.split("\n");
    // Small files are kept whole.
    if (lines.length <= this.MAX) {
      return [{
        filePath,
        chunkType: "file",
        startLine: 1,
        endLine: lines.length,
        content: content.trim(),
        language
      }];
    }
    const parser = this._ensureParser();
    const langConfig = this._loadGrammar(language);
    if (parser && langConfig) {
      try {
        parser.setLanguage(langConfig.grammar);
        const tree = parser.parse(content);
        const chunks = this._extractChunks(filePath, lines, tree.rootNode, langConfig, language);
        if (chunks.length > 0) {
          return chunks.filter((c) => c.content.length > 20);
        }
      } catch {
        // Best effort: any parser/grammar failure falls through to the
        // line-based fallback below.
      }
    }
    return this._chunkGeneric(filePath, lines, language);
  }

  /** Walk AST and extract top-level semantic blocks. */
  _extractChunks(filePath, lines, rootNode, langConfig, language) {
    const chunks = [];
    // Row ranges already emitted through _addChunk (duplicate guard).
    const seen = /* @__PURE__ */ new Set();
    for (let i = 0; i < rootNode.childCount; i++) {
      const child = rootNode.child(i);
      this._processNode(filePath, lines, child, langConfig, language, chunks, seen);
    }
    return chunks;
  }

  /**
   * Classify and process a single AST node.
   *
   * `export_statement` and `decorated_definition` wrappers are unwrapped: the
   * first child with a known category decides the handling, while the wrapper
   * node supplies the line range.
   */
  _processNode(filePath, lines, node, langConfig, language, chunks, seen) {
    const type = node.type;
    if (type === "export_statement") {
      for (let i = 0; i < node.childCount; i++) {
        const child = node.child(i);
        const category2 = this._categorize(child.type, langConfig);
        if (category2) {
          this._processDeclaration(filePath, lines, node, child, category2, langConfig, language, chunks, seen);
          return;
        }
      }
      // Export without a recognized declaration: keep it as a "function"
      // chunk if it spans enough lines.
      const nodeLines = node.endPosition.row - node.startPosition.row + 1;
      if (nodeLines >= this.MIN) {
        this._addChunk(filePath, lines, node, "function", this._extractName(node), language, chunks, seen);
      }
      return;
    }
    if (type === "decorated_definition") {
      for (let i = 0; i < node.childCount; i++) {
        const child = node.child(i);
        const category2 = this._categorize(child.type, langConfig);
        if (category2) {
          this._processDeclaration(filePath, lines, node, child, category2, langConfig, language, chunks, seen);
          return;
        }
      }
    }
    const category = this._categorize(type, langConfig);
    if (category) {
      this._processDeclaration(filePath, lines, node, node, category, langConfig, language, chunks, seen);
    }
  }

  /**
   * Check which category a node type belongs to.
   *
   * @returns {string | null} First category in `langConfig.nodeTypes` listing `nodeType`, else `null`.
   */
  _categorize(nodeType, langConfig) {
    for (const [category, types] of Object.entries(langConfig.nodeTypes)) {
      if (types && types.includes(nodeType)) return category;
    }
    return null;
  }

  /**
   * Process a matched declaration: class → split by methods, else → chunk directly.
   *
   * `outerNode` provides the line range (it may be an export/decorator
   * wrapper); `innerNode` is the declaration itself and provides the name.
   */
  _processDeclaration(filePath, lines, outerNode, innerNode, category, langConfig, language, chunks, seen) {
    const nodeLines = outerNode.endPosition.row - outerNode.startPosition.row + 1;
    const name = this._extractName(innerNode);
    const chunkType = this._toChunkType(category);
    if ((category === "class" || category === "struct" || category === "impl") && nodeLines > this.MAX) {
      this._splitClassIntoMethods(filePath, lines, outerNode, innerNode, name, langConfig, language, chunks, seen);
      return;
    }
    if (nodeLines > this.MAX) {
      chunks.push(...this._splitLargeBlock(
        filePath,
        lines,
        outerNode.startPosition.row,
        outerNode.endPosition.row,
        name,
        chunkType,
        language
      ));
      return;
    }
    // Declarations shorter than MIN lines are dropped.
    if (nodeLines >= this.MIN) {
      this._addChunk(filePath, lines, outerNode, chunkType, name, language, chunks, seen);
    }
  }

  /**
   * Split a large class into individual method chunks.
   *
   * Falls back to a sliding-window split of the whole class when no body
   * node is found or no method of at least MIN lines is emitted.
   */
  _splitClassIntoMethods(filePath, lines, outerNode, classNode, className, langConfig, language, chunks, seen) {
    const body = this._findClassBody(classNode);
    if (!body) {
      chunks.push(...this._splitLargeBlock(
        filePath,
        lines,
        outerNode.startPosition.row,
        outerNode.endPosition.row,
        className,
        "class",
        language
      ));
      return;
    }
    const methodTypes = /* @__PURE__ */ new Set([
      ...langConfig.nodeTypes.function || [],
      ...langConfig.nodeTypes.method || []
    ]);
    let methodsFound = false;
    for (let i = 0; i < body.childCount; i++) {
      const child = body.child(i);
      let methodNode = child;
      // For decorated members, look inside the wrapper for the actual method node.
      if (child.type === "decorated_definition") {
        for (let j = 0; j < child.childCount; j++) {
          if (methodTypes.has(child.child(j).type)) {
            methodNode = child.child(j);
            break;
          }
        }
      }
      if (methodTypes.has(methodNode.type) || methodTypes.has(child.type)) {
        const methodName = this._extractName(methodNode);
        // Chunk the wrapper so decorators stay with their method.
        const nodeToChunk = child.type === "decorated_definition" ? child : methodNode;
        const methodLineCount = nodeToChunk.endPosition.row - nodeToChunk.startPosition.row + 1;
        if (methodLineCount >= this.MIN) {
          methodsFound = true;
          const fullName = `${className}.${methodName}`;
          if (methodLineCount > this.MAX) {
            chunks.push(...this._splitLargeBlock(
              filePath,
              lines,
              nodeToChunk.startPosition.row,
              nodeToChunk.endPosition.row,
              fullName,
              "method",
              language
            ));
          } else {
            this._addChunk(filePath, lines, nodeToChunk, "method", fullName, language, chunks, seen);
          }
        }
      }
    }
    if (!methodsFound) {
      chunks.push(...this._splitLargeBlock(
        filePath,
        lines,
        outerNode.startPosition.row,
        outerNode.endPosition.row,
        className,
        "class",
        language
      ));
    }
  }

  /**
   * Find the class body node.
   *
   * @returns {object | null} First direct child whose type is one of the known body node types.
   */
  _findClassBody(classNode) {
    const bodyTypes = ["class_body", "block", "declaration_list", "body"];
    for (let i = 0; i < classNode.childCount; i++) {
      const child = classNode.child(i);
      if (bodyTypes.includes(child.type)) return child;
    }
    return null;
  }

  /**
   * Extract name from an AST node.
   *
   * Tries, in order: the node's `name` field, the first identifier-like named
   * child, then (for variable declarations) the first declarator's `name`
   * field.
   *
   * @returns {string} The name, or "anonymous" when none is found.
   */
  _extractName(node) {
    if (typeof node.childForFieldName === "function") {
      const nameNode = node.childForFieldName("name");
      if (nameNode) return nameNode.text;
    }
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (["identifier", "type_identifier", "property_identifier"].includes(child.type)) {
        return child.text;
      }
    }
    if (node.type === "lexical_declaration" || node.type === "variable_declaration") {
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child.type === "variable_declarator") {
          const nameNode = child.childForFieldName("name");
          if (nameNode) return nameNode.text;
        }
      }
    }
    return "anonymous";
  }

  /** Map category to chunk type (struct/impl → "class", variable → "function"). */
  _toChunkType(category) {
    if (category === "class" || category === "struct" || category === "impl") return "class";
    if (category === "interface") return "interface";
    if (category === "variable") return "function";
    return category;
  }

  /**
   * Add a node as a chunk, avoiding duplicates.
   *
   * Nodes whose row range was already added, and chunks whose trimmed text
   * is 20 characters or shorter, are skipped.
   */
  _addChunk(filePath, lines, node, chunkType, name, language, chunks, seen) {
    const start = node.startPosition.row;
    const end = node.endPosition.row;
    const key = `${start}-${end}`;
    if (seen.has(key)) return;
    seen.add(key);
    const content = lines.slice(start, end + 1).join("\n").trim();
    if (content.length <= 20) return;
    chunks.push({
      filePath,
      chunkType,
      name,
      // AST rows are 0-based; chunk lines are 1-based.
      startLine: start + 1,
      endLine: end + 1,
      content,
      language
    });
  }

  // ── Fallback: Generic sliding window ────────────
  /**
   * Split the whole file into windows of up to MAX lines, advancing
   * `MAX - OVERLAP` lines (at least 1) each step.
   */
  _chunkGeneric(filePath, lines, language) {
    const chunks = [];
    const step = Math.max(this.MAX - this.OVERLAP, 1);
    for (let s = 0; s < lines.length; s += step) {
      // `e` is an exclusive 0-based end, which equals the inclusive 1-based end line.
      const e = Math.min(s + this.MAX, lines.length);
      const content = lines.slice(s, e).join("\n").trim();
      if (content.length > 20) {
        chunks.push({
          filePath,
          chunkType: "block",
          startLine: s + 1,
          endLine: e,
          content,
          language
        });
      }
      // The window reached the end of the file; a further step would only repeat the tail.
      if (e >= lines.length) break;
    }
    return chunks;
  }

  /**
   * Split a large block into overlapping sub-chunks.
   *
   * @param {number} start - 0-based first row of the block.
   * @param {number} end - 0-based last row of the block (inclusive).
   * @param {string} name - Base name; each emitted part is named "<name> (part N)".
   * @param {string} type - Chunk type assigned to every part.
   */
  _splitLargeBlock(filePath, lines, start, end, name, type, language) {
    const chunks = [];
    const step = Math.max(this.MAX - this.OVERLAP, 1);
    let part = 1;
    for (let s = start; s <= end; s += step) {
      const e = Math.min(s + this.MAX, end + 1);
      const content = lines.slice(s, e).join("\n").trim();
      if (content.length > 20) {
        chunks.push({
          filePath,
          chunkType: type,
          name: `${name} (part ${part++})`,
          startLine: s + 1,
          endLine: e,
          content,
          language
        });
      }
      if (e > end) break;
    }
    return chunks;
  }
};
|
|
420
|
+
|
|
421
|
+
// src/indexers/languages.ts
|
|
422
|
+
import path from "path";
|
|
423
|
+
/**
 * Lower-case file extension (with leading dot) → language id.
 * Only files whose extension appears here are considered for indexing.
 */
var SUPPORTED_EXTENSIONS = {
  // --- TypeScript / JavaScript ---
  ".ts": "typescript",
  ".tsx": "typescript",
  ".js": "javascript",
  ".jsx": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",

  // --- Systems ---
  ".go": "go",
  ".rs": "rust",
  ".cpp": "cpp",
  ".cc": "cpp",
  ".c": "c",
  ".h": "c",
  ".hpp": "cpp",

  // --- JVM ---
  ".java": "java",
  ".kt": "kotlin",
  ".scala": "scala",

  // --- Scripting ---
  ".py": "python",
  ".rb": "ruby",
  ".php": "php",
  ".lua": "lua",
  ".sh": "bash",
  ".bash": "bash",
  ".zsh": "bash",

  // --- Web ---
  ".html": "html",
  ".css": "css",
  ".scss": "scss",
  ".less": "less",
  ".svelte": "svelte",
  ".vue": "vue",

  // --- Data / Config ---
  ".json": "json",
  ".yaml": "yaml",
  ".yml": "yaml",
  ".toml": "toml",
  ".xml": "xml",
  ".graphql": "graphql",
  ".gql": "graphql",

  // --- Docs ---
  ".md": "markdown",
  ".mdx": "markdown",

  // --- Database ---
  ".sql": "sql",
  ".prisma": "prisma",

  // --- Other ---
  ".swift": "swift",
  ".dart": "dart",
  ".r": "r",
  ".ex": "elixir",
  ".exs": "elixir",
  ".erl": "erlang",
  ".zig": "zig",
};
|
|
481
|
+
/** Directory names that are never descended into while walking a repository. */
var IGNORE_DIRS = /* @__PURE__ */ new Set([
  // --- Package managers ---
  "node_modules",
  "bower_components",
  ".pnpm",

  // --- Build output ---
  "dist",
  "build",
  "out",
  ".next",
  ".nuxt",
  ".output",
  ".svelte-kit",

  // --- Version control ---
  ".git",
  ".hg",
  ".svn",

  // --- IDE / Editor ---
  ".idea",
  ".vscode",

  // --- Runtime / Cache ---
  "__pycache__",
  ".pytest_cache",
  "venv",
  ".venv",
  ".env",
  ".tox",

  // --- Coverage / Test artifacts ---
  "coverage",
  ".nyc_output",
  "htmlcov",

  // --- Compiled ---
  "target", // Rust, Java
  ".cargo",
  "vendor", // Go, PHP

  // --- AI / Model cache ---
  ".model-cache",
  ".brainbank",

  // --- OS ---
  ".DS_Store",
]);
|
|
524
|
+
/** Exact file names (dependency lock files) that are excluded from indexing. */
var IGNORE_FILES = /* @__PURE__ */ new Set([
  // JavaScript package managers
  "package-lock.json",
  "yarn.lock",
  "pnpm-lock.yaml",
  "bun.lockb",
  // Other ecosystems
  "Cargo.lock",
  "Gemfile.lock",
  "poetry.lock",
  "composer.lock",
  "go.sum",
]);
|
|
535
|
+
/**
 * Tell whether a file's extension is one of the indexable extensions.
 *
 * The extension is compared case-insensitively.
 *
 * @param {string} filePath - File path or bare file name.
 * @returns {boolean} `true` when the extension is a key of `SUPPORTED_EXTENSIONS`.
 */
function isSupported(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  // Own-property check: `in` would also accept keys inherited through the prototype chain.
  return Object.hasOwn(SUPPORTED_EXTENSIONS, ext);
}
|
|
539
|
+
__name(isSupported, "isSupported");
|
|
540
|
+
/**
 * Look up the language id for a file based on its extension.
 *
 * The extension is compared case-insensitively.
 *
 * @param {string} filePath - File path or bare file name.
 * @returns {string | undefined} Language id from `SUPPORTED_EXTENSIONS`, or
 *   `undefined` when the extension is not listed.
 */
function getLanguage(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  // Only own keys count; a plain property read would also return inherited values.
  return Object.hasOwn(SUPPORTED_EXTENSIONS, ext) ? SUPPORTED_EXTENSIONS[ext] : undefined;
}
|
|
544
|
+
__name(getLanguage, "getLanguage");
|
|
545
|
+
/**
 * Tell whether a directory should be skipped while walking a repository.
 *
 * @param {string} dirName - Directory base name (not a full path).
 * @returns {boolean} `true` when the name is a member of `IGNORE_DIRS`.
 */
function isIgnoredDir(dirName) {
  const ignored = IGNORE_DIRS.has(dirName);
  return ignored;
}
|
|
548
|
+
__name(isIgnoredDir, "isIgnoredDir");
|
|
549
|
+
/**
 * Tell whether a file should be skipped regardless of its extension.
 *
 * @param {string} fileName - File base name (not a full path).
 * @returns {boolean} `true` when the name is a member of `IGNORE_FILES`.
 */
function isIgnoredFile(fileName) {
  const ignored = IGNORE_FILES.has(fileName);
  return ignored;
}
|
|
552
|
+
__name(isIgnoredFile, "isIgnoredFile");
|
|
553
|
+
|
|
554
|
+
// src/indexers/code/code-indexer.ts
|
|
555
|
+
/**
 * Indexes the source files of a repository: walks the tree, chunks each
 * changed file, embeds the chunks, and stores chunks and vectors through the
 * injected dependencies.
 *
 * `deps` must provide:
 *  - `db`: object with `prepare(sql)` returning statements with `get`/`all`/`run`
 *  - `hnsw`: vector index with `add(vector, id)` and `remove(id)`
 *  - `vectorCache`: map-like with `set(id, vector)` and `delete(id)`
 *  - `embedding`: object with async `embedBatch(texts)` returning one vector per text
 */
var CodeIndexer = class {
  static {
    __name(this, "CodeIndexer");
  }
  _chunker = new CodeChunker();
  _deps;
  _repoPath;
  _maxFileSize;

  /**
   * @param {string} repoPath - Root directory to index.
   * @param {object} deps - Storage/embedding dependencies (see class comment).
   * @param {number} [maxFileSize=512000] - Files larger than this many bytes are skipped.
   */
  constructor(repoPath, deps, maxFileSize = 512e3) {
    this._deps = deps;
    this._repoPath = repoPath;
    this._maxFileSize = maxFileSize;
  }

  /**
   * Index all supported files in the repository.
   * Skips unchanged files (same content hash).
   *
   * For a changed file, chunking and embedding run BEFORE the previous
   * chunks are removed, so a failure in either step leaves the existing
   * index entries for that file untouched.
   *
   * @param {{ forceReindex?: boolean, onProgress?: (file: string, current: number, total: number) => void }} [options]
   * @returns {Promise<{ indexed: number, skipped: number, chunks: number }>}
   * @throws {Error} When the embedding step returns fewer vectors than chunks.
   */
  async index(options = {}) {
    const { forceReindex = false, onProgress } = options;
    const files = this._walkRepo(this._repoPath);
    let indexed = 0;
    let skipped = 0;
    let totalChunks = 0;
    for (let i = 0; i < files.length; i++) {
      const filePath = files[i];
      // Paths are stored relative to the repository root.
      const rel = path2.relative(this._repoPath, filePath);
      onProgress?.(rel, i + 1, files.length);
      let content;
      try {
        content = fs.readFileSync(filePath, "utf-8");
      } catch {
        // Unreadable file: skip it (counted neither as indexed nor as skipped).
        continue;
      }
      const hash = this._hash(content);
      const existing = this._deps.db.prepare(
        "SELECT file_hash FROM indexed_files WHERE file_path = ?"
      ).get(rel);
      if (!forceReindex && existing?.file_hash === hash) {
        skipped++;
        continue;
      }

      // 1. Produce the new chunks and their embeddings first.
      const ext = path2.extname(filePath).toLowerCase();
      const language = SUPPORTED_EXTENSIONS[ext] ?? "text";
      const chunks = await this._chunker.chunk(rel, content, language);
      const embeddingTexts = chunks.map((chunk) => [
        `File: ${rel}`,
        chunk.name ? `${chunk.chunkType}: ${chunk.name}` : chunk.chunkType,
        chunk.content
      ].join("\n"));
      const vecs = await this._deps.embedding.embedBatch(embeddingTexts);
      if (chunks.length > 0 && (!vecs || vecs.length < chunks.length)) {
        throw new Error(
          `embedBatch returned ${vecs ? vecs.length : 0} vectors for ${chunks.length} chunks of ${rel}`
        );
      }

      // 2. Drop the previous chunks of this file from the index, cache and table.
      // NOTE(review): rows in code_vectors are not deleted here; presumably the
      // schema cascades from code_chunks — confirm.
      const oldChunks = this._deps.db.prepare(
        "SELECT id FROM code_chunks WHERE file_path = ?"
      ).all(rel);
      if (oldChunks.length > 0) {
        for (const { id } of oldChunks) {
          this._deps.hnsw.remove(id);
          this._deps.vectorCache.delete(id);
        }
        this._deps.db.prepare("DELETE FROM code_chunks WHERE file_path = ?").run(rel);
      }

      // 3. Store the new chunks and vectors.
      for (let ci = 0; ci < chunks.length; ci++) {
        const chunk = chunks[ci];
        const vec = vecs[ci];
        const result = this._deps.db.prepare(
          `INSERT INTO code_chunks (file_path, chunk_type, name, start_line, end_line, content, language, file_hash)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
        ).run(rel, chunk.chunkType, chunk.name ?? null, chunk.startLine, chunk.endLine, chunk.content, language, hash);
        const id = Number(result.lastInsertRowid);
        // Serialize exactly the vector's bytes: a typed array may be a view
        // into a larger ArrayBuffer, and `vec.buffer` alone would cover all of it.
        this._deps.db.prepare(
          "INSERT INTO code_vectors (chunk_id, embedding) VALUES (?, ?)"
        ).run(id, Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength));
        this._deps.hnsw.add(vec, id);
        this._deps.vectorCache.set(id, vec);
        totalChunks++;
      }

      // 4. Record the hash so an unchanged file is skipped next time.
      this._deps.db.prepare(
        "INSERT OR REPLACE INTO indexed_files (file_path, file_hash) VALUES (?, ?)"
      ).run(rel, hash);
      indexed++;
    }
    return { indexed, skipped, chunks: totalChunks };
  }

  // ── File Walker ─────────────────────────────────
  /**
   * Recursively collect indexable files below `dir`.
   *
   * A file is kept when its name is not ignored, its extension is a key of
   * `SUPPORTED_EXTENSIONS`, and its size does not exceed `_maxFileSize`.
   * Ignored directories are not descended into. Unreadable directories and
   * files are skipped silently.
   *
   * @param {string} dir - Directory to scan.
   * @param {string[]} [files] - Accumulator; the same array is returned.
   * @returns {string[]} Paths of the collected files.
   */
  _walkRepo(dir, files = []) {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      return files;
    }
    for (const entry of entries) {
      if (entry.isDirectory()) {
        if (isIgnoredDir(entry.name)) continue;
        this._walkRepo(path2.join(dir, entry.name), files);
      } else if (entry.isFile()) {
        if (isIgnoredFile(entry.name)) continue;
        const ext = path2.extname(entry.name).toLowerCase();
        if (!(ext in SUPPORTED_EXTENSIONS)) continue;
        const full = path2.join(dir, entry.name);
        try {
          if (fs.statSync(full).size <= this._maxFileSize) {
            files.push(full);
          }
        } catch {
          // stat failed (e.g. the file vanished during the walk): skip it.
        }
      }
    }
    return files;
  }

  // ── FNV-1a Hash ─────────────────────────────────
  /**
   * Compute a 32-bit FNV-1a style hash over the string's UTF-16 code units.
   *
   * The multiplication uses ordinary floating-point arithmetic, so products
   * above 2^53 are rounded before truncation to 32 bits. The result is still
   * deterministic; the arithmetic is kept as is because changing it would
   * alter the hashes this method produces for existing content.
   *
   * @param {string} content
   * @returns {string} Lower-case hexadecimal hash (no padding).
   */
  _hash(content) {
    let h = 2166136261;
    for (let i = 0; i < content.length; i++) {
      h ^= content.charCodeAt(i);
      h = h * 16777619 >>> 0;
    }
    return h.toString(16);
  }
};
|
|
671
|
+
|
|
672
|
+
// src/indexers/code/code-plugin.ts
|
|
673
|
+
/**
 * Plugin that wires a `CodeIndexer` to the host context's database,
 * embedding provider and shared "code" vector index.
 */
var CodePlugin = class {
  /**
   * @param {{ name?: string, repoPath?: string, maxFileSize?: number }} [opts]
   *   `name` defaults to "code"; `repoPath` and `maxFileSize` fall back to
   *   the context configuration during `initialize`.
   */
  constructor(opts = {}) {
    this.opts = opts;
    this.name = opts.name ?? "code";
  }
  static {
    __name(this, "CodePlugin");
  }
  name;
  hnsw;
  indexer;
  vecCache = /* @__PURE__ */ new Map();

  /**
   * Attach to the shared "code" index and create the indexer.
   * Stored vectors are loaded only when the shared index was newly created.
   *
   * @param {object} ctx - Host context providing `getOrCreateSharedHnsw`,
   *   `loadVectors`, `db`, `embedding` and `config`.
   */
  async initialize(ctx) {
    const shared = await ctx.getOrCreateSharedHnsw("code");
    this.hnsw = shared.hnsw;
    this.vecCache = shared.vecCache;
    const freshlyCreated = shared.isNew;
    if (freshlyCreated) {
      ctx.loadVectors("code_vectors", "chunk_id", this.hnsw, this.vecCache);
    }

    const repoPath = this.opts.repoPath ?? ctx.config.repoPath;
    const deps = {
      db: ctx.db,
      hnsw: this.hnsw,
      vectorCache: this.vecCache,
      embedding: ctx.embedding
    };
    const maxFileSize = this.opts.maxFileSize ?? ctx.config.maxFileSize;
    this.indexer = new CodeIndexer(repoPath, deps, maxFileSize);
  }

  /** Run the indexer; `options` are passed through to `CodeIndexer.index`. */
  async index(options = {}) {
    return this.indexer.index(options);
  }

  /** @returns {{ hnswSize: number }} Value of the vector index's `size` property. */
  stats() {
    const { size } = this.hnsw;
    return { hnswSize: size };
  }
};
|
|
707
|
+
/**
 * Factory for the code-indexing plugin.
 *
 * @param {object} [opts] - Options forwarded unchanged to the `CodePlugin` constructor.
 * @returns {CodePlugin} A new plugin instance.
 */
function code(opts) {
  const plugin = new CodePlugin(opts);
  return plugin;
}
|
|
710
|
+
__name(code, "code");
|
|
711
|
+
|
|
712
|
+
export {
|
|
713
|
+
SUPPORTED_EXTENSIONS,
|
|
714
|
+
IGNORE_DIRS,
|
|
715
|
+
isSupported,
|
|
716
|
+
getLanguage,
|
|
717
|
+
isIgnoredDir,
|
|
718
|
+
isIgnoredFile,
|
|
719
|
+
CodeChunker,
|
|
720
|
+
CodeIndexer,
|
|
721
|
+
code
|
|
722
|
+
};
|
|
723
|
+
//# sourceMappingURL=chunk-WR4WXKJT.js.map
|