@lyy0709/contextweaver 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.en.md +405 -0
- package/README.md +475 -0
- package/dist/SearchService-R7UMDQ6T.js +1623 -0
- package/dist/browser-VC5772XM.js +30 -0
- package/dist/chunk-6HF343R7.js +186 -0
- package/dist/chunk-CKN7LWEA.js +1337 -0
- package/dist/chunk-ECEVTSSZ.js +894 -0
- package/dist/chunk-LPFRFKFW.js +543 -0
- package/dist/chunk-V2USKRIC.js +310 -0
- package/dist/chunk-XVKMTPCT.js +297 -0
- package/dist/chunk-YVLGQTLG.js +170 -0
- package/dist/claude-IKIA62JA.js +42 -0
- package/dist/codebaseRetrieval-SCN3YIPM.js +11 -0
- package/dist/config-WTC56Y2R.js +22 -0
- package/dist/enhancer-QHNMR35J.js +8 -0
- package/dist/gemini-Q37K5XA5.js +44 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +202 -0
- package/dist/lock-K4TS4ENC.js +106 -0
- package/dist/logger-SF6S6GVR.js +9 -0
- package/dist/mcp/main.d.ts +1 -0
- package/dist/mcp/main.js +18 -0
- package/dist/openai-MOPZNA5I.js +34 -0
- package/dist/scanner-T7MGYXQV.js +10 -0
- package/dist/server-276GGS5G.js +614 -0
- package/dist/server-DENFYPME.js +263 -0
- package/package.json +82 -0

@@ -0,0 +1,1337 @@
import {
  closeAllIndexers,
  closeAllVectorStores,
  getIndexer
} from "./chunk-ECEVTSSZ.js";
import {
  batchDelete,
  batchUpdateMtime,
  batchUpsert,
  clear,
  closeDb,
  generateProjectId,
  getAllFileMeta,
  getAllPaths,
  getFilesNeedingVectorIndex,
  getStoredEmbeddingDimensions,
  initDb,
  setStoredEmbeddingDimensions
} from "./chunk-LPFRFKFW.js";
import {
  logger
} from "./chunk-YVLGQTLG.js";
import {
  getEmbeddingConfig,
  getExcludePatterns
} from "./chunk-V2USKRIC.js";

// src/scanner/index.ts
import path3 from "path";

// src/scanner/crawler.ts
import { fdir } from "fdir";

// src/scanner/filter.ts
import fs from "fs/promises";
import path from "path";
import ignore from "ignore";

// src/scanner/language.ts
var LANGUAGE_MAP = {
  ".ts": "typescript",
  ".tsx": "typescript",
  ".js": "javascript",
  ".jsx": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",
  ".md": "markdown",
  ".py": "python",
  ".go": "go",
  ".rs": "rust",
  ".java": "java",
  ".kt": "kotlin",
  ".swift": "swift",
  ".cpp": "cpp",
  ".cc": "cpp",
  ".cxx": "cpp",
  ".hpp": "cpp",
  ".h": "cpp",
  ".c": "c",
  ".sh": "shell",
  ".bash": "shell",
  ".zsh": "shell",
  ".fish": "shell",
  ".ps1": "powershell",
  ".sql": "sql",
  ".yaml": "yaml",
  ".yml": "yaml",
  ".json": "json",
  ".toml": "toml",
  ".xml": "xml",
  ".html": "html",
  ".css": "css",
  ".scss": "scss",
  ".sass": "sass",
  ".less": "less",
  ".vue": "vue",
  ".svelte": "svelte",
  ".rb": "ruby",
  ".php": "php",
  ".dart": "dart",
  ".lua": "lua",
  ".r": "r"
};
var ALLOWED_EXTENSIONS = new Set(Object.keys(LANGUAGE_MAP));
function getLanguage(filePath) {
  const ext = getFileExtension(filePath);
  return LANGUAGE_MAP[ext] || "unknown";
}
function isAllowedExtension(filePath) {
  const ext = getFileExtension(filePath);
  return ALLOWED_EXTENSIONS.has(ext);
}
function getFileExtension(filePath) {
  const ext = filePath.split(".").pop();
  return ext ? `.${ext.toLowerCase()}` : "";
}
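
// Illustrative sketch of the helpers above (assumed inputs):
//   getLanguage("src/scanner/index.ts");   // "typescript"
//   getLanguage("README.md");              // "markdown"
//   isAllowedExtension("bin/tool.exe");    // false -> the crawler drops it
// Note that a dotless name like "Makefile" yields the pseudo-extension
// ".makefile", which is absent from LANGUAGE_MAP, so it maps to "unknown"
// and fails isAllowedExtension.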

// src/scanner/filter.ts
var ignoreInstance = null;
var lastConfigHash = null;
async function generateConfigHash(rootPath) {
  const crypto2 = await import("crypto");
  const hashes = [];
  const gitignorePath = path.join(rootPath, ".gitignore");
  try {
    const content = await fs.readFile(gitignorePath, "utf-8");
    hashes.push(crypto2.createHash("sha256").update(content).digest("hex"));
  } catch {
  }
  const envPatterns = process.env.IGNORE_PATTERNS || "";
  const envHash = crypto2.createHash("sha256").update(envPatterns).digest("hex");
  hashes.push(envHash);
  const combined = hashes.join("|");
  return crypto2.createHash("sha256").update(combined).digest("hex");
}
async function initFilter(rootPath) {
  const currentHash = await generateConfigHash(rootPath);
  if (lastConfigHash === currentHash && ignoreInstance) {
    return;
  }
  const ig = ignore();
  const patterns = getExcludePatterns();
  ig.add(patterns);
  const gitignorePath = path.join(rootPath, ".gitignore");
  try {
    await fs.access(gitignorePath);
    ig.add(await fs.readFile(gitignorePath, "utf-8"));
  } catch {
  }
  ignoreInstance = ig;
  lastConfigHash = currentHash;
}
function isFiltered(relativePath) {
  if (!ignoreInstance) {
    throw new Error("Filter not initialized. Call initFilter() first.");
  }
  return ignoreInstance.ignores(relativePath);
}
function isAllowedFile(filePath) {
  return isAllowedExtension(filePath);
}

// src/scanner/crawler.ts
function escapeRegExp(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
async function crawl(rootPath) {
  const api = new fdir().withFullPaths().withErrors().filter((filePath) => {
    const normalizedFilePath = filePath.replace(/\\/g, "/");
    const normalizedRootPath = rootPath.replace(/\\/g, "/");
    const relativePath = normalizedFilePath.replace(
      new RegExp(`^${escapeRegExp(normalizedRootPath)}/?`),
      ""
    );
    return !isFiltered(relativePath) && isAllowedFile(filePath);
  });
  const paths = await api.crawl(rootPath).withPromise();
  return paths;
}
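
// Illustrative sketch: initFilter must run before crawl, since isFiltered
// throws when the ignore instance is missing. With a hypothetical root:
//   await initFilter("/home/me/my-project");   // exclude patterns + .gitignore, cached by config hash
//   const files = await crawl("/home/me/my-project");
//   // -> absolute paths, pre-filtered by ignore rules and allowed extensions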

// src/scanner/processor.ts
import fs3 from "fs/promises";
import os from "os";
import path2 from "path";
import pLimit from "p-limit";

// src/chunking/ParserPool.ts
import Parser from "@keqingmoe/tree-sitter";
var GRAMMAR_MODULES = {
  typescript: "tree-sitter-typescript",
  javascript: "tree-sitter-javascript",
  python: "tree-sitter-python",
  go: "tree-sitter-go",
  rust: "tree-sitter-rust",
  java: "tree-sitter-java",
  c: "tree-sitter-c",
  cpp: "tree-sitter-cpp",
  c_sharp: "tree-sitter-c-sharp"
};
var loadedGrammars = /* @__PURE__ */ new Map();
var parserCache = /* @__PURE__ */ new Map();
async function loadGrammar(language) {
  const cached = loadedGrammars.get(language);
  if (cached) return cached;
  const moduleName = GRAMMAR_MODULES[language];
  if (!moduleName) return null;
  try {
    const grammarModule = await import(moduleName);
    let grammar = null;
    if (language === "typescript") {
      grammar = grammarModule.default?.typescript ?? grammarModule.typescript;
    } else {
      const exported = grammarModule.default ?? grammarModule;
      if (exported && typeof exported === "object" && "nodeTypeInfo" in exported) {
        grammar = exported;
      } else if (exported?.language) {
        grammar = exported.language;
      } else if (exported?.[language]) {
        grammar = exported[language];
      }
    }
    if (!grammar) {
      console.error(
        `[ParserPool] Could not extract grammar for ${language} from module ${moduleName}`
      );
      return null;
    }
    loadedGrammars.set(language, grammar);
    return grammar;
  } catch (err) {
    console.error(`[ParserPool] Failed to load grammar for ${language}:`, err);
    return null;
  }
}
async function getParser(language) {
  const cached = parserCache.get(language);
  if (cached) return cached;
  const grammar = await loadGrammar(language);
  if (!grammar) return null;
  const parser = new Parser();
  parser.setLanguage(grammar);
  parserCache.set(language, parser);
  return parser;
}
function isLanguageSupported(language) {
  return language in GRAMMAR_MODULES;
}
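
// Illustrative sketch, assuming the matching tree-sitter grammar package is
// installed: grammars load lazily and parsers are cached per language.
//   if (isLanguageSupported("python")) {
//     const parser = await getParser("python");  // imports tree-sitter-python once
//     const tree = parser?.parse("def f():\n    return 1\n");
//     tree?.rootNode.type;                       // "module"
//   }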

// src/chunking/LanguageSpec.ts
var LANGUAGE_SPECS = {
  typescript: {
    hierarchy: /* @__PURE__ */ new Set([
      "class_declaration",
      "abstract_class_declaration",
      "interface_declaration",
      "function_declaration",
      "generator_function_declaration",
      "method_definition",
      "arrow_function",
      "export_statement",
      "import_statement"
    ]),
    nameFields: ["name", "id"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier", "property_identifier"]),
    prefixMap: {
      class_declaration: "class ",
      abstract_class_declaration: "abstract class ",
      interface_declaration: "interface ",
      function_declaration: "fn ",
      generator_function_declaration: "fn* ",
      method_definition: "",
      arrow_function: ""
    },
    commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  javascript: {
    hierarchy: /* @__PURE__ */ new Set([
      "class_declaration",
      "function_declaration",
      "generator_function_declaration",
      "method_definition",
      "arrow_function"
    ]),
    nameFields: ["name", "id"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "property_identifier"]),
    prefixMap: {
      class_declaration: "class ",
      function_declaration: "fn ",
      generator_function_declaration: "fn* ",
      method_definition: "",
      arrow_function: ""
    },
    commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  python: {
    hierarchy: /* @__PURE__ */ new Set(["class_definition", "function_definition", "decorated_definition"]),
    nameFields: ["name"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier"]),
    prefixMap: {
      class_definition: "class ",
      function_definition: "def ",
      decorated_definition: ""
    },
    commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  go: {
    hierarchy: /* @__PURE__ */ new Set([
      "function_declaration",
      "method_declaration",
      "type_spec",
      "type_declaration",
      "struct_type",
      "interface_type"
    ]),
    nameFields: ["name"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier", "field_identifier"]),
    prefixMap: {
      function_declaration: "func ",
      method_declaration: "func ",
      type_spec: "type ",
      type_declaration: "type ",
      struct_type: "struct ",
      interface_type: "interface "
    },
    commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  rust: {
    hierarchy: /* @__PURE__ */ new Set([
      "function_item",
      "struct_item",
      "enum_item",
      "trait_item",
      "impl_item",
      "mod_item",
      "type_item"
    ]),
    nameFields: ["name"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier"]),
    prefixMap: {
      function_item: "fn ",
      struct_item: "struct ",
      enum_item: "enum ",
      trait_item: "trait ",
      impl_item: "impl ",
      mod_item: "mod ",
      type_item: "type "
    },
    commentTypes: /* @__PURE__ */ new Set(["line_comment", "block_comment"])
  },
  java: {
    hierarchy: /* @__PURE__ */ new Set([
      "class_declaration",
      "interface_declaration",
      "enum_declaration",
      "annotation_type_declaration",
      "method_declaration",
      "constructor_declaration",
      "record_declaration"
    ]),
    nameFields: ["name", "identifier"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier"]),
    prefixMap: {
      class_declaration: "class ",
      interface_declaration: "interface ",
      enum_declaration: "enum ",
      annotation_type_declaration: "@interface ",
      method_declaration: "",
      constructor_declaration: "",
      record_declaration: "record "
    },
    commentTypes: /* @__PURE__ */ new Set(["line_comment", "block_comment"])
  },
  c: {
    hierarchy: /* @__PURE__ */ new Set([
      "function_definition",
      "struct_specifier",
      "union_specifier",
      "enum_specifier",
      "type_definition"
    ]),
    nameFields: ["declarator", "name"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier", "field_identifier"]),
    prefixMap: {
      function_definition: "",
      struct_specifier: "struct ",
      union_specifier: "union ",
      enum_specifier: "enum ",
      type_definition: "typedef "
    },
    commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  cpp: {
    hierarchy: /* @__PURE__ */ new Set([
      "function_definition",
      "class_specifier",
      "struct_specifier",
      "union_specifier",
      "enum_specifier",
      "namespace_definition",
      "template_declaration",
      "type_definition"
    ]),
    nameFields: ["declarator", "name"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier", "type_identifier", "field_identifier", "namespace_identifier"]),
    prefixMap: {
      function_definition: "",
      class_specifier: "class ",
      struct_specifier: "struct ",
      union_specifier: "union ",
      enum_specifier: "enum ",
      namespace_definition: "namespace ",
      template_declaration: "template ",
      type_definition: "typedef "
    },
    commentTypes: /* @__PURE__ */ new Set(["comment"])
  },
  c_sharp: {
    hierarchy: /* @__PURE__ */ new Set([
      "class_declaration",
      "interface_declaration",
      "struct_declaration",
      "enum_declaration",
      "record_declaration",
      "method_declaration",
      "constructor_declaration",
      "property_declaration",
      "namespace_declaration"
    ]),
    nameFields: ["name", "identifier"],
    nameNodeTypes: /* @__PURE__ */ new Set(["identifier"]),
    prefixMap: {
      class_declaration: "class ",
      interface_declaration: "interface ",
      struct_declaration: "struct ",
      enum_declaration: "enum ",
      record_declaration: "record ",
      method_declaration: "",
      constructor_declaration: "",
      property_declaration: "",
      namespace_declaration: "namespace "
    },
    commentTypes: /* @__PURE__ */ new Set(["comment"])
  }
};
function getLanguageSpec(language) {
  return LANGUAGE_SPECS[language] ?? null;
}
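
// The specs are plain data, so supporting another grammar is mostly a matter
// of describing its node types. A hypothetical Ruby entry (illustrative only;
// "ruby" is not in GRAMMAR_MODULES above, so this alone would not enable it):
//   LANGUAGE_SPECS.ruby = {
//     hierarchy: new Set(["class", "module", "method", "singleton_method"]),
//     nameFields: ["name"],
//     nameNodeTypes: new Set(["identifier", "constant"]),
//     prefixMap: { class: "class ", module: "module ", method: "def ", singleton_method: "def " },
//     commentTypes: new Set(["comment"])
//   };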

// src/chunking/SourceAdapter.ts
var SourceAdapter = class {
  code;
  domain;
  buffer;
  // UTF-8 byte offset -> character offset lookup table (UTF-8 domain only)
  byteToCharMap;
  // prefix sums over UTF-16 code units (used for NWS computation)
  nwsPrefixSum;
  constructor(config) {
    this.code = config.code;
    const lenUtf16 = config.code.length;
    const lenUtf8 = Buffer.byteLength(config.code, "utf8");
    if (config.endIndex === lenUtf16) {
      this.domain = "utf16";
      this.buffer = null;
      this.byteToCharMap = null;
    } else if (config.endIndex === lenUtf8) {
      this.domain = "utf8";
      this.buffer = Buffer.from(config.code, "utf8");
      this.byteToCharMap = this.buildByteToCharMap();
    } else {
      this.domain = "unknown";
      this.buffer = null;
      this.byteToCharMap = null;
      console.warn(
        `[SourceAdapter] Index domain unclear: endIndex=${config.endIndex}, utf16Len=${lenUtf16}, utf8Len=${lenUtf8}`
      );
    }
    this.nwsPrefixSum = this.buildNwsPrefixSum();
  }
  /**
   * Returns the detected index domain.
   */
  getDomain() {
    return this.domain;
  }
  /**
   * Safe slicing: picks the slicing strategy that matches the index domain.
   *
   * For the UTF-8 domain, byte boundaries are first aligned to character
   * boundaries before slicing.
   *
   * @param start startIndex reported by Tree-sitter
   * @param end endIndex reported by Tree-sitter
   * @returns the sliced string
   */
  slice(start, end) {
    if (this.domain === "utf16" || this.domain === "unknown") {
      return this.code.slice(start, end);
    }
    if (!this.byteToCharMap) {
      return this.code.slice(start, end);
    }
    const charStart = this.byteToChar(start);
    const charEnd = this.byteToChar(end);
    return this.code.slice(charStart, charEnd);
  }
  /**
   * Counts the non-whitespace (NWS) characters in a range.
   *
   * Note: NWS is always computed in the character domain for semantic
   * consistency. If the index domain is UTF-8, byte offsets are converted
   * to character offsets first.
   *
   * @param start startIndex reported by Tree-sitter
   * @param end endIndex reported by Tree-sitter
   * @returns number of non-whitespace characters
   */
  nws(start, end) {
    let charStart;
    let charEnd;
    if (this.domain === "utf8" && this.byteToCharMap) {
      charStart = this.byteToChar(start);
      charEnd = this.byteToChar(end);
    } else {
      charStart = start;
      charEnd = end;
    }
    const maxIndex = this.nwsPrefixSum.length - 1;
    const s = Math.max(0, Math.min(maxIndex, charStart));
    const e = Math.max(0, Math.min(maxIndex, charEnd));
    return this.nwsPrefixSum[e] - this.nwsPrefixSum[s];
  }
  /**
   * Returns the total non-whitespace character count.
   */
  getTotalNws() {
    return this.nwsPrefixSum[this.nwsPrefixSum.length - 1];
  }
  /**
   * Converts a byte offset to a character offset.
   */
  byteToChar(byteOffset) {
    if (!this.byteToCharMap) return byteOffset;
    const safeOffset = Math.max(0, Math.min(this.byteToCharMap.length - 1, byteOffset));
    return this.byteToCharMap[safeOffset];
  }
  /**
   * Builds the byte-offset -> character-offset lookup table.
   *
   * In UTF-8 a character may occupy 1-4 bytes; this table allows O(1)
   * lookup of the character offset for any byte offset.
   */
  buildByteToCharMap() {
    const buffer = this.buffer;
    const map = new Uint32Array(buffer.length + 1);
    let charIndex = 0;
    let byteIndex = 0;
    while (byteIndex < buffer.length) {
      map[byteIndex] = charIndex;
      const byte = buffer[byteIndex];
      let charBytes;
      if ((byte & 128) === 0) {
        charBytes = 1;
      } else if ((byte & 224) === 192) {
        charBytes = 2;
      } else if ((byte & 240) === 224) {
        charBytes = 3;
      } else if ((byte & 248) === 240) {
        charBytes = 4;
      } else {
        charBytes = 1;
      }
      for (let i = 1; i < charBytes && byteIndex + i < buffer.length; i++) {
        map[byteIndex + i] = charIndex;
      }
      byteIndex += charBytes;
      if (charBytes === 4) {
        charIndex += 2;
      } else {
        charIndex += 1;
      }
    }
    map[buffer.length] = charIndex;
    return map;
  }
  /**
   * Builds the character-domain NWS prefix sums.
   */
  buildNwsPrefixSum() {
    const prefixSum = new Uint32Array(this.code.length + 1);
    let count = 0;
    for (let i = 0; i < this.code.length; i++) {
      const cc = this.code.charCodeAt(i);
      if (!(cc === 32 || cc === 9 || cc === 10 || cc === 13)) {
        count++;
      }
      prefixSum[i + 1] = count;
    }
    return prefixSum;
  }
};
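
// Domain detection hinges on comparing the tree's endIndex against both
// lengths. A worked example with a multi-byte character (illustrative):
//   const code = '{ "名": 1 }';            // "名" is 1 UTF-16 unit but 3 UTF-8 bytes
//   code.length;                           // 10 (UTF-16 units)
//   Buffer.byteLength(code, "utf8");       // 12 (UTF-8 bytes)
//   const a = new SourceAdapter({ code, endIndex: 12 });
//   a.getDomain();                         // "utf8" (endIndex matches the byte length)
//   a.slice(2, 7);                         // '"名"' -- bytes 2..7 aligned to chars 2..5
//   a.nws(0, 12);                          // 7 non-whitespace characters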

// src/chunking/SemanticSplitter.ts
var SemanticSplitter = class {
  config;
  adapter;
  code;
  language;
  constructor(config = {}) {
    const maxChunkSize = config.maxChunkSize ?? 2500;
    this.config = {
      maxChunkSize,
      minChunkSize: config.minChunkSize ?? 100,
      chunkOverlap: config.chunkOverlap ?? 200,
      // hard cap on raw characters: defaults to maxChunkSize * 4 (assuming 1 token ≈ 4 chars)
      maxRawChars: config.maxRawChars ?? maxChunkSize * 4
    };
  }
  /**
   * Semantically splits code into chunks.
   * @param tree Tree-sitter parse tree
   * @param code source code string
   * @param filePath file path
   * @param language language identifier
   * @returns array of processed chunks
   */
  split(tree, code, filePath, language) {
    this.adapter = new SourceAdapter({
      code,
      endIndex: tree.rootNode.endIndex
    });
    const domain = this.adapter.getDomain();
    if (domain === "unknown") {
      console.warn(
        `[SemanticSplitter] Unknown index domain for ${filePath}, falling back to simple split`
      );
      return this.fallbackSplit(code, filePath, language);
    }
    if (domain === "utf8") {
      console.info(`[SemanticSplitter] Using UTF-8 byte indexing for ${filePath}`);
    }
    this.code = code;
    this.language = language;
    const initialContext = [filePath];
    const windows = this.visitNode(tree.rootNode, initialContext);
    return this.windowsToChunks(windows, filePath, language);
  }
  /**
   * Public plain-text splitting interface.
   *
   * Used for languages without AST support, or as the fallback when AST
   * parsing fails. Uses UTF-16 indexing (native JS strings) and splits by line.
   *
   * @param code source code string
   * @param filePath file path
   * @param language language identifier
   * @returns array of processed chunks
   */
  splitPlainText(code, filePath, language) {
    return this.fallbackSplit(code, filePath, language);
  }
  /**
   * Fallback splitting: used when the index domain is unclear.
   *
   * Uses UTF-16 indexing (native JS strings) and splits by line.
   * Note: fallback mode does not support overlap.
   */
  fallbackSplit(code, filePath, language) {
    const adapter = new SourceAdapter({
      code,
      endIndex: code.length
    });
    const totalSize = adapter.getTotalNws();
    if (totalSize <= this.config.maxChunkSize) {
      return [
        {
          displayCode: code,
          vectorText: `// Context: ${filePath}
${code}`,
          nwsSize: totalSize,
          metadata: {
            startIndex: 0,
            endIndex: code.length,
            rawSpan: { start: 0, end: code.length },
            vectorSpan: { start: 0, end: code.length },
            filePath,
            language,
            contextPath: [filePath]
          }
        }
      ];
    }
    const lines = code.split("\n");
    const chunks = [];
    let currentLines = [];
    let currentSize = 0;
    let lineStartIndex = 0;
    let chunkStartIndex = 0;
    let chunkRawStart = 0;
    for (const line of lines) {
      const lineEndIndex = lineStartIndex + line.length;
      const lineNws = adapter.nws(lineStartIndex, lineEndIndex);
      if (currentSize + lineNws > this.config.maxChunkSize && currentLines.length > 0) {
        const displayCode = currentLines.join("\n");
        const chunkEndIndex = chunkStartIndex + displayCode.length;
        chunks.push({
          displayCode,
          vectorText: `// Context: ${filePath}
${displayCode}`,
          nwsSize: currentSize,
          metadata: {
            startIndex: chunkStartIndex,
            endIndex: chunkEndIndex,
            rawSpan: { start: chunkRawStart, end: chunkEndIndex + 1 },
            // +1 for newline gap
            vectorSpan: { start: chunkStartIndex, end: chunkEndIndex },
            filePath,
            language,
            contextPath: [filePath]
          }
        });
        chunkRawStart = chunkEndIndex + 1;
        chunkStartIndex += displayCode.length + 1;
        currentLines = [line];
        currentSize = lineNws;
      } else {
        currentLines.push(line);
        currentSize += lineNws;
      }
      lineStartIndex = lineEndIndex + 1;
    }
    if (currentLines.length > 0) {
      const displayCode = currentLines.join("\n");
      const chunkEndIndex = chunkStartIndex + displayCode.length;
      chunks.push({
        displayCode,
        vectorText: `// Context: ${filePath}
${displayCode}`,
        nwsSize: currentSize,
        metadata: {
          startIndex: chunkStartIndex,
          endIndex: chunkEndIndex,
          rawSpan: { start: chunkRawStart, end: code.length },
          vectorSpan: { start: chunkStartIndex, end: chunkEndIndex },
          filePath,
          language,
          contextPath: [filePath]
        }
      });
    }
    return chunks;
  }
  /**
   * Recursively visits AST nodes.
   */
  visitNode(node, context) {
    const start = node.startIndex;
    const end = node.endIndex;
    const nodeSize = this.adapter.nws(start, end);
    let nextContext = context;
    const spec = getLanguageSpec(this.language);
    if (spec?.hierarchy.has(node.type)) {
      const name = this.extractNodeName(node, spec);
      if (name) {
        const prefix = spec.prefixMap[node.type] ?? "";
        nextContext = [...context, `${prefix}${name}`];
      }
    }
    if (nodeSize <= this.config.maxChunkSize) {
      return [{ nodes: [node], size: nodeSize, contextPath: nextContext }];
    }
    const children = node.children;
    if (children.length === 0) {
      return [{ nodes: [node], size: nodeSize, contextPath: nextContext }];
    }
    const childWindows = [];
    for (const child of children) {
      childWindows.push(...this.visitNode(child, nextContext));
    }
    return this.mergeAdjacentWindows(childWindows);
  }
  /**
   * Extracts a name from a node (data-driven).
   */
  extractNodeName(node, spec) {
    for (const child of node.namedChildren) {
      if (spec.nameNodeTypes.has(child.type)) {
        return child.text;
      }
    }
    if (node.firstNamedChild) {
      const firstChild = node.firstNamedChild;
      if (firstChild.text.length <= 100 && !firstChild.text.includes("\n")) {
        return firstChild.text;
      }
    }
    return null;
  }
  /**
   * Gap-aware merging of adjacent windows.
   *
   * Uses a triple-budget strategy:
   * - NWS budget: controls the amount of effective code
   * - raw budget: controls the physical character count, so comment-heavy
   *   spans cannot blow the token budget
   * - semantic-boundary penalty: windows with different contextPaths face a
   *   higher merge bar
   *
   * Forward-absorption strategy:
   * - if the current window ends with a comment, push the comment into the
   *   next window
   * - guarantees JSDoc/comments land in the same chunk as the code they describe
   */
  mergeAdjacentWindows(windows) {
    if (windows.length === 0) return [];
    const merged = [];
    let current = windows[0];
    for (let i = 1; i < windows.length; i++) {
      const next = windows[i];
      this.forwardAbsorbComments(current, next);
      if (current.nodes.length === 0) {
        current = next;
        continue;
      }
      const currentStart = current.nodes[0].startIndex;
      const currentEnd = current.nodes[current.nodes.length - 1].endIndex;
      const nextStart = next.nodes[0].startIndex;
      const nextEnd = next.nodes[next.nodes.length - 1].endIndex;
      const gapNws = this.adapter.nws(currentEnd, nextStart);
      const combinedNws = current.size + gapNws + next.size;
      const combinedRawLen = nextEnd - currentStart;
      const sameContext = this.isSameContext(current.contextPath, next.contextPath);
      const boundaryPenalty = sameContext ? 1 : 0.7;
      const isTiny = current.size < this.config.minChunkSize;
      const effectiveBudget = this.config.maxChunkSize * boundaryPenalty;
      const fitsNwsBudget = combinedNws <= effectiveBudget || isTiny && combinedNws < effectiveBudget * 1.5;
      const fitsRawBudget = combinedRawLen <= this.config.maxRawChars * boundaryPenalty;
      if (fitsNwsBudget && fitsRawBudget) {
        current.nodes.push(...next.nodes);
        current.size = combinedNws;
        if (next.contextPath.length > current.contextPath.length) {
          current.contextPath = next.contextPath;
        }
      } else {
        merged.push(current);
        current = next;
      }
    }
    merged.push(current);
    return merged;
  }
  /**
   * Forward absorption: pushes trailing comment nodes from current onto the
   * head of next.
   *
   * This keeps JSDoc/docstrings/comments in the same chunk as the
   * function/method they describe, rather than being cut off at the end of
   * the previous chunk.
   *
   * Note: this method mutates current and next in place.
   */
  forwardAbsorbComments(current, next) {
    const spec = getLanguageSpec(this.language);
    const commentTypes = spec?.commentTypes ?? /* @__PURE__ */ new Set(["comment"]);
    const absorbedNodes = [];
    let absorbedNws = 0;
    while (current.nodes.length > 0) {
      const lastNode = current.nodes[current.nodes.length - 1];
      if (commentTypes.has(lastNode.type)) {
        current.nodes.pop();
        const nodeNws = this.adapter.nws(lastNode.startIndex, lastNode.endIndex);
        absorbedNodes.unshift(lastNode);
        absorbedNws += nodeNws;
        current.size -= nodeNws;
      } else {
        break;
      }
    }
    if (absorbedNodes.length > 0) {
      const gapNws = next.nodes.length > 0 ? this.adapter.nws(
        absorbedNodes[absorbedNodes.length - 1].endIndex,
        next.nodes[0].startIndex
      ) : 0;
      next.nodes.unshift(...absorbedNodes);
      next.size += absorbedNws + gapNws;
    }
  }
  /**
   * Checks whether two contextPaths belong to the same semantic unit.
   *
   * Rule: if the common prefix length >= the shorter path's length, they are
   * considered the same unit. For example:
   * - ["file", "class A", "method foo"] vs ["file", "class A", "method bar"] -> false (different methods)
   * - ["file", "class A"] vs ["file", "class A", "method foo"] -> true (parent-child)
   */
  isSameContext(a, b) {
    const minLen = Math.min(a.length, b.length);
    let commonLen = 0;
    for (let i = 0; i < minLen; i++) {
      if (a[i] === b[i]) {
        commonLen++;
      } else {
        break;
      }
    }
    return commonLen >= minLen;
  }
  /**
   * Converts windows into the final ProcessedChunks.
   *
   * Gap ownership: each gap belongs to the following chunk (a chunk's
   * rawSpan.start extends back to the previous chunk's endIndex).
   * Overlap: vectorSpan extends backwards by chunkOverlap NWS characters to
   * improve semantic-retrieval recall.
   *
   * Invariant: all rawSpans concatenated === the complete file (no overlap).
   */
  windowsToChunks(windows, filePath, language) {
    if (windows.length === 0) return [];
    const chunks = [];
    let prevEnd = 0;
    const overlap = this.config.chunkOverlap;
    for (let i = 0; i < windows.length; i++) {
      const w = windows[i];
      const start = w.nodes[0].startIndex;
      const end = w.nodes[w.nodes.length - 1].endIndex;
      const isLast = i === windows.length - 1;
      const codeEndIndex = this.adapter.getDomain() === "utf8" ? Buffer.byteLength(this.code, "utf8") : this.code.length;
      const rawSpanEnd = isLast ? codeEndIndex : end;
      let vectorStart = start;
      if (i > 0 && overlap > 0) {
        const candidateStart = this.findOverlapStart(start, overlap);
        const overlapRawLen = start - candidateStart;
        if (overlapRawLen <= this.config.maxRawChars * 0.25) {
          vectorStart = candidateStart;
        }
      }
      const vectorEnd = end;
      const displayCode = this.adapter.slice(start, end);
      const vectorCode = this.adapter.slice(vectorStart, vectorEnd);
      const metadata = {
        startIndex: start,
        endIndex: end,
        rawSpan: { start: prevEnd, end: rawSpanEnd },
        vectorSpan: { start: vectorStart, end: vectorEnd },
        filePath,
        language,
        contextPath: w.contextPath
      };
      chunks.push({
        displayCode,
        vectorText: generateVectorText(vectorCode, w.contextPath),
        nwsSize: w.size,
        metadata
      });
      prevEnd = end;
    }
    return chunks;
  }
  /**
   * Finds the overlap start position.
   *
   * Binary-searches backwards from start for the position that covers
   * targetNws non-whitespace characters.
   *
   * @param start start position of the current chunk
   * @param targetNws target overlap size (NWS character count)
   * @returns overlap start position
   */
  findOverlapStart(start, targetNws) {
    if (start <= 0 || targetNws <= 0) return start;
    let low = 0;
    let high = start;
    let result = start;
    while (low <= high) {
      const mid = Math.floor((low + high) / 2);
      const nwsInRange = this.adapter.nws(mid, start);
      if (nwsInRange >= targetNws) {
        result = mid;
        low = mid + 1;
      } else {
        high = mid - 1;
      }
    }
    return Math.max(0, result);
  }
};
function generateVectorText(code, contextPath) {
  const breadcrumb = contextPath.join(" > ");
  return `// Context: ${breadcrumb}
${code}`;
}
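
// Illustrative sketch: the breadcrumb ties a chunk back to its position in
// the file's symbol hierarchy.
//   generateVectorText("return 1;", ["src/a.ts", "class A", "fn run"]);
//   // -> "// Context: src/a.ts > class A > fn run\nreturn 1;"
// A file small enough for one chunk keeps the bare path ("// Context: src/a.ts"),
// while windows produced inside nested declarations accumulate prefixed names
// via visitNode's contextPath.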

// src/utils/encoding.ts
import fs2 from "fs/promises";
import chardet from "chardet";
import iconv from "iconv-lite";
function normalizeEncoding(encoding) {
  const map = {
    "UTF-8": "utf8",
    "UTF-16 LE": "utf16le",
    "UTF-16 BE": "utf16be",
    "UTF-32 LE": "utf32le",
    "UTF-32 BE": "utf32be",
    GB18030: "gb18030",
    GBK: "gbk",
    GB2312: "gb2312",
    Big5: "big5",
    Shift_JIS: "shiftjis",
    "EUC-JP": "eucjp",
    "EUC-KR": "euckr",
    "ISO-8859-1": "iso88591",
    "windows-1252": "win1252",
    ASCII: "utf8"
    // ASCII is a subset of UTF-8
  };
  return map[encoding] || encoding.toLowerCase().replace(/[^a-z0-9]/g, "");
}
function detectBOM(buffer) {
  if (buffer.length >= 3) {
    if (buffer[0] === 239 && buffer[1] === 187 && buffer[2] === 191) {
      return "UTF-8";
    }
  }
  if (buffer.length >= 4) {
    if (buffer[0] === 255 && buffer[1] === 254 && buffer[2] === 0 && buffer[3] === 0) {
      return "UTF-32 LE";
    }
    if (buffer[0] === 0 && buffer[1] === 0 && buffer[2] === 254 && buffer[3] === 255) {
      return "UTF-32 BE";
    }
  }
  if (buffer.length >= 2) {
    if (buffer[0] === 255 && buffer[1] === 254) {
      return "UTF-16 LE";
    }
    if (buffer[0] === 254 && buffer[1] === 255) {
      return "UTF-16 BE";
    }
  }
  return null;
}
async function readFileWithEncoding(filePath) {
  const buffer = await fs2.readFile(filePath);
  const bom = detectBOM(buffer);
  let encoding = bom;
  if (!encoding) {
    const detected = chardet.detect(buffer);
    encoding = detected || "UTF-8";
  }
  const normalizedEncoding = normalizeEncoding(encoding);
  let content;
  try {
    if (iconv.encodingExists(normalizedEncoding)) {
      content = iconv.decode(buffer, normalizedEncoding);
    } else {
      content = buffer.toString("utf-8");
    }
  } catch {
    content = buffer.toString("utf-8");
  }
  return {
    content,
    encoding: "utf-8",
    // output is always UTF-8
    originalEncoding: encoding
  };
}
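
// Illustrative sketch: a BOM wins over statistical detection, and the decoded
// output is always UTF-8 regardless of the source encoding.
//   detectBOM(Buffer.from([0xef, 0xbb, 0xbf, 0x68, 0x69]));   // "UTF-8"
//   const { originalEncoding } = await readFileWithEncoding("docs/legacy.txt");
//   // hypothetical path; originalEncoding might be e.g. "GB18030" via chardet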

// src/scanner/hash.ts
import crypto from "crypto";
function sha256(data) {
  return crypto.createHash("sha256").update(data).digest("hex");
}

// src/scanner/processor.ts
var MAX_FILE_SIZE = 100 * 1024;
var FALLBACK_LANGS = /* @__PURE__ */ new Set(["python", "go", "rust", "java", "markdown", "json"]);
function shouldSkipJson(relPath) {
  if (relPath.endsWith("-lock.json") || relPath.endsWith("package-lock.json")) {
    return true;
  }
  if (relPath.includes("node_modules/") || relPath.includes("node_modules\\")) {
    return true;
  }
  return false;
}
function getAdaptiveConcurrency() {
  const cpuCount = os.cpus().length;
  const concurrency = Math.max(4, Math.min(cpuCount - 1, 32));
  return concurrency;
}
var splitter = new SemanticSplitter({
  maxChunkSize: 500,
  minChunkSize: 50,
  chunkOverlap: 40
  // conservative overlap for hybrid retrieval (BM25 + vector + rerank)
});
async function processFile(absPath, relPath, known) {
  const language = getLanguage(relPath);
  try {
    const stat = await fs3.stat(absPath);
    const mtime = stat.mtimeMs;
    const size = stat.size;
    if (size > MAX_FILE_SIZE) {
      return {
        absPath,
        relPath,
        hash: "",
        content: null,
        chunks: [],
        language,
        mtime,
        size,
        status: "skipped",
        error: `File too large (${size} bytes > ${MAX_FILE_SIZE} bytes)`
      };
    }
    if (known && known.mtime === mtime && known.size === size) {
      return {
        absPath,
        relPath,
        hash: known.hash,
        content: null,
        chunks: [],
        language,
        mtime,
        size,
        status: "unchanged"
      };
    }
    const { content, originalEncoding } = await readFileWithEncoding(absPath);
    if (content.includes("\0")) {
      return {
        absPath,
        relPath,
        hash: "",
        content: null,
        chunks: [],
        language,
        mtime,
        size,
        status: "skipped",
        error: `Binary file detected (original encoding: ${originalEncoding})`
      };
    }
    const hash = sha256(content);
    if (known && known.hash === hash) {
      return {
        absPath,
        relPath,
        hash,
        content,
        chunks: [],
        language,
        mtime,
        size,
        status: "unchanged"
      };
    }
    if (language === "json" && shouldSkipJson(relPath)) {
      return {
        absPath,
        relPath,
        hash,
        content: null,
        chunks: [],
        language,
        mtime,
        size,
        status: "skipped",
        error: "Lock file or node_modules JSON"
      };
    }
    let chunks = [];
    if (isLanguageSupported(language)) {
      try {
        const parser = await getParser(language);
        if (parser) {
          const tree = parser.parse(content);
          chunks = splitter.split(tree, content, relPath, language);
        }
      } catch (err) {
        const error = err;
        console.warn(`[Chunking] AST failed for ${relPath}: ${error.message}`);
      }
    }
    if (chunks.length === 0 && FALLBACK_LANGS.has(language)) {
      chunks = splitter.splitPlainText(content, relPath, language);
    }
    return {
      absPath,
      relPath,
      hash,
      content,
      chunks,
      language,
      mtime,
      size,
      status: known ? "modified" : "added"
    };
  } catch (err) {
    const error = err;
    return {
      absPath,
      relPath,
      hash: "",
      content: null,
      chunks: [],
      language,
      mtime: 0,
      size: 0,
      status: "error",
      error: error.message
    };
  }
}
async function processFiles(rootPath, filePaths, knownFiles) {
  const concurrency = getAdaptiveConcurrency();
  const limit = pLimit(concurrency);
  const tasks = filePaths.map((filePath) => {
    const relPath = path2.relative(rootPath, filePath).replace(/\\/g, "/");
    const known = knownFiles.get(relPath);
    return limit(() => processFile(filePath, relPath, known));
  });
  return Promise.all(tasks);
}
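
// Worked values for the concurrency clamp above, max(4, min(cpuCount - 1, 32)):
//    2 cores -> max(4, min(1, 32))  = 4
//    8 cores -> max(4, min(7, 32))  = 7
//   64 cores -> max(4, min(63, 32)) = 32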

// src/scanner/index.ts
async function scan(rootPath, options = {}) {
  const projectId = generateProjectId(rootPath);
  const db = initDb(projectId);
  try {
    await initFilter(rootPath);
    let forceReindex = options.force ?? false;
    if (options.vectorIndex !== false) {
      const currentDimensions = getEmbeddingConfig().dimensions;
      const storedDimensions = getStoredEmbeddingDimensions(db);
      if (storedDimensions !== null && storedDimensions !== currentDimensions) {
        logger.warn(
          { stored: storedDimensions, current: currentDimensions },
          "Embedding dimensions changed, forcing a full reindex"
        );
        forceReindex = true;
      }
      setStoredEmbeddingDimensions(db, currentDimensions);
    }
    if (forceReindex) {
      clear(db);
      if (options.vectorIndex !== false) {
        const embeddingConfig = getEmbeddingConfig();
        const indexer = await getIndexer(projectId, embeddingConfig.dimensions);
        await indexer.clear();
      }
    }
    const knownFiles = getAllFileMeta(db);
    const filePaths = await crawl(rootPath);
    const scannedPaths = new Set(
      filePaths.map((p) => path3.relative(rootPath, p).replace(/\\/g, "/"))
    );
    const results = [];
    const batchSize = 100;
    for (let i = 0; i < filePaths.length; i += batchSize) {
      const batch = filePaths.slice(i, i + batchSize);
      const batchResults = await processFiles(rootPath, batch, knownFiles);
      results.push(...batchResults);
    }
    const toAdd = [];
    const toUpdateMtime = [];
    const deletedPaths = [];
    for (const result of results) {
      switch (result.status) {
        case "added":
        case "modified":
          toAdd.push({
            path: result.relPath,
            hash: result.hash,
            mtime: result.mtime,
            size: result.size,
            content: result.content,
            language: result.language,
            vectorIndexHash: null
            // new/modified files need re-indexing
          });
          break;
        case "unchanged":
          toUpdateMtime.push({ path: result.relPath, mtime: result.mtime });
          break;
        case "skipped":
          logger.debug({ path: result.relPath, reason: result.error }, "Skipped file");
          break;
        case "error":
          logger.error({ path: result.relPath, error: result.error }, "Error processing file");
          break;
      }
    }
    const allIndexedPaths = getAllPaths(db);
    for (const indexedPath of allIndexedPaths) {
      const normalizedIndexedPath = indexedPath.replace(/\\/g, "/");
      if (!scannedPaths.has(normalizedIndexedPath)) {
        deletedPaths.push(indexedPath);
      }
    }
    batchUpsert(db, toAdd);
    batchUpdateMtime(db, toUpdateMtime);
    batchDelete(db, deletedPaths);
    const stats = {
      totalFiles: filePaths.length,
      added: results.filter((r) => r.status === "added").length,
      modified: results.filter((r) => r.status === "modified").length,
      unchanged: results.filter((r) => r.status === "unchanged").length,
      deleted: deletedPaths.length,
      skipped: results.filter((r) => r.status === "skipped").length,
      errors: results.filter((r) => r.status === "error").length
    };
    if (options.vectorIndex !== false) {
      options.onProgress?.(45, 100, "Preparing vector index...");
      const embeddingConfig = getEmbeddingConfig();
      const indexer = await getIndexer(projectId, embeddingConfig.dimensions);
      const needsVectorIndex = results.filter(
        (r) => r.status === "added" || r.status === "modified"
      );
      const healingPathSet = new Set(getFilesNeedingVectorIndex(db));
      const healingFilePaths = results.filter((r) => r.status === "unchanged" && healingPathSet.has(r.relPath)).map((r) => r.absPath);
      let healingFiles = [];
      if (healingFilePaths.length > 0) {
        logger.info({ count: healingFilePaths.length }, "Self-healing: found files that still need vector indexing");
        const processedHealingFiles = await processFiles(rootPath, healingFilePaths, /* @__PURE__ */ new Map());
        healingFiles = processedHealingFiles.filter((r) => r.status === "added" || r.status === "modified").map((r) => ({ ...r, status: "modified" }));
      }
      const deletedResults = deletedPaths.map((path4) => ({
        absPath: "",
        relPath: path4,
        hash: "",
        content: null,
        chunks: [],
        language: "",
        mtime: 0,
        size: 0,
        status: "deleted"
      }));
      const allToIndex = [...needsVectorIndex, ...healingFiles, ...deletedResults];
      if (allToIndex.length > 0) {
        options.onProgress?.(45, 100, `Generating vector embeddings... (${allToIndex.length} files)`);
        const indexStats = await indexer.indexFiles(db, allToIndex, (completed, total) => {
          const progress = 45 + Math.floor(completed / total * 54);
          options.onProgress?.(progress, 100, `Generating vector embeddings... (${completed}/${total} batches)`);
        });
        stats.vectorIndex = {
          indexed: indexStats.indexed,
          deleted: indexStats.deleted,
          errors: indexStats.errors
        };
      }
    }
    options.onProgress?.(100, 100, "Indexing complete");
    return stats;
  } finally {
    closeDb(db);
    closeAllIndexers();
    await closeAllVectorStores();
  }
}

export {
  scan
};
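
// Illustrative caller sketch. The manifest's only +1337 file is
// dist/chunk-CKN7LWEA.js, so this hunk is presumably that chunk:
//   import { scan } from "./chunk-CKN7LWEA.js";
//   const stats = await scan("/home/me/my-project", {
//     force: false,        // true clears the file metadata and vector index first
//     vectorIndex: true,   // false skips embedding generation entirely
//     onProgress: (done, total, msg) => console.log(`${done}/${total} ${msg}`)
//   });
//   // stats: { totalFiles, added, modified, unchanged, deleted, skipped, errors, vectorIndex? }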