@chiway/contextweaver 1.4.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +482 -196
- package/README.zh-CN.md +669 -0
- package/dist/{SearchService-OS7CYHNJ.js → SearchService-WVD6THR3.js} +116 -74
- package/dist/chunk-2EVCLNYN.js +223 -0
- package/dist/{chunk-ZOMGPIU6.js → chunk-3BNHQV5W.js} +1 -5
- package/dist/chunk-BFCIZ52F.js +102 -0
- package/dist/chunk-H4MGLXXF.js +115 -0
- package/dist/{lock-FL54LIQL.js → chunk-HHYPQA3X.js} +1 -1
- package/dist/chunk-IZ6IUHNN.js +77 -0
- package/dist/chunk-LB42CZEB.js +18 -0
- package/dist/chunk-MN6BQJDB.js +85 -0
- package/dist/{chunk-EMSMLPMK.js → chunk-ORYIVY7D.js} +10 -117
- package/dist/{chunk-RGJSXUFS.js → chunk-PPLFJGO3.js} +60 -0
- package/dist/chunk-TPM6YP43.js +38 -0
- package/dist/chunk-XFIM2T6S.js +57 -0
- package/dist/{chunk-AB24E3Z7.js → chunk-XMZZZKG7.js} +23 -79
- package/dist/chunk-XTWNT7KP.js +156 -0
- package/dist/chunk-YMQWNIQI.js +143 -0
- package/dist/{chunk-X7PAYQMT.js → chunk-YSQI5IRI.js} +125 -5
- package/dist/{codebaseRetrieval-3Z4CRA7X.js → codebaseRetrieval-4BFIM7PU.js} +5 -2
- package/dist/{db-PMVM7557.js → db-GBCLP4GG.js} +15 -1
- package/dist/findReferences-EBYR3VNL.js +16 -0
- package/dist/getSymbolDefinition-ZQK65FPN.js +17 -0
- package/dist/index.js +244 -41
- package/dist/listFiles-W7C5UYOP.js +14 -0
- package/dist/loadConfig-XTVT2OWW.js +9 -0
- package/dist/lock-HNKQ6X5B.js +8 -0
- package/dist/scanner-OVMAMQSQ.js +13 -0
- package/dist/server-ZIJIRVWH.js +347 -0
- package/dist/stats-AGKUCJQI.js +12 -0
- package/dist/{vectorStore-HPQZOVWF.js → vectorStore-4ODCERRO.js} +1 -1
- package/package.json +15 -23
- package/dist/scanner-2XGJWYHR.js +0 -11
- package/dist/server-XK6EINRV.js +0 -146
|
@@ -1,21 +1,32 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
2
|
+
createSearchConfigFingerprint
|
|
3
|
+
} from "./chunk-IZ6IUHNN.js";
|
|
4
|
+
import {
|
|
3
5
|
bootstrap,
|
|
4
6
|
getGraphExpander,
|
|
5
7
|
getIndexer,
|
|
6
8
|
scoreChunkTokenOverlap
|
|
7
|
-
} from "./chunk-
|
|
9
|
+
} from "./chunk-XMZZZKG7.js";
|
|
10
|
+
import "./chunk-LB42CZEB.js";
|
|
11
|
+
import {
|
|
12
|
+
ChunkContentLoader
|
|
13
|
+
} from "./chunk-XFIM2T6S.js";
|
|
8
14
|
import {
|
|
9
15
|
getVectorStore
|
|
10
|
-
} from "./chunk-
|
|
16
|
+
} from "./chunk-3BNHQV5W.js";
|
|
11
17
|
import {
|
|
18
|
+
DEFAULT_CONFIG
|
|
19
|
+
} from "./chunk-BFCIZ52F.js";
|
|
20
|
+
import {
|
|
21
|
+
getIndexVersion,
|
|
22
|
+
incrementStat,
|
|
12
23
|
initDb,
|
|
13
24
|
isChunksFtsInitialized,
|
|
14
25
|
isFtsInitialized,
|
|
15
26
|
searchChunksFts,
|
|
16
27
|
searchFilesFts,
|
|
17
28
|
segmentQuery
|
|
18
|
-
} from "./chunk-
|
|
29
|
+
} from "./chunk-PPLFJGO3.js";
|
|
19
30
|
import {
|
|
20
31
|
isDebugEnabled,
|
|
21
32
|
logger
|
|
@@ -161,10 +172,8 @@ function sleep(ms) {
|
|
|
161
172
|
|
|
162
173
|
// src/search/ContextPacker.ts
|
|
163
174
|
var ContextPacker = class {
|
|
164
|
-
projectId;
|
|
165
175
|
config;
|
|
166
|
-
constructor(
|
|
167
|
-
this.projectId = projectId;
|
|
176
|
+
constructor(_projectId, config) {
|
|
168
177
|
this.config = config;
|
|
169
178
|
}
|
|
170
179
|
/**
|
|
@@ -270,73 +279,63 @@ var ContextPacker = class {
|
|
|
270
279
|
}
|
|
271
280
|
};
|
|
272
281
|
|
|
273
|
-
// src/search/
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
maxBreadcrumbChars: 250,
|
|
301
|
-
// Max chars for breadcrumb context in rerank input. Range: 100–500.
|
|
302
|
-
headRatio: 0.67,
|
|
303
|
-
// Ratio of head vs tail when truncating chunks. Range: 0.5–0.8.
|
|
304
|
-
// ── Expansion (上下文扩展: E1 邻居 / E2 面包屑 / E3 跨文件导入) ──
|
|
305
|
-
neighborHops: 2,
|
|
306
|
-
// E1: How many sibling chunks to expand in each direction. Range: 1–3.
|
|
307
|
-
breadcrumbExpandLimit: 3,
|
|
308
|
-
// E2: Max ancestor breadcrumbs (class/function scope). Range: 1–5.
|
|
309
|
-
importFilesPerSeed: 3,
|
|
310
|
-
// E3: Cross-file import files to resolve per seed chunk. Range: 0–5. Set to 3 to enable import-graph expansion for better cross-file context.
|
|
311
|
-
chunksPerImportFile: 3,
|
|
312
|
-
// E3: Chunks to pull from each resolved import file. Range: 1–5. Set to 3 for balanced coverage of imported symbols.
|
|
313
|
-
decayNeighbor: 0.8,
|
|
314
|
-
// Score decay per E1 hop. Range: 0.5–0.9. Higher = neighbors stay relevant longer.
|
|
315
|
-
decayBreadcrumb: 0.7,
|
|
316
|
-
// Score decay per E2 level. Range: 0.4–0.8.
|
|
317
|
-
decayImport: 0.6,
|
|
318
|
-
// Score decay for E3 import chunks. Range: 0.3–0.7. Lower than E1/E2 since cross-file is less certain.
|
|
319
|
-
decayDepth: 0.7,
|
|
320
|
-
// General depth decay multiplier. Range: 0.5–0.9.
|
|
321
|
-
// ── ContextPacker (上下文打包) ──
|
|
322
|
-
maxSegmentsPerFile: 3,
|
|
323
|
-
// Max non-contiguous segments per file in output. Range: 1–5. Prevents excessive fragmentation.
|
|
324
|
-
maxTotalChars: 48e3,
|
|
325
|
-
// Token budget expressed as chars (~12k tokens). Range: 20000–80000.
|
|
326
|
-
// ── Smart TopK (动态结果数量) ──
|
|
327
|
-
enableSmartTopK: true,
|
|
328
|
-
// Dynamically adjust result count based on score distribution.
|
|
329
|
-
smartTopScoreRatio: 0.5,
|
|
330
|
-
// Min score as ratio of top-1 score to remain included. Range: 0.3–0.7.
|
|
331
|
-
smartTopScoreDeltaAbs: 0.25,
|
|
332
|
-
// Max absolute score drop from top-1 before cutting off. Range: 0.1–0.4.
|
|
333
|
-
smartMinScore: 0.25,
|
|
334
|
-
// Hard floor: chunks below this score are always excluded. Range: 0.1–0.4.
|
|
335
|
-
smartMinK: 2,
|
|
336
|
-
// Minimum results to return regardless of scores. Range: 1–3.
|
|
337
|
-
smartMaxK: 8
|
|
338
|
-
// Maximum results when smart topK is active. Range: 5–15.
|
|
282
|
+
// src/search/QueryCache.ts
|
|
283
|
+
import crypto from "crypto";
|
|
284
|
+
var MAX_CACHE_ENTRIES = 50;
|
|
285
|
+
/**
 * Small least-recently-used cache built on a Map.
 *
 * A Map iterates its keys in insertion order, so the first key is always the
 * least recently used one: every read or write re-inserts the key at the end.
 */
var LruCache = class {
  /**
   * @param {number} maxSize Maximum number of entries kept; the oldest entry
   *   is evicted once an insert pushes the size above this value.
   */
  constructor(maxSize) {
    this.maxSize = maxSize;
  }
  entries = /* @__PURE__ */ new Map();
  /**
   * Look up a key and mark it as most recently used.
   *
   * @param {*} key Cache key.
   * @returns {*} The stored value, or undefined when the key is absent.
   */
  get(key) {
    // Test presence with has() rather than comparing the value to undefined,
    // so an entry whose stored value is undefined still has its recency refreshed.
    if (!this.entries.has(key)) return void 0;
    const value = this.entries.get(key);
    this.entries.delete(key);
    this.entries.set(key, value);
    return value;
  }
  /**
   * Insert or overwrite a key, marking it as most recently used, then evict
   * the least recently used entry if the cache grew past maxSize.
   *
   * @param {*} key Cache key.
   * @param {*} value Value to store.
   */
  set(key, value) {
    // Deleting first moves an existing key to the end of the insertion order.
    this.entries.delete(key);
    this.entries.set(key, value);
    if (this.entries.size > this.maxSize) {
      // Check the iterator's done flag instead of the key value, so an
      // undefined key can be evicted like any other.
      const oldest = this.entries.keys().next();
      if (!oldest.done) {
        this.entries.delete(oldest.value);
      }
    }
  }
};
|
|
310
|
+
var projectCaches = /* @__PURE__ */ new Map();
|
|
311
|
+
/**
 * Canonicalize a query string for cache-key purposes: surrounding whitespace
 * is dropped, inner whitespace runs become single spaces, and the text is
 * lower-cased.
 *
 * @param {string} query Raw query text.
 * @returns {string} The normalized query.
 */
function normalizeQuery(query) {
  const words = query.trim().split(/\s+/);
  return words.join(" ").toLowerCase();
}
|
|
314
|
+
/**
 * Return the LRU cache registered for a project, creating and registering a
 * new one (capped at MAX_CACHE_ENTRIES) on first use.
 *
 * @param {string} projectId Key into the module-level projectCaches map.
 * @returns {LruCache} The cache for that project.
 */
function getProjectCache(projectId) {
  const existing = projectCaches.get(projectId);
  if (existing) {
    return existing;
  }
  const created = new LruCache(MAX_CACHE_ENTRIES);
  projectCaches.set(projectId, created);
  return created;
}
|
|
322
|
+
/**
 * Derive the cache key for a query: a hex SHA-256 digest over the JSON
 * serialization of the normalized query, project id, index version and
 * config fingerprint.
 *
 * @param {{query: string, projectId: *, indexVersion: *, configFingerprint: *}} input
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function buildQueryCacheKey(input) {
  // Property order is part of the serialized payload, so it must stay fixed.
  const payload = JSON.stringify({
    query: normalizeQuery(input.query),
    projectId: input.projectId,
    indexVersion: input.indexVersion,
    configFingerprint: input.configFingerprint
  });
  const hasher = crypto.createHash("sha256");
  hasher.update(payload);
  return hasher.digest("hex");
}
|
|
333
|
+
/**
 * Read a cached context pack for a project.
 *
 * @param {string} projectId Project whose cache is consulted.
 * @param {string} key Cache key.
 * @returns {*} The cached pack, or undefined on a miss.
 */
function getCachedContextPack(projectId, key) {
  const cache = getProjectCache(projectId);
  return cache.get(key);
}
|
|
336
|
+
/**
 * Store a context pack in a project's cache.
 *
 * @param {string} projectId Project whose cache is written.
 * @param {string} key Cache key.
 * @param {*} pack Context pack to cache.
 */
function setCachedContextPack(projectId, key, pack) {
  const cache = getProjectCache(projectId);
  cache.set(key, pack);
}
|
|
340
339
|
|
|
341
340
|
// src/search/SearchService.ts
|
|
342
341
|
var SearchService = class {
|
|
@@ -345,9 +344,11 @@ var SearchService = class {
|
|
|
345
344
|
vectorStore = null;
|
|
346
345
|
db = null;
|
|
347
346
|
config;
|
|
347
|
+
configFingerprint;
|
|
348
348
|
constructor(projectId, _projectPath, config) {
|
|
349
349
|
this.projectId = projectId;
|
|
350
350
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
351
|
+
this.configFingerprint = createSearchConfigFingerprint(this.config);
|
|
351
352
|
}
|
|
352
353
|
async init() {
|
|
353
354
|
const embeddingConfig = getEmbeddingConfig();
|
|
@@ -366,6 +367,18 @@ var SearchService = class {
|
|
|
366
367
|
* 构建上下文包(用于问答/生成)
|
|
367
368
|
*/
|
|
368
369
|
async buildContextPack(query) {
|
|
370
|
+
const db = this.db;
|
|
371
|
+
const cacheKey = buildQueryCacheKey({
|
|
372
|
+
query,
|
|
373
|
+
projectId: this.projectId,
|
|
374
|
+
indexVersion: getIndexVersion(db),
|
|
375
|
+
configFingerprint: this.configFingerprint
|
|
376
|
+
});
|
|
377
|
+
const cached = getCachedContextPack(this.projectId, cacheKey);
|
|
378
|
+
if (cached) {
|
|
379
|
+
this.recordSearchStats(db, { cacheHit: true });
|
|
380
|
+
return cached;
|
|
381
|
+
}
|
|
369
382
|
const timingMs = {};
|
|
370
383
|
let t0 = Date.now();
|
|
371
384
|
const candidates = await this.hybridRetrieve(query);
|
|
@@ -385,7 +398,7 @@ var SearchService = class {
|
|
|
385
398
|
const packer = new ContextPacker(this.projectId, this.config);
|
|
386
399
|
const files = await packer.pack([...seeds, ...expanded], this.db);
|
|
387
400
|
timingMs.pack = Date.now() - t0;
|
|
388
|
-
|
|
401
|
+
const pack = {
|
|
389
402
|
query,
|
|
390
403
|
seeds,
|
|
391
404
|
expanded,
|
|
@@ -396,6 +409,35 @@ var SearchService = class {
|
|
|
396
409
|
timingMs
|
|
397
410
|
}
|
|
398
411
|
};
|
|
412
|
+
setCachedContextPack(this.projectId, cacheKey, pack);
|
|
413
|
+
this.recordSearchStats(db, { cacheHit: false, timingMs, seedCount: seeds.length });
|
|
414
|
+
return pack;
|
|
415
|
+
}
|
|
416
|
+
/**
|
|
417
|
+
* 记录搜索统计埋点(静默吞错,不影响搜索主流程)
|
|
418
|
+
*
|
|
419
|
+
* 多个计数器用事务包裹,保证一次查询要么全写要么全不写。
|
|
420
|
+
*/
|
|
421
|
+
recordSearchStats(db, args) {
|
|
422
|
+
try {
|
|
423
|
+
const tx = db.transaction(() => {
|
|
424
|
+
incrementStat(db, "stats.search.total_queries");
|
|
425
|
+
if (args.cacheHit) {
|
|
426
|
+
incrementStat(db, "stats.search.cache_hits");
|
|
427
|
+
return;
|
|
428
|
+
}
|
|
429
|
+
incrementStat(db, "stats.search.compute_runs");
|
|
430
|
+
const t = args.timingMs ?? {};
|
|
431
|
+
incrementStat(db, "stats.search.sum_retrieve_ms", Math.round(t.retrieve ?? 0));
|
|
432
|
+
incrementStat(db, "stats.search.sum_rerank_ms", Math.round(t.rerank ?? 0));
|
|
433
|
+
incrementStat(db, "stats.search.sum_expand_ms", Math.round(t.expand ?? 0));
|
|
434
|
+
incrementStat(db, "stats.search.sum_pack_ms", Math.round(t.pack ?? 0));
|
|
435
|
+
incrementStat(db, "stats.search.sum_seed_count", args.seedCount ?? 0);
|
|
436
|
+
});
|
|
437
|
+
tx();
|
|
438
|
+
} catch (err) {
|
|
439
|
+
logger.debug({ error: err.message }, "\u641C\u7D22\u7EDF\u8BA1\u57CB\u70B9\u5931\u8D25");
|
|
440
|
+
}
|
|
399
441
|
}
|
|
400
442
|
// 召回方法
|
|
401
443
|
/**
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import {
|
|
2
|
+
commonPrefixLength
|
|
3
|
+
} from "./chunk-LB42CZEB.js";
|
|
4
|
+
import {
|
|
5
|
+
ChunkContentLoader
|
|
6
|
+
} from "./chunk-XFIM2T6S.js";
|
|
7
|
+
import {
|
|
8
|
+
getVectorStore
|
|
9
|
+
} from "./chunk-3BNHQV5W.js";
|
|
10
|
+
import {
|
|
11
|
+
ensureIndexed,
|
|
12
|
+
formatTextResponse
|
|
13
|
+
} from "./chunk-H4MGLXXF.js";
|
|
14
|
+
import {
|
|
15
|
+
generateProjectId,
|
|
16
|
+
initDb,
|
|
17
|
+
searchChunksFts
|
|
18
|
+
} from "./chunk-PPLFJGO3.js";
|
|
19
|
+
import {
|
|
20
|
+
logger
|
|
21
|
+
} from "./chunk-JVKVSTQ3.js";
|
|
22
|
+
|
|
23
|
+
// src/mcp/tools/getSymbolDefinition.ts
|
|
24
|
+
import { z } from "zod";
|
|
25
|
+
// Input schema for the get-symbol-definition MCP tool.
var getSymbolDefinitionSchema = z.object({
  // Repository root; required.
  repo_path: z
    .string()
    .describe("The absolute file system path to the repository root. (e.g., '/Users/dev/my-project')"),
  // Symbol to look up; must be a non-empty string.
  symbol: z
    .string()
    .min(1)
    .describe("The exact symbol name to resolve."),
  // Optional path used to prefer one definition among same-name candidates.
  hint_path: z
    .string()
    .optional()
    .describe("Optional preferred path used to disambiguate same-name definitions."),
  // Optional positive integer, capped at 20.
  max_results: z
    .number()
    .int()
    .positive()
    .max(20)
    .optional()
    .describe("Maximum number of definitions to return. Defaults to 3.")
});
|
|
33
|
+
// Regex source templates, per language, that suggest a chunk *defines* a symbol.
// The literal text "{symbol}" is a placeholder that callers substitute with the
// (regex-escaped) symbol name before compiling the pattern.
var LANGUAGE_DEFINITION_PATTERNS = {
  typescript: [
    "function\\s+{symbol}\\b",
    "class\\s+{symbol}\\b",
    "(?:const|let|var)\\s+{symbol}\\b",
    "interface\\s+{symbol}\\b",
    "type\\s+{symbol}\\b",
    "enum\\s+{symbol}\\b"
  ],
  javascript: [
    "function\\s+{symbol}\\b",
    "class\\s+{symbol}\\b",
    "(?:const|let|var)\\s+{symbol}\\b"
  ],
  python: [
    "def\\s+{symbol}\\b",
    "class\\s+{symbol}\\b"
  ],
  go: [
    "func\\s+{symbol}\\b",
    "type\\s+{symbol}\\b",
    "const\\s+{symbol}\\b",
    "var\\s+{symbol}\\b"
  ],
  rust: [
    "fn\\s+{symbol}\\b",
    "struct\\s+{symbol}\\b",
    "enum\\s+{symbol}\\b",
    "const\\s+{symbol}\\b"
  ],
  // The final pattern for java/csharp/cpp/c matches "<symbol>(" anywhere, so it
  // also matches call sites, not only definitions.
  java: [
    "class\\s+{symbol}\\b",
    "interface\\s+{symbol}\\b",
    "enum\\s+{symbol}\\b",
    "\\b{symbol}\\s*\\("
  ],
  csharp: [
    "class\\s+{symbol}\\b",
    "interface\\s+{symbol}\\b",
    "enum\\s+{symbol}\\b",
    "\\b{symbol}\\s*\\("
  ],
  cpp: [
    "class\\s+{symbol}\\b",
    "struct\\s+{symbol}\\b",
    "\\b{symbol}\\s*\\("
  ],
  c: [
    "\\b{symbol}\\s*\\("
  ]
};
|
|
70
|
+
/**
 * Escape regular-expression metacharacters so the text can be embedded in a
 * pattern and matched literally.
 *
 * @param {string} text Text to escape.
 * @returns {string} The text with each metacharacter preceded by a backslash.
 */
function escapeRegex(text) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return text.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
73
|
+
/**
 * Return the last ">"-separated segment of a breadcrumb, trimmed.
 *
 * A missing breadcrumb yields "" instead of throwing, because other code in
 * this file (formatDefinition) treats the breadcrumb as optional.
 *
 * @param {string | null | undefined} breadcrumb Breadcrumb such as "A > B > c".
 * @returns {string} The trimmed final segment, or "" when there is none.
 */
function breadcrumbTail(breadcrumb) {
  if (typeof breadcrumb !== "string") {
    return "";
  }
  // split() always returns at least one element, so pop() is a string here.
  return breadcrumb.split(">").pop().trim();
}
|
|
76
|
+
/**
 * Compute the 1-based line number of the character at `index` by counting the
 * "\n" characters that occur strictly before it.
 *
 * @param {string} content Full text.
 * @param {number} index Character offset into `content`; offsets past the end
 *   count every newline in the text.
 * @returns {number} 1 plus the number of newlines before `index`.
 */
function countLinesBefore(content, index) {
  const limit = Math.min(index, content.length);
  let line = 1;
  let newlineAt = content.indexOf("\n");
  while (newlineAt !== -1 && newlineAt < limit) {
    line += 1;
    newlineAt = content.indexOf("\n", newlineAt + 1);
  }
  return line;
}
|
|
85
|
+
/**
 * Compute the last line number covered by a code snippet that starts at
 * `startLine`, ignoring trailing blank lines.
 *
 * Trailing line breaks are stripped in both LF and CRLF form. Stripping only
 * "\n" left a dangling "\r" line for CRLF text ending in a blank line, which
 * over-counted the end line by one.
 *
 * @param {number} startLine 1-based line number of the snippet's first line.
 * @param {string} code Snippet text.
 * @returns {number} Line number of the snippet's last line; `startLine` when
 *   the snippet is empty or consists only of line breaks.
 */
function computeEndLine(startLine, code) {
  const normalized = code.replace(/(?:\r?\n)+$/u, "");
  if (!normalized) {
    return startLine;
  }
  // Splitting on "\n" counts CRLF lines correctly too; the "\r" stays on each line.
  return startLine + normalized.split("\n").length - 1;
}
|
|
92
|
+
/**
 * Map a file path to a language label (used as a Markdown code-fence tag).
 *
 * The extension is taken from the final path segment only, so dots in
 * directory names are ignored and extension-less files such as "Makefile"
 * fall back to "plaintext". Known extensions map to a language name; unknown
 * extensions are returned as-is (lower-cased).
 *
 * @param {string} filePath File path using "/" or "\\" separators.
 * @returns {string} Language label, the raw extension, or "plaintext".
 */
function detectLanguage(filePath) {
  const fileName = filePath.split(/[\\/]/).pop() ?? "";
  const dotIndex = fileName.lastIndexOf(".");
  const ext = dotIndex === -1 ? "" : fileName.slice(dotIndex + 1).toLowerCase();
  // A Map avoids plain-object prototype lookups: an extension such as
  // "constructor" must not resolve to an inherited Object member.
  const langMap = new Map([
    ["ts", "typescript"],
    ["tsx", "typescript"],
    ["js", "javascript"],
    ["jsx", "javascript"],
    ["py", "python"],
    ["rs", "rust"],
    ["go", "go"],
    ["java", "java"],
    ["c", "c"],
    ["cpp", "cpp"],
    ["h", "c"],
    ["hpp", "cpp"],
    ["cs", "csharp"],
    ["md", "markdown"],
    ["json", "json"]
  ]);
  return langMap.get(ext) || ext || "plaintext";
}
|
|
113
|
+
/**
 * Render one definition candidate as Markdown: a "## path (Lstart-Lend)"
 * heading, an optional "> breadcrumb" line, and the code in a fenced block
 * tagged with the language detected from the file path.
 *
 * @param {{chunk: {file_path: string, breadcrumb?: string}, code: string, startLine: number, endLine: number}} candidate
 * @returns {string} The sections joined by single newlines.
 */
function formatDefinition(candidate) {
  const { chunk } = candidate;
  const sections = [`## ${chunk.file_path} (L${candidate.startLine}-L${candidate.endLine})`];
  if (chunk.breadcrumb) {
    sections.push(`> ${chunk.breadcrumb}`);
  }
  sections.push(`\`\`\`${detectLanguage(chunk.file_path)}\n${candidate.code}\n\`\`\``);
  return sections.join("\n");
}
|
|
121
|
+
/**
 * Report whether `code` matches any of the definition patterns registered for
 * `language` (falling back to the TypeScript patterns for unlisted languages)
 * once "{symbol}" has been replaced by the regex-escaped symbol.
 *
 * @param {string} language Key into LANGUAGE_DEFINITION_PATTERNS.
 * @param {string} code Source text to test.
 * @param {string} symbol Symbol name; matched literally.
 * @returns {boolean} True when at least one pattern matches.
 */
function hasDefinitionPattern(language, code, symbol) {
  const patterns = LANGUAGE_DEFINITION_PATTERNS[language] ?? LANGUAGE_DEFINITION_PATTERNS.typescript;
  const escapedSymbol = escapeRegex(symbol);
  return patterns.some((pattern) => {
    // Use a replacer function: a replacement *string* would expand sequences
    // such as "$&", "$`" and "$'" that can appear in the symbol, corrupting
    // the regex source (and possibly making new RegExp throw).
    const source = pattern.replaceAll("{symbol}", () => escapedSymbol);
    return new RegExp(source, "u").test(code);
  });
}
|
|
128
|
+
/**
 * Sort comparator for definition candidates. Priority, highest first:
 *   1. candidates whose breadcrumb tail equals the symbol (breadcrumbExact),
 *   2. larger prefixScore,
 *   3. larger score,
 *   4. file path in localeCompare order.
 *
 * @param {{breadcrumbExact: boolean, prefixScore: number, score: number, chunk: {file_path: string}}} a
 * @param {{breadcrumbExact: boolean, prefixScore: number, score: number, chunk: {file_path: string}}} b
 * @returns {number} Negative when `a` ranks first, positive when `b` does.
 */
function rankCandidates(a, b) {
  const sameExactness = a.breadcrumbExact === b.breadcrumbExact;
  if (!sameExactness) {
    return a.breadcrumbExact ? -1 : 1;
  }
  const samePrefix = a.prefixScore === b.prefixScore;
  if (!samePrefix) {
    return b.prefixScore - a.prefixScore;
  }
  const sameScore = a.score === b.score;
  if (!sameScore) {
    return b.score - a.score;
  }
  return a.chunk.file_path.localeCompare(b.chunk.file_path);
}
|
|
140
|
+
/**
 * MCP tool handler: find likely definitions of a symbol in an indexed repository.
 *
 * Steps, as performed below:
 *   1. ensure the repository is indexed, then open the project database;
 *   2. full-text search the chunks for the symbol;
 *   3. load the chunk records of every hit file and their code;
 *   4. keep hits whose breadcrumb tail equals the symbol or whose code matches
 *      a definition pattern for the chunk's language;
 *   5. rank with rankCandidates, keep the top `max_results`, render as text.
 * The database handle is closed in all cases.
 *
 * @param {{repo_path: string, symbol: string, hint_path?: string, max_results?: number}} args
 *   Tool arguments; `max_results` defaults to 3.
 * @param {*} onProgress Progress callback forwarded to ensureIndexed.
 * @returns {Promise<*>} The value produced by formatTextResponse.
 */
async function handleGetSymbolDefinition(args, onProgress) {
  const { repo_path, symbol, hint_path, max_results = 3 } = args;
  const projectId = generateProjectId(repo_path);
  logger.info({ repo_path, symbol, hint_path, max_results }, "MCP get-symbol-definition \u8C03\u7528\u5F00\u59CB");
  await ensureIndexed(repo_path, projectId, { onProgress });
  const db = initDb(projectId);
  try {
    // Over-fetch FTS hits (at least 20) because many are filtered out below.
    const ftsLimit = Math.max(max_results * 5, 20);
    const hits = searchChunksFts(db, symbol, ftsLimit);
    const hitPaths = [...new Set(hits.map((hit) => hit.filePath))];
    const vectorStore = await getVectorStore(projectId);
    const chunksByFile = await vectorStore.getFilesChunks(hitPaths);

    // Index chunk records by "<filePath>#<chunk_index>" to join them with FTS hits.
    const chunkByKey = /* @__PURE__ */ new Map();
    for (const [filePath, chunks] of chunksByFile) {
      for (const chunk of chunks) {
        chunkByKey.set(`${filePath}#${chunk.chunk_index}`, chunk);
      }
    }

    const toSlice = (chunk) => ({
      filePath: chunk.file_path,
      start_index: chunk.start_index,
      end_index: chunk.end_index
    });
    const loader = new ChunkContentLoader(db);
    const codeMap = loader.loadMany(Array.from(chunkByKey.values(), (chunk) => toSlice(chunk)));

    // Full file contents are read at most once per path; they are needed to
    // turn a chunk's start offset into a line number.
    const fileContentStmt = db.prepare("SELECT content FROM files WHERE path = ?");
    const fullFileCache = /* @__PURE__ */ new Map();
    const readFullContent = (path) => {
      if (!fullFileCache.has(path)) {
        const row = fileContentStmt.get(path);
        fullFileCache.set(path, row?.content ?? "");
      }
      return fullFileCache.get(path);
    };

    const bestByChunk = /* @__PURE__ */ new Map();
    for (const hit of hits) {
      const chunk = chunkByKey.get(`${hit.filePath}#${hit.chunkIndex}`);
      if (!chunk) {
        continue;
      }
      const code = codeMap.get(ChunkContentLoader.key(toSlice(chunk))) ?? "";
      if (!code) {
        continue;
      }
      const breadcrumbExact = breadcrumbTail(chunk.breadcrumb) === symbol;
      const definitionPattern = hasDefinitionPattern(chunk.language, code, symbol);
      const looksLikeDefinition = breadcrumbExact || definitionPattern;
      if (!looksLikeDefinition) {
        continue;
      }
      const startLine = countLinesBefore(readFullContent(chunk.file_path), chunk.start_index);
      const candidate = {
        chunk,
        code,
        score: hit.score,
        breadcrumbExact,
        prefixScore: hint_path ? commonPrefixLength(hint_path, chunk.file_path) : 0,
        startLine,
        endLine: computeEndLine(startLine, code)
      };
      // Keep only the best-ranked candidate per chunk.
      const chunkKey = `${chunk.file_path}#${chunk.chunk_index}`;
      const previous = bestByChunk.get(chunkKey);
      if (!previous || rankCandidates(candidate, previous) < 0) {
        bestByChunk.set(chunkKey, candidate);
      }
    }

    const ranked = Array.from(bestByChunk.values()).sort(rankCandidates).slice(0, max_results);
    let body = "No likely symbol definitions found.";
    if (ranked.length > 0) {
      body = ranked.map((candidate) => formatDefinition(candidate)).join("\n\n---\n\n");
    }
    return formatTextResponse(`Found ${ranked.length} symbol definitions for "${symbol}"\n\n${body}`);
  } finally {
    db.close();
  }
}
|
|
219
|
+
|
|
220
|
+
export {
|
|
221
|
+
getSymbolDefinitionSchema,
|
|
222
|
+
handleGetSymbolDefinition
|
|
223
|
+
};
|
|
@@ -35,11 +35,9 @@ function sampleCheckDisplayCode(oldRows, getContent, options = {}) {
|
|
|
35
35
|
var VectorStore = class {
|
|
36
36
|
db = null;
|
|
37
37
|
table = null;
|
|
38
|
-
projectId;
|
|
39
38
|
dbPath;
|
|
40
39
|
vectorDim;
|
|
41
40
|
constructor(projectId, vectorDim = 1024, dbPathOverride) {
|
|
42
|
-
this.projectId = projectId;
|
|
43
41
|
this.dbPath = dbPathOverride ?? path.join(BASE_DIR, projectId, "vectors.lance");
|
|
44
42
|
this.vectorDim = vectorDim;
|
|
45
43
|
}
|
|
@@ -185,9 +183,7 @@ var VectorStore = class {
|
|
|
185
183
|
continue;
|
|
186
184
|
}
|
|
187
185
|
if (this.table && batch.length > 0) {
|
|
188
|
-
await this.deleteFilesByHash(
|
|
189
|
-
batch.map((f) => ({ path: f.path, hash: f.hash }))
|
|
190
|
-
);
|
|
186
|
+
await this.deleteFilesByHash(batch.map((f) => ({ path: f.path, hash: f.hash })));
|
|
191
187
|
}
|
|
192
188
|
if (!this.table) {
|
|
193
189
|
await this.ensureTable(batchRecords);
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// src/search/config.ts
|
|
2
|
+
// Allowed range for each numeric search setting: inclusive-looking min/max plus
// whether the value must be an integer. How the bounds are enforced is not
// visible in this chunk.
var SEARCH_CONFIG_BOUNDS = (() => {
  const int = (min, max) => ({ min, max, integer: true });
  const real = (min, max) => ({ min, max, integer: false });
  return {
    vectorTopK: int(40, 200),
    vectorTopM: int(30, 100),
    ftsTopKFiles: int(10, 50),
    lexChunksPerFile: int(1, 5),
    lexTotalChunks: int(20, 80),
    rrfK0: int(10, 60),
    wVec: real(0, 1),
    wLex: real(0, 1),
    fusedTopM: int(30, 100),
    rerankTopN: int(5, 20),
    maxRerankChars: int(500, 2000),
    maxBreadcrumbChars: int(100, 500),
    headRatio: real(0.5, 0.8),
    neighborHops: int(1, 3),
    breadcrumbExpandLimit: int(1, 5),
    importFilesPerSeed: int(0, 5),
    chunksPerImportFile: int(1, 5),
    decayNeighbor: real(0.5, 0.9),
    decayBreadcrumb: real(0.4, 0.8),
    decayImport: real(0.3, 0.7),
    decayDepth: real(0.5, 0.9),
    maxSegmentsPerFile: int(1, 5),
    maxTotalChars: int(20000, 80000),
    smartTopScoreRatio: real(0.3, 0.7),
    smartTopScoreDeltaAbs: real(0.1, 0.4),
    smartMinScore: real(0.1, 0.4),
    smartMinK: int(1, 3),
    smartMaxK: int(5, 15)
  };
})();
|
|
32
|
+
// Default search settings. Each comment documents the key that follows it.
var DEFAULT_CONFIG = {
  // ── Recall (vector + lexical retrieval) ──

  // Vector ANN candidates before dedup. Range: 40–200. Higher = better recall, more compute.
  vectorTopK: 80,
  // Vectors kept after dedup. Range: 30–100.
  vectorTopM: 60,
  // Max files returned by FTS5 full-text search. Range: 10–50.
  ftsTopKFiles: 20,
  // Chunks to pull per FTS-matched file. Range: 1–5. Low keeps diversity across files.
  lexChunksPerFile: 2,
  // Hard cap on total lexical chunks. Range: 20–80.
  lexTotalChunks: 40,

  // ── RRF Fusion (merging vector and lexical scores) ──

  // RRF smoothing constant. Range: 10–60. Lower amplifies top ranks.
  rrfK0: 20,
  // Vector weight in fused score; emphasizes semantic relevance.
  // Recommended range: 0.3–0.8 (SEARCH_CONFIG_BOUNDS allows 0–1).
  wVec: 0.6,
  // Lexical weight in fused score. wVec + wLex should equal 1.0.
  wLex: 0.4,
  // Candidates after fusion, fed into reranker. Range: 30–100.
  fusedTopM: 60,

  // ── Rerank (fine-grained ranking) ──

  // Final top-N results after reranking. Range: 5–20.
  rerankTopN: 10,
  // Max chars per chunk sent to reranker; truncated beyond this. Range: 500–2000.
  maxRerankChars: 1000,
  // Max chars for breadcrumb context in rerank input. Range: 100–500.
  maxBreadcrumbChars: 250,
  // Ratio of head vs tail when truncating chunks. Range: 0.5–0.8.
  headRatio: 0.67,

  // ── Expansion (context expansion: E1 neighbors / E2 breadcrumbs / E3 cross-file imports) ──

  // E1: How many sibling chunks to expand in each direction. Range: 1–3.
  neighborHops: 2,
  // E2: Max ancestor breadcrumbs (class/function scope). Range: 1–5.
  breadcrumbExpandLimit: 3,
  // E3: Cross-file import files to resolve per seed chunk. Range: 0–5.
  // Set to 3 to enable import-graph expansion for better cross-file context.
  importFilesPerSeed: 3,
  // E3: Chunks to pull from each resolved import file. Range: 1–5.
  // Set to 3 for balanced coverage of imported symbols.
  chunksPerImportFile: 3,
  // Score decay per E1 hop. Range: 0.5–0.9. Higher = neighbors stay relevant longer.
  decayNeighbor: 0.8,
  // Score decay per E2 level. Range: 0.4–0.8.
  decayBreadcrumb: 0.7,
  // Score decay for E3 import chunks. Range: 0.3–0.7. Lower than E1/E2 since cross-file is less certain.
  decayImport: 0.6,
  // General depth decay multiplier. Range: 0.5–0.9.
  decayDepth: 0.7,

  // ── ContextPacker (context packing) ──

  // Max non-contiguous segments per file in output. Range: 1–5. Prevents excessive fragmentation.
  maxSegmentsPerFile: 3,
  // Token budget expressed as chars (~12k tokens). Range: 20000–80000.
  maxTotalChars: 48000,

  // ── Smart TopK (dynamic result count) ──

  // Dynamically adjust result count based on score distribution.
  enableSmartTopK: true,
  // Min score as ratio of top-1 score to remain included. Range: 0.3–0.7.
  smartTopScoreRatio: 0.5,
  // Max absolute score drop from top-1 before cutting off. Range: 0.1–0.4.
  smartTopScoreDeltaAbs: 0.25,
  // Hard floor: chunks below this score are always excluded. Range: 0.1–0.4.
  smartMinScore: 0.25,
  // Minimum results to return regardless of scores. Range: 1–3.
  smartMinK: 2,
  // Maximum results when smart topK is active. Range: 5–15.
  smartMaxK: 8
};
|
|
98
|
+
|
|
99
|
+
export {
|
|
100
|
+
SEARCH_CONFIG_BOUNDS,
|
|
101
|
+
DEFAULT_CONFIG
|
|
102
|
+
};
|