@chiway/contextweaver 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -28
- package/dist/{SearchService-OS7CYHNJ.js → SearchService-WVD6THR3.js} +116 -74
- package/dist/{chunk-ZOMGPIU6.js → chunk-3BNHQV5W.js} +1 -5
- package/dist/chunk-BFCIZ52F.js +102 -0
- package/dist/{chunk-X7PAYQMT.js → chunk-GDVB6PJ4.js} +21 -3
- package/dist/{lock-FL54LIQL.js → chunk-HHYPQA3X.js} +1 -1
- package/dist/chunk-ISVCQFB4.js +223 -0
- package/dist/chunk-IZ6IUHNN.js +77 -0
- package/dist/chunk-LB42CZEB.js +18 -0
- package/dist/{chunk-RGJSXUFS.js → chunk-PPLFJGO3.js} +60 -0
- package/dist/chunk-R6CNZXZ7.js +143 -0
- package/dist/chunk-TPM6YP43.js +38 -0
- package/dist/{chunk-EMSMLPMK.js → chunk-V3K4YVAR.js} +10 -117
- package/dist/chunk-VWBKZ6QL.js +115 -0
- package/dist/chunk-XFIM2T6S.js +57 -0
- package/dist/{chunk-AB24E3Z7.js → chunk-XMZZZKG7.js} +23 -79
- package/dist/chunk-XTWNT7KP.js +156 -0
- package/dist/chunk-Y6H7C3NA.js +85 -0
- package/dist/{codebaseRetrieval-3Z4CRA7X.js → codebaseRetrieval-DIS5RH2C.js} +5 -2
- package/dist/{db-PMVM7557.js → db-GBCLP4GG.js} +15 -1
- package/dist/findReferences-N7ML7TUP.js +16 -0
- package/dist/getSymbolDefinition-6KMY4H33.js +17 -0
- package/dist/index.js +244 -41
- package/dist/listFiles-4VT2TPJD.js +14 -0
- package/dist/loadConfig-XTVT2OWW.js +9 -0
- package/dist/lock-HNKQ6X5B.js +8 -0
- package/dist/scanner-QDFZJLP7.js +13 -0
- package/dist/server-UAI3U7AB.js +347 -0
- package/dist/stats-AGKUCJQI.js +12 -0
- package/dist/{vectorStore-HPQZOVWF.js → vectorStore-4ODCERRO.js} +1 -1
- package/package.json +9 -23
- package/dist/scanner-2XGJWYHR.js +0 -11
- package/dist/server-XK6EINRV.js +0 -146
|
@@ -1,14 +1,15 @@
|
|
|
1
|
+
import {
|
|
2
|
+
checkEnvOrRespond,
|
|
3
|
+
ensureIndexed
|
|
4
|
+
} from "./chunk-VWBKZ6QL.js";
|
|
1
5
|
import {
|
|
2
6
|
generateProjectId
|
|
3
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-PPLFJGO3.js";
|
|
4
8
|
import {
|
|
5
9
|
logger
|
|
6
10
|
} from "./chunk-JVKVSTQ3.js";
|
|
7
11
|
|
|
8
12
|
// src/mcp/tools/codebaseRetrieval.ts
|
|
9
|
-
import fs from "fs";
|
|
10
|
-
import os from "os";
|
|
11
|
-
import path from "path";
|
|
12
13
|
import { z } from "zod";
|
|
13
14
|
var codebaseRetrievalSchema = z.object({
|
|
14
15
|
repo_path: z.string().describe(
|
|
@@ -21,80 +22,6 @@ var codebaseRetrievalSchema = z.object({
|
|
|
21
22
|
"HARD FILTERS. Precise identifiers to narrow down results. Only use symbols KNOWN to exist to avoid false negatives."
|
|
22
23
|
)
|
|
23
24
|
});
|
|
24
|
-
var BASE_DIR = path.join(os.homedir(), ".contextweaver");
|
|
25
|
-
var INDEX_LOCK_TIMEOUT_MS = 10 * 60 * 1e3;
|
|
26
|
-
async function ensureDefaultEnvFile() {
|
|
27
|
-
const configDir = BASE_DIR;
|
|
28
|
-
const envFile = path.join(configDir, ".env");
|
|
29
|
-
if (fs.existsSync(envFile)) {
|
|
30
|
-
return;
|
|
31
|
-
}
|
|
32
|
-
if (!fs.existsSync(configDir)) {
|
|
33
|
-
fs.mkdirSync(configDir, { recursive: true });
|
|
34
|
-
logger.info({ configDir }, "\u521B\u5EFA\u914D\u7F6E\u76EE\u5F55");
|
|
35
|
-
}
|
|
36
|
-
const defaultEnvContent = `# ContextWeaver \u793A\u4F8B\u73AF\u5883\u53D8\u91CF\u914D\u7F6E\u6587\u4EF6
|
|
37
|
-
|
|
38
|
-
# Embedding API \u914D\u7F6E\uFF08\u5FC5\u9700\uFF09
|
|
39
|
-
EMBEDDINGS_API_KEY=your-api-key-here
|
|
40
|
-
EMBEDDINGS_BASE_URL=https://api.siliconflow.cn/v1/embeddings
|
|
41
|
-
EMBEDDINGS_MODEL=BAAI/bge-m3
|
|
42
|
-
EMBEDDINGS_MAX_CONCURRENCY=10
|
|
43
|
-
EMBEDDINGS_DIMENSIONS=1024
|
|
44
|
-
|
|
45
|
-
# Reranker \u914D\u7F6E\uFF08\u5FC5\u9700\uFF09
|
|
46
|
-
RERANK_API_KEY=your-api-key-here
|
|
47
|
-
RERANK_BASE_URL=https://api.siliconflow.cn/v1/rerank
|
|
48
|
-
RERANK_MODEL=BAAI/bge-reranker-v2-m3
|
|
49
|
-
RERANK_TOP_N=20
|
|
50
|
-
|
|
51
|
-
# \u7D22\u5F15\u5FFD\u7565\u6A21\u5F0F\uFF08\u53EF\u9009\uFF0C\u9017\u53F7\u5206\u9694\uFF0C\u9ED8\u8BA4\u5DF2\u5305\u542B\u5E38\u89C1\u5FFD\u7565\u9879\uFF09
|
|
52
|
-
# IGNORE_PATTERNS=.venv,node_modules
|
|
53
|
-
`;
|
|
54
|
-
fs.writeFileSync(envFile, defaultEnvContent);
|
|
55
|
-
logger.info({ envFile }, "\u5DF2\u521B\u5EFA\u9ED8\u8BA4 .env \u914D\u7F6E\u6587\u4EF6");
|
|
56
|
-
}
|
|
57
|
-
function isProjectIndexed(projectId) {
|
|
58
|
-
const dbPath = path.join(BASE_DIR, projectId, "index.db");
|
|
59
|
-
return fs.existsSync(dbPath);
|
|
60
|
-
}
|
|
61
|
-
async function ensureIndexed(repoPath, projectId, onProgress) {
|
|
62
|
-
const { withLock } = await import("./lock-FL54LIQL.js");
|
|
63
|
-
const { scan } = await import("./scanner-2XGJWYHR.js");
|
|
64
|
-
await withLock(
|
|
65
|
-
projectId,
|
|
66
|
-
"index",
|
|
67
|
-
async () => {
|
|
68
|
-
const wasIndexed = isProjectIndexed(projectId);
|
|
69
|
-
if (!wasIndexed) {
|
|
70
|
-
logger.info(
|
|
71
|
-
{ repoPath, projectId: projectId.slice(0, 10) },
|
|
72
|
-
"\u4EE3\u7801\u5E93\u672A\u521D\u59CB\u5316\uFF0C\u5F00\u59CB\u9996\u6B21\u7D22\u5F15..."
|
|
73
|
-
);
|
|
74
|
-
onProgress?.(0, 100, "\u4EE3\u7801\u5E93\u672A\u7D22\u5F15\uFF0C\u5F00\u59CB\u9996\u6B21\u7D22\u5F15...");
|
|
75
|
-
} else {
|
|
76
|
-
logger.debug({ projectId: projectId.slice(0, 10) }, "\u6267\u884C\u589E\u91CF\u7D22\u5F15...");
|
|
77
|
-
}
|
|
78
|
-
const startTime = Date.now();
|
|
79
|
-
const stats = await scan(repoPath, { vectorIndex: true, onProgress });
|
|
80
|
-
const elapsed = Date.now() - startTime;
|
|
81
|
-
logger.info(
|
|
82
|
-
{
|
|
83
|
-
projectId: projectId.slice(0, 10),
|
|
84
|
-
isFirstTime: !wasIndexed,
|
|
85
|
-
totalFiles: stats.totalFiles,
|
|
86
|
-
added: stats.added,
|
|
87
|
-
modified: stats.modified,
|
|
88
|
-
deleted: stats.deleted,
|
|
89
|
-
vectorIndex: stats.vectorIndex,
|
|
90
|
-
elapsedMs: elapsed
|
|
91
|
-
},
|
|
92
|
-
"\u7D22\u5F15\u5B8C\u6210"
|
|
93
|
-
);
|
|
94
|
-
},
|
|
95
|
-
INDEX_LOCK_TIMEOUT_MS
|
|
96
|
-
);
|
|
97
|
-
}
|
|
98
25
|
async function handleCodebaseRetrieval(args, onProgress) {
|
|
99
26
|
const { repo_path, information_request, technical_terms } = args;
|
|
100
27
|
logger.info(
|
|
@@ -111,11 +38,10 @@ async function handleCodebaseRetrieval(args, onProgress) {
|
|
|
111
38
|
const allMissingVars = [...embeddingCheck.missingVars, ...rerankerCheck.missingVars];
|
|
112
39
|
if (allMissingVars.length > 0) {
|
|
113
40
|
logger.warn({ missingVars: allMissingVars }, "MCP \u73AF\u5883\u53D8\u91CF\u672A\u914D\u7F6E");
|
|
114
|
-
await
|
|
115
|
-
return formatEnvMissingResponse(allMissingVars);
|
|
41
|
+
return await checkEnvOrRespond(allMissingVars);
|
|
116
42
|
}
|
|
117
43
|
const projectId = generateProjectId(repo_path);
|
|
118
|
-
await ensureIndexed(repo_path, projectId, onProgress);
|
|
44
|
+
await ensureIndexed(repo_path, projectId, { onProgress });
|
|
119
45
|
const query = [information_request, ...technical_terms || []].filter(Boolean).join(" ");
|
|
120
46
|
logger.info(
|
|
121
47
|
{
|
|
@@ -124,8 +50,9 @@ async function handleCodebaseRetrieval(args, onProgress) {
|
|
|
124
50
|
},
|
|
125
51
|
"MCP \u67E5\u8BE2\u6784\u5EFA"
|
|
126
52
|
);
|
|
127
|
-
const { SearchService } = await import("./SearchService-
|
|
128
|
-
const
|
|
53
|
+
const { SearchService } = await import("./SearchService-WVD6THR3.js");
|
|
54
|
+
const { getSearchConfigOverrides } = await import("./loadConfig-XTVT2OWW.js");
|
|
55
|
+
const service = new SearchService(projectId, repo_path, getSearchConfigOverrides());
|
|
129
56
|
await service.init();
|
|
130
57
|
logger.debug("SearchService \u521D\u59CB\u5316\u5B8C\u6210");
|
|
131
58
|
const contextPack = await service.buildContextPack(query);
|
|
@@ -244,40 +171,6 @@ function detectLanguage(filePath) {
|
|
|
244
171
|
};
|
|
245
172
|
return langMap[ext] || ext || "plaintext";
|
|
246
173
|
}
|
|
247
|
-
function formatEnvMissingResponse(missingVars) {
|
|
248
|
-
const configPath = "~/.contextweaver/.env";
|
|
249
|
-
const text = `## \u26A0\uFE0F \u914D\u7F6E\u7F3A\u5931
|
|
250
|
-
|
|
251
|
-
ContextWeaver \u9700\u8981\u914D\u7F6E Embedding API \u624D\u80FD\u5DE5\u4F5C\u3002
|
|
252
|
-
|
|
253
|
-
### \u7F3A\u5931\u7684\u73AF\u5883\u53D8\u91CF
|
|
254
|
-
${missingVars.map((v) => `- \`${v}\``).join("\n")}
|
|
255
|
-
|
|
256
|
-
### \u914D\u7F6E\u6B65\u9AA4
|
|
257
|
-
|
|
258
|
-
\u5DF2\u81EA\u52A8\u521B\u5EFA\u914D\u7F6E\u6587\u4EF6\uFF1A\`${configPath}\`
|
|
259
|
-
|
|
260
|
-
\u8BF7\u7F16\u8F91\u8BE5\u6587\u4EF6\uFF0C\u586B\u5199\u4F60\u7684 API Key\uFF1A
|
|
261
|
-
|
|
262
|
-
\`\`\`bash
|
|
263
|
-
# Embedding API \u914D\u7F6E\uFF08\u5FC5\u9700\uFF09
|
|
264
|
-
EMBEDDINGS_API_KEY=your-api-key-here # \u2190 \u66FF\u6362\u4E3A\u4F60\u7684 API Key
|
|
265
|
-
|
|
266
|
-
# Reranker \u914D\u7F6E\uFF08\u5FC5\u9700\uFF09
|
|
267
|
-
RERANK_API_KEY=your-api-key-here # \u2190 \u66FF\u6362\u4E3A\u4F60\u7684 API Key
|
|
268
|
-
\`\`\`
|
|
269
|
-
|
|
270
|
-
\u4FDD\u5B58\u6587\u4EF6\u540E\u91CD\u65B0\u8C03\u7528\u6B64\u5DE5\u5177\u5373\u53EF\u3002
|
|
271
|
-
`;
|
|
272
|
-
return {
|
|
273
|
-
content: [
|
|
274
|
-
{
|
|
275
|
-
type: "text",
|
|
276
|
-
text
|
|
277
|
-
}
|
|
278
|
-
]
|
|
279
|
-
};
|
|
280
|
-
}
|
|
281
174
|
|
|
282
175
|
export {
|
|
283
176
|
codebaseRetrievalSchema,
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getDefaultEnvFileContent
|
|
3
|
+
} from "./chunk-TPM6YP43.js";
|
|
4
|
+
import {
|
|
5
|
+
logger
|
|
6
|
+
} from "./chunk-JVKVSTQ3.js";
|
|
7
|
+
|
|
8
|
+
// src/mcp/tools/shared.ts
|
|
9
|
+
import fs from "fs";
|
|
10
|
+
import os from "os";
|
|
11
|
+
import path from "path";
|
|
12
|
+
// Directory under the user's home that holds the shared .env file and one
// sub-directory per project (see the path.join calls that use it).
var BASE_DIR = path.join(os.homedir(), ".contextweaver");
// Passed to withLock as its last argument for an indexing run: 10 minutes in milliseconds.
var INDEX_LOCK_TIMEOUT_MS = 10 * 60 * 1e3;

/**
 * Report whether a project already has an index database on disk.
 *
 * @param {string} projectId - Project identifier, used as the directory name under BASE_DIR.
 * @returns {boolean} true when `<BASE_DIR>/<projectId>/index.db` exists.
 */
function isProjectIndexed(projectId) {
  return fs.existsSync(path.join(BASE_DIR, projectId, "index.db"));
}
|
|
18
|
+
/**
 * Create `<BASE_DIR>/.env` from the default template when it does not exist yet.
 *
 * An existing file is never modified or overwritten. The function is declared
 * async, but all filesystem work inside it is synchronous.
 *
 * @returns {Promise<void>}
 * @throws Filesystem errors other than "file already exists" propagate to the caller.
 */
async function ensureDefaultEnvFile() {
  const configDir = BASE_DIR;
  const envFile = path.join(configDir, ".env");
  if (fs.existsSync(envFile)) {
    return;
  }
  // With `recursive: true`, mkdirSync returns the first directory it created, or
  // undefined when nothing had to be created, so no separate existence check
  // (and no window between check and create) is needed.
  const createdDir = fs.mkdirSync(configDir, { recursive: true });
  if (createdDir !== undefined) {
    logger.info({ configDir }, "\u521B\u5EFA\u914D\u7F6E\u76EE\u5F55");
  }
  const defaultContent = getDefaultEnvFileContent();
  try {
    // "wx" fails instead of overwriting if the file appeared after the check above
    // (e.g. another process created it). 0o600 keeps the file, which the user is
    // asked to fill with API keys, readable by its owner only.
    fs.writeFileSync(envFile, defaultContent, { flag: "wx", mode: 0o600 });
  } catch (err) {
    if (err?.code === "EEXIST") {
      return;
    }
    throw err;
  }
  logger.info({ envFile }, "\u5DF2\u521B\u5EFA\u9ED8\u8BA4 .env \u914D\u7F6E\u6587\u4EF6");
}
|
|
31
|
+
/**
 * Run an index scan for a repository while holding the project's "index" lock.
 *
 * @param {string} repoPath - Repository path handed to `scan`.
 * @param {string} projectId - Project identifier; used as the lock key and for the
 *   "already indexed" lookup. Only its first 10 characters are written to the log.
 * @param {{ onProgress?: Function, vectorIndex?: boolean }} [options]
 *   `onProgress` is forwarded to `scan` and is also called once with
 *   (0, 100, message) before a first-time index; `vectorIndex` (default true)
 *   is forwarded to `scan`.
 * @returns {Promise<void>} Resolves when `withLock` resolves.
 */
async function ensureIndexed(repoPath, projectId, options = {}) {
  const { onProgress, vectorIndex = true } = options;
  // Loaded lazily, lock module first, then the scanner.
  const { withLock } = await import("./lock-HNKQ6X5B.js");
  const { scan } = await import("./scanner-QDFZJLP7.js");

  const runIndex = async () => {
    // Checked inside the lock callback, immediately before scanning.
    const wasIndexed = isProjectIndexed(projectId);
    const shortId = projectId.slice(0, 10);
    if (wasIndexed) {
      logger.debug({ projectId: shortId, vectorIndex }, "\u6267\u884C\u589E\u91CF\u7D22\u5F15...");
    } else {
      logger.info(
        { repoPath, projectId: shortId, vectorIndex },
        "\u4EE3\u7801\u5E93\u672A\u521D\u59CB\u5316\uFF0C\u5F00\u59CB\u9996\u6B21\u7D22\u5F15..."
      );
      onProgress?.(0, 100, "\u4EE3\u7801\u5E93\u672A\u7D22\u5F15\uFF0C\u5F00\u59CB\u9996\u6B21\u7D22\u5F15...");
    }

    const startedAt = Date.now();
    const stats = await scan(repoPath, { vectorIndex, onProgress });
    const elapsedMs = Date.now() - startedAt;

    const { totalFiles, added, modified, deleted } = stats;
    logger.info(
      {
        projectId: shortId,
        isFirstTime: !wasIndexed,
        totalFiles,
        added,
        modified,
        deleted,
        vectorIndex: stats.vectorIndex,
        elapsedMs
      },
      "\u7D22\u5F15\u5B8C\u6210"
    );
  };

  await withLock(projectId, "index", runIndex, INDEX_LOCK_TIMEOUT_MS);
}
|
|
69
|
+
/**
 * Wrap plain text in the tool-response shape used by the handlers in this module.
 *
 * @param {string} text - Message body.
 * @returns {{ content: Array<{ type: "text", text: string }> }} Response whose
 *   `content` array holds a single text entry.
 */
function formatTextResponse(text) {
  const textItem = { type: "text", text };
  return { content: [textItem] };
}
|
|
79
|
+
/**
 * Build the "configuration missing" tool response.
 *
 * The message is a Markdown document (in Chinese) that lists every missing
 * variable as a bullet, names the config file path `~/.contextweaver/.env`,
 * and shows the two API-key lines the user has to fill in.
 *
 * @param {string[]} missingVars - Names of the environment variables that are not set.
 * @returns {{ content: Array<{ type: "text", text: string }> }} Text response from formatTextResponse.
 */
function formatEnvMissingResponse(missingVars) {
  const envFilePath = "~/.contextweaver/.env";
  // One Markdown bullet per variable, with the name in inline-code backticks.
  const missingList = missingVars.map((name) => `- \`${name}\``).join("\n");
  // The template body is flush-left on purpose: leading whitespace would become part of the message.
  const message = `## \u26A0\uFE0F \u914D\u7F6E\u7F3A\u5931

ContextWeaver \u9700\u8981\u914D\u7F6E Embedding API \u624D\u80FD\u5DE5\u4F5C\u3002

### \u7F3A\u5931\u7684\u73AF\u5883\u53D8\u91CF
${missingList}

### \u914D\u7F6E\u6B65\u9AA4

\u5DF2\u81EA\u52A8\u521B\u5EFA\u914D\u7F6E\u6587\u4EF6\uFF1A\`${envFilePath}\`

\u8BF7\u7F16\u8F91\u8BE5\u6587\u4EF6\uFF0C\u586B\u5199\u4F60\u7684 API Key\uFF1A

\`\`\`bash
# Embedding API \u914D\u7F6E\uFF08\u5FC5\u9700\uFF09
EMBEDDINGS_API_KEY=your-api-key-here # \u2190 \u66FF\u6362\u4E3A\u4F60\u7684 API Key

# Reranker \u914D\u7F6E\uFF08\u5FC5\u9700\uFF09
RERANK_API_KEY=your-api-key-here # \u2190 \u66FF\u6362\u4E3A\u4F60\u7684 API Key
\`\`\`

\u4FDD\u5B58\u6587\u4EF6\u540E\u91CD\u65B0\u8C03\u7528\u6B64\u5DE5\u5177\u5373\u53EF\u3002
`;
  return formatTextResponse(message);
}
|
|
106
|
+
/**
 * Produce the response for a call that cannot proceed because environment
 * variables are missing: first make sure the default .env file exists, then
 * build the explanatory message that points at it.
 *
 * @param {string[]} missingVars - Names of the missing environment variables.
 * @returns {Promise<{ content: Array<{ type: "text", text: string }> }>}
 */
async function checkEnvOrRespond(missingVars) {
  await ensureDefaultEnvFile();
  const response = formatEnvMissingResponse(missingVars);
  return response;
}
|
|
110
|
+
|
|
111
|
+
export {
|
|
112
|
+
ensureIndexed,
|
|
113
|
+
formatTextResponse,
|
|
114
|
+
checkEnvOrRespond
|
|
115
|
+
};
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// src/search/ChunkContentLoader.ts
|
|
2
|
+
/**
 * Loads the text of code chunks by slicing file contents read from the
 * `files` table of the given database handle.
 */
var ChunkContentLoader = class _ChunkContentLoader {
  /**
   * @param db - Database handle exposing `prepare(sql)`, whose statements expose `get(param)`.
   */
  constructor(db) {
    this.db = db;
  }

  /**
   * Build the cache key for a slice: `<filePath>#<start_index>#<end_index>`.
   */
  static key(slice) {
    return `${slice.filePath}#${slice.start_index}#${slice.end_index}`;
  }

  /**
   * Load the text of many chunks at once.
   *
   * Slices are grouped by file so each file's content is queried once. Offsets
   * are clamped to the content length, and a slice whose file has no stored
   * content maps to an empty string.
   *
   * @returns Map<key, code>, where each key is produced by ChunkContentLoader.key
   */
  loadMany(slices) {
    const codeByKey = new Map();
    if (slices.length === 0) {
      return codeByKey;
    }

    // Group slices per file, keeping first-seen file order and per-file slice order.
    const slicesByFile = new Map();
    for (const slice of slices) {
      const bucket = slicesByFile.get(slice.filePath);
      if (bucket) {
        bucket.push(slice);
      } else {
        slicesByFile.set(slice.filePath, [slice]);
      }
    }

    const selectContent = this.db.prepare("SELECT content FROM files WHERE path = ?");
    slicesByFile.forEach((fileSlices, filePath) => {
      const content = selectContent.get(filePath)?.content ?? null;
      for (const slice of fileSlices) {
        const cacheKey = _ChunkContentLoader.key(slice);
        if (content === null) {
          codeByKey.set(cacheKey, "");
          continue;
        }
        // Clamp into [0, content.length] and never let the end precede the start.
        const from = Math.max(0, Math.min(slice.start_index, content.length));
        const to = Math.max(from, Math.min(slice.end_index, content.length));
        codeByKey.set(cacheKey, content.slice(from, to));
      }
    });
    return codeByKey;
  }

  /**
   * Load the text of a single chunk (convenience wrapper around loadMany;
   * not recommended when loading many chunks).
   */
  loadOne(slice) {
    const loaded = this.loadMany([slice]);
    return loaded.get(_ChunkContentLoader.key(slice)) ?? "";
  }
};
|
|
54
|
+
|
|
55
|
+
export {
|
|
56
|
+
ChunkContentLoader
|
|
57
|
+
};
|
|
@@ -1,7 +1,13 @@
|
|
|
1
|
+
import {
|
|
2
|
+
commonPrefixLength
|
|
3
|
+
} from "./chunk-LB42CZEB.js";
|
|
4
|
+
import {
|
|
5
|
+
ChunkContentLoader
|
|
6
|
+
} from "./chunk-XFIM2T6S.js";
|
|
1
7
|
import {
|
|
2
8
|
getVectorStore,
|
|
3
9
|
sampleCheckDisplayCode
|
|
4
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-3BNHQV5W.js";
|
|
5
11
|
import {
|
|
6
12
|
batchDeleteFileChunksFts,
|
|
7
13
|
batchUpdateVectorIndexHash,
|
|
@@ -17,7 +23,7 @@ import {
|
|
|
17
23
|
replayPendingMarks,
|
|
18
24
|
setLanceDbMigrationState,
|
|
19
25
|
tryAcquireLanceDbMigrationLock
|
|
20
|
-
} from "./chunk-
|
|
26
|
+
} from "./chunk-PPLFJGO3.js";
|
|
21
27
|
import {
|
|
22
28
|
logger
|
|
23
29
|
} from "./chunk-JVKVSTQ3.js";
|
|
@@ -606,10 +612,7 @@ var Indexer = class {
|
|
|
606
612
|
stats.deleted = toDelete.length;
|
|
607
613
|
} catch (err) {
|
|
608
614
|
const error = err;
|
|
609
|
-
logger.error(
|
|
610
|
-
{ error: error.message, count: toDelete.length },
|
|
611
|
-
"\u5220\u9664\u9636\u6BB5\u5931\u8D25\uFF0C\u5DF2\u6807\u8BB0\u91CD\u8BD5"
|
|
612
|
-
);
|
|
615
|
+
logger.error({ error: error.message, count: toDelete.length }, "\u5220\u9664\u9636\u6BB5\u5931\u8D25\uFF0C\u5DF2\u6807\u8BB0\u91CD\u8BD5");
|
|
613
616
|
stats.errors += toDelete.length;
|
|
614
617
|
}
|
|
615
618
|
}
|
|
@@ -678,16 +681,27 @@ var Indexer = class {
|
|
|
678
681
|
continue;
|
|
679
682
|
}
|
|
680
683
|
logger.info(
|
|
681
|
-
{
|
|
684
|
+
{
|
|
685
|
+
batch: `${batchNum}/${totalBatches}`,
|
|
686
|
+
texts: batchTexts.length,
|
|
687
|
+
files: batchFiles.length
|
|
688
|
+
},
|
|
682
689
|
"\u6279\u6B21 Embedding \u5F00\u59CB"
|
|
683
690
|
);
|
|
684
691
|
let embeddings;
|
|
685
692
|
const EMBED_BATCH_SIZE = 10;
|
|
686
693
|
try {
|
|
687
694
|
const batchOnProgress = onProgress ? (_completed, _total) => {
|
|
688
|
-
onProgress(
|
|
695
|
+
onProgress(
|
|
696
|
+
completedChunks + Math.min(_completed * EMBED_BATCH_SIZE, batchTexts.length),
|
|
697
|
+
totalChunks
|
|
698
|
+
);
|
|
689
699
|
} : void 0;
|
|
690
|
-
const results = await this.embeddingClient.embedBatch(
|
|
700
|
+
const results = await this.embeddingClient.embedBatch(
|
|
701
|
+
batchTexts,
|
|
702
|
+
EMBED_BATCH_SIZE,
|
|
703
|
+
batchOnProgress
|
|
704
|
+
);
|
|
691
705
|
embeddings = results.map((r) => r.embedding);
|
|
692
706
|
} catch (err) {
|
|
693
707
|
const error = err;
|
|
@@ -1007,75 +1021,6 @@ function scoreChunkTokenOverlap(chunk, code, queryTokens) {
|
|
|
1007
1021
|
return score;
|
|
1008
1022
|
}
|
|
1009
1023
|
|
|
1010
|
-
// src/search/ChunkContentLoader.ts
|
|
1011
|
-
var ChunkContentLoader = class _ChunkContentLoader {
|
|
1012
|
-
constructor(db) {
|
|
1013
|
-
this.db = db;
|
|
1014
|
-
}
|
|
1015
|
-
/**
|
|
1016
|
-
* 生成 cache key
|
|
1017
|
-
*/
|
|
1018
|
-
static key(slice) {
|
|
1019
|
-
return `${slice.filePath}#${slice.start_index}#${slice.end_index}`;
|
|
1020
|
-
}
|
|
1021
|
-
/**
|
|
1022
|
-
* 批量加载 chunk 正文
|
|
1023
|
-
*
|
|
1024
|
-
* @returns Map<key, code>,key 由 ChunkContentLoader.key 生成
|
|
1025
|
-
*/
|
|
1026
|
-
loadMany(slices) {
|
|
1027
|
-
const result = /* @__PURE__ */ new Map();
|
|
1028
|
-
if (slices.length === 0) return result;
|
|
1029
|
-
const byPath = /* @__PURE__ */ new Map();
|
|
1030
|
-
for (const s of slices) {
|
|
1031
|
-
let arr = byPath.get(s.filePath);
|
|
1032
|
-
if (!arr) {
|
|
1033
|
-
arr = [];
|
|
1034
|
-
byPath.set(s.filePath, arr);
|
|
1035
|
-
}
|
|
1036
|
-
arr.push(s);
|
|
1037
|
-
}
|
|
1038
|
-
const stmt = this.db.prepare("SELECT content FROM files WHERE path = ?");
|
|
1039
|
-
for (const [path, spans] of byPath) {
|
|
1040
|
-
const row = stmt.get(path);
|
|
1041
|
-
const content = row?.content ?? null;
|
|
1042
|
-
for (const s of spans) {
|
|
1043
|
-
const k = _ChunkContentLoader.key(s);
|
|
1044
|
-
if (content === null) {
|
|
1045
|
-
result.set(k, "");
|
|
1046
|
-
continue;
|
|
1047
|
-
}
|
|
1048
|
-
const safeStart = Math.max(0, Math.min(s.start_index, content.length));
|
|
1049
|
-
const safeEnd = Math.max(safeStart, Math.min(s.end_index, content.length));
|
|
1050
|
-
result.set(k, content.slice(safeStart, safeEnd));
|
|
1051
|
-
}
|
|
1052
|
-
}
|
|
1053
|
-
return result;
|
|
1054
|
-
}
|
|
1055
|
-
/**
|
|
1056
|
-
* 加载单个 chunk 正文(便捷方法,不推荐在批量场景使用)
|
|
1057
|
-
*/
|
|
1058
|
-
loadOne(slice) {
|
|
1059
|
-
const map = this.loadMany([slice]);
|
|
1060
|
-
return map.get(_ChunkContentLoader.key(slice)) ?? "";
|
|
1061
|
-
}
|
|
1062
|
-
};
|
|
1063
|
-
|
|
1064
|
-
// src/search/resolvers/types.ts
|
|
1065
|
-
function commonPrefixLength(path1, path2) {
|
|
1066
|
-
const parts1 = path1.split("/");
|
|
1067
|
-
const parts2 = path2.split("/");
|
|
1068
|
-
let count = 0;
|
|
1069
|
-
for (let i = 0; i < Math.min(parts1.length, parts2.length); i++) {
|
|
1070
|
-
if (parts1[i] === parts2[i]) {
|
|
1071
|
-
count++;
|
|
1072
|
-
} else {
|
|
1073
|
-
break;
|
|
1074
|
-
}
|
|
1075
|
-
}
|
|
1076
|
-
return count;
|
|
1077
|
-
}
|
|
1078
|
-
|
|
1079
1024
|
// src/search/resolvers/CppResolver.ts
|
|
1080
1025
|
var CPP_EXTENSIONS = /* @__PURE__ */ new Set([".c", ".cpp", ".cc", ".cxx", ".h", ".hpp", ".hh", ".hxx"]);
|
|
1081
1026
|
var CppResolver = class {
|
|
@@ -1876,7 +1821,6 @@ export {
|
|
|
1876
1821
|
getIndexer,
|
|
1877
1822
|
closeAllIndexers,
|
|
1878
1823
|
scoreChunkTokenOverlap,
|
|
1879
|
-
ChunkContentLoader,
|
|
1880
1824
|
invalidateAllExpanderCaches,
|
|
1881
1825
|
getGraphExpander
|
|
1882
1826
|
};
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getVectorStore
|
|
3
|
+
} from "./chunk-3BNHQV5W.js";
|
|
4
|
+
import {
|
|
5
|
+
closeDb,
|
|
6
|
+
collectHealthSnapshot,
|
|
7
|
+
getAllStats,
|
|
8
|
+
getStatJson,
|
|
9
|
+
initDb
|
|
10
|
+
} from "./chunk-PPLFJGO3.js";
|
|
11
|
+
import {
|
|
12
|
+
logger
|
|
13
|
+
} from "./chunk-JVKVSTQ3.js";
|
|
14
|
+
import {
|
|
15
|
+
getEmbeddingConfig
|
|
16
|
+
} from "./chunk-SKBAE26T.js";
|
|
17
|
+
|
|
18
|
+
// src/stats/index.ts
|
|
19
|
+
/**
 * Read a counter from the stats map as a base-10 integer.
 *
 * @param {Object} stats - Map of stat key to stored value.
 * @param {string} key - Stat key to read.
 * @returns {number} The parsed integer, or 0 when the key is absent or the
 *   value does not start with a parsable integer.
 */
function num(stats, key) {
  const raw = stats[key];
  const parsed = raw === undefined ? Number.NaN : Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    return 0;
  }
  return parsed;
}
|
|
25
|
+
/**
 * Mean of an accumulated sum over a sample count.
 *
 * @param {number} sum - Accumulated total.
 * @param {number} count - Number of samples.
 * @returns {number|null} `sum / count`, or null when `count` is zero or negative.
 */
function avg(sum, count) {
  return count <= 0 ? null : sum / count;
}
|
|
29
|
+
/**
 * Collect index, search and health statistics for one project.
 *
 * Opens the project's database, reads the health snapshot and the stored
 * counters, asks the vector store for its row count, and derives averages and
 * diagnostics. The database handle is always closed before returning.
 *
 * @param {string} projectId - Project whose statistics are collected.
 * @returns {Promise<{ projectId, health, lancedbRows, index, search, diagnostics }>}
 *   `search.cacheHitRate` is null when there are no queries; every `search.avg*`
 *   value is null when `computeRuns` is 0.
 */
async function collectStats(projectId) {
  const db = initDb(projectId);
  try {
    const health = collectHealthSnapshot(db);
    const stats = getAllStats(db);
    let lancedbRows = 0;
    try {
      const store = await getVectorStore(projectId, getEmbeddingConfig().dimensions);
      lancedbRows = await store.count();
    } catch (err) {
      // Best effort: a failure here must not abort the report. Any value can be
      // thrown, so do not assume `err` is an object with a `message`.
      // NOTE(review): lancedbRows stays 0 on failure, so buildDiagnostics cannot
      // tell "count could not be read" from "table is empty".
      logger.warn({ error: err?.message ?? String(err) }, "\u8BFB\u53D6 LanceDB \u884C\u6570\u5931\u8D25");
    }
    const computeRuns = num(stats, "stats.search.compute_runs");
    const totalQueries = num(stats, "stats.search.total_queries");
    const cacheHits = num(stats, "stats.search.cache_hits");
    const index = {
      totalRuns: num(stats, "stats.index.total_runs"),
      lastRun: getStatJson(db, "stats.index.last_run_json"),
      lastRunAt: getStatJson(db, "stats.index.last_run_at")
    };
    // Every average is a stored sum divided by computeRuns.
    const search = {
      totalQueries,
      cacheHits,
      cacheHitRate: totalQueries > 0 ? cacheHits / totalQueries : null,
      computeRuns,
      avgRetrieveMs: avg(num(stats, "stats.search.sum_retrieve_ms"), computeRuns),
      avgRerankMs: avg(num(stats, "stats.search.sum_rerank_ms"), computeRuns),
      avgExpandMs: avg(num(stats, "stats.search.sum_expand_ms"), computeRuns),
      avgPackMs: avg(num(stats, "stats.search.sum_pack_ms"), computeRuns),
      avgSeedCount: avg(num(stats, "stats.search.sum_seed_count"), computeRuns)
    };
    const diagnostics = buildDiagnostics(health, lancedbRows);
    return { projectId, health, lancedbRows, index, search, diagnostics };
  } finally {
    closeDb(db);
  }
}
|
|
66
|
+
/**
 * Turn a health snapshot into a list of human-readable warnings (in Chinese).
 *
 * Checks, in order: migration state "aborted", migration state "pending",
 * a non-zero pending_marks backlog, files indexed but zero vector rows, and
 * files indexed but no embedding dimension recorded.
 *
 * @param {Object} health - Snapshot with `migrationState`, `pendingMarks`,
 *   `totalFiles` and `embeddingDimensions`.
 * @param {number} lancedbRows - Row count reported by the vector store.
 * @returns {string[]} Warning messages; empty when nothing is wrong.
 */
function buildDiagnostics(health, lancedbRows) {
  const out = [];
  if (health.migrationState === "aborted") {
    out.push(
      "LanceDB \u8FC1\u79FB\u72B6\u6001\u4E3A aborted\uFF0C\u7D22\u5F15\u5199\u5165\u88AB\u62D2\u7EDD\u3002\u8FD0\u884C `contextweaver migrate --reset` \u89E3\u9664\u3002"
    );
  }
  if (health.migrationState === "pending") {
    out.push("LanceDB \u8FC1\u79FB\u72B6\u6001\u4E3A pending\uFF0C\u53EF\u80FD\u4E0A\u6B21\u8FC1\u79FB\u672A\u5B8C\u6210\u3002");
  }
  if (health.pendingMarks > 0) {
    out.push(`pending_marks \u79EF\u538B ${health.pendingMarks} \u6761\uFF0C\u4E0B\u6B21\u542F\u52A8\u5C06\u91CD\u653E\u3002\u82E5\u6301\u7EED\u4E0D\u51CF\u9700\u6392\u67E5\u3002`);
  }
  if (health.totalFiles > 0 && lancedbRows === 0) {
    out.push(
      `\u5DF2\u7D22\u5F15 ${health.totalFiles} \u4E2A\u6587\u4EF6\u4F46 LanceDB \u65E0\u5411\u91CF\u884C\uFF0C\u5411\u91CF\u7D22\u5F15\u53EF\u80FD\u672A\u5EFA\u7ACB\u3002\u8FD0\u884C \`contextweaver index\` \u91CD\u5EFA\u3002`
    );
  }
  // `== null` matches both null and undefined, the same pair that the text
  // renderer's `??` fallback treats as "no dimension recorded".
  if (health.embeddingDimensions == null && health.totalFiles > 0) {
    out.push("\u672A\u8BB0\u5F55 embedding \u7EF4\u5EA6\uFF0C\u7D22\u5F15\u5143\u6570\u636E\u53EF\u80FD\u4E0D\u5B8C\u6574\u3002");
  }
  return out;
}
|
|
89
|
+
/**
 * Render a stats report (as returned by collectStats) as a multi-line plain-text
 * summary with four sections: index runs, search behaviour, health/consistency,
 * and diagnostics.
 *
 * @param {Object} report - Object with `projectId`, `index`, `search`, `health`,
 *   `lancedbRows` and `diagnostics`.
 * @returns {string} Lines joined with "\n" (no trailing newline).
 */
function renderStatsText(report) {
  const DASH = "\u2014";

  // Number with an optional unit suffix; integers are printed as-is, other
  // numbers with one decimal. null renders as a dash without the suffix.
  function fmt(value, suffix = "") {
    if (value === null) {
      return DASH;
    }
    const shown = Number.isInteger(value) ? value : value.toFixed(1);
    return `${shown}${suffix}`;
  }

  // Ratio in [0, 1] as a percentage with one decimal; null renders as a dash.
  function pct(ratio) {
    if (ratio === null) {
      return DASH;
    }
    return `${(ratio * 100).toFixed(1)}%`;
  }

  // Byte count in B, KB or MB (1024-based), one decimal for KB and MB.
  function bytes(size) {
    if (size < 1024) {
      return `${size} B`;
    }
    if (size < 1024 * 1024) {
      return `${(size / 1024).toFixed(1)} KB`;
    }
    return `${(size / 1024 / 1024).toFixed(1)} MB`;
  }

  const out = [`ContextWeaver \u7EDF\u8BA1 (projectId: ${report.projectId})`, ""];

  // Section: indexing runs.
  const indexInfo = report.index;
  out.push(
    "\u3010\u7D22\u5F15\u8FC7\u7A0B\u3011",
    ` \u7D2F\u8BA1\u7D22\u5F15\u8FD0\u884C: ${indexInfo.totalRuns} \u6B21`
  );
  if (indexInfo.lastRunAt) {
    out.push(` \u4E0A\u6B21\u7D22\u5F15\u65F6\u95F4: ${new Date(indexInfo.lastRunAt).toLocaleString()}`);
  }
  const lastRun = indexInfo.lastRun;
  if (!lastRun) {
    out.push(" \u4E0A\u6B21\u7ED3\u679C: \u6682\u65E0\uFF08\u5C1A\u672A\u7D22\u5F15\uFF09");
  } else {
    out.push(
      ` \u4E0A\u6B21\u7ED3\u679C: \u603B\u6570=${lastRun.totalFiles} \u65B0\u589E=${lastRun.added} \u4FEE\u6539=${lastRun.modified} \u672A\u53D8=${lastRun.unchanged} \u5220\u9664=${lastRun.deleted} \u8DF3\u8FC7=${lastRun.skipped} \u9519\u8BEF=${lastRun.errors}`
    );
    const vec = lastRun.vectorIndex;
    if (vec) {
      out.push(` \u5411\u91CF\u7D22\u5F15: \u5DF2\u7D22\u5F15=${vec.indexed} \u5220\u9664=${vec.deleted} \u9519\u8BEF=${vec.errors}`);
    }
  }
  out.push("");

  // Section: search behaviour.
  const searchInfo = report.search;
  out.push(
    "\u3010\u641C\u7D22\u8D28\u91CF/\u884C\u4E3A\u3011",
    ` \u7D2F\u8BA1\u67E5\u8BE2: ${searchInfo.totalQueries} \u6B21 (\u7F13\u5B58\u547D\u4E2D ${searchInfo.cacheHits}\uFF0C\u547D\u4E2D\u7387 ${pct(searchInfo.cacheHitRate)})`,
    ` \u5B9E\u9645\u8BA1\u7B97: ${searchInfo.computeRuns} \u6B21\uFF08\u672A\u547D\u4E2D\u7F13\u5B58\uFF0C\u4F5C\u4E3A\u4E0B\u5217\u5747\u503C\u5206\u6BCD\uFF09`,
    ` \u5E73\u5747\u8017\u65F6: retrieve=${fmt(searchInfo.avgRetrieveMs, "ms")} rerank=${fmt(searchInfo.avgRerankMs, "ms")} expand=${fmt(searchInfo.avgExpandMs, "ms")} pack=${fmt(searchInfo.avgPackMs, "ms")}`,
    ` \u5E73\u5747\u53EC\u56DE: ${fmt(searchInfo.avgSeedCount)} \u4E2A seed`,
    ""
  );

  // Section: health / consistency.
  const healthInfo = report.health;
  out.push(
    "\u3010\u5065\u5EB7/\u4E00\u81F4\u6027\u3011",
    ` \u6587\u4EF6: ${healthInfo.totalFiles} \u4E2A\uFF0C\u6B63\u6587\u603B\u91CF ${bytes(healthInfo.totalBytes)}`,
    ` LanceDB \u5411\u91CF\u884C: ${report.lancedbRows}`,
    ` embedding \u7EF4\u5EA6: ${healthInfo.embeddingDimensions ?? "\u2014"} \u7D22\u5F15\u7248\u672C: ${healthInfo.indexVersion} \u8FC1\u79FB\u72B6\u6001: ${healthInfo.migrationState ?? "\u672A\u8BBE\u7F6E"} pending_marks: ${healthInfo.pendingMarks}`
  );
  // Languages ordered by descending count.
  const languageCounts = Object.entries(healthInfo.byLanguage).sort((a, b) => b[1] - a[1]);
  if (languageCounts.length > 0) {
    const summary = languageCounts.map(([lang, count]) => `${lang}=${count}`).join(" ");
    out.push(` \u8BED\u8A00\u5360\u6BD4: ${summary}`);
  }
  out.push("");

  // Section: diagnostics.
  if (report.diagnostics.length > 0) {
    out.push("\u3010\u8BCA\u65AD\u544A\u8B66\u3011");
    for (const warning of report.diagnostics) {
      out.push(` \u26A0 ${warning}`);
    }
  } else {
    out.push("\u3010\u8BCA\u65AD\u3011\u65E0\u5F02\u5E38");
  }
  return out.join("\n");
}
|
|
152
|
+
|
|
153
|
+
export {
|
|
154
|
+
collectStats,
|
|
155
|
+
renderStatsText
|
|
156
|
+
};
|