@kk-irving/knowledge-mcp-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,186 @@
1
+ /**
2
+ * AOSP 模块索引器:遍历 module 路径下的源码 → 切块 → 入库。
3
+ *
4
+ * 设计:
5
+ * - 输入 platform + module + module_path(可绝对,也可相对 repo_root) + repo_root
6
+ * - 递归遍历,按 chunker 切块,写 aosp_chunks 表(UNIQUE 约束去重)
7
+ * - 跳过黑名单目录、二进制、>5MB 文件
8
+ * - 嵌入由后续 `embed_pending(source="aosp")` 异步处理
9
+ *
10
+ * 提供:
11
+ * - indexAospModule({ platform, module, module_path, repo_root })
12
+ * - clearAospIndex({ platform?, module? })
13
+ */
14
+ import { mkdir, readdir, readFile, stat } from "node:fs/promises";
15
+ import * as path from "node:path";
16
+ import * as crypto from "node:crypto";
17
+ import { getDb, runInTransaction } from "../db.js";
18
+ import { chunkFile, shouldIndexFile, shouldSkipDir } from "./chunker.js";
19
/**
 * Upsert statement for one chunk row in `aosp_chunks`.
 *
 * Rows conflict on (platform, module, file_path, line_start, line_end). On a
 * conflict the row is rewritten, and its embedding columns reset to NULL, only
 * when the stored content_hash differs from the incoming one; otherwise the
 * statement changes nothing.
 */
const UPSERT_SQL = `
  INSERT INTO aosp_chunks (
    platform, module, module_path, file_path,
    line_start, line_end, symbol_kind, symbol_name,
    content, content_hash, indexed_at
  ) VALUES (
    ?, ?, ?, ?,
    ?, ?, ?, ?,
    ?, ?, datetime('now')
  )
  ON CONFLICT(platform, module, file_path, line_start, line_end) DO UPDATE SET
    symbol_kind = excluded.symbol_kind,
    symbol_name = excluded.symbol_name,
    content = excluded.content,
    content_hash = excluded.content_hash,
    embedding = NULL,
    embedding_updated_at = NULL,
    indexed_at = excluded.indexed_at
  WHERE aosp_chunks.content_hash IS NOT excluded.content_hash
`;

/**
 * Walks a module directory, chunks every indexable file and upserts the
 * chunks into `aosp_chunks` inside a single transaction.
 *
 * @param {object} args
 * @param {string} args.platform    Platform name; stored upper-cased.
 * @param {string} args.module      Module name; stored lower-cased.
 * @param {string} args.module_path Module directory, absolute or relative to `repo_root`.
 * @param {string} args.repo_root   Repository root; stored file paths are relative to it.
 * @returns {Promise<object>} Counters: files_scanned, files_indexed,
 *   chunks_inserted (statements that reported a change), chunks_unchanged,
 *   errors (unreadable directories/files) and elapsed_ms.
 * @throws {Error} When repo_root is empty, a required string argument is
 *   missing, or module_path does not exist / is not a directory.
 */
export async function indexAospModule(args) {
  const startedAt = Date.now();
  const stats = {
    platform: args.platform,
    module: args.module,
    module_path: args.module_path,
    files_scanned: 0,
    files_indexed: 0,
    chunks_inserted: 0,
    chunks_unchanged: 0,
    errors: 0,
    elapsed_ms: 0,
  };
  if (!args.repo_root) {
    throw new Error("repo_root 不能为空");
  }
  // Fail fast with a clear message instead of a TypeError after the whole
  // tree has already been walked and read.
  for (const field of ["platform", "module", "module_path"]) {
    if (typeof args[field] !== "string") {
      throw new Error(`${field} 必须是字符串`);
    }
  }
  // Best effort: a failure here is ignored, the stat() below reports the real problem.
  await mkdir(args.repo_root, { recursive: true }).catch(() => { });

  const absPath = path.isAbsolute(args.module_path)
    ? args.module_path
    : path.join(args.repo_root, args.module_path);
  const rootStat = await stat(absPath).catch(() => null);
  if (!rootStat) {
    throw new Error(`module_path 不存在: ${absPath}`);
  }
  if (!rootStat.isDirectory()) {
    throw new Error(`module_path 不是目录: ${absPath}`);
  }

  const db = getDb();
  const upsert = db.prepare(UPSERT_SQL);
  const platformKey = args.platform.toUpperCase();
  const moduleKey = args.module.toLowerCase();

  // Breadth-first walk. A cursor is used instead of shift() so that dequeuing
  // stays O(1) on very large trees.
  const pendingDirs = [absPath];
  const rows = [];
  for (let cursor = 0; cursor < pendingDirs.length; cursor++) {
    const dir = pendingDirs[cursor];
    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      stats.errors++;
      continue;
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!shouldSkipDir(entry.name)) {
          pendingDirs.push(full);
        }
        continue;
      }
      // Anything that is neither a directory nor a regular file is ignored.
      if (!entry.isFile()) {
        continue;
      }
      stats.files_scanned++;

      let fileStat;
      try {
        fileStat = await stat(full);
      } catch {
        stats.errors++;
        continue;
      }
      if (!shouldIndexFile(full, fileStat.size)) {
        continue;
      }

      let content;
      try {
        content = await readFile(full, "utf8");
      } catch {
        stats.errors++;
        continue;
      }
      // Text containing NUL is treated as binary and skipped.
      if (content.indexOf("\u0000") >= 0) {
        continue;
      }

      const chunks = chunkFile(full, content);
      if (chunks.length === 0) {
        continue;
      }
      stats.files_indexed++;

      const relPath = path.relative(args.repo_root, full).replace(/\\/g, "/");
      for (const ch of chunks) {
        const hash = crypto.createHash("sha1").update(ch.content).digest("hex");
        rows.push([
          platformKey,
          moduleKey,
          args.module_path,
          relPath,
          String(ch.line_start),
          String(ch.line_end),
          ch.symbol_kind,
          ch.symbol_name,
          ch.content,
          hash,
        ]);
      }
    }
  }

  // NOTE(review): rows whose line ranges no longer exist after a file changed
  // are not removed here - confirm whether callers clear the module first.
  if (rows.length > 0) {
    runInTransaction(db, () => {
      for (const row of rows) {
        const info = upsert.run(...row);
        if ((info?.changes ?? 0) > 0) {
          stats.chunks_inserted++;
        } else {
          stats.chunks_unchanged++;
        }
      }
    });
  }

  stats.elapsed_ms = Date.now() - startedAt;
  return stats;
}
153
/**
 * Deletes rows from `aosp_chunks`, optionally restricted to a platform
 * and/or a module.
 *
 * @param {object} [args]
 * @param {string} [args.platform] Compared upper-cased; a falsy value means "any".
 * @param {string} [args.module]   Compared lower-cased; a falsy value means "any".
 * @returns {{cleared: number, scope: string}} Row count measured just before
 *   the delete, and a description of the filter that was applied ("all" when
 *   no filter was given).
 */
export function clearAospIndex(args = {}) {
  const db = getDb();

  // Collect the active [column, value] filters in a fixed order.
  const filters = [];
  const platform = args.platform?.toUpperCase();
  if (platform) {
    filters.push(["platform", platform]);
  }
  const module = args.module?.toLowerCase();
  if (module) {
    filters.push(["module", module]);
  }

  const where = filters.length > 0
    ? `WHERE ${filters.map(([column]) => `${column} = ?`).join(" AND ")}`
    : "";
  const params = filters.map(([, value]) => value);

  const countRow = db.prepare(`SELECT COUNT(*) AS n FROM aosp_chunks ${where}`).get(...params);
  const cleared = countRow?.n ?? 0;
  db.prepare(`DELETE FROM aosp_chunks ${where}`).run(...params);

  const scope = filters.length > 0
    ? filters.map(([column, value]) => `${column}=${value}`).join(", ")
    : "all";
  return { cleared, scope };
}
@@ -0,0 +1,176 @@
1
+ /**
2
+ * 解析 steering/module-path-map.md,提取 D4/X5/STB 各平台的模块 → 路径映射。
3
+ *
4
+ * map 的 Markdown 结构(约定):
5
+ *
6
+ * ```
7
+ * ## D4 平台
8
+ *
9
+ * ### 一级目录: vendor/zeasn
10
+ *
11
+ * | 模块 | 路径 |
12
+ * |---|---|
13
+ * | tvsystemui | `vendor/zeasn/...` |
14
+ * | asplayer | `vendor/zeasn/...` |
15
+ * ```
16
+ *
17
+ * 我们只关注**含路径的代码块或表格**,按平台 → module → [paths] 三层结构提取。
18
+ *
19
+ * 输出 `data/module-map.json`,下次启动直接读 JSON 不再解析 markdown。
20
+ */
21
+ import { readFile, writeFile, mkdir, stat } from "node:fs/promises";
22
+ import * as path from "node:path";
23
+ import { existsSync } from "node:fs";
24
+ import { config } from "../config.js";
25
const PLATFORM_NAMES = ["D4", "X5", "STB"];

/**
 * Returns true when `candidate` looks like a module map generated from `sourcePath`.
 */
function isModuleMapForSource(candidate, sourcePath) {
  return (
    candidate !== null &&
    typeof candidate === "object" &&
    candidate.platforms !== null &&
    typeof candidate.platforms === "object" &&
    !Array.isArray(candidate.platforms) &&
    candidate.source === sourcePath
  );
}

/**
 * Loads the platform -> module -> paths map.
 *
 * The JSON cache is used when it exists, is at least as new as the markdown
 * source, has the expected shape and was generated from the same source
 * file. Otherwise the markdown is parsed again and the cache rewritten.
 *
 * @param {object} [opts]
 * @param {string} [opts.sourcePath] Markdown source; defaults to
 *   `./steering/module-path-map.md` resolved against the working directory.
 * @param {string} [opts.cachePath]  JSON cache; defaults to `module-map.json`
 *   next to the configured database file.
 * @param {boolean} [opts.rebuild]   When truthy the cache is never read.
 * @returns {Promise<{platforms: object, generated_at: string, source: string}>}
 *   An empty `platforms` object is returned when the source file is missing.
 */
export async function loadModuleMap(opts = {}) {
  const sourcePath = opts.sourcePath ?? path.resolve("./steering/module-path-map.md");
  const cachePath = opts.cachePath ?? path.resolve(path.dirname(config.dbPath), "module-map.json");
  const sourceExists = existsSync(sourcePath);

  if (!opts.rebuild && sourceExists && existsSync(cachePath)) {
    try {
      const [cacheStat, srcStat] = await Promise.all([stat(cachePath), stat(sourcePath)]);
      if (cacheStat.mtimeMs >= srcStat.mtimeMs) {
        const cached = JSON.parse(await readFile(cachePath, "utf8"));
        // A cache built from another source file, or one with a broken
        // shape, must not be served; fall through and rebuild instead.
        if (isModuleMapForSource(cached, sourcePath)) {
          return cached;
        }
      }
    } catch {
      /* unreadable or corrupt cache: fall through to a rebuild */
    }
  }

  if (!sourceExists) {
    return {
      platforms: {},
      generated_at: new Date().toISOString(),
      source: sourcePath,
    };
  }

  const markdown = await readFile(sourcePath, "utf8");
  const map = parseModulePathMap(markdown, sourcePath);

  try {
    await mkdir(path.dirname(cachePath), { recursive: true });
    await writeFile(cachePath, JSON.stringify(map, null, 2), "utf8");
  } catch {
    /* failing to write the cache is not fatal */
  }
  return map;
}
66
+ // =============================================================================
67
+ // Markdown 解析
68
+ // =============================================================================
69
/**
 * Parses the text of module-path-map.md.
 *
 * Lines are scanned in order while tracking the current platform and module:
 * - a platform heading looks like `## D4 ...`, `## X5 ...` or `## STB ...`;
 * - a `#### name` heading opens a module section;
 * - a table row `| name | \`path\` | ...` adds `path` to module `name`;
 * - inside a module section, every inline-code span that looks like
 *   `dir/sub...` is added to that module.
 * Lines before the first platform heading are ignored. Module names are
 * normalised with normalizeModuleName and each path is stored once per module.
 *
 * @param {string} markdown   Full markdown text.
 * @param {string} sourcePath Recorded verbatim in the result's `source`.
 * @returns {{platforms: object, generated_at: string, source: string}}
 */
export function parseModulePathMap(markdown, sourcePath) {
  const platformHeadingRe = /^##\s+(D4|X5|STB)\s*(?:平台)?/i;
  const subHeadingRe = /^####\s+(.+?)\s*$/; // module sub-section heading
  const tableRowRe = /^\|\s*([^|]+?)\s*\|\s*`([^`]+)`/;
  const inlineCodeRe = /`([^`]+)`/g;
  const pathLikeRe = /^[\w._-]+\/[\w./_-]+/;

  const platforms = {};
  let currentPlatform = null;
  let currentModule = null;

  // Returns the path list of a module under the current platform, creating it
  // when absent. Only own properties count, so names such as "constructor"
  // never resolve to something inherited from Object.prototype.
  const bucketFor = (moduleKey) => {
    const modules = platforms[currentPlatform];
    if (!Object.prototype.hasOwnProperty.call(modules, moduleKey)) {
      Object.defineProperty(modules, moduleKey, {
        value: [],
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
    return modules[moduleKey];
  };
  const addPath = (moduleKey, candidate) => {
    const bucket = bucketFor(moduleKey);
    if (!bucket.includes(candidate)) {
      bucket.push(candidate);
    }
  };

  for (const rawLine of markdown.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) {
      continue;
    }

    // Platform heading: switches platform and leaves any module section.
    const platformMatch = line.match(platformHeadingRe);
    if (platformMatch) {
      const name = platformMatch[1].toUpperCase();
      currentPlatform = PLATFORM_NAMES.includes(name) ? name : null;
      currentModule = null;
      if (currentPlatform && !platforms[currentPlatform]) {
        platforms[currentPlatform] = {};
      }
      continue;
    }
    if (!currentPlatform) {
      continue;
    }

    // Module sub-heading, e.g. `#### TvSystemUI`.
    const headingMatch = line.match(subHeadingRe);
    if (headingMatch) {
      currentModule = normalizeModuleName(headingMatch[1]);
      if (currentModule) {
        bucketFor(currentModule);
      }
      continue;
    }

    // Table row: | module name | `path` | ...
    const rowMatch = line.match(tableRowRe);
    if (rowMatch) {
      // Skip the header separator row.
      if (/^[-:|\s]+$/.test(rowMatch[1])) {
        continue;
      }
      // normalizeModuleName returns null for header-like cells.
      const moduleName = normalizeModuleName(rowMatch[1]);
      const pathStr = rowMatch[2].trim();
      if (moduleName && pathStr) {
        addPath(moduleName, pathStr);
      }
      continue;
    }

    // Inline-code paths inside the current module section.
    if (currentModule) {
      for (const codeMatch of line.matchAll(inlineCodeRe)) {
        const candidate = codeMatch[1].trim();
        // Only accept fragments shaped like `xxx/yyy/...`.
        if (pathLikeRe.test(candidate)) {
          addPath(currentModule, candidate);
        }
      }
    }
  }

  return {
    platforms,
    generated_at: new Date().toISOString(),
    source: sourcePath,
  };
}
150
/**
 * Turns a raw heading or table cell into a module key.
 *
 * Backticks, asterisks and underscores are removed, the text is trimmed and
 * lower-cased, runs of whitespace/commas/parentheses/slashes become a single
 * hyphen, and leading/trailing hyphens are dropped.
 *
 * @param {string} raw
 * @returns {string|null} The key, or null when the text is empty or starts
 *   with one of the header/placeholder words.
 */
function normalizeModuleName(raw) {
  const headerOrPlaceholderRe = /^(模块|路径|说明|备注|描述|—|-)/;

  const bare = raw.replace(/[`*_]/g, "").trim().toLowerCase();
  if (bare === "" || headerOrPlaceholderRe.test(bare)) {
    return null;
  }

  // Replace separators with hyphens, then tidy the hyphens up.
  const hyphenated = bare.replace(/[\s,()()/]+/g, "-");
  const collapsed = hyphenated.replace(/-+/g, "-");
  return collapsed.replace(/^-|-$/g, "");
}
162
+ // =============================================================================
163
+ // 查询帮手
164
+ // =============================================================================
165
/**
 * Looks up the path prefixes recorded for a module on a platform.
 *
 * @param {{platforms: object}} map Module map as produced by the loader.
 * @param {string} platform         Matched upper-cased.
 * @param {string} moduleId         Matched lower-cased.
 * @returns {string[]} The stored path list, or an empty array when the
 *   platform or module is unknown.
 */
export function resolveModulePaths(map, platform, moduleId) {
  const platformKey = (platform || "").toUpperCase();
  const moduleKey = (moduleId || "").toLowerCase();

  const modules = map.platforms[platformKey];
  // Only own entries count: a module id such as "constructor" must not
  // resolve to a member inherited from Object.prototype.
  if (modules == null || !Object.prototype.hasOwnProperty.call(modules, moduleKey)) {
    return [];
  }
  const paths = modules[moduleKey];
  return Array.isArray(paths) ? paths : [];
}
173
/**
 * Lists the module keys known for a platform, sorted with the default
 * string ordering.
 *
 * @param {{platforms: object}} map Module map.
 * @param {string} platform         Matched upper-cased.
 * @returns {string[]} Sorted module keys; empty when the platform is unknown.
 */
export function listModulesOfPlatform(map, platform) {
  const platformKey = (platform || "").toUpperCase();
  const modules = map.platforms[platformKey] ?? {};
  const names = Object.keys(modules);
  names.sort();
  return names;
}
@@ -0,0 +1,254 @@
1
+ /**
2
+ * AOSP chunks 检索:vector / fts / hybrid 三模式 + platform/module 过滤。
3
+ *
4
+ * 与三源 search 共享 embedder 与 FTS5 设计,但单独存矩阵(aosp_chunks 通常远多于
5
+ * zmind_issues,可独立 invalidate)。
6
+ */
7
+ import { config } from "../config.js";
8
+ import { blobToVector, getDb } from "../db.js";
9
+ import { embedOne } from "../embedder.js";
10
+ import { loadModuleMap, resolveModulePaths } from "./module-map-loader.js";
11
// Loaded vector indexes, keyed by the filter they were built for.
const _cache = new Map();

/**
 * Builds the cache key for a (platform, module, path filter) combination.
 * A missing platform or module is represented by "*".
 */
function cacheKey(platform, module, modulePath) {
  const platformPart = (platform ?? "*").toUpperCase();
  const modulePart = (module ?? "*").toLowerCase();
  const pathPart = modulePath ?? "";
  return [platformPart, modulePart, pathPart].join("::");
}

/**
 * Drops every cached vector index so the next lookup reloads from the database.
 */
export function invalidateAospIndex() {
  _cache.clear();
}
18
/**
 * Loads (or returns from cache) the embedding matrix for the chunks matching
 * the given filter.
 *
 * @param {object} args
 * @param {string} [args.platform]             Matched upper-cased.
 * @param {string} [args.module]               Matched lower-cased.
 * @param {string[]} [args.modulePathPrefixes] Literal `module_path` prefixes; any may match.
 * @returns {{ids: Array, matrix: Float32Array, count: number, meta: Map}}
 *   `matrix` holds `count` vectors of `config.embeddingDim` floats each, row
 *   `i` belonging to `ids[i]`; `meta` maps an id to its database row.
 */
function loadAospIndex(args) {
  const prefixes = args.modulePathPrefixes ?? [];
  const key = cacheKey(args.platform, args.module, prefixes.join("|"));
  const cached = _cache.get(key);
  if (cached) {
    return cached;
  }

  const db = getDb();
  const dim = config.embeddingDim;
  const where = ["embedding IS NOT NULL"];
  const params = [];
  if (args.platform) {
    where.push("platform = ?");
    params.push(args.platform.toUpperCase());
  }
  if (args.module) {
    where.push("module = ?");
    params.push(args.module.toLowerCase());
  }
  if (prefixes.length > 0) {
    // `%` and `_` inside a path are escaped so that they match literally
    // instead of acting as LIKE wildcards.
    const ors = prefixes.map(() => "module_path LIKE ? ESCAPE '\\'").join(" OR ");
    where.push(`(${ors})`);
    for (const prefix of prefixes) {
      params.push(`${prefix.replace(/[\\%_]/g, "\\$&")}%`);
    }
  }

  const rows = db
    .prepare(`SELECT id, platform, module, module_path, file_path,
                line_start, line_end, symbol_kind, symbol_name, content, embedding
              FROM aosp_chunks
              WHERE ${where.join(" AND ")}`)
    .all(...params);

  // Vectors are packed back to back: the slot of a vector is the position of
  // its id in `ids`, so a skipped row can never misalign the two.
  const packed = new Float32Array(rows.length * dim);
  const ids = [];
  const meta = new Map();
  for (const row of rows) {
    if (!row.embedding) {
      continue;
    }
    packed.set(blobToVector(row.embedding, dim), ids.length * dim);
    ids.push(row.id);
    meta.set(row.id, row);
  }
  const count = ids.length;
  const matrix = count === rows.length ? packed : packed.subarray(0, count * dim);

  const idx = { ids, matrix, count, meta };
  _cache.set(key, idx);
  return idx;
}
62
+ // =============================================================================
63
+ // FTS 检索
64
+ // =============================================================================
65
/**
 * Runs a full-text query against `aosp_chunks_fts`, joined to `aosp_chunks`.
 *
 * @param {object} args
 * @param {string} args.query                  Raw user query; escaped with escapeFts5.
 * @param {string} [args.platform]             Matched upper-cased.
 * @param {string} [args.module]               Matched lower-cased.
 * @param {string[]} [args.modulePathPrefixes] Literal `module_path` prefixes; any may match.
 * @param {number} args.limit                  Maximum number of rows.
 * @returns {Array<{row: object, bm25: number}>} Rows ordered by ascending
 *   bm25() value; empty when the query has no usable token.
 */
function ftsSearch(args) {
  const db = getDb();
  const safeQuery = escapeFts5(args.query);
  if (!safeQuery) {
    return [];
  }

  const where = ["aosp_chunks_fts MATCH ?"];
  const params = [safeQuery];
  if (args.platform) {
    where.push("a.platform = ?");
    params.push(args.platform.toUpperCase());
  }
  if (args.module) {
    where.push("a.module = ?");
    params.push(args.module.toLowerCase());
  }
  const prefixes = args.modulePathPrefixes ?? [];
  if (prefixes.length > 0) {
    // `%` and `_` inside a path are escaped so that they match literally
    // instead of acting as LIKE wildcards.
    const ors = prefixes.map(() => "a.module_path LIKE ? ESCAPE '\\'").join(" OR ");
    where.push(`(${ors})`);
    for (const prefix of prefixes) {
      params.push(`${prefix.replace(/[\\%_]/g, "\\$&")}%`);
    }
  }

  const sql = `
    SELECT a.id, a.platform, a.module, a.module_path, a.file_path,
           a.line_start, a.line_end, a.symbol_kind, a.symbol_name, a.content,
           bm25(aosp_chunks_fts) AS bm
    FROM aosp_chunks_fts
    JOIN aosp_chunks a ON a.id = aosp_chunks_fts.rowid
    WHERE ${where.join(" AND ")}
    ORDER BY bm
    LIMIT ?
  `;
  const rows = db.prepare(sql).all(...params, args.limit);
  return rows.map((row) => ({ row, bm25: row.bm }));
}
101
/**
 * Converts free text into a MATCH expression made of double-quoted tokens.
 *
 * Quote characters are replaced by spaces, the text is split on whitespace
 * and every non-empty token is wrapped in double quotes.
 *
 * @param {string} q Raw query text.
 * @returns {string} Space-separated quoted tokens, or "" when none remain.
 */
function escapeFts5(q) {
  const quotedTerms = [];
  for (const piece of q.replace(/["']/g, " ").split(/\s+/)) {
    if (piece.length > 0) {
      quotedTerms.push(`"${piece}"`);
    }
  }
  return quotedTerms.length === 0 ? "" : quotedTerms.join(" ");
}
110
/**
 * Produces a single-line preview of a chunk: whitespace runs are collapsed to
 * one space and the text is cut to `config.snippetMaxChars` characters, with
 * an ellipsis appended when it was cut.
 *
 * @param {string} content Chunk text.
 * @returns {string}
 */
function snippetOf(content) {
  const flattened = content.replace(/\s+/g, " ").trim();
  const maxChars = config.snippetMaxChars;
  if (flattened.length <= maxChars) {
    return flattened;
  }
  return `${flattened.slice(0, maxChars).trimEnd()}…`;
}
114
/**
 * Shapes a database row into a search hit.
 *
 * @param {object} r     Row read from `aosp_chunks`.
 * @param {number} score Relevance score to report.
 * @param {string} match Which search produced the hit (the callers in this
 *   file pass "vector" or "fts").
 * @returns {object} Hit with `source` fixed to "aosp" and a snippet built
 *   from the row's content.
 */
function rowToHit(r, score, match) {
  const {
    id,
    platform,
    module,
    file_path,
    line_start,
    line_end,
    symbol_kind,
    symbol_name,
    content,
  } = r;
  return {
    id,
    source: "aosp",
    platform,
    module,
    file_path,
    line_start,
    line_end,
    symbol_kind,
    symbol_name,
    snippet: snippetOf(content),
    score,
    match,
  };
}
130
/**
 * Rescales a list of bm25 values linearly so that the smallest value maps to
 * 1 and the largest to 0. When all values are equal each one maps to 1.
 *
 * @param {number[]} values
 * @returns {Map<number, number>} Original value -> rescaled score.
 */
function normalizeFts(values) {
  if (values.length === 0) {
    return new Map();
  }
  const lowest = Math.min(...values);
  const highest = Math.max(...values);
  // A zero range would divide by zero; fall back to 1.
  const range = highest - lowest || 1;
  return values.reduce(
    (scores, value) => scores.set(value, 1 - (value - lowest) / range),
    new Map(),
  );
}
140
+ // =============================================================================
141
+ // 公共入口
142
+ // =============================================================================
143
/**
 * Clamps a requested result limit into [1, config.searchMaxLimit], using
 * config.searchDefaultLimit when the request is missing or not a number.
 */
function clampSearchLimit(requested) {
  const asNumber = Number(requested ?? config.searchDefaultLimit);
  const wanted = Number.isNaN(asNumber) ? config.searchDefaultLimit : asNumber;
  return Math.max(1, Math.min(config.searchMaxLimit, wanted));
}

/**
 * Scores every vector of a loaded index against the query vector with a dot
 * product and returns the `k` best rows, highest score first.
 */
function topVectorHits(idx, queryVec, dim, k) {
  const scores = new Float32Array(idx.count);
  for (let row = 0; row < idx.count; row++) {
    const base = row * dim;
    let dot = 0;
    for (let j = 0; j < dim; j++) {
      dot += idx.matrix[base + j] * queryVec[j];
    }
    scores[row] = dot;
  }
  const order = Array.from({ length: idx.count }, (_, i) => i).sort((a, b) => scores[b] - scores[a]);
  const hits = [];
  for (const rowIndex of order.slice(0, k)) {
    const row = idx.meta.get(idx.ids[rowIndex]);
    if (row) {
      hits.push({ row, score: scores[rowIndex] });
    }
  }
  return hits;
}

/**
 * Searches the indexed AOSP chunks.
 *
 * @param {object} args
 * @param {string} args.query         Search text. A missing, non-string or
 *   blank query yields an empty result instead of an error.
 * @param {string} [args.mode]        "vector", "fts" or "hybrid" (default).
 * @param {number} [args.limit]       Maximum hits; clamped to the configured range.
 * @param {string} [args.platform]    Platform filter.
 * @param {string} [args.module]      Module filter.
 * @param {string} [args.module_path] Explicit path prefix; when absent and both
 *   platform and module are given, prefixes are looked up in the module map.
 * @returns {Promise<{source: string, query: *, mode: string, hits: object[], filter: object}>}
 * @throws Re-throws embedding/index errors in "vector" mode only; in "hybrid"
 *   mode such errors are ignored and the full-text hits are returned alone.
 */
export async function searchAosp(args) {
  const mode = args.mode ?? "hybrid";
  const query = typeof args.query === "string" ? args.query : "";
  if (!query.trim()) {
    return {
      source: "aosp",
      query: args.query,
      mode,
      hits: [],
      filter: {},
    };
  }
  const limit = clampSearchLimit(args.limit);

  // Translate the module into module_path prefixes.
  let modulePathPrefixes;
  if (args.module_path) {
    modulePathPrefixes = [args.module_path];
  } else if (args.module && args.platform) {
    try {
      const map = await loadModuleMap();
      const paths = resolveModulePaths(map, args.platform, args.module);
      if (paths.length > 0) {
        modulePathPrefixes = paths;
      }
    } catch {
      /* fall back to no prefix filter */
    }
  }

  // Both searches fetch twice the limit so that merging has candidates to spare.
  let ftsHits = [];
  if (mode === "fts" || mode === "hybrid") {
    ftsHits = ftsSearch({
      query,
      platform: args.platform,
      module: args.module,
      modulePathPrefixes,
      limit: limit * 2,
    });
  }

  let vectorHits = [];
  if (mode === "vector" || mode === "hybrid") {
    try {
      const queryVec = await embedOne(query);
      const idx = loadAospIndex({ platform: args.platform, module: args.module, modulePathPrefixes });
      if (idx.count > 0) {
        vectorHits = topVectorHits(idx, queryVec, config.embeddingDim, limit * 2);
      }
    } catch (err) {
      // Hybrid mode degrades to full-text only; pure vector mode must fail loudly.
      if (mode === "vector") {
        throw err;
      }
    }
  }

  let hits;
  if (mode === "vector") {
    hits = vectorHits.map((h) => rowToHit(h.row, h.score, "vector")).slice(0, limit);
  } else if (mode === "fts") {
    const norm = normalizeFts(ftsHits.map((h) => h.bm25));
    hits = ftsHits
      .map((h) => rowToHit(h.row, norm.get(h.bm25) ?? 0, "fts"))
      .slice(0, limit);
  } else {
    // Hybrid merge: a chunk found by both searches keeps the higher score.
    const merged = new Map();
    for (const h of vectorHits) {
      merged.set(h.row.id, rowToHit(h.row, h.score, "vector"));
    }
    const norm = normalizeFts(ftsHits.map((h) => h.bm25));
    for (const h of ftsHits) {
      const ftsScore = norm.get(h.bm25) ?? 0;
      const existing = merged.get(h.row.id);
      if (existing) {
        merged.set(h.row.id, { ...existing, score: Math.max(existing.score, ftsScore), match: "both" });
      } else {
        merged.set(h.row.id, rowToHit(h.row, ftsScore, "fts"));
      }
    }
    hits = Array.from(merged.values())
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }

  return {
    source: "aosp",
    query: args.query,
    mode,
    hits,
    filter: {
      platform: args.platform,
      module: args.module,
      module_paths: modulePathPrefixes,
    },
  };
}
package/dist/config.js ADDED
@@ -0,0 +1,39 @@
1
+ /**
2
+ * 全局配置读取(从环境变量)。
3
+ *
4
+ * 路径与限制集中放这里,避免散落在各模块。
5
+ */
6
+ import * as path from "node:path";
7
+ import * as os from "node:os";
8
/**
 * Reads a positive integer from an environment variable.
 *
 * @param {string} key      Environment variable name.
 * @param {number} fallback Returned when the variable is unset, is not made
 *   of digits only, or is not a finite number greater than zero.
 * @returns {number}
 */
function envInt(key, fallback) {
  const raw = (process.env[key] ?? "").trim();
  if (!/^\d+$/.test(raw)) {
    return fallback;
  }
  const n = parseInt(raw, 10);
  return Number.isFinite(n) && n > 0 ? n : fallback;
}

/**
 * Reads a trimmed string from an environment variable.
 *
 * @param {string} key      Environment variable name.
 * @param {string} fallback Returned when the variable is unset, empty or
 *   whitespace-only, so that e.g. `KNOWLEDGE_DB_PATH=` cannot produce an
 *   empty path.
 * @returns {string}
 */
function envStr(key, fallback) {
  const raw = (process.env[key] ?? "").trim();
  return raw === "" ? fallback : raw;
}

export const config = {
  /** Path of the main SQLite database. */
  dbPath: envStr("KNOWLEDGE_DB_PATH", path.resolve("./data/knowledge.db")),
  /** Cache directory for the ONNX embedding model. */
  modelCacheDir: envStr("KNOWLEDGE_MODEL_CACHE_DIR", path.resolve("./data/models")),
  /** Embedding model id (default: BGE-small-zh ONNX, dim=512). */
  embeddingModelId: envStr("KNOWLEDGE_EMBEDDING_MODEL", "Xenova/bge-small-zh-v1.5"),
  /** Embedding vector dimension; must match the model (512 for BGE-small-zh-v1.5). */
  embeddingDim: envInt("KNOWLEDGE_EMBEDDING_DIM", 512),
  /** Embedding batch size. */
  embeddingBatchSize: envInt("KNOWLEDGE_EMBEDDING_BATCH", 32),
  /** ONNX runtime thread count: by default half of the CPUs, between 1 and 4. */
  embeddingThreads: envInt("KNOWLEDGE_EMBEDDING_THREADS", Math.max(1, Math.min(4, Math.floor((os.cpus()?.length ?? 4) / 2)))),
  /** Maximum characters of one text before embedding (keeps it under the model's token limit). */
  maxTextChars: envInt("KNOWLEDGE_MAX_TEXT_CHARS", 1800),
  /** Default result limit for searches. */
  searchDefaultLimit: 5,
  /** Upper bound of the result limit for searches. */
  searchMaxLimit: envInt("KNOWLEDGE_SEARCH_MAX_LIMIT", 20),
  /** Maximum characters of a result snippet. */
  snippetMaxChars: envInt("KNOWLEDGE_SNIPPET_MAX_CHARS", 320),
};
37
/**
 * Tells whether a value is one of the source names "zmind", "gerrit" or
 * "confluence" (exact, case-sensitive match).
 *
 * @param {*} s Value to test.
 * @returns {boolean}
 */
export function isSourceName(s) {
  switch (s) {
    case "zmind":
    case "gerrit":
    case "confluence":
      return true;
    default:
      return false;
  }
}