mn-docs-mcp 0.5.2 → 0.6.1

This diff shows the changes between the two published package versions as they appear in the public registry.
package/README.md CHANGED
@@ -24,11 +24,30 @@ pnpm preview # preview the build output
 
  ## Local MCP Search
 
- The project ships a local MCP server that supports both stdio and HTTP Stream transports and returns plain-text snippets, suitable for direct use by AI.
+ The project ships a local MCP server that supports both stdio and HTTP Stream transports. For AI development Q&A it provides a "two-step retrieval" workflow: first discover the relevant documents, then read their full text on demand.
 
  The embedding model is the local BGE-small-zh-v1.5 (ONNX); on first start it is downloaded automatically into the default transformers.js cache directory. The model files are about 95.8 MB and the vector dimension is 512.
  Model downloads use the mirror https://hf-mirror.com
 
+ ### Tool design
+
+ - `discover_docs`
+   - Used for the first retrieval step.
+   - Supports three modes: `hybrid`, `keyword`, and `semantic`.
+   - Returns results aggregated per document: `doc_id`, `title`, `url`, `summary`, `matched_by`, `snippets[]`.
+   - Best suited to answering "which document should I read first".
+
+ - `read_doc`
+   - Used for the second step: reading the full text.
+   - Reads a specific document by `doc_id`, `slug`, or `url`.
+   - Returns the complete document content plus its section headings, useful for follow-up questions such as "what are all the fields", "what is the full API", or "where is the sample code".
+
+ ### Recommended call order
+
+ 1. Call `discover_docs` first to locate the most relevant documents.
+ 2. Once a clear target document appears in the results, call `read_doc` to read the whole document.
+ 3. When a question involves fields, methods, return values, the full API, or complete examples, do not rely on snippets alone; continue by reading the full text.
+
  ### Quick Start (npx)
 
  ### MCP Configuration Example (npx)
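For reference, the embedding setup the README describes (BGE-small-zh-v1.5 via transformers.js, downloaded through hf-mirror.com, 512-dimensional vectors) can be exercised on its own with a short sketch. This assumes the `@xenova/transformers` package and mean-pooled, normalized feature extraction; the package's own `embedText` body is not shown in this diff, so treat the options below as illustrative rather than the exact ones it uses.

```js
// Minimal sketch: produce one 512-dim embedding with the same model the
// server downloads on first start. Assumes the @xenova/transformers package;
// env.remoteHost mirrors what getExtractor() sets in mcp/lib.mjs.
import { env, pipeline } from '@xenova/transformers';

env.remoteHost = 'https://hf-mirror.com'; // use the HF mirror, as the README notes
env.allowRemoteModels = true;

const extractor = await pipeline('feature-extraction', 'Xenova/bge-small-zh-v1.5');
// Pooling/normalization options are an assumption; the diff does not show embedText's body.
const output = await extractor('MarginNote 卡片字段', { pooling: 'mean', normalize: true });
console.log(output.data.length); // expected: 512
```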
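The two-step call order listed in the README can likewise be sketched from the client side. The `client.callTool({ name, arguments })` shape follows the official MCP SDK client and is an assumption here; the argument names and the JSON payload shapes (`results[].doc_id`, then `doc_id`/`title`/`url`/`headings`/`content`) come from the `discoverDocs` and `readDoc` implementations later in this diff.

```js
// Sketch of the recommended discover -> read flow, assuming an MCP client
// object exposing callTool({ name, arguments }) (e.g. the official SDK client).
async function answerWithDocs(client, question) {
  // Step 1: discover candidate documents; results are aggregated per document.
  const discovery = await client.callTool({
    name: 'discover_docs',
    arguments: { query: question, mode: 'hybrid', top_k: 5 },
  });
  const { results } = JSON.parse(discovery.content[0].text);
  if (!results.length) return null;

  // Step 2: for "all fields" / "full API" style questions, read the whole
  // document instead of answering from snippets.
  const doc = await client.callTool({
    name: 'read_doc',
    arguments: { doc_id: results[0].doc_id },
  });
  return JSON.parse(doc.content[0].text); // { doc_id, title, url, headings, content }
}
```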
package/mcp/lib.mjs CHANGED
@@ -11,6 +11,38 @@ const __dirname = path.dirname(__filename);
11
11
 
12
12
  const DEFAULT_ROOT = path.resolve(__dirname, '..');
13
13
 
14
+ const MODEL_ID = 'Xenova/bge-small-zh-v1.5';
15
+ const MODEL_DIM = 512;
16
+ const INDEX_VERSION = 2;
17
+ const MAX_EXTRACTOR_RETRIES = 3;
18
+
19
+ const QUERY_SYNONYMS = {
20
+ mn: ['marginnote'],
21
+ marginnote: ['mn'],
22
+ 卡片: ['笔记', '脑图节点'],
23
+ 笔记: ['卡片'],
24
+ 字段: ['属性'],
25
+ 属性: ['字段'],
26
+ 方法: ['函数'],
27
+ comment: ['comments', '评论'],
28
+ comments: ['comment', '评论'],
29
+ markdown: ['md'],
30
+ };
31
+
32
+ const DOC_ALIAS_HINTS = {
33
+ MbBookNote: ['笔记', '卡片', '脑图节点', 'mn卡片', '笔记对象'],
34
+ Note: ['创建笔记', '新建笔记', '笔记工厂'],
35
+ MbTopic: ['笔记本', '脑图', '卡片组'],
36
+ MbBook: ['文档', '书本', '书籍'],
37
+ };
38
+
39
+ let extractorPromise;
40
+ let proxyInitialized = false;
41
+ const IS_STDIO = process.env.MCP_STDIO === '1';
42
+ const IS_SILENT = process.env.MCP_SILENT === '1';
43
+ const NO_COLOR = process.env.MCP_NO_COLOR === '1';
44
+ let lastDownloadProgress = -1;
45
+
14
46
  function resolveRootDir() {
15
47
  const envRoot = (process.env.MN_DOCS_ROOT || '').trim();
16
48
  if (envRoot && fsSyncExists(path.join(envRoot, 'src', 'content', 'docs'))) return envRoot;
@@ -35,16 +67,6 @@ const DOCS_DIR = path.join(ROOT_DIR, 'src', 'content', 'docs');
35
67
  const MCP_DIR = path.join(ROOT_DIR, '.mcp');
36
68
  const INDEX_PATH = path.join(MCP_DIR, 'index.json');
37
69
 
38
- const MODEL_ID = 'Xenova/bge-small-zh-v1.5';
39
- const MODEL_DIM = 512;
40
- let extractorPromise;
41
- let proxyInitialized = false;
42
- const MAX_EXTRACTOR_RETRIES = 3;
43
- const IS_STDIO = process.env.MCP_STDIO === '1';
44
- const IS_SILENT = process.env.MCP_SILENT === '1';
45
- const NO_COLOR = process.env.MCP_NO_COLOR === '1';
46
- let lastDownloadProgress = -1;
47
-
48
70
  function logInfo(message) {
49
71
  if (IS_SILENT) return;
50
72
  if (IS_STDIO) {
@@ -74,12 +96,11 @@ function formatBytes(bytes) {
74
96
  function logDownloadProgress(info) {
75
97
  if (IS_SILENT) return;
76
98
  if (info?.status === 'download') {
77
- // 清除当前行(如果之前有内容)
78
99
  if (IS_STDIO) {
79
- process.stderr.write('\r\x1b[K'); // 清除整行
100
+ process.stderr.write('\r\x1b[K');
80
101
  process.stderr.write(color('开始下载模型...', '38;5;45') + '\n');
81
102
  } else {
82
- process.stdout.write('\r\x1b[K'); // 清除整行
103
+ process.stdout.write('\r\x1b[K');
83
104
  console.log(color('开始下载模型...', '38;5;45'));
84
105
  }
85
106
  lastDownloadProgress = -1;
@@ -94,7 +115,7 @@ function logDownloadProgress(info) {
94
115
  const suffix = loaded && total ? ` ${loaded}/${total}` : '';
95
116
  const line = `${color('模型下载进度', '38;5;45')}: ${pct}%${suffix}`;
96
117
  if (IS_STDIO) {
97
- process.stderr.write(`\r\x1b[K${line}`); // \x1b[K 清除从光标到行尾的内容
118
+ process.stderr.write(`\r\x1b[K${line}`);
98
119
  if (pct === 100) process.stderr.write('\n');
99
120
  } else {
100
121
  process.stdout.write(`\r\x1b[K${line}`);
@@ -109,8 +130,7 @@ function setupProxy() {
109
130
  const proxyUrl = (process.env.HTTPS_PROXY || process.env.HTTP_PROXY || process.env.ALL_PROXY || '').trim();
110
131
  if (!proxyUrl) return;
111
132
  try {
112
- const dispatcher = new ProxyAgent(proxyUrl);
113
- setGlobalDispatcher(dispatcher);
133
+ setGlobalDispatcher(new ProxyAgent(proxyUrl));
114
134
  } catch {
115
135
  setGlobalDispatcher(new Agent());
116
136
  }
@@ -119,21 +139,14 @@ function setupProxy() {
119
139
  async function getExtractor() {
120
140
  if (extractorPromise) return extractorPromise;
121
141
  setupProxy();
122
-
123
- // 抑制 Hugging Face Transformers 的警告输出
142
+
124
143
  env.allowRemoteModels = true;
125
- env.disableProgressBars = true; // 禁用库自带的进度条
126
- env.disableSymlinksWarning = true; // 禁用符号链接警告
144
+ env.disableProgressBars = true;
145
+ env.disableSymlinksWarning = true;
127
146
  env.remoteHost = 'https://hf-mirror.com';
128
-
129
- // 设置日志级别为 error,避免 info/warning 级别日志干扰
130
- if (!process.env.LOG_LEVEL) {
131
- process.env.LOG_LEVEL = 'error';
132
- }
133
-
134
- const modelDir = env.cacheDir
135
- ? path.join(env.cacheDir, 'Xenova', 'bge-small-zh-v1.5')
136
- : null;
147
+ if (!process.env.LOG_LEVEL) process.env.LOG_LEVEL = 'error';
148
+
149
+ const modelDir = env.cacheDir ? path.join(env.cacheDir, 'Xenova', 'bge-small-zh-v1.5') : null;
137
150
  const create = async () =>
138
151
  pipeline('feature-extraction', MODEL_ID, {
139
152
  progress_callback: logDownloadProgress,
@@ -151,16 +164,11 @@ async function getExtractor() {
151
164
  message.includes('fetch failed') ||
152
165
  message.includes('ConnectTimeoutError');
153
166
 
154
- if (!shouldRetry || attempt === MAX_EXTRACTOR_RETRIES) {
155
- throw error;
156
- }
167
+ if (!shouldRetry || attempt === MAX_EXTRACTOR_RETRIES) throw error;
157
168
 
158
- // 清除上次的进度状态,为重试做准备
159
169
  lastDownloadProgress = -1;
160
170
  logInfo(`模型下载失败,准备重试(${attempt}/${MAX_EXTRACTOR_RETRIES})...`);
161
- if (modelDir) {
162
- await fs.rm(modelDir, { recursive: true, force: true });
163
- }
171
+ if (modelDir) await fs.rm(modelDir, { recursive: true, force: true });
164
172
  }
165
173
  }
166
174
  throw new Error('模型加载失败');
@@ -239,8 +247,69 @@ async function walkFiles(dir) {
239
247
  return results;
240
248
  }
241
249
 
242
- function makeId(slug, index) {
243
- return `${slug}::${index}`;
250
+ function makeDocId(slug) {
251
+ return slug;
252
+ }
253
+
254
+ function makeChunkId(docId, index) {
255
+ return `${docId}::${index}`;
256
+ }
257
+
258
+ function uniqueList(values) {
259
+ const set = new Set();
260
+ for (const value of values) {
261
+ const normalized = normalizeWhitespace(String(value || ''));
262
+ if (!normalized) continue;
263
+ set.add(normalized);
264
+ }
265
+ return [...set];
266
+ }
267
+
268
+ function normalizeForMatch(text) {
269
+ return normalizeWhitespace(String(text || '').toLowerCase())
270
+ .replace(/[`"'“”‘’()[\]{}:;,.!?/\\|<>+=_*&#%-]+/g, ' ')
271
+ .trim();
272
+ }
273
+
274
+ function splitIdentifierWords(text) {
275
+ const value = String(text || '')
276
+ .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
277
+ .replace(/[_/-]+/g, ' ');
278
+ return uniqueList(value.split(/\s+/));
279
+ }
280
+
281
+ function tokenize(text) {
282
+ const normalized = normalizeForMatch(text);
283
+ if (!normalized) return [];
284
+ const matches = normalized.match(/[a-z0-9]+|[\p{Script=Han}]+/gu);
285
+ if (!matches) return [];
286
+ const tokens = [];
287
+ for (const match of matches) {
288
+ tokens.push(match);
289
+ if (/^[\p{Script=Han}]+$/u.test(match) && match.length >= 2) {
290
+ for (let i = 0; i < match.length - 1; i += 1) {
291
+ tokens.push(match.slice(i, i + 2));
292
+ }
293
+ }
294
+ }
295
+ return uniqueList(tokens);
296
+ }
297
+
298
+ function buildAliasCandidates({ title, slug, description, headings, plainText }) {
299
+ const slugTail = slug.split('/').pop() || slug;
300
+ const firstSentence = plainText.split(/[。!?.!?]/)[0] || '';
301
+ const aliases = [
302
+ title,
303
+ description,
304
+ slugTail,
305
+ slugTail.replace(/-/g, ' '),
306
+ ...splitIdentifierWords(title),
307
+ ...splitIdentifierWords(slugTail),
308
+ ...headings.slice(0, 6),
309
+ firstSentence,
310
+ ...(DOC_ALIAS_HINTS[title] || []),
311
+ ];
312
+ return uniqueList(aliases);
244
313
  }
245
314
 
246
315
  async function embedText(text) {
@@ -257,9 +326,9 @@ export async function buildIndex() {
257
326
  await fs.mkdir(MCP_DIR, { recursive: true });
258
327
 
259
328
  const files = await walkFiles(DOCS_DIR);
260
- const docs = [];
329
+ const documents = [];
330
+ const chunks = [];
261
331
  const tasks = [];
262
- let counter = 0;
263
332
 
264
333
  for (const file of files) {
265
334
  const rel = path.relative(DOCS_DIR, file).replace(/\\/g, '/');
@@ -268,20 +337,44 @@ export async function buildIndex() {
268
337
  const parsed = matter(raw);
269
338
  const frontmatterTitle = typeof parsed.data?.title === 'string' ? parsed.data.title.trim() : '';
270
339
  const frontmatterSlug = typeof parsed.data?.slug === 'string' ? parsed.data.slug.trim() : '';
271
- const content = stripMarkdown(parsed.content);
272
- const chunks = splitByHeadingAndParagraph(content);
273
- const pageTitle = frontmatterTitle || (chunks[0]?.heading || slug.split('/').pop() || slug);
274
- const url = slugToUrl(frontmatterSlug || slug);
340
+ const frontmatterDescription =
341
+ typeof parsed.data?.description === 'string' ? parsed.data.description.trim() : '';
342
+ const rawMarkdown = parsed.content.trim();
343
+ const plainText = stripMarkdown(parsed.content);
344
+ const chunkEntries = splitByHeadingAndParagraph(rawMarkdown);
345
+ const pageTitle = frontmatterTitle || (chunkEntries[0]?.heading || slug.split('/').pop() || slug);
346
+ const finalSlug = frontmatterSlug || slug;
347
+ const url = slugToUrl(finalSlug);
348
+ const headings = uniqueList(chunkEntries.map((chunk) => chunk.heading).filter(Boolean));
349
+ const docId = makeDocId(finalSlug);
350
+ const aliases = buildAliasCandidates({
351
+ title: pageTitle,
352
+ slug: finalSlug,
353
+ description: frontmatterDescription,
354
+ headings,
355
+ plainText,
356
+ });
357
+
358
+ documents.push({
359
+ doc_id: docId,
360
+ title: pageTitle,
361
+ slug: finalSlug,
362
+ url,
363
+ description: frontmatterDescription,
364
+ aliases,
365
+ headings,
366
+ raw_markdown: rawMarkdown,
367
+ plain_text: plainText,
368
+ });
275
369
 
276
- for (const chunk of chunks) {
370
+ chunkEntries.forEach((chunk, index) => {
277
371
  tasks.push({
278
- id: makeId(slug, counter++),
279
- url,
280
- title: pageTitle,
372
+ chunk_id: makeChunkId(docId, index),
373
+ doc_id: docId,
281
374
  section: chunk.heading,
282
375
  text: chunk.text,
283
376
  });
284
- }
377
+ });
285
378
  }
286
379
 
287
380
  const total = tasks.length;
@@ -300,14 +393,12 @@ export async function buildIndex() {
300
393
 
301
394
  for (const task of tasks) {
302
395
  const embedding = await embedText(task.text);
303
- docs.push({ ...task, embedding });
396
+ chunks.push({ ...task, embedding });
304
397
  done += 1;
305
398
  renderProgress(false);
306
- // 让出事件循环,避免长时间阻塞MCP握手/请求处理
307
- if (done % 10 === 0) {
308
- await new Promise((resolve) => setImmediate(resolve));
309
- }
399
+ if (done % 10 === 0) await new Promise((resolve) => setImmediate(resolve));
310
400
  }
401
+
311
402
  if (IS_STDIO ? process.stderr.isTTY : process.stdout.isTTY) {
312
403
  const stream = IS_STDIO ? process.stderr : process.stdout;
313
404
  stream.write(`\r索引构建完成:${done}/${total}\n`);
@@ -316,26 +407,34 @@ export async function buildIndex() {
316
407
  }
317
408
 
318
409
  const payload = {
319
- version: 1,
410
+ version: INDEX_VERSION,
320
411
  generatedAt: new Date().toISOString(),
321
412
  source: {
322
413
  root: 'src/content/docs',
323
- split: 'heading+paragraph',
414
+ split: 'document+heading+paragraph',
324
415
  model: MODEL_ID,
325
416
  dim: MODEL_DIM,
326
417
  },
327
- docs,
418
+ documents,
419
+ chunks,
328
420
  };
329
421
  await fs.writeFile(INDEX_PATH, JSON.stringify(payload, null, 2));
330
- return { count: docs.length, path: INDEX_PATH };
422
+ return {
423
+ documentCount: documents.length,
424
+ chunkCount: chunks.length,
425
+ path: INDEX_PATH,
426
+ };
331
427
  }
332
428
 
333
429
  export async function loadIndex() {
334
430
  const { INDEX_PATH } = getPaths();
335
431
  const raw = await fs.readFile(INDEX_PATH, 'utf-8');
336
432
  const data = JSON.parse(raw);
337
- if (!Array.isArray(data?.docs)) {
338
- throw new Error('索引文件格式错误,未找到docs数组');
433
+ if (data?.version !== INDEX_VERSION) {
434
+ throw new Error('索引版本过旧,需要重建');
435
+ }
436
+ if (!Array.isArray(data?.documents) || !Array.isArray(data?.chunks)) {
437
+ throw new Error('索引文件格式错误,未找到documents或chunks数组');
339
438
  }
340
439
  return data;
341
440
  }
@@ -363,7 +462,7 @@ function cosineSimilarity(a, b) {
363
462
  let dot = 0;
364
463
  let normA = 0;
365
464
  let normB = 0;
366
- for (let i = 0; i < a.length; i++) {
465
+ for (let i = 0; i < a.length; i += 1) {
367
466
  dot += a[i] * b[i];
368
467
  normA += a[i] * a[i];
369
468
  normB += b[i] * b[i];
@@ -371,15 +470,221 @@ function cosineSimilarity(a, b) {
371
470
  return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
372
471
  }
373
472
 
374
- export async function searchDocs(query, topK = 5) {
473
+ function expandQueryTerms(query) {
474
+ const normalizedQuery = normalizeForMatch(query);
475
+ const baseTerms = tokenize(query);
476
+ const expanded = new Set(baseTerms);
477
+ for (const key of Object.keys(QUERY_SYNONYMS)) {
478
+ if (normalizedQuery.includes(normalizeForMatch(key))) {
479
+ expanded.add(key);
480
+ }
481
+ }
482
+ for (const term of baseTerms) {
483
+ for (const synonym of QUERY_SYNONYMS[term] || []) {
484
+ expanded.add(synonym);
485
+ }
486
+ }
487
+ return [...expanded];
488
+ }
489
+
490
+ function countContains(text, terms) {
491
+ const normalized = normalizeForMatch(text);
492
+ if (!normalized) return 0;
493
+ let count = 0;
494
+ for (const term of terms) {
495
+ if (normalized.includes(normalizeForMatch(term))) count += 1;
496
+ }
497
+ return count;
498
+ }
499
+
500
+ function makeSnippetSummary(text, maxLength = 180) {
501
+ const compact = normalizeWhitespace(text);
502
+ if (compact.length <= maxLength) return compact;
503
+ return `${compact.slice(0, maxLength - 1)}...`;
504
+ }
505
+
506
+ function scoreDocument(doc, query, terms) {
507
+ const title = normalizeForMatch(doc.title);
508
+ const slug = normalizeForMatch(doc.slug);
509
+ const url = normalizeForMatch(doc.url);
510
+ const aliasText = normalizeForMatch(doc.aliases.join(' '));
511
+ const headingText = normalizeForMatch(doc.headings.join(' '));
512
+ const bodyText = normalizeForMatch(doc.plain_text);
513
+ const exactQuery = normalizeForMatch(query);
514
+ let score = 0;
515
+ const matchedBy = new Set();
516
+
517
+ if (exactQuery && (title === exactQuery || slug === exactQuery || url === exactQuery)) {
518
+ score += 12;
519
+ matchedBy.add('title_exact');
520
+ }
521
+
522
+ for (const alias of doc.aliases) {
523
+ if (normalizeForMatch(alias) === exactQuery && exactQuery) {
524
+ score += 10;
525
+ matchedBy.add('alias_match');
526
+ break;
527
+ }
528
+ }
529
+
530
+ if (exactQuery && slug.includes(exactQuery)) {
531
+ score += 6;
532
+ matchedBy.add('slug_match');
533
+ }
534
+ if (exactQuery && title.includes(exactQuery) && title !== exactQuery) {
535
+ score += 5;
536
+ matchedBy.add('title_match');
537
+ }
538
+ if (exactQuery && aliasText.includes(exactQuery)) {
539
+ score += 4;
540
+ matchedBy.add('alias_match');
541
+ }
542
+
543
+ const titleHits = countContains(doc.title, terms);
544
+ const slugHits = countContains(doc.slug, terms);
545
+ const aliasHits = countContains(doc.aliases.join(' '), terms);
546
+ const headingHits = countContains(doc.headings.join(' '), terms);
547
+ const bodyHits = countContains(doc.plain_text, terms);
548
+
549
+ if (titleHits > 0) matchedBy.add('title_match');
550
+ if (slugHits > 0) matchedBy.add('slug_match');
551
+ if (aliasHits > 0) matchedBy.add('alias_match');
552
+ if (bodyHits > 0) matchedBy.add('keyword_body');
553
+
554
+ score += titleHits * 2.8;
555
+ score += slugHits * 2.4;
556
+ score += aliasHits * 2.2;
557
+ score += headingHits * 1.4;
558
+ score += Math.min(bodyHits, 6) * 0.8;
559
+
560
+ if (/^[a-z][a-z0-9]+(?:[A-Z][a-z0-9]+)+$/.test(query.trim()) && doc.title === query.trim()) {
561
+ score += 8;
562
+ matchedBy.add('title_exact');
563
+ }
564
+
565
+ return { score, matchedBy: [...matchedBy] };
566
+ }
567
+
568
+ function scoreChunk(chunk, terms, queryEmbedding) {
569
+ const keywordHits = countContains(chunk.text, terms) + countContains(chunk.section, terms) * 0.8;
570
+ let score = keywordHits * 1.1;
571
+ let semanticScore = null;
572
+ if (queryEmbedding) {
573
+ semanticScore = cosineSimilarity(queryEmbedding, chunk.embedding);
574
+ score += Math.max(semanticScore, 0) * 4;
575
+ }
576
+ return {
577
+ score,
578
+ semanticScore,
579
+ };
580
+ }
581
+
582
+ function buildDocSummary(snippets) {
583
+ if (!snippets.length) return '';
584
+ const joined = snippets
585
+ .slice(0, 2)
586
+ .map((snippet) => snippet.text)
587
+ .join(' ');
588
+ return makeSnippetSummary(joined, 220);
589
+ }
590
+
591
+ export async function discoverDocs(query, options = {}) {
592
+ const trimmedQuery = normalizeWhitespace(query || '');
593
+ if (!trimmedQuery) throw new Error('query不能为空');
594
+
595
+ const topK = Number(options.topK || 5);
596
+ const mode = ['hybrid', 'keyword', 'semantic'].includes(options.mode) ? options.mode : 'hybrid';
375
597
  const index = await loadIndex();
376
- const queryEmbedding = await embedText(query);
598
+ const terms = expandQueryTerms(trimmedQuery);
599
+ const queryEmbedding = mode === 'keyword' ? null : await embedText(trimmedQuery);
600
+ const chunkMap = new Map();
601
+
602
+ for (const chunk of index.chunks) {
603
+ const result = scoreChunk(chunk, terms, mode === 'semantic' || mode === 'hybrid' ? queryEmbedding : null);
604
+ const list = chunkMap.get(chunk.doc_id) || [];
605
+ list.push({
606
+ section: chunk.section || '',
607
+ text: chunk.text,
608
+ score: result.score,
609
+ semanticScore: result.semanticScore,
610
+ });
611
+ chunkMap.set(chunk.doc_id, list);
612
+ }
613
+
614
+ const results = index.documents
615
+ .map((doc) => {
616
+ const docScore = scoreDocument(doc, trimmedQuery, terms);
617
+ const scoredChunks = (chunkMap.get(doc.doc_id) || [])
618
+ .filter((item) => item.score > 0 || item.semanticScore === null || item.semanticScore > 0.18)
619
+ .sort((a, b) => b.score - a.score);
620
+
621
+ const bestChunk = scoredChunks[0];
622
+ let score = docScore.score;
623
+ if (bestChunk) {
624
+ score += bestChunk.score;
625
+ if (bestChunk.semanticScore && bestChunk.semanticScore > 0.25) {
626
+ docScore.matchedBy.push('semantic');
627
+ }
628
+ }
629
+ if (mode === 'semantic' && bestChunk?.semanticScore != null) {
630
+ score += Math.max(bestChunk.semanticScore, 0) * 3;
631
+ }
632
+
633
+ const snippets = scoredChunks.slice(0, 3).map((item) => ({
634
+ section: item.section,
635
+ text: makeSnippetSummary(item.text, 260),
636
+ score: Number(item.score.toFixed(4)),
637
+ }));
638
+
639
+ return {
640
+ doc_id: doc.doc_id,
641
+ title: doc.title,
642
+ url: doc.url,
643
+ score,
644
+ summary: buildDocSummary(snippets),
645
+ matched_by: uniqueList(docScore.matchedBy),
646
+ snippets,
647
+ };
648
+ })
649
+ .filter((doc) => doc.score > 0)
650
+ .sort((a, b) => b.score - a.score)
651
+ .slice(0, topK)
652
+ .map((doc) => ({
653
+ ...doc,
654
+ score: Number(doc.score.toFixed(4)),
655
+ }));
656
+
657
+ return {
658
+ query: trimmedQuery,
659
+ mode,
660
+ results,
661
+ };
662
+ }
377
663
 
378
- const scored = index.docs.map((doc) => ({
379
- text: doc.text,
380
- score: cosineSimilarity(queryEmbedding, doc.embedding),
381
- }));
664
+ function findDocument(index, identifier) {
665
+ const docId = normalizeWhitespace(identifier.doc_id || '');
666
+ const slug = normalizeWhitespace(identifier.slug || '');
667
+ const url = normalizeWhitespace(identifier.url || '');
382
668
 
383
- scored.sort((a, b) => b.score - a.score);
384
- return scored.slice(0, topK).map((item) => item.text);
669
+ return index.documents.find((doc) => {
670
+ if (docId && doc.doc_id === docId) return true;
671
+ if (slug && doc.slug === slug) return true;
672
+ if (url && doc.url === url) return true;
673
+ return false;
674
+ });
675
+ }
676
+
677
+ export async function readDoc(identifier = {}) {
678
+ const index = await loadIndex();
679
+ const doc = findDocument(index, identifier);
680
+ if (!doc) {
681
+ throw new Error('未找到匹配的文档,请提供有效的doc_id、slug或url');
682
+ }
683
+ return {
684
+ doc_id: doc.doc_id,
685
+ title: doc.title,
686
+ url: doc.url,
687
+ headings: doc.headings,
688
+ content: doc.raw_markdown,
689
+ };
385
690
  }
@@ -1,8 +1,9 @@
1
1
  import { FastMCP } from 'fastmcp';
2
2
  import { z } from 'zod';
3
- import { buildIndex, getPaths, isIndexStale, loadIndex, searchDocs } from './lib.mjs';
3
+ import { buildIndex, discoverDocs, getPaths, isIndexStale, loadIndex, readDoc } from './lib.mjs';
4
4
 
5
- const TOOL_NAME = 'search_docs';
5
+ const DISCOVER_TOOL_NAME = 'discover_docs';
6
+ const READ_TOOL_NAME = 'read_doc';
6
7
  const PORT = Number(process.env.MCP_HTTP_PORT || 8788);
7
8
  const IS_SILENT = process.env.MCP_SILENT === '1';
8
9
  const NO_COLOR = process.env.MCP_NO_COLOR === '1';
@@ -72,7 +73,7 @@ async function ensureIndex() {
72
73
  await buildIndex();
73
74
  }
74
75
  } catch {
75
- console.error(`未找到索引,开始重建:${INDEX_PATH}`);
76
+ console.error(`未找到可用索引,开始重建:${INDEX_PATH}`);
76
77
  await buildIndex();
77
78
  }
78
79
  }
@@ -87,32 +88,100 @@ function initIndexInBackground() {
87
88
  return initPromise;
88
89
  }
89
90
 
91
+ async function ensureReady() {
92
+ if (initPromise) {
93
+ await initPromise;
94
+ } else {
95
+ await initIndexInBackground();
96
+ }
97
+ }
98
+
99
+ function renderJsonPayload(payload) {
100
+ return {
101
+ content: [
102
+ {
103
+ type: 'text',
104
+ text: JSON.stringify(payload, null, 2),
105
+ },
106
+ ],
107
+ };
108
+ }
109
+
110
+ function renderError(message) {
111
+ return {
112
+ content: [{ type: 'text', text: message }],
113
+ isError: true,
114
+ };
115
+ }
116
+
90
117
  const server = new FastMCP({
91
118
  name: 'marginnote-docs-mcp',
92
119
  version: '0.1.0',
93
120
  });
94
121
 
95
122
  server.addTool({
96
- name: TOOL_NAME,
97
- description: '在本地文档索引中检索相关文本片段',
123
+ name: DISCOVER_TOOL_NAME,
124
+ description:
125
+ [
126
+ '发现与当前问题最相关的MarginNote文档。这个工具适合做第一步检索:先找对文档,再决定是否读取全文。',
127
+ '推荐用法:当用户问某个类、对象、字段、方法、返回值、示例、完整API时,先调用discover_docs。',
128
+ '如果结果已经出现明确目标文档,再调用read_doc读取整篇文档,不要只依赖片段回答“字段有哪些”“完整API是什么”。',
129
+ '当query中包含类名、方法名、属性名时,优先使用mode=hybrid或mode=keyword。',
130
+ '返回结果按文档聚合,每项包含doc_id、title、url、summary、matched_by和snippets,便于继续跳转。',
131
+ ].join('\n'),
98
132
  parameters: z.object({
99
- query: z.string().describe('检索关键词或问题'),
100
- top_k: z.number().optional().describe('返回片段数量'),
133
+ query: z.string().describe('用户的问题、关键词或API名,例如“mn卡片字段”“MbBookNote comments”“创建新笔记的方法”'),
134
+ top_k: z
135
+ .number()
136
+ .int()
137
+ .min(1)
138
+ .max(20)
139
+ .optional()
140
+ .describe('返回文档数量,默认5。通常3到8足够。'),
141
+ mode: z
142
+ .enum(['hybrid', 'keyword', 'semantic'])
143
+ .optional()
144
+ .describe('检索模式。默认hybrid;keyword适合精确API名;semantic适合自然语言描述。'),
101
145
  }),
102
- execute: async ({ query, top_k }) => {
103
- const topK = Number(top_k || 5);
104
- if (!query.trim()) {
105
- return { content: [{ type: 'text', text: 'query不能为空' }] };
146
+ execute: async ({ query, top_k, mode }) => {
147
+ try {
148
+ await ensureReady();
149
+ const payload = await discoverDocs(query, {
150
+ topK: top_k,
151
+ mode,
152
+ });
153
+ return renderJsonPayload(payload);
154
+ } catch (error) {
155
+ return renderError(error?.message || 'discover_docs执行失败');
106
156
  }
107
- if (initPromise) {
108
- await initPromise;
109
- } else {
110
- await initIndexInBackground();
157
+ },
158
+ });
159
+
160
+ server.addTool({
161
+ name: READ_TOOL_NAME,
162
+ description:
163
+ [
164
+ '读取某篇MarginNote文档的全文。这个工具适合做第二步检索:在discover_docs确认目标文档后,拉取完整字段、方法、返回值和示例。',
165
+ '推荐优先使用discover_docs返回的doc_id调用read_doc,避免slug或url歧义。',
166
+ '当用户追问“还有哪些字段”“完整API”“相关示例”“完整方法签名”时,应继续调用read_doc,而不是只根据片段猜测。',
167
+ ].join('\n'),
168
+ parameters: z
169
+ .object({
170
+ doc_id: z.string().optional().describe('discover_docs返回的doc_id,最推荐使用'),
171
+ slug: z.string().optional().describe('文档slug,例如reference/marginnote/mb-book-note'),
172
+ url: z.string().optional().describe('文档URL,例如/reference/marginnote/mb-book-note/'),
173
+ })
174
+ .refine((value) => Boolean(value.doc_id || value.slug || value.url), {
175
+ message: 'doc_id、slug、url至少需要提供一个',
176
+ }),
177
+ execute: async ({ doc_id, slug, url }) => {
178
+ try {
179
+ await ensureReady();
180
+ const payload = await readDoc({ doc_id, slug, url });
181
+ return renderJsonPayload(payload);
182
+ } catch (error) {
183
+ return renderError(error?.message || 'read_doc执行失败');
111
184
  }
112
- const results = await searchDocs(query, topK);
113
- return {
114
- content: results.map((text) => ({ type: 'text', text })),
115
- };
116
185
  },
117
186
  });
118
187
 
@@ -126,5 +195,4 @@ await server.start({
126
195
 
127
196
  renderSplash();
128
197
 
129
- // 默认自动构建,异步启动避免阻塞握手
130
198
  setTimeout(() => initIndexInBackground(), 0);
package/mcp/server.mjs CHANGED
@@ -1,8 +1,9 @@
1
1
  import { FastMCP } from 'fastmcp';
2
2
  import { z } from 'zod';
3
- import { buildIndex, getPaths, isIndexStale, loadIndex, searchDocs } from './lib.mjs';
3
+ import { buildIndex, discoverDocs, getPaths, isIndexStale, loadIndex, readDoc } from './lib.mjs';
4
4
 
5
- const TOOL_NAME = 'search_docs';
5
+ const DISCOVER_TOOL_NAME = 'discover_docs';
6
+ const READ_TOOL_NAME = 'read_doc';
6
7
  const IS_SILENT = process.env.MCP_SILENT === '1';
7
8
  const NO_COLOR = process.env.MCP_NO_COLOR === '1';
8
9
 
@@ -26,7 +27,6 @@ function stringWidth(text) {
26
27
  for (const char of plain) {
27
28
  const code = char.codePointAt(0);
28
29
  if (!code) continue;
29
- // CJK / Fullwidth / Wide characters
30
30
  const isWide =
31
31
  (code >= 0x1100 && code <= 0x115f) ||
32
32
  (code === 0x2329 || code === 0x232a) ||
@@ -76,7 +76,7 @@ async function ensureIndex() {
76
76
  await buildIndex();
77
77
  }
78
78
  } catch {
79
- logError(`未找到索引,开始重建:${INDEX_PATH}`);
79
+ logError(`未找到可用索引,开始重建:${INDEX_PATH}`);
80
80
  await buildIndex();
81
81
  }
82
82
  }
@@ -91,6 +91,32 @@ function initIndexInBackground() {
91
91
  return initPromise;
92
92
  }
93
93
 
94
+ async function ensureReady() {
95
+ if (initPromise) {
96
+ await initPromise;
97
+ } else {
98
+ await initIndexInBackground();
99
+ }
100
+ }
101
+
102
+ function renderJsonPayload(payload) {
103
+ return {
104
+ content: [
105
+ {
106
+ type: 'text',
107
+ text: JSON.stringify(payload, null, 2),
108
+ },
109
+ ],
110
+ };
111
+ }
112
+
113
+ function renderError(message) {
114
+ return {
115
+ content: [{ type: 'text', text: message }],
116
+ isError: true,
117
+ };
118
+ }
119
+
94
120
  const logger = IS_SILENT
95
121
  ? {
96
122
  debug() {},
@@ -114,26 +140,68 @@ const server = new FastMCP({
114
140
  });
115
141
 
116
142
  server.addTool({
117
- name: TOOL_NAME,
118
- description: '在本地文档索引中检索相关文本片段',
143
+ name: DISCOVER_TOOL_NAME,
144
+ description:
145
+ [
146
+ '发现与当前问题最相关的MarginNote文档。这个工具适合做第一步检索:先找对文档,再决定是否读取全文。',
147
+ '推荐用法:当用户问某个类、对象、字段、方法、返回值、示例、完整API时,先调用discover_docs。',
148
+ '如果结果已经出现明确目标文档,再调用read_doc读取整篇文档,不要只依赖片段回答“字段有哪些”“完整API是什么”。',
149
+ '当query中包含类名、方法名、属性名时,优先使用mode=hybrid或mode=keyword。',
150
+ '返回结果按文档聚合,每项包含doc_id、title、url、summary、matched_by和snippets,便于继续跳转。',
151
+ ].join('\n'),
119
152
  parameters: z.object({
120
- query: z.string().describe('检索关键词或问题'),
121
- top_k: z.number().optional().describe('返回片段数量'),
153
+ query: z.string().describe('用户的问题、关键词或API名,例如“mn卡片字段”“MbBookNote comments”“创建新笔记的方法”'),
154
+ top_k: z
155
+ .number()
156
+ .int()
157
+ .min(1)
158
+ .max(20)
159
+ .optional()
160
+ .describe('返回文档数量,默认5。通常3到8足够。'),
161
+ mode: z
162
+ .enum(['hybrid', 'keyword', 'semantic'])
163
+ .optional()
164
+ .describe('检索模式。默认hybrid;keyword适合精确API名;semantic适合自然语言描述。'),
122
165
  }),
123
- execute: async ({ query, top_k }) => {
124
- const topK = Number(top_k || 5);
125
- if (!query.trim()) {
126
- return { content: [{ type: 'text', text: 'query不能为空' }] };
166
+ execute: async ({ query, top_k, mode }) => {
167
+ try {
168
+ await ensureReady();
169
+ const payload = await discoverDocs(query, {
170
+ topK: top_k,
171
+ mode,
172
+ });
173
+ return renderJsonPayload(payload);
174
+ } catch (error) {
175
+ return renderError(error?.message || 'discover_docs执行失败');
127
176
  }
128
- if (initPromise) {
129
- await initPromise;
130
- } else {
131
- await initIndexInBackground();
177
+ },
178
+ });
179
+
180
+ server.addTool({
181
+ name: READ_TOOL_NAME,
182
+ description:
183
+ [
184
+ '读取某篇MarginNote文档的全文。这个工具适合做第二步检索:在discover_docs确认目标文档后,拉取完整字段、方法、返回值和示例。',
185
+ '推荐优先使用discover_docs返回的doc_id调用read_doc,避免slug或url歧义。',
186
+ '当用户追问“还有哪些字段”“完整API”“相关示例”“完整方法签名”时,应继续调用read_doc,而不是只根据片段猜测。',
187
+ ].join('\n'),
188
+ parameters: z
189
+ .object({
190
+ doc_id: z.string().optional().describe('discover_docs返回的doc_id,最推荐使用'),
191
+ slug: z.string().optional().describe('文档slug,例如reference/marginnote/mb-book-note'),
192
+ url: z.string().optional().describe('文档URL,例如/reference/marginnote/mb-book-note/'),
193
+ })
194
+ .refine((value) => Boolean(value.doc_id || value.slug || value.url), {
195
+ message: 'doc_id、slug、url至少需要提供一个',
196
+ }),
197
+ execute: async ({ doc_id, slug, url }) => {
198
+ try {
199
+ await ensureReady();
200
+ const payload = await readDoc({ doc_id, slug, url });
201
+ return renderJsonPayload(payload);
202
+ } catch (error) {
203
+ return renderError(error?.message || 'read_doc执行失败');
132
204
  }
133
- const results = await searchDocs(query, topK);
134
- return {
135
- content: results.map((text) => ({ type: 'text', text })),
136
- };
137
205
  },
138
206
  });
139
207
 
@@ -143,5 +211,4 @@ await server.start({
143
211
 
144
212
  renderSplash();
145
213
 
146
- // 默认自动构建,异步启动避免阻塞握手
147
214
  setTimeout(() => initIndexInBackground(), 0);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "mn-docs-mcp",
3
3
  "type": "module",
4
- "version": "0.5.2",
4
+ "version": "0.6.1",
5
5
  "repository": {
6
6
  "type": "git",
7
7
  "url": "https://github.com/Temsys-Shen/marginnote-addon-docs.git"