docusaurus-plugin-mcp-server 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs DELETED
@@ -1,1056 +0,0 @@
1
- import path from 'path';
2
- import fs3 from 'fs-extra';
3
- import pMap from 'p-map';
4
- import { unified } from 'unified';
5
- import rehypeParse from 'rehype-parse';
6
- import { select } from 'hast-util-select';
7
- import { toString } from 'hast-util-to-string';
8
- import { toHtml } from 'hast-util-to-html';
9
- import rehypeRemark from 'rehype-remark';
10
- import remarkStringify from 'remark-stringify';
11
- import remarkGfm from 'remark-gfm';
12
- import FlexSearch from 'flexsearch';
13
- import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
14
- import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
15
- import { WebStandardStreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js';
16
- import { z } from 'zod';
17
-
18
- // src/plugin/docusaurus-plugin.ts
19
-
20
- // src/types/index.ts
21
- var DEFAULT_OPTIONS = {
22
- outputDir: "mcp",
23
- contentSelectors: ["article", "main", ".main-wrapper", '[role="main"]'],
24
- excludeSelectors: [
25
- "nav",
26
- "header",
27
- "footer",
28
- "aside",
29
- '[role="navigation"]',
30
- '[role="banner"]',
31
- '[role="contentinfo"]'
32
- ],
33
- minContentLength: 50,
34
- server: {
35
- name: "docs-mcp-server",
36
- version: "1.0.0"
37
- },
38
- excludeRoutes: ["/404*", "/search*"],
39
- indexers: void 0,
40
- // Default: ['flexsearch'] applied at runtime
41
- search: "flexsearch"
42
- };
43
- function filterRoutes(routes, excludePatterns) {
44
- return routes.filter((route) => {
45
- return !excludePatterns.some((pattern) => {
46
- const regexPattern = pattern.replace(/\*/g, ".*").replace(/\?/g, ".");
47
- const regex = new RegExp(`^${regexPattern}$`);
48
- return regex.test(route.path);
49
- });
50
- });
51
- }
52
- async function discoverHtmlFiles(outDir) {
53
- const routes = [];
54
- async function scanDirectory(dir) {
55
- const entries = await fs3.readdir(dir, { withFileTypes: true });
56
- for (const entry of entries) {
57
- const fullPath = path.join(dir, entry.name);
58
- if (entry.isDirectory()) {
59
- if (["assets", "img", "static"].includes(entry.name)) {
60
- continue;
61
- }
62
- await scanDirectory(fullPath);
63
- } else if (entry.name === "index.html") {
64
- const relativePath = path.relative(outDir, fullPath);
65
- let routePath = "/" + path.dirname(relativePath).replace(/\\/g, "/");
66
- if (routePath === "/.") {
67
- routePath = "/";
68
- }
69
- routes.push({
70
- path: routePath,
71
- htmlPath: fullPath
72
- });
73
- }
74
- }
75
- }
76
- await scanDirectory(outDir);
77
- return routes;
78
- }
79
- async function collectRoutes(outDir, excludePatterns) {
80
- const allRoutes = await discoverHtmlFiles(outDir);
81
- const filteredRoutes = filterRoutes(allRoutes, excludePatterns);
82
- const uniqueRoutes = /* @__PURE__ */ new Map();
83
- for (const route of filteredRoutes) {
84
- if (!uniqueRoutes.has(route.path)) {
85
- uniqueRoutes.set(route.path, route);
86
- }
87
- }
88
- return Array.from(uniqueRoutes.values());
89
- }
90
- function parseHtml(html) {
91
- const processor = unified().use(rehypeParse);
92
- return processor.parse(html);
93
- }
94
- async function parseHtmlFile(filePath) {
95
- const html = await fs3.readFile(filePath, "utf-8");
96
- return parseHtml(html);
97
- }
98
- function extractTitle(tree) {
99
- const h1Element = select("h1", tree);
100
- if (h1Element) {
101
- return toString(h1Element).trim();
102
- }
103
- const titleElement = select("title", tree);
104
- if (titleElement) {
105
- return toString(titleElement).trim();
106
- }
107
- return "Untitled";
108
- }
109
- function extractDescription(tree) {
110
- const metaDescription = select('meta[name="description"]', tree);
111
- if (metaDescription && metaDescription.properties?.content) {
112
- return String(metaDescription.properties.content);
113
- }
114
- const ogDescription = select('meta[property="og:description"]', tree);
115
- if (ogDescription && ogDescription.properties?.content) {
116
- return String(ogDescription.properties.content);
117
- }
118
- return "";
119
- }
120
- function findContentElement(tree, selectors) {
121
- for (const selector of selectors) {
122
- const element = select(selector, tree);
123
- if (element) {
124
- const text = toString(element).trim();
125
- if (text.length > 50) {
126
- return element;
127
- }
128
- }
129
- }
130
- return null;
131
- }
132
- var ALWAYS_EXCLUDED = ["script", "style", "noscript"];
133
- function cleanContentElement(element, excludeSelectors) {
134
- const allSelectors = [...ALWAYS_EXCLUDED, ...excludeSelectors];
135
- const cloned = JSON.parse(JSON.stringify(element));
136
- function removeUnwanted(node) {
137
- if (!node.children) return;
138
- node.children = node.children.filter((child) => {
139
- if (child.type !== "element") return true;
140
- const childElement = child;
141
- for (const selector of allSelectors) {
142
- if (selector.startsWith(".")) {
143
- const className = selector.slice(1);
144
- const classes = childElement.properties?.className;
145
- if (Array.isArray(classes) && classes.includes(className)) {
146
- return false;
147
- }
148
- if (typeof classes === "string" && classes.includes(className)) {
149
- return false;
150
- }
151
- } else if (selector.startsWith("[")) {
152
- const match = selector.match(/\[([^=]+)="([^"]+)"\]/);
153
- if (match) {
154
- const [, attr, value] = match;
155
- if (attr && childElement.properties?.[attr] === value) {
156
- return false;
157
- }
158
- }
159
- } else {
160
- if (childElement.tagName === selector) {
161
- return false;
162
- }
163
- }
164
- }
165
- removeUnwanted(childElement);
166
- return true;
167
- });
168
- }
169
- removeUnwanted(cloned);
170
- return cloned;
171
- }
172
- async function extractContent(filePath, options) {
173
- const tree = await parseHtmlFile(filePath);
174
- const title = extractTitle(tree);
175
- const description = extractDescription(tree);
176
- let contentElement = findContentElement(tree, options.contentSelectors);
177
- if (!contentElement) {
178
- const body = select("body", tree);
179
- if (body) {
180
- contentElement = body;
181
- }
182
- }
183
- let contentHtml = "";
184
- if (contentElement) {
185
- const cleanedElement = cleanContentElement(contentElement, options.excludeSelectors);
186
- contentHtml = toHtml(cleanedElement);
187
- }
188
- return {
189
- title,
190
- description,
191
- contentHtml
192
- };
193
- }
194
- async function htmlToMarkdown(html) {
195
- if (!html || html.trim().length === 0) {
196
- return "";
197
- }
198
- try {
199
- const processor = unified().use(rehypeParse, { fragment: true }).use(rehypeRemark).use(remarkGfm).use(remarkStringify, {
200
- bullet: "-",
201
- fences: true
202
- });
203
- const result = await processor.process(html);
204
- let markdown = String(result);
205
- markdown = cleanMarkdown(markdown);
206
- return markdown;
207
- } catch (error) {
208
- console.error("Error converting HTML to Markdown:", error);
209
- return extractTextFallback(html);
210
- }
211
- }
212
- function cleanMarkdown(markdown) {
213
- return markdown.replace(/\n{3,}/g, "\n\n").split("\n").map((line) => line.trimEnd()).join("\n").trim() + "\n";
214
- }
215
- function extractTextFallback(html) {
216
- let text = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "");
217
- text = text.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "");
218
- text = text.replace(/<br\s*\/?>/gi, "\n");
219
- text = text.replace(/<\/p>/gi, "\n\n");
220
- text = text.replace(/<\/h[1-6]>/gi, "\n\n");
221
- text = text.replace(/<\/li>/gi, "\n");
222
- text = text.replace(/<\/div>/gi, "\n");
223
- text = text.replace(/<[^>]+>/g, "");
224
- text = text.replace(/&nbsp;/g, " ");
225
- text = text.replace(/&amp;/g, "&");
226
- text = text.replace(/&lt;/g, "<");
227
- text = text.replace(/&gt;/g, ">");
228
- text = text.replace(/&quot;/g, '"');
229
- text = text.replace(/&#39;/g, "'");
230
- text = text.replace(/[ \t]+/g, " ");
231
- text = text.replace(/\n{3,}/g, "\n\n");
232
- return text.trim();
233
- }
234
-
235
- // src/processing/heading-extractor.ts
236
- function extractHeadingsFromMarkdown(markdown) {
237
- const headings = [];
238
- const lines = markdown.split("\n");
239
- let currentOffset = 0;
240
- for (let i = 0; i < lines.length; i++) {
241
- const line = lines[i] ?? "";
242
- const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s+\{#([^}]+)\})?$/);
243
- if (headingMatch) {
244
- const hashes = headingMatch[1] ?? "";
245
- const level = hashes.length;
246
- let text = headingMatch[2] ?? "";
247
- let id = headingMatch[3] ?? "";
248
- if (!id) {
249
- id = generateHeadingId(text);
250
- }
251
- text = text.replace(/\*\*([^*]+)\*\*/g, "$1");
252
- text = text.replace(/_([^_]+)_/g, "$1");
253
- text = text.replace(/`([^`]+)`/g, "$1");
254
- headings.push({
255
- level,
256
- text: text.trim(),
257
- id,
258
- startOffset: currentOffset,
259
- endOffset: -1
260
- // Will be calculated below
261
- });
262
- }
263
- currentOffset += line.length + 1;
264
- }
265
- for (let i = 0; i < headings.length; i++) {
266
- const current = headings[i];
267
- if (!current) continue;
268
- let endOffset = markdown.length;
269
- for (let j = i + 1; j < headings.length; j++) {
270
- const next = headings[j];
271
- if (next && next.level <= current.level) {
272
- endOffset = next.startOffset;
273
- break;
274
- }
275
- }
276
- current.endOffset = endOffset;
277
- }
278
- return headings;
279
- }
280
- function generateHeadingId(text) {
281
- return text.toLowerCase().replace(/[^\w\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
282
- }
283
- function extractSection(markdown, headingId, headings) {
284
- const heading = headings.find((h) => h.id === headingId);
285
- if (!heading) {
286
- return null;
287
- }
288
- return markdown.slice(heading.startOffset, heading.endOffset).trim();
289
- }
290
- var FIELD_WEIGHTS = {
291
- title: 3,
292
- headings: 2,
293
- description: 1.5,
294
- content: 1
295
- };
296
- function englishStemmer(word) {
297
- if (word.length <= 3) return word;
298
- return word.replace(/ing$/, "").replace(/tion$/, "t").replace(/sion$/, "s").replace(/([^aeiou])ed$/, "$1").replace(/([^aeiou])es$/, "$1").replace(/ly$/, "").replace(/ment$/, "").replace(/ness$/, "").replace(/ies$/, "y").replace(/([^s])s$/, "$1");
299
- }
300
- function createSearchIndex() {
301
- return new FlexSearch.Document({
302
- // Use 'full' tokenization for substring matching
303
- // This allows "auth" to match "authentication"
304
- tokenize: "full",
305
- // Enable caching for faster repeated queries
306
- cache: 100,
307
- // Higher resolution = more granular ranking (1-9)
308
- resolution: 9,
309
- // Enable context for phrase/proximity matching
310
- context: {
311
- resolution: 2,
312
- depth: 2,
313
- bidirectional: true
314
- },
315
- // Apply stemming to normalize word forms
316
- encode: (str) => {
317
- const words = str.toLowerCase().split(/[\s\-_.,;:!?'"()[\]{}]+/);
318
- return words.filter(Boolean).map(englishStemmer);
319
- },
320
- // Document schema
321
- document: {
322
- id: "id",
323
- // Index these fields for searching
324
- index: ["title", "content", "headings", "description"],
325
- // Store these fields in results (for enriched queries)
326
- store: ["title", "description"]
327
- }
328
- });
329
- }
330
- function addDocumentToIndex(index, doc, baseUrl) {
331
- const id = baseUrl ? `${baseUrl.replace(/\/$/, "")}${doc.route}` : doc.route;
332
- const indexable = {
333
- id,
334
- title: doc.title,
335
- content: doc.markdown,
336
- headings: doc.headings.map((h) => h.text).join(" "),
337
- description: doc.description
338
- };
339
- index.add(indexable);
340
- }
341
- function buildSearchIndex(docs, baseUrl) {
342
- const index = createSearchIndex();
343
- for (const doc of docs) {
344
- addDocumentToIndex(index, doc, baseUrl);
345
- }
346
- return index;
347
- }
348
- function searchIndex(index, docs, query, options = {}) {
349
- const { limit = 5 } = options;
350
- const rawResults = index.search(query, {
351
- limit: limit * 3,
352
- // Get extra results for better ranking after weighting
353
- enrich: true
354
- });
355
- const docScores = /* @__PURE__ */ new Map();
356
- for (const fieldResult of rawResults) {
357
- const field = fieldResult.field;
358
- const fieldWeight = FIELD_WEIGHTS[field] ?? 1;
359
- const results2 = fieldResult.result;
360
- for (let i = 0; i < results2.length; i++) {
361
- const item = results2[i];
362
- if (!item) continue;
363
- const docId = typeof item === "string" ? item : item.id;
364
- const positionScore = (results2.length - i) / results2.length;
365
- const weightedScore = positionScore * fieldWeight;
366
- const existingScore = docScores.get(docId) ?? 0;
367
- docScores.set(docId, existingScore + weightedScore);
368
- }
369
- }
370
- const results = [];
371
- for (const [docId, score] of docScores) {
372
- const doc = docs[docId];
373
- if (!doc) continue;
374
- results.push({
375
- url: docId,
376
- // docId is the full URL when indexed with baseUrl
377
- route: doc.route,
378
- title: doc.title,
379
- score,
380
- snippet: generateSnippet(doc.markdown, query),
381
- matchingHeadings: findMatchingHeadings(doc, query)
382
- });
383
- }
384
- results.sort((a, b) => b.score - a.score);
385
- return results.slice(0, limit);
386
- }
387
- function generateSnippet(markdown, query) {
388
- const maxLength = 200;
389
- const queryTerms = query.toLowerCase().split(/\s+/).filter(Boolean);
390
- if (queryTerms.length === 0) {
391
- return markdown.slice(0, maxLength) + (markdown.length > maxLength ? "..." : "");
392
- }
393
- const lowerMarkdown = markdown.toLowerCase();
394
- let bestIndex = -1;
395
- let bestTerm = "";
396
- const allTerms = [...queryTerms, ...queryTerms.map(englishStemmer)];
397
- for (const term of allTerms) {
398
- const index = lowerMarkdown.indexOf(term);
399
- if (index !== -1 && (bestIndex === -1 || index < bestIndex)) {
400
- bestIndex = index;
401
- bestTerm = term;
402
- }
403
- }
404
- if (bestIndex === -1) {
405
- return markdown.slice(0, maxLength) + (markdown.length > maxLength ? "..." : "");
406
- }
407
- const snippetStart = Math.max(0, bestIndex - 50);
408
- const snippetEnd = Math.min(markdown.length, bestIndex + bestTerm.length + 150);
409
- let snippet = markdown.slice(snippetStart, snippetEnd);
410
- snippet = snippet.replace(/^#{1,6}\s+/gm, "").replace(/\[([^\]]+)\]\([^)]+\)/g, "$1").replace(/!\[([^\]]*)\]\([^)]+\)/g, "").replace(/```[a-z]*\n?/g, "").replace(/`([^`]+)`/g, "$1").replace(/\s+/g, " ").trim();
411
- const prefix = snippetStart > 0 ? "..." : "";
412
- const suffix = snippetEnd < markdown.length ? "..." : "";
413
- return prefix + snippet + suffix;
414
- }
415
- function findMatchingHeadings(doc, query) {
416
- const queryTerms = query.toLowerCase().split(/\s+/).filter(Boolean);
417
- const allTerms = [...queryTerms, ...queryTerms.map(englishStemmer)];
418
- const matching = [];
419
- for (const heading of doc.headings) {
420
- const headingLower = heading.text.toLowerCase();
421
- const headingStemmed = headingLower.split(/\s+/).map(englishStemmer).join(" ");
422
- if (allTerms.some(
423
- (term) => headingLower.includes(term) || headingStemmed.includes(englishStemmer(term))
424
- )) {
425
- matching.push(heading.text);
426
- }
427
- }
428
- return matching.slice(0, 3);
429
- }
430
- async function exportSearchIndex(index) {
431
- const exportData = {};
432
- await index.export((key, data) => {
433
- exportData[key] = data;
434
- });
435
- return exportData;
436
- }
437
- async function importSearchIndex(data) {
438
- const index = createSearchIndex();
439
- for (const [key, value] of Object.entries(data)) {
440
- await index.import(
441
- key,
442
- value
443
- );
444
- }
445
- return index;
446
- }
447
-
448
- // src/providers/indexers/flexsearch-indexer.ts
449
- var FlexSearchIndexer = class {
450
- name = "flexsearch";
451
- baseUrl = "";
452
- docsIndex = {};
453
- exportedIndex = null;
454
- docCount = 0;
455
- /**
456
- * FlexSearch indexer always runs by default.
457
- * It respects the indexers configuration - if not included, it won't run.
458
- */
459
- shouldRun() {
460
- return true;
461
- }
462
- async initialize(context) {
463
- this.baseUrl = context.baseUrl.replace(/\/$/, "");
464
- this.docsIndex = {};
465
- this.exportedIndex = null;
466
- this.docCount = 0;
467
- }
468
- async indexDocuments(docs) {
469
- this.docCount = docs.length;
470
- for (const doc of docs) {
471
- const fullUrl = `${this.baseUrl}${doc.route}`;
472
- this.docsIndex[fullUrl] = doc;
473
- }
474
- console.log("[FlexSearch] Building search index...");
475
- const searchIndex2 = buildSearchIndex(docs, this.baseUrl);
476
- this.exportedIndex = await exportSearchIndex(searchIndex2);
477
- console.log(`[FlexSearch] Indexed ${this.docCount} documents`);
478
- }
479
- async finalize() {
480
- const artifacts = /* @__PURE__ */ new Map();
481
- artifacts.set("docs.json", this.docsIndex);
482
- artifacts.set("search-index.json", this.exportedIndex);
483
- return artifacts;
484
- }
485
- async getManifestData() {
486
- return {
487
- searchEngine: "flexsearch"
488
- };
489
- }
490
- };
491
- var FlexSearchProvider = class {
492
- name = "flexsearch";
493
- docs = null;
494
- searchIndex = null;
495
- ready = false;
496
- async initialize(_context, initData) {
497
- if (!initData) {
498
- throw new Error("[FlexSearch] SearchProviderInitData required for FlexSearch provider");
499
- }
500
- if (initData.docs && initData.indexData) {
501
- this.docs = initData.docs;
502
- this.searchIndex = await importSearchIndex(initData.indexData);
503
- this.ready = true;
504
- return;
505
- }
506
- if (initData.docsPath && initData.indexPath) {
507
- if (await fs3.pathExists(initData.docsPath)) {
508
- this.docs = await fs3.readJson(initData.docsPath);
509
- } else {
510
- throw new Error(`[FlexSearch] Docs file not found: ${initData.docsPath}`);
511
- }
512
- if (await fs3.pathExists(initData.indexPath)) {
513
- const indexData = await fs3.readJson(initData.indexPath);
514
- this.searchIndex = await importSearchIndex(indexData);
515
- } else {
516
- throw new Error(`[FlexSearch] Search index not found: ${initData.indexPath}`);
517
- }
518
- this.ready = true;
519
- return;
520
- }
521
- throw new Error(
522
- "[FlexSearch] Invalid init data: must provide either file paths (docsPath, indexPath) or pre-loaded data (docs, indexData)"
523
- );
524
- }
525
- isReady() {
526
- return this.ready && this.docs !== null && this.searchIndex !== null;
527
- }
528
- async search(query, options) {
529
- if (!this.isReady() || !this.docs || !this.searchIndex) {
530
- throw new Error("[FlexSearch] Provider not initialized");
531
- }
532
- const limit = options?.limit ?? 5;
533
- return searchIndex(this.searchIndex, this.docs, query, { limit });
534
- }
535
- async getDocument(url) {
536
- if (!this.docs) {
537
- throw new Error("[FlexSearch] Provider not initialized");
538
- }
539
- return this.docs[url] ?? null;
540
- }
541
- async healthCheck() {
542
- if (!this.isReady()) {
543
- return { healthy: false, message: "FlexSearch provider not initialized" };
544
- }
545
- const docCount = this.docs ? Object.keys(this.docs).length : 0;
546
- return {
547
- healthy: true,
548
- message: `FlexSearch provider ready with ${docCount} documents`
549
- };
550
- }
551
- /**
552
- * Get all loaded documents (for compatibility with existing server code)
553
- */
554
- getDocs() {
555
- return this.docs;
556
- }
557
- /**
558
- * Get the FlexSearch index (for compatibility with existing server code)
559
- */
560
- getSearchIndex() {
561
- return this.searchIndex;
562
- }
563
- };
564
-
565
- // src/providers/loader.ts
566
- async function loadIndexer(specifier) {
567
- if (specifier === "flexsearch") {
568
- return new FlexSearchIndexer();
569
- }
570
- try {
571
- const module = await import(specifier);
572
- const IndexerClass = module.default;
573
- if (typeof IndexerClass === "function") {
574
- const instance = new IndexerClass();
575
- if (!isContentIndexer(instance)) {
576
- throw new Error(
577
- `Invalid indexer module "${specifier}": does not implement ContentIndexer interface`
578
- );
579
- }
580
- return instance;
581
- }
582
- if (isContentIndexer(IndexerClass)) {
583
- return IndexerClass;
584
- }
585
- throw new Error(
586
- `Invalid indexer module "${specifier}": must export a default class or ContentIndexer instance`
587
- );
588
- } catch (error) {
589
- if (error instanceof Error && error.message.includes("Cannot find module")) {
590
- throw new Error(`Indexer module not found: "${specifier}". Check the path or package name.`);
591
- }
592
- throw error;
593
- }
594
- }
595
- async function loadSearchProvider(specifier) {
596
- if (specifier === "flexsearch") {
597
- return new FlexSearchProvider();
598
- }
599
- try {
600
- const module = await import(specifier);
601
- const ProviderClass = module.default;
602
- if (typeof ProviderClass === "function") {
603
- const instance = new ProviderClass();
604
- if (!isSearchProvider(instance)) {
605
- throw new Error(
606
- `Invalid search provider module "${specifier}": does not implement SearchProvider interface`
607
- );
608
- }
609
- return instance;
610
- }
611
- if (isSearchProvider(ProviderClass)) {
612
- return ProviderClass;
613
- }
614
- throw new Error(
615
- `Invalid search provider module "${specifier}": must export a default class or SearchProvider instance`
616
- );
617
- } catch (error) {
618
- if (error instanceof Error && error.message.includes("Cannot find module")) {
619
- throw new Error(
620
- `Search provider module not found: "${specifier}". Check the path or package name.`
621
- );
622
- }
623
- throw error;
624
- }
625
- }
626
- function isContentIndexer(obj) {
627
- if (!obj || typeof obj !== "object") {
628
- return false;
629
- }
630
- const indexer = obj;
631
- return typeof indexer.name === "string" && typeof indexer.initialize === "function" && typeof indexer.indexDocuments === "function" && typeof indexer.finalize === "function";
632
- }
633
- function isSearchProvider(obj) {
634
- if (!obj || typeof obj !== "object") {
635
- return false;
636
- }
637
- const provider = obj;
638
- return typeof provider.name === "string" && typeof provider.initialize === "function" && typeof provider.isReady === "function" && typeof provider.search === "function";
639
- }
640
-
641
- // src/plugin/docusaurus-plugin.ts
642
- function resolveOptions(options) {
643
- return {
644
- ...DEFAULT_OPTIONS,
645
- ...options,
646
- server: {
647
- ...DEFAULT_OPTIONS.server,
648
- ...options.server
649
- }
650
- };
651
- }
652
- async function processHtmlFile(htmlPath, route, options) {
653
- try {
654
- const extractOptions = {
655
- contentSelectors: options.contentSelectors,
656
- excludeSelectors: options.excludeSelectors
657
- };
658
- const extracted = await extractContent(htmlPath, extractOptions);
659
- if (!extracted.contentHtml) {
660
- console.warn(`[MCP] No content found in ${htmlPath}`);
661
- return null;
662
- }
663
- const markdown = await htmlToMarkdown(extracted.contentHtml);
664
- if (!markdown || markdown.trim().length < options.minContentLength) {
665
- console.warn(`[MCP] Insufficient content in ${htmlPath}`);
666
- return null;
667
- }
668
- const headings = extractHeadingsFromMarkdown(markdown);
669
- return {
670
- route,
671
- title: extracted.title,
672
- description: extracted.description,
673
- markdown,
674
- headings
675
- };
676
- } catch (error) {
677
- console.error(`[MCP] Error processing ${htmlPath}:`, error);
678
- return null;
679
- }
680
- }
681
- function mcpServerPlugin(context, options) {
682
- const resolvedOptions = resolveOptions(options);
683
- return {
684
- name: "docusaurus-plugin-mcp-server",
685
- // Expose configuration to theme components via globalData
686
- async contentLoaded({ actions }) {
687
- const { setGlobalData } = actions;
688
- const serverUrl = `${context.siteConfig.url}/${resolvedOptions.outputDir}`;
689
- setGlobalData({
690
- serverUrl,
691
- serverName: resolvedOptions.server.name
692
- });
693
- },
694
- async postBuild({ outDir }) {
695
- console.log("[MCP] Starting MCP artifact generation...");
696
- const startTime = Date.now();
697
- if (resolvedOptions.indexers === false) {
698
- console.log("[MCP] Indexing disabled, skipping artifact generation");
699
- return;
700
- }
701
- const routes = await collectRoutes(outDir, resolvedOptions.excludeRoutes);
702
- console.log(`[MCP] Found ${routes.length} routes to process`);
703
- if (routes.length === 0) {
704
- console.warn("[MCP] No routes found to process");
705
- return;
706
- }
707
- const processOptions = {
708
- contentSelectors: resolvedOptions.contentSelectors,
709
- excludeSelectors: resolvedOptions.excludeSelectors,
710
- minContentLength: resolvedOptions.minContentLength
711
- };
712
- const processedDocs = await pMap(
713
- routes,
714
- async (route) => {
715
- return processHtmlFile(route.htmlPath, route.path, processOptions);
716
- },
717
- { concurrency: 10 }
718
- );
719
- const validDocs = processedDocs.filter((doc) => doc !== null);
720
- console.log(`[MCP] Successfully processed ${validDocs.length} documents`);
721
- if (validDocs.length === 0) {
722
- console.warn("[MCP] No valid documents to index");
723
- return;
724
- }
725
- const mcpOutputDir = path.join(outDir, resolvedOptions.outputDir);
726
- const providerContext = {
727
- baseUrl: context.siteConfig.url,
728
- serverName: resolvedOptions.server.name,
729
- serverVersion: resolvedOptions.server.version,
730
- outputDir: mcpOutputDir
731
- };
732
- const indexerSpecs = resolvedOptions.indexers ?? ["flexsearch"];
733
- await fs3.ensureDir(mcpOutputDir);
734
- const indexerNames = [];
735
- for (const indexerSpec of indexerSpecs) {
736
- try {
737
- const indexer = await loadIndexer(indexerSpec);
738
- if (indexer.shouldRun && !indexer.shouldRun()) {
739
- console.log(`[MCP] Skipping indexer: ${indexer.name}`);
740
- continue;
741
- }
742
- console.log(`[MCP] Running indexer: ${indexer.name}`);
743
- await indexer.initialize(providerContext);
744
- await indexer.indexDocuments(validDocs);
745
- const artifacts = await indexer.finalize();
746
- for (const [filename, content] of artifacts) {
747
- await fs3.writeJson(path.join(mcpOutputDir, filename), content, { spaces: 0 });
748
- }
749
- indexerNames.push(indexer.name);
750
- } catch (error) {
751
- console.error(`[MCP] Error running indexer "${indexerSpec}":`, error);
752
- throw error;
753
- }
754
- }
755
- if (indexerNames.length > 0) {
756
- const manifest = {
757
- version: resolvedOptions.server.version,
758
- buildTime: (/* @__PURE__ */ new Date()).toISOString(),
759
- docCount: validDocs.length,
760
- serverName: resolvedOptions.server.name,
761
- baseUrl: context.siteConfig.url,
762
- indexers: indexerNames
763
- };
764
- await fs3.writeJson(path.join(mcpOutputDir, "manifest.json"), manifest, { spaces: 2 });
765
- }
766
- const elapsed = Date.now() - startTime;
767
- console.log(`[MCP] Artifacts written to ${mcpOutputDir}`);
768
- console.log(`[MCP] Generation complete in ${elapsed}ms`);
769
- }
770
- };
771
- }
772
- var docsSearchInputSchema = {
773
- query: z.string().min(1).describe("The search query string"),
774
- limit: z.number().int().min(1).max(20).optional().default(5).describe("Maximum number of results to return (1-20, default: 5)")
775
- };
776
- var docsSearchTool = {
777
- name: "docs_search",
778
- description: "Search the documentation for relevant pages. Returns matching documents with URLs, snippets, and relevance scores. Use this to find information across all documentation.",
779
- inputSchema: docsSearchInputSchema
780
- };
781
- function formatSearchResults(results) {
782
- if (results.length === 0) {
783
- return "No matching documents found.";
784
- }
785
- const lines = [`Found ${results.length} result(s):
786
- `];
787
- for (let i = 0; i < results.length; i++) {
788
- const result = results[i];
789
- if (!result) continue;
790
- lines.push(`${i + 1}. **${result.title}**`);
791
- lines.push(` URL: ${result.url}`);
792
- if (result.matchingHeadings && result.matchingHeadings.length > 0) {
793
- lines.push(` Matching sections: ${result.matchingHeadings.join(", ")}`);
794
- }
795
- lines.push(` ${result.snippet}`);
796
- lines.push("");
797
- }
798
- lines.push("Use docs_fetch with the URL to retrieve the full page content.");
799
- return lines.join("\n");
800
- }
801
- var docsFetchInputSchema = {
802
- url: z.string().url().describe(
803
- 'The full URL of the page to fetch (e.g., "https://docs.example.com/docs/getting-started")'
804
- )
805
- };
806
- var docsFetchTool = {
807
- name: "docs_fetch",
808
- description: "Fetch the complete content of a documentation page. Use this after searching to get the full markdown content of a specific page.",
809
- inputSchema: docsFetchInputSchema
810
- };
811
- function formatPageContent(doc) {
812
- if (!doc) {
813
- return "Page not found. Please check the URL and try again.";
814
- }
815
- const lines = [];
816
- lines.push(`# ${doc.title}`);
817
- lines.push("");
818
- if (doc.description) {
819
- lines.push(`> ${doc.description}`);
820
- lines.push("");
821
- }
822
- if (doc.headings.length > 0) {
823
- lines.push("## Contents");
824
- lines.push("");
825
- for (const heading of doc.headings) {
826
- if (heading.level <= 3) {
827
- const indent = " ".repeat(heading.level - 1);
828
- lines.push(`${indent}- [${heading.text}](#${heading.id})`);
829
- }
830
- }
831
- lines.push("");
832
- lines.push("---");
833
- lines.push("");
834
- }
835
- lines.push(doc.markdown);
836
- return lines.join("\n");
837
- }
838
-
839
- // src/mcp/server.ts
840
- function isFileConfig(config) {
841
- return "docsPath" in config && "indexPath" in config;
842
- }
843
- function isDataConfig(config) {
844
- return "docs" in config && "searchIndexData" in config;
845
- }
846
/**
 * MCP server that exposes documentation search and page-fetch tools.
 *
 * The server wraps an `McpServer` instance, registers the docs tools on it at
 * construction time, and lazily loads a search provider on first use.
 */
var McpDocsServer = class {
  config;
  searchProvider = null;
  mcpServer;
  initialized = false;
  // In-flight initialization shared by concurrent callers; null when idle.
  initPromise = null;
  /**
   * @param {object} config - Server configuration. Reads `name`, optional
   *   `version`, and (later, during initialization) `search`, `baseUrl` and
   *   either `docsPath`/`indexPath` or `docs`/`searchIndexData`.
   */
  constructor(config) {
    this.config = config;
    this.mcpServer = new McpServer(
      {
        name: config.name,
        version: config.version ?? "1.0.0"
      },
      {
        capabilities: {
          tools: {}
        }
      }
    );
    this.registerTools();
  }
  /**
   * Register all MCP tools using definitions from tool files
   */
  registerTools() {
    this.mcpServer.registerTool(
      docsSearchTool.name,
      {
        description: docsSearchTool.description,
        inputSchema: docsSearchTool.inputSchema
      },
      async ({ query, limit }) => {
        await this.initialize();
        if (!this.searchProvider || !this.searchProvider.isReady()) {
          return {
            content: [{ type: "text", text: "Server not initialized. Please try again." }],
            isError: true
          };
        }
        try {
          const results = await this.searchProvider.search(query, { limit });
          return {
            content: [{ type: "text", text: formatSearchResults(results) }]
          };
        } catch (error) {
          // Report the failure to the client as a tool error instead of throwing.
          console.error("[MCP] Search error:", error);
          return {
            content: [{ type: "text", text: `Search error: ${String(error)}` }],
            isError: true
          };
        }
      }
    );
    this.mcpServer.registerTool(
      docsFetchTool.name,
      {
        description: docsFetchTool.description,
        inputSchema: docsFetchTool.inputSchema
      },
      async ({ url }) => {
        await this.initialize();
        if (!this.searchProvider || !this.searchProvider.isReady()) {
          return {
            content: [{ type: "text", text: "Server not initialized. Please try again." }],
            isError: true
          };
        }
        try {
          const doc = await this.getDocument(url);
          return {
            content: [{ type: "text", text: formatPageContent(doc) }]
          };
        } catch (error) {
          // Report the failure to the client as a tool error instead of throwing.
          console.error("[MCP] Fetch error:", error);
          return {
            content: [{ type: "text", text: `Error fetching page: ${String(error)}` }],
            isError: true
          };
        }
      }
    );
  }
  /**
   * Get a document by URL using the search provider
   *
   * @param {string} url - URL handed to the provider's `getDocument`.
   * @returns {Promise<*>} Whatever the provider returns, or null when no
   *   provider is loaded or the provider has no `getDocument` method.
   */
  async getDocument(url) {
    const provider = this.searchProvider;
    if (!provider || !provider.getDocument) {
      return null;
    }
    return provider.getDocument(url);
  }
  /**
   * Load docs and search index using the configured search provider
   *
   * For file-based config: reads from disk
   * For data config: uses pre-loaded data directly
   *
   * Safe to call repeatedly and concurrently: callers that arrive while an
   * initialization is running await that same attempt instead of starting
   * another one. After a failed attempt the next call starts over.
   *
   * @returns {Promise<void>}
   * @throws Rethrows whatever the provider load/initialization throws, or an
   *   Error when the config is neither file-based nor data-based.
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    if (!this.initPromise) {
      // `.finally` runs asynchronously, so the slot is always cleared AFTER the
      // assignment below, even if the attempt fails before its first await.
      this.initPromise = this.runInitialization().finally(() => {
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }
  /**
   * Perform one initialization attempt. Internal helper for `initialize()`;
   * sets `initialized` to true only after the provider finished initializing.
   *
   * @returns {Promise<void>}
   */
  async runInitialization() {
    try {
      const searchSpecifier = this.config.search ?? "flexsearch";
      this.searchProvider = await loadSearchProvider(searchSpecifier);
      const providerContext = {
        baseUrl: this.config.baseUrl ?? "",
        serverName: this.config.name,
        serverVersion: this.config.version ?? "1.0.0",
        outputDir: ""
        // Not relevant for runtime
      };
      const initData = {};
      if (isDataConfig(this.config)) {
        initData.docs = this.config.docs;
        initData.indexData = this.config.searchIndexData;
      } else if (isFileConfig(this.config)) {
        initData.docsPath = this.config.docsPath;
        initData.indexPath = this.config.indexPath;
      } else {
        throw new Error("Invalid server config: must provide either file paths or pre-loaded data");
      }
      await this.searchProvider.initialize(providerContext, initData);
      this.initialized = true;
    } catch (error) {
      console.error("[MCP] Failed to initialize:", error);
      throw error;
    }
  }
  /**
   * Handle an HTTP request using the MCP SDK's transport
   *
   * This method is designed for serverless environments (Vercel, Netlify).
   * It creates a stateless transport instance and processes the request.
   *
   * @param req - Node.js IncomingMessage or compatible request object
   * @param res - Node.js ServerResponse or compatible response object
   * @param parsedBody - Optional pre-parsed request body
   */
  async handleHttpRequest(req, res, parsedBody) {
    await this.initialize();
    const transport = new StreamableHTTPServerTransport({
      sessionIdGenerator: void 0,
      // Stateless mode - no session tracking
      enableJsonResponse: true
      // Return JSON instead of SSE streams
    });
    try {
      // connect() is inside the try so the transport is closed even when
      // connecting fails, not only when request handling fails.
      await this.mcpServer.connect(transport);
      await transport.handleRequest(req, res, parsedBody);
    } finally {
      await transport.close();
    }
  }
  /**
   * Handle a Web Standard Request (Cloudflare Workers, Deno, Bun)
   *
   * This method is designed for Web Standard environments that use
   * the Fetch API Request/Response pattern.
   *
   * @param request - Web Standard Request object
   * @returns Web Standard Response object
   */
  async handleWebRequest(request) {
    await this.initialize();
    const transport = new WebStandardStreamableHTTPServerTransport({
      sessionIdGenerator: void 0,
      // Stateless mode
      enableJsonResponse: true
    });
    try {
      // connect() is inside the try so the transport is closed even when
      // connecting fails, not only when request handling fails.
      await this.mcpServer.connect(transport);
      return await transport.handleRequest(request);
    } finally {
      await transport.close();
    }
  }
  /**
   * Get server status information
   *
   * Useful for health checks and debugging
   *
   * @returns {Promise<object>} Name, version, initialization flag, document
   *   count, base URL and search provider name. The document count is only
   *   computed for a FlexSearchProvider; it is 0 for any other provider.
   */
  async getStatus() {
    let docCount = 0;
    if (this.searchProvider instanceof FlexSearchProvider) {
      const docs = this.searchProvider.getDocs();
      docCount = docs ? Object.keys(docs).length : 0;
    }
    return {
      name: this.config.name,
      version: this.config.version ?? "1.0.0",
      initialized: this.initialized,
      docCount,
      baseUrl: this.config.baseUrl,
      searchProvider: this.searchProvider?.name
    };
  }
  /**
   * Get the underlying McpServer instance
   *
   * Useful for advanced use cases like custom transports
   */
  getMcpServer() {
    return this.mcpServer;
  }
};
1053
-
1054
- export { DEFAULT_OPTIONS, FlexSearchIndexer, FlexSearchProvider, McpDocsServer, buildSearchIndex, collectRoutes, discoverHtmlFiles, docsFetchTool, docsSearchTool, exportSearchIndex, extractContent, extractHeadingsFromMarkdown, extractSection, htmlToMarkdown, importSearchIndex, loadIndexer, loadSearchProvider, mcpServerPlugin, parseHtml, parseHtmlFile, searchIndex };
1055
- //# sourceMappingURL=index.mjs.map
1056
- //# sourceMappingURL=index.mjs.map