@arabold/docs-mcp-server 1.15.1 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { execSync } from "node:child_process";
|
|
3
|
-
import fs$1, { existsSync } from "node:fs";
|
|
3
|
+
import fs$1, { readFileSync, existsSync } from "node:fs";
|
|
4
4
|
import "dotenv/config";
|
|
5
5
|
import { Command } from "commander";
|
|
6
6
|
import * as http from "node:http";
|
|
@@ -16,12 +16,13 @@ import { VirtualConsole, JSDOM } from "jsdom";
|
|
|
16
16
|
import { chromium } from "playwright";
|
|
17
17
|
import { gfm } from "@joplin/turndown-plugin-gfm";
|
|
18
18
|
import TurndownService from "turndown";
|
|
19
|
-
import
|
|
19
|
+
import iconv from "iconv-lite";
|
|
20
20
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
21
21
|
import axios from "axios";
|
|
22
22
|
import { HeaderGenerator } from "header-generator";
|
|
23
23
|
import fs from "node:fs/promises";
|
|
24
24
|
import path from "node:path";
|
|
25
|
+
import * as mime from "mime-types";
|
|
25
26
|
import { v4 } from "uuid";
|
|
26
27
|
import psl from "psl";
|
|
27
28
|
import { URL as URL$1, fileURLToPath } from "node:url";
|
|
@@ -41,7 +42,7 @@ import Fastify from "fastify";
|
|
|
41
42
|
import { jsxs, jsx, Fragment } from "@kitajs/html/jsx-runtime";
|
|
42
43
|
import DOMPurify from "dompurify";
|
|
43
44
|
const name = "@arabold/docs-mcp-server";
|
|
44
|
-
const version = "1.15.
|
|
45
|
+
const version = "1.15.1";
|
|
45
46
|
const description = "MCP server for fetching and searching documentation";
|
|
46
47
|
const type = "module";
|
|
47
48
|
const bin = { "docs-mcp-server": "dist/index.js" };
|
|
@@ -49,8 +50,8 @@ const license = "MIT";
|
|
|
49
50
|
const repository = { "type": "git", "url": "git+https://github.com/arabold/docs-mcp-server.git" };
|
|
50
51
|
const files = ["dist", "public", "db", "README.md", "LICENSE", "package.json"];
|
|
51
52
|
const scripts = { "prepare": "husky || true", "build": "vite build --config vite.config.web.ts && vite build", "start": "node --enable-source-maps dist/index.js", "cli": "node --enable-source-maps dist/index.js", "server": "node --enable-source-maps dist/index.ts", "web": "node --enable-source-maps dist/index.ts web", "dev:cli": "vite-node src/index.ts", "dev:server": "vite-node --watch src/index.ts", "dev:server:stdio": "vite-node --watch src/index.ts -- --protocol stdio", "dev:server:http": "vite-node --watch src/index.ts -- --protocol http", "dev:web": "npm-run-all --parallel dev:web:assets dev:web:bin", "dev:web:bin": "vite-node --watch src/index.ts web", "dev:web:assets": "vite build --config vite.config.web.ts --watch", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", "lint": "biome check .", "format": "biome format . --write", "postinstall": "echo 'Skipping Playwright browser install. See README.md for details.'" };
|
|
52
|
-
const dependencies = { "@fastify/formbody": "^8.0.2", "@fastify/static": "^8.1.1", "@joplin/turndown-plugin-gfm": "^1.0.62", "@kitajs/html": "^4.2.9", "@kitajs/ts-html-plugin": "^4.1.1", "@langchain/aws": "^0.1.10", "@langchain/community": "^0.3.43", "@langchain/google-genai": "^0.2.9", "@langchain/google-vertexai": "^0.2.9", "@langchain/openai": "^0.5.10", "@modelcontextprotocol/sdk": "^1.11.4", "alpinejs": "^3.14.9", "axios": "^1.9.0", "axios-retry": "^4.5.0", "better-sqlite3": "^11.10.0", "cheerio": "^1.0.0", "commander": "^13.1.0", "dompurify": "^3.2.5", "dotenv": "^16.5.0", "env-paths": "^3.0.0", "fastify": "^5.3.3", "flowbite": "^3.1.2", "fuse.js": "^7.1.0", "header-generator": "^2.1.66", "htmx.org": "^1.9.12", "jsdom": "^26.1.0", "langchain": "0.3.19", "minimatch": "^10.0.1", "playwright": "^1.52.0", "psl": "^1.15.0", "remark": "^15.0.1", "remark-gfm": "^4.0.1", "remark-html": "^16.0.1", "semver": "^7.7.2", "sqlite-vec": "^0.1.7-alpha.2", "turndown": "^7.2.0", "zod": "^3.24.4" };
|
|
53
|
-
const devDependencies = { "@biomejs/biome": "1.9.4", "@commitlint/cli": "^19.8.1", "@commitlint/config-conventional": "^19.8.1", "@semantic-release/changelog": "^6.0.3", "@semantic-release/git": "^10.0.1", "@semantic-release/github": "^11.0.2", "@semantic-release/npm": "^12.0.1", "@tailwindcss/postcss": "^4.1.7", "@tailwindcss/vite": "^4.1.7", "@types/alpinejs": "^3.13.11", "@types/better-sqlite3": "^7.6.13", "@types/jsdom": "~21.1.7", "@types/lint-staged": "~13.3.0", "@types/node": "^20.17.47", "@types/node-fetch": "^2.6.12", "@types/psl": "^1.1.3", "@types/semver": "^7.7.0", "@types/turndown": "^5.0.5", "autoprefixer": "^10.4.21", "flowbite-typography": "^1.0.5", "husky": "^9.1.7", "lint-staged": "^15.5.2", "memfs": "^4.17.2", "npm-run-all": "^4.1.5", "postcss": "^8.5.3", "semantic-release": "^24.2.4", "tailwindcss": "^4.1.4", "typescript": "^5.8.3", "vite": "^6.3.5", "vite-node": "^3.1.2", "vite-plugin-dts": "^4.5.4", "vitest": "^3.1.3" };
|
|
53
|
+
const dependencies = { "@fastify/formbody": "^8.0.2", "@fastify/static": "^8.1.1", "@joplin/turndown-plugin-gfm": "^1.0.62", "@kitajs/html": "^4.2.9", "@kitajs/ts-html-plugin": "^4.1.1", "@langchain/aws": "^0.1.10", "@langchain/community": "^0.3.43", "@langchain/google-genai": "^0.2.9", "@langchain/google-vertexai": "^0.2.9", "@langchain/openai": "^0.5.10", "@modelcontextprotocol/sdk": "^1.11.4", "alpinejs": "^3.14.9", "axios": "^1.9.0", "axios-retry": "^4.5.0", "better-sqlite3": "^11.10.0", "cheerio": "^1.0.0", "commander": "^13.1.0", "dompurify": "^3.2.5", "dotenv": "^16.5.0", "env-paths": "^3.0.0", "fastify": "^5.3.3", "flowbite": "^3.1.2", "fuse.js": "^7.1.0", "header-generator": "^2.1.66", "htmx.org": "^1.9.12", "iconv-lite": "^0.6.3", "jsdom": "^26.1.0", "langchain": "0.3.19", "mime-types": "^3.0.1", "minimatch": "^10.0.1", "playwright": "^1.52.0", "psl": "^1.15.0", "remark": "^15.0.1", "remark-gfm": "^4.0.1", "remark-html": "^16.0.1", "semver": "^7.7.2", "sqlite-vec": "^0.1.7-alpha.2", "turndown": "^7.2.0", "zod": "^3.24.4" };
|
|
54
|
+
const devDependencies = { "@biomejs/biome": "1.9.4", "@commitlint/cli": "^19.8.1", "@commitlint/config-conventional": "^19.8.1", "@semantic-release/changelog": "^6.0.3", "@semantic-release/git": "^10.0.1", "@semantic-release/github": "^11.0.2", "@semantic-release/npm": "^12.0.1", "@tailwindcss/postcss": "^4.1.7", "@tailwindcss/vite": "^4.1.7", "@types/alpinejs": "^3.13.11", "@types/better-sqlite3": "^7.6.13", "@types/jsdom": "~21.1.7", "@types/lint-staged": "~13.3.0", "@types/mime-types": "^2.1.4", "@types/node": "^20.17.47", "@types/node-fetch": "^2.6.12", "@types/psl": "^1.1.3", "@types/semver": "^7.7.0", "@types/turndown": "^5.0.5", "autoprefixer": "^10.4.21", "flowbite-typography": "^1.0.5", "husky": "^9.1.7", "lint-staged": "^15.5.2", "memfs": "^4.17.2", "npm-run-all": "^4.1.5", "postcss": "^8.5.3", "semantic-release": "^24.2.4", "tailwindcss": "^4.1.4", "typescript": "^5.8.3", "vite": "^6.3.5", "vite-node": "^3.1.2", "vite-plugin-dts": "^4.5.4", "vitest": "^3.1.3" };
|
|
54
55
|
const engines = { "node": ">=20.0.0" };
|
|
55
56
|
const packageJson = {
|
|
56
57
|
name,
|
|
@@ -372,6 +373,21 @@ class HtmlMetadataExtractorMiddleware {
|
|
|
372
373
|
await next();
|
|
373
374
|
}
|
|
374
375
|
}
|
|
376
|
+
const DEFAULT_MAX_PAGES$1 = 1e3;
|
|
377
|
+
const DEFAULT_MAX_DEPTH$1 = 3;
|
|
378
|
+
const DEFAULT_MAX_CONCURRENCY = 3;
|
|
379
|
+
const DEFAULT_PROTOCOL = "stdio";
|
|
380
|
+
const DEFAULT_HTTP_PORT = 6280;
|
|
381
|
+
const DEFAULT_WEB_PORT = 6281;
|
|
382
|
+
const DEFAULT_PAGE_TIMEOUT = 5e3;
|
|
383
|
+
const FETCHER_MAX_RETRIES = 6;
|
|
384
|
+
const FETCHER_BASE_DELAY = 1e3;
|
|
385
|
+
const SPLITTER_MIN_CHUNK_SIZE = 500;
|
|
386
|
+
const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
|
|
387
|
+
const SPLITTER_MAX_CHUNK_SIZE = 5e3;
|
|
388
|
+
const EMBEDDING_BATCH_SIZE = 100;
|
|
389
|
+
const MIGRATION_MAX_RETRIES = 5;
|
|
390
|
+
const MIGRATION_RETRY_DELAY_MS = 300;
|
|
375
391
|
var ScrapeMode = /* @__PURE__ */ ((ScrapeMode2) => {
|
|
376
392
|
ScrapeMode2["Fetch"] = "fetch";
|
|
377
393
|
ScrapeMode2["Playwright"] = "playwright";
|
|
@@ -387,10 +403,15 @@ class HtmlPlaywrightMiddleware {
|
|
|
387
403
|
async ensureBrowser() {
|
|
388
404
|
if (!this.browser || !this.browser.isConnected()) {
|
|
389
405
|
const launchArgs = process.env.PLAYWRIGHT_LAUNCH_ARGS?.split(" ") ?? [];
|
|
406
|
+
const executablePath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH || void 0;
|
|
390
407
|
logger.debug(
|
|
391
408
|
`Launching new Playwright browser instance (Chromium) with args: ${launchArgs.join(" ") || "none"}...`
|
|
392
409
|
);
|
|
393
|
-
this.browser = await chromium.launch({
|
|
410
|
+
this.browser = await chromium.launch({
|
|
411
|
+
channel: "chromium",
|
|
412
|
+
args: launchArgs,
|
|
413
|
+
executablePath
|
|
414
|
+
});
|
|
394
415
|
this.browser.on("disconnected", () => {
|
|
395
416
|
logger.debug("Playwright browser instance disconnected.");
|
|
396
417
|
this.browser = null;
|
|
@@ -409,12 +430,50 @@ class HtmlPlaywrightMiddleware {
|
|
|
409
430
|
this.browser = null;
|
|
410
431
|
}
|
|
411
432
|
}
|
|
433
|
+
/**
|
|
434
|
+
* Waits for common loading indicators (spinners, loaders) that are currently visible to disappear from the page.
|
|
435
|
+
* Only waits for selectors that are present and visible at the time of check.
|
|
436
|
+
*
|
|
437
|
+
* @param page The Playwright page instance to operate on.
|
|
438
|
+
*/
|
|
439
|
+
async waitForLoadingToComplete(page) {
|
|
440
|
+
const commonLoadingSelectors = [
|
|
441
|
+
'[class*="loading"]',
|
|
442
|
+
'[class*="spinner"]',
|
|
443
|
+
'[class*="loader"]',
|
|
444
|
+
'[id*="loading"]',
|
|
445
|
+
'[class*="preload"]',
|
|
446
|
+
"#loading",
|
|
447
|
+
'[aria-label*="loading" i]',
|
|
448
|
+
'[aria-label*="spinner" i]'
|
|
449
|
+
];
|
|
450
|
+
const waitPromises = [];
|
|
451
|
+
for (const selector of commonLoadingSelectors) {
|
|
452
|
+
try {
|
|
453
|
+
const isVisible = await page.isVisible(selector).catch(() => false);
|
|
454
|
+
if (isVisible) {
|
|
455
|
+
waitPromises.push(
|
|
456
|
+
page.waitForSelector(selector, {
|
|
457
|
+
state: "hidden",
|
|
458
|
+
timeout: DEFAULT_PAGE_TIMEOUT
|
|
459
|
+
}).catch(() => {
|
|
460
|
+
})
|
|
461
|
+
);
|
|
462
|
+
}
|
|
463
|
+
} catch {
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
if (waitPromises.length > 0) {
|
|
467
|
+
await Promise.all(waitPromises);
|
|
468
|
+
}
|
|
469
|
+
}
|
|
412
470
|
/**
|
|
413
471
|
* Processes the context using Playwright, rendering dynamic content and propagating credentials for all same-origin requests.
|
|
414
472
|
*
|
|
415
473
|
* - Parses credentials from the URL (if present).
|
|
416
474
|
* - Uses browser.newContext({ httpCredentials }) for HTTP Basic Auth on the main page and subresources.
|
|
417
475
|
* - Injects Authorization header for all same-origin requests if credentials are present and not already set.
|
|
476
|
+
* - Waits for common loading indicators to disappear before extracting HTML.
|
|
418
477
|
*
|
|
419
478
|
* @param context The middleware context containing the HTML and source URL.
|
|
420
479
|
* @param next The next middleware function in the pipeline.
|
|
@@ -447,7 +506,7 @@ class HtmlPlaywrightMiddleware {
|
|
|
447
506
|
);
|
|
448
507
|
}
|
|
449
508
|
} catch (e) {
|
|
450
|
-
logger.warn(`⚠️
|
|
509
|
+
logger.warn(`⚠️ Could not parse URL for credential extraction: ${context.source}`);
|
|
451
510
|
}
|
|
452
511
|
try {
|
|
453
512
|
const browser = await this.ensureBrowser();
|
|
@@ -492,6 +551,7 @@ class HtmlPlaywrightMiddleware {
|
|
|
492
551
|
});
|
|
493
552
|
await page.goto(context.source, { waitUntil: "load" });
|
|
494
553
|
await page.waitForSelector("body");
|
|
554
|
+
await this.waitForLoadingToComplete(page);
|
|
495
555
|
renderedHtml = await page.content();
|
|
496
556
|
logger.debug(`Playwright: Successfully rendered content for ${context.source}`);
|
|
497
557
|
} catch (error) {
|
|
@@ -515,7 +575,7 @@ class HtmlPlaywrightMiddleware {
|
|
|
515
575
|
);
|
|
516
576
|
} else {
|
|
517
577
|
logger.warn(
|
|
518
|
-
`⚠️
|
|
578
|
+
`⚠️ Playwright rendering resulted in null content for ${context.source}. Proceeding without content update.`
|
|
519
579
|
);
|
|
520
580
|
}
|
|
521
581
|
await next();
|
|
@@ -611,7 +671,7 @@ class HtmlSanitizerMiddleware {
|
|
|
611
671
|
}
|
|
612
672
|
} catch (selectorError) {
|
|
613
673
|
logger.warn(
|
|
614
|
-
`⚠️
|
|
674
|
+
`⚠️ Potentially invalid selector "${selector}" during element removal: ${selectorError}`
|
|
615
675
|
);
|
|
616
676
|
context.errors.push(
|
|
617
677
|
new Error(`Invalid selector "${selector}": ${selectorError}`)
|
|
@@ -711,7 +771,7 @@ ${text.replace(/^\n+|\n+$/g, "")}
|
|
|
711
771
|
const markdown = this.turndownService.turndown(htmlToConvert).trim();
|
|
712
772
|
if (!markdown) {
|
|
713
773
|
const warnMsg = `HTML to Markdown conversion resulted in empty content for ${context.source}.`;
|
|
714
|
-
logger.warn(`⚠️
|
|
774
|
+
logger.warn(`⚠️ ${warnMsg}`);
|
|
715
775
|
context.content = "";
|
|
716
776
|
} else {
|
|
717
777
|
context.content = markdown;
|
|
@@ -768,11 +828,12 @@ class MarkdownMetadataExtractorMiddleware {
|
|
|
768
828
|
}
|
|
769
829
|
}
|
|
770
830
|
function convertToString(content, charset) {
|
|
771
|
-
if (
|
|
772
|
-
|
|
773
|
-
return
|
|
831
|
+
if (typeof content === "string") return content;
|
|
832
|
+
try {
|
|
833
|
+
return iconv.decode(content, charset || "utf-8");
|
|
834
|
+
} catch {
|
|
835
|
+
return iconv.decode(content, "utf-8");
|
|
774
836
|
}
|
|
775
|
-
return content;
|
|
776
837
|
}
|
|
777
838
|
class BasePipeline {
|
|
778
839
|
/**
|
|
@@ -985,13 +1046,13 @@ class FetchUrlTool {
|
|
|
985
1046
|
}
|
|
986
1047
|
if (!processed) {
|
|
987
1048
|
logger.warn(
|
|
988
|
-
`⚠️
|
|
1049
|
+
`⚠️ Unsupported content type "${rawContent.mimeType}" for ${url}. Returning raw content.`
|
|
989
1050
|
);
|
|
990
1051
|
const contentString = typeof rawContent.content === "string" ? rawContent.content : Buffer.from(rawContent.content).toString("utf-8");
|
|
991
1052
|
return contentString;
|
|
992
1053
|
}
|
|
993
1054
|
for (const err of processed.errors) {
|
|
994
|
-
logger.warn(`⚠️
|
|
1055
|
+
logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
|
|
995
1056
|
}
|
|
996
1057
|
if (typeof processed.textContent !== "string" || !processed.textContent.trim()) {
|
|
997
1058
|
throw new ToolError(
|
|
@@ -1179,20 +1240,6 @@ class RemoveTool {
|
|
|
1179
1240
|
}
|
|
1180
1241
|
}
|
|
1181
1242
|
}
|
|
1182
|
-
const DEFAULT_MAX_PAGES$1 = 1e3;
|
|
1183
|
-
const DEFAULT_MAX_DEPTH$1 = 3;
|
|
1184
|
-
const DEFAULT_MAX_CONCURRENCY = 3;
|
|
1185
|
-
const DEFAULT_PROTOCOL = "stdio";
|
|
1186
|
-
const DEFAULT_HTTP_PORT = 6280;
|
|
1187
|
-
const DEFAULT_WEB_PORT = 6281;
|
|
1188
|
-
const FETCHER_MAX_RETRIES = 6;
|
|
1189
|
-
const FETCHER_BASE_DELAY = 1e3;
|
|
1190
|
-
const SPLITTER_MIN_CHUNK_SIZE = 500;
|
|
1191
|
-
const SPLITTER_PREFERRED_CHUNK_SIZE = 1500;
|
|
1192
|
-
const SPLITTER_MAX_CHUNK_SIZE = 5e3;
|
|
1193
|
-
const EMBEDDING_BATCH_SIZE = 100;
|
|
1194
|
-
const MIGRATION_MAX_RETRIES = 5;
|
|
1195
|
-
const MIGRATION_RETRY_DELAY_MS = 300;
|
|
1196
1243
|
class ScrapeTool {
|
|
1197
1244
|
docService;
|
|
1198
1245
|
manager;
|
|
@@ -1717,7 +1764,7 @@ ${formattedJob}`);
|
|
|
1717
1764
|
if (validation.success) {
|
|
1718
1765
|
statusFilter = validation.data;
|
|
1719
1766
|
} else {
|
|
1720
|
-
logger.warn(`⚠️
|
|
1767
|
+
logger.warn(`⚠️ Invalid status parameter received: ${statusParam}`);
|
|
1721
1768
|
}
|
|
1722
1769
|
}
|
|
1723
1770
|
const result = await tools.listJobs.execute({ status: statusFilter });
|
|
@@ -1746,7 +1793,7 @@ ${formattedJob}`);
|
|
|
1746
1793
|
},
|
|
1747
1794
|
async (uri, { jobId }) => {
|
|
1748
1795
|
if (typeof jobId !== "string" || jobId.length === 0) {
|
|
1749
|
-
logger.warn(`⚠️
|
|
1796
|
+
logger.warn(`⚠️ Invalid jobId received in URI: ${jobId}`);
|
|
1750
1797
|
return { contents: [] };
|
|
1751
1798
|
}
|
|
1752
1799
|
const result = await tools.getJobInfo.execute({ jobId });
|
|
@@ -1959,7 +2006,7 @@ class HttpFetcher {
|
|
|
1959
2006
|
if (attempt < maxRetries && (status === void 0 || this.retryableStatusCodes.includes(status))) {
|
|
1960
2007
|
const delay = baseDelay * 2 ** attempt;
|
|
1961
2008
|
logger.warn(
|
|
1962
|
-
`⚠️
|
|
2009
|
+
`⚠️ Attempt ${attempt + 1}/${maxRetries + 1} failed for ${source} (Status: ${status}, Code: ${code}). Retrying in ${delay}ms...`
|
|
1963
2010
|
);
|
|
1964
2011
|
await this.delay(delay);
|
|
1965
2012
|
continue;
|
|
@@ -1981,13 +2028,17 @@ class FileFetcher {
|
|
|
1981
2028
|
canFetch(source) {
|
|
1982
2029
|
return source.startsWith("file://");
|
|
1983
2030
|
}
|
|
2031
|
+
/**
|
|
2032
|
+
* Fetches the content of a file given a file:// URL, decoding percent-encoded paths as needed.
|
|
2033
|
+
* Only HTML and Markdown files are processed.
|
|
2034
|
+
*/
|
|
1984
2035
|
async fetch(source, options) {
|
|
1985
|
-
const
|
|
1986
|
-
|
|
2036
|
+
const rawPath = source.replace("file://", "");
|
|
2037
|
+
const filePath = decodeURIComponent(rawPath);
|
|
1987
2038
|
try {
|
|
1988
2039
|
const content = await fs.readFile(filePath);
|
|
1989
2040
|
const ext = path.extname(filePath).toLowerCase();
|
|
1990
|
-
const mimeType =
|
|
2041
|
+
const mimeType = mime.lookup(ext) || "application/octet-stream";
|
|
1991
2042
|
return {
|
|
1992
2043
|
content,
|
|
1993
2044
|
mimeType,
|
|
@@ -2003,19 +2054,6 @@ class FileFetcher {
|
|
|
2003
2054
|
);
|
|
2004
2055
|
}
|
|
2005
2056
|
}
|
|
2006
|
-
getMimeType(ext) {
|
|
2007
|
-
switch (ext) {
|
|
2008
|
-
case ".html":
|
|
2009
|
-
case ".htm":
|
|
2010
|
-
return "text/html";
|
|
2011
|
-
case ".md":
|
|
2012
|
-
return "text/markdown";
|
|
2013
|
-
case ".txt":
|
|
2014
|
-
return "text/plain";
|
|
2015
|
-
default:
|
|
2016
|
-
return "application/octet-stream";
|
|
2017
|
-
}
|
|
2018
|
-
}
|
|
2019
2057
|
}
|
|
2020
2058
|
async function initializeTools(docService, pipelineManager) {
|
|
2021
2059
|
const tools = {
|
|
@@ -2072,7 +2110,7 @@ async function stopServer() {
|
|
|
2072
2110
|
}
|
|
2073
2111
|
runningServer = null;
|
|
2074
2112
|
if (hadError) {
|
|
2075
|
-
logger.warn("⚠️
|
|
2113
|
+
logger.warn("⚠️ MCP Server instance stopped with errors.");
|
|
2076
2114
|
} else {
|
|
2077
2115
|
logger.info("✅ MCP Server instance stopped.");
|
|
2078
2116
|
}
|
|
@@ -2184,9 +2222,19 @@ function extractPathAndQuery(url) {
|
|
|
2184
2222
|
function shouldIncludeUrl(url, includePatterns, excludePatterns) {
|
|
2185
2223
|
const path2 = extractPathAndQuery(url);
|
|
2186
2224
|
const normalizedPath = path2.startsWith("/") ? path2 : `/${path2}`;
|
|
2187
|
-
|
|
2225
|
+
let basename;
|
|
2226
|
+
if (url.startsWith("file://")) {
|
|
2227
|
+
try {
|
|
2228
|
+
const u = new URL(url);
|
|
2229
|
+
basename = u.pathname ? u.pathname.split("/").pop() : void 0;
|
|
2230
|
+
} catch {
|
|
2231
|
+
}
|
|
2232
|
+
}
|
|
2233
|
+
const stripSlash = (patterns) => patterns?.map((p) => p.startsWith("/") ? p.slice(1) : p);
|
|
2234
|
+
if (matchesAnyPattern(normalizedPath, excludePatterns) || basename && matchesAnyPattern(basename, stripSlash(excludePatterns)))
|
|
2235
|
+
return false;
|
|
2188
2236
|
if (!includePatterns || includePatterns.length === 0) return true;
|
|
2189
|
-
return matchesAnyPattern(normalizedPath, includePatterns);
|
|
2237
|
+
return matchesAnyPattern(normalizedPath, includePatterns) || (basename ? matchesAnyPattern(basename, stripSlash(includePatterns)) : false);
|
|
2190
2238
|
}
|
|
2191
2239
|
function isInScope(baseUrl, targetUrl, scope) {
|
|
2192
2240
|
if (baseUrl.protocol !== targetUrl.protocol) return false;
|
|
@@ -2385,16 +2433,16 @@ class WebScraperStrategy extends BaseScraperStrategy {
|
|
|
2385
2433
|
}
|
|
2386
2434
|
if (!processed) {
|
|
2387
2435
|
logger.warn(
|
|
2388
|
-
`⚠️
|
|
2436
|
+
`⚠️ Unsupported content type "${rawContent.mimeType}" for URL ${url}. Skipping processing.`
|
|
2389
2437
|
);
|
|
2390
2438
|
return { document: void 0, links: [] };
|
|
2391
2439
|
}
|
|
2392
2440
|
for (const err of processed.errors) {
|
|
2393
|
-
logger.warn(`⚠️
|
|
2441
|
+
logger.warn(`⚠️ Processing error for ${url}: ${err.message}`);
|
|
2394
2442
|
}
|
|
2395
2443
|
if (!processed.textContent || !processed.textContent.trim()) {
|
|
2396
2444
|
logger.warn(
|
|
2397
|
-
`⚠️
|
|
2445
|
+
`⚠️ No processable content found for ${url} after pipeline execution.`
|
|
2398
2446
|
);
|
|
2399
2447
|
return { document: void 0, links: processed.links };
|
|
2400
2448
|
}
|
|
@@ -2500,15 +2548,14 @@ class LocalFileStrategy extends BaseScraperStrategy {
|
|
|
2500
2548
|
return url.startsWith("file://");
|
|
2501
2549
|
}
|
|
2502
2550
|
async processItem(item, options, _progressCallback, _signal) {
|
|
2503
|
-
const filePath = item.url.replace(/^file:\/\//, "");
|
|
2551
|
+
const filePath = decodeURIComponent(item.url.replace(/^file:\/\//, ""));
|
|
2504
2552
|
const stats = await fs.stat(filePath);
|
|
2505
2553
|
if (stats.isDirectory()) {
|
|
2506
2554
|
const contents = await fs.readdir(filePath);
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
};
|
|
2555
|
+
const links = contents.map((name2) => `file://${path.join(filePath, name2)}`).filter((url) => this.shouldProcessUrl(url, options));
|
|
2556
|
+
return { links };
|
|
2510
2557
|
}
|
|
2511
|
-
logger.info(
|
|
2558
|
+
logger.info(`🗂️ Processing file ${this.pageCount}/${options.maxPages}: ${filePath}`);
|
|
2512
2559
|
const rawContent = await this.fileFetcher.fetch(item.url);
|
|
2513
2560
|
let processed;
|
|
2514
2561
|
for (const pipeline of this.pipelines) {
|
|
@@ -2519,12 +2566,12 @@ class LocalFileStrategy extends BaseScraperStrategy {
|
|
|
2519
2566
|
}
|
|
2520
2567
|
if (!processed) {
|
|
2521
2568
|
logger.warn(
|
|
2522
|
-
`⚠️
|
|
2569
|
+
`⚠️ Unsupported content type "${rawContent.mimeType}" for file ${filePath}. Skipping processing.`
|
|
2523
2570
|
);
|
|
2524
2571
|
return { document: void 0, links: [] };
|
|
2525
2572
|
}
|
|
2526
2573
|
for (const err of processed.errors) {
|
|
2527
|
-
logger.warn(`⚠️
|
|
2574
|
+
logger.warn(`⚠️ Processing error for ${filePath}: ${err.message}`);
|
|
2528
2575
|
}
|
|
2529
2576
|
return {
|
|
2530
2577
|
document: {
|
|
@@ -2682,7 +2729,7 @@ class PipelineWorker {
|
|
|
2682
2729
|
}
|
|
2683
2730
|
logger.debug(`[${jobId}] Worker finished job successfully.`);
|
|
2684
2731
|
} catch (error) {
|
|
2685
|
-
logger.warn(`⚠️
|
|
2732
|
+
logger.warn(`⚠️ [${jobId}] Worker encountered error: ${error}`);
|
|
2686
2733
|
throw error;
|
|
2687
2734
|
}
|
|
2688
2735
|
}
|
|
@@ -2719,7 +2766,7 @@ class PipelineManager {
|
|
|
2719
2766
|
*/
|
|
2720
2767
|
async start() {
|
|
2721
2768
|
if (this.isRunning) {
|
|
2722
|
-
logger.warn("⚠️
|
|
2769
|
+
logger.warn("⚠️ PipelineManager is already running.");
|
|
2723
2770
|
return;
|
|
2724
2771
|
}
|
|
2725
2772
|
this.isRunning = true;
|
|
@@ -2733,7 +2780,7 @@ class PipelineManager {
|
|
|
2733
2780
|
*/
|
|
2734
2781
|
async stop() {
|
|
2735
2782
|
if (!this.isRunning) {
|
|
2736
|
-
logger.warn("⚠️
|
|
2783
|
+
logger.warn("⚠️ PipelineManager is not running.");
|
|
2737
2784
|
return;
|
|
2738
2785
|
}
|
|
2739
2786
|
this.isRunning = false;
|
|
@@ -2852,7 +2899,7 @@ class PipelineManager {
|
|
|
2852
2899
|
case PipelineJobStatus.CANCELLED:
|
|
2853
2900
|
case PipelineJobStatus.CANCELLING:
|
|
2854
2901
|
logger.warn(
|
|
2855
|
-
`⚠️
|
|
2902
|
+
`⚠️ Job ${jobId} cannot be cancelled in its current state: ${job.status}`
|
|
2856
2903
|
);
|
|
2857
2904
|
break;
|
|
2858
2905
|
default:
|
|
@@ -3764,7 +3811,7 @@ async function applyMigrations(db) {
|
|
|
3764
3811
|
if (error?.code === "SQLITE_BUSY" && retries < MIGRATION_MAX_RETRIES) {
|
|
3765
3812
|
retries++;
|
|
3766
3813
|
logger.warn(
|
|
3767
|
-
`⚠️
|
|
3814
|
+
`⚠️ Migrations busy (SQLITE_BUSY), retrying attempt ${retries}/${MIGRATION_MAX_RETRIES} in ${MIGRATION_RETRY_DELAY_MS}ms...`
|
|
3768
3815
|
);
|
|
3769
3816
|
await new Promise((resolve) => setTimeout(resolve, MIGRATION_RETRY_DELAY_MS));
|
|
3770
3817
|
} else {
|
|
@@ -3943,7 +3990,7 @@ class DocumentStore {
|
|
|
3943
3990
|
*/
|
|
3944
3991
|
async initializeEmbeddings() {
|
|
3945
3992
|
const modelSpec = process.env.DOCS_MCP_EMBEDDING_MODEL || "text-embedding-3-small";
|
|
3946
|
-
const { createEmbeddingModel } = await import("./EmbeddingFactory-
|
|
3993
|
+
const { createEmbeddingModel } = await import("./EmbeddingFactory-CElwVk3X.js");
|
|
3947
3994
|
this.embeddings = createEmbeddingModel(modelSpec);
|
|
3948
3995
|
const testVector = await this.embeddings.embedQuery("test");
|
|
3949
3996
|
this.modelDimension = testVector.length;
|
|
@@ -4367,7 +4414,7 @@ class DocumentManagementService {
|
|
|
4367
4414
|
try {
|
|
4368
4415
|
fs$1.mkdirSync(dbDir, { recursive: true });
|
|
4369
4416
|
} catch (error) {
|
|
4370
|
-
logger.error(`⚠️
|
|
4417
|
+
logger.error(`⚠️ Failed to create database directory ${dbDir}: ${error}`);
|
|
4371
4418
|
}
|
|
4372
4419
|
this.store = new DocumentStore(dbPath);
|
|
4373
4420
|
this.documentRetriever = new DocumentRetrieverService(this.store);
|
|
@@ -4407,7 +4454,7 @@ class DocumentManagementService {
|
|
|
4407
4454
|
const versions = await this.listVersions(normalizedLibrary);
|
|
4408
4455
|
const hasUnversioned = await this.exists(normalizedLibrary, "");
|
|
4409
4456
|
if (versions.length === 0 && !hasUnversioned) {
|
|
4410
|
-
logger.warn(`⚠️
|
|
4457
|
+
logger.warn(`⚠️ Library '${library}' not found.`);
|
|
4411
4458
|
const allLibraries = await this.listLibraries();
|
|
4412
4459
|
const libraryNames = allLibraries.map((lib) => lib.library);
|
|
4413
4460
|
let suggestions = [];
|
|
@@ -4466,7 +4513,7 @@ class DocumentManagementService {
|
|
|
4466
4513
|
logger.info(`ℹ️ Unversioned documents exist for ${library}`);
|
|
4467
4514
|
return { bestMatch: null, hasUnversioned: true };
|
|
4468
4515
|
}
|
|
4469
|
-
logger.warn(`⚠️
|
|
4516
|
+
logger.warn(`⚠️ No valid versions found for ${library}`);
|
|
4470
4517
|
const allLibraryDetails = await this.store.queryLibraryVersions();
|
|
4471
4518
|
const libraryDetails = allLibraryDetails.get(library) ?? [];
|
|
4472
4519
|
throw new VersionNotFoundError(library, targetVersion ?? "", libraryDetails);
|
|
@@ -4478,7 +4525,7 @@ class DocumentManagementService {
|
|
|
4478
4525
|
} else {
|
|
4479
4526
|
const versionRegex = /^(\d+)(?:\.(?:x(?:\.x)?|\d+(?:\.(?:x|\d+))?))?$|^$/;
|
|
4480
4527
|
if (!versionRegex.test(targetVersion)) {
|
|
4481
|
-
logger.warn(`⚠️
|
|
4528
|
+
logger.warn(`⚠️ Invalid target version format: ${targetVersion}`);
|
|
4482
4529
|
} else {
|
|
4483
4530
|
let range = targetVersion;
|
|
4484
4531
|
if (!semver__default.validRange(targetVersion)) {
|
|
@@ -4494,7 +4541,7 @@ class DocumentManagementService {
|
|
|
4494
4541
|
`✅ Found best match version ${bestMatch} for ${library}@${targetVersion}`
|
|
4495
4542
|
);
|
|
4496
4543
|
} else {
|
|
4497
|
-
logger.warn(`⚠️
|
|
4544
|
+
logger.warn(`⚠️ No matching semver version found for ${library}@${targetVersion}`);
|
|
4498
4545
|
}
|
|
4499
4546
|
if (!bestMatch && !hasUnversioned) {
|
|
4500
4547
|
const allLibraryDetails = await this.store.queryLibraryVersions();
|
|
@@ -4540,7 +4587,7 @@ class DocumentManagementService {
|
|
|
4540
4587
|
path: chunk.section.path
|
|
4541
4588
|
}
|
|
4542
4589
|
}));
|
|
4543
|
-
logger.info(
|
|
4590
|
+
logger.info(`✂️ Split document into ${splitDocs.length} chunks`);
|
|
4544
4591
|
await this.store.addDocuments(library, normalizedVersion, splitDocs);
|
|
4545
4592
|
}
|
|
4546
4593
|
/**
|
|
@@ -4561,17 +4608,23 @@ class DocumentManagementService {
|
|
|
4561
4608
|
}));
|
|
4562
4609
|
}
|
|
4563
4610
|
}
|
|
4564
|
-
const Layout = ({
|
|
4565
|
-
|
|
4566
|
-
|
|
4567
|
-
|
|
4568
|
-
|
|
4569
|
-
|
|
4570
|
-
|
|
4571
|
-
|
|
4572
|
-
|
|
4573
|
-
|
|
4574
|
-
|
|
4611
|
+
const Layout = ({ title, version: version2, children }) => {
|
|
4612
|
+
let versionString = version2;
|
|
4613
|
+
if (!versionString) {
|
|
4614
|
+
try {
|
|
4615
|
+
const packageJson2 = JSON.parse(readFileSync("package.json", "utf-8"));
|
|
4616
|
+
versionString = packageJson2.version;
|
|
4617
|
+
} catch (error) {
|
|
4618
|
+
console.error("Error reading package.json:", error);
|
|
4619
|
+
}
|
|
4620
|
+
}
|
|
4621
|
+
return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
|
|
4622
|
+
/* @__PURE__ */ jsxs("head", { children: [
|
|
4623
|
+
/* @__PURE__ */ jsx("meta", { charset: "UTF-8" }),
|
|
4624
|
+
/* @__PURE__ */ jsx("meta", { name: "viewport", content: "width=device-width, initial-scale=1.0" }),
|
|
4625
|
+
/* @__PURE__ */ jsx("title", { safe: true, children: title }),
|
|
4626
|
+
/* @__PURE__ */ jsx("link", { rel: "stylesheet", href: "/assets/main.css" }),
|
|
4627
|
+
/* @__PURE__ */ jsx("style", { children: `
|
|
4575
4628
|
.htmx-indicator {
|
|
4576
4629
|
display: none;
|
|
4577
4630
|
}
|
|
@@ -4594,29 +4647,30 @@ const Layout = ({
|
|
|
4594
4647
|
form .htmx-indicator .search-text { display: none; }
|
|
4595
4648
|
form .spinner { display: none; }
|
|
4596
4649
|
` })
|
|
4597
|
-
] }),
|
|
4598
|
-
/* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", children: [
|
|
4599
|
-
/* @__PURE__ */ jsxs("div", { class: "container max-w-2xl mx-auto px-4 py-4", children: [
|
|
4600
|
-
/* @__PURE__ */ jsx("header", { class: "mb-4", children: /* @__PURE__ */ jsxs("h1", { class: "text-3xl font-bold text-gray-900 dark:text-white", children: [
|
|
4601
|
-
/* @__PURE__ */ jsx("a", { href: "/", children: "MCP Docs" }),
|
|
4602
|
-
version2 ? /* @__PURE__ */ jsxs(
|
|
4603
|
-
"span",
|
|
4604
|
-
{
|
|
4605
|
-
safe: true,
|
|
4606
|
-
class: "ml-2 text-base font-normal text-gray-500 dark:text-gray-400 align-baseline",
|
|
4607
|
-
title: `Version ${version2}`,
|
|
4608
|
-
children: [
|
|
4609
|
-
"v",
|
|
4610
|
-
version2
|
|
4611
|
-
]
|
|
4612
|
-
}
|
|
4613
|
-
) : null
|
|
4614
|
-
] }) }),
|
|
4615
|
-
/* @__PURE__ */ jsx("main", { children })
|
|
4616
4650
|
] }),
|
|
4617
|
-
/* @__PURE__ */
|
|
4618
|
-
|
|
4619
|
-
|
|
4651
|
+
/* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", children: [
|
|
4652
|
+
/* @__PURE__ */ jsxs("div", { class: "container max-w-2xl mx-auto px-4 py-4", children: [
|
|
4653
|
+
/* @__PURE__ */ jsx("header", { class: "mb-4", children: /* @__PURE__ */ jsxs("h1", { class: "text-3xl font-bold text-gray-900 dark:text-white", children: [
|
|
4654
|
+
/* @__PURE__ */ jsx("a", { href: "/", children: "MCP Docs" }),
|
|
4655
|
+
versionString ? /* @__PURE__ */ jsxs(
|
|
4656
|
+
"span",
|
|
4657
|
+
{
|
|
4658
|
+
safe: true,
|
|
4659
|
+
class: "ml-2 text-base font-normal text-gray-500 dark:text-gray-400 align-baseline",
|
|
4660
|
+
title: `Version ${versionString}`,
|
|
4661
|
+
children: [
|
|
4662
|
+
"v",
|
|
4663
|
+
versionString
|
|
4664
|
+
]
|
|
4665
|
+
}
|
|
4666
|
+
) : null
|
|
4667
|
+
] }) }),
|
|
4668
|
+
/* @__PURE__ */ jsx("main", { children })
|
|
4669
|
+
] }),
|
|
4670
|
+
/* @__PURE__ */ jsx("script", { type: "module", src: "/assets/main.js" })
|
|
4671
|
+
] })
|
|
4672
|
+
] });
|
|
4673
|
+
};
|
|
4620
4674
|
function registerIndexRoute(server) {
|
|
4621
4675
|
server.get("/", async (_, reply) => {
|
|
4622
4676
|
reply.type("text/html");
|
|
@@ -4857,7 +4911,25 @@ const ScrapeFormContent = () => /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 b
|
|
|
4857
4911
|
children: "URL"
|
|
4858
4912
|
}
|
|
4859
4913
|
),
|
|
4860
|
-
/* @__PURE__ */ jsx(
|
|
4914
|
+
/* @__PURE__ */ jsx(
|
|
4915
|
+
Tooltip,
|
|
4916
|
+
{
|
|
4917
|
+
text: /* @__PURE__ */ jsxs("div", { children: [
|
|
4918
|
+
/* @__PURE__ */ jsx("p", { children: "Enter the URL of the documentation you want to scrape." }),
|
|
4919
|
+
/* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
|
|
4920
|
+
"For local files/folders, you must use the ",
|
|
4921
|
+
/* @__PURE__ */ jsx("code", { children: "file://" }),
|
|
4922
|
+
" ",
|
|
4923
|
+
"prefix and ensure the path is accessible to the server."
|
|
4924
|
+
] }),
|
|
4925
|
+
/* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
|
|
4926
|
+
"If running in Docker, ",
|
|
4927
|
+
/* @__PURE__ */ jsx("b", { children: "mount the folder" }),
|
|
4928
|
+
" (see README for details)."
|
|
4929
|
+
] })
|
|
4930
|
+
] })
|
|
4931
|
+
}
|
|
4932
|
+
)
|
|
4861
4933
|
] }),
|
|
4862
4934
|
/* @__PURE__ */ jsx(
|
|
4863
4935
|
"input",
|
|
@@ -4875,7 +4947,7 @@ const ScrapeFormContent = () => /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 b
|
|
|
4875
4947
|
/* @__PURE__ */ jsx(
|
|
4876
4948
|
"div",
|
|
4877
4949
|
{
|
|
4878
|
-
"x-show": "hasPath",
|
|
4950
|
+
"x-show": "hasPath && !(url.startsWith('file://'))",
|
|
4879
4951
|
"x-cloak": true,
|
|
4880
4952
|
"x-transition:enter": "transition ease-out duration-300",
|
|
4881
4953
|
"x-transition:enter-start": "opacity-0 transform -translate-y-2",
|
|
@@ -5652,6 +5724,13 @@ async function stopWebServer(server) {
|
|
|
5652
5724
|
}
|
|
5653
5725
|
}
|
|
5654
5726
|
function ensurePlaywrightBrowsersInstalled() {
|
|
5727
|
+
const chromiumEnvPath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
|
|
5728
|
+
if (chromiumEnvPath && existsSync(chromiumEnvPath)) {
|
|
5729
|
+
logger.debug(
|
|
5730
|
+
`PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH is set to '${chromiumEnvPath}', skipping Playwright browser install.`
|
|
5731
|
+
);
|
|
5732
|
+
return;
|
|
5733
|
+
}
|
|
5655
5734
|
try {
|
|
5656
5735
|
const playwright = require("playwright");
|
|
5657
5736
|
const chromiumPath = playwright.chromium.executablePath();
|
|
@@ -5659,11 +5738,11 @@ function ensurePlaywrightBrowsersInstalled() {
|
|
|
5659
5738
|
throw new Error("Playwright Chromium browser not found");
|
|
5660
5739
|
}
|
|
5661
5740
|
} catch (err) {
|
|
5662
|
-
|
|
5741
|
+
logger.debug(
|
|
5663
5742
|
"Playwright browsers not found. Installing Chromium browser for dynamic scraping (this may take a minute)..."
|
|
5664
5743
|
);
|
|
5665
5744
|
try {
|
|
5666
|
-
execSync("
|
|
5745
|
+
execSync("npm exec -y playwright install --no-shell --with-deps chromium", {
|
|
5667
5746
|
stdio: "inherit",
|
|
5668
5747
|
cwd: getProjectRoot()
|
|
5669
5748
|
});
|
|
@@ -5777,7 +5856,9 @@ async function main() {
|
|
|
5777
5856
|
await new Promise(() => {
|
|
5778
5857
|
});
|
|
5779
5858
|
});
|
|
5780
|
-
program.command("scrape <library> <url>").description(
|
|
5859
|
+
program.command("scrape <library> <url>").description(
|
|
5860
|
+
"Scrape and index documentation from a URL or local folder.\n\nTo scrape local files or folders, use a file:// URL.\nExamples:\n scrape mylib https://react.dev/reference/react\n scrape mylib file:///Users/me/docs/index.html\n scrape mylib file:///Users/me/docs/my-library\n\nNote: For local files/folders, you must use the file:// prefix. If running in Docker, mount the folder and use the container path. See README for details."
|
|
5861
|
+
).option("-v, --version <string>", "Version of the library (optional)").option(
|
|
5781
5862
|
"-p, --max-pages <number>",
|
|
5782
5863
|
"Maximum pages to scrape",
|
|
5783
5864
|
DEFAULT_MAX_PAGES$1.toString()
|