@arabold/docs-mcp-server 1.25.1 → 1.25.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -254,6 +254,7 @@ You can index documentation from your local filesystem by using a `file://` URL
254
254
  -e OPENAI_API_KEY="your-key" \
255
255
  -v /absolute/path/to/docs:/docs:ro \
256
256
  -v docs-mcp-data:/data \
257
+ -p 6280:6280 \
257
258
  ghcr.io/arabold/docs-mcp-server:latest \
258
259
  scrape mylib file:///docs/my-library
259
260
  ```
@@ -498,7 +499,11 @@ DOCS_MCP_TELEMETRY=false npx @arabold/docs-mcp-server@latest
498
499
  **Option 3: Docker**
499
500
 
500
501
  ```bash
501
- docker run -e DOCS_MCP_TELEMETRY=false ghcr.io/arabold/docs-mcp-server:latest
502
+ docker run \
503
+ -e DOCS_MCP_TELEMETRY=false \
504
+ -v docs-mcp-data:/data \
505
+ -p 6280:6280 \
506
+ ghcr.io/arabold/docs-mcp-server:latest
502
507
  ```
503
508
 
504
509
  For more details about our telemetry practices, see the [Telemetry Guide](docs/telemetry.md).
package/dist/index.js CHANGED
@@ -9,6 +9,7 @@ import { PostHog } from "posthog-node";
9
9
  import { randomUUID } from "node:crypto";
10
10
  import fs, { existsSync, readFileSync } from "node:fs";
11
11
  import path from "node:path";
12
+ import { fileURLToPath, URL as URL$1 } from "node:url";
12
13
  import envPaths from "env-paths";
13
14
  import { Option, Command } from "commander";
14
15
  import formBody from "@fastify/formbody";
@@ -22,7 +23,6 @@ import { createTRPCProxyClient, httpBatchLink } from "@trpc/client";
22
23
  import { v4 } from "uuid";
23
24
  import { VirtualConsole, JSDOM } from "jsdom";
24
25
  import mime from "mime";
25
- import { fileURLToPath, URL as URL$1 } from "node:url";
26
26
  import psl from "psl";
27
27
  import fs$1 from "node:fs/promises";
28
28
  import axios from "axios";
@@ -539,6 +539,49 @@ class PostHogClient {
539
539
  return this.enabled && !!this.client;
540
540
  }
541
541
  }
542
+ let projectRoot = null;
543
+ function getProjectRoot() {
544
+ if (projectRoot) {
545
+ return projectRoot;
546
+ }
547
+ const currentFilePath = fileURLToPath(import.meta.url);
548
+ let currentDir = path.dirname(currentFilePath);
549
+ while (true) {
550
+ const packageJsonPath = path.join(currentDir, "package.json");
551
+ if (fs.existsSync(packageJsonPath)) {
552
+ projectRoot = currentDir;
553
+ return currentDir;
554
+ }
555
+ const parentDir = path.dirname(currentDir);
556
+ if (parentDir === currentDir) {
557
+ throw new Error("Could not find project root containing package.json.");
558
+ }
559
+ currentDir = parentDir;
560
+ }
561
+ }
562
+ function resolveStorePath(storePath) {
563
+ let dbDir;
564
+ if (storePath) {
565
+ dbDir = storePath;
566
+ } else {
567
+ const projectRoot2 = getProjectRoot();
568
+ const oldDbDir = path.join(projectRoot2, ".store");
569
+ const oldDbPath = path.join(oldDbDir, "documents.db");
570
+ const oldDbExists = fs.existsSync(oldDbPath);
571
+ if (oldDbExists) {
572
+ dbDir = oldDbDir;
573
+ } else {
574
+ const standardPaths = envPaths("docs-mcp-server", { suffix: "" });
575
+ dbDir = standardPaths.data;
576
+ }
577
+ }
578
+ try {
579
+ fs.mkdirSync(dbDir, { recursive: true });
580
+ } catch (error) {
581
+ console.warn(`⚠️ Failed to create database directory ${dbDir}: ${error}`);
582
+ }
583
+ return dbDir;
584
+ }
542
585
  class TelemetryConfig {
543
586
  static instance;
544
587
  enabled = true;
@@ -560,7 +603,7 @@ class TelemetryConfig {
560
603
  }
561
604
  function generateInstallationId(storePath) {
562
605
  try {
563
- const dataDir = storePath || envPaths("docs-mcp-server", { suffix: "" }).data;
606
+ const dataDir = resolveStorePath(storePath);
564
607
  const installationIdPath = path.join(dataDir, "installation.id");
565
608
  if (fs.existsSync(installationIdPath)) {
566
609
  const existingId = fs.readFileSync(installationIdPath, "utf8").trim();
@@ -709,7 +752,7 @@ function extractProtocol(urlOrPath) {
709
752
  }
710
753
  }
711
754
  const name = "@arabold/docs-mcp-server";
712
- const version = "1.25.0";
755
+ const version = "1.25.2";
713
756
  const description = "MCP server for fetching and searching documentation";
714
757
  const type = "module";
715
758
  const bin = { "docs-mcp-server": "dist/index.js" };
@@ -1582,26 +1625,6 @@ class MimeTypeUtils {
1582
1625
  return mimeToLanguage[mimeType] || "";
1583
1626
  }
1584
1627
  }
1585
- let projectRoot = null;
1586
- function getProjectRoot() {
1587
- if (projectRoot) {
1588
- return projectRoot;
1589
- }
1590
- const currentFilePath = fileURLToPath(import.meta.url);
1591
- let currentDir = path.dirname(currentFilePath);
1592
- while (true) {
1593
- const packageJsonPath = path.join(currentDir, "package.json");
1594
- if (fs.existsSync(packageJsonPath)) {
1595
- projectRoot = currentDir;
1596
- return projectRoot;
1597
- }
1598
- const parentDir = path.dirname(currentDir);
1599
- if (parentDir === currentDir) {
1600
- throw new Error("Could not find project root containing package.json.");
1601
- }
1602
- currentDir = parentDir;
1603
- }
1604
- }
1605
1628
  const fullTrim = (str) => {
1606
1629
  return str.replace(/^[\s\r\n\t]+|[\s\r\n\t]+$/g, "");
1607
1630
  };
@@ -4814,6 +4837,98 @@ class MarkdownMetadataExtractorMiddleware {
4814
4837
  await next();
4815
4838
  }
4816
4839
  }
4840
+ class HtmlNormalizationMiddleware {
4841
+ async process(context, next) {
4842
+ if (!context.dom) {
4843
+ logger.debug(
4844
+ `Skipping HTML normalization for ${context.source} - no DOM available`
4845
+ );
4846
+ await next();
4847
+ return;
4848
+ }
4849
+ try {
4850
+ logger.debug(`Normalizing HTML URLs and links for ${context.source}`);
4851
+ const $ = context.dom;
4852
+ const baseUrl = context.source;
4853
+ this.normalizeImageUrls($, baseUrl);
4854
+ this.normalizeLinks($, baseUrl);
4855
+ logger.debug(`Successfully normalized HTML content for ${context.source}`);
4856
+ } catch (error) {
4857
+ logger.error(`❌ Failed to normalize HTML for ${context.source}: ${error}`);
4858
+ context.errors.push(
4859
+ error instanceof Error ? error : new Error(`HTML normalization failed: ${String(error)}`)
4860
+ );
4861
+ }
4862
+ await next();
4863
+ }
4864
+ /**
4865
+ * Normalizes image URLs by converting relative URLs to absolute URLs.
4866
+ */
4867
+ normalizeImageUrls($, baseUrl) {
4868
+ $("img").each((_index, element) => {
4869
+ const $img = $(element);
4870
+ const src = $img.attr("src");
4871
+ if (!src) return;
4872
+ try {
4873
+ new URL(src);
4874
+ } catch {
4875
+ try {
4876
+ const absoluteUrl = new URL(src, baseUrl).href;
4877
+ $img.attr("src", absoluteUrl);
4878
+ logger.debug(`Converted relative image URL: ${src} → ${absoluteUrl}`);
4879
+ } catch (error) {
4880
+ logger.debug(`Failed to resolve relative image URL: ${src} - ${error}`);
4881
+ }
4882
+ }
4883
+ });
4884
+ }
4885
+ /**
4886
+ * Normalizes links by:
4887
+ * - Converting relative URLs to absolute URLs
4888
+ * - Unwrapping anchor links (preserving text content)
4889
+ * - Unwrapping non-HTTP links (preserving text content)
4890
+ */
4891
+ normalizeLinks($, baseUrl) {
4892
+ $("a").each((_index, element) => {
4893
+ const $link = $(element);
4894
+ const href = $link.attr("href");
4895
+ if (!href) {
4896
+ this.unwrapElement($, $link);
4897
+ return;
4898
+ }
4899
+ if (href.startsWith("#")) {
4900
+ logger.debug(`Removing anchor link: ${href}`);
4901
+ this.unwrapElement($, $link);
4902
+ return;
4903
+ }
4904
+ try {
4905
+ const url = new URL(href);
4906
+ if (url.protocol !== "http:" && url.protocol !== "https:") {
4907
+ logger.debug(`Removing non-HTTP link: ${href}`);
4908
+ this.unwrapElement($, $link);
4909
+ return;
4910
+ }
4911
+ } catch {
4912
+ try {
4913
+ const absoluteUrl = new URL(href, baseUrl).href;
4914
+ $link.attr("href", absoluteUrl);
4915
+ logger.debug(`Converted relative link URL: ${href} → ${absoluteUrl}`);
4916
+ } catch (error) {
4917
+ logger.debug(`Failed to resolve relative link URL: ${href} - ${error}`);
4918
+ this.unwrapElement($, $link);
4919
+ }
4920
+ }
4921
+ });
4922
+ }
4923
+ /**
4924
+ * Unwraps an element by replacing it with its HTML content.
4925
+ * This preserves the inner HTML (including nested elements) while removing the wrapping tag.
4926
+ */
4927
+ unwrapElement(_$, $element) {
4928
+ const htmlContent = $element.html() || $element.text();
4929
+ $element.replaceWith(htmlContent);
4930
+ }
4931
+ }
4817
4932
  function detectCharsetFromHtml(htmlContent) {
4818
4933
  const charsetMatch = htmlContent.match(
4819
4934
  /<meta\s+charset\s*=\s*["']?([^"'>\s]+)["']?[^>]*>/i
@@ -4937,6 +5052,7 @@ class HtmlPipeline extends BasePipeline {
4937
5052
  new HtmlMetadataExtractorMiddleware(),
4938
5053
  new HtmlLinkExtractorMiddleware(),
4939
5054
  new HtmlSanitizerMiddleware(),
5055
+ new HtmlNormalizationMiddleware(),
4940
5056
  new HtmlToMarkdownMiddleware()
4941
5057
  ];
4942
5058
  const semanticSplitter = new SemanticMarkdownSplitter(
@@ -7297,14 +7413,6 @@ function parseHeaders(headerOptions) {
7297
7413
  }
7298
7414
  return headers;
7299
7415
  }
7300
- const CLI_DEFAULTS = {
7301
- PROTOCOL: DEFAULT_PROTOCOL,
7302
- HTTP_PORT: DEFAULT_HTTP_PORT,
7303
- WEB_PORT: DEFAULT_WEB_PORT,
7304
- HOST: DEFAULT_HOST,
7305
- MAX_CONCURRENCY: DEFAULT_MAX_CONCURRENCY,
7306
- TELEMETRY: true
7307
- };
7308
7416
  function parseAuthConfig(options) {
7309
7417
  if (!options.authEnabled) {
7310
7418
  return void 0;
@@ -12447,34 +12555,10 @@ class DocumentManagementService {
12447
12555
  normalizeVersion(version2) {
12448
12556
  return (version2 ?? "").toLowerCase();
12449
12557
  }
12450
- constructor(embeddingConfig, pipelineConfig, storePath) {
12451
- let dbPath;
12452
- let dbDir;
12453
- if (storePath) {
12454
- dbDir = storePath;
12455
- dbPath = path.join(dbDir, "documents.db");
12456
- logger.debug(`Using database directory from storePath parameter: ${dbDir}`);
12457
- } else {
12458
- const projectRoot2 = getProjectRoot();
12459
- const oldDbDir = path.join(projectRoot2, ".store");
12460
- const oldDbPath = path.join(oldDbDir, "documents.db");
12461
- const oldDbExists = fs.existsSync(oldDbPath);
12462
- if (oldDbExists) {
12463
- dbPath = oldDbPath;
12464
- dbDir = oldDbDir;
12465
- logger.debug(`Using legacy database path: ${dbPath}`);
12466
- } else {
12467
- const standardPaths = envPaths("docs-mcp-server", { suffix: "" });
12468
- dbDir = standardPaths.data;
12469
- dbPath = path.join(dbDir, "documents.db");
12470
- logger.debug(`Using standard database directory: ${dbDir}`);
12471
- }
12472
- }
12473
- try {
12474
- fs.mkdirSync(dbDir, { recursive: true });
12475
- } catch (error) {
12476
- logger.error(`⚠️ Failed to create database directory ${dbDir}: ${error}`);
12477
- }
12558
+ constructor(storePath, embeddingConfig, pipelineConfig) {
12559
+ const dbDir = storePath;
12560
+ const dbPath = path.join(dbDir, "documents.db");
12561
+ logger.debug(`Using database directory: ${dbDir}`);
12478
12562
  this.store = new DocumentStore(dbPath, embeddingConfig);
12479
12563
  this.documentRetriever = new DocumentRetrieverService(this.store);
12480
12564
  this.pipelines = PipelineFactory$1.createStandardPipelines(pipelineConfig);
@@ -12829,16 +12913,19 @@ async function createDocumentManagement(options = {}) {
12829
12913
  await client.initialize();
12830
12914
  return client;
12831
12915
  }
12916
+ if (!options.storePath) {
12917
+ throw new Error("storePath is required when not using a remote server");
12918
+ }
12832
12919
  const service = new DocumentManagementService(
12920
+ options.storePath,
12833
12921
  options.embeddingConfig,
12834
- void 0,
12835
- options.storePath
12922
+ void 0
12836
12923
  );
12837
12924
  await service.initialize();
12838
12925
  return service;
12839
12926
  }
12840
- async function createLocalDocumentManagement(embeddingConfig, storePath) {
12841
- const service = new DocumentManagementService(embeddingConfig, void 0, storePath);
12927
+ async function createLocalDocumentManagement(storePath, embeddingConfig) {
12928
+ const service = new DocumentManagementService(storePath, embeddingConfig, void 0);
12842
12929
  await service.initialize();
12843
12930
  return service;
12844
12931
  }
@@ -12846,7 +12933,7 @@ function createDefaultAction(program) {
12846
12933
  return program.addOption(
12847
12934
  new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default("auto").choices(["auto", "stdio", "http"])
12848
12935
  ).addOption(
12849
- new Option("--port <number>", "Port for the server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
12936
+ new Option("--port <number>", "Port for the server").env("DOCS_MCP_PORT").env("PORT").default(DEFAULT_HTTP_PORT.toString()).argParser((v) => {
12850
12937
  const n = Number(v);
12851
12938
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
12852
12939
  throw new Error("Port must be an integer between 1 and 65535");
@@ -12854,7 +12941,7 @@ function createDefaultAction(program) {
12854
12941
  return String(n);
12855
12942
  })
12856
12943
  ).addOption(
12857
- new Option("--host <host>", "Host to bind the server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
12944
+ new Option("--host <host>", "Host to bind the server to").env("DOCS_MCP_HOST").env("HOST").default(DEFAULT_HOST).argParser(validateHost)
12858
12945
  ).addOption(
12859
12946
  new Option(
12860
12947
  "--embedding-model <model>",
@@ -12911,12 +12998,12 @@ function createDefaultAction(program) {
12911
12998
  validateAuthConfig(authConfig);
12912
12999
  warnHttpUsage(authConfig, port);
12913
13000
  }
12914
- const globalOptions = program.parent?.opts() || {};
13001
+ const globalOptions = program.opts();
12915
13002
  ensurePlaywrightBrowsersInstalled();
12916
13003
  const embeddingConfig = resolveEmbeddingContext(options.embeddingModel);
12917
13004
  const docService = await createLocalDocumentManagement(
12918
- embeddingConfig,
12919
- globalOptions.storePath
13005
+ globalOptions.storePath,
13006
+ embeddingConfig
12920
13007
  );
12921
13008
  const pipelineOptions = {
12922
13009
  recoverJobs: options.resume || false,
@@ -13067,9 +13154,9 @@ function createListCommand(program) {
13067
13154
  }
13068
13155
  function createMcpCommand(program) {
13069
13156
  return program.command("mcp").description("Start MCP server only").addOption(
13070
- new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default(CLI_DEFAULTS.PROTOCOL).choices(["auto", "stdio", "http"])
13157
+ new Option("--protocol <protocol>", "Protocol for MCP server").env("DOCS_MCP_PROTOCOL").default(DEFAULT_PROTOCOL).choices(["auto", "stdio", "http"])
13071
13158
  ).addOption(
13072
- new Option("--port <number>", "Port for the MCP server").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.HTTP_PORT.toString()).argParser((v) => {
13159
+ new Option("--port <number>", "Port for the MCP server").env("DOCS_MCP_PORT").env("PORT").default(DEFAULT_HTTP_PORT.toString()).argParser((v) => {
13073
13160
  const n = Number(v);
13074
13161
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
13075
13162
  throw new Error("Port must be an integer between 1 and 65535");
@@ -13077,7 +13164,7 @@ function createMcpCommand(program) {
13077
13164
  return String(n);
13078
13165
  })
13079
13166
  ).addOption(
13080
- new Option("--host <host>", "Host to bind the MCP server to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
13167
+ new Option("--host <host>", "Host to bind the MCP server to").env("DOCS_MCP_HOST").env("HOST").default(DEFAULT_HOST).argParser(validateHost)
13081
13168
  ).addOption(
13082
13169
  new Option(
13083
13170
  "--embedding-model <model>",
@@ -13437,7 +13524,7 @@ function createSearchCommand(program) {
13437
13524
  }
13438
13525
  function createWebCommand(program) {
13439
13526
  return program.command("web").description("Start web interface only").addOption(
13440
- new Option("--port <number>", "Port for the web interface").env("DOCS_MCP_WEB_PORT").env("DOCS_MCP_PORT").env("PORT").default(CLI_DEFAULTS.WEB_PORT.toString()).argParser((v) => {
13527
+ new Option("--port <number>", "Port for the web interface").env("DOCS_MCP_WEB_PORT").env("DOCS_MCP_PORT").env("PORT").default(DEFAULT_WEB_PORT.toString()).argParser((v) => {
13441
13528
  const n = Number(v);
13442
13529
  if (!Number.isInteger(n) || n < 1 || n > 65535) {
13443
13530
  throw new Error("Port must be an integer between 1 and 65535");
@@ -13445,7 +13532,7 @@ function createWebCommand(program) {
13445
13532
  return String(n);
13446
13533
  })
13447
13534
  ).addOption(
13448
- new Option("--host <host>", "Host to bind the web interface to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
13535
+ new Option("--host <host>", "Host to bind the web interface to").env("DOCS_MCP_HOST").env("HOST").default(DEFAULT_HOST).argParser(validateHost)
13449
13536
  ).addOption(
13450
13537
  new Option(
13451
13538
  "--embedding-model <model>",
@@ -13527,7 +13614,7 @@ function createWorkerCommand(program) {
13527
13614
  return String(n);
13528
13615
  })
13529
13616
  ).addOption(
13530
- new Option("--host <host>", "Host to bind the worker API to").env("DOCS_MCP_HOST").env("HOST").default(CLI_DEFAULTS.HOST).argParser(validateHost)
13617
+ new Option("--host <host>", "Host to bind the worker API to").env("DOCS_MCP_HOST").env("HOST").default(DEFAULT_HOST).argParser(validateHost)
13531
13618
  ).addOption(
13532
13619
  new Option(
13533
13620
  "--embedding-model <model>",
@@ -13547,11 +13634,15 @@ function createWorkerCommand(program) {
13547
13634
  logger.info(`🚀 Starting external pipeline worker on port ${port}`);
13548
13635
  ensurePlaywrightBrowsersInstalled();
13549
13636
  const embeddingConfig = resolveEmbeddingContext(cmdOptions.embeddingModel);
13550
- const docService = await createLocalDocumentManagement(embeddingConfig);
13637
+ const globalOptions = program.parent?.opts() || {};
13638
+ const docService = await createLocalDocumentManagement(
13639
+ globalOptions.storePath,
13640
+ embeddingConfig
13641
+ );
13551
13642
  const pipelineOptions = {
13552
13643
  recoverJobs: cmdOptions.resume,
13553
13644
  // Use the resume option
13554
- concurrency: CLI_DEFAULTS.MAX_CONCURRENCY
13645
+ concurrency: DEFAULT_MAX_CONCURRENCY
13555
13646
  };
13556
13647
  const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
13557
13648
  const config = createAppServerConfig({
@@ -13599,10 +13690,12 @@ function createCliProgram() {
13599
13690
  ).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
13600
13691
  program.hook("preAction", async (thisCommand, actionCommand) => {
13601
13692
  const globalOptions = thisCommand.opts();
13693
+ const resolvedStorePath = resolveStorePath(globalOptions.storePath);
13694
+ globalOptions.storePath = resolvedStorePath;
13602
13695
  setupLogging(globalOptions);
13603
13696
  initTelemetry({
13604
13697
  enabled: globalOptions.telemetry ?? true,
13605
- storePath: globalOptions.storePath
13698
+ storePath: resolvedStorePath
13606
13699
  });
13607
13700
  if (shouldEnableTelemetry()) {
13608
13701
  if (analytics.isEnabled()) {