@sylphx/pdf-reader-mcp 2.4.0 → 2.4.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +259 -36
  2. package/package.json +3 -3
package/dist/index.js CHANGED
@@ -307,7 +307,7 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
307
307
  {
308
308
  type: "text",
309
309
  yPosition: 0,
310
- textContent: `Error processing page: ${message}`
310
+ textContent: `[Error processing page ${String(pageNum)}]`
311
311
  }
312
312
  ];
313
313
  }
@@ -315,14 +315,30 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
315
315
  };
316
316
 
317
317
  // src/pdf/loader.ts
318
- import fs from "node:fs/promises";
318
+ import fs3 from "node:fs/promises";
319
319
  import { createRequire } from "node:module";
320
320
  import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
321
321
 
322
322
  // src/utils/config.ts
323
+ import dns from "node:dns";
324
+ import fs from "node:fs";
325
+ import net from "node:net";
323
326
  import path from "node:path";
324
327
  var splitList = (value, separators) => value.split(separators).map((s) => s.trim()).filter((s) => s.length > 0);
325
- var parseDirs = (values) => values.map((dir) => path.resolve(path.normalize(dir)));
328
+ var canonicalizeDir = (p) => {
329
+ try {
330
+ return fs.realpathSync(p);
331
+ } catch (err) {
332
+ if (typeof err === "object" && err !== null && "code" in err && (err.code === "ENOENT" || err.code === "ENOTDIR")) {
333
+ const parent = path.dirname(p);
334
+ if (parent === p)
335
+ return p;
336
+ return path.join(canonicalizeDir(parent), path.basename(p));
337
+ }
338
+ throw err;
339
+ }
340
+ };
341
+ var parseDirs = (values) => values.map((dir) => canonicalizeDir(path.resolve(path.normalize(dir))));
326
342
  var parseBool = (value, fallback) => {
327
343
  if (value === undefined)
328
344
  return fallback;
@@ -337,6 +353,7 @@ var parseCliFlags = (argv) => {
337
353
  const dirs = [];
338
354
  const hosts = [];
339
355
  let noHttp = false;
356
+ let allowPrivateIps = false;
340
357
  for (const arg of argv) {
341
358
  if (arg.startsWith("--allow-dir=")) {
342
359
  dirs.push(arg.slice("--allow-dir=".length));
@@ -344,9 +361,11 @@ var parseCliFlags = (argv) => {
344
361
  hosts.push(arg.slice("--allow-host=".length).toLowerCase());
345
362
  } else if (arg === "--no-http") {
346
363
  noHttp = true;
364
+ } else if (arg === "--allow-private-ips") {
365
+ allowPrivateIps = true;
347
366
  }
348
367
  }
349
- return { dirs, hosts, noHttp };
368
+ return { dirs, hosts, noHttp, allowPrivateIps };
350
369
  };
351
370
  var envList = (raw, separators, transform = (v) => v) => raw ? splitList(raw, separators).map(transform) : [];
352
371
  var readSecurityConfig = (argv = process.argv.slice(2), env = process.env) => {
@@ -358,7 +377,8 @@ var readSecurityConfig = (argv = process.argv.slice(2), env = process.env) => {
358
377
  return {
359
378
  allowedDirs: mergedDirs.length > 0 ? parseDirs(mergedDirs) : null,
360
379
  allowHttp: cli.noHttp ? false : parseBool(env["MCP_PDF_ALLOW_HTTP"], true),
361
- allowedHosts: mergedHosts.length > 0 ? mergedHosts : null
380
+ allowedHosts: mergedHosts.length > 0 ? mergedHosts : null,
381
+ allowPrivateIps: cli.allowPrivateIps || parseBool(env["MCP_PDF_ALLOW_PRIVATE_IPS"], false)
362
382
  };
363
383
  };
364
384
  var cached = null;
@@ -400,6 +420,70 @@ var isUrlAllowed = (urlString, config) => {
400
420
  return true;
401
421
  return config.allowedHosts.includes(parsed.hostname.toLowerCase());
402
422
  };
423
+ var PRIVATE_IPV4_PREDICATES = [
424
+ (a) => a === 10,
425
+ (a, b) => a === 172 && b >= 16 && b <= 31,
426
+ (a, b) => a === 192 && b === 168,
427
+ (a) => a === 127,
428
+ (a, b) => a === 169 && b === 254,
429
+ (a) => a === 0,
430
+ (a, b) => a === 100 && b >= 64 && b <= 127,
431
+ (a) => a >= 224
432
+ ];
433
+ var isPrivateIpv4 = (ip) => {
434
+ const parts = ip.split(".").map((s) => Number.parseInt(s, 10));
435
+ const a = parts[0];
436
+ const b = parts[1];
437
+ if (a === undefined || b === undefined)
438
+ return true;
439
+ return PRIVATE_IPV4_PREDICATES.some((pred) => pred(a, b));
440
+ };
441
+ var isPrivateIpv6 = (ip) => {
442
+ const lower = ip.toLowerCase();
443
+ if (lower === "::1" || lower === "::")
444
+ return true;
445
+ if (lower.startsWith("fc") || lower.startsWith("fd"))
446
+ return true;
447
+ if (lower.startsWith("fe80"))
448
+ return true;
449
+ if (lower.startsWith("ff"))
450
+ return true;
451
+ if (lower.startsWith("::ffff:")) {
452
+ const tail = lower.slice("::ffff:".length);
453
+ if (net.isIPv4(tail))
454
+ return isPrivateIpv4(tail);
455
+ }
456
+ return false;
457
+ };
458
+ var isPrivateIp = (ip) => {
459
+ if (net.isIPv4(ip))
460
+ return isPrivateIpv4(ip);
461
+ if (net.isIPv6(ip))
462
+ return isPrivateIpv6(ip);
463
+ return true;
464
+ };
465
+ var assertUrlNotPrivate = async (hostname) => {
466
+ if (net.isIP(hostname)) {
467
+ if (isPrivateIp(hostname)) {
468
+ throw new Error(`URL host '${hostname}' resolves to a non-public address (SSRF protection).`);
469
+ }
470
+ return;
471
+ }
472
+ let addresses;
473
+ try {
474
+ addresses = await dns.promises.lookup(hostname, { all: true });
475
+ } catch {
476
+ throw new Error(`URL host '${hostname}' could not be resolved.`);
477
+ }
478
+ if (addresses.length === 0) {
479
+ throw new Error(`URL host '${hostname}' resolved to no addresses.`);
480
+ }
481
+ for (const { address } of addresses) {
482
+ if (isPrivateIp(address)) {
483
+ throw new Error(`URL host '${hostname}' resolves to a non-public address (SSRF protection).`);
484
+ }
485
+ }
486
+ };
403
487
 
404
488
  // src/utils/errors.ts
405
489
  class PdfError extends Error {
@@ -412,19 +496,34 @@ class PdfError extends Error {
412
496
  }
413
497
 
414
498
  // src/utils/pathUtils.ts
499
+ import fs2 from "node:fs";
415
500
  import path2 from "node:path";
416
501
  var PROJECT_ROOT = process.cwd();
502
+ var canonicalize = (p) => {
503
+ try {
504
+ return fs2.realpathSync(p);
505
+ } catch (err) {
506
+ if (typeof err === "object" && err !== null && "code" in err && (err.code === "ENOENT" || err.code === "ENOTDIR")) {
507
+ const parent = path2.dirname(p);
508
+ if (parent === p)
509
+ return p;
510
+ return path2.join(canonicalize(parent), path2.basename(p));
511
+ }
512
+ throw err;
513
+ }
514
+ };
417
515
  var resolvePath = (userPath) => {
418
516
  if (typeof userPath !== "string") {
419
517
  throw new PdfError(-32602 /* InvalidParams */, "Path must be a string.");
420
518
  }
421
519
  const normalizedUserPath = path2.normalize(userPath);
422
520
  const resolved = path2.isAbsolute(normalizedUserPath) ? normalizedUserPath : path2.resolve(PROJECT_ROOT, normalizedUserPath);
521
+ const canonical = canonicalize(resolved);
423
522
  const { allowedDirs } = getSecurityConfig();
424
- if (!isPathAllowed(resolved, allowedDirs)) {
523
+ if (!isPathAllowed(canonical, allowedDirs)) {
425
524
  throw new PdfError(-32600 /* InvalidRequest */, `Access denied: path '${userPath}' is outside the allowed directories.`);
426
525
  }
427
- return resolved;
526
+ return canonical;
428
527
  };
429
528
 
430
529
  // src/pdf/loader.ts
@@ -436,42 +535,157 @@ var STANDARD_FONT_DATA_URL = `${PDFJS_ROOT}standard_fonts/`;
436
535
  var WASM_URL = `${PDFJS_ROOT}wasm/`;
437
536
  var ICC_URL = `${PDFJS_ROOT}iccs/`;
438
537
  var MAX_PDF_SIZE = 100 * 1024 * 1024;
538
+ var URL_FETCH_TIMEOUT_MS = 30000;
539
+ var MAX_REDIRECTS = 5;
540
+ var formatBytes = (bytes) => `${(bytes / 1024 / 1024).toFixed(0)}MB`;
541
+ var sanitizeSourceDescription = (description) => description.length > 200 ? `${description.slice(0, 197)}...` : description;
542
+ var loadLocalFile = async (userPath) => {
543
+ const safePath = resolvePath(userPath);
544
+ let stats;
545
+ try {
546
+ stats = await fs3.stat(safePath);
547
+ } catch (err) {
548
+ if (typeof err === "object" && err !== null && "code" in err && err.code === "ENOENT") {
549
+ throw new PdfError(-32600 /* InvalidRequest */, `File not found at '${userPath}'.`, {
550
+ cause: err instanceof Error ? err : undefined
551
+ });
552
+ }
553
+ throw new PdfError(-32600 /* InvalidRequest */, `Failed to access file at '${userPath}'.`, {
554
+ cause: err instanceof Error ? err : undefined
555
+ });
556
+ }
557
+ if (!stats.isFile()) {
558
+ throw new PdfError(-32600 /* InvalidRequest */, `Path '${userPath}' is not a regular file.`);
559
+ }
560
+ if (stats.size > MAX_PDF_SIZE) {
561
+ throw new PdfError(-32600 /* InvalidRequest */, `PDF file exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)}. File size: ${formatBytes(stats.size)}.`);
562
+ }
563
+ const buffer = await fs3.readFile(safePath);
564
+ return new Uint8Array(buffer);
565
+ };
566
+ var validateUrlHop = async (urlString, config) => {
567
+ if (!isUrlAllowed(urlString, config)) {
568
+ const reason = config.allowHttp ? "host is not in the allowed list or scheme is not http(s)" : "HTTP access is disabled";
569
+ throw new PdfError(-32600 /* InvalidRequest */, `Access denied: URL '${urlString}' rejected (${reason}).`);
570
+ }
571
+ if (!config.allowPrivateIps) {
572
+ let hostname;
573
+ try {
574
+ hostname = new URL(urlString).hostname;
575
+ } catch {
576
+ throw new PdfError(-32600 /* InvalidRequest */, `Invalid URL: '${urlString}'.`);
577
+ }
578
+ try {
579
+ await assertUrlNotPrivate(hostname);
580
+ } catch (err) {
581
+ const reason = err instanceof Error ? err.message : "SSRF check failed";
582
+ throw new PdfError(-32600 /* InvalidRequest */, `Access denied: ${reason}`);
583
+ }
584
+ }
585
+ };
586
+ var fetchUrlBody = async (url, config) => {
587
+ let currentUrl = url;
588
+ const controller = new AbortController;
589
+ const timeout = setTimeout(() => controller.abort(), URL_FETCH_TIMEOUT_MS);
590
+ try {
591
+ for (let hop = 0;hop <= MAX_REDIRECTS; hop++) {
592
+ await validateUrlHop(currentUrl, config);
593
+ const response = await fetch(currentUrl, {
594
+ redirect: "manual",
595
+ signal: controller.signal
596
+ });
597
+ if (response.status >= 300 && response.status < 400) {
598
+ const location = response.headers.get("location");
599
+ if (!location) {
600
+ throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed: redirect without Location header.`);
601
+ }
602
+ currentUrl = new URL(location, currentUrl).toString();
603
+ continue;
604
+ }
605
+ if (!response.ok) {
606
+ throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed with HTTP ${String(response.status)}.`);
607
+ }
608
+ const contentLengthHeader = response.headers.get("content-length");
609
+ if (contentLengthHeader !== null) {
610
+ const declared = Number.parseInt(contentLengthHeader, 10);
611
+ if (Number.isFinite(declared) && declared > MAX_PDF_SIZE) {
612
+ throw new PdfError(-32600 /* InvalidRequest */, `Remote PDF exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)} (Content-Length: ${formatBytes(declared)}).`);
613
+ }
614
+ }
615
+ if (!response.body) {
616
+ const ab = await response.arrayBuffer();
617
+ if (ab.byteLength > MAX_PDF_SIZE) {
618
+ throw new PdfError(-32600 /* InvalidRequest */, `Remote PDF exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)}.`);
619
+ }
620
+ return new Uint8Array(ab);
621
+ }
622
+ const reader = response.body.getReader();
623
+ const chunks = [];
624
+ let total = 0;
625
+ while (true) {
626
+ const { done, value } = await reader.read();
627
+ if (done)
628
+ break;
629
+ if (value) {
630
+ total += value.byteLength;
631
+ if (total > MAX_PDF_SIZE) {
632
+ await reader.cancel().catch(() => {});
633
+ throw new PdfError(-32600 /* InvalidRequest */, `Remote PDF exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)} during streaming.`);
634
+ }
635
+ chunks.push(value);
636
+ }
637
+ }
638
+ const combined = new Uint8Array(total);
639
+ let offset = 0;
640
+ for (const chunk of chunks) {
641
+ combined.set(chunk, offset);
642
+ offset += chunk.byteLength;
643
+ }
644
+ return combined;
645
+ }
646
+ throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed: exceeded redirect limit (${String(MAX_REDIRECTS)}).`);
647
+ } catch (err) {
648
+ if (err instanceof PdfError)
649
+ throw err;
650
+ if (err instanceof Error && (err.name === "AbortError" || err.name === "TimeoutError")) {
651
+ throw new PdfError(-32600 /* InvalidRequest */, `URL fetch timed out after ${String(URL_FETCH_TIMEOUT_MS / 1000)}s.`, { cause: err });
652
+ }
653
+ const message = err instanceof Error ? err.message : String(err);
654
+ logger3.warn("URL fetch failed", { url, error: message });
655
+ throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed for '${url}'.`, {
656
+ cause: err instanceof Error ? err : undefined
657
+ });
658
+ } finally {
659
+ clearTimeout(timeout);
660
+ }
661
+ };
439
662
  var loadPdfDocument = async (source, sourceDescription) => {
440
- let pdfDataSource;
663
+ const safeSource = sanitizeSourceDescription(sourceDescription);
664
+ let pdfData;
441
665
  try {
442
666
  if (source.path) {
443
- const safePath = resolvePath(source.path);
444
- const buffer = await fs.readFile(safePath);
445
- if (buffer.length > MAX_PDF_SIZE) {
446
- throw new PdfError(-32600 /* InvalidRequest */, `PDF file exceeds maximum size of ${MAX_PDF_SIZE} bytes (${(MAX_PDF_SIZE / 1024 / 1024).toFixed(0)}MB). File size: ${buffer.length} bytes.`);
447
- }
448
- pdfDataSource = new Uint8Array(buffer);
667
+ pdfData = await loadLocalFile(source.path);
449
668
  } else if (source.url) {
450
669
  const config = getSecurityConfig();
451
- if (!isUrlAllowed(source.url, config)) {
452
- const reason = config.allowHttp ? `host is not in the allowed list` : `HTTP access is disabled`;
453
- throw new PdfError(-32600 /* InvalidRequest */, `Access denied: URL '${source.url}' rejected (${reason}).`);
454
- }
455
- pdfDataSource = { url: source.url };
670
+ pdfData = await fetchUrlBody(source.url, config);
456
671
  } else {
457
- throw new PdfError(-32602 /* InvalidParams */, `Source ${sourceDescription} missing 'path' or 'url'.`);
672
+ throw new PdfError(-32602 /* InvalidParams */, `Source ${safeSource} missing 'path' or 'url'.`);
458
673
  }
459
674
  } catch (err) {
460
675
  if (err instanceof PdfError) {
461
676
  throw err;
462
677
  }
463
678
  const message = err instanceof Error ? err.message : String(err);
464
- const errorCode = -32600 /* InvalidRequest */;
465
- if (typeof err === "object" && err !== null && "code" in err && err.code === "ENOENT" && source.path) {
466
- throw new PdfError(errorCode, `File not found at '${source.path}'.`, {
467
- cause: err instanceof Error ? err : undefined
468
- });
469
- }
470
- throw new PdfError(errorCode, `Failed to prepare PDF source ${sourceDescription}. Reason: ${message}`, { cause: err instanceof Error ? err : undefined });
679
+ logger3.error("Unexpected error preparing PDF source", {
680
+ sourceDescription: safeSource,
681
+ error: message
682
+ });
683
+ throw new PdfError(-32600 /* InvalidRequest */, `Failed to prepare PDF source ${safeSource}.`, {
684
+ cause: err instanceof Error ? err : undefined
685
+ });
471
686
  }
472
- const documentParams = pdfDataSource instanceof Uint8Array ? { data: pdfDataSource } : pdfDataSource;
473
687
  const loadingTask = getDocument({
474
- ...documentParams,
688
+ data: pdfData,
475
689
  cMapUrl: CMAP_URL,
476
690
  cMapPacked: true,
477
691
  standardFontDataUrl: STANDARD_FONT_DATA_URL,
@@ -482,8 +696,8 @@ var loadPdfDocument = async (source, sourceDescription) => {
482
696
  return await loadingTask.promise;
483
697
  } catch (err) {
484
698
  const message = err instanceof Error ? err.message : String(err);
485
- logger3.error("PDF.js loading error", { sourceDescription, error: message });
486
- throw new PdfError(-32600 /* InvalidRequest */, `Failed to load PDF document from ${sourceDescription}. Reason: ${message || "Unknown loading error"}`, { cause: err instanceof Error ? err : undefined });
699
+ logger3.error("PDF.js loading error", { sourceDescription: safeSource, error: message });
700
+ throw new PdfError(-32600 /* InvalidRequest */, `Failed to load PDF document from ${safeSource}.`, { cause: err instanceof Error ? err : undefined });
487
701
  }
488
702
  };
489
703
 
@@ -923,11 +1137,16 @@ var processSingleSource = async (source, options) => {
923
1137
  }
924
1138
  individualResult = { ...individualResult, data: output, success: true };
925
1139
  } catch (error) {
926
- let errorMessage = `Failed to process PDF from ${sourceDescription}.`;
927
- if (error instanceof Error) {
928
- errorMessage += ` Reason: ${error.message}`;
1140
+ let errorMessage;
1141
+ if (error instanceof PdfError) {
1142
+ errorMessage = error.message;
929
1143
  } else {
930
- errorMessage += ` Unknown error: ${JSON.stringify(error)}`;
1144
+ const detail = error instanceof Error ? error.message : String(error);
1145
+ logger6.error("Unexpected error processing PDF source", {
1146
+ sourceDescription,
1147
+ error: detail
1148
+ });
1149
+ errorMessage = `Failed to process PDF from ${sourceDescription}.`;
931
1150
  }
932
1151
  individualResult.error = errorMessage;
933
1152
  individualResult.success = false;
@@ -1043,12 +1262,13 @@ var transportType = process.env["MCP_TRANSPORT"] ?? "stdio";
1043
1262
  var httpPort = Number.parseInt(process.env["MCP_HTTP_PORT"] ?? "8080", 10);
1044
1263
  var httpHost = process.env["MCP_HTTP_HOST"] ?? "0.0.0.0";
1045
1264
  var apiKey = process.env["MCP_API_KEY"];
1265
+ var corsOrigin = process.env["MCP_CORS_ORIGIN"];
1046
1266
  function createTransport() {
1047
1267
  if (transportType === "http") {
1048
1268
  return http({
1049
1269
  port: httpPort,
1050
1270
  hostname: httpHost,
1051
- cors: "*"
1271
+ ...corsOrigin ? { cors: corsOrigin } : {}
1052
1272
  });
1053
1273
  }
1054
1274
  return stdio();
@@ -1068,6 +1288,9 @@ async function main() {
1068
1288
  if (apiKey) {
1069
1289
  console.log("[PDF Reader MCP] API key authentication enabled (X-API-Key header)");
1070
1290
  }
1291
+ if (corsOrigin) {
1292
+ console.log(`[PDF Reader MCP] CORS allowed origin: ${corsOrigin}`);
1293
+ }
1071
1294
  console.log("[PDF Reader MCP] Project root:", process.cwd());
1072
1295
  } else if (process.env["DEBUG_MCP"]) {
1073
1296
  console.error("[PDF Reader MCP] Server running on stdio");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sylphx/pdf-reader-mcp",
3
- "version": "2.4.0",
3
+ "version": "2.4.1",
4
4
  "description": "An MCP server providing tools to read PDF files.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -91,11 +91,11 @@
91
91
  "@types/glob": "^8.1.0",
92
92
  "@types/node": "^25.6.0",
93
93
  "@types/pngjs": "^6.0.5",
94
- "bunup": "0.16.10",
94
+ "bunup": "0.16.31",
95
95
  "lefthook": "^2.1.6",
96
96
  "typedoc": "^0.28.19",
97
97
  "typedoc-plugin-markdown": "^4.11.0",
98
- "typescript": "^5.9.3",
98
+ "typescript": "^6.0.3",
99
99
  "vitepress": "^1.6.4"
100
100
  },
101
101
  "packageManager": "bun@1.3.1"