@sylphx/pdf-reader-mcp 2.3.1 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -0
- package/dist/index.js +345 -30
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -171,6 +171,10 @@ Add to Cline's MCP settings:
|
|
|
171
171
|
1. Go to **Settings** → **AI** → **Manage MCP Servers** → **Add**
|
|
172
172
|
2. Command: `npx`, Args: `@sylphx/pdf-reader-mcp`
|
|
173
173
|
|
|
174
|
+
### Ontheia
|
|
175
|
+
|
|
176
|
+
Add the server in **Settings** → **MCP Servers** → **Add Server** with command `npx` and args `@sylphx/pdf-reader-mcp`. See [Ontheia's compatible MCP servers](https://docs.ontheia.ai/getting-started/03_compatible-mcp-servers/) for the full list.
|
|
177
|
+
|
|
174
178
|
### Smithery (One-click)
|
|
175
179
|
|
|
176
180
|
```bash
|
|
@@ -538,6 +542,55 @@ Response Order:
|
|
|
538
542
|
|
|
539
543
|
---
|
|
540
544
|
|
|
545
|
+
## 🔒 Security & Sandboxing
|
|
546
|
+
|
|
547
|
+
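By default the server can read any local file the host process can access and fetch any HTTP(S) URL whose host resolves to a public address; URLs that point at private, loopback, or link-local addresses are rejected unless `--allow-private-ips` (or `MCP_PDF_ALLOW_PRIVATE_IPS=true`) is set. When running outside a sandbox you should restrict it to a specific working set.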
|
|
548
|
+
|
|
549
|
+
### Restricting filesystem access
|
|
550
|
+
|
|
551
|
+
Use `--allow-dir` (repeatable) or the `MCP_PDF_ALLOWED_DIRS` env var (`:` or `,` separated). Once set, all `path` sources must resolve inside one of the allowed directories — relative paths, absolute paths, and `..` traversal are all checked after resolution.
|
|
552
|
+
|
|
553
|
+
```bash
|
|
554
|
+
# CLI flags
|
|
555
|
+
npx @sylphx/pdf-reader-mcp --allow-dir=/srv/pdfs --allow-dir=/data/reports
|
|
556
|
+
|
|
557
|
+
# Environment
|
|
558
|
+
MCP_PDF_ALLOWED_DIRS="/srv/pdfs:/data/reports" npx @sylphx/pdf-reader-mcp
|
|
559
|
+
```
|
|
560
|
+
|
|
561
|
+
```json
|
|
562
|
+
{
|
|
563
|
+
"mcpServers": {
|
|
564
|
+
"pdf-reader": {
|
|
565
|
+
"command": "npx",
|
|
566
|
+
"args": ["@sylphx/pdf-reader-mcp", "--allow-dir=/srv/pdfs"]
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
```
|
|
571
|
+
|
|
572
|
+
### Disabling or restricting HTTP
|
|
573
|
+
|
|
574
|
+
```bash
|
|
575
|
+
# Block all URL sources
|
|
576
|
+
npx @sylphx/pdf-reader-mcp --no-http
|
|
577
|
+
MCP_PDF_ALLOW_HTTP=false npx @sylphx/pdf-reader-mcp
|
|
578
|
+
|
|
579
|
+
# Allowlist hosts (everything else rejected)
|
|
580
|
+
npx @sylphx/pdf-reader-mcp --allow-host=cdn.example.com --allow-host=files.internal
|
|
581
|
+
MCP_PDF_ALLOWED_HOSTS="cdn.example.com,files.internal" npx @sylphx/pdf-reader-mcp
|
|
582
|
+
```
|
|
583
|
+
|
|
584
|
+
| Setting | CLI flag | Environment variable | Default |
|
|
585
|
+
|---------|----------|----------------------|---------|
|
|
586
|
+
| Filesystem allowlist | `--allow-dir=<path>` (repeatable) | `MCP_PDF_ALLOWED_DIRS` (`:` or `,` separated) | unrestricted |
|
|
587
|
+
| Disable HTTP | `--no-http` | `MCP_PDF_ALLOW_HTTP=false` | enabled |
|
|
588
|
+
| HTTP host allowlist | `--allow-host=<host>` (repeatable) | `MCP_PDF_ALLOWED_HOSTS` (`,` separated) | any host |
|
|
589
|
+
|
|
590
|
+
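Denied requests fail fast with an `Access denied` error before the file is opened or the HTTP request is sent. (Resolving the path's symlinks and the DNS lookup used for the private-address check still happen first, because the decision depends on them.)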
|
|
591
|
+
|
|
592
|
+
---
|
|
593
|
+
|
|
541
594
|
## 🔧 Troubleshooting
|
|
542
595
|
|
|
543
596
|
### "Absolute paths are not allowed"
|
package/dist/index.js
CHANGED
|
@@ -307,7 +307,7 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
|
|
|
307
307
|
{
|
|
308
308
|
type: "text",
|
|
309
309
|
yPosition: 0,
|
|
310
|
-
textContent: `Error processing page
|
|
310
|
+
textContent: `[Error processing page ${String(pageNum)}]`
|
|
311
311
|
}
|
|
312
312
|
];
|
|
313
313
|
}
|
|
@@ -315,10 +315,176 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
|
|
|
315
315
|
};
|
|
316
316
|
|
|
317
317
|
// src/pdf/loader.ts
|
|
318
|
-
import
|
|
318
|
+
import fs3 from "node:fs/promises";
|
|
319
319
|
import { createRequire } from "node:module";
|
|
320
320
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
321
321
|
|
|
322
|
+
// src/utils/config.ts
|
|
323
|
+
import dns from "node:dns";
|
|
324
|
+
import fs from "node:fs";
|
|
325
|
+
import net from "node:net";
|
|
326
|
+
import path from "node:path";
|
|
327
|
+
/**
 * Split a delimited string into its non-empty, trimmed entries.
 *
 * @param {string} value - Raw delimited text.
 * @param {string|RegExp} separators - Separator passed straight to `String.prototype.split`.
 * @returns {string[]} Entries in their original order, whitespace-trimmed, with blanks dropped.
 */
var splitList = (value, separators) => {
  const entries = [];
  for (const piece of value.split(separators)) {
    const entry = piece.trim();
    if (entry.length > 0) {
      entries.push(entry);
    }
  }
  return entries;
};
|
|
328
|
+
/**
 * Resolve symlinks in `p`, tolerating path components that do not exist yet.
 *
 * Walks upward from `p` until `fs.realpathSync` succeeds (or the filesystem
 * root is reached), then re-appends the missing trailing components unchanged.
 * Only ENOENT / ENOTDIR are treated as "missing"; any other error is rethrown.
 *
 * @param {string} p - Path to canonicalize.
 * @returns {string} The canonical path.
 */
var canonicalizeDir = (p) => {
  const missingTail = [];
  let current = p;
  for (;;) {
    try {
      const real = fs.realpathSync(current);
      return missingTail.length === 0 ? real : path.join(real, ...missingTail);
    } catch (err) {
      const code = typeof err === "object" && err !== null && "code" in err ? err.code : undefined;
      if (code !== "ENOENT" && code !== "ENOTDIR") {
        throw err;
      }
    }
    const parent = path.dirname(current);
    if (parent === current) {
      // Reached the root without finding an existing ancestor.
      return missingTail.length === 0 ? current : path.join(current, ...missingTail);
    }
    missingTail.unshift(path.basename(current));
    current = parent;
  }
};
|
|
341
|
+
/**
 * Convert configured directory strings into absolute, symlink-resolved paths.
 *
 * @param {string[]} values - Directory paths as given on the CLI or in the environment.
 * @returns {string[]} One canonical absolute path per input, in the same order.
 */
var parseDirs = (values) => {
  const canonicalDirs = [];
  for (const dir of values) {
    const absolute = path.resolve(path.normalize(dir));
    canonicalDirs.push(canonicalizeDir(absolute));
  }
  return canonicalDirs;
};
|
|
342
|
+
/**
 * Interpret a textual boolean setting.
 *
 * @param {string|undefined} value - Raw setting; compared case-insensitively after trimming.
 * @param {boolean} fallback - Returned when `value` is undefined or not a recognized word.
 * @returns {boolean} `false` for false/0/no/off, `true` for true/1/yes/on, otherwise `fallback`.
 */
var parseBool = (value, fallback) => {
  if (value === undefined) {
    return fallback;
  }
  switch (value.trim().toLowerCase()) {
    case "false":
    case "0":
    case "no":
    case "off":
      return false;
    case "true":
    case "1":
    case "yes":
    case "on":
      return true;
    default:
      return fallback;
  }
};
|
|
352
|
+
/**
 * Extract the security-related flags from a command-line argument list.
 *
 * Recognized forms: `--allow-dir=<path>` and `--allow-host=<host>` (both
 * repeatable), `--no-http`, and `--allow-private-ips`. Anything else is ignored.
 * Host values are lower-cased; directory values are kept verbatim.
 *
 * @param {string[]} argv - Arguments to scan.
 * @returns {{dirs: string[], hosts: string[], noHttp: boolean, allowPrivateIps: boolean}}
 */
var parseCliFlags = (argv) => {
  const DIR_PREFIX = "--allow-dir=";
  const HOST_PREFIX = "--allow-host=";
  const result = { dirs: [], hosts: [], noHttp: false, allowPrivateIps: false };
  argv.forEach((token) => {
    if (token.startsWith(DIR_PREFIX)) {
      result.dirs.push(token.slice(DIR_PREFIX.length));
      return;
    }
    if (token.startsWith(HOST_PREFIX)) {
      result.hosts.push(token.slice(HOST_PREFIX.length).toLowerCase());
      return;
    }
    if (token === "--no-http") {
      result.noHttp = true;
      return;
    }
    if (token === "--allow-private-ips") {
      result.allowPrivateIps = true;
    }
  });
  return result;
};
|
|
370
|
+
/**
 * Parse a delimited environment-variable value into a list.
 *
 * @param {string|undefined} raw - Raw variable value; any falsy value yields an empty list.
 * @param {string|RegExp} separators - Separator handed to `splitList`.
 * @param {Function} [transform] - Mapping applied to each entry (identity by default).
 * @returns {Array} The transformed entries.
 */
var envList = (raw, separators, transform = (v) => v) => {
  if (!raw) {
    return [];
  }
  return splitList(raw, separators).map(transform);
};
|
|
371
|
+
/**
 * Build the security configuration from CLI flags and environment variables.
 *
 * CLI and environment allowlists are merged (CLI entries first). An empty
 * merged list is reported as `null`, which the checks in this file treat as
 * "no restriction".
 *
 * @param {string[]} [argv] - Arguments to parse; defaults to `process.argv.slice(2)`.
 * @param {Object} [env] - Environment map; defaults to `process.env`.
 * @returns {{allowedDirs: string[]|null, allowHttp: boolean, allowedHosts: string[]|null, allowPrivateIps: boolean}}
 */
var readSecurityConfig = (argv = process.argv.slice(2), env = process.env) => {
  const flags = parseCliFlags(argv);
  // NOTE(review): ':' is a separator here, so a Windows path such as 'C:\pdfs'
  // in MCP_PDF_ALLOWED_DIRS would be split at the drive letter — confirm intent.
  const dirsFromEnv = envList(env["MCP_PDF_ALLOWED_DIRS"], /[:,]/);
  const hostsFromEnv = envList(env["MCP_PDF_ALLOWED_HOSTS"], /,/, (h) => h.toLowerCase());
  const allDirs = flags.dirs.concat(dirsFromEnv);
  const allHosts = flags.hosts.concat(hostsFromEnv);

  const allowedDirs = allDirs.length > 0 ? parseDirs(allDirs) : null;
  // --no-http always wins over the environment variable.
  const allowHttp = flags.noHttp ? false : parseBool(env["MCP_PDF_ALLOW_HTTP"], true);
  const allowedHosts = allHosts.length > 0 ? allHosts : null;
  const allowPrivateIps = flags.allowPrivateIps || parseBool(env["MCP_PDF_ALLOW_PRIVATE_IPS"], false);

  return { allowedDirs, allowHttp, allowedHosts, allowPrivateIps };
};
|
|
384
|
+
// Security configuration computed on first use and reused afterwards.
var cached = null;
/**
 * Return the process-wide security configuration, reading it on the first call.
 *
 * @returns {Object} The object produced by `readSecurityConfig()`.
 */
var getSecurityConfig = () => {
  cached ??= readSecurityConfig();
  return cached;
};
|
|
391
|
+
/**
 * Decide whether `absPath` lies inside (or is) one of the allowed directories.
 *
 * @param {string} absPath - Path to test; it is passed through `path.resolve` first.
 * @param {string[]|null} allowedDirs - Allowed directory roots. `null` means
 *   unrestricted; an empty array allows nothing.
 * @returns {boolean} `true` when access is permitted.
 */
var isPathAllowed = (absPath, allowedDirs) => {
  if (allowedDirs === null) {
    return true;
  }
  const target = path.resolve(absPath);
  return allowedDirs.some((dir) => {
    const rel = path.relative(dir, target);
    if (rel === "") {
      return true; // the allowed directory itself
    }
    if (path.isAbsolute(rel)) {
      return false; // e.g. a different drive on Windows
    }
    // Escaping the directory means the FIRST SEGMENT is exactly "..".
    // A plain prefix test would also reject legitimate children whose names
    // merely begin with two dots, such as "..hidden/report.pdf".
    return rel !== ".." && !rel.startsWith(`..${path.sep}`);
  });
};
|
|
408
|
+
/**
 * Check a URL against the HTTP policy: HTTP enabled, http(s) scheme, and
 * (when a host allowlist exists) an exact hostname match.
 *
 * @param {string} urlString - URL to test; unparseable input is rejected.
 * @param {{allowHttp: boolean, allowedHosts: string[]|null}} config - Policy to apply.
 * @returns {boolean} `true` when the URL may be fetched under this policy.
 */
var isUrlAllowed = (urlString, config) => {
  if (!config.allowHttp) {
    return false;
  }
  let scheme;
  let host;
  try {
    ({ protocol: scheme, hostname: host } = new URL(urlString));
  } catch {
    return false;
  }
  const isWebScheme = scheme === "http:" || scheme === "https:";
  if (!isWebScheme) {
    return false;
  }
  const { allowedHosts } = config;
  return allowedHosts === null || allowedHosts.includes(host.toLowerCase());
};
|
|
423
|
+
// Each predicate receives the first two octets (a, b) of an IPv4 address and
// returns true when the address falls in a range that must not be fetched.
var PRIVATE_IPV4_PREDICATES = [
  (a) => a === 10, // 10.0.0.0/8 private
  (a, b) => a === 172 && b >= 16 && b <= 31, // 172.16.0.0/12 private
  (a, b) => a === 192 && b === 168, // 192.168.0.0/16 private
  (a) => a === 127, // 127.0.0.0/8 loopback
  (a, b) => a === 169 && b === 254, // 169.254.0.0/16 link-local
  (a) => a === 0, // 0.0.0.0/8 "this network"
  (a, b) => a === 100 && b >= 64 && b <= 127, // 100.64.0.0/10 carrier-grade NAT
  (a) => a >= 224 // 224.0.0.0/4 multicast and 240.0.0.0/4 reserved
];
/**
 * Report whether a dotted-quad IPv4 address is in a non-public range.
 *
 * @param {string} ip - IPv4 address in dotted-decimal form.
 * @returns {boolean} `true` for non-public ranges and for anything that is not
 *   four decimal octets in 0-255 (malformed input fails closed).
 */
var isPrivateIpv4 = (ip) => {
  const octets = ip.split(".");
  const wellFormed = octets.length === 4 && octets.every((o) => /^\d{1,3}$/.test(o) && Number(o) <= 255);
  if (!wellFormed) {
    return true;
  }
  const a = Number(octets[0]);
  const b = Number(octets[1]);
  return PRIVATE_IPV4_PREDICATES.some((pred) => pred(a, b));
};
/**
 * Expand a textual IPv6 address into its eight 16-bit groups.
 *
 * Handles `::` compression, a trailing dotted-quad, and a `%zone` suffix.
 *
 * @param {string} ip - IPv6 address text.
 * @returns {number[]|null} Eight integers in 0..0xffff, or `null` when the text
 *   cannot be parsed.
 */
var parseIpv6Groups = (ip) => {
  let text = ip.toLowerCase().split("%")[0];
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    // Rewrite an embedded dotted quad (e.g. ::ffff:127.0.0.1) as two hex groups.
    if (!net.isIPv4(tail)) {
      return null;
    }
    const [o1, o2, o3, o4] = tail.split(".").map(Number);
    const hi = ((o1 << 8) | o2).toString(16);
    const lo = ((o3 << 8) | o4).toString(16);
    text = `${text.slice(0, lastColon + 1)}${hi}:${lo}`;
  }
  const halves = text.split("::");
  if (halves.length > 2) {
    return null;
  }
  const head = halves[0] ? halves[0].split(":") : [];
  const rest = halves.length === 2 && halves[1] ? halves[1].split(":") : [];
  const missing = 8 - head.length - rest.length;
  // Without "::" all eight groups must be present; with it, at least one is elided.
  if (halves.length === 1 ? missing !== 0 : missing < 1) {
    return null;
  }
  const parts = [...head, ...new Array(missing).fill("0"), ...rest];
  if (!parts.every((g) => /^[0-9a-f]{1,4}$/.test(g))) {
    return null;
  }
  return parts.map((g) => Number.parseInt(g, 16));
};
/**
 * Report whether an IPv6 address is non-public.
 *
 * Non-public means: unspecified (`::`), loopback (`::1`), unique-local
 * (fc00::/7), link-local (fe80::/10), multicast (ff00::/8), or an IPv4-mapped
 * address (::ffff:0:0/96) whose embedded IPv4 address is itself non-public.
 * The decision is made on the parsed groups, so compressed, expanded,
 * dotted-quad and hex spellings of the same address all get the same answer.
 *
 * @param {string} ip - IPv6 address text.
 * @returns {boolean} `true` when the address is non-public or cannot be parsed
 *   (fail closed).
 */
var isPrivateIpv6 = (ip) => {
  const groups = parseIpv6Groups(ip);
  if (groups === null) {
    return true;
  }
  const [g0, , , , , g5, g6, g7] = groups;
  const firstFiveZero = groups.slice(0, 5).every((g) => g === 0);
  if (firstFiveZero && g5 === 0 && g6 === 0 && (g7 === 0 || g7 === 1)) {
    return true; // :: and ::1
  }
  if (firstFiveZero && g5 === 0xffff) {
    // IPv4-mapped: judge by the embedded IPv4 address, whatever its spelling.
    return isPrivateIpv4(`${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`);
  }
  if ((g0 & 0xfe00) === 0xfc00) {
    return true; // fc00::/7 unique-local
  }
  if ((g0 & 0xffc0) === 0xfe80) {
    return true; // fe80::/10 link-local (fe80 through febf)
  }
  if ((g0 & 0xff00) === 0xff00) {
    return true; // ff00::/8 multicast
  }
  return false;
};
|
|
458
|
+
/**
 * Report whether an IP address literal is non-public.
 *
 * @param {string} ip - IPv4 or IPv6 address text.
 * @returns {boolean} The family-specific verdict, or `true` when `ip` is not a
 *   valid address of either family.
 */
var isPrivateIp = (ip) => {
  switch (net.isIP(ip)) {
    case 4:
      return isPrivateIpv4(ip);
    case 6:
      return isPrivateIpv6(ip);
    default:
      return true;
  }
};
|
|
465
|
+
/**
 * Reject hosts that are, or resolve to, a non-public address (SSRF guard).
 *
 * IP literals are checked directly. Names are resolved with `dns.lookup`
 * and rejected if ANY returned address is non-public.
 *
 * @param {string} hostname - Host part of a URL. `URL.hostname` keeps the
 *   square brackets around IPv6 literals (e.g. "[::1]"), so they are stripped
 *   here before classification; otherwise the literal is mistaken for a DNS name.
 * @returns {Promise<void>} Resolves when every address is public.
 * @throws {Error} When the host is non-public, cannot be resolved, or resolves
 *   to no addresses.
 */
var assertUrlNotPrivate = async (hostname) => {
  const host = hostname.startsWith("[") && hostname.endsWith("]") ? hostname.slice(1, -1) : hostname;
  const nonPublicMessage = `URL host '${hostname}' resolves to a non-public address (SSRF protection).`;
  if (net.isIP(host)) {
    if (isPrivateIp(host)) {
      throw new Error(nonPublicMessage);
    }
    return;
  }
  let addresses;
  try {
    addresses = await dns.promises.lookup(host, { all: true });
  } catch {
    throw new Error(`URL host '${hostname}' could not be resolved.`);
  }
  if (addresses.length === 0) {
    throw new Error(`URL host '${hostname}' resolved to no addresses.`);
  }
  // NOTE(review): the later fetch resolves the name again on its own, so this
  // check alone does not rule out an answer that changes between the two lookups.
  for (const { address } of addresses) {
    if (isPrivateIp(address)) {
      throw new Error(nonPublicMessage);
    }
  }
};
|
|
487
|
+
|
|
322
488
|
// src/utils/errors.ts
|
|
323
489
|
class PdfError extends Error {
|
|
324
490
|
code;
|
|
@@ -330,14 +496,34 @@ class PdfError extends Error {
|
|
|
330
496
|
}
|
|
331
497
|
|
|
332
498
|
// src/utils/pathUtils.ts
|
|
333
|
-
import
|
|
499
|
+
import fs2 from "node:fs";
|
|
500
|
+
import path2 from "node:path";
|
|
334
501
|
var PROJECT_ROOT = process.cwd();
|
|
502
|
+
/**
 * Resolve symlinks in `p`, tolerating a path whose tail does not exist yet.
 *
 * When `p` (or one of its ancestors) is missing (ENOENT / ENOTDIR), the nearest
 * existing ancestor is canonicalized and the missing components are appended
 * unchanged. Any other filesystem error is rethrown.
 *
 * @param {string} p - Path to canonicalize.
 * @returns {string} The canonical path.
 */
var canonicalize = (p) => {
  let real;
  try {
    real = fs2.realpathSync(p);
  } catch (err) {
    const code = typeof err === "object" && err !== null && "code" in err ? err.code : undefined;
    if (code !== "ENOENT" && code !== "ENOTDIR") {
      throw err;
    }
    const parent = path2.dirname(p);
    // dirname() of a root is the root itself: nothing further to climb.
    return parent === p ? p : path2.join(canonicalize(parent), path2.basename(p));
  }
  return real;
};
|
|
335
515
|
/**
 * Turn a user-supplied path into a canonical absolute path and enforce the
 * directory allowlist.
 *
 * Relative paths are resolved against PROJECT_ROOT. The allowlist check runs
 * on the canonicalized (symlink-resolved) path.
 *
 * @param {string} userPath - Path as supplied by the caller.
 * @returns {string} Canonical absolute path.
 * @throws {PdfError} InvalidParams when `userPath` is not a string;
 *   InvalidRequest when the path is outside the allowed directories.
 */
var resolvePath = (userPath) => {
  if (typeof userPath !== "string") {
    throw new PdfError(-32602 /* InvalidParams */, "Path must be a string.");
  }
  const normalizedUserPath = path2.normalize(userPath);
  let absolute = normalizedUserPath;
  if (!path2.isAbsolute(absolute)) {
    absolute = path2.resolve(PROJECT_ROOT, normalizedUserPath);
  }
  const canonical = canonicalize(absolute);
  if (isPathAllowed(canonical, getSecurityConfig().allowedDirs)) {
    return canonical;
  }
  throw new PdfError(-32600 /* InvalidRequest */, `Access denied: path '${userPath}' is outside the allowed directories.`);
};
|
|
342
528
|
|
|
343
529
|
// src/pdf/loader.ts
|
|
@@ -349,37 +535,157 @@ var STANDARD_FONT_DATA_URL = `${PDFJS_ROOT}standard_fonts/`;
|
|
|
349
535
|
var WASM_URL = `${PDFJS_ROOT}wasm/`;
|
|
350
536
|
var ICC_URL = `${PDFJS_ROOT}iccs/`;
|
|
351
537
|
var MAX_PDF_SIZE = 100 * 1024 * 1024;
|
|
538
|
+
var URL_FETCH_TIMEOUT_MS = 30000;
|
|
539
|
+
var MAX_REDIRECTS = 5;
|
|
540
|
+
var formatBytes = (bytes) => `${(bytes / 1024 / 1024).toFixed(0)}MB`;
|
|
541
|
+
var sanitizeSourceDescription = (description) => description.length > 200 ? `${description.slice(0, 197)}...` : description;
|
|
542
|
+
/**
 * Read a local PDF into memory after path validation and size checks.
 *
 * @param {string} userPath - Path as supplied by the caller; validated by `resolvePath`.
 * @returns {Promise<Uint8Array>} The file contents.
 * @throws {PdfError} When the file is missing or inaccessible, is not a regular
 *   file, or is larger than MAX_PDF_SIZE.
 */
var loadLocalFile = async (userPath) => {
  const safePath = resolvePath(userPath);
  const stats = await fs3.stat(safePath).catch((err) => {
    const isMissing = typeof err === "object" && err !== null && "code" in err && err.code === "ENOENT";
    const summary = isMissing ? `File not found at '${userPath}'.` : `Failed to access file at '${userPath}'.`;
    throw new PdfError(-32600 /* InvalidRequest */, summary, {
      cause: err instanceof Error ? err : undefined
    });
  });
  if (!stats.isFile()) {
    throw new PdfError(-32600 /* InvalidRequest */, `Path '${userPath}' is not a regular file.`);
  }
  // Size is checked from metadata so an oversized file is never read into memory.
  if (stats.size > MAX_PDF_SIZE) {
    throw new PdfError(-32600 /* InvalidRequest */, `PDF file exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)}. File size: ${formatBytes(stats.size)}.`);
  }
  return new Uint8Array(await fs3.readFile(safePath));
};
|
|
566
|
+
/**
 * Validate one URL (the initial request or a redirect target) against the
 * HTTP policy and, unless private IPs are allowed, the SSRF guard.
 *
 * @param {string} urlString - URL about to be fetched.
 * @param {Object} config - Security configuration (`allowHttp`, `allowedHosts`, `allowPrivateIps`).
 * @returns {Promise<void>} Resolves when the URL may be fetched.
 * @throws {PdfError} InvalidRequest when the URL is rejected or malformed.
 */
var validateUrlHop = async (urlString, config) => {
  if (!isUrlAllowed(urlString, config)) {
    const reason = config.allowHttp ? "host is not in the allowed list or scheme is not http(s)" : "HTTP access is disabled";
    throw new PdfError(-32600 /* InvalidRequest */, `Access denied: URL '${urlString}' rejected (${reason}).`);
  }
  if (config.allowPrivateIps) {
    return;
  }
  let hostname;
  try {
    ({ hostname } = new URL(urlString));
  } catch {
    throw new PdfError(-32600 /* InvalidRequest */, `Invalid URL: '${urlString}'.`);
  }
  await assertUrlNotPrivate(hostname).catch((err) => {
    const reason = err instanceof Error ? err.message : "SSRF check failed";
    throw new PdfError(-32600 /* InvalidRequest */, `Access denied: ${reason}`);
  });
};
|
|
586
|
+
/**
 * Read a fetch Response body into memory, enforcing MAX_PDF_SIZE.
 *
 * @param {Response} response - A successful fetch response.
 * @returns {Promise<Uint8Array>} The complete body.
 * @throws {PdfError} When the body exceeds MAX_PDF_SIZE.
 */
var readResponseBodyWithinLimit = async (response) => {
  if (!response.body) {
    const ab = await response.arrayBuffer();
    if (ab.byteLength > MAX_PDF_SIZE) {
      throw new PdfError(-32600 /* InvalidRequest */, `Remote PDF exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)}.`);
    }
    return new Uint8Array(ab);
  }
  const reader = response.body.getReader();
  const chunks = [];
  let total = 0;
  while (true) {
    const { done, value } = await reader.read();
    if (done) {
      break;
    }
    if (value) {
      total += value.byteLength;
      if (total > MAX_PDF_SIZE) {
        // Best-effort: stop the download; a cancel failure must not mask the size error.
        await reader.cancel().catch(() => {});
        throw new PdfError(-32600 /* InvalidRequest */, `Remote PDF exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)} during streaming.`);
      }
      chunks.push(value);
    }
  }
  const combined = new Uint8Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    combined.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return combined;
};
/**
 * Download a PDF over HTTP(S) with manual redirect handling.
 *
 * Every hop (the initial URL and each redirect target) is validated with
 * `validateUrlHop` before it is requested. At most MAX_REDIRECTS redirects are
 * followed, one timer of URL_FETCH_TIMEOUT_MS covers the whole operation, and
 * the body is capped at MAX_PDF_SIZE (checked against Content-Length when
 * present, and again while streaming).
 *
 * @param {string} url - Initial URL.
 * @param {Object} config - Security configuration passed to `validateUrlHop`.
 * @returns {Promise<Uint8Array>} The downloaded bytes.
 * @throws {PdfError} On policy rejection, HTTP error status, size limit,
 *   redirect limit, timeout, or any network failure.
 */
var fetchUrlBody = async (url, config) => {
  let currentUrl = url;
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), URL_FETCH_TIMEOUT_MS);
  try {
    for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
      await validateUrlHop(currentUrl, config);
      const response = await fetch(currentUrl, {
        redirect: "manual",
        signal: controller.signal
      });
      if (response.status >= 300 && response.status < 400) {
        const location = response.headers.get("location");
        // Release the redirect response before moving on: an unread body keeps
        // its connection tied up until it is garbage-collected.
        if (response.body) {
          await response.body.cancel().catch(() => {});
        }
        if (!location) {
          throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed: redirect without Location header.`);
        }
        currentUrl = new URL(location, currentUrl).toString();
        continue;
      }
      if (!response.ok) {
        throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed with HTTP ${String(response.status)}.`);
      }
      const contentLengthHeader = response.headers.get("content-length");
      if (contentLengthHeader !== null) {
        const declared = Number.parseInt(contentLengthHeader, 10);
        if (Number.isFinite(declared) && declared > MAX_PDF_SIZE) {
          throw new PdfError(-32600 /* InvalidRequest */, `Remote PDF exceeds maximum size of ${formatBytes(MAX_PDF_SIZE)} (Content-Length: ${formatBytes(declared)}).`);
        }
      }
      return await readResponseBodyWithinLimit(response);
    }
    throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed: exceeded redirect limit (${String(MAX_REDIRECTS)}).`);
  } catch (err) {
    if (err instanceof PdfError) {
      throw err;
    }
    if (err instanceof Error && (err.name === "AbortError" || err.name === "TimeoutError")) {
      throw new PdfError(-32600 /* InvalidRequest */, `URL fetch timed out after ${String(URL_FETCH_TIMEOUT_MS / 1000)}s.`, { cause: err });
    }
    const message = err instanceof Error ? err.message : String(err);
    logger3.warn("URL fetch failed", { url, error: message });
    throw new PdfError(-32600 /* InvalidRequest */, `URL fetch failed for '${url}'.`, {
      cause: err instanceof Error ? err : undefined
    });
  } finally {
    clearTimeout(timeout);
  }
};
|
|
352
662
|
var loadPdfDocument = async (source, sourceDescription) => {
|
|
353
|
-
|
|
663
|
+
const safeSource = sanitizeSourceDescription(sourceDescription);
|
|
664
|
+
let pdfData;
|
|
354
665
|
try {
|
|
355
666
|
if (source.path) {
|
|
356
|
-
|
|
357
|
-
const buffer = await fs.readFile(safePath);
|
|
358
|
-
if (buffer.length > MAX_PDF_SIZE) {
|
|
359
|
-
throw new PdfError(-32600 /* InvalidRequest */, `PDF file exceeds maximum size of ${MAX_PDF_SIZE} bytes (${(MAX_PDF_SIZE / 1024 / 1024).toFixed(0)}MB). File size: ${buffer.length} bytes.`);
|
|
360
|
-
}
|
|
361
|
-
pdfDataSource = new Uint8Array(buffer);
|
|
667
|
+
pdfData = await loadLocalFile(source.path);
|
|
362
668
|
} else if (source.url) {
|
|
363
|
-
|
|
669
|
+
const config = getSecurityConfig();
|
|
670
|
+
pdfData = await fetchUrlBody(source.url, config);
|
|
364
671
|
} else {
|
|
365
|
-
throw new PdfError(-32602 /* InvalidParams */, `Source ${
|
|
672
|
+
throw new PdfError(-32602 /* InvalidParams */, `Source ${safeSource} missing 'path' or 'url'.`);
|
|
366
673
|
}
|
|
367
674
|
} catch (err) {
|
|
368
675
|
if (err instanceof PdfError) {
|
|
369
676
|
throw err;
|
|
370
677
|
}
|
|
371
678
|
const message = err instanceof Error ? err.message : String(err);
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
679
|
+
logger3.error("Unexpected error preparing PDF source", {
|
|
680
|
+
sourceDescription: safeSource,
|
|
681
|
+
error: message
|
|
682
|
+
});
|
|
683
|
+
throw new PdfError(-32600 /* InvalidRequest */, `Failed to prepare PDF source ${safeSource}.`, {
|
|
684
|
+
cause: err instanceof Error ? err : undefined
|
|
685
|
+
});
|
|
379
686
|
}
|
|
380
|
-
const documentParams = pdfDataSource instanceof Uint8Array ? { data: pdfDataSource } : pdfDataSource;
|
|
381
687
|
const loadingTask = getDocument({
|
|
382
|
-
|
|
688
|
+
data: pdfData,
|
|
383
689
|
cMapUrl: CMAP_URL,
|
|
384
690
|
cMapPacked: true,
|
|
385
691
|
standardFontDataUrl: STANDARD_FONT_DATA_URL,
|
|
@@ -390,8 +696,8 @@ var loadPdfDocument = async (source, sourceDescription) => {
|
|
|
390
696
|
return await loadingTask.promise;
|
|
391
697
|
} catch (err) {
|
|
392
698
|
const message = err instanceof Error ? err.message : String(err);
|
|
393
|
-
logger3.error("PDF.js loading error", { sourceDescription, error: message });
|
|
394
|
-
throw new PdfError(-32600 /* InvalidRequest */, `Failed to load PDF document from ${
|
|
699
|
+
logger3.error("PDF.js loading error", { sourceDescription: safeSource, error: message });
|
|
700
|
+
throw new PdfError(-32600 /* InvalidRequest */, `Failed to load PDF document from ${safeSource}.`, { cause: err instanceof Error ? err : undefined });
|
|
395
701
|
}
|
|
396
702
|
};
|
|
397
703
|
|
|
@@ -831,11 +1137,16 @@ var processSingleSource = async (source, options) => {
|
|
|
831
1137
|
}
|
|
832
1138
|
individualResult = { ...individualResult, data: output, success: true };
|
|
833
1139
|
} catch (error) {
|
|
834
|
-
let errorMessage
|
|
835
|
-
if (error instanceof
|
|
836
|
-
errorMessage
|
|
1140
|
+
let errorMessage;
|
|
1141
|
+
if (error instanceof PdfError) {
|
|
1142
|
+
errorMessage = error.message;
|
|
837
1143
|
} else {
|
|
838
|
-
|
|
1144
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
1145
|
+
logger6.error("Unexpected error processing PDF source", {
|
|
1146
|
+
sourceDescription,
|
|
1147
|
+
error: detail
|
|
1148
|
+
});
|
|
1149
|
+
errorMessage = `Failed to process PDF from ${sourceDescription}.`;
|
|
839
1150
|
}
|
|
840
1151
|
individualResult.error = errorMessage;
|
|
841
1152
|
individualResult.success = false;
|
|
@@ -951,12 +1262,13 @@ var transportType = process.env["MCP_TRANSPORT"] ?? "stdio";
|
|
|
951
1262
|
var httpPort = Number.parseInt(process.env["MCP_HTTP_PORT"] ?? "8080", 10);
|
|
952
1263
|
var httpHost = process.env["MCP_HTTP_HOST"] ?? "0.0.0.0";
|
|
953
1264
|
var apiKey = process.env["MCP_API_KEY"];
|
|
1265
|
+
var corsOrigin = process.env["MCP_CORS_ORIGIN"];
|
|
954
1266
|
function createTransport() {
|
|
955
1267
|
if (transportType === "http") {
|
|
956
1268
|
return http({
|
|
957
1269
|
port: httpPort,
|
|
958
1270
|
hostname: httpHost,
|
|
959
|
-
cors:
|
|
1271
|
+
...corsOrigin ? { cors: corsOrigin } : {}
|
|
960
1272
|
});
|
|
961
1273
|
}
|
|
962
1274
|
return stdio();
|
|
@@ -976,6 +1288,9 @@ async function main() {
|
|
|
976
1288
|
if (apiKey) {
|
|
977
1289
|
console.log("[PDF Reader MCP] API key authentication enabled (X-API-Key header)");
|
|
978
1290
|
}
|
|
1291
|
+
if (corsOrigin) {
|
|
1292
|
+
console.log(`[PDF Reader MCP] CORS allowed origin: ${corsOrigin}`);
|
|
1293
|
+
}
|
|
979
1294
|
console.log("[PDF Reader MCP] Project root:", process.cwd());
|
|
980
1295
|
} else if (process.env["DEBUG_MCP"]) {
|
|
981
1296
|
console.error("[PDF Reader MCP] Server running on stdio");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sylphx/pdf-reader-mcp",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.4.1",
|
|
4
4
|
"description": "An MCP server providing tools to read PDF files.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -91,11 +91,11 @@
|
|
|
91
91
|
"@types/glob": "^8.1.0",
|
|
92
92
|
"@types/node": "^25.6.0",
|
|
93
93
|
"@types/pngjs": "^6.0.5",
|
|
94
|
-
"bunup": "0.16.
|
|
94
|
+
"bunup": "0.16.31",
|
|
95
95
|
"lefthook": "^2.1.6",
|
|
96
96
|
"typedoc": "^0.28.19",
|
|
97
97
|
"typedoc-plugin-markdown": "^4.11.0",
|
|
98
|
-
"typescript": "^
|
|
98
|
+
"typescript": "^6.0.3",
|
|
99
99
|
"vitepress": "^1.6.4"
|
|
100
100
|
},
|
|
101
101
|
"packageManager": "bun@1.3.1"
|