@modelcontextprotocol/server-pdf 1.0.1 → 1.1.1

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/server.d.ts CHANGED
@@ -2,7 +2,7 @@
2
2
  * PDF MCP Server
3
3
  *
4
4
  * An MCP server that displays PDFs in an interactive viewer.
5
- * Supports local files and remote URLs from academic sources (arxiv, biorxiv, etc).
5
+ * Supports local files and remote HTTPS URLs.
6
6
  *
7
7
  * Tools:
8
8
  * - list_pdfs: List available PDFs
@@ -13,10 +13,16 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
13
13
  export declare const DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762";
14
14
  export declare const MAX_CHUNK_BYTES: number;
15
15
  export declare const RESOURCE_URI = "ui://pdf-viewer/mcp-app.html";
16
- /** Allowed remote origins (security allowlist) */
17
- export declare const allowedRemoteOrigins: Set<string>;
16
+ /** Inactivity timeout: clear cache entry if not accessed for this long */
17
+ export declare const CACHE_INACTIVITY_TIMEOUT_MS = 10000;
18
+ /** Max lifetime: clear cache entry after this time regardless of access */
19
+ export declare const CACHE_MAX_LIFETIME_MS = 60000;
20
+ /** Max size for cached PDFs (defensive limit to prevent memory exhaustion) */
21
+ export declare const CACHE_MAX_PDF_SIZE_BYTES: number;
18
22
  /** Allowed local file paths (populated from CLI args) */
19
23
  export declare const allowedLocalFiles: Set<string>;
24
+ /** Allowed local directories (populated from MCP roots) */
25
+ export declare const allowedLocalDirs: Set<string>;
20
26
  export declare function isFileUrl(url: string): boolean;
21
27
  export declare function isArxivUrl(url: string): boolean;
22
28
  export declare function normalizeArxivUrl(url: string): string;
@@ -26,8 +32,30 @@ export declare function validateUrl(url: string): {
26
32
  valid: boolean;
27
33
  error?: string;
28
34
  };
29
- export declare function readPdfRange(url: string, offset: number, byteCount: number): Promise<{
30
- data: Uint8Array;
31
- totalBytes: number;
32
- }>;
35
+ /**
36
+ * Session-local PDF cache utilities.
37
+ * Each call to createPdfCache() creates an independent cache instance.
38
+ */
39
+ export interface PdfCache {
40
+ /** Read a range of bytes from a PDF, using cache for servers without Range support */
41
+ readPdfRange(url: string, offset: number, byteCount: number): Promise<{
42
+ data: Uint8Array;
43
+ totalBytes: number;
44
+ }>;
45
+ /** Get current number of cached entries */
46
+ getCacheSize(): number;
47
+ /** Clear all cached entries and their timers */
48
+ clearCache(): void;
49
+ }
50
+ /**
51
+ * Creates a session-local PDF cache with automatic timeout-based cleanup.
52
+ *
53
+ * When a remote server returns HTTP 200 (full body) instead of 206 (partial),
54
+ * the full response is cached so subsequent chunk requests don't re-download.
55
+ *
56
+ * Entries are automatically cleared after:
57
+ * - CACHE_INACTIVITY_TIMEOUT_MS of no access (resets on each access)
58
+ * - CACHE_MAX_LIFETIME_MS from creation (absolute timeout)
59
+ */
60
+ export declare function createPdfCache(): PdfCache;
33
61
  export declare function createServer(): McpServer;
package/dist/server.js CHANGED
@@ -35764,25 +35764,11 @@ function ak(r, i, o, t, n) {
35764
35764
  var DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762";
35765
35765
  var MAX_CHUNK_BYTES = 512 * 1024;
35766
35766
  var RESOURCE_URI = "ui://pdf-viewer/mcp-app.html";
35767
- var allowedRemoteOrigins = new Set([
35768
- "https://agrirxiv.org",
35769
- "https://arxiv.org",
35770
- "https://chemrxiv.org",
35771
- "https://edarxiv.org",
35772
- "https://engrxiv.org",
35773
- "https://hal.science",
35774
- "https://osf.io",
35775
- "https://psyarxiv.com",
35776
- "https://ssrn.com",
35777
- "https://www.biorxiv.org",
35778
- "https://www.eartharxiv.org",
35779
- "https://www.medrxiv.org",
35780
- "https://www.preprints.org",
35781
- "https://www.researchsquare.com",
35782
- "https://www.sportarxiv.org",
35783
- "https://zenodo.org"
35784
- ]);
35767
+ var CACHE_INACTIVITY_TIMEOUT_MS = 1e4;
35768
+ var CACHE_MAX_LIFETIME_MS = 60000;
35769
+ var CACHE_MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024;
35785
35770
  var allowedLocalFiles = new Set;
35771
+ var allowedLocalDirs = new Set;
35786
35772
  var DIST_DIR = import.meta.filename.endsWith(".ts") ? path.join(import.meta.dirname, "dist") : import.meta.dirname;
35787
35773
  function isFileUrl(url2) {
35788
35774
  return url2.startsWith("file://");
@@ -35808,7 +35794,10 @@ function pathToFileUrl(filePath) {
35808
35794
  function validateUrl(url2) {
35809
35795
  if (isFileUrl(url2)) {
35810
35796
  const filePath = fileUrlToPath(url2);
35811
- if (!allowedLocalFiles.has(filePath)) {
35797
+ const resolved = path.resolve(filePath);
35798
+ const exactMatch = allowedLocalFiles.has(filePath);
35799
+ const dirMatch = [...allowedLocalDirs].some((dir) => resolved === dir || resolved.startsWith(dir + path.sep));
35800
+ if (!exactMatch && !dirMatch) {
35812
35801
  return {
35813
35802
  valid: false,
35814
35803
  error: `Local file not in allowed list: ${filePath}`
@@ -35821,72 +35810,182 @@ function validateUrl(url2) {
35821
35810
  }
35822
35811
  try {
35823
35812
  const parsed = new URL(url2);
35824
- const origin = `${parsed.protocol}//${parsed.hostname}`;
35825
- if (![...allowedRemoteOrigins].some((allowed) => origin.startsWith(allowed))) {
35826
- return { valid: false, error: `Origin not allowed: ${origin}` };
35813
+ if (parsed.protocol !== "https:") {
35814
+ return { valid: false, error: `Only HTTPS URLs are allowed: ${url2}` };
35827
35815
  }
35828
35816
  return { valid: true };
35829
35817
  } catch {
35830
35818
  return { valid: false, error: `Invalid URL: ${url2}` };
35831
35819
  }
35832
35820
  }
35833
- async function readPdfRange(url2, offset, byteCount) {
35834
- const normalized = isArxivUrl(url2) ? normalizeArxivUrl(url2) : url2;
35835
- const clampedByteCount = Math.min(byteCount, MAX_CHUNK_BYTES);
35836
- if (isFileUrl(normalized)) {
35837
- const filePath = fileUrlToPath(normalized);
35838
- const stats = await fs.promises.stat(filePath);
35839
- const totalBytes2 = stats.size;
35840
- const start = Math.min(offset, totalBytes2);
35841
- const end = Math.min(start + clampedByteCount, totalBytes2);
35842
- if (start >= totalBytes2) {
35843
- return { data: new Uint8Array(0), totalBytes: totalBytes2 };
35844
- }
35845
- const buffer = Buffer.alloc(end - start);
35846
- const fd = await fs.promises.open(filePath, "r");
35847
- try {
35848
- await fd.read(buffer, 0, end - start, start);
35849
- } finally {
35850
- await fd.close();
35821
+ function createPdfCache() {
35822
+ const cache = new Map;
35823
+ function deleteCacheEntry(url2) {
35824
+ const entry = cache.get(url2);
35825
+ if (entry) {
35826
+ clearTimeout(entry.inactivityTimer);
35827
+ clearTimeout(entry.maxLifetimeTimer);
35828
+ cache.delete(url2);
35851
35829
  }
35852
- return { data: new Uint8Array(buffer), totalBytes: totalBytes2 };
35853
35830
  }
35854
- const response = await fetch(normalized, {
35855
- headers: {
35856
- Range: `bytes=${offset}-${offset + clampedByteCount - 1}`
35831
+ function getCacheEntry(url2) {
35832
+ const entry = cache.get(url2);
35833
+ if (!entry)
35834
+ return;
35835
+ clearTimeout(entry.inactivityTimer);
35836
+ entry.inactivityTimer = setTimeout(() => {
35837
+ deleteCacheEntry(url2);
35838
+ }, CACHE_INACTIVITY_TIMEOUT_MS);
35839
+ return entry.data;
35840
+ }
35841
+ function setCacheEntry(url2, data) {
35842
+ deleteCacheEntry(url2);
35843
+ const entry = {
35844
+ data,
35845
+ createdAt: Date.now(),
35846
+ inactivityTimer: setTimeout(() => {
35847
+ deleteCacheEntry(url2);
35848
+ }, CACHE_INACTIVITY_TIMEOUT_MS),
35849
+ maxLifetimeTimer: setTimeout(() => {
35850
+ deleteCacheEntry(url2);
35851
+ }, CACHE_MAX_LIFETIME_MS)
35852
+ };
35853
+ cache.set(url2, entry);
35854
+ }
35855
+ function sliceToChunk(fullData, offset, clampedByteCount) {
35856
+ const totalBytes = fullData.length;
35857
+ const start = Math.min(offset, totalBytes);
35858
+ const end = Math.min(start + clampedByteCount, totalBytes);
35859
+ return { data: fullData.slice(start, end), totalBytes };
35860
+ }
35861
+ async function readPdfRange(url2, offset, byteCount) {
35862
+ const normalized = isArxivUrl(url2) ? normalizeArxivUrl(url2) : url2;
35863
+ const clampedByteCount = Math.min(byteCount, MAX_CHUNK_BYTES);
35864
+ if (isFileUrl(normalized)) {
35865
+ const filePath = fileUrlToPath(normalized);
35866
+ const stats = await fs.promises.stat(filePath);
35867
+ const totalBytes2 = stats.size;
35868
+ const start = Math.min(offset, totalBytes2);
35869
+ const end = Math.min(start + clampedByteCount, totalBytes2);
35870
+ if (start >= totalBytes2) {
35871
+ return { data: new Uint8Array(0), totalBytes: totalBytes2 };
35872
+ }
35873
+ const buffer = Buffer.alloc(end - start);
35874
+ const fd = await fs.promises.open(filePath, "r");
35875
+ try {
35876
+ await fd.read(buffer, 0, end - start, start);
35877
+ } finally {
35878
+ await fd.close();
35879
+ }
35880
+ return { data: new Uint8Array(buffer), totalBytes: totalBytes2 };
35857
35881
  }
35858
- });
35859
- if (!response.ok && response.status !== 206) {
35860
- throw new Error(`Range request failed: ${response.status} ${response.statusText}`);
35882
+ const cached2 = getCacheEntry(normalized);
35883
+ if (cached2) {
35884
+ return sliceToChunk(cached2, offset, clampedByteCount);
35885
+ }
35886
+ let response = await fetch(normalized, {
35887
+ headers: {
35888
+ Range: `bytes=${offset}-${offset + clampedByteCount - 1}`
35889
+ }
35890
+ });
35891
+ if (!response.ok && response.status !== 206) {
35892
+ response = await fetch(normalized);
35893
+ if (!response.ok) {
35894
+ throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);
35895
+ }
35896
+ }
35897
+ if (response.status === 200) {
35898
+ const contentLength = response.headers.get("content-length");
35899
+ if (contentLength) {
35900
+ const declaredSize = parseInt(contentLength, 10);
35901
+ if (declaredSize > CACHE_MAX_PDF_SIZE_BYTES) {
35902
+ throw new Error(`PDF too large to cache: ${declaredSize} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`);
35903
+ }
35904
+ }
35905
+ const fullData = new Uint8Array(await response.arrayBuffer());
35906
+ if (fullData.length > CACHE_MAX_PDF_SIZE_BYTES) {
35907
+ throw new Error(`PDF too large to cache: ${fullData.length} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`);
35908
+ }
35909
+ setCacheEntry(normalized, fullData);
35910
+ return sliceToChunk(fullData, offset, clampedByteCount);
35911
+ }
35912
+ const contentRange = response.headers.get("content-range");
35913
+ let totalBytes = 0;
35914
+ if (contentRange) {
35915
+ const match = contentRange.match(/bytes \d+-\d+\/(\d+)/);
35916
+ if (match) {
35917
+ totalBytes = parseInt(match[1], 10);
35918
+ }
35919
+ }
35920
+ const data = new Uint8Array(await response.arrayBuffer());
35921
+ return { data, totalBytes };
35861
35922
  }
35862
- const contentRange = response.headers.get("content-range");
35863
- let totalBytes = 0;
35864
- if (contentRange) {
35865
- const match = contentRange.match(/bytes \d+-\d+\/(\d+)/);
35866
- if (match) {
35867
- totalBytes = parseInt(match[1], 10);
35923
+ return {
35924
+ readPdfRange,
35925
+ getCacheSize: () => cache.size,
35926
+ clearCache: () => {
35927
+ for (const url2 of [...cache.keys()]) {
35928
+ deleteCacheEntry(url2);
35929
+ }
35930
+ }
35931
+ };
35932
+ }
35933
+ async function refreshRoots(server) {
35934
+ if (!server.getClientCapabilities()?.roots)
35935
+ return;
35936
+ try {
35937
+ const { roots } = await server.listRoots();
35938
+ allowedLocalDirs.clear();
35939
+ for (const root of roots) {
35940
+ if (root.uri.startsWith("file://")) {
35941
+ const dir = fileUrlToPath(root.uri);
35942
+ const resolved = path.resolve(dir);
35943
+ try {
35944
+ if (fs.statSync(resolved).isDirectory()) {
35945
+ allowedLocalDirs.add(resolved);
35946
+ console.error(`[pdf-server] Root directory allowed: ${resolved}`);
35947
+ }
35948
+ } catch {}
35949
+ }
35868
35950
  }
35951
+ } catch (err) {
35952
+ console.error(`[pdf-server] Failed to list roots: ${err instanceof Error ? err.message : err}`);
35869
35953
  }
35870
- const data = new Uint8Array(await response.arrayBuffer());
35871
- return { data, totalBytes };
35872
35954
  }
35873
35955
  function createServer() {
35874
35956
  const server = new McpServer({ name: "PDF Server", version: "2.0.0" });
35957
+ server.server.oninitialized = () => {
35958
+ refreshRoots(server.server);
35959
+ };
35960
+ server.server.setNotificationHandler(RootsListChangedNotificationSchema, async () => {
35961
+ await refreshRoots(server.server);
35962
+ });
35963
+ const { readPdfRange } = createPdfCache();
35875
35964
  server.tool("list_pdfs", "List available PDFs that can be displayed", {}, async () => {
35876
35965
  const pdfs = [];
35877
35966
  for (const filePath of allowedLocalFiles) {
35878
35967
  pdfs.push({ url: pathToFileUrl(filePath), type: "local" });
35879
35968
  }
35880
- const text = pdfs.length > 0 ? `Available PDFs:
35969
+ const parts = [];
35970
+ if (pdfs.length > 0) {
35971
+ parts.push(`Available PDFs:
35881
35972
  ${pdfs.map((p2) => `- ${p2.url} (${p2.type})`).join(`
35973
+ `)}`);
35974
+ }
35975
+ if (allowedLocalDirs.size > 0) {
35976
+ parts.push(`Allowed local directories (from client roots):
35977
+ ${[...allowedLocalDirs].map((d2) => `- ${d2}`).join(`
35882
35978
  `)}
35883
-
35884
- Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can also be loaded dynamically.` : `No local PDFs configured. Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can be loaded dynamically.`;
35979
+ Any PDF file under these directories can be displayed.`);
35980
+ }
35981
+ parts.push(`Any remote PDF accessible via HTTPS can also be loaded dynamically.`);
35885
35982
  return {
35886
- content: [{ type: "text", text }],
35983
+ content: [{ type: "text", text: parts.join(`
35984
+
35985
+ `) }],
35887
35986
  structuredContent: {
35888
35987
  localFiles: pdfs.filter((p2) => p2.type === "local").map((p2) => p2.url),
35889
- allowedOrigins: [...allowedRemoteOrigins]
35988
+ allowedDirectories: [...allowedLocalDirs]
35890
35989
  }
35891
35990
  };
35892
35991
  });
@@ -35948,21 +36047,22 @@ Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can also be loaded dyna
35948
36047
  };
35949
36048
  }
35950
36049
  });
35951
- const allowedDomains = [...allowedRemoteOrigins].map((origin) => origin.replace(/^https?:\/\/(www\.)?/, "")).join(", ");
35952
36050
  hk(server, "display_pdf", {
35953
36051
  title: "Display PDF",
35954
36052
  description: `Display an interactive PDF viewer.
35955
36053
 
35956
36054
  Accepts:
35957
36055
  - Local files explicitly added to the server (use list_pdfs to see available files)
35958
- - Remote PDFs from: ${allowedDomains}`,
36056
+ - Local files under directories provided by the client as MCP roots
36057
+ - Any remote PDF accessible via HTTPS`,
35959
36058
  inputSchema: {
35960
36059
  url: exports_external.string().default(DEFAULT_PDF).describe("PDF URL"),
35961
36060
  page: exports_external.number().min(1).default(1).describe("Initial page")
35962
36061
  },
35963
36062
  outputSchema: exports_external.object({
35964
36063
  url: exports_external.string(),
35965
- initialPage: exports_external.number()
36064
+ initialPage: exports_external.number(),
36065
+ totalBytes: exports_external.number()
35966
36066
  }),
35967
36067
  _meta: { ui: { resourceUri: RESOURCE_URI } }
35968
36068
  }, async ({ url: url2, page }) => {
@@ -35974,11 +36074,13 @@ Accepts:
35974
36074
  isError: true
35975
36075
  };
35976
36076
  }
36077
+ const { totalBytes } = await readPdfRange(normalized, 0, 1);
35977
36078
  return {
35978
36079
  content: [{ type: "text", text: `Displaying PDF: ${normalized}` }],
35979
36080
  structuredContent: {
35980
36081
  url: normalized,
35981
- initialPage: page
36082
+ initialPage: page,
36083
+ totalBytes
35982
36084
  },
35983
36085
  _meta: {
35984
36086
  viewUUID: randomUUID()
@@ -35997,16 +36099,19 @@ Accepts:
35997
36099
  }
35998
36100
  export {
35999
36101
  validateUrl,
36000
- readPdfRange,
36001
36102
  pathToFileUrl,
36002
36103
  normalizeArxivUrl,
36003
36104
  isFileUrl,
36004
36105
  isArxivUrl,
36005
36106
  fileUrlToPath,
36006
36107
  createServer,
36007
- allowedRemoteOrigins,
36108
+ createPdfCache,
36008
36109
  allowedLocalFiles,
36110
+ allowedLocalDirs,
36009
36111
  RESOURCE_URI,
36010
36112
  MAX_CHUNK_BYTES,
36011
- DEFAULT_PDF
36113
+ DEFAULT_PDF,
36114
+ CACHE_MAX_PDF_SIZE_BYTES,
36115
+ CACHE_MAX_LIFETIME_MS,
36116
+ CACHE_INACTIVITY_TIMEOUT_MS
36012
36117
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@modelcontextprotocol/server-pdf",
3
- "version": "1.0.1",
3
+ "version": "1.1.1",
4
4
  "type": "module",
5
5
  "description": "MCP server for loading and extracting text from PDF files with chunked pagination and interactive viewer",
6
6
  "repository": {
@@ -18,7 +18,7 @@
18
18
  "watch": "cross-env INPUT=mcp-app.html vite build --watch",
19
19
  "serve": "bun --watch main.ts",
20
20
  "start": "cross-env NODE_ENV=development npm run build && npm run serve",
21
- "dev": "cross-env NODE_ENV=development concurrently 'npm run watch' 'npm run serve'",
21
+ "dev": "cross-env NODE_ENV=development concurrently \"npm run watch\" \"npm run serve\"",
22
22
  "prepublishOnly": "npm run build"
23
23
  },
24
24
  "dependencies": {