@modelcontextprotocol/server-pdf 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/server.d.ts CHANGED
@@ -13,10 +13,18 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
13
13
  export declare const DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762";
14
14
  export declare const MAX_CHUNK_BYTES: number;
15
15
  export declare const RESOURCE_URI = "ui://pdf-viewer/mcp-app.html";
16
+ /** Inactivity timeout: clear cache entry if not accessed for this long */
17
+ export declare const CACHE_INACTIVITY_TIMEOUT_MS = 10000;
18
+ /** Max lifetime: clear cache entry after this time regardless of access */
19
+ export declare const CACHE_MAX_LIFETIME_MS = 60000;
20
+ /** Max size for cached PDFs (defensive limit to prevent memory exhaustion) */
21
+ export declare const CACHE_MAX_PDF_SIZE_BYTES: number;
16
22
  /** Allowed remote origins (security allowlist) */
17
23
  export declare const allowedRemoteOrigins: Set<string>;
18
24
  /** Allowed local file paths (populated from CLI args) */
19
25
  export declare const allowedLocalFiles: Set<string>;
26
+ /** Allowed local directories (populated from MCP roots) */
27
+ export declare const allowedLocalDirs: Set<string>;
20
28
  export declare function isFileUrl(url: string): boolean;
21
29
  export declare function isArxivUrl(url: string): boolean;
22
30
  export declare function normalizeArxivUrl(url: string): string;
@@ -26,8 +34,30 @@ export declare function validateUrl(url: string): {
26
34
  valid: boolean;
27
35
  error?: string;
28
36
  };
29
- export declare function readPdfRange(url: string, offset: number, byteCount: number): Promise<{
30
- data: Uint8Array;
31
- totalBytes: number;
32
- }>;
37
+ /**
38
+ * Session-local PDF cache utilities.
39
+ * Each call to createPdfCache() creates an independent cache instance.
40
+ */
41
+ export interface PdfCache {
42
+ /** Read a range of bytes from a PDF, using cache for servers without Range support */
43
+ readPdfRange(url: string, offset: number, byteCount: number): Promise<{
44
+ data: Uint8Array;
45
+ totalBytes: number;
46
+ }>;
47
+ /** Get current number of cached entries */
48
+ getCacheSize(): number;
49
+ /** Clear all cached entries and their timers */
50
+ clearCache(): void;
51
+ }
52
+ /**
53
+ * Creates a session-local PDF cache with automatic timeout-based cleanup.
54
+ *
55
+ * When a remote server returns HTTP 200 (full body) instead of 206 (partial),
56
+ * the full response is cached so subsequent chunk requests don't re-download.
57
+ *
58
+ * Entries are automatically cleared after:
59
+ * - CACHE_INACTIVITY_TIMEOUT_MS of no access (resets on each access)
60
+ * - CACHE_MAX_LIFETIME_MS from creation (absolute timeout)
61
+ */
62
+ export declare function createPdfCache(): PdfCache;
33
63
  export declare function createServer(): McpServer;
package/dist/server.js CHANGED
@@ -35764,6 +35764,9 @@ function ak(r, i, o, t, n) {
35764
35764
  var DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762";
35765
35765
  var MAX_CHUNK_BYTES = 512 * 1024;
35766
35766
  var RESOURCE_URI = "ui://pdf-viewer/mcp-app.html";
35767
+ var CACHE_INACTIVITY_TIMEOUT_MS = 1e4;
35768
+ var CACHE_MAX_LIFETIME_MS = 60000;
35769
+ var CACHE_MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024;
35767
35770
  var allowedRemoteOrigins = new Set([
35768
35771
  "https://agrirxiv.org",
35769
35772
  "https://arxiv.org",
@@ -35783,6 +35786,7 @@ var allowedRemoteOrigins = new Set([
35783
35786
  "https://zenodo.org"
35784
35787
  ]);
35785
35788
  var allowedLocalFiles = new Set;
35789
+ var allowedLocalDirs = new Set;
35786
35790
  var DIST_DIR = import.meta.filename.endsWith(".ts") ? path.join(import.meta.dirname, "dist") : import.meta.dirname;
35787
35791
  function isFileUrl(url2) {
35788
35792
  return url2.startsWith("file://");
@@ -35808,7 +35812,10 @@ function pathToFileUrl(filePath) {
35808
35812
  function validateUrl(url2) {
35809
35813
  if (isFileUrl(url2)) {
35810
35814
  const filePath = fileUrlToPath(url2);
35811
- if (!allowedLocalFiles.has(filePath)) {
35815
+ const resolved = path.resolve(filePath);
35816
+ const exactMatch = allowedLocalFiles.has(filePath);
35817
+ const dirMatch = [...allowedLocalDirs].some((dir) => resolved === dir || resolved.startsWith(dir + path.sep));
35818
+ if (!exactMatch && !dirMatch) {
35812
35819
  return {
35813
35820
  valid: false,
35814
35821
  error: `Local file not in allowed list: ${filePath}`
@@ -35830,62 +35837,174 @@ function validateUrl(url2) {
35830
35837
  return { valid: false, error: `Invalid URL: ${url2}` };
35831
35838
  }
35832
35839
  }
35833
- async function readPdfRange(url2, offset, byteCount) {
35834
- const normalized = isArxivUrl(url2) ? normalizeArxivUrl(url2) : url2;
35835
- const clampedByteCount = Math.min(byteCount, MAX_CHUNK_BYTES);
35836
- if (isFileUrl(normalized)) {
35837
- const filePath = fileUrlToPath(normalized);
35838
- const stats = await fs.promises.stat(filePath);
35839
- const totalBytes2 = stats.size;
35840
- const start = Math.min(offset, totalBytes2);
35841
- const end = Math.min(start + clampedByteCount, totalBytes2);
35842
- if (start >= totalBytes2) {
35843
- return { data: new Uint8Array(0), totalBytes: totalBytes2 };
35844
- }
35845
- const buffer = Buffer.alloc(end - start);
35846
- const fd = await fs.promises.open(filePath, "r");
35847
- try {
35848
- await fd.read(buffer, 0, end - start, start);
35849
- } finally {
35850
- await fd.close();
35840
+ function createPdfCache() {
35841
+ const cache = new Map;
35842
+ function deleteCacheEntry(url2) {
35843
+ const entry = cache.get(url2);
35844
+ if (entry) {
35845
+ clearTimeout(entry.inactivityTimer);
35846
+ clearTimeout(entry.maxLifetimeTimer);
35847
+ cache.delete(url2);
35851
35848
  }
35852
- return { data: new Uint8Array(buffer), totalBytes: totalBytes2 };
35853
35849
  }
35854
- const response = await fetch(normalized, {
35855
- headers: {
35856
- Range: `bytes=${offset}-${offset + clampedByteCount - 1}`
35850
+ function getCacheEntry(url2) {
35851
+ const entry = cache.get(url2);
35852
+ if (!entry)
35853
+ return;
35854
+ clearTimeout(entry.inactivityTimer);
35855
+ entry.inactivityTimer = setTimeout(() => {
35856
+ deleteCacheEntry(url2);
35857
+ }, CACHE_INACTIVITY_TIMEOUT_MS);
35858
+ return entry.data;
35859
+ }
35860
+ function setCacheEntry(url2, data) {
35861
+ deleteCacheEntry(url2);
35862
+ const entry = {
35863
+ data,
35864
+ createdAt: Date.now(),
35865
+ inactivityTimer: setTimeout(() => {
35866
+ deleteCacheEntry(url2);
35867
+ }, CACHE_INACTIVITY_TIMEOUT_MS),
35868
+ maxLifetimeTimer: setTimeout(() => {
35869
+ deleteCacheEntry(url2);
35870
+ }, CACHE_MAX_LIFETIME_MS)
35871
+ };
35872
+ cache.set(url2, entry);
35873
+ }
35874
+ function sliceToChunk(fullData, offset, clampedByteCount) {
35875
+ const totalBytes = fullData.length;
35876
+ const start = Math.min(offset, totalBytes);
35877
+ const end = Math.min(start + clampedByteCount, totalBytes);
35878
+ return { data: fullData.slice(start, end), totalBytes };
35879
+ }
35880
+ async function readPdfRange(url2, offset, byteCount) {
35881
+ const normalized = isArxivUrl(url2) ? normalizeArxivUrl(url2) : url2;
35882
+ const clampedByteCount = Math.min(byteCount, MAX_CHUNK_BYTES);
35883
+ if (isFileUrl(normalized)) {
35884
+ const filePath = fileUrlToPath(normalized);
35885
+ const stats = await fs.promises.stat(filePath);
35886
+ const totalBytes2 = stats.size;
35887
+ const start = Math.min(offset, totalBytes2);
35888
+ const end = Math.min(start + clampedByteCount, totalBytes2);
35889
+ if (start >= totalBytes2) {
35890
+ return { data: new Uint8Array(0), totalBytes: totalBytes2 };
35891
+ }
35892
+ const buffer = Buffer.alloc(end - start);
35893
+ const fd = await fs.promises.open(filePath, "r");
35894
+ try {
35895
+ await fd.read(buffer, 0, end - start, start);
35896
+ } finally {
35897
+ await fd.close();
35898
+ }
35899
+ return { data: new Uint8Array(buffer), totalBytes: totalBytes2 };
35857
35900
  }
35858
- });
35859
- if (!response.ok && response.status !== 206) {
35860
- throw new Error(`Range request failed: ${response.status} ${response.statusText}`);
35901
+ const cached2 = getCacheEntry(normalized);
35902
+ if (cached2) {
35903
+ return sliceToChunk(cached2, offset, clampedByteCount);
35904
+ }
35905
+ let response = await fetch(normalized, {
35906
+ headers: {
35907
+ Range: `bytes=${offset}-${offset + clampedByteCount - 1}`
35908
+ }
35909
+ });
35910
+ if (!response.ok && response.status !== 206) {
35911
+ response = await fetch(normalized);
35912
+ if (!response.ok) {
35913
+ throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);
35914
+ }
35915
+ }
35916
+ if (response.status === 200) {
35917
+ const contentLength = response.headers.get("content-length");
35918
+ if (contentLength) {
35919
+ const declaredSize = parseInt(contentLength, 10);
35920
+ if (declaredSize > CACHE_MAX_PDF_SIZE_BYTES) {
35921
+ throw new Error(`PDF too large to cache: ${declaredSize} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`);
35922
+ }
35923
+ }
35924
+ const fullData = new Uint8Array(await response.arrayBuffer());
35925
+ if (fullData.length > CACHE_MAX_PDF_SIZE_BYTES) {
35926
+ throw new Error(`PDF too large to cache: ${fullData.length} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`);
35927
+ }
35928
+ setCacheEntry(normalized, fullData);
35929
+ return sliceToChunk(fullData, offset, clampedByteCount);
35930
+ }
35931
+ const contentRange = response.headers.get("content-range");
35932
+ let totalBytes = 0;
35933
+ if (contentRange) {
35934
+ const match = contentRange.match(/bytes \d+-\d+\/(\d+)/);
35935
+ if (match) {
35936
+ totalBytes = parseInt(match[1], 10);
35937
+ }
35938
+ }
35939
+ const data = new Uint8Array(await response.arrayBuffer());
35940
+ return { data, totalBytes };
35861
35941
  }
35862
- const contentRange = response.headers.get("content-range");
35863
- let totalBytes = 0;
35864
- if (contentRange) {
35865
- const match = contentRange.match(/bytes \d+-\d+\/(\d+)/);
35866
- if (match) {
35867
- totalBytes = parseInt(match[1], 10);
35942
+ return {
35943
+ readPdfRange,
35944
+ getCacheSize: () => cache.size,
35945
+ clearCache: () => {
35946
+ for (const url2 of [...cache.keys()]) {
35947
+ deleteCacheEntry(url2);
35948
+ }
35868
35949
  }
35950
+ };
35951
+ }
35952
+ async function refreshRoots(server) {
35953
+ if (!server.getClientCapabilities()?.roots)
35954
+ return;
35955
+ try {
35956
+ const { roots } = await server.listRoots();
35957
+ allowedLocalDirs.clear();
35958
+ for (const root of roots) {
35959
+ if (root.uri.startsWith("file://")) {
35960
+ const dir = fileUrlToPath(root.uri);
35961
+ const resolved = path.resolve(dir);
35962
+ try {
35963
+ if (fs.statSync(resolved).isDirectory()) {
35964
+ allowedLocalDirs.add(resolved);
35965
+ console.error(`[pdf-server] Root directory allowed: ${resolved}`);
35966
+ }
35967
+ } catch {}
35968
+ }
35969
+ }
35970
+ } catch (err) {
35971
+ console.error(`[pdf-server] Failed to list roots: ${err instanceof Error ? err.message : err}`);
35869
35972
  }
35870
- const data = new Uint8Array(await response.arrayBuffer());
35871
- return { data, totalBytes };
35872
35973
  }
35873
35974
  function createServer() {
35874
35975
  const server = new McpServer({ name: "PDF Server", version: "2.0.0" });
35976
+ server.server.oninitialized = () => {
35977
+ refreshRoots(server.server);
35978
+ };
35979
+ server.server.setNotificationHandler(RootsListChangedNotificationSchema, async () => {
35980
+ await refreshRoots(server.server);
35981
+ });
35982
+ const { readPdfRange } = createPdfCache();
35875
35983
  server.tool("list_pdfs", "List available PDFs that can be displayed", {}, async () => {
35876
35984
  const pdfs = [];
35877
35985
  for (const filePath of allowedLocalFiles) {
35878
35986
  pdfs.push({ url: pathToFileUrl(filePath), type: "local" });
35879
35987
  }
35880
- const text = pdfs.length > 0 ? `Available PDFs:
35988
+ const parts = [];
35989
+ if (pdfs.length > 0) {
35990
+ parts.push(`Available PDFs:
35881
35991
  ${pdfs.map((p2) => `- ${p2.url} (${p2.type})`).join(`
35992
+ `)}`);
35993
+ }
35994
+ if (allowedLocalDirs.size > 0) {
35995
+ parts.push(`Allowed local directories (from client roots):
35996
+ ${[...allowedLocalDirs].map((d2) => `- ${d2}`).join(`
35882
35997
  `)}
35883
-
35884
- Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can also be loaded dynamically.` : `No local PDFs configured. Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can be loaded dynamically.`;
35998
+ Any PDF file under these directories can be displayed.`);
35999
+ }
36000
+ parts.push(`Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can also be loaded dynamically.`);
35885
36001
  return {
35886
- content: [{ type: "text", text }],
36002
+ content: [{ type: "text", text: parts.join(`
36003
+
36004
+ `) }],
35887
36005
  structuredContent: {
35888
36006
  localFiles: pdfs.filter((p2) => p2.type === "local").map((p2) => p2.url),
36007
+ allowedDirectories: [...allowedLocalDirs],
35889
36008
  allowedOrigins: [...allowedRemoteOrigins]
35890
36009
  }
35891
36010
  };
@@ -35955,6 +36074,7 @@ Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can also be loaded dyna
35955
36074
 
35956
36075
  Accepts:
35957
36076
  - Local files explicitly added to the server (use list_pdfs to see available files)
36077
+ - Local files under directories provided by the client as MCP roots
35958
36078
  - Remote PDFs from: ${allowedDomains}`,
35959
36079
  inputSchema: {
35960
36080
  url: exports_external.string().default(DEFAULT_PDF).describe("PDF URL"),
@@ -35962,7 +36082,8 @@ Accepts:
35962
36082
  },
35963
36083
  outputSchema: exports_external.object({
35964
36084
  url: exports_external.string(),
35965
- initialPage: exports_external.number()
36085
+ initialPage: exports_external.number(),
36086
+ totalBytes: exports_external.number()
35966
36087
  }),
35967
36088
  _meta: { ui: { resourceUri: RESOURCE_URI } }
35968
36089
  }, async ({ url: url2, page }) => {
@@ -35974,11 +36095,13 @@ Accepts:
35974
36095
  isError: true
35975
36096
  };
35976
36097
  }
36098
+ const { totalBytes } = await readPdfRange(normalized, 0, 1);
35977
36099
  return {
35978
36100
  content: [{ type: "text", text: `Displaying PDF: ${normalized}` }],
35979
36101
  structuredContent: {
35980
36102
  url: normalized,
35981
- initialPage: page
36103
+ initialPage: page,
36104
+ totalBytes
35982
36105
  },
35983
36106
  _meta: {
35984
36107
  viewUUID: randomUUID()
@@ -35997,16 +36120,20 @@ Accepts:
35997
36120
  }
35998
36121
  export {
35999
36122
  validateUrl,
36000
- readPdfRange,
36001
36123
  pathToFileUrl,
36002
36124
  normalizeArxivUrl,
36003
36125
  isFileUrl,
36004
36126
  isArxivUrl,
36005
36127
  fileUrlToPath,
36006
36128
  createServer,
36129
+ createPdfCache,
36007
36130
  allowedRemoteOrigins,
36008
36131
  allowedLocalFiles,
36132
+ allowedLocalDirs,
36009
36133
  RESOURCE_URI,
36010
36134
  MAX_CHUNK_BYTES,
36011
- DEFAULT_PDF
36135
+ DEFAULT_PDF,
36136
+ CACHE_MAX_PDF_SIZE_BYTES,
36137
+ CACHE_MAX_LIFETIME_MS,
36138
+ CACHE_INACTIVITY_TIMEOUT_MS
36012
36139
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@modelcontextprotocol/server-pdf",
3
- "version": "1.0.1",
3
+ "version": "1.1.0",
4
4
  "type": "module",
5
5
  "description": "MCP server for loading and extracting text from PDF files with chunked pagination and interactive viewer",
6
6
  "repository": {
@@ -18,7 +18,7 @@
18
18
  "watch": "cross-env INPUT=mcp-app.html vite build --watch",
19
19
  "serve": "bun --watch main.ts",
20
20
  "start": "cross-env NODE_ENV=development npm run build && npm run serve",
21
- "dev": "cross-env NODE_ENV=development concurrently 'npm run watch' 'npm run serve'",
21
+ "dev": "cross-env NODE_ENV=development concurrently \"npm run watch\" \"npm run serve\"",
22
22
  "prepublishOnly": "npm run build"
23
23
  },
24
24
  "dependencies": {