@modelcontextprotocol/server-pdf 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/mcp-app.html +98 -48
- package/dist/server.d.ts +34 -4
- package/dist/server.js +169 -42
- package/package.json +2 -2
package/dist/server.d.ts
CHANGED
|
@@ -13,10 +13,18 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
|
13
13
|
export declare const DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762";
|
|
14
14
|
export declare const MAX_CHUNK_BYTES: number;
|
|
15
15
|
export declare const RESOURCE_URI = "ui://pdf-viewer/mcp-app.html";
|
|
16
|
+
/** Inactivity timeout: clear cache entry if not accessed for this long */
|
|
17
|
+
export declare const CACHE_INACTIVITY_TIMEOUT_MS = 10000;
|
|
18
|
+
/** Max lifetime: clear cache entry after this time regardless of access */
|
|
19
|
+
export declare const CACHE_MAX_LIFETIME_MS = 60000;
|
|
20
|
+
/** Max size for cached PDFs (defensive limit to prevent memory exhaustion) */
|
|
21
|
+
export declare const CACHE_MAX_PDF_SIZE_BYTES: number;
|
|
16
22
|
/** Allowed remote origins (security allowlist) */
|
|
17
23
|
export declare const allowedRemoteOrigins: Set<string>;
|
|
18
24
|
/** Allowed local file paths (populated from CLI args) */
|
|
19
25
|
export declare const allowedLocalFiles: Set<string>;
|
|
26
|
+
/** Allowed local directories (populated from MCP roots) */
|
|
27
|
+
export declare const allowedLocalDirs: Set<string>;
|
|
20
28
|
export declare function isFileUrl(url: string): boolean;
|
|
21
29
|
export declare function isArxivUrl(url: string): boolean;
|
|
22
30
|
export declare function normalizeArxivUrl(url: string): string;
|
|
@@ -26,8 +34,30 @@ export declare function validateUrl(url: string): {
|
|
|
26
34
|
valid: boolean;
|
|
27
35
|
error?: string;
|
|
28
36
|
};
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
37
|
+
/**
|
|
38
|
+
* Session-local PDF cache utilities.
|
|
39
|
+
* Each call to createPdfCache() creates an independent cache instance.
|
|
40
|
+
*/
|
|
41
|
+
export interface PdfCache {
|
|
42
|
+
/** Read a range of bytes from a PDF, using cache for servers without Range support */
|
|
43
|
+
readPdfRange(url: string, offset: number, byteCount: number): Promise<{
|
|
44
|
+
data: Uint8Array;
|
|
45
|
+
totalBytes: number;
|
|
46
|
+
}>;
|
|
47
|
+
/** Get current number of cached entries */
|
|
48
|
+
getCacheSize(): number;
|
|
49
|
+
/** Clear all cached entries and their timers */
|
|
50
|
+
clearCache(): void;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Creates a session-local PDF cache with automatic timeout-based cleanup.
|
|
54
|
+
*
|
|
55
|
+
* When a remote server returns HTTP 200 (full body) instead of 206 (partial),
|
|
56
|
+
* the full response is cached so subsequent chunk requests don't re-download.
|
|
57
|
+
*
|
|
58
|
+
* Entries are automatically cleared after:
|
|
59
|
+
* - CACHE_INACTIVITY_TIMEOUT_MS of no access (resets on each access)
|
|
60
|
+
* - CACHE_MAX_LIFETIME_MS from creation (absolute timeout)
|
|
61
|
+
*/
|
|
62
|
+
export declare function createPdfCache(): PdfCache;
|
|
33
63
|
export declare function createServer(): McpServer;
|
package/dist/server.js
CHANGED
|
@@ -35764,6 +35764,9 @@ function ak(r, i, o, t, n) {
|
|
|
35764
35764
|
var DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762";
|
|
35765
35765
|
var MAX_CHUNK_BYTES = 512 * 1024;
|
|
35766
35766
|
var RESOURCE_URI = "ui://pdf-viewer/mcp-app.html";
|
|
35767
|
+
var CACHE_INACTIVITY_TIMEOUT_MS = 1e4;
|
|
35768
|
+
var CACHE_MAX_LIFETIME_MS = 60000;
|
|
35769
|
+
var CACHE_MAX_PDF_SIZE_BYTES = 50 * 1024 * 1024;
|
|
35767
35770
|
var allowedRemoteOrigins = new Set([
|
|
35768
35771
|
"https://agrirxiv.org",
|
|
35769
35772
|
"https://arxiv.org",
|
|
@@ -35783,6 +35786,7 @@ var allowedRemoteOrigins = new Set([
|
|
|
35783
35786
|
"https://zenodo.org"
|
|
35784
35787
|
]);
|
|
35785
35788
|
var allowedLocalFiles = new Set;
|
|
35789
|
+
var allowedLocalDirs = new Set;
|
|
35786
35790
|
var DIST_DIR = import.meta.filename.endsWith(".ts") ? path.join(import.meta.dirname, "dist") : import.meta.dirname;
|
|
35787
35791
|
function isFileUrl(url2) {
|
|
35788
35792
|
return url2.startsWith("file://");
|
|
@@ -35808,7 +35812,10 @@ function pathToFileUrl(filePath) {
|
|
|
35808
35812
|
function validateUrl(url2) {
|
|
35809
35813
|
if (isFileUrl(url2)) {
|
|
35810
35814
|
const filePath = fileUrlToPath(url2);
|
|
35811
|
-
|
|
35815
|
+
const resolved = path.resolve(filePath);
|
|
35816
|
+
const exactMatch = allowedLocalFiles.has(filePath);
|
|
35817
|
+
const dirMatch = [...allowedLocalDirs].some((dir) => resolved === dir || resolved.startsWith(dir + path.sep));
|
|
35818
|
+
if (!exactMatch && !dirMatch) {
|
|
35812
35819
|
return {
|
|
35813
35820
|
valid: false,
|
|
35814
35821
|
error: `Local file not in allowed list: ${filePath}`
|
|
@@ -35830,62 +35837,174 @@ function validateUrl(url2) {
|
|
|
35830
35837
|
return { valid: false, error: `Invalid URL: ${url2}` };
|
|
35831
35838
|
}
|
|
35832
35839
|
}
|
|
35833
|
-
|
|
35834
|
-
const
|
|
35835
|
-
|
|
35836
|
-
|
|
35837
|
-
|
|
35838
|
-
|
|
35839
|
-
|
|
35840
|
-
|
|
35841
|
-
const end = Math.min(start + clampedByteCount, totalBytes2);
|
|
35842
|
-
if (start >= totalBytes2) {
|
|
35843
|
-
return { data: new Uint8Array(0), totalBytes: totalBytes2 };
|
|
35844
|
-
}
|
|
35845
|
-
const buffer = Buffer.alloc(end - start);
|
|
35846
|
-
const fd = await fs.promises.open(filePath, "r");
|
|
35847
|
-
try {
|
|
35848
|
-
await fd.read(buffer, 0, end - start, start);
|
|
35849
|
-
} finally {
|
|
35850
|
-
await fd.close();
|
|
35840
|
+
function createPdfCache() {
|
|
35841
|
+
const cache = new Map;
|
|
35842
|
+
function deleteCacheEntry(url2) {
|
|
35843
|
+
const entry = cache.get(url2);
|
|
35844
|
+
if (entry) {
|
|
35845
|
+
clearTimeout(entry.inactivityTimer);
|
|
35846
|
+
clearTimeout(entry.maxLifetimeTimer);
|
|
35847
|
+
cache.delete(url2);
|
|
35851
35848
|
}
|
|
35852
|
-
return { data: new Uint8Array(buffer), totalBytes: totalBytes2 };
|
|
35853
35849
|
}
|
|
35854
|
-
|
|
35855
|
-
|
|
35856
|
-
|
|
35850
|
+
function getCacheEntry(url2) {
|
|
35851
|
+
const entry = cache.get(url2);
|
|
35852
|
+
if (!entry)
|
|
35853
|
+
return;
|
|
35854
|
+
clearTimeout(entry.inactivityTimer);
|
|
35855
|
+
entry.inactivityTimer = setTimeout(() => {
|
|
35856
|
+
deleteCacheEntry(url2);
|
|
35857
|
+
}, CACHE_INACTIVITY_TIMEOUT_MS);
|
|
35858
|
+
return entry.data;
|
|
35859
|
+
}
|
|
35860
|
+
function setCacheEntry(url2, data) {
|
|
35861
|
+
deleteCacheEntry(url2);
|
|
35862
|
+
const entry = {
|
|
35863
|
+
data,
|
|
35864
|
+
createdAt: Date.now(),
|
|
35865
|
+
inactivityTimer: setTimeout(() => {
|
|
35866
|
+
deleteCacheEntry(url2);
|
|
35867
|
+
}, CACHE_INACTIVITY_TIMEOUT_MS),
|
|
35868
|
+
maxLifetimeTimer: setTimeout(() => {
|
|
35869
|
+
deleteCacheEntry(url2);
|
|
35870
|
+
}, CACHE_MAX_LIFETIME_MS)
|
|
35871
|
+
};
|
|
35872
|
+
cache.set(url2, entry);
|
|
35873
|
+
}
|
|
35874
|
+
function sliceToChunk(fullData, offset, clampedByteCount) {
|
|
35875
|
+
const totalBytes = fullData.length;
|
|
35876
|
+
const start = Math.min(offset, totalBytes);
|
|
35877
|
+
const end = Math.min(start + clampedByteCount, totalBytes);
|
|
35878
|
+
return { data: fullData.slice(start, end), totalBytes };
|
|
35879
|
+
}
|
|
35880
|
+
async function readPdfRange(url2, offset, byteCount) {
|
|
35881
|
+
const normalized = isArxivUrl(url2) ? normalizeArxivUrl(url2) : url2;
|
|
35882
|
+
const clampedByteCount = Math.min(byteCount, MAX_CHUNK_BYTES);
|
|
35883
|
+
if (isFileUrl(normalized)) {
|
|
35884
|
+
const filePath = fileUrlToPath(normalized);
|
|
35885
|
+
const stats = await fs.promises.stat(filePath);
|
|
35886
|
+
const totalBytes2 = stats.size;
|
|
35887
|
+
const start = Math.min(offset, totalBytes2);
|
|
35888
|
+
const end = Math.min(start + clampedByteCount, totalBytes2);
|
|
35889
|
+
if (start >= totalBytes2) {
|
|
35890
|
+
return { data: new Uint8Array(0), totalBytes: totalBytes2 };
|
|
35891
|
+
}
|
|
35892
|
+
const buffer = Buffer.alloc(end - start);
|
|
35893
|
+
const fd = await fs.promises.open(filePath, "r");
|
|
35894
|
+
try {
|
|
35895
|
+
await fd.read(buffer, 0, end - start, start);
|
|
35896
|
+
} finally {
|
|
35897
|
+
await fd.close();
|
|
35898
|
+
}
|
|
35899
|
+
return { data: new Uint8Array(buffer), totalBytes: totalBytes2 };
|
|
35857
35900
|
}
|
|
35858
|
-
|
|
35859
|
-
|
|
35860
|
-
|
|
35901
|
+
const cached2 = getCacheEntry(normalized);
|
|
35902
|
+
if (cached2) {
|
|
35903
|
+
return sliceToChunk(cached2, offset, clampedByteCount);
|
|
35904
|
+
}
|
|
35905
|
+
let response = await fetch(normalized, {
|
|
35906
|
+
headers: {
|
|
35907
|
+
Range: `bytes=${offset}-${offset + clampedByteCount - 1}`
|
|
35908
|
+
}
|
|
35909
|
+
});
|
|
35910
|
+
if (!response.ok && response.status !== 206) {
|
|
35911
|
+
response = await fetch(normalized);
|
|
35912
|
+
if (!response.ok) {
|
|
35913
|
+
throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);
|
|
35914
|
+
}
|
|
35915
|
+
}
|
|
35916
|
+
if (response.status === 200) {
|
|
35917
|
+
const contentLength = response.headers.get("content-length");
|
|
35918
|
+
if (contentLength) {
|
|
35919
|
+
const declaredSize = parseInt(contentLength, 10);
|
|
35920
|
+
if (declaredSize > CACHE_MAX_PDF_SIZE_BYTES) {
|
|
35921
|
+
throw new Error(`PDF too large to cache: ${declaredSize} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`);
|
|
35922
|
+
}
|
|
35923
|
+
}
|
|
35924
|
+
const fullData = new Uint8Array(await response.arrayBuffer());
|
|
35925
|
+
if (fullData.length > CACHE_MAX_PDF_SIZE_BYTES) {
|
|
35926
|
+
throw new Error(`PDF too large to cache: ${fullData.length} bytes exceeds ${CACHE_MAX_PDF_SIZE_BYTES} byte limit`);
|
|
35927
|
+
}
|
|
35928
|
+
setCacheEntry(normalized, fullData);
|
|
35929
|
+
return sliceToChunk(fullData, offset, clampedByteCount);
|
|
35930
|
+
}
|
|
35931
|
+
const contentRange = response.headers.get("content-range");
|
|
35932
|
+
let totalBytes = 0;
|
|
35933
|
+
if (contentRange) {
|
|
35934
|
+
const match = contentRange.match(/bytes \d+-\d+\/(\d+)/);
|
|
35935
|
+
if (match) {
|
|
35936
|
+
totalBytes = parseInt(match[1], 10);
|
|
35937
|
+
}
|
|
35938
|
+
}
|
|
35939
|
+
const data = new Uint8Array(await response.arrayBuffer());
|
|
35940
|
+
return { data, totalBytes };
|
|
35861
35941
|
}
|
|
35862
|
-
|
|
35863
|
-
|
|
35864
|
-
|
|
35865
|
-
|
|
35866
|
-
|
|
35867
|
-
|
|
35942
|
+
return {
|
|
35943
|
+
readPdfRange,
|
|
35944
|
+
getCacheSize: () => cache.size,
|
|
35945
|
+
clearCache: () => {
|
|
35946
|
+
for (const url2 of [...cache.keys()]) {
|
|
35947
|
+
deleteCacheEntry(url2);
|
|
35948
|
+
}
|
|
35868
35949
|
}
|
|
35950
|
+
};
|
|
35951
|
+
}
|
|
35952
|
+
async function refreshRoots(server) {
|
|
35953
|
+
if (!server.getClientCapabilities()?.roots)
|
|
35954
|
+
return;
|
|
35955
|
+
try {
|
|
35956
|
+
const { roots } = await server.listRoots();
|
|
35957
|
+
allowedLocalDirs.clear();
|
|
35958
|
+
for (const root of roots) {
|
|
35959
|
+
if (root.uri.startsWith("file://")) {
|
|
35960
|
+
const dir = fileUrlToPath(root.uri);
|
|
35961
|
+
const resolved = path.resolve(dir);
|
|
35962
|
+
try {
|
|
35963
|
+
if (fs.statSync(resolved).isDirectory()) {
|
|
35964
|
+
allowedLocalDirs.add(resolved);
|
|
35965
|
+
console.error(`[pdf-server] Root directory allowed: ${resolved}`);
|
|
35966
|
+
}
|
|
35967
|
+
} catch {}
|
|
35968
|
+
}
|
|
35969
|
+
}
|
|
35970
|
+
} catch (err) {
|
|
35971
|
+
console.error(`[pdf-server] Failed to list roots: ${err instanceof Error ? err.message : err}`);
|
|
35869
35972
|
}
|
|
35870
|
-
const data = new Uint8Array(await response.arrayBuffer());
|
|
35871
|
-
return { data, totalBytes };
|
|
35872
35973
|
}
|
|
35873
35974
|
function createServer() {
|
|
35874
35975
|
const server = new McpServer({ name: "PDF Server", version: "2.0.0" });
|
|
35976
|
+
server.server.oninitialized = () => {
|
|
35977
|
+
refreshRoots(server.server);
|
|
35978
|
+
};
|
|
35979
|
+
server.server.setNotificationHandler(RootsListChangedNotificationSchema, async () => {
|
|
35980
|
+
await refreshRoots(server.server);
|
|
35981
|
+
});
|
|
35982
|
+
const { readPdfRange } = createPdfCache();
|
|
35875
35983
|
server.tool("list_pdfs", "List available PDFs that can be displayed", {}, async () => {
|
|
35876
35984
|
const pdfs = [];
|
|
35877
35985
|
for (const filePath of allowedLocalFiles) {
|
|
35878
35986
|
pdfs.push({ url: pathToFileUrl(filePath), type: "local" });
|
|
35879
35987
|
}
|
|
35880
|
-
const
|
|
35988
|
+
const parts = [];
|
|
35989
|
+
if (pdfs.length > 0) {
|
|
35990
|
+
parts.push(`Available PDFs:
|
|
35881
35991
|
${pdfs.map((p2) => `- ${p2.url} (${p2.type})`).join(`
|
|
35992
|
+
`)}`);
|
|
35993
|
+
}
|
|
35994
|
+
if (allowedLocalDirs.size > 0) {
|
|
35995
|
+
parts.push(`Allowed local directories (from client roots):
|
|
35996
|
+
${[...allowedLocalDirs].map((d2) => `- ${d2}`).join(`
|
|
35882
35997
|
`)}
|
|
35883
|
-
|
|
35884
|
-
|
|
35998
|
+
Any PDF file under these directories can be displayed.`);
|
|
35999
|
+
}
|
|
36000
|
+
parts.push(`Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can also be loaded dynamically.`);
|
|
35885
36001
|
return {
|
|
35886
|
-
content: [{ type: "text", text
|
|
36002
|
+
content: [{ type: "text", text: parts.join(`
|
|
36003
|
+
|
|
36004
|
+
`) }],
|
|
35887
36005
|
structuredContent: {
|
|
35888
36006
|
localFiles: pdfs.filter((p2) => p2.type === "local").map((p2) => p2.url),
|
|
36007
|
+
allowedDirectories: [...allowedLocalDirs],
|
|
35889
36008
|
allowedOrigins: [...allowedRemoteOrigins]
|
|
35890
36009
|
}
|
|
35891
36010
|
};
|
|
@@ -35955,6 +36074,7 @@ Remote PDFs from ${[...allowedRemoteOrigins].join(", ")} can also be loaded dyna
|
|
|
35955
36074
|
|
|
35956
36075
|
Accepts:
|
|
35957
36076
|
- Local files explicitly added to the server (use list_pdfs to see available files)
|
|
36077
|
+
- Local files under directories provided by the client as MCP roots
|
|
35958
36078
|
- Remote PDFs from: ${allowedDomains}`,
|
|
35959
36079
|
inputSchema: {
|
|
35960
36080
|
url: exports_external.string().default(DEFAULT_PDF).describe("PDF URL"),
|
|
@@ -35962,7 +36082,8 @@ Accepts:
|
|
|
35962
36082
|
},
|
|
35963
36083
|
outputSchema: exports_external.object({
|
|
35964
36084
|
url: exports_external.string(),
|
|
35965
|
-
initialPage: exports_external.number()
|
|
36085
|
+
initialPage: exports_external.number(),
|
|
36086
|
+
totalBytes: exports_external.number()
|
|
35966
36087
|
}),
|
|
35967
36088
|
_meta: { ui: { resourceUri: RESOURCE_URI } }
|
|
35968
36089
|
}, async ({ url: url2, page }) => {
|
|
@@ -35974,11 +36095,13 @@ Accepts:
|
|
|
35974
36095
|
isError: true
|
|
35975
36096
|
};
|
|
35976
36097
|
}
|
|
36098
|
+
const { totalBytes } = await readPdfRange(normalized, 0, 1);
|
|
35977
36099
|
return {
|
|
35978
36100
|
content: [{ type: "text", text: `Displaying PDF: ${normalized}` }],
|
|
35979
36101
|
structuredContent: {
|
|
35980
36102
|
url: normalized,
|
|
35981
|
-
initialPage: page
|
|
36103
|
+
initialPage: page,
|
|
36104
|
+
totalBytes
|
|
35982
36105
|
},
|
|
35983
36106
|
_meta: {
|
|
35984
36107
|
viewUUID: randomUUID()
|
|
@@ -35997,16 +36120,20 @@ Accepts:
|
|
|
35997
36120
|
}
|
|
35998
36121
|
export {
|
|
35999
36122
|
validateUrl,
|
|
36000
|
-
readPdfRange,
|
|
36001
36123
|
pathToFileUrl,
|
|
36002
36124
|
normalizeArxivUrl,
|
|
36003
36125
|
isFileUrl,
|
|
36004
36126
|
isArxivUrl,
|
|
36005
36127
|
fileUrlToPath,
|
|
36006
36128
|
createServer,
|
|
36129
|
+
createPdfCache,
|
|
36007
36130
|
allowedRemoteOrigins,
|
|
36008
36131
|
allowedLocalFiles,
|
|
36132
|
+
allowedLocalDirs,
|
|
36009
36133
|
RESOURCE_URI,
|
|
36010
36134
|
MAX_CHUNK_BYTES,
|
|
36011
|
-
DEFAULT_PDF
|
|
36135
|
+
DEFAULT_PDF,
|
|
36136
|
+
CACHE_MAX_PDF_SIZE_BYTES,
|
|
36137
|
+
CACHE_MAX_LIFETIME_MS,
|
|
36138
|
+
CACHE_INACTIVITY_TIMEOUT_MS
|
|
36012
36139
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@modelcontextprotocol/server-pdf",
|
|
3
|
-
"version": "1.0.1",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MCP server for loading and extracting text from PDF files with chunked pagination and interactive viewer",
|
|
6
6
|
"repository": {
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"watch": "cross-env INPUT=mcp-app.html vite build --watch",
|
|
19
19
|
"serve": "bun --watch main.ts",
|
|
20
20
|
"start": "cross-env NODE_ENV=development npm run build && npm run serve",
|
|
21
|
-
"dev": "cross-env NODE_ENV=development concurrently
|
|
21
|
+
"dev": "cross-env NODE_ENV=development concurrently \"npm run watch\" \"npm run serve\"",
|
|
22
22
|
"prepublishOnly": "npm run build"
|
|
23
23
|
},
|
|
24
24
|
"dependencies": {
|