@sylphx/pdf-reader-mcp 2.0.7 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +34 -21
- package/package.json +5 -6
package/dist/index.js
CHANGED
|
@@ -316,6 +316,7 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
|
|
|
316
316
|
|
|
317
317
|
// src/pdf/loader.ts
|
|
318
318
|
import fs from "node:fs/promises";
|
|
319
|
+
import { createRequire } from "node:module";
|
|
319
320
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
320
321
|
|
|
321
322
|
// src/utils/errors.ts
|
|
@@ -341,6 +342,8 @@ var resolvePath = (userPath) => {
|
|
|
341
342
|
|
|
342
343
|
// src/pdf/loader.ts
|
|
343
344
|
var logger3 = createLogger("Loader");
|
|
345
|
+
var require2 = createRequire(import.meta.url);
|
|
346
|
+
var CMAP_URL = require2.resolve("pdfjs-dist/package.json").replace("package.json", "cmaps/");
|
|
344
347
|
var MAX_PDF_SIZE = 100 * 1024 * 1024;
|
|
345
348
|
var loadPdfDocument = async (source, sourceDescription) => {
|
|
346
349
|
let pdfDataSource;
|
|
@@ -370,7 +373,12 @@ var loadPdfDocument = async (source, sourceDescription) => {
|
|
|
370
373
|
}
|
|
371
374
|
throw new PdfError(errorCode, `Failed to prepare PDF source ${sourceDescription}. Reason: ${message}`, { cause: err instanceof Error ? err : undefined });
|
|
372
375
|
}
|
|
373
|
-
const loadingTask = getDocument(pdfDataSource);
|
|
376
|
+
const documentParams = pdfDataSource instanceof Uint8Array ? { data: pdfDataSource } : pdfDataSource;
|
|
377
|
+
const loadingTask = getDocument({
|
|
378
|
+
...documentParams,
|
|
379
|
+
cMapUrl: CMAP_URL,
|
|
380
|
+
cMapPacked: true
|
|
381
|
+
});
|
|
374
382
|
try {
|
|
375
383
|
return await loadingTask.promise;
|
|
376
384
|
} catch (err) {
|
|
@@ -455,27 +463,32 @@ var determinePagesToProcess = (targetPages, totalPages, includeFullText) => {
|
|
|
455
463
|
};
|
|
456
464
|
|
|
457
465
|
// src/schemas/readPdf.ts
|
|
458
|
-
import {
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
466
|
+
import {
|
|
467
|
+
array,
|
|
468
|
+
bool,
|
|
469
|
+
description,
|
|
470
|
+
gte,
|
|
471
|
+
int,
|
|
472
|
+
min,
|
|
473
|
+
num,
|
|
474
|
+
object,
|
|
475
|
+
optional,
|
|
476
|
+
str,
|
|
477
|
+
union
|
|
478
|
+
} from "@sylphx/vex";
|
|
479
|
+
var pageSpecifierSchema = union(array(num(int, gte(1))), str(min(1)));
|
|
480
|
+
var pdfSourceSchema = object({
|
|
481
|
+
path: optional(str(min(1), description("Path to the local PDF file (absolute or relative to cwd)."))),
|
|
482
|
+
url: optional(str(min(1), description("URL of the PDF file."))),
|
|
483
|
+
pages: optional(pageSpecifierSchema)
|
|
484
|
+
});
|
|
485
|
+
var readPdfArgsSchema = object({
|
|
486
|
+
sources: array(pdfSourceSchema),
|
|
487
|
+
include_full_text: optional(bool(description("Include the full text content of each PDF (only if 'pages' is not specified for that source)."))),
|
|
488
|
+
include_metadata: optional(bool(description("Include metadata and info objects for each PDF."))),
|
|
489
|
+
include_page_count: optional(bool(description("Include the total number of pages for each PDF."))),
|
|
490
|
+
include_images: optional(bool(description("Extract and include embedded images from the PDF pages as base64-encoded data.")))
|
|
471
491
|
});
|
|
472
|
-
var readPdfArgsSchema = z.object({
|
|
473
|
-
sources: z.array(pdfSourceSchema).min(1).describe("An array of PDF sources to process, each can optionally specify pages."),
|
|
474
|
-
include_full_text: z.boolean().optional().default(false).describe("Include the full text content of each PDF (only if 'pages' is not specified for that source)."),
|
|
475
|
-
include_metadata: z.boolean().optional().default(true).describe("Include metadata and info objects for each PDF."),
|
|
476
|
-
include_page_count: z.boolean().optional().default(true).describe("Include the total number of pages for each PDF."),
|
|
477
|
-
include_images: z.boolean().optional().default(false).describe("Extract and include embedded images from the PDF pages as base64-encoded data.")
|
|
478
|
-
}).strict();
|
|
479
492
|
|
|
480
493
|
// src/handlers/readPdf.ts
|
|
481
494
|
var logger5 = createLogger("ReadPdf");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sylphx/pdf-reader-mcp",
|
|
3
|
-
"version": "2.0.7",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "An MCP server providing tools to read PDF files.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -71,18 +71,17 @@
|
|
|
71
71
|
"prepare": "node_modules/.bin/lefthook install || true"
|
|
72
72
|
},
|
|
73
73
|
"dependencies": {
|
|
74
|
-
"@sylphx/mcp-server-sdk": "1.
|
|
74
|
+
"@sylphx/mcp-server-sdk": "^2.1.0",
|
|
75
|
+
"@sylphx/vex": "^0.1.11",
|
|
75
76
|
"glob": "^13.0.0",
|
|
76
77
|
"pdfjs-dist": "^5.4.449",
|
|
77
|
-
"pngjs": "^7.0.0",
|
|
78
|
-
"zod": "4.2.0-canary.20251124T022609",
|
|
79
|
-
"zod-to-json-schema": "^3.25.0"
|
|
78
|
+
"pngjs": "^7.0.0"
|
|
80
79
|
},
|
|
81
80
|
"devDependencies": {
|
|
82
81
|
"@biomejs/biome": "^2.3.8",
|
|
83
82
|
"@solidjs/router": "^0.15.4",
|
|
84
83
|
"@sylphx/biome-config": "^0.4.1",
|
|
85
|
-
"@sylphx/bump": "^
|
|
84
|
+
"@sylphx/bump": "^1.6.1",
|
|
86
85
|
"@sylphx/doctor": "^1.32.1",
|
|
87
86
|
"@sylphx/leaf": "^1.0.0",
|
|
88
87
|
"@sylphx/leaf-theme-default": "^1.0.0",
|