@sylphx/pdf-reader-mcp 2.0.7 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +34 -21
  2. package/package.json +5 -6
package/dist/index.js CHANGED
@@ -316,6 +316,7 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
316
316
 
317
317
  // src/pdf/loader.ts
318
318
  import fs from "node:fs/promises";
319
+ import { createRequire } from "node:module";
319
320
  import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
320
321
 
321
322
  // src/utils/errors.ts
@@ -341,6 +342,8 @@ var resolvePath = (userPath) => {
341
342
 
342
343
  // src/pdf/loader.ts
343
344
  var logger3 = createLogger("Loader");
345
+ var require2 = createRequire(import.meta.url);
346
+ var CMAP_URL = require2.resolve("pdfjs-dist/package.json").replace("package.json", "cmaps/");
344
347
  var MAX_PDF_SIZE = 100 * 1024 * 1024;
345
348
  var loadPdfDocument = async (source, sourceDescription) => {
346
349
  let pdfDataSource;
@@ -370,7 +373,12 @@ var loadPdfDocument = async (source, sourceDescription) => {
370
373
  }
371
374
  throw new PdfError(errorCode, `Failed to prepare PDF source ${sourceDescription}. Reason: ${message}`, { cause: err instanceof Error ? err : undefined });
372
375
  }
373
- const loadingTask = getDocument(pdfDataSource);
376
+ const documentParams = pdfDataSource instanceof Uint8Array ? { data: pdfDataSource } : pdfDataSource;
377
+ const loadingTask = getDocument({
378
+ ...documentParams,
379
+ cMapUrl: CMAP_URL,
380
+ cMapPacked: true
381
+ });
374
382
  try {
375
383
  return await loadingTask.promise;
376
384
  } catch (err) {
@@ -455,27 +463,32 @@ var determinePagesToProcess = (targetPages, totalPages, includeFullText) => {
455
463
  };
456
464
 
457
465
  // src/schemas/readPdf.ts
458
- import { z } from "zod";
459
- var pageSpecifierSchema = z.union([
460
- z.array(z.number().int().min(1)).min(1).describe("Array of page numbers (1-based)"),
461
- z.string().min(1).refine((val) => /^[0-9,-]+$/.test(val.replace(/\s/g, "")), {
462
- message: "Page string must contain only numbers, commas, and hyphens."
463
- }).describe('Page range string (e.g., "1-5,10,15-20")')
464
- ]);
465
- var pdfSourceSchema = z.object({
466
- path: z.string().min(1).optional().describe("Path to the local PDF file (absolute or relative to cwd)."),
467
- url: z.string().url().optional().describe("URL of the PDF file."),
468
- pages: pageSpecifierSchema.optional().describe("Extract text only from specific pages (1-based) or ranges for this source. If provided, 'include_full_text' is ignored for this source.")
469
- }).strict().refine((data) => !!(data.path && !data.url) || !!(!data.path && data.url), {
470
- message: "Each source must have either 'path' or 'url', but not both."
466
+ import {
467
+ array,
468
+ bool,
469
+ description,
470
+ gte,
471
+ int,
472
+ min,
473
+ num,
474
+ object,
475
+ optional,
476
+ str,
477
+ union
478
+ } from "@sylphx/vex";
479
+ var pageSpecifierSchema = union(array(num(int, gte(1))), str(min(1)));
480
+ var pdfSourceSchema = object({
481
+ path: optional(str(min(1), description("Path to the local PDF file (absolute or relative to cwd)."))),
482
+ url: optional(str(min(1), description("URL of the PDF file."))),
483
+ pages: optional(pageSpecifierSchema)
484
+ });
485
+ var readPdfArgsSchema = object({
486
+ sources: array(pdfSourceSchema),
487
+ include_full_text: optional(bool(description("Include the full text content of each PDF (only if 'pages' is not specified for that source)."))),
488
+ include_metadata: optional(bool(description("Include metadata and info objects for each PDF."))),
489
+ include_page_count: optional(bool(description("Include the total number of pages for each PDF."))),
490
+ include_images: optional(bool(description("Extract and include embedded images from the PDF pages as base64-encoded data.")))
471
491
  });
472
- var readPdfArgsSchema = z.object({
473
- sources: z.array(pdfSourceSchema).min(1).describe("An array of PDF sources to process, each can optionally specify pages."),
474
- include_full_text: z.boolean().optional().default(false).describe("Include the full text content of each PDF (only if 'pages' is not specified for that source)."),
475
- include_metadata: z.boolean().optional().default(true).describe("Include metadata and info objects for each PDF."),
476
- include_page_count: z.boolean().optional().default(true).describe("Include the total number of pages for each PDF."),
477
- include_images: z.boolean().optional().default(false).describe("Extract and include embedded images from the PDF pages as base64-encoded data.")
478
- }).strict();
479
492
 
480
493
  // src/handlers/readPdf.ts
481
494
  var logger5 = createLogger("ReadPdf");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sylphx/pdf-reader-mcp",
3
- "version": "2.0.7",
3
+ "version": "2.1.0",
4
4
  "description": "An MCP server providing tools to read PDF files.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -71,18 +71,17 @@
71
71
  "prepare": "node_modules/.bin/lefthook install || true"
72
72
  },
73
73
  "dependencies": {
74
- "@sylphx/mcp-server-sdk": "1.3.0",
74
+ "@sylphx/mcp-server-sdk": "^2.1.0",
75
+ "@sylphx/vex": "^0.1.11",
75
76
  "glob": "^13.0.0",
76
77
  "pdfjs-dist": "^5.4.449",
77
- "pngjs": "^7.0.0",
78
- "zod": "4.2.0-canary.20251124T022609",
79
- "zod-to-json-schema": "^3.25.0"
78
+ "pngjs": "^7.0.0"
80
79
  },
81
80
  "devDependencies": {
82
81
  "@biomejs/biome": "^2.3.8",
83
82
  "@solidjs/router": "^0.15.4",
84
83
  "@sylphx/biome-config": "^0.4.1",
85
- "@sylphx/bump": "^0.12.1",
84
+ "@sylphx/bump": "^1.6.1",
86
85
  "@sylphx/doctor": "^1.32.1",
87
86
  "@sylphx/leaf": "^1.0.0",
88
87
  "@sylphx/leaf-theme-default": "^1.0.0",