@sylphx/pdf-reader-mcp 1.2.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,94 +0,0 @@
1
- // Page range parsing utilities
2
- import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
3
// Upper bound on how far past its start a single range is expanded, so that
// open-ended or very large ranges cannot exhaust memory.
const MAX_RANGE_SIZE = 10000;
/**
 * Strictly parse one page-number token.
 * Only plain decimal digits (after trimming) are accepted, and the value must
 * be a safe integer; otherwise NaN is returned. Unlike a bare parseInt this
 * rejects trailing garbage such as "3x" or "1 2", and rejects values so large
 * that `i++` would no longer advance (which would make range expansion loop forever).
 * @param {string} token - Raw token text
 * @returns {number} The page number, or NaN when the token is not valid
 */
const parsePageToken = (token) => {
    const text = token.trim();
    if (!/^\d+$/.test(text)) {
        return Number.NaN;
    }
    const value = Number.parseInt(text, 10);
    return Number.isSafeInteger(value) ? value : Number.NaN;
};
/**
 * Parse a single range part (e.g., "1-3", "5", "7-") and add its pages to `pages`.
 * A range is expanded to at most MAX_RANGE_SIZE pages beyond its start; when that
 * cap cuts a range short a warning is written via console.warn.
 * @param {string} part - One comma-separated piece of a page range string
 * @param {Set<number>} pages - Set that receives the parsed page numbers (mutated)
 * @throws {Error} If the part is not a positive page number or a valid "start-end" / "start-" range
 */
const parseRangePart = (part, pages) => {
    const trimmedPart = part.trim();
    if (!trimmedPart.includes('-')) {
        const page = parsePageToken(trimmedPart);
        if (Number.isNaN(page) || page <= 0) {
            throw new Error(`Invalid page number: ${trimmedPart}`);
        }
        pages.add(page);
        return;
    }
    const bounds = trimmedPart.split('-');
    // Exactly one hyphen is allowed; "1-2-3" must not be silently read as "1-2".
    if (bounds.length !== 2) {
        throw new Error(`Invalid page range format: ${trimmedPart}`);
    }
    const [startStr, endStr] = bounds;
    const isOpenEnded = endStr.trim() === '';
    const start = parsePageToken(startStr);
    const end = isOpenEnded ? Infinity : parsePageToken(endStr);
    if (Number.isNaN(start) || Number.isNaN(end) || start <= 0 || start > end) {
        throw new Error(`Invalid page range values: ${trimmedPart}`);
    }
    const practicalEnd = Math.min(end, start + MAX_RANGE_SIZE);
    for (let i = start; i <= practicalEnd; i++) {
        pages.add(i);
    }
    // Report every truncation, not only the open-ended case, so pages never vanish silently.
    if (practicalEnd < end) {
        if (isOpenEnded) {
            console.warn(`[PDF Reader MCP] Open-ended range starting at ${String(start)} was truncated at page ${String(practicalEnd)}.`);
        }
        else {
            console.warn(`[PDF Reader MCP] Range ${trimmedPart} was truncated at page ${String(practicalEnd)}.`);
        }
    }
};
35
/**
 * Expand a comma-separated page range string into concrete page numbers.
 * Each comma-separated segment is handed to parseRangePart, which adds the
 * pages it describes to a shared set (and throws on malformed segments).
 * @param ranges - Range string (e.g., "1-3,5,7-10")
 * @returns Unique page numbers in ascending order
 * @throws Error when no pages were collected
 */
export const parsePageRanges = (ranges) => {
    const collected = new Set();
    ranges.split(',').forEach((segment) => {
        parseRangePart(segment, collected);
    });
    if (collected.size === 0) {
        throw new Error('Page range string resulted in zero valid pages.');
    }
    // Numeric comparator: the default sort would order page numbers lexicographically.
    return [...collected].sort((left, right) => left - right);
};
51
/**
 * Get target pages from a page specification.
 * A string is parsed with parsePageRanges; an array must contain only positive
 * integers and is de-duplicated and sorted ascending.
 * @param sourcePages - Page specification (range string or array of page numbers); falsy means "no pages requested"
 * @param sourceDescription - Description of the source, used in error messages
 * @returns Sorted array of unique page numbers, or undefined when sourcePages is falsy
 * @throws McpError (InvalidParams) when the specification is malformed
 */
export const getTargetPages = (sourcePages, sourceDescription) => {
    if (!sourcePages) {
        return undefined;
    }
    try {
        if (typeof sourcePages === 'string') {
            return parsePageRanges(sourcePages);
        }
        // Guard explicitly so a wrong type yields a readable message instead of
        // an internal "sourcePages.some is not a function" TypeError.
        if (!Array.isArray(sourcePages)) {
            throw new Error('Page specification must be a range string or an array of page numbers.');
        }
        if (sourcePages.some((p) => !Number.isInteger(p) || p <= 0)) {
            throw new Error('Page numbers in array must be positive integers.');
        }
        const uniquePages = [...new Set(sourcePages)].sort((a, b) => a - b);
        if (uniquePages.length === 0) {
            throw new Error('Page specification resulted in an empty set of pages.');
        }
        return uniquePages;
    }
    catch (error) {
        // Re-wrap every failure so callers always receive an InvalidParams McpError naming the source.
        const message = error instanceof Error ? error.message : String(error);
        throw new McpError(ErrorCode.InvalidParams, `Invalid page specification for source ${sourceDescription}: ${message}`);
    }
};
80
/**
 * Determine which pages to process based on target pages and document size.
 * @param targetPages - Explicitly requested page numbers, or a falsy value when none were requested
 * @param totalPages - Number of pages in the document
 * @param includeFullText - When no target pages are given, whether every page should be processed
 * @returns Object with `pagesToProcess` (pages that exist in the document) and
 *          `invalidPages` (requested pages that cannot be processed), both in input order
 */
export const determinePagesToProcess = (targetPages, totalPages, includeFullText) => {
    if (targetPages) {
        const pagesToProcess = [];
        const invalidPages = [];
        // Single pass; a page is usable only if it is an integer within 1..totalPages.
        // Checking the lower bound as well keeps 0, negatives and NaN out of pagesToProcess.
        for (const page of targetPages) {
            const isUsable = Number.isInteger(page) && page >= 1 && page <= totalPages;
            (isUsable ? pagesToProcess : invalidPages).push(page);
        }
        return { pagesToProcess, invalidPages };
    }
    if (includeFullText) {
        // Pages are 1-based: 1..totalPages.
        const pagesToProcess = Array.from({ length: totalPages }, (_, i) => i + 1);
        return { pagesToProcess, invalidPages: [] };
    }
    return { pagesToProcess: [], invalidPages: [] };
};
@@ -1,55 +0,0 @@
1
- // Zod validation schemas for PDF reading
2
- import { z } from 'zod';
3
// Page specification schema: either a non-empty array of 1-based page numbers
// or a range string limited to digits, commas and hyphens (whitespace is ignored
// for the character check only).
const PAGE_RANGE_CHARACTERS = /^[0-9,-]+$/;
const pageNumberArraySchema = z
    .array(z.number().int().min(1))
    .min(1)
    .describe('Array of page numbers (1-based)');
const pageRangeStringSchema = z
    .string()
    .min(1)
    .refine((val) => {
        const withoutWhitespace = val.replace(/\s/g, '');
        return PAGE_RANGE_CHARACTERS.test(withoutWhitespace);
    }, {
        message: 'Page string must contain only numbers, commas, and hyphens.',
    })
    .describe('Page range string (e.g., "1-5,10,15-20")');
export const pageSpecifierSchema = z.union([pageNumberArraySchema, pageRangeStringSchema]);
14
// Schema for one PDF source. Unknown keys are rejected, and exactly one of
// 'path' (local file) or 'url' (remote file) must be supplied.
const hasExactlyOneLocation = (data) => Boolean(data.path) !== Boolean(data.url);
const pdfSourceShape = {
    path: z.string().min(1).optional().describe('Relative path to the local PDF file.'),
    url: z.string().url().optional().describe('URL of the PDF file.'),
    pages: pageSpecifierSchema
        .optional()
        .describe("Extract text only from specific pages (1-based) or ranges for this source. If provided, 'include_full_text' is ignored for this source."),
};
export const pdfSourceSchema = z
    .object(pdfSourceShape)
    .strict()
    .refine(hasExactlyOneLocation, {
        message: "Each source must have either 'path' or 'url', but not both.",
    });
27
// Schema for the read_pdf tool arguments: a non-empty list of sources plus
// optional boolean switches, each with a default. Unknown keys are rejected.
// Helper: optional boolean option with a default value and a description.
const booleanOption = (defaultValue, description) => z
    .boolean()
    .optional()
    .default(defaultValue)
    .describe(description);
export const readPdfArgsSchema = z
    .object({
        sources: z
            .array(pdfSourceSchema)
            .min(1)
            .describe('An array of PDF sources to process, each can optionally specify pages.'),
        include_full_text: booleanOption(false, "Include the full text content of each PDF (only if 'pages' is not specified for that source)."),
        include_metadata: booleanOption(true, 'Include metadata and info objects for each PDF.'),
        include_page_count: booleanOption(true, 'Include the total number of pages for each PDF.'),
        include_images: booleanOption(false, 'Extract and include embedded images from the PDF pages as base64-encoded data.'),
    })
    .strict();
package/dist/types/pdf.js DELETED
@@ -1,2 +0,0 @@
1
- // PDF-related TypeScript type definitions
2
- export {};
@@ -1,30 +0,0 @@
1
- // Removed unused import: import { fileURLToPath } from 'url';
2
- import path from 'node:path';
3
- import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
4
// Use the server's current working directory as the project root.
// This relies on the process launching the server to set the CWD correctly.
export const PROJECT_ROOT = process.cwd();
// Log on stderr: console.info writes to stdout, which a stdio-based MCP server
// must reserve for protocol messages. The prefix matches the "[PDF Reader MCP]"
// tag used by the package's other log output.
console.error(`[PDF Reader MCP - pathUtils] Project Root determined from CWD: ${PROJECT_ROOT}`);
8
/**
 * Resolves a user-provided relative path against the project root,
 * ensuring it stays within the project boundaries.
 * @param userPath The relative path provided by the user.
 * @returns The resolved absolute path.
 * @throws McpError (InvalidParams) when userPath is not a string or is an absolute path.
 * @throws McpError (InvalidRequest) when the resolved path lies outside PROJECT_ROOT.
 */
export const resolvePath = (userPath) => {
    if (typeof userPath !== 'string') {
        throw new McpError(ErrorCode.InvalidParams, 'Path must be a string.');
    }
    const normalizedUserPath = path.normalize(userPath);
    if (path.isAbsolute(normalizedUserPath)) {
        throw new McpError(ErrorCode.InvalidParams, 'Absolute paths are not allowed.');
    }
    // Resolve against the calculated PROJECT_ROOT
    const resolved = path.resolve(PROJECT_ROOT, normalizedUserPath);
    // Security check: compare path segments rather than string prefixes. A plain
    // startsWith() test would accept a sibling directory such as "<root>-other"
    // because its name merely begins with the root's name.
    const relativeToRoot = path.relative(PROJECT_ROOT, resolved);
    const escapesRoot = relativeToRoot === '..'
        || relativeToRoot.startsWith(`..${path.sep}`)
        // path.relative returns an absolute path when no relative route exists (e.g. another Windows drive).
        || path.isAbsolute(relativeToRoot);
    if (escapesRoot) {
        throw new McpError(ErrorCode.InvalidRequest, 'Path traversal detected. Access denied.');
    }
    return resolved;
};