@modelcontextprotocol/server-pdf 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -149,6 +149,20 @@ bun examples/pdf-server/main.ts ./local.pdf https://arxiv.org/pdf/2401.00001.pdf
149
149
  bun examples/pdf-server/main.ts --stdio ./papers/
150
150
  ```
151
151
 
152
+ ### Additional Flags
153
+
154
+ - `--debug` — Enable verbose server-side logging.
155
+ - `--enable-interact` — Enable the `interact` tool in HTTP mode (see [Deployment](#deployment)). Not needed for stdio.
156
+ - `--writeable-uploads-root` — Allow saving annotated PDFs back to files under client roots named `uploads` (Claude Desktop mounts attachments there; writes are refused by default).
157
+
158
+ ## Deployment
159
+
160
+ The `interact` tool relies on an in-memory command queue (server enqueues → viewer polls). This constrains how the server can be deployed:
161
+
162
+ - **stdio** (Claude Desktop) — `interact` is always enabled. The server runs as a single long-lived process, so the in-memory queue works.
163
+ - **HTTP, single instance** — Pass `--enable-interact` to opt in. Works as long as all requests land on the same process.
164
+ - **HTTP, stateless / multi-instance** — `interact` will not work. Commands enqueued on one instance are invisible to viewers polling another. Leave the flag off; the tool will not be registered.
165
+
152
166
  ## Security: Client Roots
153
167
 
154
168
  MCP clients may advertise **roots** — `file://` URIs pointing to directories on the client's file system. The server uses these to allow access to local files under those directories.
@@ -174,34 +188,149 @@ When roots are ignored the server logs:
174
188
 
175
189
  ## Tools
176
190
 
177
- | Tool | Visibility | Purpose |
178
- | ---------------- | ---------- | -------------------------------------- |
179
- | `list_pdfs` | Model | List available local files and origins |
180
- | `display_pdf` | Model + UI | Display interactive viewer |
181
- | `read_pdf_bytes` | App only | Stream PDF data in chunks |
191
+ | Tool | Visibility | Purpose |
192
+ | ---------------- | ---------- | ----------------------------------------------------- |
193
+ | `list_pdfs` | Model | List available local files and origins |
194
+ | `display_pdf` | Model + UI | Display interactive viewer |
195
+ | `interact`¹ | Model | Navigate, annotate, search, extract pages, fill forms |
196
+ | `read_pdf_bytes` | App only | Stream PDF data in chunks |
197
+ | `save_pdf` | App only | Save annotated PDF back to local file |
198
+
199
+ ¹ stdio only by default; in HTTP mode requires `--enable-interact` — see [Deployment](#deployment).
200
+
201
+ ## Example Prompts
202
+
203
+ After the model calls `display_pdf`, it receives the `viewUUID` and a description of all capabilities. Here are example prompts and follow-ups that exercise the `interact` tool: annotation, navigation and search, text and screenshot extraction, and form filling.
204
+
205
+ ### Annotating
206
+
207
+ > **User:** Show me the Attention Is All You Need paper
208
+ >
209
+ > _Model calls `display_pdf` → viewer opens_
210
+ >
211
+ > **User:** Highlight the title and add an APPROVED stamp on the first page.
212
+ >
213
+ > _Model calls `interact` with `highlight_text` for the title and `add_annotations` with a stamp_
214
+
215
+ > **User:** Can you annotate this PDF? Mark important sections for me.
216
+ >
217
+ > _Model calls `interact` with `get_text` to read content first, then `add_annotations` with highlights/notes_
218
+
219
+ > **User:** Add a note on page 1 saying "Key contribution" at position (200, 500), and highlight the abstract.
220
+ >
221
+ > _Model calls `interact` with `add_annotations` containing a `note`, and highlights the abstract using either the `highlight_text` action or a `highlight` annotation_
222
+
223
+ ### Navigation & Search
224
+
225
+ > **User:** Search for "self-attention" in the paper.
226
+ >
227
+ > _Model calls `interact` with action `search`, query `"self-attention"`_
228
+
229
+ > **User:** Go to page 5.
230
+ >
231
+ > _Model calls `interact` with action `navigate`, page `5`_
232
+
233
+ ### Page Extraction
234
+
235
+ > **User:** Give me the text of pages 1–3.
236
+ >
237
+ > _Model calls `interact` with action `get_text`, intervals `[{start:1, end:3}]`_
238
+
239
+ > **User:** Take a screenshot of the first page.
240
+ >
241
+ > _Model calls `interact` with action `get_screenshot`, page `1`_
242
+
243
+ ### Stamps & Form Filling
244
+
245
+ > **User:** Stamp this document as CONFIDENTIAL on every page.
246
+ >
247
+ > _Model calls `interact` with `add_annotations` containing `stamp` annotations on each page_
248
+
249
+ > **User:** Fill in the "Name" field with "Alice" and "Date" with "2026-02-26".
250
+ >
251
+ > _Model calls `interact` with action `fill_form`, fields `[{name:"Name", value:"Alice"}, {name:"Date", value:"2026-02-26"}]`_
252
+
253
+ ## Testing
254
+
255
+ ### E2E Tests (Playwright)
256
+
257
+ ```bash
258
+ # Run annotation E2E tests (renders annotations in a real browser)
259
+ npx playwright test tests/e2e/pdf-annotations.spec.ts
260
+
261
+ # Run all PDF server tests
262
+ npx playwright test -g "PDF Server"
263
+ ```
264
+
265
+ ### API Prompt Discovery Tests
266
+
267
+ These tests verify that Claude can discover and use annotation capabilities by calling the Anthropic Messages API with the tool schemas. They are **disabled by default** — skipped unless `ANTHROPIC_API_KEY` is set:
268
+
269
+ ```bash
270
+ ANTHROPIC_API_KEY=sk-ant-... npx playwright test tests/e2e/pdf-annotations-api.spec.ts
271
+ ```
272
+
273
+ The API tests simulate a conversation where `display_pdf` has already been called, then send a follow-up user message and verify the model uses annotation actions (or at least the `interact` tool). Three scenarios are tested:
274
+
275
+ | Scenario | User prompt | Expected model behavior |
276
+ | -------------------- | ----------------------------------------------------------------- | ------------------------------------------ |
277
+ | Direct annotation | "Highlight the title and add an APPROVED stamp" | Uses `highlight_text` or `add_annotations` |
278
+ | Capability discovery | "Can you annotate this PDF?" | Uses `interact` or mentions annotations |
279
+ | Specific notes | "Add a note saying 'Key contribution' and highlight the abstract" | Uses `interact` tool |
182
280
 
183
281
  ## Architecture
184
282
 
185
283
  ```
186
- server.ts # MCP server + tools
187
- main.ts # CLI entry point
284
+ server.ts # MCP server + tools
285
+ main.ts # CLI entry point
188
286
  src/
189
- └── mcp-app.ts # Interactive viewer UI (PDF.js)
287
+ ├── mcp-app.ts # Interactive viewer UI (PDF.js)
288
+ ├── pdf-annotations.ts # Annotation types, diff model, PDF import/export
289
+ └── pdf-annotations.test.ts # Unit tests for annotation module
190
290
  ```
191
291
 
192
292
  ## Key Patterns Shown
193
293
 
194
- | Pattern | Implementation |
195
- | ----------------- | ------------------------------------------- |
196
- | App-only tools | `_meta: { ui: { visibility: ["app"] } }` |
197
- | Chunked responses | `hasMore` + `offset` pagination |
198
- | Model context | `app.updateModelContext()` |
199
- | Display modes | `app.requestDisplayMode()` |
200
- | External links | `app.openLink()` |
201
- | View persistence | `viewUUID` + localStorage |
202
- | Theming | `applyDocumentTheme()` + CSS `light-dark()` |
294
+ | Pattern | Implementation |
295
+ | ----------------------------- | -------------------------------------------------------------- |
296
+ | App-only tools | `_meta: { ui: { visibility: ["app"] } }` |
297
+ | Chunked responses | `hasMore` + `offset` pagination |
298
+ | Model context | `app.updateModelContext()` |
299
+ | Display modes | `app.requestDisplayMode()` |
300
+ | External links | `app.openLink()` |
301
+ | View persistence | `viewUUID` + localStorage |
302
+ | Theming | `applyDocumentTheme()` + CSS `light-dark()` |
303
+ | Annotations | DOM overlays synced with proper PDF annotation dicts |
304
+ | Annotation import | Load existing PDF annotations via PDF.js `getAnnotations()` |
305
+ | Diff-based persistence | localStorage stores only additions/removals vs PDF baseline |
306
+ | Proper PDF export | pdf-lib low-level API creates real `/Type /Annot` dictionaries |
307
+ | Save to file | App-only `save_pdf` tool writes annotated bytes back to disk |
308
+ | Dirty flag | `*` prefix on title when unsaved local changes exist |
309
+ | Command queue | Server enqueues → client polls + processes |
310
+ | File download | `app.downloadFile()` for annotated PDF |
311
+ | Floating panel with anchoring | Magnetic corner-snapping panel for annotation list |
312
+ | Drag, resize, rotate | Interactive annotation handles with undo/redo |
313
+ | Keyboard shortcuts | Ctrl+Z/Y (undo/redo), Ctrl+S (save), Ctrl+F (search), ⌘Enter |
314
+
315
+ ### Annotation Types
316
+
317
+ Supported annotation types (synced with PDF.js):
318
+
319
+ | Type | Properties | PDF Subtype |
320
+ | --------------- | ------------------------------------------------------------------ | ------------ |
321
+ | `highlight` | `rects`, `color?`, `content?` | `/Highlight` |
322
+ | `underline` | `rects`, `color?` | `/Underline` |
323
+ | `strikethrough` | `rects`, `color?` | `/StrikeOut` |
324
+ | `note` | `x`, `y`, `content`, `color?` | `/Text` |
325
+ | `rectangle` | `x`, `y`, `width`, `height`, `color?`, etc. | `/Square` |
326
+ | `circle` | `x`, `y`, `width`, `height`, `color?`, etc. | `/Circle` |
327
+ | `line` | `x1`, `y1`, `x2`, `y2`, `color?` | `/Line` |
328
+ | `freetext` | `x`, `y`, `content`, `fontSize?`, `color?` | `/FreeText` |
329
+ | `stamp` | `x`, `y`, `label`, `color?`, `rotation?` | `/Stamp` |
330
+ | `image` | `x`, `y`, `width`, `height`, `imageData?`/`imageUrl?`, `rotation?` | `/Stamp` |
203
331
 
204
332
  ## Dependencies
205
333
 
206
- - `pdfjs-dist`: PDF rendering (frontend only)
334
+ - `pdfjs-dist`: PDF rendering and annotation import (frontend only)
335
+ - `pdf-lib`: Client-side PDF modification — creates proper PDF annotation dictionaries for export
207
336
  - `@modelcontextprotocol/ext-apps`: MCP Apps SDK
package/dist/index.js CHANGED
@@ -34135,8 +34135,10 @@ import {
34135
34135
  pathToFileUrl,
34136
34136
  fileUrlToPath,
34137
34137
  allowedLocalFiles,
34138
+ cliLocalFiles,
34138
34139
  DEFAULT_PDF,
34139
- allowedLocalDirs
34140
+ allowedLocalDirs,
34141
+ writeFlags
34140
34142
  } from "./server.js";
34141
34143
  async function startStreamableHTTPServer(createServer2) {
34142
34144
  const port = parseInt(process.env.PORT ?? "3001", 10);
@@ -34188,11 +34190,19 @@ function parseArgs() {
34188
34190
  const urls = [];
34189
34191
  let stdio = false;
34190
34192
  let useClientRoots = false;
34193
+ let enableInteract = false;
34194
+ let debug = false;
34191
34195
  for (const arg of args) {
34192
34196
  if (arg === "--stdio") {
34193
34197
  stdio = true;
34194
34198
  } else if (arg === "--use-client-roots") {
34195
34199
  useClientRoots = true;
34200
+ } else if (arg === "--enable-interact") {
34201
+ enableInteract = true;
34202
+ } else if (arg === "--debug") {
34203
+ debug = true;
34204
+ } else if (arg === "--writeable-uploads-root") {
34205
+ writeFlags.allowUploadsRoot = true;
34196
34206
  } else if (!arg.startsWith("-")) {
34197
34207
  let url = arg;
34198
34208
  if (!arg.startsWith("http://") && !arg.startsWith("https://") && !arg.startsWith("file://")) {
@@ -34206,11 +34216,13 @@ function parseArgs() {
34206
34216
  return {
34207
34217
  urls: urls.length > 0 ? urls : [DEFAULT_PDF],
34208
34218
  stdio,
34209
- useClientRoots
34219
+ useClientRoots,
34220
+ enableInteract,
34221
+ debug
34210
34222
  };
34211
34223
  }
34212
34224
  async function main() {
34213
- const { urls, stdio, useClientRoots } = parseArgs();
34225
+ const { urls, stdio, useClientRoots, enableInteract, debug } = parseArgs();
34214
34226
  for (const url of urls) {
34215
34227
  if (isFileUrl(url)) {
34216
34228
  const filePath = path.resolve(fileUrlToPath(url));
@@ -34218,6 +34230,7 @@ async function main() {
34218
34230
  const s = fs.statSync(filePath);
34219
34231
  if (s.isFile()) {
34220
34232
  allowedLocalFiles.add(filePath);
34233
+ cliLocalFiles.add(filePath);
34221
34234
  console.error(`[pdf-server] Registered local file: ${filePath}`);
34222
34235
  } else if (s.isDirectory()) {
34223
34236
  allowedLocalDirs.add(filePath);
@@ -34230,9 +34243,12 @@ async function main() {
34230
34243
  }
34231
34244
  console.error(`[pdf-server] Ready (${urls.length} URL(s) configured)`);
34232
34245
  if (stdio) {
34233
- await startStdioServer(() => createServer({ useClientRoots: true }));
34246
+ await startStdioServer(() => createServer({ enableInteract: true, useClientRoots: true, debug }));
34234
34247
  } else {
34235
- await startStreamableHTTPServer(() => createServer({ useClientRoots }));
34248
+ if (!useClientRoots) {
34249
+ console.error("[pdf-server] Client roots are ignored (default for remote transports). " + "Pass --use-client-roots to allow the client to expose local directories.");
34250
+ }
34251
+ await startStreamableHTTPServer(() => createServer({ useClientRoots, enableInteract, debug }));
34236
34252
  }
34237
34253
  }
34238
34254
  main().catch((e) => {