apapyr-mcp-server 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -0
- package/package.json +24 -0
- package/src/index.js +335 -0
package/README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# aPapyr MCP Server
|
|
2
|
+
|
|
3
|
+
Extract structured data from documents using AI agents. This MCP server lets Claude Code, Cursor, Windsurf, and any MCP-compatible AI assistant natively extract data from invoices, receipts, tax forms, bank statements, and contracts.
|
|
4
|
+
|
|
5
|
+
## Quick Setup
|
|
6
|
+
|
|
7
|
+
### 1. Get an API Key
|
|
8
|
+
|
|
9
|
+
Sign up at [apapyr.com](https://apapyr.com/dashboard.html) (free tier: 50 pages/month).
|
|
10
|
+
|
|
11
|
+
### 2. Add to Claude Code
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
claude mcp add apapyr -- npx apapyr-mcp-server
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Then set your API key:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
export APAPYR_API_KEY=sk_live_your_key
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### 3. Add to Cursor / VS Code
|
|
24
|
+
|
|
25
|
+
Add to your MCP settings (`.cursor/mcp.json` or VS Code MCP config):
|
|
26
|
+
|
|
27
|
+
```json
|
|
28
|
+
{
|
|
29
|
+
"mcpServers": {
|
|
30
|
+
"apapyr": {
|
|
31
|
+
"command": "npx",
|
|
32
|
+
"args": ["apapyr-mcp-server"],
|
|
33
|
+
"env": {
|
|
34
|
+
"APAPYR_API_KEY": "sk_live_your_key"
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## What You Can Do
|
|
42
|
+
|
|
43
|
+
Once connected, just ask your AI assistant naturally:
|
|
44
|
+
|
|
45
|
+
- *"Extract the data from invoice.pdf"*
|
|
46
|
+
- *"Parse this receipt and give me the total"*
|
|
47
|
+
- *"What fields can you extract from a W-2?"*
|
|
48
|
+
- *"How many pages have I used this month?"*
|
|
49
|
+
|
|
50
|
+
## Available Tools
|
|
51
|
+
|
|
52
|
+
| Tool | Description |
|
|
53
|
+
|------|-------------|
|
|
54
|
+
| `extract_document` | Extract structured data from a PDF or image |
|
|
55
|
+
| `get_extraction` | Get a previous extraction result by ID |
|
|
56
|
+
| `list_extractions` | List recent extractions |
|
|
57
|
+
| `get_usage` | Check current plan usage and limits |
|
|
58
|
+
| `list_document_schemas` | See supported document types and fields |
|
|
59
|
+
|
|
60
|
+
## Supported Documents
|
|
61
|
+
|
|
62
|
+
- **Invoices** — vendor, total, tax, due date, line items
|
|
63
|
+
- **Receipts** — merchant, total, tax, tip, payment method
|
|
64
|
+
- **W-2 Tax Forms** — employer, wages, withholdings
|
|
65
|
+
- **Bank Statements** — balances, transactions
|
|
66
|
+
- **Contracts** — parties, dates, key terms
|
|
67
|
+
|
|
68
|
+
## Links
|
|
69
|
+
|
|
70
|
+
- [Website](https://apapyr.com)
|
|
71
|
+
- [API Docs](https://apapyr.com/docs.html)
|
|
72
|
+
- [Dashboard](https://apapyr.com/dashboard.html)
|
|
73
|
+
- [Free Tool](https://apapyr.com/free-tool.html)
|
package/package.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "apapyr-mcp-server",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "MCP server for aPapyr — Extract structured data from documents via AI agents",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"apapyr-mcp": "src/index.js"
|
|
8
|
+
},
|
|
9
|
+
"keywords": ["mcp", "document-extraction", "invoice", "receipt", "ocr", "ai-agent", "claude", "cursor"],
|
|
10
|
+
"author": "aPapyr",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"homepage": "https://apapyr.com",
|
|
13
|
+
"repository": {
|
|
14
|
+
"type": "git",
|
|
15
|
+
"url": "https://github.com/apapyr/mcp-server"
|
|
16
|
+
},
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"@modelcontextprotocol/sdk": "^1.0.0"
|
|
19
|
+
},
|
|
20
|
+
"engines": {
|
|
21
|
+
"node": ">=18"
|
|
22
|
+
},
|
|
23
|
+
"files": ["src/"]
|
|
24
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* aPapyr MCP Server
|
|
5
|
+
*
|
|
6
|
+
* Lets AI agents (Claude Code, Cursor, Copilot, etc.) extract structured data
|
|
7
|
+
* from documents via the aPapyr API.
|
|
8
|
+
*
|
|
9
|
+
* Setup:
|
|
10
|
+
* 1. Get an API key at https://apapyr.com/dashboard.html
|
|
11
|
+
* 2. Add to your MCP config:
|
|
12
|
+
* {
|
|
13
|
+
* "mcpServers": {
|
|
14
|
+
* "apapyr": {
|
|
15
|
+
* "command": "npx",
|
|
16
|
+
* "args": ["apapyr-mcp-server"],
|
|
17
|
+
* "env": { "APAPYR_API_KEY": "sk_live_..." }
|
|
18
|
+
* }
|
|
19
|
+
* }
|
|
20
|
+
* }
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const { Server } = require("@modelcontextprotocol/sdk/server/index.js");
|
|
24
|
+
const { StdioServerTransport } = require("@modelcontextprotocol/sdk/server/stdio.js");
|
|
25
|
+
const {
|
|
26
|
+
CallToolRequestSchema,
|
|
27
|
+
ListToolsRequestSchema,
|
|
28
|
+
} = require("@modelcontextprotocol/sdk/types.js");
|
|
29
|
+
const fs = require("fs");
|
|
30
|
+
const path = require("path");
|
|
31
|
+
|
|
32
|
+
/**
 * Strip trailing slashes from a base URL.
 *
 * Endpoints are appended as "/v1/...", so a base such as
 * "https://host/" would otherwise produce "https://host//v1/...".
 *
 * @param {string} url - Base URL as configured.
 * @returns {string} The same URL without any trailing "/" characters.
 */
function normalizeBaseUrl(url) {
  return url.replace(/\/+$/, "");
}

// Base URL of the aPapyr API. Override with APAPYR_API_URL; falls back to the
// production host when the variable is unset or empty.
const API_URL = normalizeBaseUrl(
  process.env.APAPYR_API_URL || "https://api.apapyr.com"
);

// API key sent as a Bearer token. Surrounding whitespace (for example a
// trailing newline copied from a shell or .env file) is removed because it is
// not valid inside an HTTP header value. Empty string when the variable is unset.
const API_KEY = (process.env.APAPYR_API_KEY || "").trim();
|
|
34
|
+
|
|
35
|
+
// ─── API Helper ──────────────────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
/**
 * Send an authenticated request to the aPapyr API and return the decoded JSON body.
 *
 * @param {string} method - HTTP method, e.g. "GET" or "POST".
 * @param {string} endpoint - Path appended to API_URL (expected to start with "/").
 * @param {*} [body] - Request payload. JSON-serialized unless `isFormData` is truthy;
 *   falsy values send no body.
 * @param {boolean} [isFormData] - When truthy, `body` is handed to fetch untouched and
 *   no Content-Type header is set by this function.
 * @returns {Promise<*>} The parsed JSON response.
 * @throws {Error} On a non-2xx status (message is the response's `detail` field when
 *   present, otherwise "API error: <status>"), or when a 2xx response is not valid JSON.
 */
async function apiRequest(method, endpoint, body, isFormData) {
  const headers = {
    Authorization: `Bearer ${API_KEY}`,
  };

  let payload;
  if (isFormData) {
    payload = body;
  } else {
    headers["Content-Type"] = "application/json";
    if (body) {
      payload = JSON.stringify(body);
    }
  }

  const response = await fetch(`${API_URL}${endpoint}`, {
    method,
    headers,
    body: payload,
  });

  // Read the body as text and parse it ourselves: calling response.json() on an
  // HTML/plain-text error page would throw a SyntaxError and hide the HTTP status.
  const rawBody = await response.text();
  let data;
  let isJson = false;
  try {
    data = JSON.parse(rawBody);
    isJson = true;
  } catch {
    // Not JSON; handled below depending on the status code.
  }

  if (!response.ok) {
    const detail =
      isJson && data !== null && typeof data === "object"
        ? data.detail
        : undefined;
    let message = `API error: ${response.status}`;
    if (detail) {
      // `detail` may be structured (object/array); serialize it so the message
      // does not degrade to "[object Object]".
      message = typeof detail === "string" ? detail : JSON.stringify(detail);
    }
    throw new Error(message);
  }

  if (!isJson) {
    throw new Error(
      `API returned a non-JSON response (status ${response.status})`
    );
  }
  return data;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Upload a local document to the extraction endpoint and return the decoded result.
 *
 * The request body is built with the runtime's FormData/Blob so that the file
 * name and field values are encoded by the platform; interpolating them into a
 * hand-written multipart body breaks when they contain quotes or line breaks.
 *
 * @param {string} filePath - Absolute or relative path to the document.
 * @param {string} [documentType] - Value sent as the "document_type" form field;
 *   "auto" is sent when this is empty or undefined.
 * @returns {Promise<*>} The parsed JSON response from POST /v1/extract.
 * @throws {Error} When the path is empty, missing ("File not found: <path>"), or a
 *   directory; when the API answers with a non-2xx status (message is the response's
 *   `detail` when present, otherwise "Extraction failed: <status>"); or when a 2xx
 *   response is not valid JSON.
 */
async function extractDocument(filePath, documentType) {
  if (typeof filePath !== "string" || filePath === "") {
    throw new Error("file_path must be a non-empty string");
  }
  const resolvedPath = path.resolve(filePath);

  // Read asynchronously so a large file does not block the stdio message loop.
  let fileBuffer;
  try {
    fileBuffer = await fs.promises.readFile(resolvedPath);
  } catch (error) {
    if (error.code === "ENOENT") {
      throw new Error(`File not found: ${resolvedPath}`);
    }
    if (error.code === "EISDIR") {
      throw new Error(`Not a file: ${resolvedPath}`);
    }
    throw error;
  }

  const form = new FormData();
  form.append(
    "file",
    new Blob([fileBuffer], { type: "application/octet-stream" }),
    path.basename(resolvedPath)
  );
  form.append("document_type", documentType || "auto");

  // No explicit Content-Type: fetch derives "multipart/form-data; boundary=..."
  // from the FormData body.
  const response = await fetch(`${API_URL}/v1/extract`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${API_KEY}`,
    },
    body: form,
  });

  // Parse manually so a non-JSON error page still reports the HTTP status
  // instead of surfacing a JSON SyntaxError.
  const rawBody = await response.text();
  let data;
  let isJson = false;
  try {
    data = JSON.parse(rawBody);
    isJson = true;
  } catch {
    // Not JSON; handled below depending on the status code.
  }

  if (!response.ok) {
    const detail =
      isJson && data !== null && typeof data === "object"
        ? data.detail
        : undefined;
    let message = `Extraction failed: ${response.status}`;
    if (detail) {
      message = typeof detail === "string" ? detail : JSON.stringify(detail);
    }
    throw new Error(message);
  }

  if (!isJson) {
    throw new Error(
      `Extraction returned a non-JSON response (status ${response.status})`
    );
  }
  return data;
}
|
|
106
|
+
|
|
107
|
+
// ─── MCP Server ──────────────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
// MCP server instance: the first argument is the identity reported to clients,
// the second declares that this server exposes tools (and nothing else).
const server = new Server(
  { name: "apapyr", version: "1.0.0" },
  { capabilities: { tools: {} } }
);
|
|
120
|
+
|
|
121
|
+
// Advertise the tool catalogue to MCP clients.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Every tool takes a plain JSON object as input. `required` is attached only
  // when a tool actually has mandatory arguments.
  const defineTool = (name, description, properties, required) => {
    const inputSchema = { type: "object", properties };
    if (required) {
      inputSchema.required = required;
    }
    return { name, description, inputSchema };
  };

  const extractDocumentTool = defineTool(
    "extract_document",
    "Extract structured data from a document (invoice, receipt, W-2, bank statement, or contract). Reads a PDF or image file and returns clean JSON with field values and confidence scores.",
    {
      file_path: {
        type: "string",
        description:
          "Absolute or relative path to the document file (PDF, PNG, JPG, or WEBP)",
      },
      document_type: {
        type: "string",
        enum: ["auto", "invoice", "receipt", "w2", "bank_statement", "contract"],
        description:
          "Type of document. Use 'auto' to detect automatically. Default: auto",
        default: "auto",
      },
    },
    ["file_path"]
  );

  const getExtractionTool = defineTool(
    "get_extraction",
    "Retrieve the result of a previous document extraction by its ID.",
    {
      extraction_id: {
        type: "string",
        description: "The extraction ID returned from extract_document",
      },
    },
    ["extraction_id"]
  );

  const listExtractionsTool = defineTool(
    "list_extractions",
    "List recent document extractions with their status and metadata.",
    {
      limit: {
        type: "number",
        description: "Number of results to return (max 100). Default: 10",
        default: 10,
      },
    }
  );

  const getUsageTool = defineTool(
    "get_usage",
    "Check current API usage — pages used, remaining quota, and plan details.",
    {}
  );

  const listSchemasTool = defineTool(
    "list_document_schemas",
    "List all supported document types and the fields that will be extracted from each.",
    {}
  );

  return {
    tools: [
      extractDocumentTool,
      getExtractionTool,
      listExtractionsTool,
      getUsageTool,
      listSchemasTool,
    ],
  };
});
|
|
206
|
+
|
|
207
|
+
// Handle tool calls

/**
 * Render a confidence score (a fraction, e.g. 0.93) as a whole-number percentage.
 *
 * A score of exactly 0 is a real value and is rendered as "0%"; only a missing or
 * non-numeric score yields the fallback.
 *
 * @param {*} confidence - Score as returned by the API.
 * @param {string} fallback - Text to use when no usable score is present.
 * @returns {string} e.g. "93%", or `fallback`.
 */
function formatConfidence(confidence, fallback) {
  if (confidence === undefined || confidence === null || confidence === "") {
    return fallback;
  }
  const score = Number(confidence);
  return Number.isFinite(score) ? `${(score * 100).toFixed(0)}%` : fallback;
}

/**
 * Coerce the caller-supplied `limit` into an integer between 1 and 100.
 *
 * The value is interpolated into a query string, so anything that is not a
 * positive number falls back to the default of 10 rather than being sent as-is.
 *
 * @param {*} requested - Raw `limit` argument (may be undefined).
 * @returns {number} Integer in the range 1..100.
 */
function clampListLimit(requested) {
  const parsed = Math.trunc(Number(requested));
  if (!Number.isFinite(parsed) || parsed < 1) {
    return 10;
  }
  return Math.min(parsed, 100);
}

/**
 * Build an MCP tool result carrying a single text item.
 *
 * @param {string} text - Text shown to the client.
 * @param {boolean} [isError] - When truthy, marks the result as an error.
 * @returns {{content: Array<{type: string, text: string}>, isError?: boolean}}
 */
function textResult(text, isError) {
  const result = { content: [{ type: "text", text }] };
  if (isError) {
    result.isError = true;
  }
  return result;
}

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name } = request.params;
  // `arguments` may be omitted by the client (e.g. for tools whose inputs are
  // all optional); default to an empty object so property reads never throw.
  const args = request.params.arguments || {};

  try {
    if (!API_KEY) {
      return textResult(
        "Error: APAPYR_API_KEY environment variable is not set. Get your API key at https://apapyr.com/dashboard.html",
        true
      );
    }

    switch (name) {
      case "extract_document": {
        if (typeof args.file_path !== "string" || args.file_path === "") {
          throw new Error("file_path is required");
        }
        const docType = args.document_type || "auto";
        const result = await extractDocument(args.file_path, docType);

        const hasProcessingTime =
          result.processing_time_ms !== undefined &&
          result.processing_time_ms !== null;

        // Human-readable header followed by the raw extracted fields.
        const summary = [
          `Document Type: ${result.document_type || "auto"}`,
          `Confidence: ${formatConfidence(result.confidence, "N/A")}`,
          `Processing Time: ${hasProcessingTime ? `${result.processing_time_ms}ms` : "N/A"}`,
        ];
        if (result.cached) {
          summary.push("(cached result)");
        }
        if (
          Array.isArray(result.validation_warnings) &&
          result.validation_warnings.length > 0
        ) {
          summary.push(`Warnings: ${result.validation_warnings.join(", ")}`);
        }
        summary.push("", "Extracted Data:", JSON.stringify(result.data, null, 2));

        return textResult(summary.join("\n"));
      }

      case "get_extraction": {
        const rawId = args.extraction_id;
        const extractionId =
          rawId === undefined || rawId === null ? "" : String(rawId).trim();
        if (extractionId === "") {
          throw new Error("extraction_id is required");
        }
        // Encode the ID so characters such as "/", "?" or "#" cannot redirect
        // the request to a different endpoint.
        const result = await apiRequest(
          "GET",
          `/v1/extract/${encodeURIComponent(extractionId)}`
        );
        return textResult(JSON.stringify(result, null, 2));
      }

      case "list_extractions": {
        const limit = clampListLimit(args.limit);
        const result = await apiRequest("GET", `/v1/extractions?limit=${limit}`);
        const extractions = Array.isArray(result.extractions)
          ? result.extractions
          : [];
        const lines = extractions.map(
          (e) =>
            `${e.id} | ${e.file_name || "—"} | ${e.document_type || "auto"} | ${formatConfidence(e.confidence, "—")} | ${e.created_at}`
        );
        return textResult(
          lines.length > 0
            ? `ID | File | Type | Confidence | Date\n${"-".repeat(60)}\n${lines.join("\n")}`
            : "No extractions found."
        );
      }

      case "get_usage": {
        const result = await apiRequest("GET", "/v1/usage");
        return textResult(
          `Plan: ${result.plan}\nPages Used: ${result.used} / ${result.limit}\nRemaining: ${result.remaining}\nOverage Price: ${result.overage_price_per_page}/page`
        );
      }

      case "list_document_schemas": {
        const result = await apiRequest("GET", "/v1/schemas");
        const schemaText = Object.entries(result.schemas || {})
          .map(([type, schema]) => {
            const fields = (schema.fields || []).join(", ");
            const lineItems = schema.has_line_items
              ? `\n Line Items: ${(schema.line_item_fields || []).join(", ")}`
              : "";
            return `${type}:\n Fields: ${fields}${lineItems}`;
          })
          .join("\n\n");
        return textResult(schemaText || "No document schemas available.");
      }

      default:
        return textResult(`Unknown tool: ${name}`, true);
    }
  } catch (error) {
    // Report failures as tool-level errors so the client sees the message
    // instead of the request failing at the protocol level.
    const message = error instanceof Error ? error.message : String(error);
    return textResult(`Error: ${message}`, true);
  }
});
|
|
324
|
+
|
|
325
|
+
// ─── Start ───────────────────────────────────────────────────────────────────
|
|
326
|
+
|
|
327
|
+
/**
 * Attach the server to a stdio transport and begin serving requests.
 * Resolves once the connection is established.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// Startup failures are written to stderr and the process exits with status 1.
main().catch((error) => {
  console.error("MCP server error:", error);
  process.exit(1);
});
|