apapyr 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -0
- package/package.json +19 -0
- package/src/index.d.ts +36 -0
- package/src/index.js +186 -0
package/README.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# aPapyr Node.js SDK
|
|
2
|
+
|
|
3
|
+
Extract structured data from invoices, receipts, and documents. Send a PDF or image, get clean JSON back.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install apapyr
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```javascript
|
|
14
|
+
const { aPapyr } = require("apapyr");
|
|
15
|
+
|
|
16
|
+
const client = new aPapyr("sk_live_your_key");
|
|
17
|
+
|
|
18
|
+
// Extract data from an invoice (run this inside an async function, or in an ES module, so that `await` is allowed)
|
|
19
|
+
const result = await client.extract("invoice.pdf");
|
|
20
|
+
|
|
21
|
+
console.log(result.getField("vendor_name")); // "Acme Corp"
|
|
22
|
+
console.log(result.getField("total")); // 1250.00
|
|
23
|
+
console.log(result.confidence); // 0.97
|
|
24
|
+
|
|
25
|
+
// Get all fields as a flat object
|
|
26
|
+
console.log(result.toFlatDict());
|
|
27
|
+
// { document_type: "invoice", vendor_name: "Acme Corp", total: 1250.00, ... }
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## AI Agent Support
|
|
31
|
+
|
|
32
|
+
aPapyr works natively with AI agents via MCP:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
claude mcp add apapyr -- npx @apapyr/mcp-server
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Then just ask: *"Extract the data from invoice.pdf"*
|
|
39
|
+
|
|
40
|
+
## Document Types
|
|
41
|
+
|
|
42
|
+
- `invoice` — vendor, total, tax, due_date, line_items
|
|
43
|
+
- `receipt` — merchant, total, tax, tip, payment_method
|
|
44
|
+
- `w2` — employer, wages, federal_tax, state_tax
|
|
45
|
+
- `bank_statement` — bank, balances, transactions
|
|
46
|
+
- `contract` — parties, dates, value, obligations
|
|
47
|
+
- `auto` — automatically detects type (default)
|
|
48
|
+
|
|
49
|
+
## Environment Variable
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
export APAPYR_API_KEY=sk_live_your_key
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
```javascript
|
|
56
|
+
const client = new aPapyr(); // reads from env
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Links
|
|
60
|
+
|
|
61
|
+
- [API Docs](https://apapyr.com/docs.html)
|
|
62
|
+
- [Dashboard](https://apapyr.com/dashboard.html)
|
|
63
|
+
- [Free Tool](https://apapyr.com/free-tool.html)
|
package/package.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "apapyr",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Extract structured data from invoices, receipts, and documents. PDF/image in, JSON out.",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"types": "src/index.d.ts",
|
|
7
|
+
"keywords": ["invoice", "receipt", "OCR", "document", "extraction", "PDF", "parser", "API", "AI", "MCP", "apapyr"],
|
|
8
|
+
"author": "aPapyr",
|
|
9
|
+
"license": "MIT",
|
|
10
|
+
"homepage": "https://apapyr.com",
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "https://github.com/apapyr/apapyr-node"
|
|
14
|
+
},
|
|
15
|
+
"engines": {
|
|
16
|
+
"node": ">=18"
|
|
17
|
+
},
|
|
18
|
+
"files": ["src/"]
|
|
19
|
+
}
|
package/src/index.d.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
 * Error type thrown by the SDK.
 *
 * Carries the HTTP status code and, when one was supplied by the thrower,
 * the response payload that accompanied the failure.
 */
export declare class aPapyrError extends Error {
  constructor(message: string, statusCode?: number, response?: Record<string, any>);

  /** HTTP status code associated with the failure, if any. */
  statusCode?: number;

  /** Response payload attached to the error, if any. */
  response?: Record<string, any>;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Result of a single document extraction.
 *
 * Wraps the raw API payload and exposes its snake_case keys as camelCase
 * properties, plus helpers for reading individual fields.
 */
export declare class Extraction {
  /**
   * @param data Raw extraction payload as returned by the API. Required: the
   *   implementation reads properties from it unconditionally.
   */
  constructor(data: Record<string, any>);

  id: string;
  status: string;
  documentType: string;
  /** Overall confidence reported for the extraction. */
  confidence: number;
  processingTimeMs: number;
  /** `false` when the payload carries no `cached` flag. */
  cached: boolean;
  /** Extracted fields keyed by field name; empty object when the payload has none. */
  fields: Record<string, { value: any; confidence: number }>;
  /** Extracted line items; empty array when the payload has none. */
  lineItems: Array<Record<string, { value: any; confidence: number }>>;
  /** Validation warnings; empty array when the payload has none. */
  validationWarnings: string[];

  /** Returns the value of the named field, or `undefined` when it is absent. */
  getField(name: string): any;
  /** Returns the confidence of the named field, or `0` when it is absent or has none. */
  getFieldConfidence(name: string): number;
  /** Returns the raw payload this instance was built from. */
  toDict(): Record<string, any>;
  /** Returns `document_type` plus every field name mapped directly to its value. */
  toFlatDict(): Record<string, any>;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Client for the aPapyr HTTP API.
 */
export declare class aPapyr {
  /**
   * @param apiKey API key (`sk_live_...`). When omitted, the `APAPYR_API_KEY`
   *   environment variable is used.
   * @param options.baseUrl API base URL; defaults to `https://api.apapyr.com`.
   * @throws {aPapyrError} When no API key is passed and the environment
   *   variable is not set.
   */
  constructor(apiKey?: string, options?: { baseUrl?: string });

  /** API key used for the `Authorization: Bearer` header. */
  apiKey: string;

  /** Base URL requests are sent to, without a trailing slash. */
  baseUrl: string;

  /**
   * Uploads a document to `/v1/extract` and resolves with the extraction.
   *
   * @param file The document to upload. A string is treated as a file path.
   * @param options.documentType Document type to extract as; defaults to `"auto"`.
   * @param options.webhookUrl Webhook URL forwarded with the request.
   * @throws {aPapyrError} When the file path does not exist, the API responds
   *   with a non-2xx status, or the extraction status is `"failed"`.
   */
  extract(
    file: string | Buffer | NodeJS.ReadableStream,
    options?: { documentType?: string; webhookUrl?: string }
  ): Promise<Extraction>;

  /** Fetches a previously created extraction by id. */
  getExtraction(extractionId: string): Promise<Extraction>;

  /**
   * Lists extractions.
   *
   * @param options.limit Page size; defaults to 20.
   * @param options.offset Number of entries to skip; defaults to 0.
   */
  listExtractions(options?: { limit?: number; offset?: number }): Promise<any[]>;

  /** Fetches the JSON body of `/v1/usage`. */
  getUsage(): Promise<Record<string, any>>;

  /** Fetches the JSON body of `/v1/schemas`. */
  listSchemas(): Promise<Record<string, any>>;
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
const fs = require("fs");
|
|
2
|
+
const path = require("path");
|
|
3
|
+
|
|
4
|
+
class aPapyrError extends Error {
|
|
5
|
+
constructor(message, statusCode, response) {
|
|
6
|
+
super(message);
|
|
7
|
+
this.name = "aPapyrError";
|
|
8
|
+
this.statusCode = statusCode;
|
|
9
|
+
this.response = response;
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
class Extraction {
|
|
14
|
+
constructor(data) {
|
|
15
|
+
this._data = data;
|
|
16
|
+
this.id = data.id;
|
|
17
|
+
this.status = data.status;
|
|
18
|
+
this.documentType = data.document_type;
|
|
19
|
+
this.confidence = data.confidence;
|
|
20
|
+
this.processingTimeMs = data.processing_time_ms;
|
|
21
|
+
this.cached = data.cached || false;
|
|
22
|
+
this.fields = (data.data && data.data.fields) || {};
|
|
23
|
+
this.lineItems = (data.data && data.data.line_items) || [];
|
|
24
|
+
this.validationWarnings = data.validation_warnings || [];
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
getField(name) {
|
|
28
|
+
const field = this.fields[name];
|
|
29
|
+
if (field && typeof field === "object") return field.value;
|
|
30
|
+
return field;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
getFieldConfidence(name) {
|
|
34
|
+
const field = this.fields[name];
|
|
35
|
+
if (field && typeof field === "object") return field.confidence || 0;
|
|
36
|
+
return 0;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
toDict() {
|
|
40
|
+
return this._data;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
toFlatDict() {
|
|
44
|
+
const result = { document_type: this.documentType };
|
|
45
|
+
for (const [key, val] of Object.entries(this.fields)) {
|
|
46
|
+
result[key] = typeof val === "object" ? val.value : val;
|
|
47
|
+
}
|
|
48
|
+
return result;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
class aPapyr {
|
|
53
|
+
/**
|
|
54
|
+
* @param {string} apiKey - Your aPapyr API key (sk_live_...)
|
|
55
|
+
* @param {object} [options]
|
|
56
|
+
* @param {string} [options.baseUrl] - API base URL
|
|
57
|
+
*/
|
|
58
|
+
constructor(apiKey, options = {}) {
|
|
59
|
+
this.apiKey = apiKey || process.env.APAPYR_API_KEY;
|
|
60
|
+
if (!this.apiKey) {
|
|
61
|
+
throw new aPapyrError(
|
|
62
|
+
'API key required. Pass it as new aPapyr("sk_live_...") or set APAPYR_API_KEY env var.'
|
|
63
|
+
);
|
|
64
|
+
}
|
|
65
|
+
this.baseUrl = (options.baseUrl || "https://api.apapyr.com").replace(/\/$/, "");
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Extract structured data from a document.
|
|
70
|
+
* @param {string|Buffer|ReadableStream} file - File path or Buffer or stream
|
|
71
|
+
* @param {object} [options]
|
|
72
|
+
* @param {string} [options.documentType='auto'] - Document type
|
|
73
|
+
* @param {string} [options.webhookUrl] - Webhook URL for async results
|
|
74
|
+
* @returns {Promise<Extraction>}
|
|
75
|
+
*/
|
|
76
|
+
async extract(file, options = {}) {
|
|
77
|
+
const { documentType = "auto", webhookUrl } = options;
|
|
78
|
+
|
|
79
|
+
let fileData;
|
|
80
|
+
let fileName = "document";
|
|
81
|
+
|
|
82
|
+
if (typeof file === "string") {
|
|
83
|
+
if (!fs.existsSync(file)) {
|
|
84
|
+
throw new aPapyrError(`File not found: ${file}`);
|
|
85
|
+
}
|
|
86
|
+
fileData = fs.readFileSync(file);
|
|
87
|
+
fileName = path.basename(file);
|
|
88
|
+
} else if (Buffer.isBuffer(file)) {
|
|
89
|
+
fileData = file;
|
|
90
|
+
} else {
|
|
91
|
+
fileData = file;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
const boundary = "----aPapyr" + Math.random().toString(36).slice(2);
|
|
95
|
+
const parts = [];
|
|
96
|
+
|
|
97
|
+
parts.push(
|
|
98
|
+
`--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="${fileName}"\r\nContent-Type: application/octet-stream\r\n\r\n`
|
|
99
|
+
);
|
|
100
|
+
parts.push(fileData);
|
|
101
|
+
parts.push("\r\n");
|
|
102
|
+
|
|
103
|
+
parts.push(
|
|
104
|
+
`--${boundary}\r\nContent-Disposition: form-data; name="document_type"\r\n\r\n${documentType}\r\n`
|
|
105
|
+
);
|
|
106
|
+
|
|
107
|
+
if (webhookUrl) {
|
|
108
|
+
parts.push(
|
|
109
|
+
`--${boundary}\r\nContent-Disposition: form-data; name="webhook_url"\r\n\r\n${webhookUrl}\r\n`
|
|
110
|
+
);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
parts.push(`--${boundary}--\r\n`);
|
|
114
|
+
|
|
115
|
+
const body = Buffer.concat(
|
|
116
|
+
parts.map((p) => (typeof p === "string" ? Buffer.from(p) : p))
|
|
117
|
+
);
|
|
118
|
+
|
|
119
|
+
const response = await fetch(`${this.baseUrl}/v1/extract`, {
|
|
120
|
+
method: "POST",
|
|
121
|
+
headers: {
|
|
122
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
123
|
+
"Content-Type": `multipart/form-data; boundary=${boundary}`,
|
|
124
|
+
},
|
|
125
|
+
body,
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
return this._handleResponse(response);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
async getExtraction(extractionId) {
|
|
132
|
+
const response = await fetch(`${this.baseUrl}/v1/extract/${extractionId}`, {
|
|
133
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
134
|
+
});
|
|
135
|
+
return this._handleResponse(response);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
async listExtractions(options = {}) {
|
|
139
|
+
const { limit = 20, offset = 0 } = options;
|
|
140
|
+
const response = await fetch(
|
|
141
|
+
`${this.baseUrl}/v1/extractions?limit=${limit}&offset=${offset}`,
|
|
142
|
+
{ headers: { Authorization: `Bearer ${this.apiKey}` } }
|
|
143
|
+
);
|
|
144
|
+
if (!response.ok) await this._raiseError(response);
|
|
145
|
+
const data = await response.json();
|
|
146
|
+
return data.extractions || [];
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
async getUsage() {
|
|
150
|
+
const response = await fetch(`${this.baseUrl}/v1/usage`, {
|
|
151
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
152
|
+
});
|
|
153
|
+
if (!response.ok) await this._raiseError(response);
|
|
154
|
+
return response.json();
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
async listSchemas() {
|
|
158
|
+
const response = await fetch(`${this.baseUrl}/v1/schemas`, {
|
|
159
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
160
|
+
});
|
|
161
|
+
if (!response.ok) await this._raiseError(response);
|
|
162
|
+
return response.json();
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
async _handleResponse(response) {
|
|
166
|
+
if (!response.ok) await this._raiseError(response);
|
|
167
|
+
const data = await response.json();
|
|
168
|
+
if (data.status === "failed") {
|
|
169
|
+
throw new aPapyrError(data.error || "Extraction failed", response.status, data);
|
|
170
|
+
}
|
|
171
|
+
return new Extraction(data);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
async _raiseError(response) {
|
|
175
|
+
let message;
|
|
176
|
+
try {
|
|
177
|
+
const data = await response.json();
|
|
178
|
+
message = data.detail || JSON.stringify(data);
|
|
179
|
+
} catch {
|
|
180
|
+
message = await response.text();
|
|
181
|
+
}
|
|
182
|
+
throw new aPapyrError(message, response.status);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Public API of the package: the client, the extraction wrapper and the SDK error type.
module.exports = { aPapyr, Extraction, aPapyrError };
|