apapyr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,63 @@
1
+ # aPapyr Node.js SDK
2
+
3
+ Extract structured data from invoices, receipts, and documents. Send a PDF or image, get clean JSON back.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install apapyr
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```javascript
14
+ const { aPapyr } = require("apapyr");
15
+
16
+ const client = new aPapyr("sk_live_your_key");
17
+
18
+ // Extract data from an invoice (run this inside an async function; CommonJS scripts do not support top-level await)
19
+ const result = await client.extract("invoice.pdf");
20
+
21
+ console.log(result.getField("vendor_name")); // "Acme Corp"
22
+ console.log(result.getField("total")); // 1250.00
23
+ console.log(result.confidence); // 0.97
24
+
25
+ // Get all fields as a flat object
26
+ console.log(result.toFlatDict());
27
+ // { document_type: "invoice", vendor_name: "Acme Corp", total: 1250.00, ... }
28
+ ```
29
+
30
+ ## AI Agent Support
31
+
32
+ aPapyr works natively with AI agents via MCP:
33
+
34
+ ```bash
35
+ claude mcp add apapyr -- npx @apapyr/mcp-server
36
+ ```
37
+
38
+ Then just ask: *"Extract the data from invoice.pdf"*
39
+
40
+ ## Document Types
41
+
42
+ - `invoice` — vendor, total, tax, due_date, line_items
43
+ - `receipt` — merchant, total, tax, tip, payment_method
44
+ - `w2` — employer, wages, federal_tax, state_tax
45
+ - `bank_statement` — bank, balances, transactions
46
+ - `contract` — parties, dates, value, obligations
47
+ - `auto` — automatically detects type (default)
48
+
49
+ ## Environment Variable
50
+
51
+ ```bash
52
+ export APAPYR_API_KEY=sk_live_your_key
53
+ ```
54
+
55
+ ```javascript
56
+ const client = new aPapyr(); // reads from env
57
+ ```
58
+
59
+ ## Links
60
+
61
+ - [API Docs](https://apapyr.com/docs.html)
62
+ - [Dashboard](https://apapyr.com/dashboard.html)
63
+ - [Free Tool](https://apapyr.com/free-tool.html)
package/package.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "apapyr",
3
+ "version": "1.0.0",
4
+ "description": "Extract structured data from invoices, receipts, and documents. PDF/image in, JSON out.",
5
+ "main": "src/index.js",
6
+ "types": "src/index.d.ts",
7
+ "keywords": ["invoice", "receipt", "OCR", "document", "extraction", "PDF", "parser", "API", "AI", "MCP", "apapyr"],
8
+ "author": "aPapyr",
9
+ "license": "MIT",
10
+ "homepage": "https://apapyr.com",
11
+ "repository": {
12
+ "type": "git",
13
+ "url": "https://github.com/apapyr/apapyr-node"
14
+ },
15
+ "engines": {
16
+ "node": ">=18"
17
+ },
18
+ "files": ["src/"]
19
+ }
package/src/index.d.ts ADDED
@@ -0,0 +1,36 @@
1
/**
 * Error type thrown by the SDK: for a missing API key, a file path that does
 * not exist, an extraction whose status is "failed", and any non-OK HTTP
 * response from the API.
 */
export class aPapyrError extends Error {
  /**
   * @param message Human-readable description of the failure.
   * @param statusCode HTTP status of the failing response, when there was one.
   * @param response Response payload associated with the failure, when available.
   */
  constructor(message: string, statusCode?: number, response?: { [key: string]: any });

  /** HTTP status of the failing response; undefined for client-side failures. */
  statusCode?: number;

  /** Response payload associated with the failure, when one was attached. */
  response?: { [key: string]: any };
}
6
+
7
/**
 * Result of a document extraction, wrapping the raw API payload and exposing
 * its fields through camelCase properties and convenience accessors.
 */
export class Extraction {
  /**
   * Wrap a raw extraction payload.
   *
   * The implementation reads `data.id` and friends unconditionally, so a
   * payload object is required; declaring the parameter keeps
   * `new Extraction()` from type-checking and then crashing at runtime.
   *
   * @param data Raw extraction object as returned by the API (snake_case keys).
   */
  constructor(data: Record<string, any>);

  /** Extraction identifier (`id` in the payload). */
  id: string;
  /** Processing status reported by the API (`status` in the payload). */
  status: string;
  /** Document type (`document_type` in the payload). */
  documentType: string;
  /** Confidence value for the extraction as a whole (`confidence` in the payload). */
  confidence: number;
  /** Processing time in milliseconds (`processing_time_ms` in the payload). */
  processingTimeMs: number;
  /** `cached` flag from the payload; false when the payload omits it. */
  cached: boolean;
  /** Extracted fields keyed by name (`data.fields`); empty object when absent. */
  fields: Record<string, { value: any; confidence: number }>;
  /** Extracted line items (`data.line_items`); empty array when absent. */
  lineItems: Array<Record<string, { value: any; confidence: number }>>;
  /** Validation warnings (`validation_warnings`); empty array when absent. */
  validationWarnings: string[];

  /**
   * Return the value of a field.
   * @param name Field name, e.g. "vendor_name".
   * @returns The field's `value` when the field is an object, otherwise the stored entry itself.
   */
  getField(name: string): any;

  /**
   * Return the confidence of a field.
   * @param name Field name.
   * @returns The field's `confidence`, or 0 when the field is missing or carries none.
   */
  getFieldConfidence(name: string): number;

  /** Return the raw payload this instance was built from. */
  toDict(): Record<string, any>;

  /** Return `document_type` plus every field name mapped to its plain value. */
  toFlatDict(): Record<string, any>;
}
23
+
24
/** Client for the aPapyr document-extraction HTTP API. */
export class aPapyr {
  /**
   * @param apiKey API key; when omitted the `APAPYR_API_KEY` environment variable is used.
   * @param options Optional settings. `baseUrl` overrides the default API base URL.
   * @throws aPapyrError when no API key is available.
   */
  constructor(apiKey?: string, options?: { baseUrl?: string });

  /**
   * Upload a document to `/v1/extract` and return the extraction.
   *
   * @param file Path of the document on disk, or the document contents.
   * @param options `documentType` defaults to "auto"; `webhookUrl` is sent as
   *   `webhook_url` only when provided.
   * @throws aPapyrError when the file path does not exist, the API responds
   *   with a non-OK status, or the extraction status is "failed".
   */
  extract(
    file: string | Buffer | NodeJS.ReadableStream,
    options?: {
      documentType?: string;
      webhookUrl?: string;
    }
  ): Promise<Extraction>;

  /**
   * Fetch an extraction by id from `/v1/extract/{id}`.
   * @param extractionId Identifier of the extraction to fetch.
   */
  getExtraction(extractionId: string): Promise<Extraction>;

  /**
   * List extractions from `/v1/extractions`.
   * @param options `limit` defaults to 20 and `offset` to 0.
   * @returns The `extractions` array of the response, or an empty array when absent.
   */
  listExtractions(options?: { limit?: number; offset?: number }): Promise<Array<any>>;

  /** Return the JSON body of `/v1/usage`. */
  getUsage(): Promise<{ [key: string]: any }>;

  /** Return the JSON body of `/v1/schemas`. */
  listSchemas(): Promise<{ [key: string]: any }>;
}
package/src/index.js ADDED
@@ -0,0 +1,186 @@
1
+ const fs = require("fs");
2
+ const path = require("path");
3
+
4
/**
 * Error type used for every failure the SDK reports itself.
 *
 * Carries the HTTP status code and response payload alongside the message
 * when the caller supplies them.
 */
class aPapyrError extends Error {
  /**
   * @param {string} message - Description of the failure.
   * @param {number} [statusCode] - HTTP status of the failing response.
   * @param {object} [response] - Payload associated with the failure.
   */
  constructor(message, statusCode, response) {
    super(message);
    // Assigned in one step; property order (name, statusCode, response) is preserved.
    Object.assign(this, { name: "aPapyrError", statusCode, response });
  }
}
12
+
13
/**
 * Wrapper around a raw extraction payload returned by the API.
 *
 * Exposes the payload's snake_case keys as camelCase properties and offers
 * helpers for reading field values and confidences.
 */
class Extraction {
  /**
   * @param {object} data - Raw extraction payload. Fields are read from
   *   `data.data.fields` and line items from `data.data.line_items`.
   */
  constructor(data) {
    this._data = data;
    this.id = data.id;
    this.status = data.status;
    this.documentType = data.document_type;
    this.confidence = data.confidence;
    this.processingTimeMs = data.processing_time_ms;
    this.cached = data.cached || false;
    this.fields = (data.data && data.data.fields) || {};
    this.lineItems = (data.data && data.data.line_items) || [];
    this.validationWarnings = data.validation_warnings || [];
  }

  /**
   * Return the value of a field.
   * @param {string} name - Field name.
   * @returns {*} The entry's `value` when the entry is an object, the entry
   *   itself when it is a primitive or null, or undefined when the field is absent.
   */
  getField(name) {
    return Extraction._unwrap(this._ownField(name));
  }

  /**
   * Return the confidence of a field.
   * @param {string} name - Field name.
   * @returns {number} The entry's `confidence`, or 0 when the field is
   *   absent, is not an object, or has no (or a falsy) confidence.
   */
  getFieldConfidence(name) {
    const field = this._ownField(name);
    if (field && typeof field === "object") return field.confidence || 0;
    return 0;
  }

  /**
   * Return the raw payload this instance was constructed from.
   * @returns {object} The same object passed to the constructor (not a copy).
   */
  toDict() {
    return this._data;
  }

  /**
   * Flatten the extraction into `{ document_type, <field>: <value>, ... }`.
   * @returns {object} A new plain object.
   */
  toFlatDict() {
    const result = { document_type: this.documentType };
    for (const [key, val] of Object.entries(this.fields)) {
      // Uses the same unwrapping as getField so a null entry is passed
      // through instead of throwing on `null.value`.
      result[key] = Extraction._unwrap(val);
    }
    return result;
  }

  /**
   * Look up a field entry, ignoring members inherited from Object.prototype
   * (so a name such as "constructor" or "toString" is reported as absent).
   */
  _ownField(name) {
    return Object.prototype.hasOwnProperty.call(this.fields, name)
      ? this.fields[name]
      : undefined;
  }

  /** Return `entry.value` for object entries and the entry itself otherwise. */
  static _unwrap(entry) {
    // `typeof null` is "object", hence the explicit null check.
    return entry !== null && typeof entry === "object" ? entry.value : entry;
  }
}
51
+
52
/**
 * Client for the aPapyr document-extraction HTTP API.
 *
 * Every request is authenticated with a bearer token and failures are
 * reported as aPapyrError.
 */
class aPapyr {
  /**
   * @param {string} [apiKey] - Your aPapyr API key (sk_live_...). Falls back
   *   to the APAPYR_API_KEY environment variable when omitted.
   * @param {object} [options]
   * @param {string} [options.baseUrl] - API base URL
   * @throws {aPapyrError} When no API key is available.
   */
  constructor(apiKey, options = {}) {
    this.apiKey = apiKey || process.env.APAPYR_API_KEY;
    if (!this.apiKey) {
      throw new aPapyrError(
        'API key required. Pass it as new aPapyr("sk_live_...") or set APAPYR_API_KEY env var.'
      );
    }
    const configuredUrl = (options && options.baseUrl) || "https://api.apapyr.com";
    // Strip every trailing slash so path concatenation never yields "//v1/...".
    this.baseUrl = configuredUrl.replace(/\/+$/, "");
  }

  /**
   * Extract structured data from a document.
   * @param {string|Buffer|ReadableStream} file - File path, Buffer/Uint8Array,
   *   or a readable stream (any async iterable of chunks)
   * @param {object} [options]
   * @param {string} [options.documentType='auto'] - Document type
   * @param {string} [options.webhookUrl] - Webhook URL for async results
   * @returns {Promise<Extraction>}
   * @throws {aPapyrError} When the file is missing or of an unsupported kind,
   *   the API responds with a non-OK status, or the extraction failed.
   */
  async extract(file, options = {}) {
    const { documentType = "auto", webhookUrl } = options;
    const { fileData, fileName } = await this._resolveFile(file);

    const boundary = "----aPapyr" + Math.random().toString(36).slice(2);

    // Percent-escape the characters that would terminate the quoted filename
    // or start a new header line inside the multipart part.
    const safeName = fileName
      .replace(/"/g, "%22")
      .replace(/\r/g, "%0D")
      .replace(/\n/g, "%0A");

    const parts = [
      `--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="${safeName}"\r\nContent-Type: application/octet-stream\r\n\r\n`,
      fileData,
      "\r\n",
      `--${boundary}\r\nContent-Disposition: form-data; name="document_type"\r\n\r\n${documentType}\r\n`,
    ];

    if (webhookUrl) {
      parts.push(
        `--${boundary}\r\nContent-Disposition: form-data; name="webhook_url"\r\n\r\n${webhookUrl}\r\n`
      );
    }

    parts.push(`--${boundary}--\r\n`);

    const body = Buffer.concat(
      parts.map((p) => (typeof p === "string" ? Buffer.from(p) : p))
    );

    const response = await fetch(`${this.baseUrl}/v1/extract`, {
      method: "POST",
      headers: {
        ...this._authHeaders(),
        "Content-Type": `multipart/form-data; boundary=${boundary}`,
      },
      body,
    });

    return this._handleResponse(response);
  }

  /**
   * Fetch a single extraction by id.
   * @param {string} extractionId
   * @returns {Promise<Extraction>}
   */
  async getExtraction(extractionId) {
    // Encode the id so characters such as "/" or "?" cannot alter the URL path.
    const response = await fetch(
      `${this.baseUrl}/v1/extract/${encodeURIComponent(extractionId)}`,
      { headers: this._authHeaders() }
    );
    return this._handleResponse(response);
  }

  /**
   * List extractions.
   * @param {object} [options]
   * @param {number} [options.limit=20]
   * @param {number} [options.offset=0]
   * @returns {Promise<Array>} The `extractions` array of the response, or []
   *   when the response has none.
   */
  async listExtractions(options = {}) {
    const { limit = 20, offset = 0 } = options;
    const query = new URLSearchParams({ limit: String(limit), offset: String(offset) });
    const response = await fetch(`${this.baseUrl}/v1/extractions?${query}`, {
      headers: this._authHeaders(),
    });
    if (!response.ok) await this._raiseError(response);
    const data = await response.json();
    return data.extractions || [];
  }

  /**
   * Return the JSON body of the usage endpoint.
   * @returns {Promise<object>}
   */
  async getUsage() {
    const response = await fetch(`${this.baseUrl}/v1/usage`, {
      headers: this._authHeaders(),
    });
    if (!response.ok) await this._raiseError(response);
    return response.json();
  }

  /**
   * Return the JSON body of the schemas endpoint.
   * @returns {Promise<object>}
   */
  async listSchemas() {
    const response = await fetch(`${this.baseUrl}/v1/schemas`, {
      headers: this._authHeaders(),
    });
    if (!response.ok) await this._raiseError(response);
    return response.json();
  }

  /** Build the bearer-token header sent with every request. */
  _authHeaders() {
    return { Authorization: `Bearer ${this.apiKey}` };
  }

  /**
   * Turn the `file` argument of extract() into bytes plus an upload name.
   * @returns {Promise<{fileData: Uint8Array, fileName: string}>}
   */
  async _resolveFile(file) {
    if (typeof file === "string") {
      try {
        // Read without blocking the event loop; a single read also avoids
        // the exists-then-read race.
        const fileData = await fs.promises.readFile(file);
        return { fileData, fileName: path.basename(file) };
      } catch (err) {
        if (err && err.code === "ENOENT") {
          throw new aPapyrError(`File not found: ${file}`);
        }
        throw err;
      }
    }

    // Buffer is a Uint8Array subclass, so this covers both.
    if (file instanceof Uint8Array) {
      return { fileData: file, fileName: "document" };
    }

    if (file && typeof file[Symbol.asyncIterator] === "function") {
      // Readable streams are async iterable: drain them into one buffer,
      // since the multipart body is assembled in memory.
      const chunks = [];
      for await (const chunk of file) {
        chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
      }
      // File streams expose the path they were opened with; reuse its name.
      const fileName =
        typeof file.path === "string" ? path.basename(file.path) : "document";
      return { fileData: Buffer.concat(chunks), fileName };
    }

    throw new aPapyrError(
      "Unsupported file input: expected a file path, Buffer, or readable stream."
    );
  }

  /**
   * Convert an extraction response into an Extraction.
   * @throws {aPapyrError} On a non-OK status or a payload whose status is "failed".
   */
  async _handleResponse(response) {
    if (!response.ok) await this._raiseError(response);
    const data = await response.json();
    if (data.status === "failed") {
      throw new aPapyrError(data.error || "Extraction failed", response.status, data);
    }
    return new Extraction(data);
  }

  /**
   * Always throws an aPapyrError describing a non-OK response.
   *
   * The body is read exactly once as text and then parsed: a fetch body can
   * only be consumed once, so calling json() and falling back to text()
   * fails on non-JSON bodies.
   */
  async _raiseError(response) {
    const raw = await response.text();

    let payload;
    let isJson = false;
    try {
      payload = JSON.parse(raw);
      isJson = true;
    } catch {
      // Not JSON (e.g. an HTML or plain-text error page); use the raw text.
    }

    let message = raw;
    if (isJson) {
      const detail = payload && payload.detail;
      // A structured detail would otherwise be coerced to "[object Object]".
      message =
        typeof detail === "string" && detail ? detail : JSON.stringify(detail || payload);
    }
    if (!message) {
      message = `Request failed with status ${response.status}`;
    }

    const errorBody =
      payload !== null && typeof payload === "object" ? payload : undefined;
    throw new aPapyrError(message, response.status, errorBody);
  }
}
185
+
186
+ module.exports = { aPapyr, Extraction, aPapyrError };