parsefy 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +307 -0
- package/dist/index.cjs +2 -0
- package/dist/index.d.cts +183 -0
- package/dist/index.d.mts +183 -0
- package/dist/index.d.ts +183 -0
- package/dist/index.mjs +2 -0
- package/package.json +52 -7
- package/index.js +0 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Parsefy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
package/README.md
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
# Parsefy
|
|
2
|
+
|
|
3
|
+
Official TypeScript SDK for [Parsefy](https://parsefy.io) – AI-powered document data extraction.
|
|
4
|
+
|
|
5
|
+
Extract structured data from PDFs and DOCX files using Zod schemas with full TypeScript type inference.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install parsefy zod
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { Parsefy } from 'parsefy';
|
|
17
|
+
import * as z from 'zod';
|
|
18
|
+
|
|
19
|
+
const client = new Parsefy('pk_your_api_key');
|
|
20
|
+
|
|
21
|
+
const schema = z.object({
|
|
22
|
+
invoice_number: z.string().describe('The invoice number'),
|
|
23
|
+
date: z.string().describe('Invoice date in YYYY-MM-DD format'),
|
|
24
|
+
total: z.number().describe('Total amount'),
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
const { object, error } = await client.extract({
|
|
28
|
+
file: './invoice.pdf',
|
|
29
|
+
schema,
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
if (!error && object) {
|
|
33
|
+
console.log(object.invoice_number); // Fully typed!
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Configuration
|
|
38
|
+
|
|
39
|
+
### API Key
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
// Option 1: Pass API key directly
|
|
43
|
+
const client = new Parsefy('pk_your_api_key');
|
|
44
|
+
|
|
45
|
+
// Option 2: Use environment variable
|
|
46
|
+
// Set PARSEFY_API_KEY in your environment
|
|
47
|
+
const client = new Parsefy();
|
|
48
|
+
|
|
49
|
+
// Option 3: Configuration object
|
|
50
|
+
const client = new Parsefy({
|
|
51
|
+
apiKey: 'pk_your_api_key',
|
|
52
|
+
timeout: 120000, // 2 minutes
|
|
53
|
+
});
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Configuration Options
|
|
57
|
+
|
|
58
|
+
| Option | Type | Default | Description |
|
|
59
|
+
|--------|------|---------|-------------|
|
|
60
|
+
| `apiKey` | `string` | `process.env.PARSEFY_API_KEY` | Your Parsefy API key |
|
|
61
|
+
| `timeout` | `number` | `60000` | Request timeout in ms |
|
|
62
|
+
|
|
63
|
+
## Usage
|
|
64
|
+
|
|
65
|
+
### Basic Extraction
|
|
66
|
+
|
|
67
|
+
```typescript
|
|
68
|
+
import { Parsefy } from 'parsefy';
|
|
69
|
+
import * as z from 'zod';
|
|
70
|
+
|
|
71
|
+
const client = new Parsefy();
|
|
72
|
+
|
|
73
|
+
const schema = z.object({
|
|
74
|
+
name: z.string(),
|
|
75
|
+
email: z.string().email(),
|
|
76
|
+
phone: z.string().optional(),
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
const { object, metadata, error } = await client.extract({
|
|
80
|
+
file: './contact.pdf',
|
|
81
|
+
schema,
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
if (!error) {
|
|
85
|
+
console.log(object);
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### File Input Options
|
|
90
|
+
|
|
91
|
+
The SDK supports multiple file input types. **Files don't need to be on disk** – you can work entirely in memory, which is ideal for building APIs and serverless functions.
|
|
92
|
+
|
|
93
|
+
| Input Type | Usage | Environment |
|
|
94
|
+
|------------|-------|-------------|
|
|
95
|
+
| `string` | File path | Node.js only |
|
|
96
|
+
| `Buffer` | In-memory bytes | Node.js |
|
|
97
|
+
| `File` | From file input or FormData | Browser, Node.js 20+, Edge |
|
|
98
|
+
| `Blob` | Raw binary with MIME type | Universal |
|
|
99
|
+
| `ArrayBuffer` | Wrap in `Blob` first | Universal |
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
// Node.js: File path (convenience for scripts/CLI)
|
|
103
|
+
const result = await client.extract({
|
|
104
|
+
file: './document.pdf',
|
|
105
|
+
schema,
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
// Node.js: Buffer (in-memory)
|
|
109
|
+
import { readFileSync } from 'fs';
|
|
110
|
+
const result = await client.extract({
|
|
111
|
+
file: readFileSync('./document.pdf'),
|
|
112
|
+
schema,
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
// Browser: File input
|
|
116
|
+
const fileInput = document.querySelector('input[type="file"]');
|
|
117
|
+
const result = await client.extract({
|
|
118
|
+
file: fileInput.files[0],
|
|
119
|
+
schema,
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
// Universal: Blob (with explicit MIME type)
|
|
123
|
+
const result = await client.extract({
|
|
124
|
+
file: new Blob([arrayBuffer], { type: 'application/pdf' }),
|
|
125
|
+
schema,
|
|
126
|
+
});
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Server-Side / API Usage
|
|
130
|
+
|
|
131
|
+
When building APIs that receive file uploads, files are typically kept in memory. The SDK handles this seamlessly:
|
|
132
|
+
|
|
133
|
+
**Express with Multer:**
|
|
134
|
+
|
|
135
|
+
```typescript
|
|
136
|
+
import express from 'express';
|
|
137
|
+
import multer from 'multer';
|
|
138
|
+
import { Parsefy } from 'parsefy';
|
|
139
|
+
|
|
140
|
+
const upload = multer(); // Store in memory, not disk
|
|
141
|
+
const client = new Parsefy();
const app = express();
|
|
142
|
+
|
|
143
|
+
app.post('/extract', upload.single('document'), async (req, res) => {
|
|
144
|
+
const { object, error } = await client.extract({
|
|
145
|
+
file: req.file.buffer, // Buffer from multer
|
|
146
|
+
schema,
|
|
147
|
+
});
|
|
148
|
+
res.json({ data: object, error });
|
|
149
|
+
});
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Fastify:**
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
import Fastify from 'fastify';
|
|
156
|
+
import multipart from '@fastify/multipart';
|
|
157
|
+
import { Parsefy } from 'parsefy';
|
|
158
|
+
|
|
159
|
+
const fastify = Fastify();
|
|
160
|
+
await fastify.register(multipart);
|
|
161
|
+
const client = new Parsefy();
|
|
162
|
+
|
|
163
|
+
fastify.post('/extract', async (request) => {
|
|
164
|
+
const data = await request.file();
|
|
165
|
+
const buffer = await data.toBuffer();
|
|
166
|
+
|
|
167
|
+
const { object, error } = await client.extract({
|
|
168
|
+
file: buffer,
|
|
169
|
+
schema,
|
|
170
|
+
});
|
|
171
|
+
return { data: object, error };
|
|
172
|
+
});
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**Hono / Cloudflare Workers:**
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
import { Hono } from 'hono';
|
|
179
|
+
import { Parsefy } from 'parsefy';
|
|
180
|
+
|
|
181
|
+
const app = new Hono();
|
|
182
|
+
const client = new Parsefy();
|
|
183
|
+
|
|
184
|
+
app.post('/extract', async (c) => {
|
|
185
|
+
const formData = await c.req.formData();
|
|
186
|
+
const file = formData.get('document'); // File object
|
|
187
|
+
|
|
188
|
+
const { object, error } = await client.extract({
|
|
189
|
+
file, // File from FormData works directly
|
|
190
|
+
schema,
|
|
191
|
+
});
|
|
192
|
+
return c.json({ data: object, error });
|
|
193
|
+
});
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Complex Schemas
|
|
197
|
+
|
|
198
|
+
Use `.describe()` to guide the AI extraction:
|
|
199
|
+
|
|
200
|
+
```typescript
|
|
201
|
+
const invoiceSchema = z.object({
|
|
202
|
+
invoice_number: z.string().describe('The invoice or receipt number'),
|
|
203
|
+
date: z.string().describe('Invoice date in YYYY-MM-DD format'),
|
|
204
|
+
vendor: z.object({
|
|
205
|
+
name: z.string().describe('Company name of the vendor'),
|
|
206
|
+
address: z.string().describe('Full address of the vendor'),
|
|
207
|
+
}),
|
|
208
|
+
line_items: z.array(z.object({
|
|
209
|
+
description: z.string().describe('Item description'),
|
|
210
|
+
quantity: z.number().describe('Number of units'),
|
|
211
|
+
unit_price: z.number().describe('Price per unit'),
|
|
212
|
+
amount: z.number().describe('Total amount for this line'),
|
|
213
|
+
})).describe('List of items on the invoice'),
|
|
214
|
+
subtotal: z.number().describe('Subtotal before tax'),
|
|
215
|
+
tax: z.number().describe('Tax amount'),
|
|
216
|
+
total: z.number().describe('Total amount due'),
|
|
217
|
+
currency: z.string().describe('3-letter currency code (USD, EUR, etc.)'),
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
const { object } = await client.extract({
|
|
221
|
+
file: './invoice.pdf',
|
|
222
|
+
schema: invoiceSchema,
|
|
223
|
+
});
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Error Handling
|
|
227
|
+
|
|
228
|
+
```typescript
|
|
229
|
+
import { Parsefy, APIError, ValidationError, ParsefyError } from 'parsefy';
|
|
230
|
+
|
|
231
|
+
try {
|
|
232
|
+
const { object, error, metadata } = await client.extract({
|
|
233
|
+
file: './document.pdf',
|
|
234
|
+
schema,
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
// Extraction-level errors (request succeeded, but extraction failed)
|
|
238
|
+
if (error) {
|
|
239
|
+
console.error(`Extraction failed: [${error.code}] ${error.message}`);
|
|
240
|
+
console.log(`Tokens used: ${metadata.inputTokens} in, ${metadata.outputTokens} out`);
|
|
241
|
+
return;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
console.log('Success:', object);
|
|
245
|
+
} catch (err) {
|
|
246
|
+
// HTTP/Network errors
|
|
247
|
+
if (err instanceof APIError) {
|
|
248
|
+
console.error(`API Error ${err.statusCode}: ${err.message}`);
|
|
249
|
+
} else if (err instanceof ValidationError) {
|
|
250
|
+
console.error(`Validation Error: ${err.message}`);
|
|
251
|
+
} else if (err instanceof ParsefyError) {
|
|
252
|
+
console.error(`Parsefy Error: ${err.message}`);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Response Format
|
|
258
|
+
|
|
259
|
+
```typescript
|
|
260
|
+
interface ExtractResult<T> {
|
|
261
|
+
// Extracted data matching your schema, or null if extraction failed
|
|
262
|
+
object: T | null;
|
|
263
|
+
|
|
264
|
+
// Metadata about the extraction
|
|
265
|
+
metadata: {
|
|
266
|
+
processingTimeMs: number; // Processing time in milliseconds
|
|
267
|
+
inputTokens: number; // Input tokens used
|
|
268
|
+
outputTokens: number; // Output tokens generated
|
|
269
|
+
credits: number; // Credits consumed (1 credit = 1 page)
|
|
270
|
+
fallbackTriggered: boolean; // Whether fallback model was used
|
|
271
|
+
};
|
|
272
|
+
|
|
273
|
+
// Error details if extraction failed
|
|
274
|
+
error: {
|
|
275
|
+
code: string; // EXTRACTION_FAILED, LLM_ERROR, PARSING_ERROR, TIMEOUT_ERROR
|
|
276
|
+
message: string;
|
|
277
|
+
} | null;
|
|
278
|
+
}
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## Error Types
|
|
282
|
+
|
|
283
|
+
| Error Class | Description |
|
|
284
|
+
|-------------|-------------|
|
|
285
|
+
| `ParsefyError` | Base error class for all Parsefy errors |
|
|
286
|
+
| `APIError` | HTTP errors (4xx/5xx responses) |
|
|
287
|
+
| `ExtractionError` | Extraction failed (returned in response) |
|
|
288
|
+
| `ValidationError` | Client-side validation errors |
|
|
289
|
+
|
|
290
|
+
## Supported File Types
|
|
291
|
+
|
|
292
|
+
- **PDF** (`.pdf`) – up to 10MB
|
|
293
|
+
- **DOCX** (`.docx`) – up to 10MB
|
|
294
|
+
|
|
295
|
+
## Rate Limits
|
|
296
|
+
|
|
297
|
+
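The API allows 1 request per second. On rate limit errors (HTTP 429) the SDK automatically retries up to 3 times with exponential backoff; if the limit is still exceeded after the last retry, the `APIError` is thrown to the caller.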
|
|
298
|
+
|
|
299
|
+
## Requirements
|
|
300
|
+
|
|
301
|
+
- Node.js 18+ (for native `fetch` and `FormData`)
|
|
302
|
+
- Zod 3.x (peer dependency)
|
|
303
|
+
|
|
304
|
+
## License
|
|
305
|
+
|
|
306
|
+
MIT © [Parsefy](https://parsefy.io)
|
|
307
|
+
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
'use strict';

const zodToJsonSchema = require('zod-to-json-schema');

/** MIME type for each supported file extension (lower-case, leading dot included). */
const SUPPORTED_MIME_TYPES = {
  '.pdf': 'application/pdf',
  '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
};

/** Largest upload accepted by the client-side size check (10 MiB). */
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024;

/** API origin used when the caller does not supply `baseUrl`. */
const DEFAULT_BASE_URL = 'https://api.parsefy.io';

/** Request timeout, in milliseconds, used when the caller does not supply `timeout`. */
const DEFAULT_TIMEOUT_MS = 60000;

/** Leading bytes of a ZIP container ("PK\x03\x04"); DOCX files are ZIP archives. */
const ZIP_MAGIC = [0x50, 0x4b, 0x03, 0x04];

/**
 * Base class for every error raised by this module.
 */
class ParsefyError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} [code] - Optional machine-readable error code.
   */
  constructor(message, code) {
    super(message);
    this.name = 'ParsefyError';
    this.code = code;
    // captureStackTrace is a V8 extension; skip it on engines that lack it.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}

/**
 * Raised by `makeRequest` when the API answers with a non-2xx status.
 */
class APIError extends ParsefyError {
  /**
   * @param {string} message - Human-readable description.
   * @param {number} statusCode - HTTP status of the response.
   * @param {unknown} [response] - Parsed error body, when one could be read.
   */
  constructor(message, statusCode, response) {
    super(message);
    this.name = 'APIError';
    this.statusCode = statusCode;
    this.response = response;
  }
}

/**
 * Error type carrying extraction metadata. Nothing in this module throws it;
 * it is defined here so it can be exported for callers.
 */
class ExtractionError extends ParsefyError {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Machine-readable error code.
   * @param {object} metadata - Metadata about the extraction attempt.
   */
  constructor(message, code, metadata) {
    super(message, code);
    this.name = 'ExtractionError';
    this.metadata = metadata;
  }
}

/**
 * Raised for problems detected locally, before any request is sent
 * (missing API key, unsupported or oversized file, bad input type).
 */
class ValidationError extends ParsefyError {
  /**
   * @param {string} message - Human-readable description.
   */
  constructor(message) {
    super(message);
    this.name = 'ValidationError';
  }
}

/**
 * Reports whether the code is running under Node.js.
 *
 * @returns {boolean} True when `process.versions.node` is defined.
 */
function isNode() {
  return typeof process !== 'undefined' && process.versions?.node !== undefined;
}

/**
 * Converts a Zod schema to the JSON schema sent as `output_schema`.
 *
 * @param {object} schema - Zod schema supplied by the caller.
 * @returns {object} JSON schema produced by `zod-to-json-schema`.
 */
function toJsonSchema(schema) {
  return zodToJsonSchema.zodToJsonSchema(schema, {
    $refStrategy: 'none',
    target: 'openApi3',
  });
}

/**
 * Looks up the MIME type for a filename by its (case-insensitive) extension.
 *
 * @param {string} filename - Name whose last `.ext` segment is inspected.
 * @returns {string|null} The MIME type, or null for unsupported or missing extensions.
 */
function getMimeType(filename) {
  const extension = filename.toLowerCase().match(/\.[^.]+$/)?.[0];
  return (extension && SUPPORTED_MIME_TYPES[extension]) || null;
}

/**
 * Rejects filenames whose extension is not in `SUPPORTED_MIME_TYPES`.
 *
 * @param {string} filename - Name to check.
 * @throws {ValidationError} When the extension is unsupported.
 */
function validateFileType(filename) {
  if (!getMimeType(filename)) {
    const supported = Object.keys(SUPPORTED_MIME_TYPES).join(', ');
    throw new ValidationError(`Unsupported file type. Supported types: ${supported}`);
  }
}

/**
 * Rejects empty files and files larger than `MAX_FILE_SIZE_BYTES`.
 *
 * @param {number} size - File size in bytes.
 * @throws {ValidationError} When the size is 0 or above the limit.
 */
function validateFileSize(size) {
  if (size === 0) {
    throw new ValidationError('File is empty');
  }
  if (size > MAX_FILE_SIZE_BYTES) {
    const limitMb = MAX_FILE_SIZE_BYTES / 1048576;
    throw new ValidationError(`File size exceeds maximum limit of ${limitMb}MB`);
  }
}

/**
 * Maps the snake_case API payload onto the camelCase result returned by `extract`.
 *
 * @param {object} payload - Parsed JSON body of a successful response.
 * @returns {{object: unknown, metadata: object, error: unknown}} Normalized result.
 */
function transformResponse(payload) {
  return {
    object: payload.object,
    metadata: {
      processingTimeMs: payload.metadata.processing_time_ms,
      inputTokens: payload.metadata.input_tokens,
      outputTokens: payload.metadata.output_tokens,
      credits: payload.metadata.credits,
      fallbackTriggered: payload.metadata.fallback_triggered,
    },
    error: payload.error,
  };
}

/**
 * Wraps a Node.js Buffer in a `File` (or a `Blob` where `File` is not a global).
 *
 * @param {Buffer} buffer - Bytes to upload.
 * @param {string} filename - Name used for the upload and for the MIME lookup.
 * @returns {File|Blob} Upload-ready object typed from the filename extension.
 */
function bufferToFile(buffer, filename) {
  const type = getMimeType(filename) || 'application/octet-stream';
  // A Buffer may be a view onto a larger pooled ArrayBuffer; copy only its own bytes.
  const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
  return typeof File !== 'undefined'
    ? new File([bytes], filename, { type })
    : new Blob([bytes], { type });
}

/**
 * Picks an upload name for an anonymous Buffer from its leading bytes.
 *
 * A Buffer carries no filename, so the extension (and therefore the MIME type)
 * must be guessed. Content starting with the ZIP signature is labelled DOCX,
 * the only ZIP-based type in `SUPPORTED_MIME_TYPES`; everything else keeps the
 * historical `document.pdf` default.
 *
 * @param {Buffer} buffer - Bytes to inspect.
 * @returns {string} `document.docx` for ZIP content, otherwise `document.pdf`.
 */
function guessBufferFilename(buffer) {
  const looksLikeZip =
    buffer.length >= ZIP_MAGIC.length &&
    ZIP_MAGIC.every((byte, index) => buffer[index] === byte);
  return looksLikeZip ? 'document.docx' : 'document.pdf';
}

/**
 * Reads a document from disk (Node.js only) after validating its type, then its size.
 *
 * @param {string} filePath - Path to the document.
 * @returns {Promise<{buffer: Buffer, filename: string}>} File bytes and base name.
 * @throws {ValidationError} Outside Node.js, when the path does not exist, or when
 *   the type or size check fails.
 */
async function readFileFromPath(filePath) {
  if (!isNode()) {
    throw new ValidationError('File path strings are only supported in Node.js. Use File or Blob in the browser.');
  }
  // Loaded lazily so that environments without these modules never try to resolve them.
  const fs = await import('fs');
  const path = await import('path');
  if (!fs.existsSync(filePath)) {
    throw new ValidationError(`File not found: ${filePath}`);
  }
  const filename = path.basename(filePath);
  validateFileType(filename);
  const buffer = fs.readFileSync(filePath);
  validateFileSize(buffer.length);
  return { buffer, filename };
}

/**
 * Turns any accepted `file` input into a `File`/`Blob` ready for `FormData`.
 *
 * The `Buffer`, `File` and `Blob` globals are probed with `typeof` before use:
 * browsers have no `Buffer` and older Node.js releases have no global `File`,
 * so an unguarded reference would throw `ReferenceError` instead of falling
 * through to the next accepted input type.
 *
 * @param {File|Blob|Buffer|string} input - Caller-supplied document.
 * @returns {Promise<File|Blob>} Validated upload object.
 * @throws {ValidationError} When the input is of an unsupported kind or fails validation.
 */
async function normalizeFile(input) {
  if (typeof input === 'string') {
    const { buffer, filename } = await readFileFromPath(input);
    return bufferToFile(buffer, filename);
  }
  if (typeof Buffer !== 'undefined' && Buffer.isBuffer(input)) {
    validateFileSize(input.length);
    return bufferToFile(input, guessBufferFilename(input));
  }
  if (typeof File !== 'undefined' && input instanceof File) {
    validateFileType(input.name);
    validateFileSize(input.size);
    return input;
  }
  if (typeof Blob !== 'undefined' && input instanceof Blob) {
    validateFileSize(input.size);
    return input;
  }
  throw new ValidationError('Invalid file input. Expected File, Blob, Buffer, or file path string.');
}

/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Computes the wait before a retry: `baseMs * 2^attempt` plus up to 10% random
 * jitter, capped at 30 seconds.
 *
 * @param {number} attempt - Zero-based retry attempt.
 * @param {number} [baseMs=1000] - Delay for the first retry.
 * @returns {number} Delay in milliseconds.
 */
function calculateBackoff(attempt, baseMs = 1000) {
  const exponential = baseMs * Math.pow(2, attempt);
  const jitter = Math.random() * 0.1 * exponential;
  return Math.min(exponential + jitter, 30000);
}

/**
 * Client for the Parsefy extraction endpoint (`POST {baseUrl}/v1/extract`).
 */
class Parsefy {
  /**
   * @param {string|{apiKey?: string, baseUrl?: string, timeout?: number}} [configOrApiKey]
   *   API key, or a configuration object. Without an API key the
   *   `PARSEFY_API_KEY` environment variable is used (Node.js only).
   * @throws {ValidationError} When no API key can be resolved.
   */
  constructor(configOrApiKey) {
    this.maxRetries = 3;
    let config = {};
    if (typeof configOrApiKey === 'string') {
      config = { apiKey: configOrApiKey };
    } else if (configOrApiKey) {
      config = configOrApiKey;
    }
    this.apiKey = config.apiKey || this.getEnvApiKey();
    if (!this.apiKey) {
      throw new ValidationError('API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.');
    }
    // Trailing slashes are dropped so the request URL never contains "//v1/extract".
    this.baseUrl = String(config.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, '');
    this.timeout = config.timeout || DEFAULT_TIMEOUT_MS;
  }

  /**
   * Reads the API key from the environment.
   *
   * @returns {string} `PARSEFY_API_KEY` under Node.js, otherwise an empty string.
   */
  getEnvApiKey() {
    return (isNode() && process.env.PARSEFY_API_KEY) || '';
  }

  /**
   * Uploads a document together with the JSON form of the schema and returns
   * the normalized extraction result.
   *
   * @param {{file: File|Blob|Buffer|string, schema: object}} options - Document and Zod schema.
   * @returns {Promise<{object: unknown, metadata: object, error: unknown}>} Extraction result.
   * @throws {ValidationError} When the file input is invalid.
   * @throws {APIError} When the API responds with a non-2xx status.
   * @throws {ParsefyError} On timeout, network failure or any other unexpected error.
   */
  async extract(options) {
    const { file, schema } = options;
    const jsonSchema = toJsonSchema(schema);
    const upload = await normalizeFile(file);
    const formData = new FormData();
    formData.append('file', upload);
    formData.append('output_schema', JSON.stringify(jsonSchema));
    return this.makeRequestWithRetry(formData);
  }

  /**
   * Sends the request, retrying with backoff while the API answers HTTP 429
   * and fewer than `maxRetries` retries have been made.
   *
   * @param {FormData} formData - Request body.
   * @param {number} [attempt=0] - Zero-based index of the first attempt.
   * @returns {Promise<object>} Normalized extraction result.
   * @throws {ParsefyError} Any error from `makeRequest` that is not a retryable 429.
   */
  async makeRequestWithRetry(formData, attempt = 0) {
    for (let current = attempt; ; current += 1) {
      try {
        return await this.makeRequest(formData);
      } catch (err) {
        const retryable =
          err instanceof APIError && err.statusCode === 429 && current < this.maxRetries;
        if (!retryable) {
          throw err;
        }
        await sleep(calculateBackoff(current));
      }
    }
  }

  /**
   * Performs a single HTTP request, aborting it after `this.timeout` ms.
   *
   * The abort timer stays armed until the response body has been read (it is
   * cleared in `finally`), so a stalled body is reported as a timeout too.
   *
   * @param {FormData} formData - Request body.
   * @returns {Promise<object>} Normalized extraction result.
   * @throws {APIError} On a non-2xx response.
   * @throws {ParsefyError} With code `TIMEOUT`, `NETWORK_ERROR` or `UNKNOWN_ERROR`.
   */
  async makeRequest(formData) {
    const url = `${this.baseUrl}/v1/extract`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);
    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: formData,
        signal: controller.signal,
      });
      if (!response.ok) {
        const details = await this.parseErrorResponse(response);
        // `details` can be any JSON value (including null), hence the optional chain.
        throw new APIError(
          details?.message || `API request failed with status ${response.status}`,
          response.status,
          details,
        );
      }
      return transformResponse(await response.json());
    } catch (err) {
      if (err instanceof Error && err.name === 'AbortError') {
        throw new ParsefyError(`Request timed out after ${this.timeout}ms`, 'TIMEOUT');
      }
      if (err instanceof ParsefyError) {
        throw err;
      }
      if (err instanceof TypeError) {
        throw new ParsefyError('Network error: Unable to connect to the Parsefy API', 'NETWORK_ERROR');
      }
      throw new ParsefyError(
        `Unexpected error: ${err instanceof Error ? err.message : String(err)}`,
        'UNKNOWN_ERROR',
      );
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Reads the body of an error response without ever throwing.
   *
   * The body is read once as text and then parsed: a response body can only be
   * consumed a single time, so calling `text()` after a failed `json()` would
   * always fail and the server's plain-text message would be lost.
   *
   * @param {Response} response - Non-2xx fetch response.
   * @returns {Promise<unknown>} Parsed JSON body, or `{ message }` built from the
   *   raw text or, failing that, the status text.
   */
  async parseErrorResponse(response) {
    let raw;
    try {
      raw = await response.text();
    } catch {
      return { message: response.statusText };
    }
    try {
      return JSON.parse(raw);
    } catch {
      return { message: raw || response.statusText };
    }
  }
}

// Short aliases kept because the `exports.*` assignments that follow this
// block refer to the classes by these names.
const i = ParsefyError;
const c = APIError;
const l = ExtractionError;
const s = ValidationError;
const y = Parsefy;
|
|
2
|
+
exports.APIError=c;exports.ExtractionError=l;exports.Parsefy=y;exports.ParsefyError=i;exports.ValidationError=s;
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Configuration options for the Parsefy client.
|
|
5
|
+
*/
|
|
6
|
+
interface ParsefyConfig {
|
|
7
|
+
/** API key for authentication. If not provided, reads from PARSEFY_API_KEY environment variable. */
|
|
8
|
+
apiKey?: string;
|
|
9
|
+
/** Base URL for the API. Defaults to https://api.parsefy.io */
|
|
10
|
+
baseUrl?: string;
|
|
11
|
+
/** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
|
|
12
|
+
timeout?: number;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Options for the extract method.
|
|
16
|
+
*/
|
|
17
|
+
interface ExtractOptions<T extends z.ZodType> {
|
|
18
|
+
/** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). */
|
|
19
|
+
file: File | Blob | Buffer | string;
|
|
20
|
+
/** Zod schema defining the structure of data to extract. */
|
|
21
|
+
schema: T;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Metadata about the extraction process.
|
|
25
|
+
*/
|
|
26
|
+
interface ExtractionMetadata {
|
|
27
|
+
/** Time taken to process the document in milliseconds. */
|
|
28
|
+
processingTimeMs: number;
|
|
29
|
+
/** Number of input tokens used. */
|
|
30
|
+
inputTokens: number;
|
|
31
|
+
/** Number of output tokens generated. */
|
|
32
|
+
outputTokens: number;
|
|
33
|
+
/** Number of credits consumed (1 credit = 1 page). */
|
|
34
|
+
credits: number;
|
|
35
|
+
/** Whether the fallback model was triggered for higher accuracy. */
|
|
36
|
+
fallbackTriggered: boolean;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Error response from the API.
|
|
40
|
+
*/
|
|
41
|
+
interface APIErrorResponse {
|
|
42
|
+
/** Error code identifying the type of error. */
|
|
43
|
+
code: string;
|
|
44
|
+
/** Human-readable error message. */
|
|
45
|
+
message: string;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Result of an extraction operation.
|
|
49
|
+
*/
|
|
50
|
+
interface ExtractResult<T> {
|
|
51
|
+
/** Extracted data matching the schema, or null if extraction failed. */
|
|
52
|
+
object: T | null;
|
|
53
|
+
/** Metadata about the extraction process. */
|
|
54
|
+
metadata: ExtractionMetadata;
|
|
55
|
+
/** Error details if extraction failed, or null on success. */
|
|
56
|
+
error: APIErrorResponse | null;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Parsefy client for extracting structured data from documents.
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```ts
|
|
64
|
+
* import { Parsefy } from 'parsefy';
|
|
65
|
+
* import * as z from 'zod';
|
|
66
|
+
*
|
|
67
|
+
* const client = new Parsefy('pk_your_api_key');
|
|
68
|
+
*
|
|
69
|
+
* const schema = z.object({
|
|
70
|
+
* name: z.string(),
|
|
71
|
+
* total: z.number(),
|
|
72
|
+
* });
|
|
73
|
+
*
|
|
74
|
+
* const { object, error } = await client.extract({
|
|
75
|
+
* file: './invoice.pdf',
|
|
76
|
+
* schema,
|
|
77
|
+
* });
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
declare class Parsefy {
|
|
81
|
+
private readonly apiKey;
|
|
82
|
+
private readonly baseUrl;
|
|
83
|
+
private readonly timeout;
|
|
84
|
+
private readonly maxRetries;
|
|
85
|
+
/**
|
|
86
|
+
* Creates a new Parsefy client.
|
|
87
|
+
*
|
|
88
|
+
* @param configOrApiKey - API key string or configuration object.
|
|
89
|
+
* If not provided, reads from PARSEFY_API_KEY environment variable.
|
|
90
|
+
*
|
|
91
|
+
* @example
|
|
92
|
+
* ```ts
|
|
93
|
+
* // Using API key directly
|
|
94
|
+
* const client = new Parsefy('pk_your_api_key');
|
|
95
|
+
*
|
|
96
|
+
* // Using configuration object
|
|
97
|
+
* const client = new Parsefy({
|
|
98
|
+
* apiKey: 'pk_your_api_key',
|
|
99
|
+
* timeout: 120000,
|
|
100
|
+
* });
|
|
101
|
+
*
|
|
102
|
+
* // Using environment variable
|
|
103
|
+
* const client = new Parsefy();
|
|
104
|
+
* ```
|
|
105
|
+
*/
|
|
106
|
+
constructor(configOrApiKey?: string | ParsefyConfig);
|
|
107
|
+
/**
|
|
108
|
+
* Gets the API key from environment variable.
|
|
109
|
+
*/
|
|
110
|
+
private getEnvApiKey;
|
|
111
|
+
/**
|
|
112
|
+
* Extracts structured data from a document using the provided Zod schema.
|
|
113
|
+
*
|
|
114
|
+
* @param options - Extraction options including file and schema.
|
|
115
|
+
* @returns Promise resolving to the extraction result with typed data.
|
|
116
|
+
*
|
|
117
|
+
* @example
|
|
118
|
+
* ```ts
|
|
119
|
+
* const schema = z.object({
|
|
120
|
+
* invoice_number: z.string().describe('The invoice number'),
|
|
121
|
+
* total: z.number().describe('Total amount'),
|
|
122
|
+
* });
|
|
123
|
+
*
|
|
124
|
+
* const { object, metadata, error } = await client.extract({
|
|
125
|
+
* file: './invoice.pdf',
|
|
126
|
+
* schema,
|
|
127
|
+
* });
|
|
128
|
+
*
|
|
129
|
+
* if (!error && object) {
|
|
130
|
+
* console.log(object.invoice_number); // Fully typed!
|
|
131
|
+
* }
|
|
132
|
+
* ```
|
|
133
|
+
*/
|
|
134
|
+
extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
|
|
135
|
+
/**
|
|
136
|
+
* Makes a request with retry logic for rate limiting.
|
|
137
|
+
*/
|
|
138
|
+
private makeRequestWithRetry;
|
|
139
|
+
/**
|
|
140
|
+
* Makes the actual HTTP request to the API.
|
|
141
|
+
*/
|
|
142
|
+
private makeRequest;
|
|
143
|
+
/**
|
|
144
|
+
* Parses error response body safely.
|
|
145
|
+
*/
|
|
146
|
+
private parseErrorResponse;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Base error class for all Parsefy errors.
|
|
151
|
+
*/
|
|
152
|
+
declare class ParsefyError extends Error {
|
|
153
|
+
/** Error code, if applicable. */
|
|
154
|
+
readonly code?: string;
|
|
155
|
+
constructor(message: string, code?: string);
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Error thrown when the API returns an HTTP error (4xx/5xx).
|
|
159
|
+
*/
|
|
160
|
+
declare class APIError extends ParsefyError {
|
|
161
|
+
/** HTTP status code of the response. */
|
|
162
|
+
readonly statusCode: number;
|
|
163
|
+
/** Raw response body, if available. */
|
|
164
|
+
readonly response?: unknown;
|
|
165
|
+
constructor(message: string, statusCode: number, response?: unknown);
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Error thrown when document extraction fails (returned in response.error).
|
|
169
|
+
* This is not an HTTP error - the request succeeded but extraction failed.
|
|
170
|
+
*/
|
|
171
|
+
declare class ExtractionError extends ParsefyError {
|
|
172
|
+
/** Metadata about the extraction attempt. */
|
|
173
|
+
readonly metadata: ExtractionMetadata;
|
|
174
|
+
constructor(message: string, code: string, metadata: ExtractionMetadata);
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Error thrown for client-side validation failures.
|
|
178
|
+
*/
|
|
179
|
+
declare class ValidationError extends ParsefyError {
|
|
180
|
+
constructor(message: string);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Configuration options for the Parsefy client.
|
|
5
|
+
*/
|
|
6
|
+
interface ParsefyConfig {
|
|
7
|
+
/** API key for authentication. If not provided, reads from PARSEFY_API_KEY environment variable. */
|
|
8
|
+
apiKey?: string;
|
|
9
|
+
/** Base URL for the API. Defaults to https://api.parsefy.io */
|
|
10
|
+
baseUrl?: string;
|
|
11
|
+
/** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
|
|
12
|
+
timeout?: number;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Options for the extract method.
|
|
16
|
+
*/
|
|
17
|
+
interface ExtractOptions<T extends z.ZodType> {
|
|
18
|
+
/** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). */
|
|
19
|
+
file: File | Blob | Buffer | string;
|
|
20
|
+
/** Zod schema defining the structure of data to extract. */
|
|
21
|
+
schema: T;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Metadata about the extraction process.
|
|
25
|
+
*/
|
|
26
|
+
interface ExtractionMetadata {
|
|
27
|
+
/** Time taken to process the document in milliseconds. */
|
|
28
|
+
processingTimeMs: number;
|
|
29
|
+
/** Number of input tokens used. */
|
|
30
|
+
inputTokens: number;
|
|
31
|
+
/** Number of output tokens generated. */
|
|
32
|
+
outputTokens: number;
|
|
33
|
+
/** Number of credits consumed (1 credit = 1 page). */
|
|
34
|
+
credits: number;
|
|
35
|
+
/** Whether the fallback model was triggered for higher accuracy. */
|
|
36
|
+
fallbackTriggered: boolean;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Error response from the API.
|
|
40
|
+
*/
|
|
41
|
+
interface APIErrorResponse {
|
|
42
|
+
/** Error code identifying the type of error. */
|
|
43
|
+
code: string;
|
|
44
|
+
/** Human-readable error message. */
|
|
45
|
+
message: string;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Result of an extraction operation.
|
|
49
|
+
*/
|
|
50
|
+
interface ExtractResult<T> {
|
|
51
|
+
/** Extracted data matching the schema, or null if extraction failed. */
|
|
52
|
+
object: T | null;
|
|
53
|
+
/** Metadata about the extraction process. */
|
|
54
|
+
metadata: ExtractionMetadata;
|
|
55
|
+
/** Error details if extraction failed, or null on success. */
|
|
56
|
+
error: APIErrorResponse | null;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Parsefy client for extracting structured data from documents.
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```ts
|
|
64
|
+
* import { Parsefy } from 'parsefy';
|
|
65
|
+
* import * as z from 'zod';
|
|
66
|
+
*
|
|
67
|
+
* const client = new Parsefy('pk_your_api_key');
|
|
68
|
+
*
|
|
69
|
+
* const schema = z.object({
|
|
70
|
+
* name: z.string(),
|
|
71
|
+
* total: z.number(),
|
|
72
|
+
* });
|
|
73
|
+
*
|
|
74
|
+
* const { object, error } = await client.extract({
|
|
75
|
+
* file: './invoice.pdf',
|
|
76
|
+
* schema,
|
|
77
|
+
* });
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
declare class Parsefy {
|
|
81
|
+
private readonly apiKey;
|
|
82
|
+
private readonly baseUrl;
|
|
83
|
+
private readonly timeout;
|
|
84
|
+
private readonly maxRetries;
|
|
85
|
+
/**
|
|
86
|
+
* Creates a new Parsefy client.
|
|
87
|
+
*
|
|
88
|
+
* @param configOrApiKey - API key string or configuration object.
|
|
89
|
+
* If not provided, reads from PARSEFY_API_KEY environment variable.
|
|
90
|
+
*
|
|
91
|
+
* @example
|
|
92
|
+
* ```ts
|
|
93
|
+
* // Using API key directly
|
|
94
|
+
* const client = new Parsefy('pk_your_api_key');
|
|
95
|
+
*
|
|
96
|
+
* // Using configuration object
|
|
97
|
+
* const client = new Parsefy({
|
|
98
|
+
* apiKey: 'pk_your_api_key',
|
|
99
|
+
* timeout: 120000,
|
|
100
|
+
* });
|
|
101
|
+
*
|
|
102
|
+
* // Using environment variable
|
|
103
|
+
* const client = new Parsefy();
|
|
104
|
+
* ```
|
|
105
|
+
*/
|
|
106
|
+
constructor(configOrApiKey?: string | ParsefyConfig);
|
|
107
|
+
/**
|
|
108
|
+
* Gets the API key from environment variable.
|
|
109
|
+
*/
|
|
110
|
+
private getEnvApiKey;
|
|
111
|
+
/**
|
|
112
|
+
* Extracts structured data from a document using the provided Zod schema.
|
|
113
|
+
*
|
|
114
|
+
* @param options - Extraction options including file and schema.
|
|
115
|
+
* @returns Promise resolving to the extraction result with typed data.
|
|
116
|
+
*
|
|
117
|
+
* @example
|
|
118
|
+
* ```ts
|
|
119
|
+
* const schema = z.object({
|
|
120
|
+
* invoice_number: z.string().describe('The invoice number'),
|
|
121
|
+
* total: z.number().describe('Total amount'),
|
|
122
|
+
* });
|
|
123
|
+
*
|
|
124
|
+
* const { object, metadata, error } = await client.extract({
|
|
125
|
+
* file: './invoice.pdf',
|
|
126
|
+
* schema,
|
|
127
|
+
* });
|
|
128
|
+
*
|
|
129
|
+
* if (!error && object) {
|
|
130
|
+
* console.log(object.invoice_number); // Fully typed!
|
|
131
|
+
* }
|
|
132
|
+
* ```
|
|
133
|
+
*/
|
|
134
|
+
extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
|
|
135
|
+
/**
|
|
136
|
+
* Makes a request with retry logic for rate limiting.
|
|
137
|
+
*/
|
|
138
|
+
private makeRequestWithRetry;
|
|
139
|
+
/**
|
|
140
|
+
* Makes the actual HTTP request to the API.
|
|
141
|
+
*/
|
|
142
|
+
private makeRequest;
|
|
143
|
+
/**
|
|
144
|
+
* Parses error response body safely.
|
|
145
|
+
*/
|
|
146
|
+
private parseErrorResponse;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Base error class for all Parsefy errors.
|
|
151
|
+
*/
|
|
152
|
+
declare class ParsefyError extends Error {
|
|
153
|
+
/** Error code, if applicable. */
|
|
154
|
+
readonly code?: string;
|
|
155
|
+
constructor(message: string, code?: string);
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Error thrown when the API returns an HTTP error (4xx/5xx).
|
|
159
|
+
*/
|
|
160
|
+
declare class APIError extends ParsefyError {
|
|
161
|
+
/** HTTP status code of the response. */
|
|
162
|
+
readonly statusCode: number;
|
|
163
|
+
/** Raw response body, if available. */
|
|
164
|
+
readonly response?: unknown;
|
|
165
|
+
constructor(message: string, statusCode: number, response?: unknown);
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Error thrown when document extraction fails (returned in response.error).
|
|
169
|
+
* This is not an HTTP error - the request succeeded but extraction failed.
|
|
170
|
+
*/
|
|
171
|
+
declare class ExtractionError extends ParsefyError {
|
|
172
|
+
/** Metadata about the extraction attempt. */
|
|
173
|
+
readonly metadata: ExtractionMetadata;
|
|
174
|
+
constructor(message: string, code: string, metadata: ExtractionMetadata);
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Error thrown for client-side validation failures.
|
|
178
|
+
*/
|
|
179
|
+
declare class ValidationError extends ParsefyError {
|
|
180
|
+
constructor(message: string);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Configuration options for the Parsefy client.
|
|
5
|
+
*/
|
|
6
|
+
interface ParsefyConfig {
|
|
7
|
+
/** API key for authentication. If not provided, reads from PARSEFY_API_KEY environment variable. */
|
|
8
|
+
apiKey?: string;
|
|
9
|
+
/** Base URL for the API. Defaults to https://api.parsefy.io */
|
|
10
|
+
baseUrl?: string;
|
|
11
|
+
/** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
|
|
12
|
+
timeout?: number;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Options for the extract method.
|
|
16
|
+
*/
|
|
17
|
+
interface ExtractOptions<T extends z.ZodType> {
|
|
18
|
+
/** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). */
|
|
19
|
+
file: File | Blob | Buffer | string;
|
|
20
|
+
/** Zod schema defining the structure of data to extract. */
|
|
21
|
+
schema: T;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Metadata about the extraction process.
|
|
25
|
+
*/
|
|
26
|
+
interface ExtractionMetadata {
|
|
27
|
+
/** Time taken to process the document in milliseconds. */
|
|
28
|
+
processingTimeMs: number;
|
|
29
|
+
/** Number of input tokens used. */
|
|
30
|
+
inputTokens: number;
|
|
31
|
+
/** Number of output tokens generated. */
|
|
32
|
+
outputTokens: number;
|
|
33
|
+
/** Number of credits consumed (1 credit = 1 page). */
|
|
34
|
+
credits: number;
|
|
35
|
+
/** Whether the fallback model was triggered for higher accuracy. */
|
|
36
|
+
fallbackTriggered: boolean;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Error response from the API.
|
|
40
|
+
*/
|
|
41
|
+
interface APIErrorResponse {
|
|
42
|
+
/** Error code identifying the type of error. */
|
|
43
|
+
code: string;
|
|
44
|
+
/** Human-readable error message. */
|
|
45
|
+
message: string;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Result of an extraction operation.
|
|
49
|
+
*/
|
|
50
|
+
interface ExtractResult<T> {
|
|
51
|
+
/** Extracted data matching the schema, or null if extraction failed. */
|
|
52
|
+
object: T | null;
|
|
53
|
+
/** Metadata about the extraction process. */
|
|
54
|
+
metadata: ExtractionMetadata;
|
|
55
|
+
/** Error details if extraction failed, or null on success. */
|
|
56
|
+
error: APIErrorResponse | null;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Parsefy client for extracting structured data from documents.
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```ts
|
|
64
|
+
* import { Parsefy } from 'parsefy';
|
|
65
|
+
* import * as z from 'zod';
|
|
66
|
+
*
|
|
67
|
+
* const client = new Parsefy('pk_your_api_key');
|
|
68
|
+
*
|
|
69
|
+
* const schema = z.object({
|
|
70
|
+
* name: z.string(),
|
|
71
|
+
* total: z.number(),
|
|
72
|
+
* });
|
|
73
|
+
*
|
|
74
|
+
* const { object, error } = await client.extract({
|
|
75
|
+
* file: './invoice.pdf',
|
|
76
|
+
* schema,
|
|
77
|
+
* });
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
80
|
+
declare class Parsefy {
|
|
81
|
+
private readonly apiKey;
|
|
82
|
+
private readonly baseUrl;
|
|
83
|
+
private readonly timeout;
|
|
84
|
+
private readonly maxRetries;
|
|
85
|
+
/**
|
|
86
|
+
* Creates a new Parsefy client.
|
|
87
|
+
*
|
|
88
|
+
* @param configOrApiKey - API key string or configuration object.
|
|
89
|
+
* If not provided, reads from PARSEFY_API_KEY environment variable.
|
|
90
|
+
*
|
|
91
|
+
* @example
|
|
92
|
+
* ```ts
|
|
93
|
+
* // Using API key directly
|
|
94
|
+
* const client = new Parsefy('pk_your_api_key');
|
|
95
|
+
*
|
|
96
|
+
* // Using configuration object
|
|
97
|
+
* const client = new Parsefy({
|
|
98
|
+
* apiKey: 'pk_your_api_key',
|
|
99
|
+
* timeout: 120000,
|
|
100
|
+
* });
|
|
101
|
+
*
|
|
102
|
+
* // Using environment variable
|
|
103
|
+
* const client = new Parsefy();
|
|
104
|
+
* ```
|
|
105
|
+
*/
|
|
106
|
+
constructor(configOrApiKey?: string | ParsefyConfig);
|
|
107
|
+
/**
|
|
108
|
+
* Gets the API key from environment variable.
|
|
109
|
+
*/
|
|
110
|
+
private getEnvApiKey;
|
|
111
|
+
/**
|
|
112
|
+
* Extracts structured data from a document using the provided Zod schema.
|
|
113
|
+
*
|
|
114
|
+
* @param options - Extraction options including file and schema.
|
|
115
|
+
* @returns Promise resolving to the extraction result with typed data.
|
|
116
|
+
*
|
|
117
|
+
* @example
|
|
118
|
+
* ```ts
|
|
119
|
+
* const schema = z.object({
|
|
120
|
+
* invoice_number: z.string().describe('The invoice number'),
|
|
121
|
+
* total: z.number().describe('Total amount'),
|
|
122
|
+
* });
|
|
123
|
+
*
|
|
124
|
+
* const { object, metadata, error } = await client.extract({
|
|
125
|
+
* file: './invoice.pdf',
|
|
126
|
+
* schema,
|
|
127
|
+
* });
|
|
128
|
+
*
|
|
129
|
+
* if (!error && object) {
|
|
130
|
+
* console.log(object.invoice_number); // Fully typed!
|
|
131
|
+
* }
|
|
132
|
+
* ```
|
|
133
|
+
*/
|
|
134
|
+
extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
|
|
135
|
+
/**
|
|
136
|
+
* Makes a request with retry logic for rate limiting.
|
|
137
|
+
*/
|
|
138
|
+
private makeRequestWithRetry;
|
|
139
|
+
/**
|
|
140
|
+
* Makes the actual HTTP request to the API.
|
|
141
|
+
*/
|
|
142
|
+
private makeRequest;
|
|
143
|
+
/**
|
|
144
|
+
* Parses error response body safely.
|
|
145
|
+
*/
|
|
146
|
+
private parseErrorResponse;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Base error class for all Parsefy errors.
|
|
151
|
+
*/
|
|
152
|
+
declare class ParsefyError extends Error {
|
|
153
|
+
/** Error code, if applicable. */
|
|
154
|
+
readonly code?: string;
|
|
155
|
+
constructor(message: string, code?: string);
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Error thrown when the API returns an HTTP error (4xx/5xx).
|
|
159
|
+
*/
|
|
160
|
+
declare class APIError extends ParsefyError {
|
|
161
|
+
/** HTTP status code of the response. */
|
|
162
|
+
readonly statusCode: number;
|
|
163
|
+
/** Raw response body, if available. */
|
|
164
|
+
readonly response?: unknown;
|
|
165
|
+
constructor(message: string, statusCode: number, response?: unknown);
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Error thrown when document extraction fails (returned in response.error).
|
|
169
|
+
* This is not an HTTP error - the request succeeded but extraction failed.
|
|
170
|
+
*/
|
|
171
|
+
declare class ExtractionError extends ParsefyError {
|
|
172
|
+
/** Metadata about the extraction attempt. */
|
|
173
|
+
readonly metadata: ExtractionMetadata;
|
|
174
|
+
constructor(message: string, code: string, metadata: ExtractionMetadata);
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Error thrown for client-side validation failures.
|
|
178
|
+
*/
|
|
179
|
+
declare class ValidationError extends ParsefyError {
|
|
180
|
+
constructor(message: string);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
import { zodToJsonSchema } from 'zod-to-json-schema';

/** MIME type for every supported document extension (lower-case, leading dot). */
const MIME_TYPES = {
  '.pdf': 'application/pdf',
  '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
};

/** Largest upload accepted by the client-side size check, in bytes (10 MiB). */
const MAX_FILE_SIZE = 10 * 1024 * 1024;
const DEFAULT_BASE_URL = 'https://api.parsefy.io';
const DEFAULT_TIMEOUT_MS = 60000;

/** Base error class for all Parsefy errors; `code` is optional. */
class ParsefyError extends Error {
  constructor(message, code) {
    super(message);
    this.name = 'ParsefyError';
    this.code = code;
    // captureStackTrace is V8-specific, hence the feature test.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}

/** Error thrown when the API answers with a non-2xx HTTP status. */
class APIError extends ParsefyError {
  constructor(message, statusCode, response) {
    super(message);
    this.name = 'APIError';
    this.statusCode = statusCode;
    this.response = response;
  }
}

/** Error type carrying extraction metadata (exported; not thrown by this module itself). */
class ExtractionError extends ParsefyError {
  constructor(message, code, metadata) {
    super(message, code);
    this.name = 'ExtractionError';
    this.metadata = metadata;
  }
}

/** Error thrown for client-side validation failures. */
class ValidationError extends ParsefyError {
  constructor(message) {
    super(message);
    this.name = 'ValidationError';
  }
}

/**
 * Attaches the underlying error as a non-enumerable `cause` so wrapping an
 * error does not discard the original stack/message.
 */
function withCause(error, cause) {
  Object.defineProperty(error, 'cause', {
    value: cause,
    writable: true,
    configurable: true,
    enumerable: false,
  });
  return error;
}

/** True when running under Node.js (a `process.versions.node` value exists). */
function isNode() {
  return typeof process !== 'undefined' && process.versions?.node !== undefined;
}

/** Converts a Zod schema to JSON Schema (OpenAPI 3 target, `$ref`s inlined). */
function toJsonSchema(schema) {
  return zodToJsonSchema(schema, { $refStrategy: 'none', target: 'openApi3' });
}

/** Returns the MIME type for a filename's extension, or `null` when unsupported. */
function getMimeType(filename) {
  const extension = filename.toLowerCase().match(/\.[^.]+$/)?.[0];
  return (extension && MIME_TYPES[extension]) || null;
}

/** @throws {ValidationError} when the filename's extension is not supported. */
function validateFileType(filename) {
  if (!getMimeType(filename)) {
    const supported = Object.keys(MIME_TYPES).join(', ');
    throw new ValidationError(`Unsupported file type. Supported types: ${supported}`);
  }
}

/** @throws {ValidationError} when `size` is 0 or exceeds MAX_FILE_SIZE. */
function validateFileSize(size) {
  if (size === 0) {
    throw new ValidationError('File is empty');
  }
  if (size > MAX_FILE_SIZE) {
    const limitMb = MAX_FILE_SIZE / 1048576;
    throw new ValidationError(`File size exceeds maximum limit of ${limitMb}MB`);
  }
}

/** Maps the snake_case API payload onto the camelCase result shape. */
function toExtractResult(payload) {
  const { metadata } = payload;
  return {
    object: payload.object,
    metadata: {
      processingTimeMs: metadata.processing_time_ms,
      inputTokens: metadata.input_tokens,
      outputTokens: metadata.output_tokens,
      credits: metadata.credits,
      fallbackTriggered: metadata.fallback_triggered,
    },
    error: payload.error,
  };
}

/**
 * Wraps a Node.js Buffer in a File (when the runtime has a global `File`)
 * or a Blob, typed according to `filename`'s extension.
 */
function bufferToBlob(buffer, filename) {
  const type = getMimeType(filename) || 'application/octet-stream';
  // Copy exactly the Buffer's window; the backing ArrayBuffer may be a larger shared pool.
  const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
  return typeof File !== 'undefined'
    ? new File([bytes], filename, { type })
    : new Blob([bytes], { type });
}

/**
 * Picks an upload filename for a raw Buffer. A buffer starting with the ZIP
 * local-file signature ("PK\x03\x04") is labelled as .docx (DOCX is a ZIP
 * container); everything else keeps the previous "document.pdf" default.
 */
function guessBufferFilename(buffer) {
  const looksLikeZip =
    buffer.length >= 4 &&
    buffer[0] === 0x50 &&
    buffer[1] === 0x4b &&
    buffer[2] === 0x03 &&
    buffer[3] === 0x04;
  return looksLikeZip ? 'document.docx' : 'document.pdf';
}

/**
 * Reads a document from disk (Node.js only).
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<{buffer: Buffer, filename: string}>}
 * @throws {ValidationError} outside Node.js, when the path cannot be stat'ed,
 *   or when the type/size checks fail.
 */
async function readFileFromPath(filePath) {
  if (!isNode()) {
    throw new ValidationError('File path strings are only supported in Node.js. Use File or Blob in the browser.');
  }
  const [fs, path] = await Promise.all([import('fs'), import('path')]);

  let stats;
  try {
    stats = await fs.promises.stat(filePath);
  } catch (cause) {
    throw withCause(new ValidationError(`File not found: ${filePath}`), cause);
  }

  const filename = path.basename(filePath);
  validateFileType(filename);
  // Reject oversized files before loading them into memory. Only the upper
  // bound is checked here: some special files report size 0 in stat().
  if (stats.size > MAX_FILE_SIZE) {
    validateFileSize(stats.size);
  }

  const buffer = await fs.promises.readFile(filePath);
  validateFileSize(buffer.length);
  return { buffer, filename };
}

/**
 * Normalises every accepted `file` input into a File/Blob ready for FormData.
 *
 * The `typeof` guards matter: `Buffer` does not exist in browsers and a
 * global `File` does not exist on Node 18, so touching either unguarded
 * throws a ReferenceError instead of reaching the matching branch.
 *
 * @throws {ValidationError} for unsupported inputs or failed type/size checks.
 */
async function prepareFile(input) {
  if (typeof input === 'string') {
    const { buffer, filename } = await readFileFromPath(input);
    return bufferToBlob(buffer, filename);
  }
  if (typeof Buffer !== 'undefined' && Buffer.isBuffer(input)) {
    validateFileSize(input.length);
    return bufferToBlob(input, guessBufferFilename(input));
  }
  if (typeof File !== 'undefined' && input instanceof File) {
    validateFileType(input.name);
    validateFileSize(input.size);
    return input;
  }
  if (typeof Blob !== 'undefined' && input instanceof Blob) {
    validateFileSize(input.size);
    return input;
  }
  throw new ValidationError('Invalid file input. Expected File, Blob, Buffer, or file path string.');
}

/** Resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Exponential backoff with up to 10% random jitter, capped at 30 seconds. */
function backoffDelay(attempt, baseMs = 1000) {
  const exponential = baseMs * Math.pow(2, attempt);
  const jitter = Math.random() * 0.1 * exponential;
  return Math.min(exponential + jitter, 30000);
}

/** Parsefy client for extracting structured data from documents. */
class Parsefy {
  /**
   * @param {string|{apiKey?: string, baseUrl?: string, timeout?: number}} [configOrApiKey]
   *   API key string or configuration object. Without a key, falls back to the
   *   PARSEFY_API_KEY environment variable (Node.js only).
   * @throws {ValidationError} when no API key can be resolved.
   */
  constructor(configOrApiKey) {
    this.maxRetries = 3;

    let config = {};
    if (typeof configOrApiKey === 'string') {
      config = { apiKey: configOrApiKey };
    } else if (configOrApiKey) {
      config = configOrApiKey;
    }

    this.apiKey = config.apiKey || this.getEnvApiKey();
    if (!this.apiKey) {
      throw new ValidationError('API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.');
    }
    // Drop trailing slashes so the request URL never contains "//v1/extract".
    this.baseUrl = String(config.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, '');
    this.timeout = config.timeout || DEFAULT_TIMEOUT_MS;
  }

  /** Reads PARSEFY_API_KEY from the environment; returns '' outside Node.js or when unset. */
  getEnvApiKey() {
    return (isNode() && process.env.PARSEFY_API_KEY) || '';
  }

  /**
   * Extracts structured data from a document using the provided Zod schema.
   *
   * @param {{file: File|Blob|Buffer|string, schema: object}} options
   * @returns {Promise<object>} Result with `object`, `metadata` and `error`.
   * @throws {ValidationError} for invalid file input.
   * @throws {APIError} when the API answers with a non-2xx status.
   * @throws {ParsefyError} on timeout, network failure or unexpected errors.
   */
  async extract(options) {
    const { file, schema } = options;
    const jsonSchema = toJsonSchema(schema);
    const upload = await prepareFile(file);

    const form = new FormData();
    form.append('file', upload);
    form.append('output_schema', JSON.stringify(jsonSchema));
    return this.makeRequestWithRetry(form);
  }

  /**
   * Sends the request, retrying with backoff while the API answers 429 and
   * fewer than `maxRetries` retries have been made. Other errors propagate.
   */
  async makeRequestWithRetry(formData, attempt = 0) {
    for (let tries = attempt; ; tries += 1) {
      try {
        return await this.makeRequest(formData);
      } catch (err) {
        const rateLimited = err instanceof APIError && err.statusCode === 429;
        if (!rateLimited || tries >= this.maxRetries) {
          throw err;
        }
        await sleep(backoffDelay(tries));
      }
    }
  }

  /**
   * Performs one POST to `/v1/extract`, aborting after `this.timeout` ms.
   * The timer stays armed until the body has been read and is always
   * cleared in `finally`.
   */
  async makeRequest(formData) {
    const url = `${this.baseUrl}/v1/extract`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);

    try {
      let response;
      try {
        response = await fetch(url, {
          method: 'POST',
          headers: { Authorization: `Bearer ${this.apiKey}` },
          body: formData,
          signal: controller.signal,
        });
      } catch (err) {
        // Only a TypeError raised by fetch() itself is a connectivity failure;
        // TypeErrors from later payload handling must not be labelled as one.
        if (err instanceof TypeError) {
          throw withCause(
            new ParsefyError('Network error: Unable to connect to the Parsefy API', 'NETWORK_ERROR'),
            err,
          );
        }
        throw err;
      }

      if (!response.ok) {
        const details = await this.parseErrorResponse(response);
        throw new APIError(
          details.message || `API request failed with status ${response.status}`,
          response.status,
          details,
        );
      }

      return toExtractResult(await response.json());
    } catch (err) {
      if (err instanceof ParsefyError) {
        throw err;
      }
      if (err?.name === 'AbortError') {
        throw withCause(new ParsefyError(`Request timed out after ${this.timeout}ms`, 'TIMEOUT'), err);
      }
      const reason = err instanceof Error ? err.message : String(err);
      throw withCause(new ParsefyError(`Unexpected error: ${reason}`, 'UNKNOWN_ERROR'), err);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Best-effort parse of an error response body; never throws.
   *
   * The body is read once as text and then JSON-parsed, because a Response
   * body can only be consumed a single time (calling `text()` after a failed
   * `json()` always rejects).
   *
   * @returns {Promise<object>} Parsed JSON object, or `{ message }` built from
   *   the raw text or the status text.
   */
  async parseErrorResponse(response) {
    let text;
    try {
      text = await response.text();
    } catch {
      return { message: response.statusText };
    }
    if (!text) {
      return { message: response.statusText };
    }
    try {
      const parsed = JSON.parse(text);
      if (parsed !== null && typeof parsed === 'object') {
        return parsed;
      }
    } catch {
      // Body is not JSON; fall through and surface the raw text.
    }
    return { message: text };
  }
}

// One-letter bindings kept because the bundle's trailing `export { ... }`
// statement refers to them by these names.
const i = ParsefyError;
const c = APIError;
const l = ExtractionError;
const s = ValidationError;
const y = Parsefy;
|
|
2
|
+
export{c as APIError,l as ExtractionError,y as Parsefy,i as ParsefyError,s as ValidationError};
|
package/package.json
CHANGED
|
@@ -1,12 +1,57 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "parsefy",
|
|
3
|
-
"version": "1.0.
|
|
4
|
-
"description": "
|
|
5
|
-
"
|
|
6
|
-
"
|
|
7
|
-
"
|
|
8
|
-
|
|
3
|
+
"version": "1.0.1",
|
|
4
|
+
"description": "Official TypeScript SDK for Parsefy - AI-powered document data extraction",
|
|
5
|
+
"author": "",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/parsefy/parsefy-js.git"
|
|
10
|
+
},
|
|
11
|
+
"homepage": "https://parsefy.io",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/parsefy/parsefy-js/issues"
|
|
14
|
+
},
|
|
15
|
+
"main": "./dist/index.cjs",
|
|
16
|
+
"module": "./dist/index.mjs",
|
|
17
|
+
"types": "./dist/index.d.cts",
|
|
18
|
+
"exports": {
|
|
19
|
+
".": {
|
|
20
|
+
"import": {
|
|
21
|
+
"types": "./dist/index.d.mts",
|
|
22
|
+
"default": "./dist/index.mjs"
|
|
23
|
+
},
|
|
24
|
+
"require": {
|
|
25
|
+
"types": "./dist/index.d.cts",
|
|
26
|
+
"default": "./dist/index.cjs"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"files": [
|
|
31
|
+
"dist",
|
|
32
|
+
"README.md",
|
|
33
|
+
"LICENSE"
|
|
34
|
+
],
|
|
9
35
|
"scripts": {
|
|
10
|
-
"
|
|
36
|
+
"build": "tsup && cp dist/index.d.ts dist/index.d.cts",
|
|
37
|
+
"dev": "tsup --watch",
|
|
38
|
+
"typecheck": "tsc --noEmit",
|
|
39
|
+
"prepublishOnly": "npm run build"
|
|
40
|
+
},
|
|
41
|
+
"peerDependencies": {
|
|
42
|
+
"zod": "^3.0.0"
|
|
43
|
+
},
|
|
44
|
+
"dependencies": {
|
|
45
|
+
"zod-to-json-schema": "^3.22.0"
|
|
46
|
+
},
|
|
47
|
+
"devDependencies": {
|
|
48
|
+
"@types/node": "^20.0.0",
|
|
49
|
+
"tsup": "^8.0.0",
|
|
50
|
+
"tsx": "^4.21.0",
|
|
51
|
+
"typescript": "^5.0.0",
|
|
52
|
+
"zod": "^3.22.0"
|
|
53
|
+
},
|
|
54
|
+
"engines": {
|
|
55
|
+
"node": ">=18"
|
|
11
56
|
}
|
|
12
57
|
}
|
package/index.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
console.log("Parsefy SDK");
|