parsefy 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,8 +1,16 @@
1
- # Parsefy
1
+ <p align="center">
2
+ <img src="/assets/logo.png" alt="Parsefy Logo" width="120" />
3
+ </p>
2
4
 
3
- Official TypeScript SDK for [Parsefy](https://parsefy.io) AI-powered document data extraction.
5
+ <h1 align="center">Parsefy TypeScript / JavaScript SDK</h1>
4
6
 
5
- Extract structured data from PDFs and DOCX files using Zod schemas with full TypeScript type inference.
7
+ <p align="center">
8
+ <strong>Official TypeScript / JavaScript SDK for Parsefy - Financial Document Infrastructure for Developers</strong><br>
9
+
10
+ Turn financial PDFs (invoices, receipts, bills) into structured JSON with validation and risk signals.
11
+ </p>
12
+
13
+ ---
6
14
 
7
15
  ## Installation
8
16
 
@@ -19,21 +27,142 @@ import * as z from 'zod';
19
27
  const client = new Parsefy('pk_your_api_key');
20
28
 
21
29
  const schema = z.object({
30
+ // REQUIRED - triggers fallback if below confidence threshold
22
31
  invoice_number: z.string().describe('The invoice number'),
23
- date: z.string().describe('Invoice date in YYYY-MM-DD format'),
24
- total: z.number().describe('Total amount'),
32
+ total: z.number().describe('Total amount including tax'),
33
+
34
+ // OPTIONAL - won't trigger fallback if missing or low confidence
35
+ vendor: z.string().optional().describe('Vendor name'),
36
+ due_date: z.string().optional().describe('Payment due date'),
25
37
  });
26
38
 
27
- const { object, error } = await client.extract({
39
+ const { object, metadata, error } = await client.extract({
28
40
  file: './invoice.pdf',
29
41
  schema,
30
42
  });
31
43
 
32
44
  if (!error && object) {
33
45
  console.log(object.invoice_number); // Fully typed!
46
+
47
+ // Access field-level confidence and evidence
48
+ console.log(`Overall confidence: ${metadata.confidenceScore}`);
49
+ metadata.fieldConfidence.forEach((fc) => {
50
+ console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
51
+ });
34
52
  }
35
53
  ```
36
54
 
55
+ ## ⚠️ Required vs Optional Fields (Important for Billing)
56
+
57
+ **All fields are required by default.** This is critical to understand:
58
+
59
+ | User writes (SDK) | SDK converts to (JSON Schema) | API interprets as |
60
+ |-------------------|-------------------------------|-------------------|
61
+ | `name: z.string()` | `required: ["name"]` | **Required** |
62
+ | `name: z.string().optional()` | `required: []` | **Optional** |
63
+
64
+ ### Why This Matters
65
+
66
+ If a **required** field returns `null` or falls below the `confidenceThreshold`, the API triggers the **fallback model** (Tier 2), which is more expensive.
67
+
68
+ **To avoid unexpected high billing:**
69
+
70
+ ```typescript
71
+ const schema = z.object({
72
+ // REQUIRED - Always present on invoices, keep required
73
+ invoice_number: z.string(),
74
+ total: z.number(),
75
+
76
+ // OPTIONAL - May not appear on all documents, mark optional!
77
+ vendor: z.string().optional(), // Not all invoices have vendor name
78
+ tax_id: z.string().optional(), // Rarely present
79
+ notes: z.string().optional(), // Usually empty
80
+ due_date: z.string().optional(), // Sometimes missing
81
+ });
82
+ ```
83
+
84
+ **Rule of thumb:** If a field might be missing in >20% of your documents, mark it `.optional()`.
85
+
86
+ ## Confidence Threshold
87
+
88
+ Control when the fallback model is triggered:
89
+
90
+ ```typescript
91
+ const { object, metadata } = await client.extract({
92
+ file: './invoice.pdf',
93
+ schema,
94
+ confidenceThreshold: 0.85, // default
95
+ });
96
+ ```
97
+
98
+ | Threshold | Behavior |
99
+ |-----------|----------|
100
+ | Lower (e.g., 0.70) | **Faster** – Accepts Tier 1 results more often |
101
+ | Higher (e.g., 0.95) | **More accurate** – Triggers Tier 2 fallback more often |
102
+
103
+ **Default:** `0.85`
104
+
105
+ ## Response Format
106
+
107
+ ```typescript
108
+ interface ExtractResult<T> {
109
+ // Extracted data matching your schema, or null if extraction failed
110
+ object: T | null;
111
+
112
+ // Metadata about the extraction
113
+ metadata: {
114
+ processingTimeMs: number; // Processing time in milliseconds
115
+ inputTokens: number; // Input tokens used
116
+ outputTokens: number; // Output tokens generated
117
+ credits: number; // Credits consumed (1 credit = 1 page)
118
+ fallbackTriggered: boolean; // Whether fallback model was used
119
+
120
+ // 🆕 Field-level confidence and evidence
121
+ confidenceScore: number; // Overall confidence (0.0 to 1.0)
122
+ fieldConfidence: Array<{
123
+ field: string; // JSON path (e.g., "$.invoice_number")
124
+ score: number; // Confidence score (0.0 to 1.0)
125
+ reason: string; // "Exact match", "Inferred from header", etc.
126
+ page: number; // Page number where found
127
+ text: string; // Source text evidence
128
+ }>;
129
+ issues: string[]; // Warnings or anomalies detected
130
+ };
131
+
132
+ // Error details if extraction failed
133
+ error: {
134
+ code: string;
135
+ message: string;
136
+ } | null;
137
+ }
138
+ ```
139
+
140
+ ### Example Response
141
+
142
+ ```typescript
143
+ const { object, metadata } = await client.extract({ file, schema });
144
+
145
+ // object:
146
+ {
147
+ invoice_number: "INV-2024-0042",
148
+ date: "2024-01-15",
149
+ total: 1250.00,
150
+ vendor: "Acme Corp"
151
+ }
152
+
153
+ // metadata.confidenceScore: 0.94
154
+
155
+ // metadata.fieldConfidence:
156
+ [
157
+ { field: "$.invoice_number", score: 0.98, reason: "Exact match", page: 1, text: "Invoice # INV-2024-0042" },
158
+ { field: "$.date", score: 0.95, reason: "Exact match", page: 1, text: "Date: 01/15/2024" },
159
+ { field: "$.total", score: 0.92, reason: "Formatting ambiguous", page: 1, text: "Total: $1,250.00" },
160
+ { field: "$.vendor", score: 0.90, reason: "Inferred from header", page: 1, text: "Acme Corp" }
161
+ ]
162
+
163
+ // metadata.issues: []
164
+ ```
165
+
37
166
  ## Configuration
38
167
 
39
168
  ### API Key
@@ -60,35 +189,19 @@ const client = new Parsefy({
60
189
  | `apiKey` | `string` | `process.env.PARSEFY_API_KEY` | Your Parsefy API key |
61
190
  | `timeout` | `number` | `60000` | Request timeout in ms |
62
191
 
63
- ## Usage
64
-
65
- ### Basic Extraction
66
-
67
- ```typescript
68
- import { Parsefy } from 'parsefy';
69
- import * as z from 'zod';
70
-
71
- const client = new Parsefy();
72
-
73
- const schema = z.object({
74
- name: z.string(),
75
- email: z.string().email(),
76
- phone: z.string().optional(),
77
- });
192
+ ### Extract Options
78
193
 
79
- const { object, metadata, error } = await client.extract({
80
- file: './contact.pdf',
81
- schema,
82
- });
194
+ | Option | Type | Default | Description |
195
+ |--------|------|---------|-------------|
196
+ | `file` | `File \| Blob \| Buffer \| string` | required | Document to extract from |
197
+ | `schema` | `z.ZodType` | required | Zod schema defining extraction structure |
198
+ | `confidenceThreshold` | `number` | `0.85` | Minimum confidence before triggering fallback |
83
199
 
84
- if (!error) {
85
- console.log(object);
86
- }
87
- ```
200
+ ## Usage
88
201
 
89
202
  ### File Input Options
90
203
 
91
- The SDK supports multiple file input types. **Files don't need to be on disk** – you can work entirely in memory, which is ideal for building APIs and serverless functions.
204
+ The SDK supports multiple file input types. **Files don't need to be on disk** – you can work entirely in memory.
92
205
 
93
206
  | Input Type | Usage | Environment |
94
207
  |------------|-------|-------------|
@@ -96,19 +209,18 @@ The SDK supports multiple file input types. **Files don't need to be on disk**
96
209
  | `Buffer` | In-memory bytes | Node.js |
97
210
  | `File` | From file input or FormData | Browser, Node.js 20+, Edge |
98
211
  | `Blob` | Raw binary with MIME type | Universal |
99
- | `ArrayBuffer` | Wrap in `Blob` first | Universal |
100
212
 
101
213
  ```typescript
102
- // Node.js: File path (convenience for scripts/CLI)
214
+ // Node.js: File path
103
215
  const result = await client.extract({
104
- file: './document.pdf',
216
+ file: './invoice.pdf',
105
217
  schema,
106
218
  });
107
219
 
108
220
  // Node.js: Buffer (in-memory)
109
221
  import { readFileSync } from 'fs';
110
222
  const result = await client.extract({
111
- file: readFileSync('./document.pdf'),
223
+ file: readFileSync('./invoice.pdf'),
112
224
  schema,
113
225
  });
114
226
 
@@ -118,18 +230,43 @@ const result = await client.extract({
118
230
  file: fileInput.files[0],
119
231
  schema,
120
232
  });
233
+ ```
121
234
 
122
- // Universal: Blob (with explicit MIME type)
123
- const result = await client.extract({
124
- file: new Blob([arrayBuffer], { type: 'application/pdf' }),
125
- schema,
235
+ ### Complex Schemas for Financial Documents
236
+
237
+ Use `.describe()` to guide the AI extraction:
238
+
239
+ ```typescript
240
+ const invoiceSchema = z.object({
241
+ // REQUIRED - Core financial data
242
+ invoice_number: z.string().describe('The invoice or receipt number'),
243
+ date: z.string().describe('Invoice date in YYYY-MM-DD format'),
244
+ total: z.number().describe('Total amount due including tax'),
245
+ currency: z.string().describe('3-letter currency code (USD, EUR, etc.)'),
246
+
247
+ // REQUIRED - Line items (usually present)
248
+ line_items: z.array(z.object({
249
+ description: z.string().describe('Item description'),
250
+ quantity: z.number().describe('Number of units'),
251
+ unit_price: z.number().describe('Price per unit'),
252
+ amount: z.number().describe('Total amount for this line'),
253
+ })).describe('List of items on the invoice'),
254
+
255
+ // OPTIONAL - May not appear on all invoices
256
+ vendor: z.object({
257
+ name: z.string().describe('Company name of the vendor'),
258
+ address: z.string().optional().describe('Full address'),
259
+ tax_id: z.string().optional().describe('Tax ID or VAT number'),
260
+ }).optional(),
261
+ subtotal: z.number().optional().describe('Subtotal before tax'),
262
+ tax: z.number().optional().describe('Tax amount'),
263
+ due_date: z.string().optional().describe('Payment due date'),
264
+ payment_terms: z.string().optional().describe('e.g., Net 30'),
126
265
  });
127
266
  ```
128
267
 
129
268
  ### Server-Side / API Usage
130
269
 
131
- When building APIs that receive file uploads, files are typically kept in memory. The SDK handles this seamlessly:
132
-
133
270
  **Express with Multer:**
134
271
 
135
272
  ```typescript
@@ -137,38 +274,22 @@ import express from 'express';
137
274
  import multer from 'multer';
138
275
  import { Parsefy } from 'parsefy';
139
276
 
140
- const upload = multer(); // Store in memory, not disk
277
+ const upload = multer(); // Store in memory
141
278
  const client = new Parsefy();
142
279
 
143
280
  app.post('/extract', upload.single('document'), async (req, res) => {
144
- const { object, error } = await client.extract({
145
- file: req.file.buffer, // Buffer from multer
281
+ const { object, metadata, error } = await client.extract({
282
+ file: req.file.buffer,
146
283
  schema,
284
+ confidenceThreshold: 0.80, // Adjust based on your needs
147
285
  });
148
- res.json({ data: object, error });
149
- });
150
- ```
151
-
152
- **Fastify:**
153
-
154
- ```typescript
155
- import Fastify from 'fastify';
156
- import multipart from '@fastify/multipart';
157
- import { Parsefy } from 'parsefy';
158
286
 
159
- const fastify = Fastify();
160
- await fastify.register(multipart);
161
- const client = new Parsefy();
162
-
163
- fastify.post('/extract', async (request) => {
164
- const data = await request.file();
165
- const buffer = await data.toBuffer();
166
-
167
- const { object, error } = await client.extract({
168
- file: buffer,
169
- schema,
287
+ res.json({
288
+ data: object,
289
+ confidence: metadata.confidenceScore,
290
+ fieldDetails: metadata.fieldConfidence,
291
+ error,
170
292
  });
171
- return { data: object, error };
172
293
  });
173
294
  ```
174
295
 
@@ -183,43 +304,19 @@ const client = new Parsefy();
183
304
 
184
305
  app.post('/extract', async (c) => {
185
306
  const formData = await c.req.formData();
186
- const file = formData.get('document'); // File object
307
+ const file = formData.get('document');
187
308
 
188
- const { object, error } = await client.extract({
189
- file, // File from FormData works directly
309
+ const { object, metadata, error } = await client.extract({
310
+ file,
190
311
  schema,
191
312
  });
192
- return c.json({ data: object, error });
193
- });
194
- ```
195
-
196
- ### Complex Schemas
197
-
198
- Use `.describe()` to guide the AI extraction:
199
-
200
- ```typescript
201
- const invoiceSchema = z.object({
202
- invoice_number: z.string().describe('The invoice or receipt number'),
203
- date: z.string().describe('Invoice date in YYYY-MM-DD format'),
204
- vendor: z.object({
205
- name: z.string().describe('Company name of the vendor'),
206
- address: z.string().describe('Full address of the vendor'),
207
- }),
208
- line_items: z.array(z.object({
209
- description: z.string().describe('Item description'),
210
- quantity: z.number().describe('Number of units'),
211
- unit_price: z.number().describe('Price per unit'),
212
- amount: z.number().describe('Total amount for this line'),
213
- })).describe('List of items on the invoice'),
214
- subtotal: z.number().describe('Subtotal before tax'),
215
- tax: z.number().describe('Tax amount'),
216
- total: z.number().describe('Total amount due'),
217
- currency: z.string().describe('3-letter currency code (USD, EUR, etc.)'),
218
- });
219
313
 
220
- const { object } = await client.extract({
221
- file: './invoice.pdf',
222
- schema: invoiceSchema,
314
+ return c.json({
315
+ data: object,
316
+ confidence: metadata.confidenceScore,
317
+ issues: metadata.issues,
318
+ error,
319
+ });
223
320
  });
224
321
  ```
225
322
 
@@ -230,17 +327,24 @@ import { Parsefy, APIError, ValidationError, ParsefyError } from 'parsefy';
230
327
 
231
328
  try {
232
329
  const { object, error, metadata } = await client.extract({
233
- file: './document.pdf',
330
+ file: './invoice.pdf',
234
331
  schema,
235
332
  });
236
333
 
237
334
  // Extraction-level errors (request succeeded, but extraction failed)
238
335
  if (error) {
239
336
  console.error(`Extraction failed: [${error.code}] ${error.message}`);
240
- console.log(`Tokens used: ${metadata.inputTokens} in, ${metadata.outputTokens} out`);
337
+ console.log(`Fallback triggered: ${metadata.fallbackTriggered}`);
338
+ console.log(`Issues: ${metadata.issues.join(', ')}`);
241
339
  return;
242
340
  }
243
341
 
342
+ // Check for low confidence fields
343
+ const lowConfidence = metadata.fieldConfidence.filter((fc) => fc.score < 0.80);
344
+ if (lowConfidence.length > 0) {
345
+ console.warn('Low confidence fields:', lowConfidence);
346
+ }
347
+
244
348
  console.log('Success:', object);
245
349
  } catch (err) {
246
350
  // HTTP/Network errors
@@ -254,30 +358,6 @@ try {
254
358
  }
255
359
  ```
256
360
 
257
- ## Response Format
258
-
259
- ```typescript
260
- interface ExtractResult<T> {
261
- // Extracted data matching your schema, or null if extraction failed
262
- object: T | null;
263
-
264
- // Metadata about the extraction
265
- metadata: {
266
- processingTimeMs: number; // Processing time in milliseconds
267
- inputTokens: number; // Input tokens used
268
- outputTokens: number; // Output tokens generated
269
- credits: number; // Credits consumed (1 credit = 1 page)
270
- fallbackTriggered: boolean; // Whether fallback model was used
271
- };
272
-
273
- // Error details if extraction failed
274
- error: {
275
- code: string; // EXTRACTION_FAILED, LLM_ERROR, PARSING_ERROR, TIMEOUT_ERROR
276
- message: string;
277
- } | null;
278
- }
279
- ```
280
-
281
361
  ## Error Types
282
362
 
283
363
  | Error Class | Description |
@@ -301,7 +381,23 @@ The API allows 1 request per second. The SDK automatically retries with exponent
301
381
  - Node.js 18+ (for native `fetch` and `FormData`)
302
382
  - Zod 3.x (peer dependency)
303
383
 
384
+ ## TypeScript Types
385
+
386
+ All types are exported for your convenience:
387
+
388
+ ```typescript
389
+ import type {
390
+ ParsefyConfig,
391
+ ExtractOptions,
392
+ ExtractResult,
393
+ ExtractionMetadata,
394
+ FieldConfidence,
395
+ APIErrorResponse,
396
+ } from 'parsefy';
397
+
398
+ import { DEFAULT_CONFIDENCE_THRESHOLD } from 'parsefy'; // 0.85
399
+ ```
400
+
304
401
  ## License
305
402
 
306
403
  MIT © [Parsefy](https://parsefy.io)
307
-
package/dist/index.cjs CHANGED
@@ -1,2 +1,2 @@
1
- 'use strict';var zodToJsonSchema=require('zod-to-json-schema');var m={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},f=10*1024*1024,h="https://api.parsefy.io",g=6e4;var i=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},c=class extends i{constructor(t,r,o){super(t),this.name="APIError",this.statusCode=r,this.response=o;}},l=class extends i{constructor(t,r,o){super(t,r),this.name="ExtractionError",this.metadata=o;}},s=class extends i{constructor(t){super(t),this.name="ValidationError";}};function d(){return typeof process<"u"&&process.versions?.node!==void 0}function T(e){return zodToJsonSchema.zodToJsonSchema(e,{$refStrategy:"none",target:"openApi3"})}function b(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&m[t]||null}function R(e){if(!b(e)){let r=Object.keys(m).join(", ");throw new s(`Unsupported file type. Supported types: ${r}`)}}function u(e){if(e===0)throw new s("File is empty");if(e>f){let t=f/1048576;throw new s(`File size exceeds maximum limit of ${t}MB`)}}function w(e){return {object:e.object,metadata:{processingTimeMs:e.metadata.processing_time_ms,inputTokens:e.metadata.input_tokens,outputTokens:e.metadata.output_tokens,credits:e.metadata.credits,fallbackTriggered:e.metadata.fallback_triggered},error:e.error}}function E(e,t){let r=b(t)||"application/octet-stream",o=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([o],t,{type:r}):new Blob([o],{type:r})}async function _(e){if(!d())throw new s("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new s(`File not found: ${e}`);let o=r.basename(e);R(o);let a=t.readFileSync(e);return u(a.length),{buffer:a,filename:o}}async function P(e){if(typeof e=="string"){let{buffer:t,filename:r}=await _(e);return E(t,r)}if(Buffer.isBuffer(e))return u(e.length),E(e,"document.pdf");if(e instanceof File)return R(e.name),u(e.size),e;if(e instanceof Blob)return u(e.size),e;throw new s("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function F(e){return new Promise(t=>setTimeout(t,e))}function k(e,t=1e3){let r=t*Math.pow(2,e),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var y=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new s("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||h,this.timeout=r.timeout||g;}getEnvApiKey(){return d()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:o}=t,a=T(o),n=await P(r),p=new FormData;return p.append("file",n),p.append("output_schema",JSON.stringify(a)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(o){if(o instanceof c&&o.statusCode===429&&r<this.maxRetries){let a=k(r);return await F(a),this.makeRequestWithRetry(t,r+1)}throw o}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,a=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:o.signal});if(clearTimeout(a),!n.ok){let x=await this.parseErrorResponse(n);throw new c(x.message||`API request failed with status ${n.status}`,n.status,x)}let p=await n.json();return w(p)}catch(n){throw clearTimeout(a),n instanceof Error&&n.name==="AbortError"?new i(`Request timed out after 
${this.timeout}ms`,"TIMEOUT"):n instanceof i?n:n instanceof TypeError?new i("Network error: Unable to connect to the Parsefy API","NETWORK_ERROR"):new i(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
2
- exports.APIError=c;exports.ExtractionError=l;exports.Parsefy=y;exports.ParsefyError=i;exports.ValidationError=s;
1
'use strict';

const { zodToJsonSchema } = require('zod-to-json-schema');

/** Confidence threshold sent to the API when the caller does not supply one. */
const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;

/** Supported file extensions mapped to the MIME type used for the upload. */
const MIME_TYPES = {
  '.pdf': 'application/pdf',
  '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
};

/** Largest accepted upload, in bytes (10 MB). */
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024;
const DEFAULT_BASE_URL = 'https://api.parsefy.io';
const DEFAULT_TIMEOUT_MS = 60000;

/** Base class for every error thrown by the SDK; carries an optional `code`. */
class ParsefyError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} [code] - Machine-readable error code (e.g. "TIMEOUT").
   */
  constructor(message, code) {
    super(message);
    this.name = 'ParsefyError';
    this.code = code;
    // captureStackTrace is a V8 extension; skip it on engines that lack it.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}

/** Thrown when the HTTP response status is not OK. */
class APIError extends ParsefyError {
  /**
   * @param {string} message - Error message.
   * @param {number} statusCode - HTTP status of the failed response.
   * @param {object} [response] - Parsed error body returned by the API.
   */
  constructor(message, statusCode, response) {
    super(message);
    this.name = 'APIError';
    this.statusCode = statusCode;
    this.response = response;
  }
}

/** Error type carrying the extraction metadata alongside the error code. */
class ExtractionError extends ParsefyError {
  /**
   * @param {string} message - Error message.
   * @param {string} code - Error code.
   * @param {object} [metadata] - Extraction metadata attached to the error.
   */
  constructor(message, code, metadata) {
    super(message, code);
    this.name = 'ExtractionError';
    this.metadata = metadata;
  }
}

/** Thrown for invalid client-side input (bad file, missing API key, ...). */
class ValidationError extends ParsefyError {
  /** @param {string} message - Error message. */
  constructor(message) {
    super(message);
    this.name = 'ValidationError';
  }
}

/** @returns {boolean} True when running under Node.js. */
function isNode() {
  return typeof process !== 'undefined' && process.versions?.node !== undefined;
}

/**
 * Converts a Zod schema into the JSON Schema (draft 7) payload sent to the API.
 * The top-level `$schema` key is removed before returning.
 *
 * @param {object} schema - Zod schema.
 * @returns {object} JSON Schema without the `$schema` marker.
 */
function toJsonSchema(schema) {
  const jsonSchema = zodToJsonSchema(schema, { $refStrategy: 'none', target: 'jsonSchema7' });
  if ('$schema' in jsonSchema) {
    delete jsonSchema.$schema;
  }
  return jsonSchema;
}

/**
 * Looks up the MIME type for a filename by its (case-insensitive) extension.
 *
 * @param {string} filename
 * @returns {string|null} MIME type, or null when the extension is unsupported.
 */
function getMimeType(filename) {
  const extension = filename.toLowerCase().match(/\.[^.]+$/)?.[0];
  return (extension && MIME_TYPES[extension]) || null;
}

/**
 * @param {string} filename
 * @throws {ValidationError} When the extension is not in MIME_TYPES.
 */
function assertSupportedFileType(filename) {
  if (!getMimeType(filename)) {
    const supported = Object.keys(MIME_TYPES).join(', ');
    throw new ValidationError(`Unsupported file type. Supported types: ${supported}`);
  }
}

/**
 * @param {number} size - File size in bytes.
 * @throws {ValidationError} When the file is empty or larger than the limit.
 */
function assertValidFileSize(size) {
  if (size === 0) {
    throw new ValidationError('File is empty');
  }
  if (size > MAX_FILE_SIZE_BYTES) {
    const limitMb = MAX_FILE_SIZE_BYTES / 1048576;
    throw new ValidationError(`File size exceeds maximum limit of ${limitMb}MB`);
  }
}

/**
 * Maps the snake_case API payload to the camelCase result returned by the SDK.
 *
 * Confidence data is read from `_meta`. When `_meta` (or one of its members)
 * is absent, neutral defaults are used (score 0, empty lists) so the mapper
 * does not throw a TypeError that `makeRequest` would report as a network
 * error.
 * NOTE(review): whether the API omits `_meta` on failed extractions is not
 * visible here — confirm against the API contract.
 *
 * @param {object} payload - Parsed JSON body of a successful HTTP response.
 * @returns {{object: *, metadata: object, error: *}}
 */
function toExtractResult(payload) {
  const meta = payload._meta ?? {};
  const fieldConfidence = (meta.field_confidence ?? []).map((entry) => ({
    field: entry.field,
    score: entry.score,
    reason: entry.reason,
    page: entry.page,
    text: entry.text,
  }));

  return {
    object: payload.object,
    metadata: {
      processingTimeMs: payload.metadata.processing_time_ms,
      inputTokens: payload.metadata.input_tokens,
      outputTokens: payload.metadata.output_tokens,
      credits: payload.metadata.credits,
      fallbackTriggered: payload.metadata.fallback_triggered,
      confidenceScore: meta.confidence_score ?? 0,
      fieldConfidence,
      issues: meta.issues ?? [],
    },
    error: payload.error,
  };
}

/**
 * Wraps a Node.js Buffer in a File (or a Blob where `File` is unavailable).
 *
 * @param {Buffer} buffer - File contents.
 * @param {string} filename - Name used for the File and for MIME detection.
 * @returns {File|Blob}
 */
function bufferToUpload(buffer, filename) {
  const type = getMimeType(filename) || 'application/octet-stream';
  // Copy exactly the bytes this Buffer views; the backing ArrayBuffer may be larger.
  const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
  return typeof File !== 'undefined'
    ? new File([bytes], filename, { type })
    : new Blob([bytes], { type });
}

/**
 * Reads and validates a document from disk (Node.js only).
 *
 * @param {string} filePath
 * @returns {Promise<{buffer: Buffer, filename: string}>}
 * @throws {ValidationError} Outside Node.js, when the file is missing, has an
 *   unsupported extension, is empty, or exceeds the size limit.
 */
async function readFileFromPath(filePath) {
  if (!isNode()) {
    throw new ValidationError('File path strings are only supported in Node.js. Use File or Blob in the browser.');
  }
  // Loaded lazily so the bundle can still be evaluated outside Node.js.
  const fs = await import('fs');
  const path = await import('path');
  if (!fs.existsSync(filePath)) {
    throw new ValidationError(`File not found: ${filePath}`);
  }
  const filename = path.basename(filePath);
  assertSupportedFileType(filename);
  const buffer = fs.readFileSync(filePath);
  assertValidFileSize(buffer.length);
  return { buffer, filename };
}

/**
 * Normalizes any accepted file input into a File/Blob ready for FormData.
 *
 * Each global (`Buffer`, `File`, `Blob`) is probed with `typeof` before use so
 * that a runtime lacking one of them (no `Buffer` in browsers, no global
 * `File` on Node.js 18) falls through to the next branch instead of throwing
 * a ReferenceError.
 *
 * @param {File|Blob|Buffer|string} file
 * @returns {Promise<File|Blob>}
 * @throws {ValidationError} When the input type or its contents are invalid.
 */
async function prepareFile(file) {
  if (typeof file === 'string') {
    const { buffer, filename } = await readFileFromPath(file);
    return bufferToUpload(buffer, filename);
  }
  if (typeof Buffer !== 'undefined' && Buffer.isBuffer(file)) {
    assertValidFileSize(file.length);
    // Buffers carry no name; they are uploaded as "document.pdf".
    return bufferToUpload(file, 'document.pdf');
  }
  // File must be tested before Blob because File is a Blob subclass.
  if (typeof File !== 'undefined' && file instanceof File) {
    assertSupportedFileType(file.name);
    assertValidFileSize(file.size);
    return file;
  }
  if (typeof Blob !== 'undefined' && file instanceof Blob) {
    assertValidFileSize(file.size);
    return file;
  }
  throw new ValidationError('Invalid file input. Expected File, Blob, Buffer, or file path string.');
}

/** @param {number} ms @returns {Promise<void>} Resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Exponential backoff with up to 10% random jitter, capped at 30 seconds.
 *
 * @param {number} attempt - Zero-based retry attempt.
 * @param {number} [baseMs=1000] - Delay for attempt 0.
 * @returns {number} Delay in milliseconds.
 */
function backoffDelay(attempt, baseMs = 1000) {
  const exponential = baseMs * Math.pow(2, attempt);
  const jitter = Math.random() * 0.1 * exponential;
  return Math.min(exponential + jitter, 30000);
}

/** Client for the Parsefy extraction API. */
class Parsefy {
  /**
   * @param {string|{apiKey?: string, baseUrl?: string, timeout?: number}} [config]
   *   API key string or configuration object. The key falls back to the
   *   PARSEFY_API_KEY environment variable under Node.js.
   * @throws {ValidationError} When no API key can be resolved.
   */
  constructor(config) {
    this.maxRetries = 3;

    let options = {};
    if (typeof config === 'string') {
      options = { apiKey: config };
    } else if (config) {
      options = config;
    }

    this.apiKey = options.apiKey || this.getEnvApiKey();
    if (!this.apiKey) {
      throw new ValidationError('API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.');
    }
    this.baseUrl = options.baseUrl || DEFAULT_BASE_URL;
    this.timeout = options.timeout || DEFAULT_TIMEOUT_MS;
  }

  /** @returns {string} PARSEFY_API_KEY under Node.js, otherwise an empty string. */
  getEnvApiKey() {
    return (isNode() && process.env.PARSEFY_API_KEY) || '';
  }

  /**
   * Uploads a document and returns the data extracted for the given schema.
   *
   * @param {{file: File|Blob|Buffer|string, schema: object, confidenceThreshold?: number}} options
   *   `confidenceThreshold` defaults to DEFAULT_CONFIDENCE_THRESHOLD when null or undefined.
   * @returns {Promise<{object: *, metadata: object, error: *}>}
   * @throws {ValidationError} For invalid file input.
   * @throws {APIError} For non-OK HTTP responses (after 429 retries are exhausted).
   * @throws {ParsefyError} For timeouts, network failures and unexpected errors.
   */
  async extract(options) {
    const { file, schema, confidenceThreshold } = options;
    const outputSchema = toJsonSchema(schema);
    const upload = await prepareFile(file);

    const form = new FormData();
    form.append('file', upload);
    form.append('output_schema', JSON.stringify(outputSchema));
    form.append('confidence_threshold', String(confidenceThreshold ?? DEFAULT_CONFIDENCE_THRESHOLD));
    return this.makeRequestWithRetry(form);
  }

  /**
   * Sends the request, retrying with backoff only on HTTP 429 responses and
   * at most `maxRetries` times.
   *
   * @param {FormData} formData
   * @param {number} [attempt=0] - Zero-based retry counter.
   * @returns {Promise<object>} The mapped extraction result.
   */
  async makeRequestWithRetry(formData, attempt = 0) {
    try {
      return await this.makeRequest(formData);
    } catch (err) {
      const rateLimited = err instanceof APIError && err.statusCode === 429;
      if (rateLimited && attempt < this.maxRetries) {
        await sleep(backoffDelay(attempt));
        return this.makeRequestWithRetry(formData, attempt + 1);
      }
      throw err;
    }
  }

  /**
   * Performs a single POST to `/v1/extract`, aborting after `this.timeout` ms.
   *
   * @param {FormData} formData
   * @returns {Promise<object>} The mapped extraction result.
   * @throws {APIError} When the response status is not OK.
   * @throws {ParsefyError} With code TIMEOUT, NETWORK_ERROR or UNKNOWN_ERROR.
   */
  async makeRequest(formData) {
    const url = `${this.baseUrl}/v1/extract`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: formData,
        signal: controller.signal,
      });
      // The timer is cleared once the response arrives, before the body is read.
      clearTimeout(timer);

      if (!response.ok) {
        const errorBody = await this.parseErrorResponse(response);
        throw new APIError(
          errorBody.message || `API request failed with status ${response.status}`,
          response.status,
          errorBody,
        );
      }

      const payload = await response.json();
      return toExtractResult(payload);
    } catch (err) {
      clearTimeout(timer);
      if (err instanceof Error && err.name === 'AbortError') {
        throw new ParsefyError(`Request timed out after ${this.timeout}ms`, 'TIMEOUT');
      }
      // SDK errors (including APIError thrown above) pass through unchanged.
      if (err instanceof ParsefyError) {
        throw err;
      }
      if (err instanceof TypeError) {
        throw new ParsefyError('Network error: Unable to connect to the Parsefy API', 'NETWORK_ERROR');
      }
      throw new ParsefyError(
        `Unexpected error: ${err instanceof Error ? err.message : String(err)}`,
        'UNKNOWN_ERROR',
      );
    }
  }

  /**
   * Best-effort extraction of an error body: JSON first, then plain text,
   * then the HTTP status text.
   *
   * @param {Response} response
   * @returns {Promise<object>} Parsed JSON body, or `{ message }` as fallback.
   */
  async parseErrorResponse(response) {
    try {
      return await response.json();
    } catch {
      try {
        return { message: (await response.text()) || response.statusText };
      } catch {
        return { message: response.statusText };
      }
    }
  }
}

exports.APIError = APIError;
exports.DEFAULT_CONFIDENCE_THRESHOLD = DEFAULT_CONFIDENCE_THRESHOLD;
exports.ExtractionError = ExtractionError;
exports.Parsefy = Parsefy;
exports.ParsefyError = ParsefyError;
exports.ValidationError = ValidationError;
package/dist/index.d.cts CHANGED
@@ -11,6 +11,11 @@ interface ParsefyConfig {
11
11
  /** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
12
12
  timeout?: number;
13
13
  }
14
+ /**
15
+ * Default confidence threshold for extraction.
16
+ * Required fields whose confidence falls below this threshold trigger the fallback model.
17
+ */
18
+ declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;
14
19
  /**
15
20
  * Options for the extract method.
16
21
  */
@@ -19,6 +24,35 @@ interface ExtractOptions<T extends z.ZodType> {
19
24
  file: File | Blob | Buffer | string;
20
25
  /** Zod schema defining the structure of data to extract. */
21
26
  schema: T;
27
+ /**
28
+ * Confidence threshold for extraction (0.0 to 1.0). Defaults to 0.85.
29
+ *
30
+ * If a **required** field is returned as null or its confidence falls below this threshold,
31
+ * the fallback model is triggered for higher accuracy.
32
+ *
33
+ * **Tip**: Lower threshold = faster (accepts Tier 1 more often).
34
+ * Higher threshold = more accurate (triggers Tier 2 fallback more often).
35
+ *
36
+ * **Important**: Mark fields as `.optional()` in your Zod schema if they might not
37
+ * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
+ */
39
+ confidenceThreshold?: number;
40
+ }
41
+ /**
42
+ * Confidence details for a single extracted field.
43
+ * Provides evidence and explanation for each extraction.
44
+ */
45
+ interface FieldConfidence {
46
+ /** JSON path to the field (e.g., "$.invoice_number"). */
47
+ field: string;
48
+ /** Confidence score for this field (0.0 to 1.0). */
49
+ score: number;
50
+ /** Explanation of how the value was extracted (e.g., "Exact match", "Inferred from header"). */
51
+ reason: string;
52
+ /** Page number where the field was found. */
53
+ page: number;
54
+ /** Source text evidence from the document. */
55
+ text: string;
22
56
  }
23
57
  /**
24
58
  * Metadata about the extraction process.
@@ -34,6 +68,12 @@ interface ExtractionMetadata {
34
68
  credits: number;
35
69
  /** Whether the fallback model was triggered for higher accuracy. */
36
70
  fallbackTriggered: boolean;
71
+ /** Overall confidence score for the extraction (0.0 to 1.0). */
72
+ confidenceScore: number;
73
+ /** Per-field confidence details with evidence and explanations. */
74
+ fieldConfidence: FieldConfidence[];
75
+ /** List of issues or warnings encountered during extraction. */
76
+ issues: string[];
37
77
  }
38
78
  /**
39
79
  * Error response from the API.
@@ -57,7 +97,10 @@ interface ExtractResult<T> {
57
97
  }
58
98
 
59
99
  /**
60
- * Parsefy client for extracting structured data from documents.
100
+ * Parsefy client for extracting structured data from financial documents.
101
+ *
102
+ * **Important**: All fields are **required by default**. Use `.optional()` for fields
103
+ * that may not appear in all documents to avoid triggering expensive fallback models.
61
104
  *
62
105
  * @example
63
106
  * ```ts
@@ -67,13 +110,24 @@ interface ExtractResult<T> {
67
110
  * const client = new Parsefy('pk_your_api_key');
68
111
  *
69
112
  * const schema = z.object({
70
- * name: z.string(),
113
+ * // REQUIRED - fallback triggered if below confidence threshold
114
+ * invoice_number: z.string(),
71
115
  * total: z.number(),
116
+ *
117
+ * // OPTIONAL - won't trigger fallback if missing
118
+ * vendor: z.string().optional(),
119
+ * notes: z.string().optional(),
72
120
  * });
73
121
  *
74
- * const { object, error } = await client.extract({
122
+ * const { object, metadata, error } = await client.extract({
75
123
  * file: './invoice.pdf',
76
124
  * schema,
125
+ * confidenceThreshold: 0.85, // default
126
+ * });
127
+ *
128
+ * // Check per-field confidence and evidence
129
+ * metadata.fieldConfidence.forEach((fc) => {
130
+ * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
77
131
  * });
78
132
  * ```
79
133
  */
@@ -109,25 +163,41 @@ declare class Parsefy {
109
163
  */
110
164
  private getEnvApiKey;
111
165
  /**
112
- * Extracts structured data from a document using the provided Zod schema.
166
+ * Extracts structured data from a financial document using the provided Zod schema.
167
+ *
168
+ * **Billing Warning**: All fields are **required by default**. If a required field
169
+ * returns `null` or falls below the `confidenceThreshold`, the fallback model is triggered,
170
+ * which is more expensive. Use `.optional()` for fields that may not appear in all documents.
113
171
  *
114
- * @param options - Extraction options including file and schema.
115
- * @returns Promise resolving to the extraction result with typed data.
172
+ * @param options - Extraction options including file, schema, and confidence threshold.
173
+ * @returns Promise resolving to the extraction result with typed data and field-level confidence.
116
174
  *
117
175
  * @example
118
176
  * ```ts
119
177
  * const schema = z.object({
178
+ * // REQUIRED - triggers fallback if confidence < threshold
120
179
  * invoice_number: z.string().describe('The invoice number'),
121
- * total: z.number().describe('Total amount'),
180
+ * total: z.number().describe('Total amount including tax'),
181
+ *
182
+ * // OPTIONAL - won't trigger fallback if missing or low confidence
183
+ * vendor: z.string().optional().describe('Vendor/supplier name'),
184
+ * due_date: z.string().optional().describe('Payment due date'),
122
185
  * });
123
186
  *
124
187
  * const { object, metadata, error } = await client.extract({
125
188
  * file: './invoice.pdf',
126
189
  * schema,
190
+ * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
127
191
  * });
128
192
  *
129
193
  * if (!error && object) {
130
194
  * console.log(object.invoice_number); // Fully typed!
195
+ *
196
+ * // Access field-level confidence and evidence
197
+ * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
+ * metadata.fieldConfidence.forEach((fc) => {
199
+ * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
+ * });
131
201
  * }
132
202
  * ```
133
203
  */
@@ -180,4 +250,4 @@ declare class ValidationError extends ParsefyError {
180
250
  constructor(message: string);
181
251
  }
182
252
 
183
- export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
253
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
package/dist/index.d.mts CHANGED
@@ -11,6 +11,11 @@ interface ParsefyConfig {
11
11
  /** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
12
12
  timeout?: number;
13
13
  }
14
+ /**
15
+ * Default confidence threshold for extraction.
16
+ * Required fields whose confidence falls below this threshold will trigger the fallback model.
17
+ */
18
+ declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;
14
19
  /**
15
20
  * Options for the extract method.
16
21
  */
@@ -19,6 +24,35 @@ interface ExtractOptions<T extends z.ZodType> {
19
24
  file: File | Blob | Buffer | string;
20
25
  /** Zod schema defining the structure of data to extract. */
21
26
  schema: T;
27
+ /**
28
+ * Confidence threshold for extraction (0.0 to 1.0). Defaults to 0.85.
29
+ *
30
+ * If a **required** field's confidence falls below this threshold (or returns null),
31
+ * the fallback model is triggered for higher accuracy.
32
+ *
33
+ * **Tip**: Lower threshold = faster (accepts Tier 1 more often).
34
+ * Higher threshold = more accurate (triggers Tier 2 fallback more often).
35
+ *
36
+ * **Important**: Mark fields as `.optional()` in your Zod schema if they might not
37
+ * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
+ */
39
+ confidenceThreshold?: number;
40
+ }
41
+ /**
42
+ * Confidence details for a single extracted field.
43
+ * Provides evidence and explanation for each extraction.
44
+ */
45
+ interface FieldConfidence {
46
+ /** JSON path to the field (e.g., "$.invoice_number"). */
47
+ field: string;
48
+ /** Confidence score for this field (0.0 to 1.0). */
49
+ score: number;
50
+ /** Explanation of how the value was extracted (e.g., "Exact match", "Inferred from header"). */
51
+ reason: string;
52
+ /** Page number where the field was found. */
53
+ page: number;
54
+ /** Source text evidence from the document. */
55
+ text: string;
22
56
  }
23
57
  /**
24
58
  * Metadata about the extraction process.
@@ -34,6 +68,12 @@ interface ExtractionMetadata {
34
68
  credits: number;
35
69
  /** Whether the fallback model was triggered for higher accuracy. */
36
70
  fallbackTriggered: boolean;
71
+ /** Overall confidence score for the extraction (0.0 to 1.0). */
72
+ confidenceScore: number;
73
+ /** Per-field confidence details with evidence and explanations. */
74
+ fieldConfidence: FieldConfidence[];
75
+ /** List of issues or warnings encountered during extraction. */
76
+ issues: string[];
37
77
  }
38
78
  /**
39
79
  * Error response from the API.
@@ -57,7 +97,10 @@ interface ExtractResult<T> {
57
97
  }
58
98
 
59
99
  /**
60
- * Parsefy client for extracting structured data from documents.
100
+ * Parsefy client for extracting structured data from financial documents.
101
+ *
102
+ * **Important**: All fields are **required by default**. Use `.optional()` for fields
103
+ * that may not appear in all documents to avoid triggering expensive fallback models.
61
104
  *
62
105
  * @example
63
106
  * ```ts
@@ -67,13 +110,24 @@ interface ExtractResult<T> {
67
110
  * const client = new Parsefy('pk_your_api_key');
68
111
  *
69
112
  * const schema = z.object({
70
- * name: z.string(),
113
+ * // REQUIRED - fallback triggered if below confidence threshold
114
+ * invoice_number: z.string(),
71
115
  * total: z.number(),
116
+ *
117
+ * // OPTIONAL - won't trigger fallback if missing
118
+ * vendor: z.string().optional(),
119
+ * notes: z.string().optional(),
72
120
  * });
73
121
  *
74
- * const { object, error } = await client.extract({
122
+ * const { object, metadata, error } = await client.extract({
75
123
  * file: './invoice.pdf',
76
124
  * schema,
125
+ * confidenceThreshold: 0.85, // default
126
+ * });
127
+ *
128
+ * // Check per-field confidence and evidence
129
+ * metadata.fieldConfidence.forEach((fc) => {
130
+ * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
77
131
  * });
78
132
  * ```
79
133
  */
@@ -109,25 +163,41 @@ declare class Parsefy {
109
163
  */
110
164
  private getEnvApiKey;
111
165
  /**
112
- * Extracts structured data from a document using the provided Zod schema.
166
+ * Extracts structured data from a financial document using the provided Zod schema.
167
+ *
168
+ * **Billing Warning**: All fields are **required by default**. If a required field
169
+ * returns `null` or falls below the `confidenceThreshold`, the fallback model is triggered,
170
+ * which is more expensive. Use `.optional()` for fields that may not appear in all documents.
113
171
  *
114
- * @param options - Extraction options including file and schema.
115
- * @returns Promise resolving to the extraction result with typed data.
172
+ * @param options - Extraction options including file, schema, and confidence threshold.
173
+ * @returns Promise resolving to the extraction result with typed data and field-level confidence.
116
174
  *
117
175
  * @example
118
176
  * ```ts
119
177
  * const schema = z.object({
178
+ * // REQUIRED - triggers fallback if confidence < threshold
120
179
  * invoice_number: z.string().describe('The invoice number'),
121
- * total: z.number().describe('Total amount'),
180
+ * total: z.number().describe('Total amount including tax'),
181
+ *
182
+ * // OPTIONAL - won't trigger fallback if missing or low confidence
183
+ * vendor: z.string().optional().describe('Vendor/supplier name'),
184
+ * due_date: z.string().optional().describe('Payment due date'),
122
185
  * });
123
186
  *
124
187
  * const { object, metadata, error } = await client.extract({
125
188
  * file: './invoice.pdf',
126
189
  * schema,
190
+ * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
127
191
  * });
128
192
  *
129
193
  * if (!error && object) {
130
194
  * console.log(object.invoice_number); // Fully typed!
195
+ *
196
+ * // Access field-level confidence and evidence
197
+ * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
+ * metadata.fieldConfidence.forEach((fc) => {
199
+ * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
+ * });
131
201
  * }
132
202
  * ```
133
203
  */
@@ -180,4 +250,4 @@ declare class ValidationError extends ParsefyError {
180
250
  constructor(message: string);
181
251
  }
182
252
 
183
- export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
253
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
package/dist/index.d.ts CHANGED
@@ -11,6 +11,11 @@ interface ParsefyConfig {
11
11
  /** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
12
12
  timeout?: number;
13
13
  }
14
+ /**
15
+ * Default confidence threshold for extraction.
16
+ * Required fields whose confidence falls below this threshold will trigger the fallback model.
17
+ */
18
+ declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;
14
19
  /**
15
20
  * Options for the extract method.
16
21
  */
@@ -19,6 +24,35 @@ interface ExtractOptions<T extends z.ZodType> {
19
24
  file: File | Blob | Buffer | string;
20
25
  /** Zod schema defining the structure of data to extract. */
21
26
  schema: T;
27
+ /**
28
+ * Confidence threshold for extraction (0.0 to 1.0). Defaults to 0.85.
29
+ *
30
+ * If a **required** field's confidence falls below this threshold (or returns null),
31
+ * the fallback model is triggered for higher accuracy.
32
+ *
33
+ * **Tip**: Lower threshold = faster (accepts Tier 1 more often).
34
+ * Higher threshold = more accurate (triggers Tier 2 fallback more often).
35
+ *
36
+ * **Important**: Mark fields as `.optional()` in your Zod schema if they might not
37
+ * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
+ */
39
+ confidenceThreshold?: number;
40
+ }
41
+ /**
42
+ * Confidence details for a single extracted field.
43
+ * Provides evidence and explanation for each extraction.
44
+ */
45
+ interface FieldConfidence {
46
+ /** JSON path to the field (e.g., "$.invoice_number"). */
47
+ field: string;
48
+ /** Confidence score for this field (0.0 to 1.0). */
49
+ score: number;
50
+ /** Explanation of how the value was extracted (e.g., "Exact match", "Inferred from header"). */
51
+ reason: string;
52
+ /** Page number where the field was found. */
53
+ page: number;
54
+ /** Source text evidence from the document. */
55
+ text: string;
22
56
  }
23
57
  /**
24
58
  * Metadata about the extraction process.
@@ -34,6 +68,12 @@ interface ExtractionMetadata {
34
68
  credits: number;
35
69
  /** Whether the fallback model was triggered for higher accuracy. */
36
70
  fallbackTriggered: boolean;
71
+ /** Overall confidence score for the extraction (0.0 to 1.0). */
72
+ confidenceScore: number;
73
+ /** Per-field confidence details with evidence and explanations. */
74
+ fieldConfidence: FieldConfidence[];
75
+ /** List of issues or warnings encountered during extraction. */
76
+ issues: string[];
37
77
  }
38
78
  /**
39
79
  * Error response from the API.
@@ -57,7 +97,10 @@ interface ExtractResult<T> {
57
97
  }
58
98
 
59
99
  /**
60
- * Parsefy client for extracting structured data from documents.
100
+ * Parsefy client for extracting structured data from financial documents.
101
+ *
102
+ * **Important**: All fields are **required by default**. Use `.optional()` for fields
103
+ * that may not appear in all documents to avoid triggering expensive fallback models.
61
104
  *
62
105
  * @example
63
106
  * ```ts
@@ -67,13 +110,24 @@ interface ExtractResult<T> {
67
110
  * const client = new Parsefy('pk_your_api_key');
68
111
  *
69
112
  * const schema = z.object({
70
- * name: z.string(),
113
+ * // REQUIRED - fallback triggered if below confidence threshold
114
+ * invoice_number: z.string(),
71
115
  * total: z.number(),
116
+ *
117
+ * // OPTIONAL - won't trigger fallback if missing
118
+ * vendor: z.string().optional(),
119
+ * notes: z.string().optional(),
72
120
  * });
73
121
  *
74
- * const { object, error } = await client.extract({
122
+ * const { object, metadata, error } = await client.extract({
75
123
  * file: './invoice.pdf',
76
124
  * schema,
125
+ * confidenceThreshold: 0.85, // default
126
+ * });
127
+ *
128
+ * // Check per-field confidence and evidence
129
+ * metadata.fieldConfidence.forEach((fc) => {
130
+ * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
77
131
  * });
78
132
  * ```
79
133
  */
@@ -109,25 +163,41 @@ declare class Parsefy {
109
163
  */
110
164
  private getEnvApiKey;
111
165
  /**
112
- * Extracts structured data from a document using the provided Zod schema.
166
+ * Extracts structured data from a financial document using the provided Zod schema.
167
+ *
168
+ * **Billing Warning**: All fields are **required by default**. If a required field
169
+ * returns `null` or falls below the `confidenceThreshold`, the fallback model is triggered,
170
+ * which is more expensive. Use `.optional()` for fields that may not appear in all documents.
113
171
  *
114
- * @param options - Extraction options including file and schema.
115
- * @returns Promise resolving to the extraction result with typed data.
172
+ * @param options - Extraction options including file, schema, and confidence threshold.
173
+ * @returns Promise resolving to the extraction result with typed data and field-level confidence.
116
174
  *
117
175
  * @example
118
176
  * ```ts
119
177
  * const schema = z.object({
178
+ * // REQUIRED - triggers fallback if confidence < threshold
120
179
  * invoice_number: z.string().describe('The invoice number'),
121
- * total: z.number().describe('Total amount'),
180
+ * total: z.number().describe('Total amount including tax'),
181
+ *
182
+ * // OPTIONAL - won't trigger fallback if missing or low confidence
183
+ * vendor: z.string().optional().describe('Vendor/supplier name'),
184
+ * due_date: z.string().optional().describe('Payment due date'),
122
185
  * });
123
186
  *
124
187
  * const { object, metadata, error } = await client.extract({
125
188
  * file: './invoice.pdf',
126
189
  * schema,
190
+ * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
127
191
  * });
128
192
  *
129
193
  * if (!error && object) {
130
194
  * console.log(object.invoice_number); // Fully typed!
195
+ *
196
+ * // Access field-level confidence and evidence
197
+ * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
+ * metadata.fieldConfidence.forEach((fc) => {
199
+ * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
+ * });
131
201
  * }
132
202
  * ```
133
203
  */
@@ -180,4 +250,4 @@ declare class ValidationError extends ParsefyError {
180
250
  constructor(message: string);
181
251
  }
182
252
 
183
- export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
253
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
package/dist/index.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import {zodToJsonSchema}from'zod-to-json-schema';var m={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},f=10*1024*1024,h="https://api.parsefy.io",g=6e4;var i=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},c=class extends i{constructor(t,r,o){super(t),this.name="APIError",this.statusCode=r,this.response=o;}},l=class extends i{constructor(t,r,o){super(t,r),this.name="ExtractionError",this.metadata=o;}},s=class extends i{constructor(t){super(t),this.name="ValidationError";}};function d(){return typeof process<"u"&&process.versions?.node!==void 0}function T(e){return zodToJsonSchema(e,{$refStrategy:"none",target:"openApi3"})}function b(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&m[t]||null}function R(e){if(!b(e)){let r=Object.keys(m).join(", ");throw new s(`Unsupported file type. Supported types: ${r}`)}}function u(e){if(e===0)throw new s("File is empty");if(e>f){let t=f/1048576;throw new s(`File size exceeds maximum limit of ${t}MB`)}}function w(e){return {object:e.object,metadata:{processingTimeMs:e.metadata.processing_time_ms,inputTokens:e.metadata.input_tokens,outputTokens:e.metadata.output_tokens,credits:e.metadata.credits,fallbackTriggered:e.metadata.fallback_triggered},error:e.error}}function E(e,t){let r=b(t)||"application/octet-stream",o=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([o],t,{type:r}):new Blob([o],{type:r})}async function _(e){if(!d())throw new s("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new s(`File not found: ${e}`);let o=r.basename(e);R(o);let a=t.readFileSync(e);return u(a.length),{buffer:a,filename:o}}async function P(e){if(typeof e=="string"){let{buffer:t,filename:r}=await _(e);return E(t,r)}if(Buffer.isBuffer(e))return u(e.length),E(e,"document.pdf");if(e instanceof File)return R(e.name),u(e.size),e;if(e instanceof Blob)return u(e.size),e;throw new s("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function F(e){return new Promise(t=>setTimeout(t,e))}function k(e,t=1e3){let r=t*Math.pow(2,e),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var y=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new s("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||h,this.timeout=r.timeout||g;}getEnvApiKey(){return d()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:o}=t,a=T(o),n=await P(r),p=new FormData;return p.append("file",n),p.append("output_schema",JSON.stringify(a)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(o){if(o instanceof c&&o.statusCode===429&&r<this.maxRetries){let a=k(r);return await F(a),this.makeRequestWithRetry(t,r+1)}throw o}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,a=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:o.signal});if(clearTimeout(a),!n.ok){let x=await this.parseErrorResponse(n);throw new c(x.message||`API request failed with status ${n.status}`,n.status,x)}let p=await n.json();return w(p)}catch(n){throw clearTimeout(a),n instanceof Error&&n.name==="AbortError"?new i(`Request timed out after 
${this.timeout}ms`,"TIMEOUT"):n instanceof i?n:n instanceof TypeError?new i("Network error: Unable to connect to the Parsefy API","NETWORK_ERROR"):new i(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
2
- export{c as APIError,l as ExtractionError,y as Parsefy,i as ParsefyError,s as ValidationError};
1
+ import {zodToJsonSchema}from'zod-to-json-schema';var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,h="https://api.parsefy.io",E=6e4;var i=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends i{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends i{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},s=class extends i{constructor(e){super(e),this.name="ValidationError";}};function x(){return typeof process<"u"&&process.versions?.node!==void 0}function b(t){let e=zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function R(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function w(t){if(!R(t)){let r=Object.keys(d).join(", ");throw new s(`Unsupported file type. Supported types: ${r}`)}}function f(t){if(t===0)throw new s("File is empty");if(t>l){let e=l/1048576;throw new s(`File size exceeds maximum limit of ${e}MB`)}}function F(t){return {object:t.object,metadata:{processingTimeMs:t.metadata.processing_time_ms,inputTokens:t.metadata.input_tokens,outputTokens:t.metadata.output_tokens,credits:t.metadata.credits,fallbackTriggered:t.metadata.fallback_triggered,confidenceScore:t._meta.confidence_score,fieldConfidence:t._meta.field_confidence.map(e=>({field:e.field,score:e.score,reason:e.reason,page:e.page,text:e.text})),issues:t._meta.issues},error:t.error}}function T(t,e){let r=R(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function k(t){if(!x())throw new s("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new s(`File not found: ${t}`);let o=r.basename(t);w(o);let a=e.readFileSync(t);return f(a.length),{buffer:a,filename:o}}async function _(t){if(typeof t=="string"){let{buffer:e,filename:r}=await k(t);return T(e,r)}if(Buffer.isBuffer(t))return f(t.length),T(t,"document.pdf");if(t instanceof File)return w(t.name),f(t.size),t;if(t instanceof Blob)return f(t.size),t;throw new s("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function S(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var g=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new s("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||h,this.timeout=r.timeout||E;}getEnvApiKey(){return x()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:a}=e,n=b(o),m=await _(r),c=new FormData;return c.append("file",m),c.append("output_schema",JSON.stringify(n)),c.append("confidence_threshold",String(a??.85)),this.makeRequestWithRetry(c)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let a=S(r);return await P(a),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,a=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(a),!n.ok){let c=await this.parseErrorResponse(n);throw new p(c.message||`API request failed with status ${n.status}`,n.status,c)}let m=await n.json();return F(m)}catch(n){throw clearTimeout(a),n instanceof 
Error&&n.name==="AbortError"?new i(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof i?n:n instanceof TypeError?new i("Network error: Unable to connect to the Parsefy API","NETWORK_ERROR"):new i(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
2
+ export{p as APIError,u as DEFAULT_CONFIDENCE_THRESHOLD,y as ExtractionError,g as Parsefy,i as ParsefyError,s as ValidationError};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "parsefy",
3
- "version": "1.0.1",
4
- "description": "Official TypeScript SDK for Parsefy - AI-powered document data extraction",
3
+ "version": "1.0.2",
4
+ "description": "Official TypeScript SDK for Parsefy - Financial Document Infrastructure for Developers",
5
5
  "author": "",
6
6
  "license": "MIT",
7
7
  "repository": {