parsefy 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Parsefy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
package/README.md ADDED
@@ -0,0 +1,403 @@
1
+ <p align="center">
2
+ <img src="/assets/logo.png" alt="Parsefy Logo" width="120" />
3
+ </p>
4
+
5
+ <h1 align="center">Parsefy TypeScript / JavaScript SDK</h1>
6
+
7
+ <p align="center">
8
+ <strong>Official TypeScript / JavaScript SDK for Parsefy - Financial Document Infrastructure for Developers</strong><br>
9
+
10
+ Turn financial PDFs (invoices, receipts, bills) into structured JSON with validation and risk signals.
11
+ </p>
12
+
13
+ ---
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ npm install parsefy zod
19
+ ```
20
+
21
+ ## Quick Start
22
+
23
+ ```typescript
24
+ import { Parsefy } from 'parsefy';
25
+ import * as z from 'zod';
26
+
27
+ const client = new Parsefy('pk_your_api_key');
28
+
29
+ const schema = z.object({
30
+ // REQUIRED - triggers fallback if below confidence threshold
31
+ invoice_number: z.string().describe('The invoice number'),
32
+ total: z.number().describe('Total amount including tax'),
33
+
34
+ // OPTIONAL - won't trigger fallback if missing or low confidence
35
+ vendor: z.string().optional().describe('Vendor name'),
36
+ due_date: z.string().optional().describe('Payment due date'),
37
+ });
38
+
39
+ const { object, metadata, error } = await client.extract({
40
+ file: './invoice.pdf',
41
+ schema,
42
+ });
43
+
44
+ if (!error && object) {
45
+ console.log(object.invoice_number); // Fully typed!
46
+
47
+ // Access field-level confidence and evidence
48
+ console.log(`Overall confidence: ${metadata.confidenceScore}`);
49
+ metadata.fieldConfidence.forEach((fc) => {
50
+ console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
51
+ });
52
+ }
53
+ ```
54
+
55
+ ## ⚠️ Required vs Optional Fields (Important for Billing)
56
+
57
+ **All fields are required by default.** This is critical to understand:
58
+
59
+ | User writes (SDK) | SDK converts to (JSON Schema) | API interprets as |
60
+ |-------------------|-------------------------------|-------------------|
61
+ | `name: z.string()` | `required: ["name"]` | **Required** |
62
+ | `name: z.string().optional()` | `required: []` | **Optional** |
63
+
64
+ ### Why This Matters
65
+
66
+ If a **required** field returns `null` or falls below the `confidenceThreshold`, the API triggers the **fallback model** (Tier 2), which is more expensive.
67
+
68
+ **To avoid unexpected high billing:**
69
+
70
+ ```typescript
71
+ const schema = z.object({
72
+ // REQUIRED - Always present on invoices, keep required
73
+ invoice_number: z.string(),
74
+ total: z.number(),
75
+
76
+ // OPTIONAL - May not appear on all documents, mark optional!
77
+ vendor: z.string().optional(), // Not all invoices have vendor name
78
+ tax_id: z.string().optional(), // Rarely present
79
+ notes: z.string().optional(), // Usually empty
80
+ due_date: z.string().optional(), // Sometimes missing
81
+ });
82
+ ```
83
+
84
+ **Rule of thumb:** If a field might be missing in >20% of your documents, mark it `.optional()`.
85
+
86
+ ## Confidence Threshold
87
+
88
+ Control when the fallback model is triggered:
89
+
90
+ ```typescript
91
+ const { object, metadata } = await client.extract({
92
+ file: './invoice.pdf',
93
+ schema,
94
+ confidenceThreshold: 0.85, // default
95
+ });
96
+ ```
97
+
98
+ | Threshold | Behavior |
99
+ |-----------|----------|
100
+ | Lower (e.g., 0.70) | **Faster** – Accepts Tier 1 results more often |
101
+ | Higher (e.g., 0.95) | **More accurate** – Triggers Tier 2 fallback more often |
102
+
103
+ **Default:** `0.85`
104
+
105
+ ## Response Format
106
+
107
+ ```typescript
108
+ interface ExtractResult<T> {
109
+ // Extracted data matching your schema, or null if extraction failed
110
+ object: T | null;
111
+
112
+ // Metadata about the extraction
113
+ metadata: {
114
+ processingTimeMs: number; // Processing time in milliseconds
115
+ inputTokens: number; // Input tokens used
116
+ outputTokens: number; // Output tokens generated
117
+ credits: number; // Credits consumed (1 credit = 1 page)
118
+ fallbackTriggered: boolean; // Whether fallback model was used
119
+
120
+ // 🆕 Field-level confidence and evidence
121
+ confidenceScore: number; // Overall confidence (0.0 to 1.0)
122
+ fieldConfidence: Array<{
123
+ field: string; // JSON path (e.g., "$.invoice_number")
124
+ score: number; // Confidence score (0.0 to 1.0)
125
+ reason: string; // "Exact match", "Inferred from header", etc.
126
+ page: number; // Page number where found
127
+ text: string; // Source text evidence
128
+ }>;
129
+ issues: string[]; // Warnings or anomalies detected
130
+ };
131
+
132
+ // Error details if extraction failed
133
+ error: {
134
+ code: string;
135
+ message: string;
136
+ } | null;
137
+ }
138
+ ```
139
+
140
+ ### Example Response
141
+
142
+ ```typescript
143
+ const { object, metadata } = await client.extract({ file, schema });
144
+
145
+ // object:
146
+ {
147
+ invoice_number: "INV-2024-0042",
148
+ date: "2024-01-15",
149
+ total: 1250.00,
150
+ vendor: "Acme Corp"
151
+ }
152
+
153
+ // metadata.confidenceScore: 0.94
154
+
155
+ // metadata.fieldConfidence:
156
+ [
157
+ { field: "$.invoice_number", score: 0.98, reason: "Exact match", page: 1, text: "Invoice # INV-2024-0042" },
158
+ { field: "$.date", score: 0.95, reason: "Exact match", page: 1, text: "Date: 01/15/2024" },
159
+ { field: "$.total", score: 0.92, reason: "Formatting ambiguous", page: 1, text: "Total: $1,250.00" },
160
+ { field: "$.vendor", score: 0.90, reason: "Inferred from header", page: 1, text: "Acme Corp" }
161
+ ]
162
+
163
+ // metadata.issues: []
164
+ ```
165
+
166
+ ## Configuration
167
+
168
+ ### API Key
169
+
170
+ ```typescript
171
+ // Option 1: Pass API key directly
172
+ const client = new Parsefy('pk_your_api_key');
173
+
174
+ // Option 2: Use environment variable
175
+ // Set PARSEFY_API_KEY in your environment
176
+ const client = new Parsefy();
177
+
178
+ // Option 3: Configuration object
179
+ const client = new Parsefy({
180
+ apiKey: 'pk_your_api_key',
181
+ timeout: 120000, // 2 minutes
182
+ });
183
+ ```
184
+
185
+ ### Configuration Options
186
+
187
+ | Option | Type | Default | Description |
188
+ |--------|------|---------|-------------|
189
+ | `apiKey` | `string` | `process.env.PARSEFY_API_KEY` | Your Parsefy API key |
190
+ | `timeout` | `number` | `60000` | Request timeout in ms |
191
+
192
+ ### Extract Options
193
+
194
+ | Option | Type | Default | Description |
195
+ |--------|------|---------|-------------|
196
+ | `file` | `File \| Blob \| Buffer \| string` | required | Document to extract from |
197
+ | `schema` | `z.ZodType` | required | Zod schema defining extraction structure |
198
+ | `confidenceThreshold` | `number` | `0.85` | Minimum confidence before triggering fallback |
199
+
200
+ ## Usage
201
+
202
+ ### File Input Options
203
+
204
+ The SDK supports multiple file input types. **Files don't need to be on disk** – you can work entirely in memory.
205
+
206
+ | Input Type | Usage | Environment |
207
+ |------------|-------|-------------|
208
+ | `string` | File path | Node.js only |
209
+ | `Buffer` | In-memory bytes | Node.js |
210
+ | `File` | From file input or FormData | Browser, Node.js 20+, Edge |
211
+ | `Blob` | Raw binary with MIME type | Universal |
212
+
213
+ ```typescript
214
+ // Node.js: File path
215
+ const result = await client.extract({
216
+ file: './invoice.pdf',
217
+ schema,
218
+ });
219
+
220
+ // Node.js: Buffer (in-memory)
221
+ import { readFileSync } from 'fs';
222
+ const result = await client.extract({
223
+ file: readFileSync('./invoice.pdf'),
224
+ schema,
225
+ });
226
+
227
+ // Browser: File input
228
+ const fileInput = document.querySelector('input[type="file"]');
229
+ const result = await client.extract({
230
+ file: fileInput.files[0],
231
+ schema,
232
+ });
233
+ ```
234
+
235
+ ### Complex Schemas for Financial Documents
236
+
237
+ Use `.describe()` to guide the AI extraction:
238
+
239
+ ```typescript
240
+ const invoiceSchema = z.object({
241
+ // REQUIRED - Core financial data
242
+ invoice_number: z.string().describe('The invoice or receipt number'),
243
+ date: z.string().describe('Invoice date in YYYY-MM-DD format'),
244
+ total: z.number().describe('Total amount due including tax'),
245
+ currency: z.string().describe('3-letter currency code (USD, EUR, etc.)'),
246
+
247
+ // REQUIRED - Line items (usually present)
248
+ line_items: z.array(z.object({
249
+ description: z.string().describe('Item description'),
250
+ quantity: z.number().describe('Number of units'),
251
+ unit_price: z.number().describe('Price per unit'),
252
+ amount: z.number().describe('Total amount for this line'),
253
+ })).describe('List of items on the invoice'),
254
+
255
+ // OPTIONAL - May not appear on all invoices
256
+ vendor: z.object({
257
+ name: z.string().describe('Company name of the vendor'),
258
+ address: z.string().optional().describe('Full address'),
259
+ tax_id: z.string().optional().describe('Tax ID or VAT number'),
260
+ }).optional(),
261
+ subtotal: z.number().optional().describe('Subtotal before tax'),
262
+ tax: z.number().optional().describe('Tax amount'),
263
+ due_date: z.string().optional().describe('Payment due date'),
264
+ payment_terms: z.string().optional().describe('e.g., Net 30'),
265
+ });
266
+ ```
267
+
268
+ ### Server-Side / API Usage
269
+
270
+ **Express with Multer:**
271
+
272
+ ```typescript
273
+ import express from 'express';
274
+ import multer from 'multer';
275
+ import { Parsefy } from 'parsefy';
276
+
277
+ const upload = multer(); // Store in memory
278
+ const client = new Parsefy();
279
+ const app = express();
280
+ app.post('/extract', upload.single('document'), async (req, res) => {
281
+ const { object, metadata, error } = await client.extract({
282
+ file: req.file.buffer,
283
+ schema,
284
+ confidenceThreshold: 0.80, // Adjust based on your needs
285
+ });
286
+
287
+ res.json({
288
+ data: object,
289
+ confidence: metadata.confidenceScore,
290
+ fieldDetails: metadata.fieldConfidence,
291
+ error,
292
+ });
293
+ });
294
+ ```
295
+
296
+ **Hono / Cloudflare Workers:**
297
+
298
+ ```typescript
299
+ import { Hono } from 'hono';
300
+ import { Parsefy } from 'parsefy';
301
+
302
+ const app = new Hono();
303
+ const client = new Parsefy();
304
+
305
+ app.post('/extract', async (c) => {
306
+ const formData = await c.req.formData();
307
+ const file = formData.get('document');
308
+
309
+ const { object, metadata, error } = await client.extract({
310
+ file,
311
+ schema,
312
+ });
313
+
314
+ return c.json({
315
+ data: object,
316
+ confidence: metadata.confidenceScore,
317
+ issues: metadata.issues,
318
+ error,
319
+ });
320
+ });
321
+ ```
322
+
323
+ ### Error Handling
324
+
325
+ ```typescript
326
+ import { Parsefy, APIError, ValidationError, ParsefyError } from 'parsefy';
327
+
328
+ try {
329
+ const { object, error, metadata } = await client.extract({
330
+ file: './invoice.pdf',
331
+ schema,
332
+ });
333
+
334
+ // Extraction-level errors (request succeeded, but extraction failed)
335
+ if (error) {
336
+ console.error(`Extraction failed: [${error.code}] ${error.message}`);
337
+ console.log(`Fallback triggered: ${metadata.fallbackTriggered}`);
338
+ console.log(`Issues: ${metadata.issues.join(', ')}`);
339
+ return;
340
+ }
341
+
342
+ // Check for low confidence fields
343
+ const lowConfidence = metadata.fieldConfidence.filter((fc) => fc.score < 0.80);
344
+ if (lowConfidence.length > 0) {
345
+ console.warn('Low confidence fields:', lowConfidence);
346
+ }
347
+
348
+ console.log('Success:', object);
349
+ } catch (err) {
350
+ // HTTP/Network errors
351
+ if (err instanceof APIError) {
352
+ console.error(`API Error ${err.statusCode}: ${err.message}`);
353
+ } else if (err instanceof ValidationError) {
354
+ console.error(`Validation Error: ${err.message}`);
355
+ } else if (err instanceof ParsefyError) {
356
+ console.error(`Parsefy Error: ${err.message}`);
357
+ }
358
+ }
359
+ ```
360
+
361
+ ## Error Types
362
+
363
+ | Error Class | Description |
364
+ |-------------|-------------|
365
+ | `ParsefyError` | Base error class for all Parsefy errors |
366
+ | `APIError` | HTTP errors (4xx/5xx responses) |
367
+ | `ExtractionError` | Extraction failed (returned in response) |
368
+ | `ValidationError` | Client-side validation errors |
369
+
370
+ ## Supported File Types
371
+
372
+ - **PDF** (`.pdf`) – up to 10MB
373
+ - **DOCX** (`.docx`) – up to 10MB
374
+
375
+ ## Rate Limits
376
+
377
+ The API allows 1 request per second. The SDK automatically retries rate-limited requests (HTTP 429) up to 3 times with exponential backoff.
378
+
379
+ ## Requirements
380
+
381
+ - Node.js 18+ (for native `fetch` and `FormData`)
382
+ - Zod 3.x (peer dependency)
383
+
384
+ ## TypeScript Types
385
+
386
+ All types are exported for your convenience:
387
+
388
+ ```typescript
389
+ import type {
390
+ ParsefyConfig,
391
+ ExtractOptions,
392
+ ExtractResult,
393
+ ExtractionMetadata,
394
+ FieldConfidence,
395
+ APIErrorResponse,
396
+ } from 'parsefy';
397
+
398
+ import { DEFAULT_CONFIDENCE_THRESHOLD } from 'parsefy'; // 0.85
399
+ ```
400
+
401
+ ## License
402
+
403
+ MIT © [Parsefy](https://parsefy.io)
package/dist/index.cjs ADDED
@@ -0,0 +1,2 @@
1
'use strict';

const zodToJsonSchema = require('zod-to-json-schema');

/** Confidence threshold sent to the API when the caller does not provide one. */
const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;

/** Accepted file extensions and the MIME type uploaded for each. */
const SUPPORTED_MIME_TYPES = {
  '.pdf': 'application/pdf',
  '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
};

/** Largest accepted upload, in bytes (10MB). */
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024;
const DEFAULT_BASE_URL = 'https://api.parsefy.io';
const DEFAULT_TIMEOUT_MS = 6e4;

/** Base error class for every error raised by this SDK. */
class ParsefyError extends Error {
  /**
   * @param message Human-readable description.
   * @param code Optional machine-readable error code.
   */
  constructor(message, code) {
    super(message);
    this.name = 'ParsefyError';
    this.code = code;
    // captureStackTrace is V8-only, so probe for it before calling.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}

/** Raised when the API answers with a non-2xx status. `code` is left undefined. */
class APIError extends ParsefyError {
  constructor(message, statusCode, response) {
    super(message);
    this.name = 'APIError';
    this.statusCode = statusCode;
    this.response = response;
  }
}

/** Error type carrying extraction metadata; exported for consumers. */
class ExtractionError extends ParsefyError {
  constructor(message, code, metadata) {
    super(message, code);
    this.name = 'ExtractionError';
    this.metadata = metadata;
  }
}

/** Raised for client-side input problems before any request is sent. */
class ValidationError extends ParsefyError {
  constructor(message) {
    super(message);
    this.name = 'ValidationError';
  }
}

/** Returns true when running under Node.js (detected via `process.versions.node`). */
function isNode() {
  return typeof process !== 'undefined' && process.versions?.node !== undefined;
}

/** Converts a Zod schema to draft-07 JSON Schema with refs inlined and `$schema` removed. */
function toJsonSchema(schema) {
  const jsonSchema = zodToJsonSchema.zodToJsonSchema(schema, {
    $refStrategy: 'none',
    target: 'jsonSchema7',
  });
  if ('$schema' in jsonSchema) {
    delete jsonSchema.$schema;
  }
  return jsonSchema;
}

/** Returns the MIME type for a supported file name (case-insensitive extension), or null. */
function getMimeType(filename) {
  const extension = filename.toLowerCase().match(/\.[^.]+$/)?.[0];
  return (extension && SUPPORTED_MIME_TYPES[extension]) || null;
}

/** Throws ValidationError unless the file name ends in a supported extension. */
function assertSupportedFileType(filename) {
  if (!getMimeType(filename)) {
    const supported = Object.keys(SUPPORTED_MIME_TYPES).join(', ');
    throw new ValidationError(`Unsupported file type. Supported types: ${supported}`);
  }
}

/** Throws ValidationError for empty files and files larger than MAX_FILE_SIZE_BYTES. */
function assertValidFileSize(size) {
  if (size === 0) {
    throw new ValidationError('File is empty');
  }
  if (size > MAX_FILE_SIZE_BYTES) {
    const limitMb = MAX_FILE_SIZE_BYTES / 1048576;
    throw new ValidationError(`File size exceeds maximum limit of ${limitMb}MB`);
  }
}

/** Maps the snake_case API response onto the camelCase ExtractResult shape. */
function normalizeResponse(raw) {
  return {
    object: raw.object,
    metadata: {
      processingTimeMs: raw.metadata.processing_time_ms,
      inputTokens: raw.metadata.input_tokens,
      outputTokens: raw.metadata.output_tokens,
      credits: raw.metadata.credits,
      fallbackTriggered: raw.metadata.fallback_triggered,
      confidenceScore: raw._meta.confidence_score,
      fieldConfidence: raw._meta.field_confidence.map((entry) => ({
        field: entry.field,
        score: entry.score,
        reason: entry.reason,
        page: entry.page,
        text: entry.text,
      })),
      issues: raw._meta.issues,
    },
    error: raw.error,
  };
}

/** Wraps a Node.js Buffer in a File (or a Blob where File is unavailable) for upload. */
function bufferToUploadable(buffer, filename) {
  const type = getMimeType(filename) || 'application/octet-stream';
  // Copy only this Buffer's window: a Buffer may be a view into a larger pooled ArrayBuffer.
  const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
  return typeof File !== 'undefined'
    ? new File([bytes], filename, { type })
    : new Blob([bytes], { type });
}

/** Reads and validates a file from disk (Node.js only). */
async function readFileFromPath(filePath) {
  if (!isNode()) {
    throw new ValidationError('File path strings are only supported in Node.js. Use File or Blob in the browser.');
  }
  // Loaded lazily so browser bundles never evaluate the Node-only modules.
  const fs = await import('fs');
  const path = await import('path');
  if (!fs.existsSync(filePath)) {
    throw new ValidationError(`File not found: ${filePath}`);
  }
  const filename = path.basename(filePath);
  assertSupportedFileType(filename);
  const buffer = fs.readFileSync(filePath);
  assertValidFileSize(buffer.length);
  return { buffer, filename };
}

/**
 * Normalizes any accepted `file` input into a File/Blob ready for FormData.
 *
 * Every runtime global is probed with `typeof` before use: `Buffer` does not
 * exist in browsers and `File` is not a global on Node.js 18, so referencing
 * them unguarded threw a ReferenceError instead of reaching the next branch.
 *
 * @throws ValidationError when the input is unsupported, empty, or too large.
 */
async function toUploadable(input) {
  if (typeof input === 'string') {
    const { buffer, filename } = await readFileFromPath(input);
    return bufferToUploadable(buffer, filename);
  }
  if (typeof Buffer !== 'undefined' && Buffer.isBuffer(input)) {
    assertValidFileSize(input.length);
    // A raw Buffer carries no name, so it is uploaded under a fixed PDF name.
    return bufferToUploadable(input, 'document.pdf');
  }
  if (typeof File !== 'undefined' && input instanceof File) {
    assertSupportedFileType(input.name);
    assertValidFileSize(input.size);
    return input;
  }
  if (typeof Blob !== 'undefined' && input instanceof Blob) {
    assertValidFileSize(input.size);
    return input;
  }
  throw new ValidationError('Invalid file input. Expected File, Blob, Buffer, or file path string.');
}

/** Resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Exponential backoff with up to 10% random jitter, capped at 30 seconds. */
function backoffDelay(attempt, baseMs = 1e3) {
  const exponential = baseMs * Math.pow(2, attempt);
  const jitter = Math.random() * 0.1 * exponential;
  return Math.min(exponential + jitter, 3e4);
}

/** Client for the Parsefy extraction API. */
class Parsefy {
  /**
   * @param configOrApiKey API key string or `{ apiKey, baseUrl, timeout }`.
   * @throws ValidationError when no API key is given or found in PARSEFY_API_KEY.
   */
  constructor(configOrApiKey) {
    this.maxRetries = 3;
    let config = {};
    if (typeof configOrApiKey === 'string') {
      config = { apiKey: configOrApiKey };
    } else if (configOrApiKey) {
      config = configOrApiKey;
    }
    this.apiKey = config.apiKey || this.getEnvApiKey();
    if (!this.apiKey) {
      throw new ValidationError('API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.');
    }
    this.baseUrl = config.baseUrl || DEFAULT_BASE_URL;
    // `||` on purpose: a timeout of 0 would abort every request immediately.
    this.timeout = config.timeout || DEFAULT_TIMEOUT_MS;
  }

  /** Returns PARSEFY_API_KEY under Node.js, otherwise an empty string. */
  getEnvApiKey() {
    return (isNode() && process.env.PARSEFY_API_KEY) || '';
  }

  /**
   * Uploads a document with the JSON Schema derived from `schema`.
   *
   * @param options `{ file, schema, confidenceThreshold }`.
   * @returns The normalized extraction result.
   */
  async extract(options) {
    const { file, schema, confidenceThreshold } = options;
    const jsonSchema = toJsonSchema(schema);
    const upload = await toUploadable(file);
    const form = new FormData();
    form.append('file', upload);
    form.append('output_schema', JSON.stringify(jsonSchema));
    form.append('confidence_threshold', String(confidenceThreshold ?? DEFAULT_CONFIDENCE_THRESHOLD));
    return this.makeRequestWithRetry(form);
  }

  /** Sends the request, retrying only on HTTP 429 up to `maxRetries` times. */
  async makeRequestWithRetry(formData, attempt = 0) {
    try {
      return await this.makeRequest(formData);
    } catch (err) {
      if (err instanceof APIError && err.statusCode === 429 && attempt < this.maxRetries) {
        await sleep(backoffDelay(attempt));
        return this.makeRequestWithRetry(formData, attempt + 1);
      }
      throw err;
    }
  }

  /**
   * POSTs the form to `{baseUrl}/v1/extract` with a Bearer token.
   *
   * @throws APIError on a non-2xx status.
   * @throws ParsefyError with code TIMEOUT, NETWORK_ERROR or UNKNOWN_ERROR otherwise.
   */
  async makeRequest(formData) {
    const url = `${this.baseUrl}/v1/extract`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);
    let payload;
    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: formData,
        signal: controller.signal,
      });
      clearTimeout(timer);
      if (!response.ok) {
        const errorBody = await this.parseErrorResponse(response);
        // `?.` guards against a literal JSON `null` error body.
        throw new APIError(
          errorBody?.message || `API request failed with status ${response.status}`,
          response.status,
          errorBody,
        );
      }
      payload = await response.json();
    } catch (err) {
      clearTimeout(timer);
      if (err instanceof Error && err.name === 'AbortError') {
        throw new ParsefyError(`Request timed out after ${this.timeout}ms`, 'TIMEOUT');
      }
      if (err instanceof ParsefyError) {
        throw err;
      }
      if (err instanceof TypeError) {
        throw new ParsefyError('Network error: Unable to connect to the Parsefy API', 'NETWORK_ERROR');
      }
      throw new ParsefyError(
        `Unexpected error: ${err instanceof Error ? err.message : String(err)}`,
        'UNKNOWN_ERROR',
      );
    }
    // Normalize outside the network try-block: a TypeError caused by an
    // unexpected body shape is not a connectivity failure and must not be
    // reported as NETWORK_ERROR.
    try {
      return normalizeResponse(payload);
    } catch (err) {
      throw new ParsefyError(
        `Unexpected error: ${err instanceof Error ? err.message : String(err)}`,
        'UNKNOWN_ERROR',
      );
    }
  }

  /**
   * Reads an error response body, preferring JSON and falling back to raw text.
   *
   * The body is read exactly once as text and then parsed: a Response body can
   * only be consumed once, so calling `text()` after a failed `json()` always
   * rejected and the raw-text fallback never produced a message.
   */
  async parseErrorResponse(response) {
    let bodyText;
    try {
      bodyText = await response.text();
    } catch {
      return { message: response.statusText };
    }
    try {
      return JSON.parse(bodyText);
    } catch {
      return { message: bodyText || response.statusText };
    }
  }
}

exports.APIError = APIError;
exports.DEFAULT_CONFIDENCE_THRESHOLD = DEFAULT_CONFIDENCE_THRESHOLD;
exports.ExtractionError = ExtractionError;
exports.Parsefy = Parsefy;
exports.ParsefyError = ParsefyError;
exports.ValidationError = ValidationError;
@@ -0,0 +1,253 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Configuration options for the Parsefy client. Falsy values are treated as "not provided".
5
+ */
6
+ interface ParsefyConfig {
7
+ /** API key for authentication. If omitted or empty, the client reads the PARSEFY_API_KEY environment variable (Node.js only) and throws a ValidationError when no key is found. */
8
+ apiKey?: string;
9
+ /** Base URL for the API, without a trailing slash (the client appends `/v1/extract`). Defaults to https://api.parsefy.io */
10
+ baseUrl?: string;
11
+ /** Request timeout in milliseconds. Defaults to 60000 (60 seconds); a value of 0 also falls back to the default. */
12
+ timeout?: number;
13
+ }
14
+ /**
15
+ * Default confidence threshold for extraction, used when `confidenceThreshold` is omitted.
16
+ * Required fields whose confidence falls below this threshold trigger the fallback model.
17
+ */
18
+ declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;
19
+ /**
20
+ * Options for the extract method.
21
+ */
22
+ interface ExtractOptions<T extends z.ZodType> {
23
+ /** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). File paths and File names must end in .pdf or .docx; a Buffer is uploaded under the name `document.pdf`; empty inputs and inputs larger than 10MB are rejected with a ValidationError. */
24
+ file: File | Blob | Buffer | string;
25
+ /** Zod schema defining the structure of data to extract. It is converted to JSON Schema and sent as `output_schema`. */
26
+ schema: T;
27
+ /**
28
+ * Confidence threshold for extraction (0.0 to 1.0). Defaults to 0.85.
29
+ *
30
+ * If a **required** field's confidence falls below this threshold (or returns null),
31
+ * the fallback model is triggered for higher accuracy.
32
+ *
33
+ * **Tip**: Lower threshold = faster (accepts Tier 1 more often).
34
+ * Higher threshold = more accurate (triggers Tier 2 fallback more often).
35
+ *
36
+ * **Important**: Mark fields as `.optional()` in your Zod schema if they might not
37
+ * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
+ */
39
+ confidenceThreshold?: number;
40
+ }
41
+ /**
42
+ * Confidence details for a single extracted field.
43
+ * Provides evidence and explanation for each extraction; each entry is copied from the API response's `_meta.field_confidence` array.
44
+ */
45
+ interface FieldConfidence {
46
+ /** JSON path to the field (e.g., "$.invoice_number"). */
47
+ field: string;
48
+ /** Confidence score for this field (0.0 to 1.0). */
49
+ score: number;
50
+ /** Explanation of how the value was extracted (e.g., "Exact match", "Inferred from header"). */
51
+ reason: string;
52
+ /** Page number where the field was found. */
53
+ page: number;
54
+ /** Source text evidence from the document. */
55
+ text: string;
56
+ }
57
+ /**
58
+ * Metadata about the extraction process, mapped from the snake_case `metadata` and `_meta` objects of the API response.
59
+ */
60
+ interface ExtractionMetadata {
61
+ /** Time taken to process the document in milliseconds. */
62
+ processingTimeMs: number;
63
+ /** Number of input tokens used. */
64
+ inputTokens: number;
65
+ /** Number of output tokens generated. */
66
+ outputTokens: number;
67
+ /** Number of credits consumed (1 credit = 1 page). */
68
+ credits: number;
69
+ /** Whether the fallback model was triggered for higher accuracy. */
70
+ fallbackTriggered: boolean;
71
+ /** Overall confidence score for the extraction (0.0 to 1.0). */
72
+ confidenceScore: number;
73
+ /** Per-field confidence details with evidence and explanations. */
74
+ fieldConfidence: FieldConfidence[];
75
+ /** List of issues or warnings encountered during extraction. */
76
+ issues: string[];
77
+ }
78
+ /**
79
+ * Error payload reported by the API; the client passes the response's `error` value through unchanged as `ExtractResult.error`.
80
+ */
81
+ interface APIErrorResponse {
82
+ /** Error code identifying the type of error. */
83
+ code: string;
84
+ /** Human-readable error message. */
85
+ message: string;
86
+ }
87
+ /**
88
+ * Result of an extraction operation.
89
+ */
90
+ interface ExtractResult<T> {
91
+ /** Extracted data matching the schema, or null if extraction failed. Returned as received from the API: the client does not re-validate it against the Zod schema. */
92
+ object: T | null;
93
+ /** Metadata about the extraction process. */
94
+ metadata: ExtractionMetadata;
95
+ /** Error details if extraction failed, or null on success. */
96
+ error: APIErrorResponse | null;
97
+ }
98
+
99
+ /**
100
+ * Parsefy client for extracting structured data from financial documents.
101
+ *
102
+ * **Important**: All fields are **required by default**. Use `.optional()` for fields
103
+ * that may not appear in all documents to avoid triggering expensive fallback models.
104
+ *
105
+ * @example
106
+ * ```ts
107
+ * import { Parsefy } from 'parsefy';
108
+ * import * as z from 'zod';
109
+ *
110
+ * const client = new Parsefy('pk_your_api_key');
111
+ *
112
+ * const schema = z.object({
113
+ * // REQUIRED - fallback triggered if below confidence threshold
114
+ * invoice_number: z.string(),
115
+ * total: z.number(),
116
+ *
117
+ * // OPTIONAL - won't trigger fallback if missing
118
+ * vendor: z.string().optional(),
119
+ * notes: z.string().optional(),
120
+ * });
121
+ *
122
+ * const { object, metadata, error } = await client.extract({
123
+ * file: './invoice.pdf',
124
+ * schema,
125
+ * confidenceThreshold: 0.85, // default
126
+ * });
127
+ *
128
+ * // Check per-field confidence and evidence
129
+ * metadata.fieldConfidence.forEach((fc) => {
130
+ * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
131
+ * });
132
+ * ```
133
+ */
134
+ declare class Parsefy {
135
+ private readonly apiKey;
136
+ private readonly baseUrl;
137
+ private readonly timeout;
138
+ private readonly maxRetries;
139
+ /**
140
+ * Creates a new Parsefy client.
141
+ *
142
+ * @param configOrApiKey - API key string or configuration object.
143
+ * If not provided, reads from PARSEFY_API_KEY environment variable.
144
+ * @throws ValidationError if no API key is provided or found in the environment.
145
+ * @example
146
+ * ```ts
147
+ * // Using API key directly
148
+ * const client = new Parsefy('pk_your_api_key');
149
+ *
150
+ * // Using configuration object
151
+ * const client = new Parsefy({
152
+ * apiKey: 'pk_your_api_key',
153
+ * timeout: 120000,
154
+ * });
155
+ *
156
+ * // Using environment variable
157
+ * const client = new Parsefy();
158
+ * ```
159
+ */
160
+ constructor(configOrApiKey?: string | ParsefyConfig);
161
+ /**
162
+ * Gets the API key from the PARSEFY_API_KEY environment variable; yields an empty string outside Node.js or when unset.
163
+ */
164
+ private getEnvApiKey;
165
+ /**
166
+ * Extracts structured data from a financial document using the provided Zod schema.
167
+ *
168
+ * **Billing Warning**: All fields are **required by default**. If a required field
169
+ * returns `null` or falls below the `confidenceThreshold`, the fallback model is triggered,
170
+ * which is more expensive. Use `.optional()` for fields that may not appear in all documents.
171
+ *
172
+ * @param options - Extraction options including file, schema, and confidence threshold.
173
+ * @returns Promise resolving to the extraction result with typed data and field-level confidence.
174
+ *
175
+ * @example
176
+ * ```ts
177
+ * const schema = z.object({
178
+ * // REQUIRED - triggers fallback if confidence < threshold
179
+ * invoice_number: z.string().describe('The invoice number'),
180
+ * total: z.number().describe('Total amount including tax'),
181
+ *
182
+ * // OPTIONAL - won't trigger fallback if missing or low confidence
183
+ * vendor: z.string().optional().describe('Vendor/supplier name'),
184
+ * due_date: z.string().optional().describe('Payment due date'),
185
+ * });
186
+ *
187
+ * const { object, metadata, error } = await client.extract({
188
+ * file: './invoice.pdf',
189
+ * schema,
190
+ * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
191
+ * });
192
+ *
193
+ * if (!error && object) {
194
+ * console.log(object.invoice_number); // Fully typed!
195
+ *
196
+ * // Access field-level confidence and evidence
197
+ * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
+ * metadata.fieldConfidence.forEach((fc) => {
199
+ * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
+ * });
201
+ * }
202
+ * ```
203
+ */
204
+ extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
205
+ /**
206
+ * Makes a request, retrying only on HTTP 429 (up to 3 times) with jittered exponential backoff capped at 30 seconds.
207
+ */
208
+ private makeRequestWithRetry;
209
+ /**
210
+ * Makes the actual HTTP POST to `{baseUrl}/v1/extract`, aborting after `timeout` milliseconds.
211
+ */
212
+ private makeRequest;
213
+ /**
214
+ * Parses the error response body, falling back to a `{ message }` object when it is not valid JSON.
215
+ */
216
+ private parseErrorResponse;
217
+ }
218
+
219
+ /**
220
+ * Base error class for all Parsefy errors.
221
+ */
222
+ declare class ParsefyError extends Error {
223
+ /** Error code, if applicable. For request failures the client sets `TIMEOUT`, `NETWORK_ERROR`, or `UNKNOWN_ERROR`. */
224
+ readonly code?: string;
225
+ constructor(message: string, code?: string);
226
+ }
227
+ /**
228
+ * Error thrown when the API returns an HTTP error (4xx/5xx). The inherited `code` is not set for this error type.
229
+ */
230
+ declare class APIError extends ParsefyError {
231
+ /** HTTP status code of the response. */
232
+ readonly statusCode: number;
233
+ /** Parsed response body, if available: the JSON body when it parses, otherwise a `{ message }` object. */
234
+ readonly response?: unknown;
235
+ constructor(message: string, statusCode: number, response?: unknown);
236
+ }
237
+ /**
238
+ * Error thrown when document extraction fails (returned in response.error).
239
+ * This is not an HTTP error - the request succeeded but extraction failed. NOTE(review): the CommonJS build exports this class but does not appear to throw it; extraction failures surface via `ExtractResult.error` - confirm.
240
+ */
241
+ declare class ExtractionError extends ParsefyError {
242
+ /** Metadata about the extraction attempt. */
243
+ readonly metadata: ExtractionMetadata;
244
+ constructor(message: string, code: string, metadata: ExtractionMetadata);
245
+ }
246
+ /**
247
+ * Error thrown for client-side validation failures: missing API key, unsupported file extension, empty or oversized (over 10MB) file, file path not found, file path used outside Node.js, or an unrecognized `file` value.
248
+ */
249
+ declare class ValidationError extends ParsefyError {
250
+ constructor(message: string);
251
+ }
252
+
253
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
@@ -0,0 +1,253 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Configuration options for the Parsefy client.
5
+ */
6
+ interface ParsefyConfig {
7
+ /** API key for authentication. If not provided, reads from PARSEFY_API_KEY environment variable. */
8
+ apiKey?: string;
9
+ /** Base URL for the API. Defaults to https://api.parsefy.io */
10
+ baseUrl?: string;
11
+ /** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
12
+ timeout?: number;
13
+ }
14
+ /**
15
+ * Default confidence threshold for extraction.
16
+ * Required fields whose confidence falls below this threshold trigger the fallback model.
17
+ */
18
+ declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;
19
+ /**
20
+ * Options for the extract method.
21
+ */
22
+ interface ExtractOptions<T extends z.ZodType> {
23
+ /** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). */
24
+ file: File | Blob | Buffer | string;
25
+ /** Zod schema defining the structure of data to extract. */
26
+ schema: T;
27
+ /**
28
+ * Confidence threshold for extraction (0.0 to 1.0). Defaults to 0.85.
29
+ *
30
+ * If a **required** field's confidence falls below this threshold (or returns null),
31
+ * the fallback model is triggered for higher accuracy.
32
+ *
33
+ * **Tip**: Lower threshold = faster (accepts Tier 1 more often).
34
+ * Higher threshold = more accurate (triggers Tier 2 fallback more often).
35
+ *
36
+ * **Important**: Mark fields as `.optional()` in your Zod schema if they might not
37
+ * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
+ */
39
+ confidenceThreshold?: number;
40
+ }
41
+ /**
42
+ * Confidence details for a single extracted field.
43
+ * Provides evidence and explanation for each extraction.
44
+ */
45
+ interface FieldConfidence {
46
+ /** JSON path to the field (e.g., "$.invoice_number"). */
47
+ field: string;
48
+ /** Confidence score for this field (0.0 to 1.0). */
49
+ score: number;
50
+ /** Explanation of how the value was extracted (e.g., "Exact match", "Inferred from header"). */
51
+ reason: string;
52
+ /** Page number where the field was found. */
53
+ page: number;
54
+ /** Source text evidence from the document. */
55
+ text: string;
56
+ }
57
+ /**
58
+ * Metadata about the extraction process.
59
+ */
60
+ interface ExtractionMetadata {
61
+ /** Time taken to process the document in milliseconds. */
62
+ processingTimeMs: number;
63
+ /** Number of input tokens used. */
64
+ inputTokens: number;
65
+ /** Number of output tokens generated. */
66
+ outputTokens: number;
67
+ /** Number of credits consumed (1 credit = 1 page). */
68
+ credits: number;
69
+ /** Whether the fallback model was triggered for higher accuracy. */
70
+ fallbackTriggered: boolean;
71
+ /** Overall confidence score for the extraction (0.0 to 1.0). */
72
+ confidenceScore: number;
73
+ /** Per-field confidence details with evidence and explanations. */
74
+ fieldConfidence: FieldConfidence[];
75
+ /** List of issues or warnings encountered during extraction. */
76
+ issues: string[];
77
+ }
78
+ /**
79
+ * Error response from the API.
80
+ */
81
+ interface APIErrorResponse {
82
+ /** Error code identifying the type of error. */
83
+ code: string;
84
+ /** Human-readable error message. */
85
+ message: string;
86
+ }
87
+ /**
88
+ * Result of an extraction operation.
89
+ */
90
+ interface ExtractResult<T> {
91
+ /** Extracted data matching the schema, or null if extraction failed. */
92
+ object: T | null;
93
+ /** Metadata about the extraction process. */
94
+ metadata: ExtractionMetadata;
95
+ /** Error details if extraction failed, or null on success. */
96
+ error: APIErrorResponse | null;
97
+ }
98
+
99
+ /**
100
+ * Parsefy client for extracting structured data from financial documents.
101
+ *
102
+ * **Important**: All fields are **required by default**. Use `.optional()` for fields
103
+ * that may not appear in all documents to avoid triggering expensive fallback models.
104
+ *
105
+ * @example
106
+ * ```ts
107
+ * import { Parsefy } from 'parsefy';
108
+ * import * as z from 'zod';
109
+ *
110
+ * const client = new Parsefy('pk_your_api_key');
111
+ *
112
+ * const schema = z.object({
113
+ * // REQUIRED - fallback triggered if below confidence threshold
114
+ * invoice_number: z.string(),
115
+ * total: z.number(),
116
+ *
117
+ * // OPTIONAL - won't trigger fallback if missing
118
+ * vendor: z.string().optional(),
119
+ * notes: z.string().optional(),
120
+ * });
121
+ *
122
+ * const { object, metadata, error } = await client.extract({
123
+ * file: './invoice.pdf',
124
+ * schema,
125
+ * confidenceThreshold: 0.85, // default
126
+ * });
127
+ *
128
+ * // Check per-field confidence and evidence
129
+ * metadata.fieldConfidence.forEach((fc) => {
130
+ * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
131
+ * });
132
+ * ```
133
+ */
134
+ declare class Parsefy {
135
+ private readonly apiKey;
136
+ private readonly baseUrl;
137
+ private readonly timeout;
138
+ private readonly maxRetries;
139
+ /**
140
+ * Creates a new Parsefy client.
141
+ *
142
+ * @param configOrApiKey - API key string or configuration object.
143
+ * If not provided, reads from PARSEFY_API_KEY environment variable.
144
+ *
145
+ * @example
146
+ * ```ts
147
+ * // Using API key directly
148
+ * const client = new Parsefy('pk_your_api_key');
149
+ *
150
+ * // Using configuration object
151
+ * const client = new Parsefy({
152
+ * apiKey: 'pk_your_api_key',
153
+ * timeout: 120000,
154
+ * });
155
+ *
156
+ * // Using environment variable
157
+ * const client = new Parsefy();
158
+ * ```
159
+ */
160
+ constructor(configOrApiKey?: string | ParsefyConfig);
161
+ /**
162
+ * Gets the API key from environment variable.
163
+ */
164
+ private getEnvApiKey;
165
+ /**
166
+ * Extracts structured data from a financial document using the provided Zod schema.
167
+ *
168
+ * **Billing Warning**: All fields are **required by default**. If a required field
169
+ * returns `null` or falls below the `confidenceThreshold`, the fallback model is triggered,
170
+ * which is more expensive. Use `.optional()` for fields that may not appear in all documents.
171
+ *
172
+ * @param options - Extraction options including file, schema, and confidence threshold.
173
+ * @returns Promise resolving to the extraction result with typed data and field-level confidence.
174
+ *
175
+ * @example
176
+ * ```ts
177
+ * const schema = z.object({
178
+ * // REQUIRED - triggers fallback if confidence < threshold
179
+ * invoice_number: z.string().describe('The invoice number'),
180
+ * total: z.number().describe('Total amount including tax'),
181
+ *
182
+ * // OPTIONAL - won't trigger fallback if missing or low confidence
183
+ * vendor: z.string().optional().describe('Vendor/supplier name'),
184
+ * due_date: z.string().optional().describe('Payment due date'),
185
+ * });
186
+ *
187
+ * const { object, metadata, error } = await client.extract({
188
+ * file: './invoice.pdf',
189
+ * schema,
190
+ * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
191
+ * });
192
+ *
193
+ * if (!error && object) {
194
+ * console.log(object.invoice_number); // Fully typed!
195
+ *
196
+ * // Access field-level confidence and evidence
197
+ * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
+ * metadata.fieldConfidence.forEach((fc) => {
199
+ * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
+ * });
201
+ * }
202
+ * ```
203
+ */
204
+ extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
205
+ /**
206
+ * Makes a request with retry logic for rate limiting.
207
+ */
208
+ private makeRequestWithRetry;
209
+ /**
210
+ * Makes the actual HTTP request to the API.
211
+ */
212
+ private makeRequest;
213
+ /**
214
+ * Parses error response body safely.
215
+ */
216
+ private parseErrorResponse;
217
+ }
218
+
219
+ /**
220
+ * Base error class for all Parsefy errors.
221
+ */
222
+ declare class ParsefyError extends Error {
223
+ /** Error code, if applicable. */
224
+ readonly code?: string;
225
+ constructor(message: string, code?: string);
226
+ }
227
+ /**
228
+ * Error thrown when the API returns an HTTP error (4xx/5xx).
229
+ */
230
+ declare class APIError extends ParsefyError {
231
+ /** HTTP status code of the response. */
232
+ readonly statusCode: number;
233
+ /** Raw response body, if available. */
234
+ readonly response?: unknown;
235
+ constructor(message: string, statusCode: number, response?: unknown);
236
+ }
237
+ /**
238
+ * Error thrown when document extraction fails (returned in response.error).
239
+ * This is not an HTTP error - the request succeeded but extraction failed.
240
+ */
241
+ declare class ExtractionError extends ParsefyError {
242
+ /** Metadata about the extraction attempt. */
243
+ readonly metadata: ExtractionMetadata;
244
+ constructor(message: string, code: string, metadata: ExtractionMetadata);
245
+ }
246
+ /**
247
+ * Error thrown for client-side validation failures.
248
+ */
249
+ declare class ValidationError extends ParsefyError {
250
+ constructor(message: string);
251
+ }
252
+
253
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
@@ -0,0 +1,253 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Configuration options for the Parsefy client.
5
+ */
6
+ interface ParsefyConfig {
7
+ /** API key for authentication. If not provided, reads from PARSEFY_API_KEY environment variable. */
8
+ apiKey?: string;
9
+ /** Base URL for the API. Defaults to https://api.parsefy.io */
10
+ baseUrl?: string;
11
+ /** Request timeout in milliseconds. Defaults to 60000 (60 seconds). */
12
+ timeout?: number;
13
+ }
14
+ /**
15
+ * Default confidence threshold for extraction.
16
+ * Required fields whose confidence falls below this threshold trigger the fallback model.
17
+ */
18
+ declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.85;
19
+ /**
20
+ * Options for the extract method.
21
+ */
22
+ interface ExtractOptions<T extends z.ZodType> {
23
+ /** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). */
24
+ file: File | Blob | Buffer | string;
25
+ /** Zod schema defining the structure of data to extract. */
26
+ schema: T;
27
+ /**
28
+ * Confidence threshold for extraction (0.0 to 1.0). Defaults to 0.85.
29
+ *
30
+ * If a **required** field's confidence falls below this threshold (or returns null),
31
+ * the fallback model is triggered for higher accuracy.
32
+ *
33
+ * **Tip**: Lower threshold = faster (accepts Tier 1 more often).
34
+ * Higher threshold = more accurate (triggers Tier 2 fallback more often).
35
+ *
36
+ * **Important**: Mark fields as `.optional()` in your Zod schema if they might not
37
+ * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
+ */
39
+ confidenceThreshold?: number;
40
+ }
41
+ /**
42
+ * Confidence details for a single extracted field.
43
+ * Provides evidence and explanation for each extraction.
44
+ */
45
+ interface FieldConfidence {
46
+ /** JSON path to the field (e.g., "$.invoice_number"). */
47
+ field: string;
48
+ /** Confidence score for this field (0.0 to 1.0). */
49
+ score: number;
50
+ /** Explanation of how the value was extracted (e.g., "Exact match", "Inferred from header"). */
51
+ reason: string;
52
+ /** Page number where the field was found. */
53
+ page: number;
54
+ /** Source text evidence from the document. */
55
+ text: string;
56
+ }
57
+ /**
58
+ * Metadata about the extraction process.
59
+ */
60
+ interface ExtractionMetadata {
61
+ /** Time taken to process the document in milliseconds. */
62
+ processingTimeMs: number;
63
+ /** Number of input tokens used. */
64
+ inputTokens: number;
65
+ /** Number of output tokens generated. */
66
+ outputTokens: number;
67
+ /** Number of credits consumed (1 credit = 1 page). */
68
+ credits: number;
69
+ /** Whether the fallback model was triggered for higher accuracy. */
70
+ fallbackTriggered: boolean;
71
+ /** Overall confidence score for the extraction (0.0 to 1.0). */
72
+ confidenceScore: number;
73
+ /** Per-field confidence details with evidence and explanations. */
74
+ fieldConfidence: FieldConfidence[];
75
+ /** List of issues or warnings encountered during extraction. */
76
+ issues: string[];
77
+ }
78
+ /**
79
+ * Error response from the API.
80
+ */
81
+ interface APIErrorResponse {
82
+ /** Error code identifying the type of error. */
83
+ code: string;
84
+ /** Human-readable error message. */
85
+ message: string;
86
+ }
87
+ /**
88
+ * Result of an extraction operation.
89
+ */
90
+ interface ExtractResult<T> {
91
+ /** Extracted data matching the schema, or null if extraction failed. */
92
+ object: T | null;
93
+ /** Metadata about the extraction process. */
94
+ metadata: ExtractionMetadata;
95
+ /** Error details if extraction failed, or null on success. */
96
+ error: APIErrorResponse | null;
97
+ }
98
+
99
+ /**
100
+ * Parsefy client for extracting structured data from financial documents.
101
+ *
102
+ * **Important**: All fields are **required by default**. Use `.optional()` for fields
103
+ * that may not appear in all documents to avoid triggering expensive fallback models.
104
+ *
105
+ * @example
106
+ * ```ts
107
+ * import { Parsefy } from 'parsefy';
108
+ * import * as z from 'zod';
109
+ *
110
+ * const client = new Parsefy('pk_your_api_key');
111
+ *
112
+ * const schema = z.object({
113
+ * // REQUIRED - fallback triggered if below confidence threshold
114
+ * invoice_number: z.string(),
115
+ * total: z.number(),
116
+ *
117
+ * // OPTIONAL - won't trigger fallback if missing
118
+ * vendor: z.string().optional(),
119
+ * notes: z.string().optional(),
120
+ * });
121
+ *
122
+ * const { object, metadata, error } = await client.extract({
123
+ * file: './invoice.pdf',
124
+ * schema,
125
+ * confidenceThreshold: 0.85, // default
126
+ * });
127
+ *
128
+ * // Check per-field confidence and evidence
129
+ * metadata.fieldConfidence.forEach((fc) => {
130
+ * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
131
+ * });
132
+ * ```
133
+ */
134
+ declare class Parsefy {
135
+ private readonly apiKey;
136
+ private readonly baseUrl;
137
+ private readonly timeout;
138
+ private readonly maxRetries;
139
+ /**
140
+ * Creates a new Parsefy client.
141
+ *
142
+ * @param configOrApiKey - API key string or configuration object.
143
+ * If not provided, reads from PARSEFY_API_KEY environment variable.
144
+ *
145
+ * @example
146
+ * ```ts
147
+ * // Using API key directly
148
+ * const client = new Parsefy('pk_your_api_key');
149
+ *
150
+ * // Using configuration object
151
+ * const client = new Parsefy({
152
+ * apiKey: 'pk_your_api_key',
153
+ * timeout: 120000,
154
+ * });
155
+ *
156
+ * // Using environment variable
157
+ * const client = new Parsefy();
158
+ * ```
159
+ */
160
+ constructor(configOrApiKey?: string | ParsefyConfig);
161
+ /**
162
+ * Gets the API key from environment variable.
163
+ */
164
+ private getEnvApiKey;
165
+ /**
166
+ * Extracts structured data from a financial document using the provided Zod schema.
167
+ *
168
+ * **Billing Warning**: All fields are **required by default**. If a required field
169
+ * returns `null` or falls below the `confidenceThreshold`, the fallback model is triggered,
170
+ * which is more expensive. Use `.optional()` for fields that may not appear in all documents.
171
+ *
172
+ * @param options - Extraction options including file, schema, and confidence threshold.
173
+ * @returns Promise resolving to the extraction result with typed data and field-level confidence.
174
+ *
175
+ * @example
176
+ * ```ts
177
+ * const schema = z.object({
178
+ * // REQUIRED - triggers fallback if confidence < threshold
179
+ * invoice_number: z.string().describe('The invoice number'),
180
+ * total: z.number().describe('Total amount including tax'),
181
+ *
182
+ * // OPTIONAL - won't trigger fallback if missing or low confidence
183
+ * vendor: z.string().optional().describe('Vendor/supplier name'),
184
+ * due_date: z.string().optional().describe('Payment due date'),
185
+ * });
186
+ *
187
+ * const { object, metadata, error } = await client.extract({
188
+ * file: './invoice.pdf',
189
+ * schema,
190
+ * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
191
+ * });
192
+ *
193
+ * if (!error && object) {
194
+ * console.log(object.invoice_number); // Fully typed!
195
+ *
196
+ * // Access field-level confidence and evidence
197
+ * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
+ * metadata.fieldConfidence.forEach((fc) => {
199
+ * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
+ * });
201
+ * }
202
+ * ```
203
+ */
204
+ extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
205
+ /**
206
+ * Makes a request with retry logic for rate limiting.
207
+ */
208
+ private makeRequestWithRetry;
209
+ /**
210
+ * Makes the actual HTTP request to the API.
211
+ */
212
+ private makeRequest;
213
+ /**
214
+ * Parses error response body safely.
215
+ */
216
+ private parseErrorResponse;
217
+ }
218
+
219
+ /**
220
+ * Base error class for all Parsefy errors.
221
+ */
222
+ declare class ParsefyError extends Error {
223
+ /** Error code, if applicable. */
224
+ readonly code?: string;
225
+ constructor(message: string, code?: string);
226
+ }
227
+ /**
228
+ * Error thrown when the API returns an HTTP error (4xx/5xx).
229
+ */
230
+ declare class APIError extends ParsefyError {
231
+ /** HTTP status code of the response. */
232
+ readonly statusCode: number;
233
+ /** Raw response body, if available. */
234
+ readonly response?: unknown;
235
+ constructor(message: string, statusCode: number, response?: unknown);
236
+ }
237
+ /**
238
+ * Error thrown when document extraction fails (returned in response.error).
239
+ * This is not an HTTP error - the request succeeded but extraction failed.
240
+ */
241
+ declare class ExtractionError extends ParsefyError {
242
+ /** Metadata about the extraction attempt. */
243
+ readonly metadata: ExtractionMetadata;
244
+ constructor(message: string, code: string, metadata: ExtractionMetadata);
245
+ }
246
+ /**
247
+ * Error thrown for client-side validation failures.
248
+ */
249
+ declare class ValidationError extends ParsefyError {
250
+ constructor(message: string);
251
+ }
252
+
253
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
package/dist/index.mjs ADDED
@@ -0,0 +1,2 @@
1
+ import {zodToJsonSchema}from'zod-to-json-schema';var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,h="https://api.parsefy.io",E=6e4;var i=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends i{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends i{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},s=class extends i{constructor(e){super(e),this.name="ValidationError";}};function x(){return typeof process<"u"&&process.versions?.node!==void 0}function b(t){let e=zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function R(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function w(t){if(!R(t)){let r=Object.keys(d).join(", ");throw new s(`Unsupported file type. Supported types: ${r}`)}}function f(t){if(t===0)throw new s("File is empty");if(t>l){let e=l/1048576;throw new s(`File size exceeds maximum limit of ${e}MB`)}}function F(t){return {object:t.object,metadata:{processingTimeMs:t.metadata.processing_time_ms,inputTokens:t.metadata.input_tokens,outputTokens:t.metadata.output_tokens,credits:t.metadata.credits,fallbackTriggered:t.metadata.fallback_triggered,confidenceScore:t._meta.confidence_score,fieldConfidence:t._meta.field_confidence.map(e=>({field:e.field,score:e.score,reason:e.reason,page:e.page,text:e.text})),issues:t._meta.issues},error:t.error}}function T(t,e){let r=R(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function k(t){if(!x())throw new s("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new s(`File not found: ${t}`);let o=r.basename(t);w(o);let a=e.readFileSync(t);return f(a.length),{buffer:a,filename:o}}async function _(t){if(typeof t=="string"){let{buffer:e,filename:r}=await k(t);return T(e,r)}if(Buffer.isBuffer(t))return f(t.length),T(t,"document.pdf");if(t instanceof File)return w(t.name),f(t.size),t;if(t instanceof Blob)return f(t.size),t;throw new s("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function S(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var g=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new s("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||h,this.timeout=r.timeout||E;}getEnvApiKey(){return x()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:a}=e,n=b(o),m=await _(r),c=new FormData;return c.append("file",m),c.append("output_schema",JSON.stringify(n)),c.append("confidence_threshold",String(a??.85)),this.makeRequestWithRetry(c)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let a=S(r);return await P(a),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,a=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(a),!n.ok){let c=await this.parseErrorResponse(n);throw new p(c.message||`API request failed with status ${n.status}`,n.status,c)}let m=await n.json();return F(m)}catch(n){throw clearTimeout(a),n instanceof 
Error&&n.name==="AbortError"?new i(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof i?n:n instanceof TypeError?new i("Network error: Unable to connect to the Parsefy API","NETWORK_ERROR"):new i(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
2
+ export{p as APIError,u as DEFAULT_CONFIDENCE_THRESHOLD,y as ExtractionError,g as Parsefy,i as ParsefyError,s as ValidationError};
package/package.json CHANGED
@@ -1,12 +1,57 @@
1
1
  {
2
2
  "name": "parsefy",
3
- "version": "1.0.0",
4
- "description": "High-accuracy document data extraction powered by AI",
5
- "license": "ISC",
6
- "author": "marceloakalopes",
7
- "type": "commonjs",
8
- "main": "index.js",
3
+ "version": "1.0.2",
4
+ "description": "Official TypeScript SDK for Parsefy - Financial Document Infrastructure for Developers",
5
+ "author": "",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/parsefy/parsefy-js.git"
10
+ },
11
+ "homepage": "https://parsefy.io",
12
+ "bugs": {
13
+ "url": "https://github.com/parsefy/parsefy-js/issues"
14
+ },
15
+ "main": "./dist/index.cjs",
16
+ "module": "./dist/index.mjs",
17
+ "types": "./dist/index.d.cts",
18
+ "exports": {
19
+ ".": {
20
+ "import": {
21
+ "types": "./dist/index.d.mts",
22
+ "default": "./dist/index.mjs"
23
+ },
24
+ "require": {
25
+ "types": "./dist/index.d.cts",
26
+ "default": "./dist/index.cjs"
27
+ }
28
+ }
29
+ },
30
+ "files": [
31
+ "dist",
32
+ "README.md",
33
+ "LICENSE"
34
+ ],
9
35
  "scripts": {
10
- "test": "echo \"Error: no test specified\" && exit 1"
36
+ "build": "tsup && cp dist/index.d.ts dist/index.d.cts",
37
+ "dev": "tsup --watch",
38
+ "typecheck": "tsc --noEmit",
39
+ "prepublishOnly": "npm run build"
40
+ },
41
+ "peerDependencies": {
42
+ "zod": "^3.0.0"
43
+ },
44
+ "dependencies": {
45
+ "zod-to-json-schema": "^3.22.0"
46
+ },
47
+ "devDependencies": {
48
+ "@types/node": "^20.0.0",
49
+ "tsup": "^8.0.0",
50
+ "tsx": "^4.21.0",
51
+ "typescript": "^5.0.0",
52
+ "zod": "^3.22.0"
53
+ },
54
+ "engines": {
55
+ "node": ">=18"
11
56
  }
12
57
  }
package/index.js DELETED
@@ -1 +0,0 @@
1
- console.log("Parsefy SDK");