parsefy 1.0.3 → 1.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,19 +36,28 @@ const schema = z.object({
36
36
  due_date: z.string().optional().describe('Payment due date'),
37
37
  });
38
38
 
39
- const { object, metadata, error } = await client.extract({
39
+ const { object, metadata, verification, error } = await client.extract({
40
40
  file: './invoice.pdf',
41
41
  schema,
42
+ enableVerification: true, // Enable math verification
42
43
  });
43
44
 
44
45
  if (!error && object) {
45
46
  console.log(object.invoice_number); // Fully typed!
46
47
 
47
48
  // Access field-level confidence and evidence
48
- console.log(`Overall confidence: ${metadata.confidenceScore}`);
49
- metadata.fieldConfidence.forEach((fc) => {
49
+ console.log(`Overall confidence: ${metadata.confidence_score}`);
50
+ metadata.field_confidence.forEach((fc) => {
50
51
  console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
51
52
  });
53
+
54
+ // Access verification results if enabled
55
+ if (verification) {
56
+ console.log(`Verification status: ${verification.status}`);
57
+ verification.checks_run.forEach((check) => {
58
+ console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
59
+ });
60
+ }
52
61
  }
53
62
  ```
54
63
 
@@ -102,6 +111,50 @@ const { object, metadata } = await client.extract({
102
111
 
103
112
  **Default:** `0.85`
104
113
 
114
+ ## Math Verification
115
+
116
+ Enable automatic math verification to ensure extracted numeric data is mathematically consistent:
117
+
118
+ ```typescript
119
+ const { object, verification } = await client.extract({
120
+ file: './invoice.pdf',
121
+ schema,
122
+ enableVerification: true, // Enable math verification
123
+ });
124
+
125
+ if (verification) {
126
+ console.log(`Verification status: ${verification.status}`);
127
+ console.log(`Checks passed: ${verification.checks_passed}`);
128
+ console.log(`Checks failed: ${verification.checks_failed}`);
129
+
130
+ verification.checks_run.forEach((check) => {
131
+ console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
132
+ console.log(` Fields: ${check.fields.join(', ')}`);
133
+ console.log(` Expected: ${check.expected}, Actual: ${check.actual}`);
134
+ console.log(` Delta: ${check.delta}`);
135
+ });
136
+ }
137
+ ```
138
+
139
+ ### Verification Status Values
140
+
141
+ | Status | Description |
142
+ |--------|-------------|
143
+ | `PASSED` | All math checks passed |
144
+ | `FAILED` | One or more math checks failed |
145
+ | `PARTIAL` | Some checks passed, some failed or couldn't be verified |
146
+ | `CANNOT_VERIFY` | Required fields are missing (not a math error) |
147
+ | `NO_RULES` | No verifiable fields detected in schema |
148
+
149
+ ### Supported Verification Rules
150
+
151
+ - **HORIZONTAL_SUM**: Verifies `total = subtotal + tax`
152
+ - **VERTICAL_SUM**: Verifies `subtotal = sum(line_items)`
153
+
154
+ ### Shadow Extraction
155
+
156
+ When `enableVerification: true` and only a single verifiable field is requested (e.g., just `total`), Parsefy automatically extracts supporting fields in the background for verification, then removes them from the response.
157
+
105
158
  ## Response Format
106
159
 
107
160
  ```typescript
@@ -111,15 +164,13 @@ interface ExtractResult<T> {
111
164
 
112
165
  // Metadata about the extraction
113
166
  metadata: {
114
- processingTimeMs: number; // Processing time in milliseconds
115
- inputTokens: number; // Input tokens used
116
- outputTokens: number; // Output tokens generated
167
+ processing_time_ms: number; // Processing time in milliseconds
117
168
  credits: number; // Credits consumed (1 credit = 1 page)
118
- fallbackTriggered: boolean; // Whether fallback model was used
169
+ fallback_triggered: boolean; // Whether fallback model was used
119
170
 
120
171
  // 🆕 Field-level confidence and evidence
121
- confidenceScore: number; // Overall confidence (0.0 to 1.0)
122
- fieldConfidence: Array<{
172
+ confidence_score: number; // Overall confidence (0.0 to 1.0)
173
+ field_confidence: Array<{
123
174
  field: string; // JSON path (e.g., "$.invoice_number")
124
175
  score: number; // Confidence score (0.0 to 1.0)
125
176
  reason: string; // "Exact match", "Inferred from header", etc.
@@ -129,6 +180,23 @@ interface ExtractResult<T> {
129
180
  issues: string[]; // Warnings or anomalies detected
130
181
  };
131
182
 
183
+ // Math verification results (only present if enableVerification was true)
184
+ verification?: {
185
+ status: "PASSED" | "FAILED" | "PARTIAL" | "CANNOT_VERIFY" | "NO_RULES";
186
+ checks_passed: number;
187
+ checks_failed: number;
188
+ cannot_verify_count: number;
189
+ checks_run: Array<{
190
+ type: string; // e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"
191
+ status: string;
192
+ fields: string[];
193
+ passed: boolean;
194
+ delta: number;
195
+ expected: number;
196
+ actual: number;
197
+ }>;
198
+ };
199
+
132
200
  // Error details if extraction failed
133
201
  error: {
134
202
  code: string;
@@ -140,7 +208,11 @@ interface ExtractResult<T> {
140
208
  ### Example Response
141
209
 
142
210
  ```typescript
143
- const { object, metadata } = await client.extract({ file, schema });
211
+ const { object, metadata, verification } = await client.extract({
212
+ file,
213
+ schema,
214
+ enableVerification: true
215
+ });
144
216
 
145
217
  // object:
146
218
  {
@@ -150,9 +222,9 @@ const { object, metadata } = await client.extract({ file, schema });
150
222
  vendor: "Acme Corp"
151
223
  }
152
224
 
153
- // metadata.confidenceScore: 0.94
225
+ // metadata.confidence_score: 0.94
154
226
 
155
- // metadata.fieldConfidence:
227
+ // metadata.field_confidence:
156
228
  [
157
229
  { field: "$.invoice_number", score: 0.98, reason: "Exact match", page: 1, text: "Invoice # INV-2024-0042" },
158
230
  { field: "$.date", score: 0.95, reason: "Exact match", page: 1, text: "Date: 01/15/2024" },
@@ -161,6 +233,25 @@ const { object, metadata } = await client.extract({ file, schema });
161
233
  ]
162
234
 
163
235
  // metadata.issues: []
236
+
237
+ // verification (only present if enableVerification was true):
238
+ {
239
+ status: "PASSED",
240
+ checks_passed: 1,
241
+ checks_failed: 0,
242
+ cannot_verify_count: 0,
243
+ checks_run: [
244
+ {
245
+ type: "HORIZONTAL_SUM",
246
+ status: "PASSED",
247
+ fields: ["total", "subtotal", "tax"],
248
+ passed: true,
249
+ delta: 0.0,
250
+ expected: 1250.00,
251
+ actual: 1250.00
252
+ }
253
+ ]
254
+ }
164
255
  ```
165
256
 
166
257
  ## Configuration
@@ -196,6 +287,7 @@ const client = new Parsefy({
196
287
  | `file` | `File \| Blob \| Buffer \| string` | required | Document to extract from |
197
288
  | `schema` | `z.ZodType` | required | Zod schema defining extraction structure |
198
289
  | `confidenceThreshold` | `number` | `0.85` | Minimum confidence before triggering fallback |
290
+ | `enableVerification` | `boolean` | `false` | Enable math verification (includes shadow extraction) |
199
291
 
200
292
  ## Usage
201
293
 
@@ -286,8 +378,8 @@ app.post('/extract', upload.single('document'), async (req, res) => {
286
378
 
287
379
  res.json({
288
380
  data: object,
289
- confidence: metadata.confidenceScore,
290
- fieldDetails: metadata.fieldConfidence,
381
+ confidence: metadata.confidence_score,
382
+ fieldDetails: metadata.field_confidence,
291
383
  error,
292
384
  });
293
385
  });
@@ -313,7 +405,7 @@ app.post('/extract', async (c) => {
313
405
 
314
406
  return c.json({
315
407
  data: object,
316
- confidence: metadata.confidenceScore,
408
+ confidence: metadata.confidence_score,
317
409
  issues: metadata.issues,
318
410
  error,
319
411
  });
@@ -334,13 +426,13 @@ try {
334
426
  // Extraction-level errors (request succeeded, but extraction failed)
335
427
  if (error) {
336
428
  console.error(`Extraction failed: [${error.code}] ${error.message}`);
337
- console.log(`Fallback triggered: ${metadata.fallbackTriggered}`);
429
+ console.log(`Fallback triggered: ${metadata.fallback_triggered}`);
338
430
  console.log(`Issues: ${metadata.issues.join(', ')}`);
339
431
  return;
340
432
  }
341
433
 
342
434
  // Check for low confidence fields
343
- const lowConfidence = metadata.fieldConfidence.filter((fc) => fc.score < 0.80);
435
+ const lowConfidence = metadata.field_confidence.filter((fc) => fc.score < 0.80);
344
436
  if (lowConfidence.length > 0) {
345
437
  console.warn('Low confidence fields:', lowConfidence);
346
438
  }
@@ -392,6 +484,9 @@ import type {
392
484
  ExtractResult,
393
485
  ExtractionMetadata,
394
486
  FieldConfidence,
487
+ Verification,
488
+ VerificationStatus,
489
+ VerificationCheck,
395
490
  APIErrorResponse,
396
491
  } from 'parsefy';
397
492
 
package/dist/index.cjs CHANGED
@@ -1,2 +1,2 @@
1
- 'use strict';var zodToJsonSchema=require('zod-to-json-schema');var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,g="https://api.parsefy.io",x=6e4;var s=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends s{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends s{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},a=class extends s{constructor(e){super(e),this.name="ValidationError";}};function h(){return typeof process<"u"&&process.versions?.node!==void 0}function R(t){let e=zodToJsonSchema.zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function b(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function w(t){if(!b(t)){let r=Object.keys(d).join(", ");throw new a(`Unsupported file type. Supported types: ${r}`)}}function m(t){if(t===0)throw new a("File is empty");if(t>l){let e=l/1048576;throw new a(`File size exceeds maximum limit of ${e}MB`)}}function F(t){let e=t._meta||{confidence_score:1,field_confidence:[],issues:[]};return {object:t.object,metadata:{processingTimeMs:t.metadata.processing_time_ms,inputTokens:t.metadata.input_tokens,outputTokens:t.metadata.output_tokens,credits:t.metadata.credits,fallbackTriggered:t.metadata.fallback_triggered,confidenceScore:e.confidence_score,fieldConfidence:e.field_confidence.map(r=>({field:r.field,score:r.score,reason:r.reason,page:r.page,text:r.text})),issues:e.issues},error:t.error}}function T(t,e){let r=b(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function I(t){if(!h())throw new a("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new a(`File not found: ${t}`);let o=r.basename(t);w(o);let c=e.readFileSync(t);return m(c.length),{buffer:c,filename:o}}async function _(t){if(typeof t=="string"){let{buffer:e,filename:r}=await I(t);return T(e,r)}if(Buffer.isBuffer(t))return m(t.length),T(t,"document.pdf");if(t instanceof File)return w(t.name),m(t.size),t;if(t instanceof Blob)return m(t.size),t;throw new a("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function S(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var E=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new a("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||g,this.timeout=r.timeout||x;}getEnvApiKey(){return h()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:c}=e,n=R(o),f=await _(r),i=new FormData;return i.append("file",f),i.append("output_schema",JSON.stringify(n)),i.append("confidence_threshold",String(c??.85)),this.makeRequestWithRetry(i)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let c=S(r);return await P(c),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,c=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(c),!n.ok){let i=await this.parseErrorResponse(n);throw new p(i.message||`API request failed with status ${n.status}`,n.status,i)}let f;try{f=await n.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(f)}catch(i){throw new s(`Failed to transform API response: ${i instanceof Error?i.message:String(i)}`,"TRANSFORM_ERROR")}}catch(n){throw clearTimeout(c),n instanceof Error&&n.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof s?n:n instanceof TypeError&&n.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${n.message}`,"NETWORK_ERROR"):n instanceof TypeError?new s(`Type error: ${n.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
2
- exports.APIError=p;exports.DEFAULT_CONFIDENCE_THRESHOLD=u;exports.ExtractionError=y;exports.Parsefy=E;exports.ParsefyError=s;exports.ValidationError=a;
1
+ 'use strict';var zodToJsonSchema=require('zod-to-json-schema');var m=.85,l={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},y=10*1024*1024,x="https://api.parsefy.io",b=6e4;var s=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},f=class extends s{constructor(t,r,n){super(t),this.name="APIError",this.statusCode=r,this.response=n;}},h=class extends s{constructor(t,r,n){super(t,r),this.name="ExtractionError",this.metadata=n;}},i=class extends s{constructor(t){super(t),this.name="ValidationError";}};function g(){return typeof process<"u"&&process.versions?.node!==void 0}function R(e){let t=zodToJsonSchema.zodToJsonSchema(e,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in t&&delete t.$schema,t}function T(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&l[t]||null}function w(e){if(!T(e)){let r=Object.keys(l).join(", ");throw new i(`Unsupported file type. 
Supported types: ${r}`)}}function d(e){if(e===0)throw new i("File is empty");if(e>y){let t=y/1048576;throw new i(`File size exceeds maximum limit of ${t}MB`)}}function F(e){let t=e._meta||{confidence_score:1,field_confidence:[],issues:[]},r={object:e.object,metadata:{processing_time_ms:e.metadata.processing_time_ms,credits:e.metadata.credits,fallback_triggered:e.metadata.fallback_triggered,confidence_score:t.confidence_score,field_confidence:t.field_confidence.map(n=>({field:n.field,score:n.score,reason:n.reason,page:n.page,text:n.text})),issues:t.issues},error:e.error};return e.verification&&(r.verification={status:e.verification.status,checks_passed:e.verification.checks_passed,checks_failed:e.verification.checks_failed,cannot_verify_count:e.verification.cannot_verify_count,checks_run:e.verification.checks_run.map(n=>({type:n.type,status:n.status,fields:n.fields,passed:n.passed,delta:n.delta,expected:n.expected,actual:n.actual}))}),r}function _(e,t){let r=T(t)||"application/octet-stream",n=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([n],t,{type:r}):new Blob([n],{type:r})}async function I(e){if(!g())throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new i(`File not found: ${e}`);let n=r.basename(e);w(n);let a=t.readFileSync(e);return d(a.length),{buffer:a,filename:n}}async function S(e){if(typeof e=="string"){let{buffer:t,filename:r}=await I(e);return _(t,r)}if(Buffer.isBuffer(e))return d(e.length),_(e,"document.pdf");if(e instanceof File)return w(e.name),d(e.size),e;if(e instanceof Blob)return d(e.size),e;throw new i("Invalid file input. 
Expected File, Blob, Buffer, or file path string.")}function A(e){return new Promise(t=>setTimeout(t,e))}function P(e,t=1e3){let r=t*Math.pow(2,e),n=Math.random()*.1*r;return Math.min(r+n,3e4)}var E=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||x,this.timeout=r.timeout||b;}getEnvApiKey(){return g()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:n,confidenceThreshold:a,enableVerification:o}=t,u=R(n),c=await S(r),p=new FormData;return p.append("file",c),p.append("output_schema",JSON.stringify(u)),p.append("confidence_threshold",String(a??.85)),o!==void 0&&p.append("enable_verification",String(o)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(n){if(n instanceof f&&n.statusCode===429&&r<this.maxRetries){let a=P(r);return await A(a),this.makeRequestWithRetry(t,r+1)}throw n}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,n=new AbortController,a=setTimeout(()=>n.abort(),this.timeout);try{let o=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:n.signal});if(clearTimeout(a),!o.ok){let c=await this.parseErrorResponse(o);throw new f(c.message||`API request failed with status ${o.status}`,o.status,c)}let u;try{u=await o.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(u)}catch(c){throw new s(`Failed to transform API response: ${c instanceof Error?c.message:String(c)}`,"TRANSFORM_ERROR")}}catch(o){throw clearTimeout(a),o instanceof Error&&o.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):o instanceof s?o:o instanceof TypeError&&o.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`,"NETWORK_ERROR"):o instanceof TypeError?new s(`Type error: ${o.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${o instanceof Error?o.message:String(o)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
2
+ exports.APIError=f;exports.DEFAULT_CONFIDENCE_THRESHOLD=m;exports.ExtractionError=h;exports.Parsefy=E;exports.ParsefyError=s;exports.ValidationError=i;
package/dist/index.d.cts CHANGED
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
37
37
  * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
38
  */
39
39
  confidenceThreshold?: number;
40
+ /**
41
+ * Enable math verification (includes shadow extraction). Defaults to false.
42
+ *
43
+ * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
44
+ * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
45
+ * supporting fields are automatically extracted in the background for verification.
46
+ */
47
+ enableVerification?: boolean;
40
48
  }
41
49
  /**
42
50
  * Confidence details for a single extracted field.
@@ -59,22 +67,56 @@ interface FieldConfidence {
59
67
  */
60
68
  interface ExtractionMetadata {
61
69
  /** Time taken to process the document in milliseconds. */
62
- processingTimeMs: number;
63
- /** Number of input tokens used. */
64
- inputTokens: number;
65
- /** Number of output tokens generated. */
66
- outputTokens: number;
70
+ processing_time_ms: number;
67
71
  /** Number of credits consumed (1 credit = 1 page). */
68
72
  credits: number;
69
73
  /** Whether the fallback model was triggered for higher accuracy. */
70
- fallbackTriggered: boolean;
74
+ fallback_triggered: boolean;
71
75
  /** Overall confidence score for the extraction (0.0 to 1.0). */
72
- confidenceScore: number;
76
+ confidence_score: number;
73
77
  /** Per-field confidence details with evidence and explanations. */
74
- fieldConfidence: FieldConfidence[];
78
+ field_confidence: FieldConfidence[];
75
79
  /** List of issues or warnings encountered during extraction. */
76
80
  issues: string[];
77
81
  }
82
+ /**
83
+ * Verification status values.
84
+ */
85
+ type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
86
+ /**
87
+ * Individual verification check result.
88
+ */
89
+ interface VerificationCheck {
90
+ /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
91
+ type: string;
92
+ /** Status of this check. */
93
+ status: string;
94
+ /** Fields involved in this check. */
95
+ fields: string[];
96
+ /** Whether this check passed. */
97
+ passed: boolean;
98
+ /** Difference between expected and actual values. */
99
+ delta: number;
100
+ /** Expected value based on the rule. */
101
+ expected: number;
102
+ /** Actual extracted value. */
103
+ actual: number;
104
+ }
105
+ /**
106
+ * Math verification results.
107
+ */
108
+ interface Verification {
109
+ /** Overall verification status. */
110
+ status: VerificationStatus;
111
+ /** Number of checks that passed. */
112
+ checks_passed: number;
113
+ /** Number of checks that failed. */
114
+ checks_failed: number;
115
+ /** Number of checks that could not be verified. */
116
+ cannot_verify_count: number;
117
+ /** Detailed results for each check that was run. */
118
+ checks_run: VerificationCheck[];
119
+ }
78
120
  /**
79
121
  * Error response from the API.
80
122
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
92
134
  object: T | null;
93
135
  /** Metadata about the extraction process. */
94
136
  metadata: ExtractionMetadata;
137
+ /** Math verification results (only present if enableVerification was true). */
138
+ verification?: Verification;
95
139
  /** Error details if extraction failed, or null on success. */
96
140
  error: APIErrorResponse | null;
97
141
  }
@@ -126,7 +170,7 @@ interface ExtractResult<T> {
126
170
  * });
127
171
  *
128
172
  * // Check per-field confidence and evidence
129
- * metadata.fieldConfidence.forEach((fc) => {
173
+ * metadata.field_confidence.forEach((fc) => {
130
174
  * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
131
175
  * });
132
176
  * ```
@@ -184,20 +228,29 @@ declare class Parsefy {
184
228
  * due_date: z.string().optional().describe('Payment due date'),
185
229
  * });
186
230
  *
187
- * const { object, metadata, error } = await client.extract({
231
+ * const { object, metadata, verification, error } = await client.extract({
188
232
  * file: './invoice.pdf',
189
233
  * schema,
190
234
  * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
235
+ * enableVerification: true, // Enable math verification
191
236
  * });
192
237
  *
193
238
  * if (!error && object) {
194
239
  * console.log(object.invoice_number); // Fully typed!
195
240
  *
196
241
  * // Access field-level confidence and evidence
197
- * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
- * metadata.fieldConfidence.forEach((fc) => {
242
+ * console.log(`Overall confidence: ${metadata.confidence_score}`);
243
+ * metadata.field_confidence.forEach((fc) => {
199
244
  * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
245
  * });
246
+ *
247
+ * // Access verification results if enabled
248
+ * if (verification) {
249
+ * console.log(`Verification status: ${verification.status}`);
250
+ * verification.checks_run.forEach((check) => {
251
+ * console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
252
+ * });
253
+ * }
201
254
  * }
202
255
  * ```
203
256
  */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
250
303
  constructor(message: string);
251
304
  }
252
305
 
253
- export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
306
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
package/dist/index.d.mts CHANGED
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
37
37
  * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
38
  */
39
39
  confidenceThreshold?: number;
40
+ /**
41
+ * Enable math verification (includes shadow extraction). Defaults to false.
42
+ *
43
+ * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
44
+ * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
45
+ * supporting fields are automatically extracted in the background for verification.
46
+ */
47
+ enableVerification?: boolean;
40
48
  }
41
49
  /**
42
50
  * Confidence details for a single extracted field.
@@ -59,22 +67,56 @@ interface FieldConfidence {
59
67
  */
60
68
  interface ExtractionMetadata {
61
69
  /** Time taken to process the document in milliseconds. */
62
- processingTimeMs: number;
63
- /** Number of input tokens used. */
64
- inputTokens: number;
65
- /** Number of output tokens generated. */
66
- outputTokens: number;
70
+ processing_time_ms: number;
67
71
  /** Number of credits consumed (1 credit = 1 page). */
68
72
  credits: number;
69
73
  /** Whether the fallback model was triggered for higher accuracy. */
70
- fallbackTriggered: boolean;
74
+ fallback_triggered: boolean;
71
75
  /** Overall confidence score for the extraction (0.0 to 1.0). */
72
- confidenceScore: number;
76
+ confidence_score: number;
73
77
  /** Per-field confidence details with evidence and explanations. */
74
- fieldConfidence: FieldConfidence[];
78
+ field_confidence: FieldConfidence[];
75
79
  /** List of issues or warnings encountered during extraction. */
76
80
  issues: string[];
77
81
  }
82
+ /**
83
+ * Verification status values.
84
+ */
85
+ type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
86
+ /**
87
+ * Individual verification check result.
88
+ */
89
+ interface VerificationCheck {
90
+ /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
91
+ type: string;
92
+ /** Status of this check. */
93
+ status: string;
94
+ /** Fields involved in this check. */
95
+ fields: string[];
96
+ /** Whether this check passed. */
97
+ passed: boolean;
98
+ /** Difference between expected and actual values. */
99
+ delta: number;
100
+ /** Expected value based on the rule. */
101
+ expected: number;
102
+ /** Actual extracted value. */
103
+ actual: number;
104
+ }
105
+ /**
106
+ * Math verification results.
107
+ */
108
+ interface Verification {
109
+ /** Overall verification status. */
110
+ status: VerificationStatus;
111
+ /** Number of checks that passed. */
112
+ checks_passed: number;
113
+ /** Number of checks that failed. */
114
+ checks_failed: number;
115
+ /** Number of checks that could not be verified. */
116
+ cannot_verify_count: number;
117
+ /** Detailed results for each check that was run. */
118
+ checks_run: VerificationCheck[];
119
+ }
78
120
  /**
79
121
  * Error response from the API.
80
122
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
92
134
  object: T | null;
93
135
  /** Metadata about the extraction process. */
94
136
  metadata: ExtractionMetadata;
137
+ /** Math verification results (only present if enableVerification was true). */
138
+ verification?: Verification;
95
139
  /** Error details if extraction failed, or null on success. */
96
140
  error: APIErrorResponse | null;
97
141
  }
@@ -126,7 +170,7 @@ interface ExtractResult<T> {
126
170
  * });
127
171
  *
128
172
  * // Check per-field confidence and evidence
129
- * metadata.fieldConfidence.forEach((fc) => {
173
+ * metadata.field_confidence.forEach((fc) => {
130
174
  * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
131
175
  * });
132
176
  * ```
@@ -184,20 +228,29 @@ declare class Parsefy {
184
228
  * due_date: z.string().optional().describe('Payment due date'),
185
229
  * });
186
230
  *
187
- * const { object, metadata, error } = await client.extract({
231
+ * const { object, metadata, verification, error } = await client.extract({
188
232
  * file: './invoice.pdf',
189
233
  * schema,
190
234
  * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
235
+ * enableVerification: true, // Enable math verification
191
236
  * });
192
237
  *
193
238
  * if (!error && object) {
194
239
  * console.log(object.invoice_number); // Fully typed!
195
240
  *
196
241
  * // Access field-level confidence and evidence
197
- * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
- * metadata.fieldConfidence.forEach((fc) => {
242
+ * console.log(`Overall confidence: ${metadata.confidence_score}`);
243
+ * metadata.field_confidence.forEach((fc) => {
199
244
  * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
245
  * });
246
+ *
247
+ * // Access verification results if enabled
248
+ * if (verification) {
249
+ * console.log(`Verification status: ${verification.status}`);
250
+ * verification.checks_run.forEach((check) => {
251
+ * console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
252
+ * });
253
+ * }
201
254
  * }
202
255
  * ```
203
256
  */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
250
303
  constructor(message: string);
251
304
  }
252
305
 
253
- export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
306
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
package/dist/index.d.ts CHANGED
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
37
37
  * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
38
  */
39
39
  confidenceThreshold?: number;
40
+ /**
41
+ * Enable math verification (includes shadow extraction). Defaults to false.
42
+ *
43
+ * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
44
+ * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
45
+ * supporting fields are automatically extracted in the background for verification.
46
+ */
47
+ enableVerification?: boolean;
40
48
  }
41
49
  /**
42
50
  * Confidence details for a single extracted field.
@@ -59,22 +67,56 @@ interface FieldConfidence {
59
67
  */
60
68
  interface ExtractionMetadata {
61
69
  /** Time taken to process the document in milliseconds. */
62
- processingTimeMs: number;
63
- /** Number of input tokens used. */
64
- inputTokens: number;
65
- /** Number of output tokens generated. */
66
- outputTokens: number;
70
+ processing_time_ms: number;
67
71
  /** Number of credits consumed (1 credit = 1 page). */
68
72
  credits: number;
69
73
  /** Whether the fallback model was triggered for higher accuracy. */
70
- fallbackTriggered: boolean;
74
+ fallback_triggered: boolean;
71
75
  /** Overall confidence score for the extraction (0.0 to 1.0). */
72
- confidenceScore: number;
76
+ confidence_score: number;
73
77
  /** Per-field confidence details with evidence and explanations. */
74
- fieldConfidence: FieldConfidence[];
78
+ field_confidence: FieldConfidence[];
75
79
  /** List of issues or warnings encountered during extraction. */
76
80
  issues: string[];
77
81
  }
82
+ /**
83
+ * Verification status values.
84
+ */
85
+ type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
86
+ /**
87
+ * Individual verification check result.
88
+ */
89
+ interface VerificationCheck {
90
+ /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
91
+ type: string;
92
+ /** Status of this check. */
93
+ status: string;
94
+ /** Fields involved in this check. */
95
+ fields: string[];
96
+ /** Whether this check passed. */
97
+ passed: boolean;
98
+ /** Difference between expected and actual values. */
99
+ delta: number;
100
+ /** Expected value based on the rule. */
101
+ expected: number;
102
+ /** Actual extracted value. */
103
+ actual: number;
104
+ }
105
+ /**
106
+ * Math verification results.
107
+ */
108
+ interface Verification {
109
+ /** Overall verification status. */
110
+ status: VerificationStatus;
111
+ /** Number of checks that passed. */
112
+ checks_passed: number;
113
+ /** Number of checks that failed. */
114
+ checks_failed: number;
115
+ /** Number of checks that could not be verified. */
116
+ cannot_verify_count: number;
117
+ /** Detailed results for each check that was run. */
118
+ checks_run: VerificationCheck[];
119
+ }
78
120
  /**
79
121
  * Error response from the API.
80
122
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
92
134
  object: T | null;
93
135
  /** Metadata about the extraction process. */
94
136
  metadata: ExtractionMetadata;
137
+ /** Math verification results (only present if enableVerification was true). */
138
+ verification?: Verification;
95
139
  /** Error details if extraction failed, or null on success. */
96
140
  error: APIErrorResponse | null;
97
141
  }
@@ -126,7 +170,7 @@ interface ExtractResult<T> {
126
170
  * });
127
171
  *
128
172
  * // Check per-field confidence and evidence
129
- * metadata.fieldConfidence.forEach((fc) => {
173
+ * metadata.field_confidence.forEach((fc) => {
130
174
  * console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
131
175
  * });
132
176
  * ```
@@ -184,20 +228,29 @@ declare class Parsefy {
184
228
  * due_date: z.string().optional().describe('Payment due date'),
185
229
  * });
186
230
  *
187
- * const { object, metadata, error } = await client.extract({
231
+ * const { object, metadata, verification, error } = await client.extract({
188
232
  * file: './invoice.pdf',
189
233
  * schema,
190
234
  * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
235
+ * enableVerification: true, // Enable math verification
191
236
  * });
192
237
  *
193
238
  * if (!error && object) {
194
239
  * console.log(object.invoice_number); // Fully typed!
195
240
  *
196
241
  * // Access field-level confidence and evidence
197
- * console.log(`Overall confidence: ${metadata.confidenceScore}`);
198
- * metadata.fieldConfidence.forEach((fc) => {
242
+ * console.log(`Overall confidence: ${metadata.confidence_score}`);
243
+ * metadata.field_confidence.forEach((fc) => {
199
244
  * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
245
  * });
246
+ *
247
+ * // Access verification results if enabled
248
+ * if (verification) {
249
+ * console.log(`Verification status: ${verification.status}`);
250
+ * verification.checks_run.forEach((check) => {
251
+ * console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
252
+ * });
253
+ * }
201
254
  * }
202
255
  * ```
203
256
  */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
250
303
  constructor(message: string);
251
304
  }
252
305
 
253
- export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
306
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
package/dist/index.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import {zodToJsonSchema}from'zod-to-json-schema';var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,g="https://api.parsefy.io",x=6e4;var s=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends s{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends s{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},a=class extends s{constructor(e){super(e),this.name="ValidationError";}};function h(){return typeof process<"u"&&process.versions?.node!==void 0}function R(t){let e=zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function b(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function w(t){if(!b(t)){let r=Object.keys(d).join(", ");throw new a(`Unsupported file type. Supported types: ${r}`)}}function m(t){if(t===0)throw new a("File is empty");if(t>l){let e=l/1048576;throw new a(`File size exceeds maximum limit of ${e}MB`)}}function F(t){let e=t._meta||{confidence_score:1,field_confidence:[],issues:[]};return {object:t.object,metadata:{processingTimeMs:t.metadata.processing_time_ms,inputTokens:t.metadata.input_tokens,outputTokens:t.metadata.output_tokens,credits:t.metadata.credits,fallbackTriggered:t.metadata.fallback_triggered,confidenceScore:e.confidence_score,fieldConfidence:e.field_confidence.map(r=>({field:r.field,score:r.score,reason:r.reason,page:r.page,text:r.text})),issues:e.issues},error:t.error}}function T(t,e){let r=b(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function I(t){if(!h())throw new a("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new a(`File not found: ${t}`);let o=r.basename(t);w(o);let c=e.readFileSync(t);return m(c.length),{buffer:c,filename:o}}async function _(t){if(typeof t=="string"){let{buffer:e,filename:r}=await I(t);return T(e,r)}if(Buffer.isBuffer(t))return m(t.length),T(t,"document.pdf");if(t instanceof File)return w(t.name),m(t.size),t;if(t instanceof Blob)return m(t.size),t;throw new a("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function S(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var E=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new a("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||g,this.timeout=r.timeout||x;}getEnvApiKey(){return h()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:c}=e,n=R(o),f=await _(r),i=new FormData;return i.append("file",f),i.append("output_schema",JSON.stringify(n)),i.append("confidence_threshold",String(c??.85)),this.makeRequestWithRetry(i)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let c=S(r);return await P(c),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,c=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(c),!n.ok){let i=await this.parseErrorResponse(n);throw new p(i.message||`API request failed with status ${n.status}`,n.status,i)}let f;try{f=await n.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(f)}catch(i){throw new s(`Failed to transform API response: ${i instanceof Error?i.message:String(i)}`,"TRANSFORM_ERROR")}}catch(n){throw clearTimeout(c),n instanceof Error&&n.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof s?n:n instanceof TypeError&&n.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${n.message}`,"NETWORK_ERROR"):n instanceof TypeError?new s(`Type error: ${n.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
2
- export{p as APIError,u as DEFAULT_CONFIDENCE_THRESHOLD,y as ExtractionError,E as Parsefy,s as ParsefyError,a as ValidationError};
1
+ import {zodToJsonSchema}from'zod-to-json-schema';var m=.85,l={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},y=10*1024*1024,x="https://api.parsefy.io",b=6e4;var s=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},f=class extends s{constructor(t,r,n){super(t),this.name="APIError",this.statusCode=r,this.response=n;}},h=class extends s{constructor(t,r,n){super(t,r),this.name="ExtractionError",this.metadata=n;}},i=class extends s{constructor(t){super(t),this.name="ValidationError";}};function g(){return typeof process<"u"&&process.versions?.node!==void 0}function R(e){let t=zodToJsonSchema(e,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in t&&delete t.$schema,t}function T(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&l[t]||null}function w(e){if(!T(e)){let r=Object.keys(l).join(", ");throw new i(`Unsupported file type. 
Supported types: ${r}`)}}function d(e){if(e===0)throw new i("File is empty");if(e>y){let t=y/1048576;throw new i(`File size exceeds maximum limit of ${t}MB`)}}function F(e){let t=e._meta||{confidence_score:1,field_confidence:[],issues:[]},r={object:e.object,metadata:{processing_time_ms:e.metadata.processing_time_ms,credits:e.metadata.credits,fallback_triggered:e.metadata.fallback_triggered,confidence_score:t.confidence_score,field_confidence:t.field_confidence.map(n=>({field:n.field,score:n.score,reason:n.reason,page:n.page,text:n.text})),issues:t.issues},error:e.error};return e.verification&&(r.verification={status:e.verification.status,checks_passed:e.verification.checks_passed,checks_failed:e.verification.checks_failed,cannot_verify_count:e.verification.cannot_verify_count,checks_run:e.verification.checks_run.map(n=>({type:n.type,status:n.status,fields:n.fields,passed:n.passed,delta:n.delta,expected:n.expected,actual:n.actual}))}),r}function _(e,t){let r=T(t)||"application/octet-stream",n=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([n],t,{type:r}):new Blob([n],{type:r})}async function I(e){if(!g())throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new i(`File not found: ${e}`);let n=r.basename(e);w(n);let a=t.readFileSync(e);return d(a.length),{buffer:a,filename:n}}async function S(e){if(typeof e=="string"){let{buffer:t,filename:r}=await I(e);return _(t,r)}if(Buffer.isBuffer(e))return d(e.length),_(e,"document.pdf");if(e instanceof File)return w(e.name),d(e.size),e;if(e instanceof Blob)return d(e.size),e;throw new i("Invalid file input. 
Expected File, Blob, Buffer, or file path string.")}function A(e){return new Promise(t=>setTimeout(t,e))}function P(e,t=1e3){let r=t*Math.pow(2,e),n=Math.random()*.1*r;return Math.min(r+n,3e4)}var E=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||x,this.timeout=r.timeout||b;}getEnvApiKey(){return g()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:n,confidenceThreshold:a,enableVerification:o}=t,u=R(n),c=await S(r),p=new FormData;return p.append("file",c),p.append("output_schema",JSON.stringify(u)),p.append("confidence_threshold",String(a??.85)),o!==void 0&&p.append("enable_verification",String(o)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(n){if(n instanceof f&&n.statusCode===429&&r<this.maxRetries){let a=P(r);return await A(a),this.makeRequestWithRetry(t,r+1)}throw n}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,n=new AbortController,a=setTimeout(()=>n.abort(),this.timeout);try{let o=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:n.signal});if(clearTimeout(a),!o.ok){let c=await this.parseErrorResponse(o);throw new f(c.message||`API request failed with status ${o.status}`,o.status,c)}let u;try{u=await o.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(u)}catch(c){throw new s(`Failed to transform API response: ${c instanceof Error?c.message:String(c)}`,"TRANSFORM_ERROR")}}catch(o){throw clearTimeout(a),o instanceof Error&&o.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):o instanceof s?o:o instanceof TypeError&&o.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`,"NETWORK_ERROR"):o instanceof TypeError?new s(`Type error: ${o.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${o instanceof Error?o.message:String(o)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
2
+ export{f as APIError,m as DEFAULT_CONFIDENCE_THRESHOLD,h as ExtractionError,E as Parsefy,s as ParsefyError,i as ValidationError};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "parsefy",
3
- "version": "1.0.3",
3
+ "version": "1.1.0",
4
4
  "description": "Official TypeScript SDK for Parsefy - Financial Document Infrastructure for Developers",
5
5
  "author": "",
6
6
  "license": "MIT",