parsefy 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,9 +36,10 @@ const schema = z.object({
36
36
  due_date: z.string().optional().describe('Payment due date'),
37
37
  });
38
38
 
39
- const { object, metadata, error } = await client.extract({
39
+ const { object, metadata, verification, error } = await client.extract({
40
40
  file: './invoice.pdf',
41
41
  schema,
42
+ enableVerification: true, // Enable math verification
42
43
  });
43
44
 
44
45
  if (!error && object) {
@@ -49,6 +50,14 @@ if (!error && object) {
49
50
  metadata.field_confidence.forEach((fc) => {
50
51
  console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
51
52
  });
53
+
54
+ // Access verification results if enabled
55
+ if (verification) {
56
+ console.log(`Verification status: ${verification.status}`);
57
+ verification.checks_run.forEach((check) => {
58
+ console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
59
+ });
60
+ }
52
61
  }
53
62
  ```
54
63
 
@@ -102,6 +111,50 @@ const { object, metadata } = await client.extract({
102
111
 
103
112
  **Default:** `0.85`
104
113
 
114
+ ## Math Verification
115
+
116
+ Enable automatic math verification to check that extracted numeric data is mathematically consistent:
117
+
118
+ ```typescript
119
+ const { object, verification } = await client.extract({
120
+ file: './invoice.pdf',
121
+ schema,
122
+ enableVerification: true, // Enable math verification
123
+ });
124
+
125
+ if (verification) {
126
+ console.log(`Verification status: ${verification.status}`);
127
+ console.log(`Checks passed: ${verification.checks_passed}`);
128
+ console.log(`Checks failed: ${verification.checks_failed}`);
129
+
130
+ verification.checks_run.forEach((check) => {
131
+ console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
132
+ console.log(` Fields: ${check.fields.join(', ')}`);
133
+ console.log(` Expected: ${check.expected}, Actual: ${check.actual}`);
134
+ console.log(` Delta: ${check.delta}`);
135
+ });
136
+ }
137
+ ```
138
+
139
+ ### Verification Status Values
140
+
141
+ | Status | Description |
142
+ |--------|-------------|
143
+ | `PASSED` | All math checks passed |
144
+ | `FAILED` | One or more math checks failed |
145
+ | `PARTIAL` | Some checks passed, some failed or couldn't be verified |
146
+ | `CANNOT_VERIFY` | Required fields are missing (not a math error) |
147
+ | `NO_RULES` | No verifiable fields detected in schema |
148
+
149
+ ### Supported Verification Rules
150
+
151
+ - **HORIZONTAL_SUM**: Verifies `total = subtotal + tax`
152
+ - **VERTICAL_SUM**: Verifies `subtotal = sum(line_items)`
153
+
154
+ ### Shadow Extraction
155
+
156
+ When `enableVerification` is `true` and the schema requests only a single verifiable field (e.g., just `total`), Parsefy automatically extracts the supporting fields it needs (e.g., `subtotal` and `tax`) in the background for verification, then removes them from the response.
157
+
105
158
  ## Response Format
106
159
 
107
160
  ```typescript
@@ -112,8 +165,6 @@ interface ExtractResult<T> {
112
165
  // Metadata about the extraction
113
166
  metadata: {
114
167
  processing_time_ms: number; // Processing time in milliseconds
115
- input_tokens: number; // Input tokens used
116
- output_tokens: number; // Output tokens generated
117
168
  credits: number; // Credits consumed (1 credit = 1 page)
118
169
  fallback_triggered: boolean; // Whether fallback model was used
119
170
 
@@ -129,6 +180,23 @@ interface ExtractResult<T> {
129
180
  issues: string[]; // Warnings or anomalies detected
130
181
  };
131
182
 
183
+ // Math verification results (only present if enableVerification was true)
184
+ verification?: {
185
+ status: "PASSED" | "FAILED" | "PARTIAL" | "CANNOT_VERIFY" | "NO_RULES";
186
+ checks_passed: number;
187
+ checks_failed: number;
188
+ cannot_verify_count: number;
189
+ checks_run: Array<{
190
+ type: string; // e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"
191
+ status: string;
192
+ fields: string[];
193
+ passed: boolean;
194
+ delta: number;
195
+ expected: number;
196
+ actual: number;
197
+ }>;
198
+ };
199
+
132
200
  // Error details if extraction failed
133
201
  error: {
134
202
  code: string;
@@ -140,7 +208,11 @@ interface ExtractResult<T> {
140
208
  ### Example Response
141
209
 
142
210
  ```typescript
143
- const { object, metadata } = await client.extract({ file, schema });
211
+ const { object, metadata, verification } = await client.extract({
212
+ file,
213
+ schema,
214
+ enableVerification: true
215
+ });
144
216
 
145
217
  // object:
146
218
  {
@@ -161,6 +233,25 @@ const { object, metadata } = await client.extract({ file, schema });
161
233
  ]
162
234
 
163
235
  // metadata.issues: []
236
+
237
+ // verification (only present if enableVerification was true):
238
+ {
239
+ status: "PASSED",
240
+ checks_passed: 1,
241
+ checks_failed: 0,
242
+ cannot_verify_count: 0,
243
+ checks_run: [
244
+ {
245
+ type: "HORIZONTAL_SUM",
246
+ status: "PASSED",
247
+ fields: ["total", "subtotal", "tax"],
248
+ passed: true,
249
+ delta: 0.0,
250
+ expected: 1250.00,
251
+ actual: 1250.00
252
+ }
253
+ ]
254
+ }
164
255
  ```
165
256
 
166
257
  ## Configuration
@@ -196,6 +287,7 @@ const client = new Parsefy({
196
287
  | `file` | `File \| Blob \| Buffer \| string` | required | Document to extract from |
197
288
  | `schema` | `z.ZodType` | required | Zod schema defining extraction structure |
198
289
  | `confidenceThreshold` | `number` | `0.85` | Minimum confidence before triggering fallback |
290
+ | `enableVerification` | `boolean` | `false` | Enable math verification (includes shadow extraction) |
199
291
 
200
292
  ## Usage
201
293
 
@@ -392,6 +484,9 @@ import type {
392
484
  ExtractResult,
393
485
  ExtractionMetadata,
394
486
  FieldConfidence,
487
+ Verification,
488
+ VerificationStatus,
489
+ VerificationCheck,
395
490
  APIErrorResponse,
396
491
  } from 'parsefy';
397
492
 
package/dist/index.cjs CHANGED
@@ -1,2 +1,2 @@
1
- 'use strict';var zodToJsonSchema=require('zod-to-json-schema');var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,g="https://api.parsefy.io",x=6e4;var s=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends s{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends s{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},a=class extends s{constructor(e){super(e),this.name="ValidationError";}};function h(){return typeof process<"u"&&process.versions?.node!==void 0}function T(t){let e=zodToJsonSchema.zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function b(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function _(t){if(!b(t)){let r=Object.keys(d).join(", ");throw new a(`Unsupported file type. 
Supported types: ${r}`)}}function m(t){if(t===0)throw new a("File is empty");if(t>l){let e=l/1048576;throw new a(`File size exceeds maximum limit of ${e}MB`)}}function w(t){let e=t._meta||{confidence_score:1,field_confidence:[],issues:[]};return {object:t.object,metadata:{processing_time_ms:t.metadata.processing_time_ms,input_tokens:t.metadata.input_tokens,output_tokens:t.metadata.output_tokens,credits:t.metadata.credits,fallback_triggered:t.metadata.fallback_triggered,confidence_score:e.confidence_score,field_confidence:e.field_confidence.map(r=>({field:r.field,score:r.score,reason:r.reason,page:r.page,text:r.text})),issues:e.issues},error:t.error}}function R(t,e){let r=b(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function I(t){if(!h())throw new a("File path strings are only supported in Node.js. Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new a(`File not found: ${t}`);let o=r.basename(t);_(o);let c=e.readFileSync(t);return m(c.length),{buffer:c,filename:o}}async function F(t){if(typeof t=="string"){let{buffer:e,filename:r}=await I(t);return R(e,r)}if(Buffer.isBuffer(t))return m(t.length),R(t,"document.pdf");if(t instanceof File)return _(t.name),m(t.size),t;if(t instanceof Blob)return m(t.size),t;throw new a("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function A(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var E=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new a("API key is required. 
Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||g,this.timeout=r.timeout||x;}getEnvApiKey(){return h()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:c}=e,n=T(o),f=await F(r),i=new FormData;return i.append("file",f),i.append("output_schema",JSON.stringify(n)),i.append("confidence_threshold",String(c??.85)),this.makeRequestWithRetry(i)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let c=A(r);return await P(c),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,c=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(c),!n.ok){let i=await this.parseErrorResponse(n);throw new p(i.message||`API request failed with status ${n.status}`,n.status,i)}let f;try{f=await n.json();}catch{throw new s("Failed to parse API response as JSON. The API may have returned an invalid response.","PARSE_ERROR")}try{return w(f)}catch(i){throw new s(`Failed to transform API response: ${i instanceof Error?i.message:String(i)}`,"TRANSFORM_ERROR")}}catch(n){throw clearTimeout(c),n instanceof Error&&n.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof s?n:n instanceof TypeError&&n.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${n.message}`,"NETWORK_ERROR"):n instanceof TypeError?new s(`Type error: ${n.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
2
- exports.APIError=p;exports.DEFAULT_CONFIDENCE_THRESHOLD=u;exports.ExtractionError=y;exports.Parsefy=E;exports.ParsefyError=s;exports.ValidationError=a;
1
+ 'use strict';var zodToJsonSchema=require('zod-to-json-schema');var m=.85,l={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},y=10*1024*1024,x="https://api.parsefy.io",b=6e4;var s=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},f=class extends s{constructor(t,r,n){super(t),this.name="APIError",this.statusCode=r,this.response=n;}},h=class extends s{constructor(t,r,n){super(t,r),this.name="ExtractionError",this.metadata=n;}},i=class extends s{constructor(t){super(t),this.name="ValidationError";}};function g(){return typeof process<"u"&&process.versions?.node!==void 0}function R(e){let t=zodToJsonSchema.zodToJsonSchema(e,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in t&&delete t.$schema,t}function T(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&l[t]||null}function w(e){if(!T(e)){let r=Object.keys(l).join(", ");throw new i(`Unsupported file type. 
Supported types: ${r}`)}}function d(e){if(e===0)throw new i("File is empty");if(e>y){let t=y/1048576;throw new i(`File size exceeds maximum limit of ${t}MB`)}}function F(e){let t=e._meta||{confidence_score:1,field_confidence:[],issues:[]},r={object:e.object,metadata:{processing_time_ms:e.metadata.processing_time_ms,credits:e.metadata.credits,fallback_triggered:e.metadata.fallback_triggered,confidence_score:t.confidence_score,field_confidence:t.field_confidence.map(n=>({field:n.field,score:n.score,reason:n.reason,page:n.page,text:n.text})),issues:t.issues},error:e.error};return e.verification&&(r.verification={status:e.verification.status,checks_passed:e.verification.checks_passed,checks_failed:e.verification.checks_failed,cannot_verify_count:e.verification.cannot_verify_count,checks_run:e.verification.checks_run.map(n=>({type:n.type,status:n.status,fields:n.fields,passed:n.passed,delta:n.delta,expected:n.expected,actual:n.actual}))}),r}function _(e,t){let r=T(t)||"application/octet-stream",n=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([n],t,{type:r}):new Blob([n],{type:r})}async function I(e){if(!g())throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new i(`File not found: ${e}`);let n=r.basename(e);w(n);let a=t.readFileSync(e);return d(a.length),{buffer:a,filename:n}}async function S(e){if(typeof e=="string"){let{buffer:t,filename:r}=await I(e);return _(t,r)}if(Buffer.isBuffer(e))return d(e.length),_(e,"document.pdf");if(e instanceof File)return w(e.name),d(e.size),e;if(e instanceof Blob)return d(e.size),e;throw new i("Invalid file input. 
Expected File, Blob, Buffer, or file path string.")}function A(e){return new Promise(t=>setTimeout(t,e))}function P(e,t=1e3){let r=t*Math.pow(2,e),n=Math.random()*.1*r;return Math.min(r+n,3e4)}var E=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||x,this.timeout=r.timeout||b;}getEnvApiKey(){return g()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:n,confidenceThreshold:a,enableVerification:o}=t,u=R(n),c=await S(r),p=new FormData;return p.append("file",c),p.append("output_schema",JSON.stringify(u)),p.append("confidence_threshold",String(a??.85)),o!==void 0&&p.append("enable_verification",String(o)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(n){if(n instanceof f&&n.statusCode===429&&r<this.maxRetries){let a=P(r);return await A(a),this.makeRequestWithRetry(t,r+1)}throw n}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,n=new AbortController,a=setTimeout(()=>n.abort(),this.timeout);try{let o=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:n.signal});if(clearTimeout(a),!o.ok){let c=await this.parseErrorResponse(o);throw new f(c.message||`API request failed with status ${o.status}`,o.status,c)}let u;try{u=await o.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(u)}catch(c){throw new s(`Failed to transform API response: ${c instanceof Error?c.message:String(c)}`,"TRANSFORM_ERROR")}}catch(o){throw clearTimeout(a),o instanceof Error&&o.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):o instanceof s?o:o instanceof TypeError&&o.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`,"NETWORK_ERROR"):o instanceof TypeError?new s(`Type error: ${o.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${o instanceof Error?o.message:String(o)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
2
+ exports.APIError=f;exports.DEFAULT_CONFIDENCE_THRESHOLD=m;exports.ExtractionError=h;exports.Parsefy=E;exports.ParsefyError=s;exports.ValidationError=i;
package/dist/index.d.cts CHANGED
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
37
37
  * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
38
  */
39
39
  confidenceThreshold?: number;
40
+ /**
41
+ * Enable math verification (includes shadow extraction). Defaults to false.
42
+ *
43
+ * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
44
+ * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
45
+ * supporting fields are automatically extracted in the background for verification.
46
+ */
47
+ enableVerification?: boolean;
40
48
  }
41
49
  /**
42
50
  * Confidence details for a single extracted field.
@@ -60,10 +68,6 @@ interface FieldConfidence {
60
68
  interface ExtractionMetadata {
61
69
  /** Time taken to process the document in milliseconds. */
62
70
  processing_time_ms: number;
63
- /** Number of input tokens used. */
64
- input_tokens: number;
65
- /** Number of output tokens generated. */
66
- output_tokens: number;
67
71
  /** Number of credits consumed (1 credit = 1 page). */
68
72
  credits: number;
69
73
  /** Whether the fallback model was triggered for higher accuracy. */
@@ -75,6 +79,44 @@ interface ExtractionMetadata {
75
79
  /** List of issues or warnings encountered during extraction. */
76
80
  issues: string[];
77
81
  }
82
+ /**
83
+ * Verification status values.
84
+ */
85
+ type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
86
+ /**
87
+ * Individual verification check result.
88
+ */
89
+ interface VerificationCheck {
90
+ /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
91
+ type: string;
92
+ /** Status of this check. */
93
+ status: string;
94
+ /** Fields involved in this check. */
95
+ fields: string[];
96
+ /** Whether this check passed. */
97
+ passed: boolean;
98
+ /** Difference between expected and actual values. */
99
+ delta: number;
100
+ /** Expected value based on the rule. */
101
+ expected: number;
102
+ /** Actual extracted value. */
103
+ actual: number;
104
+ }
105
+ /**
106
+ * Math verification results.
107
+ */
108
+ interface Verification {
109
+ /** Overall verification status. */
110
+ status: VerificationStatus;
111
+ /** Number of checks that passed. */
112
+ checks_passed: number;
113
+ /** Number of checks that failed. */
114
+ checks_failed: number;
115
+ /** Number of checks that could not be verified. */
116
+ cannot_verify_count: number;
117
+ /** Detailed results for each check that was run. */
118
+ checks_run: VerificationCheck[];
119
+ }
78
120
  /**
79
121
  * Error response from the API.
80
122
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
92
134
  object: T | null;
93
135
  /** Metadata about the extraction process. */
94
136
  metadata: ExtractionMetadata;
137
+ /** Math verification results (only present if enableVerification was true). */
138
+ verification?: Verification;
95
139
  /** Error details if extraction failed, or null on success. */
96
140
  error: APIErrorResponse | null;
97
141
  }
@@ -184,10 +228,11 @@ declare class Parsefy {
184
228
  * due_date: z.string().optional().describe('Payment due date'),
185
229
  * });
186
230
  *
187
- * const { object, metadata, error } = await client.extract({
231
+ * const { object, metadata, verification, error } = await client.extract({
188
232
  * file: './invoice.pdf',
189
233
  * schema,
190
234
  * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
235
+ * enableVerification: true, // Enable math verification
191
236
  * });
192
237
  *
193
238
  * if (!error && object) {
@@ -198,6 +243,14 @@ declare class Parsefy {
198
243
  * metadata.field_confidence.forEach((fc) => {
199
244
  * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
245
  * });
246
+ *
247
+ * // Access verification results if enabled
248
+ * if (verification) {
249
+ * console.log(`Verification status: ${verification.status}`);
250
+ * verification.checks_run.forEach((check) => {
251
+ * console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
252
+ * });
253
+ * }
201
254
  * }
202
255
  * ```
203
256
  */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
250
303
  constructor(message: string);
251
304
  }
252
305
 
253
- export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
306
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
package/dist/index.d.mts CHANGED
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
37
37
  * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
38
  */
39
39
  confidenceThreshold?: number;
40
+ /**
41
+ * Enable math verification (includes shadow extraction). Defaults to false.
42
+ *
43
+ * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
44
+ * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
45
+ * supporting fields are automatically extracted in the background for verification.
46
+ */
47
+ enableVerification?: boolean;
40
48
  }
41
49
  /**
42
50
  * Confidence details for a single extracted field.
@@ -60,10 +68,6 @@ interface FieldConfidence {
60
68
  interface ExtractionMetadata {
61
69
  /** Time taken to process the document in milliseconds. */
62
70
  processing_time_ms: number;
63
- /** Number of input tokens used. */
64
- input_tokens: number;
65
- /** Number of output tokens generated. */
66
- output_tokens: number;
67
71
  /** Number of credits consumed (1 credit = 1 page). */
68
72
  credits: number;
69
73
  /** Whether the fallback model was triggered for higher accuracy. */
@@ -75,6 +79,44 @@ interface ExtractionMetadata {
75
79
  /** List of issues or warnings encountered during extraction. */
76
80
  issues: string[];
77
81
  }
82
+ /**
83
+ * Verification status values.
84
+ */
85
+ type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
86
+ /**
87
+ * Individual verification check result.
88
+ */
89
+ interface VerificationCheck {
90
+ /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
91
+ type: string;
92
+ /** Status of this check. */
93
+ status: string;
94
+ /** Fields involved in this check. */
95
+ fields: string[];
96
+ /** Whether this check passed. */
97
+ passed: boolean;
98
+ /** Difference between expected and actual values. */
99
+ delta: number;
100
+ /** Expected value based on the rule. */
101
+ expected: number;
102
+ /** Actual extracted value. */
103
+ actual: number;
104
+ }
105
+ /**
106
+ * Math verification results.
107
+ */
108
+ interface Verification {
109
+ /** Overall verification status. */
110
+ status: VerificationStatus;
111
+ /** Number of checks that passed. */
112
+ checks_passed: number;
113
+ /** Number of checks that failed. */
114
+ checks_failed: number;
115
+ /** Number of checks that could not be verified. */
116
+ cannot_verify_count: number;
117
+ /** Detailed results for each check that was run. */
118
+ checks_run: VerificationCheck[];
119
+ }
78
120
  /**
79
121
  * Error response from the API.
80
122
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
92
134
  object: T | null;
93
135
  /** Metadata about the extraction process. */
94
136
  metadata: ExtractionMetadata;
137
+ /** Math verification results (only present if enableVerification was true). */
138
+ verification?: Verification;
95
139
  /** Error details if extraction failed, or null on success. */
96
140
  error: APIErrorResponse | null;
97
141
  }
@@ -184,10 +228,11 @@ declare class Parsefy {
184
228
  * due_date: z.string().optional().describe('Payment due date'),
185
229
  * });
186
230
  *
187
- * const { object, metadata, error } = await client.extract({
231
+ * const { object, metadata, verification, error } = await client.extract({
188
232
  * file: './invoice.pdf',
189
233
  * schema,
190
234
  * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
235
+ * enableVerification: true, // Enable math verification
191
236
  * });
192
237
  *
193
238
  * if (!error && object) {
@@ -198,6 +243,14 @@ declare class Parsefy {
198
243
  * metadata.field_confidence.forEach((fc) => {
199
244
  * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
245
  * });
246
+ *
247
+ * // Access verification results if enabled
248
+ * if (verification) {
249
+ * console.log(`Verification status: ${verification.status}`);
250
+ * verification.checks_run.forEach((check) => {
251
+ * console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
252
+ * });
253
+ * }
201
254
  * }
202
255
  * ```
203
256
  */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
250
303
  constructor(message: string);
251
304
  }
252
305
 
253
- export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
306
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
package/dist/index.d.ts CHANGED
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
37
37
  * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
38
38
  */
39
39
  confidenceThreshold?: number;
40
+ /**
41
+ * Enable math verification (includes shadow extraction). Defaults to false.
42
+ *
43
+ * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
44
+ * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
45
+ * supporting fields are automatically extracted in the background for verification.
46
+ */
47
+ enableVerification?: boolean;
40
48
  }
41
49
  /**
42
50
  * Confidence details for a single extracted field.
@@ -60,10 +68,6 @@ interface FieldConfidence {
60
68
  interface ExtractionMetadata {
61
69
  /** Time taken to process the document in milliseconds. */
62
70
  processing_time_ms: number;
63
- /** Number of input tokens used. */
64
- input_tokens: number;
65
- /** Number of output tokens generated. */
66
- output_tokens: number;
67
71
  /** Number of credits consumed (1 credit = 1 page). */
68
72
  credits: number;
69
73
  /** Whether the fallback model was triggered for higher accuracy. */
@@ -75,6 +79,44 @@ interface ExtractionMetadata {
75
79
  /** List of issues or warnings encountered during extraction. */
76
80
  issues: string[];
77
81
  }
82
+ /**
83
+ * Verification status values.
84
+ */
85
+ type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
86
+ /**
87
+ * Individual verification check result.
88
+ */
89
+ interface VerificationCheck {
90
+ /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
91
+ type: string;
92
+ /** Status of this check. */
93
+ status: string;
94
+ /** Fields involved in this check. */
95
+ fields: string[];
96
+ /** Whether this check passed. */
97
+ passed: boolean;
98
+ /** Difference between expected and actual values. */
99
+ delta: number;
100
+ /** Expected value based on the rule. */
101
+ expected: number;
102
+ /** Actual extracted value. */
103
+ actual: number;
104
+ }
105
+ /**
106
+ * Math verification results.
107
+ */
108
+ interface Verification {
109
+ /** Overall verification status. */
110
+ status: VerificationStatus;
111
+ /** Number of checks that passed. */
112
+ checks_passed: number;
113
+ /** Number of checks that failed. */
114
+ checks_failed: number;
115
+ /** Number of checks that could not be verified. */
116
+ cannot_verify_count: number;
117
+ /** Detailed results for each check that was run. */
118
+ checks_run: VerificationCheck[];
119
+ }
78
120
  /**
79
121
  * Error response from the API.
80
122
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
92
134
  object: T | null;
93
135
  /** Metadata about the extraction process. */
94
136
  metadata: ExtractionMetadata;
137
+ /** Math verification results (only present if enableVerification was true). */
138
+ verification?: Verification;
95
139
  /** Error details if extraction failed, or null on success. */
96
140
  error: APIErrorResponse | null;
97
141
  }
@@ -184,10 +228,11 @@ declare class Parsefy {
184
228
  * due_date: z.string().optional().describe('Payment due date'),
185
229
  * });
186
230
  *
187
- * const { object, metadata, error } = await client.extract({
231
+ * const { object, metadata, verification, error } = await client.extract({
188
232
  * file: './invoice.pdf',
189
233
  * schema,
190
234
  * confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
235
+ * enableVerification: true, // Enable math verification
191
236
  * });
192
237
  *
193
238
  * if (!error && object) {
@@ -198,6 +243,14 @@ declare class Parsefy {
198
243
  * metadata.field_confidence.forEach((fc) => {
199
244
  * console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
200
245
  * });
246
+ *
247
+ * // Access verification results if enabled
248
+ * if (verification) {
249
+ * console.log(`Verification status: ${verification.status}`);
250
+ * verification.checks_run.forEach((check) => {
251
+ * console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
252
+ * });
253
+ * }
201
254
  * }
202
255
  * ```
203
256
  */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
250
303
  constructor(message: string);
251
304
  }
252
305
 
253
- export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
306
+ export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
package/dist/index.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import {zodToJsonSchema}from'zod-to-json-schema';var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,g="https://api.parsefy.io",x=6e4;var s=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends s{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends s{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},a=class extends s{constructor(e){super(e),this.name="ValidationError";}};function h(){return typeof process<"u"&&process.versions?.node!==void 0}function T(t){let e=zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function b(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function _(t){if(!b(t)){let r=Object.keys(d).join(", ");throw new a(`Unsupported file type. Supported types: ${r}`)}}function m(t){if(t===0)throw new a("File is empty");if(t>l){let e=l/1048576;throw new a(`File size exceeds maximum limit of ${e}MB`)}}function w(t){let e=t._meta||{confidence_score:1,field_confidence:[],issues:[]};return {object:t.object,metadata:{processing_time_ms:t.metadata.processing_time_ms,input_tokens:t.metadata.input_tokens,output_tokens:t.metadata.output_tokens,credits:t.metadata.credits,fallback_triggered:t.metadata.fallback_triggered,confidence_score:e.confidence_score,field_confidence:e.field_confidence.map(r=>({field:r.field,score:r.score,reason:r.reason,page:r.page,text:r.text})),issues:e.issues},error:t.error}}function R(t,e){let r=b(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function I(t){if(!h())throw new a("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new a(`File not found: ${t}`);let o=r.basename(t);_(o);let c=e.readFileSync(t);return m(c.length),{buffer:c,filename:o}}async function F(t){if(typeof t=="string"){let{buffer:e,filename:r}=await I(t);return R(e,r)}if(Buffer.isBuffer(t))return m(t.length),R(t,"document.pdf");if(t instanceof File)return _(t.name),m(t.size),t;if(t instanceof Blob)return m(t.size),t;throw new a("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function A(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var E=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new a("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||g,this.timeout=r.timeout||x;}getEnvApiKey(){return h()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:c}=e,n=T(o),f=await F(r),i=new FormData;return i.append("file",f),i.append("output_schema",JSON.stringify(n)),i.append("confidence_threshold",String(c??.85)),this.makeRequestWithRetry(i)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let c=A(r);return await P(c),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,c=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(c),!n.ok){let i=await this.parseErrorResponse(n);throw new p(i.message||`API request failed with status ${n.status}`,n.status,i)}let f;try{f=await n.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return w(f)}catch(i){throw new s(`Failed to transform API response: ${i instanceof Error?i.message:String(i)}`,"TRANSFORM_ERROR")}}catch(n){throw clearTimeout(c),n instanceof Error&&n.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof s?n:n instanceof TypeError&&n.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${n.message}`,"NETWORK_ERROR"):n instanceof TypeError?new s(`Type error: ${n.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
2
- export{p as APIError,u as DEFAULT_CONFIDENCE_THRESHOLD,y as ExtractionError,E as Parsefy,s as ParsefyError,a as ValidationError};
1
+ import {zodToJsonSchema}from'zod-to-json-schema';var m=.85,l={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},y=10*1024*1024,x="https://api.parsefy.io",b=6e4;var s=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},f=class extends s{constructor(t,r,n){super(t),this.name="APIError",this.statusCode=r,this.response=n;}},h=class extends s{constructor(t,r,n){super(t,r),this.name="ExtractionError",this.metadata=n;}},i=class extends s{constructor(t){super(t),this.name="ValidationError";}};function g(){return typeof process<"u"&&process.versions?.node!==void 0}function R(e){let t=zodToJsonSchema(e,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in t&&delete t.$schema,t}function T(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&l[t]||null}function w(e){if(!T(e)){let r=Object.keys(l).join(", ");throw new i(`Unsupported file type. 
Supported types: ${r}`)}}function d(e){if(e===0)throw new i("File is empty");if(e>y){let t=y/1048576;throw new i(`File size exceeds maximum limit of ${t}MB`)}}function F(e){let t=e._meta||{confidence_score:1,field_confidence:[],issues:[]},r={object:e.object,metadata:{processing_time_ms:e.metadata.processing_time_ms,credits:e.metadata.credits,fallback_triggered:e.metadata.fallback_triggered,confidence_score:t.confidence_score,field_confidence:t.field_confidence.map(n=>({field:n.field,score:n.score,reason:n.reason,page:n.page,text:n.text})),issues:t.issues},error:e.error};return e.verification&&(r.verification={status:e.verification.status,checks_passed:e.verification.checks_passed,checks_failed:e.verification.checks_failed,cannot_verify_count:e.verification.cannot_verify_count,checks_run:e.verification.checks_run.map(n=>({type:n.type,status:n.status,fields:n.fields,passed:n.passed,delta:n.delta,expected:n.expected,actual:n.actual}))}),r}function _(e,t){let r=T(t)||"application/octet-stream",n=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([n],t,{type:r}):new Blob([n],{type:r})}async function I(e){if(!g())throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new i(`File not found: ${e}`);let n=r.basename(e);w(n);let a=t.readFileSync(e);return d(a.length),{buffer:a,filename:n}}async function S(e){if(typeof e=="string"){let{buffer:t,filename:r}=await I(e);return _(t,r)}if(Buffer.isBuffer(e))return d(e.length),_(e,"document.pdf");if(e instanceof File)return w(e.name),d(e.size),e;if(e instanceof Blob)return d(e.size),e;throw new i("Invalid file input. 
Expected File, Blob, Buffer, or file path string.")}function A(e){return new Promise(t=>setTimeout(t,e))}function P(e,t=1e3){let r=t*Math.pow(2,e),n=Math.random()*.1*r;return Math.min(r+n,3e4)}var E=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||x,this.timeout=r.timeout||b;}getEnvApiKey(){return g()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:n,confidenceThreshold:a,enableVerification:o}=t,u=R(n),c=await S(r),p=new FormData;return p.append("file",c),p.append("output_schema",JSON.stringify(u)),p.append("confidence_threshold",String(a??.85)),o!==void 0&&p.append("enable_verification",String(o)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(n){if(n instanceof f&&n.statusCode===429&&r<this.maxRetries){let a=P(r);return await A(a),this.makeRequestWithRetry(t,r+1)}throw n}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,n=new AbortController,a=setTimeout(()=>n.abort(),this.timeout);try{let o=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:n.signal});if(clearTimeout(a),!o.ok){let c=await this.parseErrorResponse(o);throw new f(c.message||`API request failed with status ${o.status}`,o.status,c)}let u;try{u=await o.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(u)}catch(c){throw new s(`Failed to transform API response: ${c instanceof Error?c.message:String(c)}`,"TRANSFORM_ERROR")}}catch(o){throw clearTimeout(a),o instanceof Error&&o.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):o instanceof s?o:o instanceof TypeError&&o.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`,"NETWORK_ERROR"):o instanceof TypeError?new s(`Type error: ${o.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${o instanceof Error?o.message:String(o)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
2
+ export{f as APIError,m as DEFAULT_CONFIDENCE_THRESHOLD,h as ExtractionError,E as Parsefy,s as ParsefyError,i as ValidationError};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "parsefy",
3
- "version": "1.0.4",
3
+ "version": "1.1.0",
4
4
  "description": "Official TypeScript SDK for Parsefy - Financial Document Infrastructure for Developers",
5
5
  "author": "",
6
6
  "license": "MIT",