parsefy 1.0.4 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +99 -4
- package/dist/index.cjs +2 -2
- package/dist/index.d.cts +59 -6
- package/dist/index.d.mts +59 -6
- package/dist/index.d.ts +59 -6
- package/dist/index.mjs +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -36,9 +36,10 @@ const schema = z.object({
|
|
|
36
36
|
due_date: z.string().optional().describe('Payment due date'),
|
|
37
37
|
});
|
|
38
38
|
|
|
39
|
-
const { object, metadata, error } = await client.extract({
|
|
39
|
+
const { object, metadata, verification, error } = await client.extract({
|
|
40
40
|
file: './invoice.pdf',
|
|
41
41
|
schema,
|
|
42
|
+
enableVerification: true, // Enable math verification
|
|
42
43
|
});
|
|
43
44
|
|
|
44
45
|
if (!error && object) {
|
|
@@ -49,6 +50,14 @@ if (!error && object) {
|
|
|
49
50
|
metadata.field_confidence.forEach((fc) => {
|
|
50
51
|
console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
|
|
51
52
|
});
|
|
53
|
+
|
|
54
|
+
// Access verification results if enabled
|
|
55
|
+
if (verification) {
|
|
56
|
+
console.log(`Verification status: ${verification.status}`);
|
|
57
|
+
verification.checks_run.forEach((check) => {
|
|
58
|
+
console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
|
|
59
|
+
});
|
|
60
|
+
}
|
|
52
61
|
}
|
|
53
62
|
```
|
|
54
63
|
|
|
@@ -102,6 +111,50 @@ const { object, metadata } = await client.extract({
|
|
|
102
111
|
|
|
103
112
|
**Default:** `0.85`
|
|
104
113
|
|
|
114
|
+
## Math Verification
|
|
115
|
+
|
|
116
|
+
Enable automatic math verification to check that extracted numeric data is mathematically consistent:
|
|
117
|
+
|
|
118
|
+
```typescript
|
|
119
|
+
const { object, verification } = await client.extract({
|
|
120
|
+
file: './invoice.pdf',
|
|
121
|
+
schema,
|
|
122
|
+
enableVerification: true, // Enable math verification
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
if (verification) {
|
|
126
|
+
console.log(`Verification status: ${verification.status}`);
|
|
127
|
+
console.log(`Checks passed: ${verification.checks_passed}`);
|
|
128
|
+
console.log(`Checks failed: ${verification.checks_failed}`);
|
|
129
|
+
|
|
130
|
+
verification.checks_run.forEach((check) => {
|
|
131
|
+
console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
|
|
132
|
+
console.log(` Fields: ${check.fields.join(', ')}`);
|
|
133
|
+
console.log(` Expected: ${check.expected}, Actual: ${check.actual}`);
|
|
134
|
+
console.log(` Delta: ${check.delta}`);
|
|
135
|
+
});
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Verification Status Values
|
|
140
|
+
|
|
141
|
+
| Status | Description |
|
|
142
|
+
|--------|-------------|
|
|
143
|
+
| `PASSED` | All math checks passed |
|
|
144
|
+
| `FAILED` | One or more math checks failed |
|
|
145
|
+
| `PARTIAL` | Some checks passed, some failed or couldn't be verified |
|
|
146
|
+
| `CANNOT_VERIFY` | Required fields are missing (not a math error) |
|
|
147
|
+
| `NO_RULES` | No verifiable fields detected in schema |
|
|
148
|
+
|
|
149
|
+
### Supported Verification Rules
|
|
150
|
+
|
|
151
|
+
- **HORIZONTAL_SUM**: Verifies `total = subtotal + tax`
|
|
152
|
+
- **VERTICAL_SUM**: Verifies `subtotal = sum(line_items)`
|
|
153
|
+
|
|
154
|
+
### Shadow Extraction
|
|
155
|
+
|
|
156
|
+
When `enableVerification` is `true` and only a single verifiable field is requested (e.g., just `total`), Parsefy automatically extracts the supporting fields in the background for verification, then removes them from the response.
|
|
157
|
+
|
|
105
158
|
## Response Format
|
|
106
159
|
|
|
107
160
|
```typescript
|
|
@@ -112,8 +165,6 @@ interface ExtractResult<T> {
|
|
|
112
165
|
// Metadata about the extraction
|
|
113
166
|
metadata: {
|
|
114
167
|
processing_time_ms: number; // Processing time in milliseconds
|
|
115
|
-
input_tokens: number; // Input tokens used
|
|
116
|
-
output_tokens: number; // Output tokens generated
|
|
117
168
|
credits: number; // Credits consumed (1 credit = 1 page)
|
|
118
169
|
fallback_triggered: boolean; // Whether fallback model was used
|
|
119
170
|
|
|
@@ -129,6 +180,23 @@ interface ExtractResult<T> {
|
|
|
129
180
|
issues: string[]; // Warnings or anomalies detected
|
|
130
181
|
};
|
|
131
182
|
|
|
183
|
+
// Math verification results (only present if enableVerification was true)
|
|
184
|
+
verification?: {
|
|
185
|
+
status: "PASSED" | "FAILED" | "PARTIAL" | "CANNOT_VERIFY" | "NO_RULES";
|
|
186
|
+
checks_passed: number;
|
|
187
|
+
checks_failed: number;
|
|
188
|
+
cannot_verify_count: number;
|
|
189
|
+
checks_run: Array<{
|
|
190
|
+
type: string; // e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"
|
|
191
|
+
status: string;
|
|
192
|
+
fields: string[];
|
|
193
|
+
passed: boolean;
|
|
194
|
+
delta: number;
|
|
195
|
+
expected: number;
|
|
196
|
+
actual: number;
|
|
197
|
+
}>;
|
|
198
|
+
};
|
|
199
|
+
|
|
132
200
|
// Error details if extraction failed
|
|
133
201
|
error: {
|
|
134
202
|
code: string;
|
|
@@ -140,7 +208,11 @@ interface ExtractResult<T> {
|
|
|
140
208
|
### Example Response
|
|
141
209
|
|
|
142
210
|
```typescript
|
|
143
|
-
const { object, metadata } = await client.extract({
|
|
211
|
+
const { object, metadata, verification } = await client.extract({
|
|
212
|
+
file,
|
|
213
|
+
schema,
|
|
214
|
+
enableVerification: true
|
|
215
|
+
});
|
|
144
216
|
|
|
145
217
|
// object:
|
|
146
218
|
{
|
|
@@ -161,6 +233,25 @@ const { object, metadata } = await client.extract({ file, schema });
|
|
|
161
233
|
]
|
|
162
234
|
|
|
163
235
|
// metadata.issues: []
|
|
236
|
+
|
|
237
|
+
// verification (only present if enableVerification was true):
|
|
238
|
+
{
|
|
239
|
+
status: "PASSED",
|
|
240
|
+
checks_passed: 1,
|
|
241
|
+
checks_failed: 0,
|
|
242
|
+
cannot_verify_count: 0,
|
|
243
|
+
checks_run: [
|
|
244
|
+
{
|
|
245
|
+
type: "HORIZONTAL_SUM",
|
|
246
|
+
status: "PASSED",
|
|
247
|
+
fields: ["total", "subtotal", "tax"],
|
|
248
|
+
passed: true,
|
|
249
|
+
delta: 0.0,
|
|
250
|
+
expected: 1250.00,
|
|
251
|
+
actual: 1250.00
|
|
252
|
+
}
|
|
253
|
+
]
|
|
254
|
+
}
|
|
164
255
|
```
|
|
165
256
|
|
|
166
257
|
## Configuration
|
|
@@ -196,6 +287,7 @@ const client = new Parsefy({
|
|
|
196
287
|
| `file` | `File \| Blob \| Buffer \| string` | required | Document to extract from |
|
|
197
288
|
| `schema` | `z.ZodType` | required | Zod schema defining extraction structure |
|
|
198
289
|
| `confidenceThreshold` | `number` | `0.85` | Minimum confidence before triggering fallback |
|
|
290
|
+
| `enableVerification` | `boolean` | `false` | Enable math verification (includes shadow extraction) |
|
|
199
291
|
|
|
200
292
|
## Usage
|
|
201
293
|
|
|
@@ -392,6 +484,9 @@ import type {
|
|
|
392
484
|
ExtractResult,
|
|
393
485
|
ExtractionMetadata,
|
|
394
486
|
FieldConfidence,
|
|
487
|
+
Verification,
|
|
488
|
+
VerificationStatus,
|
|
489
|
+
VerificationCheck,
|
|
395
490
|
APIErrorResponse,
|
|
396
491
|
} from 'parsefy';
|
|
397
492
|
|
package/dist/index.cjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
'use strict';var zodToJsonSchema=require('zod-to-json-schema');var
|
|
2
|
-
exports.APIError=
|
|
1
|
+
'use strict';
var zodToJsonSchema = require('zod-to-json-schema');

// Top-level identifiers keep their minified names because the `exports.*`
// assignments that follow this bundle refer to them.

/** Exported as DEFAULT_CONFIDENCE_THRESHOLD; used when `confidenceThreshold` is not supplied. */
var m = .85;
/** Lower-cased file extension -> MIME type. Its keys are the supported upload types. */
var l = {
  ".pdf": "application/pdf",
  ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
};
/** Maximum accepted file size in bytes. */
var y = 10 * 1024 * 1024;
/** Base URL used when the config does not provide `baseUrl`. */
var x = "https://api.parsefy.io";
/** Request timeout in milliseconds used when the config does not provide `timeout`. */
var b = 6e4;

/** Exported as ParsefyError: base error carrying an optional `code`. */
var s = class extends Error {
  constructor(t, r) {
    super(t);
    this.name = "ParsefyError";
    this.code = r;
    // captureStackTrace is not available on every engine, hence the feature check.
    if (typeof Error.captureStackTrace == "function") {
      Error.captureStackTrace(this, this.constructor);
    }
  }
};

/** Exported as APIError: non-OK HTTP response, with `statusCode` and the parsed `response` body. */
var f = class extends s {
  constructor(t, r, n) {
    super(t);
    this.name = "APIError";
    this.statusCode = r;
    this.response = n;
  }
};

/** Exported as ExtractionError: carries a `code` and the extraction `metadata`. */
var h = class extends s {
  constructor(t, r, n) {
    super(t, r);
    this.name = "ExtractionError";
    this.metadata = n;
  }
};

/** Exported as ValidationError: invalid client-side input (file, API key). */
var i = class extends s {
  constructor(t) {
    super(t);
    this.name = "ValidationError";
  }
};

/** Returns true when `process.versions.node` is defined. */
function g() {
  return typeof process < "u" && process.versions?.node !== void 0;
}

/** Converts a Zod schema to JSON Schema (draft 7, refs inlined) and drops the `$schema` key. */
function R(e) {
  let t = zodToJsonSchema.zodToJsonSchema(e, { $refStrategy: "none", target: "jsonSchema7" });
  if ("$schema" in t) delete t.$schema;
  return t;
}

/** Returns the MIME type for a filename's extension, or null when the extension is not in `l`. */
function T(e) {
  let t = e.toLowerCase().match(/\.[^.]+$/)?.[0];
  return t && l[t] || null;
}

/** Throws ValidationError when the filename's extension is not supported. */
function w(e) {
  if (!T(e)) {
    let r = Object.keys(l).join(", ");
    throw new i(`Unsupported file type. Supported types: ${r}`);
  }
}

/** Throws ValidationError when the byte size is zero or exceeds `y`. */
function d(e) {
  if (e === 0) throw new i("File is empty");
  if (e > y) {
    let t = y / 1048576;
    throw new i(`File size exceeds maximum limit of ${t}MB`);
  }
}

/**
 * Maps the raw API payload to the public result shape.
 * Confidence data is read from `_meta`; when `_meta` is missing it defaults to
 * a score of 1 with empty `field_confidence` and `issues`.
 * `verification` is copied only when the payload contains it.
 */
function F(e) {
  let t = e._meta || { confidence_score: 1, field_confidence: [], issues: [] };
  let r = {
    object: e.object,
    metadata: {
      processing_time_ms: e.metadata.processing_time_ms,
      credits: e.metadata.credits,
      fallback_triggered: e.metadata.fallback_triggered,
      confidence_score: t.confidence_score,
      field_confidence: t.field_confidence.map(n => ({
        field: n.field,
        score: n.score,
        reason: n.reason,
        page: n.page,
        text: n.text
      })),
      issues: t.issues
    },
    error: e.error
  };
  if (e.verification) {
    r.verification = {
      status: e.verification.status,
      checks_passed: e.verification.checks_passed,
      checks_failed: e.verification.checks_failed,
      cannot_verify_count: e.verification.cannot_verify_count,
      checks_run: e.verification.checks_run.map(n => ({
        type: n.type,
        status: n.status,
        fields: n.fields,
        passed: n.passed,
        delta: n.delta,
        expected: n.expected,
        actual: n.actual
      }))
    };
  }
  return r;
}

/** Wraps a Buffer's bytes in a File (when the runtime has one) or a Blob, typed from the filename. */
function _(e, t) {
  let r = T(t) || "application/octet-stream";
  // Slice so that only this Buffer's window of the underlying ArrayBuffer is uploaded.
  let n = e.buffer.slice(e.byteOffset, e.byteOffset + e.byteLength);
  return typeof File < "u" ? new File([n], t, { type: r }) : new Blob([n], { type: r });
}

/** Reads a file from disk (Node.js only) after validating its existence, extension and size. */
async function I(e) {
  if (!g()) throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");
  let t = await import('fs'),
    r = await import('path');
  if (!t.existsSync(e)) throw new i(`File not found: ${e}`);
  let n = r.basename(e);
  w(n);
  let a = t.readFileSync(e);
  d(a.length);
  return { buffer: a, filename: n };
}

/**
 * Normalizes the `file` option (path string, Buffer, File or Blob) into a File/Blob
 * suitable for FormData, validating size (and extension where a name is known).
 * Throws ValidationError for any other input.
 */
async function S(e) {
  if (typeof e == "string") {
    let { buffer: t, filename: r } = await I(e);
    return _(t, r);
  }
  // `Buffer` and `File` are not defined on every runtime; referencing an
  // undeclared global would throw ReferenceError before the later branches
  // get a chance to match, so probe with `typeof` first.
  if (typeof Buffer < "u" && Buffer.isBuffer(e)) {
    d(e.length);
    return _(e, "document.pdf");
  }
  if (typeof File < "u" && e instanceof File) {
    w(e.name);
    d(e.size);
    return e;
  }
  if (typeof Blob < "u" && e instanceof Blob) {
    d(e.size);
    return e;
  }
  throw new i("Invalid file input. Expected File, Blob, Buffer, or file path string.");
}

/** Resolves after `e` milliseconds. */
function A(e) {
  return new Promise(t => setTimeout(t, e));
}

/** Backoff delay in ms: `t * 2^e` plus up to 10% random jitter, capped at 30000. */
function P(e, t = 1e3) {
  let r = t * Math.pow(2, e),
    n = Math.random() * .1 * r;
  return Math.min(r + n, 3e4);
}

/** Exported as Parsefy: API client. */
var E = class {
  /**
   * @param t API key string, or a config object with `apiKey`, `baseUrl`, `timeout`.
   * @throws ValidationError when no API key is given and PARSEFY_API_KEY is not set.
   */
  constructor(t) {
    this.maxRetries = 3;
    let r = {};
    if (typeof t == "string") r = { apiKey: t };
    else if (t) r = t;
    this.apiKey = r.apiKey || this.getEnvApiKey();
    if (!this.apiKey) throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");
    this.baseUrl = r.baseUrl || x;
    this.timeout = r.timeout || b;
  }

  /** Returns PARSEFY_API_KEY from the environment under Node.js, otherwise "". */
  getEnvApiKey() {
    return g() && process.env.PARSEFY_API_KEY || "";
  }

  /**
   * Uploads the document with the JSON-Schema form of `schema` and returns the mapped result.
   * `enable_verification` is only sent when `enableVerification` was provided.
   */
  async extract(t) {
    let { file: r, schema: n, confidenceThreshold: a, enableVerification: o } = t,
      u = R(n),
      c = await S(r),
      p = new FormData;
    p.append("file", c);
    p.append("output_schema", JSON.stringify(u));
    // Use the exported default constant rather than repeating its literal value.
    p.append("confidence_threshold", String(a ?? m));
    if (o !== void 0) p.append("enable_verification", String(o));
    return this.makeRequestWithRetry(p);
  }

  /** Retries only APIError responses with status 429, at most `maxRetries` times, with backoff. */
  async makeRequestWithRetry(t, r = 0) {
    try {
      return await this.makeRequest(t);
    } catch (n) {
      if (n instanceof f && n.statusCode === 429 && r < this.maxRetries) {
        let a = P(r);
        await A(a);
        return this.makeRequestWithRetry(t, r + 1);
      }
      throw n;
    }
  }

  /**
   * POSTs the form to `/v1/extract` and maps the JSON response.
   * Every failure is rethrown as a ParsefyError (or subclass) with a code:
   * TIMEOUT, PARSE_ERROR, TRANSFORM_ERROR, NETWORK_ERROR, TYPE_ERROR or UNKNOWN_ERROR;
   * non-OK responses become APIError.
   */
  async makeRequest(t) {
    let r = `${this.baseUrl}/v1/extract`,
      n = new AbortController,
      a = setTimeout(() => n.abort(), this.timeout);
    try {
      let o = await fetch(r, {
        method: "POST",
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: t,
        signal: n.signal
      });
      clearTimeout(a);
      if (!o.ok) {
        let c = await this.parseErrorResponse(o);
        throw new f(c.message || `API request failed with status ${o.status}`, o.status, c);
      }
      let u;
      try {
        u = await o.json();
      } catch {
        throw new s("Failed to parse API response as JSON. The API may have returned an invalid response.", "PARSE_ERROR");
      }
      try {
        return F(u);
      } catch (c) {
        throw new s(`Failed to transform API response: ${c instanceof Error ? c.message : String(c)}`, "TRANSFORM_ERROR");
      }
    } catch (o) {
      clearTimeout(a);
      // Order matters: abort first, then errors this client already classified.
      if (o instanceof Error && o.name === "AbortError") {
        throw new s(`Request timed out after ${this.timeout}ms`, "TIMEOUT");
      }
      if (o instanceof s) throw o;
      if (o instanceof TypeError && o.message.includes("fetch")) {
        throw new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`, "NETWORK_ERROR");
      }
      if (o instanceof TypeError) {
        throw new s(`Type error: ${o.message}. This may indicate an API response format issue.`, "TYPE_ERROR");
      }
      throw new s(`Unexpected error: ${o instanceof Error ? o.message : String(o)}`, "UNKNOWN_ERROR");
    }
  }

  /** Reads an error body as JSON, falling back to its text, then to the status text. */
  async parseErrorResponse(t) {
    try {
      return await t.json();
    } catch {
      try {
        return { message: await t.text() || t.statusText };
      } catch {
        return { message: t.statusText };
      }
    }
  }
};
|
|
2
|
+
exports.APIError=f;exports.DEFAULT_CONFIDENCE_THRESHOLD=m;exports.ExtractionError=h;exports.Parsefy=E;exports.ParsefyError=s;exports.ValidationError=i;
|
package/dist/index.d.cts
CHANGED
|
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
|
|
|
37
37
|
* appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
|
|
38
38
|
*/
|
|
39
39
|
confidenceThreshold?: number;
|
|
40
|
+
/**
|
|
41
|
+
* Enable math verification (includes shadow extraction). Defaults to false.
|
|
42
|
+
*
|
|
43
|
+
* When enabled, Parsefy automatically verifies mathematical consistency of numeric data
|
|
44
|
+
* (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
|
|
45
|
+
* supporting fields are automatically extracted in the background for verification.
|
|
46
|
+
*/
|
|
47
|
+
enableVerification?: boolean;
|
|
40
48
|
}
|
|
41
49
|
/**
|
|
42
50
|
* Confidence details for a single extracted field.
|
|
@@ -60,10 +68,6 @@ interface FieldConfidence {
|
|
|
60
68
|
interface ExtractionMetadata {
|
|
61
69
|
/** Time taken to process the document in milliseconds. */
|
|
62
70
|
processing_time_ms: number;
|
|
63
|
-
/** Number of input tokens used. */
|
|
64
|
-
input_tokens: number;
|
|
65
|
-
/** Number of output tokens generated. */
|
|
66
|
-
output_tokens: number;
|
|
67
71
|
/** Number of credits consumed (1 credit = 1 page). */
|
|
68
72
|
credits: number;
|
|
69
73
|
/** Whether the fallback model was triggered for higher accuracy. */
|
|
@@ -75,6 +79,44 @@ interface ExtractionMetadata {
|
|
|
75
79
|
/** List of issues or warnings encountered during extraction. */
|
|
76
80
|
issues: string[];
|
|
77
81
|
}
|
|
82
|
+
/**
|
|
83
|
+
* Verification status values.
|
|
84
|
+
*/
|
|
85
|
+
type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
|
|
86
|
+
/**
|
|
87
|
+
* Individual verification check result.
|
|
88
|
+
*/
|
|
89
|
+
interface VerificationCheck {
|
|
90
|
+
/** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
|
|
91
|
+
type: string;
|
|
92
|
+
/** Status of this check. */
|
|
93
|
+
status: string;
|
|
94
|
+
/** Fields involved in this check. */
|
|
95
|
+
fields: string[];
|
|
96
|
+
/** Whether this check passed. */
|
|
97
|
+
passed: boolean;
|
|
98
|
+
/** Difference between expected and actual values. */
|
|
99
|
+
delta: number;
|
|
100
|
+
/** Expected value based on the rule. */
|
|
101
|
+
expected: number;
|
|
102
|
+
/** Actual extracted value. */
|
|
103
|
+
actual: number;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Math verification results.
|
|
107
|
+
*/
|
|
108
|
+
interface Verification {
|
|
109
|
+
/** Overall verification status. */
|
|
110
|
+
status: VerificationStatus;
|
|
111
|
+
/** Number of checks that passed. */
|
|
112
|
+
checks_passed: number;
|
|
113
|
+
/** Number of checks that failed. */
|
|
114
|
+
checks_failed: number;
|
|
115
|
+
/** Number of checks that could not be verified. */
|
|
116
|
+
cannot_verify_count: number;
|
|
117
|
+
/** Detailed results for each check that was run. */
|
|
118
|
+
checks_run: VerificationCheck[];
|
|
119
|
+
}
|
|
78
120
|
/**
|
|
79
121
|
* Error response from the API.
|
|
80
122
|
*/
|
|
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
|
|
|
92
134
|
object: T | null;
|
|
93
135
|
/** Metadata about the extraction process. */
|
|
94
136
|
metadata: ExtractionMetadata;
|
|
137
|
+
/** Math verification results (only present if enableVerification was true). */
|
|
138
|
+
verification?: Verification;
|
|
95
139
|
/** Error details if extraction failed, or null on success. */
|
|
96
140
|
error: APIErrorResponse | null;
|
|
97
141
|
}
|
|
@@ -184,10 +228,11 @@ declare class Parsefy {
|
|
|
184
228
|
* due_date: z.string().optional().describe('Payment due date'),
|
|
185
229
|
* });
|
|
186
230
|
*
|
|
187
|
-
* const { object, metadata, error } = await client.extract({
|
|
231
|
+
* const { object, metadata, verification, error } = await client.extract({
|
|
188
232
|
* file: './invoice.pdf',
|
|
189
233
|
* schema,
|
|
190
234
|
* confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
|
|
235
|
+
* enableVerification: true, // Enable math verification
|
|
191
236
|
* });
|
|
192
237
|
*
|
|
193
238
|
* if (!error && object) {
|
|
@@ -198,6 +243,14 @@ declare class Parsefy {
|
|
|
198
243
|
* metadata.field_confidence.forEach((fc) => {
|
|
199
244
|
* console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
|
|
200
245
|
* });
|
|
246
|
+
*
|
|
247
|
+
* // Access verification results if enabled
|
|
248
|
+
* if (verification) {
|
|
249
|
+
* console.log(`Verification status: ${verification.status}`);
|
|
250
|
+
* verification.checks_run.forEach((check) => {
|
|
251
|
+
* console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
|
|
252
|
+
* });
|
|
253
|
+
* }
|
|
201
254
|
* }
|
|
202
255
|
* ```
|
|
203
256
|
*/
|
|
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
|
|
|
250
303
|
constructor(message: string);
|
|
251
304
|
}
|
|
252
305
|
|
|
253
|
-
export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
|
|
306
|
+
export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
|
package/dist/index.d.mts
CHANGED
|
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
|
|
|
37
37
|
* appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
|
|
38
38
|
*/
|
|
39
39
|
confidenceThreshold?: number;
|
|
40
|
+
/**
|
|
41
|
+
* Enable math verification (includes shadow extraction). Defaults to false.
|
|
42
|
+
*
|
|
43
|
+
* When enabled, Parsefy automatically verifies mathematical consistency of numeric data
|
|
44
|
+
* (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
|
|
45
|
+
* supporting fields are automatically extracted in the background for verification.
|
|
46
|
+
*/
|
|
47
|
+
enableVerification?: boolean;
|
|
40
48
|
}
|
|
41
49
|
/**
|
|
42
50
|
* Confidence details for a single extracted field.
|
|
@@ -60,10 +68,6 @@ interface FieldConfidence {
|
|
|
60
68
|
interface ExtractionMetadata {
|
|
61
69
|
/** Time taken to process the document in milliseconds. */
|
|
62
70
|
processing_time_ms: number;
|
|
63
|
-
/** Number of input tokens used. */
|
|
64
|
-
input_tokens: number;
|
|
65
|
-
/** Number of output tokens generated. */
|
|
66
|
-
output_tokens: number;
|
|
67
71
|
/** Number of credits consumed (1 credit = 1 page). */
|
|
68
72
|
credits: number;
|
|
69
73
|
/** Whether the fallback model was triggered for higher accuracy. */
|
|
@@ -75,6 +79,44 @@ interface ExtractionMetadata {
|
|
|
75
79
|
/** List of issues or warnings encountered during extraction. */
|
|
76
80
|
issues: string[];
|
|
77
81
|
}
|
|
82
|
+
/**
|
|
83
|
+
* Verification status values.
|
|
84
|
+
*/
|
|
85
|
+
type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
|
|
86
|
+
/**
|
|
87
|
+
* Individual verification check result.
|
|
88
|
+
*/
|
|
89
|
+
interface VerificationCheck {
|
|
90
|
+
/** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
|
|
91
|
+
type: string;
|
|
92
|
+
/** Status of this check. */
|
|
93
|
+
status: string;
|
|
94
|
+
/** Fields involved in this check. */
|
|
95
|
+
fields: string[];
|
|
96
|
+
/** Whether this check passed. */
|
|
97
|
+
passed: boolean;
|
|
98
|
+
/** Difference between expected and actual values. */
|
|
99
|
+
delta: number;
|
|
100
|
+
/** Expected value based on the rule. */
|
|
101
|
+
expected: number;
|
|
102
|
+
/** Actual extracted value. */
|
|
103
|
+
actual: number;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Math verification results.
|
|
107
|
+
*/
|
|
108
|
+
interface Verification {
|
|
109
|
+
/** Overall verification status. */
|
|
110
|
+
status: VerificationStatus;
|
|
111
|
+
/** Number of checks that passed. */
|
|
112
|
+
checks_passed: number;
|
|
113
|
+
/** Number of checks that failed. */
|
|
114
|
+
checks_failed: number;
|
|
115
|
+
/** Number of checks that could not be verified. */
|
|
116
|
+
cannot_verify_count: number;
|
|
117
|
+
/** Detailed results for each check that was run. */
|
|
118
|
+
checks_run: VerificationCheck[];
|
|
119
|
+
}
|
|
78
120
|
/**
|
|
79
121
|
* Error response from the API.
|
|
80
122
|
*/
|
|
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
|
|
|
92
134
|
object: T | null;
|
|
93
135
|
/** Metadata about the extraction process. */
|
|
94
136
|
metadata: ExtractionMetadata;
|
|
137
|
+
/** Math verification results (only present if enableVerification was true). */
|
|
138
|
+
verification?: Verification;
|
|
95
139
|
/** Error details if extraction failed, or null on success. */
|
|
96
140
|
error: APIErrorResponse | null;
|
|
97
141
|
}
|
|
@@ -184,10 +228,11 @@ declare class Parsefy {
|
|
|
184
228
|
* due_date: z.string().optional().describe('Payment due date'),
|
|
185
229
|
* });
|
|
186
230
|
*
|
|
187
|
-
* const { object, metadata, error } = await client.extract({
|
|
231
|
+
* const { object, metadata, verification, error } = await client.extract({
|
|
188
232
|
* file: './invoice.pdf',
|
|
189
233
|
* schema,
|
|
190
234
|
* confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
|
|
235
|
+
* enableVerification: true, // Enable math verification
|
|
191
236
|
* });
|
|
192
237
|
*
|
|
193
238
|
* if (!error && object) {
|
|
@@ -198,6 +243,14 @@ declare class Parsefy {
|
|
|
198
243
|
* metadata.field_confidence.forEach((fc) => {
|
|
199
244
|
* console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
|
|
200
245
|
* });
|
|
246
|
+
*
|
|
247
|
+
* // Access verification results if enabled
|
|
248
|
+
* if (verification) {
|
|
249
|
+
* console.log(`Verification status: ${verification.status}`);
|
|
250
|
+
* verification.checks_run.forEach((check) => {
|
|
251
|
+
* console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
|
|
252
|
+
* });
|
|
253
|
+
* }
|
|
201
254
|
* }
|
|
202
255
|
* ```
|
|
203
256
|
*/
|
|
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
|
|
|
250
303
|
constructor(message: string);
|
|
251
304
|
}
|
|
252
305
|
|
|
253
|
-
export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
|
|
306
|
+
export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
|
package/dist/index.d.ts
CHANGED
|
@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
|
|
|
37
37
|
* appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
|
|
38
38
|
*/
|
|
39
39
|
confidenceThreshold?: number;
|
|
40
|
+
/**
|
|
41
|
+
* Enable math verification (includes shadow extraction). Defaults to false.
|
|
42
|
+
*
|
|
43
|
+
* When enabled, Parsefy automatically verifies mathematical consistency of numeric data
|
|
44
|
+
* (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
|
|
45
|
+
* supporting fields are automatically extracted in the background for verification.
|
|
46
|
+
*/
|
|
47
|
+
enableVerification?: boolean;
|
|
40
48
|
}
|
|
41
49
|
/**
|
|
42
50
|
* Confidence details for a single extracted field.
|
|
@@ -60,10 +68,6 @@ interface FieldConfidence {
|
|
|
60
68
|
interface ExtractionMetadata {
|
|
61
69
|
/** Time taken to process the document in milliseconds. */
|
|
62
70
|
processing_time_ms: number;
|
|
63
|
-
/** Number of input tokens used. */
|
|
64
|
-
input_tokens: number;
|
|
65
|
-
/** Number of output tokens generated. */
|
|
66
|
-
output_tokens: number;
|
|
67
71
|
/** Number of credits consumed (1 credit = 1 page). */
|
|
68
72
|
credits: number;
|
|
69
73
|
/** Whether the fallback model was triggered for higher accuracy. */
|
|
@@ -75,6 +79,44 @@ interface ExtractionMetadata {
|
|
|
75
79
|
/** List of issues or warnings encountered during extraction. */
|
|
76
80
|
issues: string[];
|
|
77
81
|
}
|
|
82
|
+
/**
|
|
83
|
+
* Verification status values.
|
|
84
|
+
*/
|
|
85
|
+
type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
|
|
86
|
+
/**
|
|
87
|
+
* Individual verification check result.
|
|
88
|
+
*/
|
|
89
|
+
interface VerificationCheck {
|
|
90
|
+
/** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
|
|
91
|
+
type: string;
|
|
92
|
+
/** Status of this check. */
|
|
93
|
+
status: string;
|
|
94
|
+
/** Fields involved in this check. */
|
|
95
|
+
fields: string[];
|
|
96
|
+
/** Whether this check passed. */
|
|
97
|
+
passed: boolean;
|
|
98
|
+
/** Difference between expected and actual values. */
|
|
99
|
+
delta: number;
|
|
100
|
+
/** Expected value based on the rule. */
|
|
101
|
+
expected: number;
|
|
102
|
+
/** Actual extracted value. */
|
|
103
|
+
actual: number;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Math verification results.
|
|
107
|
+
*/
|
|
108
|
+
interface Verification {
|
|
109
|
+
/** Overall verification status. */
|
|
110
|
+
status: VerificationStatus;
|
|
111
|
+
/** Number of checks that passed. */
|
|
112
|
+
checks_passed: number;
|
|
113
|
+
/** Number of checks that failed. */
|
|
114
|
+
checks_failed: number;
|
|
115
|
+
/** Number of checks that could not be verified. */
|
|
116
|
+
cannot_verify_count: number;
|
|
117
|
+
/** Detailed results for each check that was run. */
|
|
118
|
+
checks_run: VerificationCheck[];
|
|
119
|
+
}
|
|
78
120
|
/**
|
|
79
121
|
* Error response from the API.
|
|
80
122
|
*/
|
|
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
|
|
|
92
134
|
object: T | null;
|
|
93
135
|
/** Metadata about the extraction process. */
|
|
94
136
|
metadata: ExtractionMetadata;
|
|
137
|
+
/** Math verification results (only present if enableVerification was true). */
|
|
138
|
+
verification?: Verification;
|
|
95
139
|
/** Error details if extraction failed, or null on success. */
|
|
96
140
|
error: APIErrorResponse | null;
|
|
97
141
|
}
|
|
@@ -184,10 +228,11 @@ declare class Parsefy {
|
|
|
184
228
|
* due_date: z.string().optional().describe('Payment due date'),
|
|
185
229
|
* });
|
|
186
230
|
*
|
|
187
|
-
* const { object, metadata, error } = await client.extract({
|
|
231
|
+
* const { object, metadata, verification, error } = await client.extract({
|
|
188
232
|
* file: './invoice.pdf',
|
|
189
233
|
* schema,
|
|
190
234
|
* confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
|
|
235
|
+
* enableVerification: true, // Enable math verification
|
|
191
236
|
* });
|
|
192
237
|
*
|
|
193
238
|
* if (!error && object) {
|
|
@@ -198,6 +243,14 @@ declare class Parsefy {
|
|
|
198
243
|
* metadata.field_confidence.forEach((fc) => {
|
|
199
244
|
* console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
|
|
200
245
|
* });
|
|
246
|
+
*
|
|
247
|
+
* // Access verification results if enabled
|
|
248
|
+
* if (verification) {
|
|
249
|
+
* console.log(`Verification status: ${verification.status}`);
|
|
250
|
+
* verification.checks_run.forEach((check) => {
|
|
251
|
+
* console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
|
|
252
|
+
* });
|
|
253
|
+
* }
|
|
201
254
|
* }
|
|
202
255
|
* ```
|
|
203
256
|
*/
|
|
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
|
|
|
250
303
|
constructor(message: string);
|
|
251
304
|
}
|
|
252
305
|
|
|
253
|
-
export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
|
|
306
|
+
export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };
|
package/dist/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {zodToJsonSchema}from'zod-to-json-schema';var
|
|
2
|
-
export{
|
|
1
|
+
import { zodToJsonSchema } from 'zod-to-json-schema';

// Top-level identifiers keep their minified names because the `export{…}`
// statement that follows this bundle refers to them.

/** Exported as DEFAULT_CONFIDENCE_THRESHOLD; used when `confidenceThreshold` is not supplied. */
var m = .85;
/** Lower-cased file extension -> MIME type. Its keys are the supported upload types. */
var l = {
  ".pdf": "application/pdf",
  ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
};
/** Maximum accepted file size in bytes. */
var y = 10 * 1024 * 1024;
/** Base URL used when the config does not provide `baseUrl`. */
var x = "https://api.parsefy.io";
/** Request timeout in milliseconds used when the config does not provide `timeout`. */
var b = 6e4;

/** Exported as ParsefyError: base error carrying an optional `code`. */
var s = class extends Error {
  constructor(t, r) {
    super(t);
    this.name = "ParsefyError";
    this.code = r;
    // captureStackTrace is not available on every engine, hence the feature check.
    if (typeof Error.captureStackTrace == "function") {
      Error.captureStackTrace(this, this.constructor);
    }
  }
};

/** Exported as APIError: non-OK HTTP response, with `statusCode` and the parsed `response` body. */
var f = class extends s {
  constructor(t, r, n) {
    super(t);
    this.name = "APIError";
    this.statusCode = r;
    this.response = n;
  }
};

/** Exported as ExtractionError: carries a `code` and the extraction `metadata`. */
var h = class extends s {
  constructor(t, r, n) {
    super(t, r);
    this.name = "ExtractionError";
    this.metadata = n;
  }
};

/** Exported as ValidationError: invalid client-side input (file, API key). */
var i = class extends s {
  constructor(t) {
    super(t);
    this.name = "ValidationError";
  }
};

/** Returns true when `process.versions.node` is defined. */
function g() {
  return typeof process < "u" && process.versions?.node !== void 0;
}

/** Converts a Zod schema to JSON Schema (draft 7, refs inlined) and drops the `$schema` key. */
function R(e) {
  let t = zodToJsonSchema(e, { $refStrategy: "none", target: "jsonSchema7" });
  if ("$schema" in t) delete t.$schema;
  return t;
}

/** Returns the MIME type for a filename's extension, or null when the extension is not in `l`. */
function T(e) {
  let t = e.toLowerCase().match(/\.[^.]+$/)?.[0];
  return t && l[t] || null;
}

/** Throws ValidationError when the filename's extension is not supported. */
function w(e) {
  if (!T(e)) {
    let r = Object.keys(l).join(", ");
    throw new i(`Unsupported file type. Supported types: ${r}`);
  }
}

/** Throws ValidationError when the byte size is zero or exceeds `y`. */
function d(e) {
  if (e === 0) throw new i("File is empty");
  if (e > y) {
    let t = y / 1048576;
    throw new i(`File size exceeds maximum limit of ${t}MB`);
  }
}

/**
 * Maps the raw API payload to the public result shape.
 * Confidence data is read from `_meta`; when `_meta` is missing it defaults to
 * a score of 1 with empty `field_confidence` and `issues`.
 * `verification` is copied only when the payload contains it.
 */
function F(e) {
  let t = e._meta || { confidence_score: 1, field_confidence: [], issues: [] };
  let r = {
    object: e.object,
    metadata: {
      processing_time_ms: e.metadata.processing_time_ms,
      credits: e.metadata.credits,
      fallback_triggered: e.metadata.fallback_triggered,
      confidence_score: t.confidence_score,
      field_confidence: t.field_confidence.map(n => ({
        field: n.field,
        score: n.score,
        reason: n.reason,
        page: n.page,
        text: n.text
      })),
      issues: t.issues
    },
    error: e.error
  };
  if (e.verification) {
    r.verification = {
      status: e.verification.status,
      checks_passed: e.verification.checks_passed,
      checks_failed: e.verification.checks_failed,
      cannot_verify_count: e.verification.cannot_verify_count,
      checks_run: e.verification.checks_run.map(n => ({
        type: n.type,
        status: n.status,
        fields: n.fields,
        passed: n.passed,
        delta: n.delta,
        expected: n.expected,
        actual: n.actual
      }))
    };
  }
  return r;
}

/** Wraps a Buffer's bytes in a File (when the runtime has one) or a Blob, typed from the filename. */
function _(e, t) {
  let r = T(t) || "application/octet-stream";
  // Slice so that only this Buffer's window of the underlying ArrayBuffer is uploaded.
  let n = e.buffer.slice(e.byteOffset, e.byteOffset + e.byteLength);
  return typeof File < "u" ? new File([n], t, { type: r }) : new Blob([n], { type: r });
}

/** Reads a file from disk (Node.js only) after validating its existence, extension and size. */
async function I(e) {
  if (!g()) throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");
  let t = await import('fs'),
    r = await import('path');
  if (!t.existsSync(e)) throw new i(`File not found: ${e}`);
  let n = r.basename(e);
  w(n);
  let a = t.readFileSync(e);
  d(a.length);
  return { buffer: a, filename: n };
}

/**
 * Normalizes the `file` option (path string, Buffer, File or Blob) into a File/Blob
 * suitable for FormData, validating size (and extension where a name is known).
 * Throws ValidationError for any other input.
 */
async function S(e) {
  if (typeof e == "string") {
    let { buffer: t, filename: r } = await I(e);
    return _(t, r);
  }
  // `Buffer` and `File` are not defined on every runtime; referencing an
  // undeclared global would throw ReferenceError before the later branches
  // get a chance to match, so probe with `typeof` first.
  if (typeof Buffer < "u" && Buffer.isBuffer(e)) {
    d(e.length);
    return _(e, "document.pdf");
  }
  if (typeof File < "u" && e instanceof File) {
    w(e.name);
    d(e.size);
    return e;
  }
  if (typeof Blob < "u" && e instanceof Blob) {
    d(e.size);
    return e;
  }
  throw new i("Invalid file input. Expected File, Blob, Buffer, or file path string.");
}

/** Resolves after `e` milliseconds. */
function A(e) {
  return new Promise(t => setTimeout(t, e));
}

/** Backoff delay in ms: `t * 2^e` plus up to 10% random jitter, capped at 30000. */
function P(e, t = 1e3) {
  let r = t * Math.pow(2, e),
    n = Math.random() * .1 * r;
  return Math.min(r + n, 3e4);
}

/** Exported as Parsefy: API client. */
var E = class {
  /**
   * @param t API key string, or a config object with `apiKey`, `baseUrl`, `timeout`.
   * @throws ValidationError when no API key is given and PARSEFY_API_KEY is not set.
   */
  constructor(t) {
    this.maxRetries = 3;
    let r = {};
    if (typeof t == "string") r = { apiKey: t };
    else if (t) r = t;
    this.apiKey = r.apiKey || this.getEnvApiKey();
    if (!this.apiKey) throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");
    this.baseUrl = r.baseUrl || x;
    this.timeout = r.timeout || b;
  }

  /** Returns PARSEFY_API_KEY from the environment under Node.js, otherwise "". */
  getEnvApiKey() {
    return g() && process.env.PARSEFY_API_KEY || "";
  }

  /**
   * Uploads the document with the JSON-Schema form of `schema` and returns the mapped result.
   * `enable_verification` is only sent when `enableVerification` was provided.
   */
  async extract(t) {
    let { file: r, schema: n, confidenceThreshold: a, enableVerification: o } = t,
      u = R(n),
      c = await S(r),
      p = new FormData;
    p.append("file", c);
    p.append("output_schema", JSON.stringify(u));
    // Use the exported default constant rather than repeating its literal value.
    p.append("confidence_threshold", String(a ?? m));
    if (o !== void 0) p.append("enable_verification", String(o));
    return this.makeRequestWithRetry(p);
  }

  /** Retries only APIError responses with status 429, at most `maxRetries` times, with backoff. */
  async makeRequestWithRetry(t, r = 0) {
    try {
      return await this.makeRequest(t);
    } catch (n) {
      if (n instanceof f && n.statusCode === 429 && r < this.maxRetries) {
        let a = P(r);
        await A(a);
        return this.makeRequestWithRetry(t, r + 1);
      }
      throw n;
    }
  }

  /**
   * POSTs the form to `/v1/extract` and maps the JSON response.
   * Every failure is rethrown as a ParsefyError (or subclass) with a code:
   * TIMEOUT, PARSE_ERROR, TRANSFORM_ERROR, NETWORK_ERROR, TYPE_ERROR or UNKNOWN_ERROR;
   * non-OK responses become APIError.
   */
  async makeRequest(t) {
    let r = `${this.baseUrl}/v1/extract`,
      n = new AbortController,
      a = setTimeout(() => n.abort(), this.timeout);
    try {
      let o = await fetch(r, {
        method: "POST",
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: t,
        signal: n.signal
      });
      clearTimeout(a);
      if (!o.ok) {
        let c = await this.parseErrorResponse(o);
        throw new f(c.message || `API request failed with status ${o.status}`, o.status, c);
      }
      let u;
      try {
        u = await o.json();
      } catch {
        throw new s("Failed to parse API response as JSON. The API may have returned an invalid response.", "PARSE_ERROR");
      }
      try {
        return F(u);
      } catch (c) {
        throw new s(`Failed to transform API response: ${c instanceof Error ? c.message : String(c)}`, "TRANSFORM_ERROR");
      }
    } catch (o) {
      clearTimeout(a);
      // Order matters: abort first, then errors this client already classified.
      if (o instanceof Error && o.name === "AbortError") {
        throw new s(`Request timed out after ${this.timeout}ms`, "TIMEOUT");
      }
      if (o instanceof s) throw o;
      if (o instanceof TypeError && o.message.includes("fetch")) {
        throw new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`, "NETWORK_ERROR");
      }
      if (o instanceof TypeError) {
        throw new s(`Type error: ${o.message}. This may indicate an API response format issue.`, "TYPE_ERROR");
      }
      throw new s(`Unexpected error: ${o instanceof Error ? o.message : String(o)}`, "UNKNOWN_ERROR");
    }
  }

  /** Reads an error body as JSON, falling back to its text, then to the status text. */
  async parseErrorResponse(t) {
    try {
      return await t.json();
    } catch {
      try {
        return { message: await t.text() || t.statusText };
      } catch {
        return { message: t.statusText };
      }
    }
  }
};
|
|
2
|
+
export{f as APIError,m as DEFAULT_CONFIDENCE_THRESHOLD,h as ExtractionError,E as Parsefy,s as ParsefyError,i as ValidationError};
|