npm - parsefy - Versions diffs - 1.0.3 → 1.1.0 - Mend

parsefy 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED

@@ -36,19 +36,28 @@ const schema = z.object({
   due_date: z.string().optional().describe('Payment due date'),
 });
-const { object, metadata, error } = await client.extract({
+const { object, metadata, verification, error } = await client.extract({
   file: './invoice.pdf',
   schema,
+  enableVerification: true, // Enable math verification
 });
 if (!error && object) {
   console.log(object.invoice_number); // Fully typed!
   // Access field-level confidence and evidence
-  console.log(`Overall confidence: ${metadata.confidenceScore}`);
-  metadata.fieldConfidence.forEach((fc) => {
+  console.log(`Overall confidence: ${metadata.confidence_score}`);
+  metadata.field_confidence.forEach((fc) => {
     console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
   });
+  // Access verification results if enabled
+  if (verification) {
+    console.log(`Verification status: ${verification.status}`);
+    verification.checks_run.forEach((check) => {
+      console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
+    });
+  }
 }
 ```
@@ -102,6 +111,50 @@ const { object, metadata } = await client.extract({
 **Default:** `0.85`
+## Math Verification
+Enable automatic math verification to ensure extracted numeric data is mathematically consistent:
+```typescript
+const { object, verification } = await client.extract({
+  file: './invoice.pdf',
+  schema,
+  enableVerification: true, // Enable math verification
+});
+if (verification) {
+  console.log(`Verification status: ${verification.status}`);
+  console.log(`Checks passed: ${verification.checks_passed}`);
+  console.log(`Checks failed: ${verification.checks_failed}`);
+  verification.checks_run.forEach((check) => {
+    console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
+    console.log(`  Fields: ${check.fields.join(', ')}`);
+    console.log(`  Expected: ${check.expected}, Actual: ${check.actual}`);
+    console.log(`  Delta: ${check.delta}`);
+  });
+}
+```
+### Verification Status Values
+| Status | Description |
+|--------|-------------|
+| `PASSED` | All math checks passed |
+| `FAILED` | One or more math checks failed |
+| `PARTIAL` | Some checks passed, some failed or couldn't be verified |
+| `CANNOT_VERIFY` | Required fields are missing (not a math error) |
+| `NO_RULES` | No verifiable fields detected in schema |
+### Supported Verification Rules
+- **HORIZONTAL_SUM**: Verifies `total = subtotal + tax`
+- **VERTICAL_SUM**: Verifies `subtotal = sum(line_items)`
+### Shadow Extraction
+When `enableVerification: true` and only a single verifiable field is requested (e.g., just `total`), Parsefy automatically extracts supporting fields in the background for verification, then removes them from the response.
 ## Response Format
 ```typescript
@@ -111,15 +164,13 @@ interface ExtractResult<T> {
   // Metadata about the extraction
   metadata: {
-    processingTimeMs: number;     // Processing time in milliseconds
-    inputTokens: number;          // Input tokens used
-    outputTokens: number;         // Output tokens generated
+    processing_time_ms: number;     // Processing time in milliseconds
     credits: number;              // Credits consumed (1 credit = 1 page)
-    fallbackTriggered: boolean;   // Whether fallback model was used
+    fallback_triggered: boolean;   // Whether fallback model was used
     // 🆕 Field-level confidence and evidence
-    confidenceScore: number;      // Overall confidence (0.0 to 1.0)
-    fieldConfidence: Array<{
+    confidence_score: number;      // Overall confidence (0.0 to 1.0)
+    field_confidence: Array<{
       field: string;              // JSON path (e.g., "$.invoice_number")
       score: number;              // Confidence score (0.0 to 1.0)
       reason: string;             // "Exact match", "Inferred from header", etc.
@@ -129,6 +180,23 @@ interface ExtractResult<T> {
     issues: string[];             // Warnings or anomalies detected
   };
+  // Math verification results (only present if enableVerification was true)
+  verification?: {
+    status: "PASSED" | "FAILED" | "PARTIAL" | "CANNOT_VERIFY" | "NO_RULES";
+    checks_passed: number;
+    checks_failed: number;
+    cannot_verify_count: number;
+    checks_run: Array<{
+      type: string;              // e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"
+      status: string;
+      fields: string[];
+      passed: boolean;
+      delta: number;
+      expected: number;
+      actual: number;
+    }>;
+  };
   // Error details if extraction failed
   error: {
     code: string;
@@ -140,7 +208,11 @@ interface ExtractResult<T> {
 ### Example Response
 ```typescript
-const { object, metadata } = await client.extract({ file, schema });
+const { object, metadata, verification } = await client.extract({
+  file,
+  schema,
+  enableVerification: true
+});
 // object:
 {
@@ -150,9 +222,9 @@ const { object, metadata } = await client.extract({ file, schema });
   vendor: "Acme Corp"
 }
-// metadata.confidenceScore: 0.94
+// metadata.confidence_score: 0.94
-// metadata.fieldConfidence:
+// metadata.field_confidence:
 [
   { field: "$.invoice_number", score: 0.98, reason: "Exact match", page: 1, text: "Invoice # INV-2024-0042" },
   { field: "$.date", score: 0.95, reason: "Exact match", page: 1, text: "Date: 01/15/2024" },
@@ -161,6 +233,25 @@ const { object, metadata } = await client.extract({ file, schema });
 ]
 // metadata.issues: []
+// verification (only present if enableVerification was true):
+{
+  status: "PASSED",
+  checks_passed: 1,
+  checks_failed: 0,
+  cannot_verify_count: 0,
+  checks_run: [
+    {
+      type: "HORIZONTAL_SUM",
+      status: "PASSED",
+      fields: ["total", "subtotal", "tax"],
+      passed: true,
+      delta: 0.0,
+      expected: 1250.00,
+      actual: 1250.00
+    }
+  ]
+}
 ```
 ## Configuration
@@ -196,6 +287,7 @@ const client = new Parsefy({
 | `file` | `File \| Blob \| Buffer \| string` | required | Document to extract from |
 | `schema` | `z.ZodType` | required | Zod schema defining extraction structure |
 | `confidenceThreshold` | `number` | `0.85` | Minimum confidence before triggering fallback |
+| `enableVerification` | `boolean` | `false` | Enable math verification (includes shadow extraction) |
 ## Usage
@@ -286,8 +378,8 @@ app.post('/extract', upload.single('document'), async (req, res) => {
   res.json({
     data: object,
-    confidence: metadata.confidenceScore,
-    fieldDetails: metadata.fieldConfidence,
+    confidence: metadata.confidence_score,
+    fieldDetails: metadata.field_confidence,
     error,
   });
 });
@@ -313,7 +405,7 @@ app.post('/extract', async (c) => {
   return c.json({
     data: object,
-    confidence: metadata.confidenceScore,
+    confidence: metadata.confidence_score,
     issues: metadata.issues,
     error,
   });
@@ -334,13 +426,13 @@ try {
   // Extraction-level errors (request succeeded, but extraction failed)
   if (error) {
     console.error(`Extraction failed: [${error.code}] ${error.message}`);
-    console.log(`Fallback triggered: ${metadata.fallbackTriggered}`);
+    console.log(`Fallback triggered: ${metadata.fallback_triggered}`);
     console.log(`Issues: ${metadata.issues.join(', ')}`);
     return;
   }
   // Check for low confidence fields
-  const lowConfidence = metadata.fieldConfidence.filter((fc) => fc.score < 0.80);
+  const lowConfidence = metadata.field_confidence.filter((fc) => fc.score < 0.80);
   if (lowConfidence.length > 0) {
     console.warn('Low confidence fields:', lowConfidence);
   }
@@ -392,6 +484,9 @@ import type {
   ExtractResult,
   ExtractionMetadata,
   FieldConfidence,
+  Verification,
+  VerificationStatus,
+  VerificationCheck,
   APIErrorResponse,
 } from 'parsefy';

package/dist/index.cjs CHANGED

@@ -1,2 +1,2 @@
-'use strict';var zodToJsonSchema=require('zod-to-json-schema');var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,g="https://api.parsefy.io",x=6e4;var s=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends s{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends s{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},a=class extends s{constructor(e){super(e),this.name="ValidationError";}};function h(){return typeof process<"u"&&process.versions?.node!==void 0}function R(t){let e=zodToJsonSchema.zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function b(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function w(t){if(!b(t)){let r=Object.keys(d).join(", ");throw new a(`Unsupported file type. Supported types: ${r}`)}}function m(t){if(t===0)throw new a("File is empty");if(t>l){let e=l/1048576;throw new a(`File size exceeds maximum limit of ${e}MB`)}}function F(t){let e=t._meta||{confidence_score:1,field_confidence:[],issues:[]};return {object:t.object,metadata:{processingTimeMs:t.metadata.processing_time_ms,inputTokens:t.metadata.input_tokens,outputTokens:t.metadata.output_tokens,credits:t.metadata.credits,fallbackTriggered:t.metadata.fallback_triggered,confidenceScore:e.confidence_score,fieldConfidence:e.field_confidence.map(r=>({field:r.field,score:r.score,reason:r.reason,page:r.page,text:r.text})),issues:e.issues},error:t.error}}function T(t,e){let r=b(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function I(t){if(!h())throw new a("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new a(`File not found: ${t}`);let o=r.basename(t);w(o);let c=e.readFileSync(t);return m(c.length),{buffer:c,filename:o}}async function _(t){if(typeof t=="string"){let{buffer:e,filename:r}=await I(t);return T(e,r)}if(Buffer.isBuffer(t))return m(t.length),T(t,"document.pdf");if(t instanceof File)return w(t.name),m(t.size),t;if(t instanceof Blob)return m(t.size),t;throw new a("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function S(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var E=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new a("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||g,this.timeout=r.timeout||x;}getEnvApiKey(){return h()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:c}=e,n=R(o),f=await _(r),i=new FormData;return i.append("file",f),i.append("output_schema",JSON.stringify(n)),i.append("confidence_threshold",String(c??.85)),this.makeRequestWithRetry(i)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let c=S(r);return await P(c),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,c=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(c),!n.ok){let i=await this.parseErrorResponse(n);throw new p(i.message||`API request failed with status ${n.status}`,n.status,i)}let f;try{f=await n.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(f)}catch(i){throw new s(`Failed to transform API response: ${i instanceof Error?i.message:String(i)}`,"TRANSFORM_ERROR")}}catch(n){throw clearTimeout(c),n instanceof Error&&n.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof s?n:n instanceof TypeError&&n.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${n.message}`,"NETWORK_ERROR"):n instanceof TypeError?new s(`Type error: ${n.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
-exports.APIError=p;exports.DEFAULT_CONFIDENCE_THRESHOLD=u;exports.ExtractionError=y;exports.Parsefy=E;exports.ParsefyError=s;exports.ValidationError=a;
+'use strict';var zodToJsonSchema=require('zod-to-json-schema');var m=.85,l={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},y=10*1024*1024,x="https://api.parsefy.io",b=6e4;var s=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},f=class extends s{constructor(t,r,n){super(t),this.name="APIError",this.statusCode=r,this.response=n;}},h=class extends s{constructor(t,r,n){super(t,r),this.name="ExtractionError",this.metadata=n;}},i=class extends s{constructor(t){super(t),this.name="ValidationError";}};function g(){return typeof process<"u"&&process.versions?.node!==void 0}function R(e){let t=zodToJsonSchema.zodToJsonSchema(e,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in t&&delete t.$schema,t}function T(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&l[t]||null}function w(e){if(!T(e)){let r=Object.keys(l).join(", ");throw new i(`Unsupported file type. 
Supported types: ${r}`)}}function d(e){if(e===0)throw new i("File is empty");if(e>y){let t=y/1048576;throw new i(`File size exceeds maximum limit of ${t}MB`)}}function F(e){let t=e._meta||{confidence_score:1,field_confidence:[],issues:[]},r={object:e.object,metadata:{processing_time_ms:e.metadata.processing_time_ms,credits:e.metadata.credits,fallback_triggered:e.metadata.fallback_triggered,confidence_score:t.confidence_score,field_confidence:t.field_confidence.map(n=>({field:n.field,score:n.score,reason:n.reason,page:n.page,text:n.text})),issues:t.issues},error:e.error};return e.verification&&(r.verification={status:e.verification.status,checks_passed:e.verification.checks_passed,checks_failed:e.verification.checks_failed,cannot_verify_count:e.verification.cannot_verify_count,checks_run:e.verification.checks_run.map(n=>({type:n.type,status:n.status,fields:n.fields,passed:n.passed,delta:n.delta,expected:n.expected,actual:n.actual}))}),r}function _(e,t){let r=T(t)||"application/octet-stream",n=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([n],t,{type:r}):new Blob([n],{type:r})}async function I(e){if(!g())throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new i(`File not found: ${e}`);let n=r.basename(e);w(n);let a=t.readFileSync(e);return d(a.length),{buffer:a,filename:n}}async function S(e){if(typeof e=="string"){let{buffer:t,filename:r}=await I(e);return _(t,r)}if(Buffer.isBuffer(e))return d(e.length),_(e,"document.pdf");if(e instanceof File)return w(e.name),d(e.size),e;if(e instanceof Blob)return d(e.size),e;throw new i("Invalid file input. 
Expected File, Blob, Buffer, or file path string.")}function A(e){return new Promise(t=>setTimeout(t,e))}function P(e,t=1e3){let r=t*Math.pow(2,e),n=Math.random()*.1*r;return Math.min(r+n,3e4)}var E=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||x,this.timeout=r.timeout||b;}getEnvApiKey(){return g()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:n,confidenceThreshold:a,enableVerification:o}=t,u=R(n),c=await S(r),p=new FormData;return p.append("file",c),p.append("output_schema",JSON.stringify(u)),p.append("confidence_threshold",String(a??.85)),o!==void 0&&p.append("enable_verification",String(o)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(n){if(n instanceof f&&n.statusCode===429&&r<this.maxRetries){let a=P(r);return await A(a),this.makeRequestWithRetry(t,r+1)}throw n}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,n=new AbortController,a=setTimeout(()=>n.abort(),this.timeout);try{let o=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:n.signal});if(clearTimeout(a),!o.ok){let c=await this.parseErrorResponse(o);throw new f(c.message||`API request failed with status ${o.status}`,o.status,c)}let u;try{u=await o.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(u)}catch(c){throw new s(`Failed to transform API response: ${c instanceof Error?c.message:String(c)}`,"TRANSFORM_ERROR")}}catch(o){throw clearTimeout(a),o instanceof Error&&o.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):o instanceof s?o:o instanceof TypeError&&o.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`,"NETWORK_ERROR"):o instanceof TypeError?new s(`Type error: ${o.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${o instanceof Error?o.message:String(o)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
+exports.APIError=f;exports.DEFAULT_CONFIDENCE_THRESHOLD=m;exports.ExtractionError=h;exports.Parsefy=E;exports.ParsefyError=s;exports.ValidationError=i;

package/dist/index.d.cts CHANGED

@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
      * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
      */
     confidenceThreshold?: number;
+    /**
+     * Enable math verification (includes shadow extraction). Defaults to false.
+     *
+     * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
+     * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
+     * supporting fields are automatically extracted in the background for verification.
+     */
+    enableVerification?: boolean;
 }
 /**
  * Confidence details for a single extracted field.
@@ -59,22 +67,56 @@ interface FieldConfidence {
  */
 interface ExtractionMetadata {
     /** Time taken to process the document in milliseconds. */
-    processingTimeMs: number;
-    /** Number of input tokens used. */
-    inputTokens: number;
-    /** Number of output tokens generated. */
-    outputTokens: number;
+    processing_time_ms: number;
     /** Number of credits consumed (1 credit = 1 page). */
     credits: number;
     /** Whether the fallback model was triggered for higher accuracy. */
-    fallbackTriggered: boolean;
+    fallback_triggered: boolean;
     /** Overall confidence score for the extraction (0.0 to 1.0). */
-    confidenceScore: number;
+    confidence_score: number;
     /** Per-field confidence details with evidence and explanations. */
-    fieldConfidence: FieldConfidence[];
+    field_confidence: FieldConfidence[];
     /** List of issues or warnings encountered during extraction. */
     issues: string[];
 }
+/**
+ * Verification status values.
+ */
+type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
+/**
+ * Individual verification check result.
+ */
+interface VerificationCheck {
+    /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
+    type: string;
+    /** Status of this check. */
+    status: string;
+    /** Fields involved in this check. */
+    fields: string[];
+    /** Whether this check passed. */
+    passed: boolean;
+    /** Difference between expected and actual values. */
+    delta: number;
+    /** Expected value based on the rule. */
+    expected: number;
+    /** Actual extracted value. */
+    actual: number;
+}
+/**
+ * Math verification results.
+ */
+interface Verification {
+    /** Overall verification status. */
+    status: VerificationStatus;
+    /** Number of checks that passed. */
+    checks_passed: number;
+    /** Number of checks that failed. */
+    checks_failed: number;
+    /** Number of checks that could not be verified. */
+    cannot_verify_count: number;
+    /** Detailed results for each check that was run. */
+    checks_run: VerificationCheck[];
+}
 /**
  * Error response from the API.
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
     object: T | null;
     /** Metadata about the extraction process. */
     metadata: ExtractionMetadata;
+    /** Math verification results (only present if enableVerification was true). */
+    verification?: Verification;
     /** Error details if extraction failed, or null on success. */
     error: APIErrorResponse | null;
 }
@@ -126,7 +170,7 @@ interface ExtractResult<T> {
  * });
  *
  * // Check per-field confidence and evidence
- * metadata.fieldConfidence.forEach((fc) => {
+ * metadata.field_confidence.forEach((fc) => {
  *   console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
  * });
  * ```
@@ -184,20 +228,29 @@ declare class Parsefy {
      *   due_date: z.string().optional().describe('Payment due date'),
      * });
      *
-     * const { object, metadata, error } = await client.extract({
+     * const { object, metadata, verification, error } = await client.extract({
      *   file: './invoice.pdf',
      *   schema,
      *   confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
+     *   enableVerification: true, // Enable math verification
      * });
      *
      * if (!error && object) {
      *   console.log(object.invoice_number); // Fully typed!
      *
      *   // Access field-level confidence and evidence
-     *   console.log(`Overall confidence: ${metadata.confidenceScore}`);
-     *   metadata.fieldConfidence.forEach((fc) => {
+     *   console.log(`Overall confidence: ${metadata.confidence_score}`);
+     *   metadata.field_confidence.forEach((fc) => {
      *     console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
      *   });
+     *
+     *   // Access verification results if enabled
+     *   if (verification) {
+     *     console.log(`Verification status: ${verification.status}`);
+     *     verification.checks_run.forEach((check) => {
+     *       console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
+     *     });
+     *   }
      * }
      * ```
      */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
     constructor(message: string);
 }
-export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
+export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };

package/dist/index.d.mts CHANGED

@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
      * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
      */
     confidenceThreshold?: number;
+    /**
+     * Enable math verification (includes shadow extraction). Defaults to false.
+     *
+     * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
+     * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
+     * supporting fields are automatically extracted in the background for verification.
+     */
+    enableVerification?: boolean;
 }
 /**
  * Confidence details for a single extracted field.
@@ -59,22 +67,56 @@ interface FieldConfidence {
  */
 interface ExtractionMetadata {
     /** Time taken to process the document in milliseconds. */
-    processingTimeMs: number;
-    /** Number of input tokens used. */
-    inputTokens: number;
-    /** Number of output tokens generated. */
-    outputTokens: number;
+    processing_time_ms: number;
     /** Number of credits consumed (1 credit = 1 page). */
     credits: number;
     /** Whether the fallback model was triggered for higher accuracy. */
-    fallbackTriggered: boolean;
+    fallback_triggered: boolean;
     /** Overall confidence score for the extraction (0.0 to 1.0). */
-    confidenceScore: number;
+    confidence_score: number;
     /** Per-field confidence details with evidence and explanations. */
-    fieldConfidence: FieldConfidence[];
+    field_confidence: FieldConfidence[];
     /** List of issues or warnings encountered during extraction. */
     issues: string[];
 }
+/**
+ * Verification status values.
+ */
+type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
+/**
+ * Individual verification check result.
+ */
+interface VerificationCheck {
+    /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
+    type: string;
+    /** Status of this check. */
+    status: string;
+    /** Fields involved in this check. */
+    fields: string[];
+    /** Whether this check passed. */
+    passed: boolean;
+    /** Difference between expected and actual values. */
+    delta: number;
+    /** Expected value based on the rule. */
+    expected: number;
+    /** Actual extracted value. */
+    actual: number;
+}
+/**
+ * Math verification results.
+ */
+interface Verification {
+    /** Overall verification status. */
+    status: VerificationStatus;
+    /** Number of checks that passed. */
+    checks_passed: number;
+    /** Number of checks that failed. */
+    checks_failed: number;
+    /** Number of checks that could not be verified. */
+    cannot_verify_count: number;
+    /** Detailed results for each check that was run. */
+    checks_run: VerificationCheck[];
+}
 /**
  * Error response from the API.
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
     object: T | null;
     /** Metadata about the extraction process. */
     metadata: ExtractionMetadata;
+    /** Math verification results (only present if enableVerification was true). */
+    verification?: Verification;
     /** Error details if extraction failed, or null on success. */
     error: APIErrorResponse | null;
 }
@@ -126,7 +170,7 @@ interface ExtractResult<T> {
  * });
  *
  * // Check per-field confidence and evidence
- * metadata.fieldConfidence.forEach((fc) => {
+ * metadata.field_confidence.forEach((fc) => {
  *   console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
  * });
  * ```
@@ -184,20 +228,29 @@ declare class Parsefy {
      *   due_date: z.string().optional().describe('Payment due date'),
      * });
      *
-     * const { object, metadata, error } = await client.extract({
+     * const { object, metadata, verification, error } = await client.extract({
      *   file: './invoice.pdf',
      *   schema,
      *   confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
+     *   enableVerification: true, // Enable math verification
      * });
      *
      * if (!error && object) {
      *   console.log(object.invoice_number); // Fully typed!
      *
      *   // Access field-level confidence and evidence
-     *   console.log(`Overall confidence: ${metadata.confidenceScore}`);
-     *   metadata.fieldConfidence.forEach((fc) => {
+     *   console.log(`Overall confidence: ${metadata.confidence_score}`);
+     *   metadata.field_confidence.forEach((fc) => {
      *     console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
      *   });
+     *
+     *   // Access verification results if enabled
+     *   if (verification) {
+     *     console.log(`Verification status: ${verification.status}`);
+     *     verification.checks_run.forEach((check) => {
+     *       console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
+     *     });
+     *   }
      * }
      * ```
      */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
     constructor(message: string);
 }
-export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
+export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };

package/dist/index.d.ts CHANGED

@@ -37,6 +37,14 @@ interface ExtractOptions<T extends z.ZodType> {
      * appear in all documents. This prevents unnecessary fallback triggers and reduces costs.
      */
     confidenceThreshold?: number;
+    /**
+     * Enable math verification (includes shadow extraction). Defaults to false.
+     *
+     * When enabled, Parsefy automatically verifies mathematical consistency of numeric data
+     * (totals, subtotals, taxes, line items). If only a single verifiable field is requested,
+     * supporting fields are automatically extracted in the background for verification.
+     */
+    enableVerification?: boolean;
 }
 /**
  * Confidence details for a single extracted field.
@@ -59,22 +67,56 @@ interface FieldConfidence {
  */
 interface ExtractionMetadata {
     /** Time taken to process the document in milliseconds. */
-    processingTimeMs: number;
-    /** Number of input tokens used. */
-    inputTokens: number;
-    /** Number of output tokens generated. */
-    outputTokens: number;
+    processing_time_ms: number;
     /** Number of credits consumed (1 credit = 1 page). */
     credits: number;
     /** Whether the fallback model was triggered for higher accuracy. */
-    fallbackTriggered: boolean;
+    fallback_triggered: boolean;
     /** Overall confidence score for the extraction (0.0 to 1.0). */
-    confidenceScore: number;
+    confidence_score: number;
     /** Per-field confidence details with evidence and explanations. */
-    fieldConfidence: FieldConfidence[];
+    field_confidence: FieldConfidence[];
     /** List of issues or warnings encountered during extraction. */
     issues: string[];
 }
+/**
+ * Verification status values.
+ */
+type VerificationStatus = 'PASSED' | 'FAILED' | 'PARTIAL' | 'CANNOT_VERIFY' | 'NO_RULES';
+/**
+ * Individual verification check result.
+ */
+interface VerificationCheck {
+    /** Type of verification check (e.g., "HORIZONTAL_SUM", "VERTICAL_SUM"). */
+    type: string;
+    /** Status of this check. */
+    status: string;
+    /** Fields involved in this check. */
+    fields: string[];
+    /** Whether this check passed. */
+    passed: boolean;
+    /** Difference between expected and actual values. */
+    delta: number;
+    /** Expected value based on the rule. */
+    expected: number;
+    /** Actual extracted value. */
+    actual: number;
+}
+/**
+ * Math verification results.
+ */
+interface Verification {
+    /** Overall verification status. */
+    status: VerificationStatus;
+    /** Number of checks that passed. */
+    checks_passed: number;
+    /** Number of checks that failed. */
+    checks_failed: number;
+    /** Number of checks that could not be verified. */
+    cannot_verify_count: number;
+    /** Detailed results for each check that was run. */
+    checks_run: VerificationCheck[];
+}
 /**
  * Error response from the API.
  */
@@ -92,6 +134,8 @@ interface ExtractResult<T> {
     object: T | null;
     /** Metadata about the extraction process. */
     metadata: ExtractionMetadata;
+    /** Math verification results (only present if enableVerification was true). */
+    verification?: Verification;
     /** Error details if extraction failed, or null on success. */
     error: APIErrorResponse | null;
 }
@@ -126,7 +170,7 @@ interface ExtractResult<T> {
  * });
  *
  * // Check per-field confidence and evidence
- * metadata.fieldConfidence.forEach((fc) => {
+ * metadata.field_confidence.forEach((fc) => {
  *   console.log(`${fc.field}: ${fc.score} - "${fc.text}"`);
  * });
  * ```
@@ -184,20 +228,29 @@ declare class Parsefy {
      *   due_date: z.string().optional().describe('Payment due date'),
      * });
      *
-     * const { object, metadata, error } = await client.extract({
+     * const { object, metadata, verification, error } = await client.extract({
      *   file: './invoice.pdf',
      *   schema,
      *   confidenceThreshold: 0.85, // Lower = faster, Higher = more accurate
+     *   enableVerification: true, // Enable math verification
      * });
      *
      * if (!error && object) {
      *   console.log(object.invoice_number); // Fully typed!
      *
      *   // Access field-level confidence and evidence
-     *   console.log(`Overall confidence: ${metadata.confidenceScore}`);
-     *   metadata.fieldConfidence.forEach((fc) => {
+     *   console.log(`Overall confidence: ${metadata.confidence_score}`);
+     *   metadata.field_confidence.forEach((fc) => {
      *     console.log(`${fc.field}: ${fc.score} (${fc.reason}) - "${fc.text}"`);
      *   });
+     *
+     *   // Access verification results if enabled
+     *   if (verification) {
+     *     console.log(`Verification status: ${verification.status}`);
+     *     verification.checks_run.forEach((check) => {
+     *       console.log(`${check.type}: ${check.passed ? 'PASSED' : 'FAILED'}`);
+     *     });
+     *   }
      * }
      * ```
      */
@@ -250,4 +303,4 @@ declare class ValidationError extends ParsefyError {
     constructor(message: string);
 }
-export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
+export { APIError, type APIErrorResponse, DEFAULT_CONFIDENCE_THRESHOLD, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, type FieldConfidence, Parsefy, type ParsefyConfig, ParsefyError, ValidationError, type Verification, type VerificationCheck, type VerificationStatus };

package/dist/index.mjs CHANGED Viewed

@@ -1,2 +1,2 @@
-import {zodToJsonSchema}from'zod-to-json-schema';var u=.85,d={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},l=10*1024*1024,g="https://api.parsefy.io",x=6e4;var s=class extends Error{constructor(e,r){super(e),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},p=class extends s{constructor(e,r,o){super(e),this.name="APIError",this.statusCode=r,this.response=o;}},y=class extends s{constructor(e,r,o){super(e,r),this.name="ExtractionError",this.metadata=o;}},a=class extends s{constructor(e){super(e),this.name="ValidationError";}};function h(){return typeof process<"u"&&process.versions?.node!==void 0}function R(t){let e=zodToJsonSchema(t,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in e&&delete e.$schema,e}function b(t){let e=t.toLowerCase().match(/\.[^.]+$/)?.[0];return e&&d[e]||null}function w(t){if(!b(t)){let r=Object.keys(d).join(", ");throw new a(`Unsupported file type. Supported types: ${r}`)}}function m(t){if(t===0)throw new a("File is empty");if(t>l){let e=l/1048576;throw new a(`File size exceeds maximum limit of ${e}MB`)}}function F(t){let e=t._meta||{confidence_score:1,field_confidence:[],issues:[]};return {object:t.object,metadata:{processingTimeMs:t.metadata.processing_time_ms,inputTokens:t.metadata.input_tokens,outputTokens:t.metadata.output_tokens,credits:t.metadata.credits,fallbackTriggered:t.metadata.fallback_triggered,confidenceScore:e.confidence_score,fieldConfidence:e.field_confidence.map(r=>({field:r.field,score:r.score,reason:r.reason,page:r.page,text:r.text})),issues:e.issues},error:t.error}}function T(t,e){let r=b(e)||"application/octet-stream",o=t.buffer.slice(t.byteOffset,t.byteOffset+t.byteLength);return typeof File<"u"?new File([o],e,{type:r}):new Blob([o],{type:r})}async function I(t){if(!h())throw new a("File path strings are only supported in Node.js. 
Use File or Blob in the browser.");let e=await import('fs'),r=await import('path');if(!e.existsSync(t))throw new a(`File not found: ${t}`);let o=r.basename(t);w(o);let c=e.readFileSync(t);return m(c.length),{buffer:c,filename:o}}async function _(t){if(typeof t=="string"){let{buffer:e,filename:r}=await I(t);return T(e,r)}if(Buffer.isBuffer(t))return m(t.length),T(t,"document.pdf");if(t instanceof File)return w(t.name),m(t.size),t;if(t instanceof Blob)return m(t.size),t;throw new a("Invalid file input. Expected File, Blob, Buffer, or file path string.")}function P(t){return new Promise(e=>setTimeout(e,t))}function S(t,e=1e3){let r=e*Math.pow(2,t),o=Math.random()*.1*r;return Math.min(r+o,3e4)}var E=class{constructor(e){this.maxRetries=3;let r={};if(typeof e=="string"?r={apiKey:e}:e&&(r=e),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new a("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||g,this.timeout=r.timeout||x;}getEnvApiKey(){return h()&&process.env.PARSEFY_API_KEY||""}async extract(e){let{file:r,schema:o,confidenceThreshold:c}=e,n=R(o),f=await _(r),i=new FormData;return i.append("file",f),i.append("output_schema",JSON.stringify(n)),i.append("confidence_threshold",String(c??.85)),this.makeRequestWithRetry(i)}async makeRequestWithRetry(e,r=0){try{return await this.makeRequest(e)}catch(o){if(o instanceof p&&o.statusCode===429&&r<this.maxRetries){let c=S(r);return await P(c),this.makeRequestWithRetry(e,r+1)}throw o}}async makeRequest(e){let r=`${this.baseUrl}/v1/extract`,o=new AbortController,c=setTimeout(()=>o.abort(),this.timeout);try{let n=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:e,signal:o.signal});if(clearTimeout(c),!n.ok){let i=await this.parseErrorResponse(n);throw new p(i.message||`API request failed with status ${n.status}`,n.status,i)}let f;try{f=await n.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(f)}catch(i){throw new s(`Failed to transform API response: ${i instanceof Error?i.message:String(i)}`,"TRANSFORM_ERROR")}}catch(n){throw clearTimeout(c),n instanceof Error&&n.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):n instanceof s?n:n instanceof TypeError&&n.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${n.message}`,"NETWORK_ERROR"):n instanceof TypeError?new s(`Type error: ${n.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${n instanceof Error?n.message:String(n)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(e){try{return await e.json()}catch{try{return {message:await e.text()||e.statusText}}catch{return {message:e.statusText}}}}};
-export{p as APIError,u as DEFAULT_CONFIDENCE_THRESHOLD,y as ExtractionError,E as Parsefy,s as ParsefyError,a as ValidationError};
+import {zodToJsonSchema}from'zod-to-json-schema';var m=.85,l={".pdf":"application/pdf",".docx":"application/vnd.openxmlformats-officedocument.wordprocessingml.document"},y=10*1024*1024,x="https://api.parsefy.io",b=6e4;var s=class extends Error{constructor(t,r){super(t),this.name="ParsefyError",this.code=r,typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,this.constructor);}},f=class extends s{constructor(t,r,n){super(t),this.name="APIError",this.statusCode=r,this.response=n;}},h=class extends s{constructor(t,r,n){super(t,r),this.name="ExtractionError",this.metadata=n;}},i=class extends s{constructor(t){super(t),this.name="ValidationError";}};function g(){return typeof process<"u"&&process.versions?.node!==void 0}function R(e){let t=zodToJsonSchema(e,{$refStrategy:"none",target:"jsonSchema7"});return "$schema"in t&&delete t.$schema,t}function T(e){let t=e.toLowerCase().match(/\.[^.]+$/)?.[0];return t&&l[t]||null}function w(e){if(!T(e)){let r=Object.keys(l).join(", ");throw new i(`Unsupported file type. 
Supported types: ${r}`)}}function d(e){if(e===0)throw new i("File is empty");if(e>y){let t=y/1048576;throw new i(`File size exceeds maximum limit of ${t}MB`)}}function F(e){let t=e._meta||{confidence_score:1,field_confidence:[],issues:[]},r={object:e.object,metadata:{processing_time_ms:e.metadata.processing_time_ms,credits:e.metadata.credits,fallback_triggered:e.metadata.fallback_triggered,confidence_score:t.confidence_score,field_confidence:t.field_confidence.map(n=>({field:n.field,score:n.score,reason:n.reason,page:n.page,text:n.text})),issues:t.issues},error:e.error};return e.verification&&(r.verification={status:e.verification.status,checks_passed:e.verification.checks_passed,checks_failed:e.verification.checks_failed,cannot_verify_count:e.verification.cannot_verify_count,checks_run:e.verification.checks_run.map(n=>({type:n.type,status:n.status,fields:n.fields,passed:n.passed,delta:n.delta,expected:n.expected,actual:n.actual}))}),r}function _(e,t){let r=T(t)||"application/octet-stream",n=e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength);return typeof File<"u"?new File([n],t,{type:r}):new Blob([n],{type:r})}async function I(e){if(!g())throw new i("File path strings are only supported in Node.js. Use File or Blob in the browser.");let t=await import('fs'),r=await import('path');if(!t.existsSync(e))throw new i(`File not found: ${e}`);let n=r.basename(e);w(n);let a=t.readFileSync(e);return d(a.length),{buffer:a,filename:n}}async function S(e){if(typeof e=="string"){let{buffer:t,filename:r}=await I(e);return _(t,r)}if(Buffer.isBuffer(e))return d(e.length),_(e,"document.pdf");if(e instanceof File)return w(e.name),d(e.size),e;if(e instanceof Blob)return d(e.size),e;throw new i("Invalid file input. 
Expected File, Blob, Buffer, or file path string.")}function A(e){return new Promise(t=>setTimeout(t,e))}function P(e,t=1e3){let r=t*Math.pow(2,e),n=Math.random()*.1*r;return Math.min(r+n,3e4)}var E=class{constructor(t){this.maxRetries=3;let r={};if(typeof t=="string"?r={apiKey:t}:t&&(r=t),this.apiKey=r.apiKey||this.getEnvApiKey(),!this.apiKey)throw new i("API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.");this.baseUrl=r.baseUrl||x,this.timeout=r.timeout||b;}getEnvApiKey(){return g()&&process.env.PARSEFY_API_KEY||""}async extract(t){let{file:r,schema:n,confidenceThreshold:a,enableVerification:o}=t,u=R(n),c=await S(r),p=new FormData;return p.append("file",c),p.append("output_schema",JSON.stringify(u)),p.append("confidence_threshold",String(a??.85)),o!==void 0&&p.append("enable_verification",String(o)),this.makeRequestWithRetry(p)}async makeRequestWithRetry(t,r=0){try{return await this.makeRequest(t)}catch(n){if(n instanceof f&&n.statusCode===429&&r<this.maxRetries){let a=P(r);return await A(a),this.makeRequestWithRetry(t,r+1)}throw n}}async makeRequest(t){let r=`${this.baseUrl}/v1/extract`,n=new AbortController,a=setTimeout(()=>n.abort(),this.timeout);try{let o=await fetch(r,{method:"POST",headers:{Authorization:`Bearer ${this.apiKey}`},body:t,signal:n.signal});if(clearTimeout(a),!o.ok){let c=await this.parseErrorResponse(o);throw new f(c.message||`API request failed with status ${o.status}`,o.status,c)}let u;try{u=await o.json();}catch{throw new s("Failed to parse API response as JSON. 
The API may have returned an invalid response.","PARSE_ERROR")}try{return F(u)}catch(c){throw new s(`Failed to transform API response: ${c instanceof Error?c.message:String(c)}`,"TRANSFORM_ERROR")}}catch(o){throw clearTimeout(a),o instanceof Error&&o.name==="AbortError"?new s(`Request timed out after ${this.timeout}ms`,"TIMEOUT"):o instanceof s?o:o instanceof TypeError&&o.message.includes("fetch")?new s(`Network error: Unable to connect to the Parsefy API. ${o.message}`,"NETWORK_ERROR"):o instanceof TypeError?new s(`Type error: ${o.message}. This may indicate an API response format issue.`,"TYPE_ERROR"):new s(`Unexpected error: ${o instanceof Error?o.message:String(o)}`,"UNKNOWN_ERROR")}}async parseErrorResponse(t){try{return await t.json()}catch{try{return {message:await t.text()||t.statusText}}catch{return {message:t.statusText}}}}};
+export{f as APIError,m as DEFAULT_CONFIDENCE_THRESHOLD,h as ExtractionError,E as Parsefy,s as ParsefyError,i as ValidationError};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "parsefy",
-  "version": "1.0.3",
+  "version": "1.1.0",
   "description": "Official TypeScript SDK for Parsefy - Financial Document Infrastructure for Developers",
   "author": "",
   "license": "MIT",