parsefy 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Parsefy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
package/README.md ADDED
@@ -0,0 +1,307 @@
1
+ # Parsefy
2
+
3
+ Official TypeScript SDK for [Parsefy](https://parsefy.io) – AI-powered document data extraction.
4
+
5
+ Extract structured data from PDFs and DOCX files using Zod schemas with full TypeScript type inference.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install parsefy zod
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```typescript
16
+ import { Parsefy } from 'parsefy';
17
+ import * as z from 'zod';
18
+
19
+ const client = new Parsefy('pk_your_api_key');
20
+
21
+ const schema = z.object({
22
+ invoice_number: z.string().describe('The invoice number'),
23
+ date: z.string().describe('Invoice date in YYYY-MM-DD format'),
24
+ total: z.number().describe('Total amount'),
25
+ });
26
+
27
+ const { object, error } = await client.extract({
28
+ file: './invoice.pdf',
29
+ schema,
30
+ });
31
+
32
+ if (!error && object) {
33
+ console.log(object.invoice_number); // Fully typed!
34
+ }
35
+ ```
36
+
37
+ ## Configuration
38
+
39
+ ### API Key
40
+
41
+ ```typescript
42
+ // Option 1: Pass API key directly
43
+ const client = new Parsefy('pk_your_api_key');
44
+
45
+ // Option 2: Use environment variable
46
+ // Set PARSEFY_API_KEY in your environment
47
+ const client = new Parsefy();
48
+
49
+ // Option 3: Configuration object
50
+ const client = new Parsefy({
51
+ apiKey: 'pk_your_api_key',
52
+ timeout: 120000, // 2 minutes
53
+ });
54
+ ```
55
+
56
+ ### Configuration Options
57
+
58
+ | Option | Type | Default | Description |
59
+ |--------|------|---------|-------------|
60
+ | `apiKey` | `string` | `process.env.PARSEFY_API_KEY` | Your Parsefy API key |
61
+ | `timeout` | `number` | `60000` | Request timeout in ms |
62
+
63
+ ## Usage
64
+
65
+ ### Basic Extraction
66
+
67
+ ```typescript
68
+ import { Parsefy } from 'parsefy';
69
+ import * as z from 'zod';
70
+
71
+ const client = new Parsefy();
72
+
73
+ const schema = z.object({
74
+ name: z.string(),
75
+ email: z.string().email(),
76
+ phone: z.string().optional(),
77
+ });
78
+
79
+ const { object, metadata, error } = await client.extract({
80
+ file: './contact.pdf',
81
+ schema,
82
+ });
83
+
84
+ if (!error) {
85
+ console.log(object);
86
+ }
87
+ ```
88
+
89
+ ### File Input Options
90
+
91
+ The SDK supports multiple file input types. **Files don't need to be on disk** – you can work entirely in memory, which is ideal for building APIs and serverless functions.
92
+
93
+ | Input Type | Usage | Environment |
94
+ |------------|-------|-------------|
95
+ | `string` | File path | Node.js only |
96
+ | `Buffer` | In-memory bytes | Node.js |
97
+ | `File` | From file input or FormData | Browser, Node.js 20+, Edge |
98
+ | `Blob` | Raw binary with MIME type | Universal |
99
+ | `ArrayBuffer` | Wrap in `Blob` first | Universal |
100
+
101
+ ```typescript
102
+ // Node.js: File path (convenience for scripts/CLI)
103
+ const result = await client.extract({
104
+ file: './document.pdf',
105
+ schema,
106
+ });
107
+
108
+ // Node.js: Buffer (in-memory)
109
+ import { readFileSync } from 'fs';
110
+ const result = await client.extract({
111
+ file: readFileSync('./document.pdf'),
112
+ schema,
113
+ });
114
+
115
+ // Browser: File input
116
+ const fileInput = document.querySelector('input[type="file"]');
117
+ const result = await client.extract({
118
+ file: fileInput.files[0],
119
+ schema,
120
+ });
121
+
122
+ // Universal: Blob (with explicit MIME type)
123
+ const result = await client.extract({
124
+ file: new Blob([arrayBuffer], { type: 'application/pdf' }),
125
+ schema,
126
+ });
127
+ ```
128
+
129
+ ### Server-Side / API Usage
130
+
131
+ When building APIs that receive file uploads, files are typically kept in memory. The SDK handles this seamlessly:
132
+
133
+ **Express with Multer:**
134
+
135
+ ```typescript
136
+ import express from 'express';
137
+ import multer from 'multer';
138
+ import { Parsefy } from 'parsefy';
139
+
140
+ const upload = multer(); // Store in memory, not disk
141
+ const app = express();
+ const client = new Parsefy();
142
+
143
+ app.post('/extract', upload.single('document'), async (req, res) => {
144
+ const { object, error } = await client.extract({
145
+ file: req.file.buffer, // Buffer from multer
146
+ schema,
147
+ });
148
+ res.json({ data: object, error });
149
+ });
150
+ ```
151
+
152
+ **Fastify:**
153
+
154
+ ```typescript
155
+ import Fastify from 'fastify';
156
+ import multipart from '@fastify/multipart';
157
+ import { Parsefy } from 'parsefy';
158
+
159
+ const fastify = Fastify();
160
+ await fastify.register(multipart);
161
+ const client = new Parsefy();
162
+
163
+ fastify.post('/extract', async (request) => {
164
+ const data = await request.file();
165
+ const buffer = await data.toBuffer();
166
+
167
+ const { object, error } = await client.extract({
168
+ file: buffer,
169
+ schema,
170
+ });
171
+ return { data: object, error };
172
+ });
173
+ ```
174
+
175
+ **Hono / Cloudflare Workers:**
176
+
177
+ ```typescript
178
+ import { Hono } from 'hono';
179
+ import { Parsefy } from 'parsefy';
180
+
181
+ const app = new Hono();
182
+ const client = new Parsefy();
183
+
184
+ app.post('/extract', async (c) => {
185
+ const formData = await c.req.formData();
186
+ const file = formData.get('document'); // File object
187
+
188
+ const { object, error } = await client.extract({
189
+ file, // File from FormData works directly
190
+ schema,
191
+ });
192
+ return c.json({ data: object, error });
193
+ });
194
+ ```
195
+
196
+ ### Complex Schemas
197
+
198
+ Use `.describe()` to guide the AI extraction:
199
+
200
+ ```typescript
201
+ const invoiceSchema = z.object({
202
+ invoice_number: z.string().describe('The invoice or receipt number'),
203
+ date: z.string().describe('Invoice date in YYYY-MM-DD format'),
204
+ vendor: z.object({
205
+ name: z.string().describe('Company name of the vendor'),
206
+ address: z.string().describe('Full address of the vendor'),
207
+ }),
208
+ line_items: z.array(z.object({
209
+ description: z.string().describe('Item description'),
210
+ quantity: z.number().describe('Number of units'),
211
+ unit_price: z.number().describe('Price per unit'),
212
+ amount: z.number().describe('Total amount for this line'),
213
+ })).describe('List of items on the invoice'),
214
+ subtotal: z.number().describe('Subtotal before tax'),
215
+ tax: z.number().describe('Tax amount'),
216
+ total: z.number().describe('Total amount due'),
217
+ currency: z.string().describe('3-letter currency code (USD, EUR, etc.)'),
218
+ });
219
+
220
+ const { object } = await client.extract({
221
+ file: './invoice.pdf',
222
+ schema: invoiceSchema,
223
+ });
224
+ ```
225
+
226
+ ### Error Handling
227
+
228
+ ```typescript
229
+ import { Parsefy, APIError, ValidationError, ParsefyError } from 'parsefy';
230
+
231
+ try {
232
+ const { object, error, metadata } = await client.extract({
233
+ file: './document.pdf',
234
+ schema,
235
+ });
236
+
237
+ // Extraction-level errors (request succeeded, but extraction failed)
238
+ if (error) {
239
+ console.error(`Extraction failed: [${error.code}] ${error.message}`);
240
+ console.log(`Tokens used: ${metadata.inputTokens} in, ${metadata.outputTokens} out`);
241
+ return;
242
+ }
243
+
244
+ console.log('Success:', object);
245
+ } catch (err) {
246
+ // HTTP/Network errors
247
+ if (err instanceof APIError) {
248
+ console.error(`API Error ${err.statusCode}: ${err.message}`);
249
+ } else if (err instanceof ValidationError) {
250
+ console.error(`Validation Error: ${err.message}`);
251
+ } else if (err instanceof ParsefyError) {
252
+ console.error(`Parsefy Error: ${err.message}`);
253
+ }
254
+ }
255
+ ```
256
+
257
+ ## Response Format
258
+
259
+ ```typescript
260
+ interface ExtractResult<T> {
261
+ // Extracted data matching your schema, or null if extraction failed
262
+ object: T | null;
263
+
264
+ // Metadata about the extraction
265
+ metadata: {
266
+ processingTimeMs: number; // Processing time in milliseconds
267
+ inputTokens: number; // Input tokens used
268
+ outputTokens: number; // Output tokens generated
269
+ credits: number; // Credits consumed (1 credit = 1 page)
270
+ fallbackTriggered: boolean; // Whether fallback model was used
271
+ };
272
+
273
+ // Error details if extraction failed
274
+ error: {
275
+ code: string; // EXTRACTION_FAILED, LLM_ERROR, PARSING_ERROR, TIMEOUT_ERROR
276
+ message: string;
277
+ } | null;
278
+ }
279
+ ```
280
+
281
+ ## Error Types
282
+
283
+ | Error Class | Description |
284
+ |-------------|-------------|
285
+ | `ParsefyError` | Base error class for all Parsefy errors |
286
+ | `APIError` | HTTP errors (4xx/5xx responses) |
287
+ | `ExtractionError` | Exported error type for failed extractions; `extract()` itself reports these through the `error` field of the result instead of throwing |
288
+ | `ValidationError` | Client-side validation errors |
289
+
290
+ ## Supported File Types
291
+
292
+ - **PDF** (`.pdf`) – up to 10MB
293
+ - **DOCX** (`.docx`) – up to 10MB
294
+
295
+ ## Rate Limits
296
+
297
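+ The API allows 1 request per second. When a request is rate limited (HTTP 429), the SDK automatically retries it up to 3 times with exponential backoff (starting at about 1 second, capped at 30 seconds) and throws an `APIError` if the last retry is still rate limited.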
298
+
299
+ ## Requirements
300
+
301
+ - Node.js 18+ (for native `fetch` and `FormData`)
302
+ - Zod 3.x (peer dependency)
303
+
304
+ ## License
305
+
306
+ MIT © [Parsefy](https://parsefy.io)
307
+
package/dist/index.cjs ADDED
@@ -0,0 +1,2 @@
1
'use strict';

/**
 * Parsefy SDK, CommonJS build.
 *
 * Exposes the `Parsefy` client, which uploads a PDF or DOCX document together
 * with a JSON Schema derived from a Zod schema to the Parsefy extraction API,
 * plus the error classes the client throws.
 */
const zodToJsonSchema = require('zod-to-json-schema');

/** MIME type for each supported file extension (lower-case, leading dot). */
const SUPPORTED_MIME_TYPES = {
  '.pdf': 'application/pdf',
  '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
};

/** Largest accepted document size in bytes (10 MB). */
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024;

/** API origin used when the caller does not supply `baseUrl`. */
const DEFAULT_BASE_URL = 'https://api.parsefy.io';

/** Request timeout in milliseconds used when the caller does not supply `timeout`. */
const DEFAULT_TIMEOUT_MS = 60000;

/** Base class for every error raised by the SDK. */
class ParsefyError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} [code] - Optional machine-readable error code.
   */
  constructor(message, code) {
    super(message);
    this.name = 'ParsefyError';
    this.code = code;
    // captureStackTrace is V8-only; it keeps the constructor frames out of the stack.
    if (typeof Error.captureStackTrace === 'function') {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}

/** Raised when the API answers with a non-2xx HTTP status. */
class APIError extends ParsefyError {
  /**
   * @param {string} message - Human-readable description.
   * @param {number} statusCode - HTTP status of the response.
   * @param {unknown} [response] - Error body returned by the API.
   */
  constructor(message, statusCode, response) {
    super(message);
    this.name = 'APIError';
    this.statusCode = statusCode;
    this.response = response;
  }
}

/** Error type describing a failed extraction, carrying the extraction metadata. */
class ExtractionError extends ParsefyError {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Extraction error code.
   * @param {object} metadata - Metadata about the extraction attempt.
   */
  constructor(message, code, metadata) {
    super(message, code);
    this.name = 'ExtractionError';
    this.metadata = metadata;
  }
}

/** Raised for invalid input detected before any request is sent. */
class ValidationError extends ParsefyError {
  /** @param {string} message - Human-readable description. */
  constructor(message) {
    super(message);
    this.name = 'ValidationError';
  }
}

/**
 * Reports whether the code is running under Node.js.
 * @returns {boolean}
 */
function isNode() {
  return typeof process !== 'undefined' && process.versions?.node !== undefined;
}

/**
 * Converts a Zod schema to the JSON Schema document sent to the API.
 * @param {object} schema - Zod schema.
 * @returns {object} JSON Schema (OpenAPI 3 flavour, with `$ref`s inlined).
 */
function toJsonSchema(schema) {
  return zodToJsonSchema.zodToJsonSchema(schema, { $refStrategy: 'none', target: 'openApi3' });
}

/**
 * Looks up the MIME type for a filename by its (case-insensitive) extension.
 * @param {string} filename
 * @returns {string|null} MIME type, or null when the extension is not supported.
 */
function getMimeType(filename) {
  const extension = filename.toLowerCase().match(/\.[^.]+$/)?.[0];
  return (extension && SUPPORTED_MIME_TYPES[extension]) || null;
}

/**
 * Ensures the filename has a supported extension.
 * @param {string} filename
 * @throws {ValidationError} When the extension is not supported.
 */
function assertSupportedFileType(filename) {
  if (!getMimeType(filename)) {
    const supported = Object.keys(SUPPORTED_MIME_TYPES).join(', ');
    throw new ValidationError(`Unsupported file type. Supported types: ${supported}`);
  }
}

/**
 * Ensures a document is neither empty nor larger than the size limit.
 * @param {number} size - Size in bytes.
 * @throws {ValidationError} When the size is 0 or exceeds the limit.
 */
function assertValidFileSize(size) {
  if (size === 0) {
    throw new ValidationError('File is empty');
  }
  if (size > MAX_FILE_SIZE_BYTES) {
    const limitMb = MAX_FILE_SIZE_BYTES / 1048576;
    throw new ValidationError(`File size exceeds maximum limit of ${limitMb}MB`);
  }
}

/**
 * Maps the API's snake_case response onto the SDK's camelCase result shape.
 * @param {object} raw - Parsed JSON body of a successful response.
 * @returns {{object: unknown, metadata: object, error: unknown}}
 */
function normalizeResponse(raw) {
  return {
    object: raw.object,
    metadata: {
      processingTimeMs: raw.metadata.processing_time_ms,
      inputTokens: raw.metadata.input_tokens,
      outputTokens: raw.metadata.output_tokens,
      credits: raw.metadata.credits,
      fallbackTriggered: raw.metadata.fallback_triggered,
    },
    error: raw.error,
  };
}

/**
 * Wraps the bytes of a Buffer in a File (or a Blob where `File` is not a global).
 * @param {Buffer} buffer - Document bytes.
 * @param {string} filename - Name used for the File and for MIME type lookup.
 * @returns {File|Blob}
 */
function bufferToFile(buffer, filename) {
  const type = getMimeType(filename) || 'application/octet-stream';
  // A Buffer may be a view onto a larger ArrayBuffer, so copy only its own range.
  const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
  return typeof File !== 'undefined'
    ? new File([bytes], filename, { type })
    : new Blob([bytes], { type });
}

/**
 * Chooses an upload filename for a nameless Buffer from its leading bytes.
 * DOCX files are ZIP archives and start with "PK\x03\x04"; everything else
 * keeps the historical default of "document.pdf".
 * @param {Buffer} buffer - Document bytes.
 * @returns {string} "document.docx" or "document.pdf".
 */
function guessBufferFilename(buffer) {
  const isZip =
    buffer.length >= 4 &&
    buffer[0] === 0x50 &&
    buffer[1] === 0x4b &&
    buffer[2] === 0x03 &&
    buffer[3] === 0x04;
  return isZip ? 'document.docx' : 'document.pdf';
}

/**
 * Reads and validates a document from disk (Node.js only).
 * @param {string} filePath - Path to the document.
 * @returns {Promise<{buffer: Buffer, filename: string}>}
 * @throws {ValidationError} Outside Node.js, or when the file is missing,
 *   has an unsupported extension, is empty, or is too large.
 */
async function readFileFromPath(filePath) {
  if (!isNode()) {
    throw new ValidationError(
      'File path strings are only supported in Node.js. Use File or Blob in the browser.',
    );
  }
  // Loaded lazily so the bundle can still be evaluated outside Node.js.
  const fs = await import('fs');
  const path = await import('path');
  if (!fs.existsSync(filePath)) {
    throw new ValidationError(`File not found: ${filePath}`);
  }
  const filename = path.basename(filePath);
  assertSupportedFileType(filename);
  const buffer = fs.readFileSync(filePath);
  assertValidFileSize(buffer.length);
  return { buffer, filename };
}

/**
 * Normalizes any accepted file input into a File or Blob ready for FormData.
 *
 * Each runtime-specific global (`Buffer`, `File`, `Blob`) is checked for
 * existence before use: `Buffer` is absent in browsers and `File` is not a
 * global on Node.js 18, and referencing a missing global would throw a
 * ReferenceError instead of reaching the branch that matches the input.
 *
 * @param {File|Blob|Buffer|string} input - Document or path to it.
 * @returns {Promise<File|Blob>}
 * @throws {ValidationError} When the input is of an unsupported kind or fails validation.
 */
async function prepareFile(input) {
  if (typeof input === 'string') {
    const { buffer, filename } = await readFileFromPath(input);
    return bufferToFile(buffer, filename);
  }
  if (typeof Buffer !== 'undefined' && Buffer.isBuffer(input)) {
    assertValidFileSize(input.length);
    return bufferToFile(input, guessBufferFilename(input));
  }
  if (typeof File !== 'undefined' && input instanceof File) {
    assertSupportedFileType(input.name);
    assertValidFileSize(input.size);
    return input;
  }
  if (typeof Blob !== 'undefined' && input instanceof Blob) {
    assertValidFileSize(input.size);
    return input;
  }
  throw new ValidationError('Invalid file input. Expected File, Blob, Buffer, or file path string.');
}

/**
 * Resolves after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Computes the wait before a retry: exponential growth plus up to 10% random
 * jitter, capped at 30 seconds.
 * @param {number} attempt - Zero-based retry attempt.
 * @param {number} [baseMs=1000] - Delay for attempt 0.
 * @returns {number} Delay in milliseconds.
 */
function backoffDelay(attempt, baseMs = 1000) {
  const exponential = baseMs * Math.pow(2, attempt);
  const jitter = Math.random() * 0.1 * exponential;
  return Math.min(exponential + jitter, 30000);
}

/** Client for extracting structured data from documents through the Parsefy API. */
class Parsefy {
  /**
   * @param {string|{apiKey?: string, baseUrl?: string, timeout?: number}} [configOrApiKey]
   *   API key or configuration object. Missing or falsy fields fall back to
   *   PARSEFY_API_KEY (Node.js only), the default base URL and the default timeout.
   * @throws {ValidationError} When no API key can be resolved.
   */
  constructor(configOrApiKey) {
    this.maxRetries = 3;
    let config = {};
    if (typeof configOrApiKey === 'string') {
      config = { apiKey: configOrApiKey };
    } else if (configOrApiKey) {
      config = configOrApiKey;
    }
    this.apiKey = config.apiKey || this.getEnvApiKey();
    if (!this.apiKey) {
      throw new ValidationError(
        'API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable.',
      );
    }
    this.baseUrl = config.baseUrl || DEFAULT_BASE_URL;
    this.timeout = config.timeout || DEFAULT_TIMEOUT_MS;
  }

  /**
   * Reads the API key from the environment.
   * @returns {string} The key, or an empty string outside Node.js or when unset.
   */
  getEnvApiKey() {
    return (isNode() && process.env.PARSEFY_API_KEY) || '';
  }

  /**
   * Extracts structured data from a document.
   * @param {{file: File|Blob|Buffer|string, schema: object}} options
   * @returns {Promise<{object: unknown, metadata: object, error: unknown}>}
   * @throws {ValidationError} For invalid file input.
   * @throws {APIError} For non-2xx responses (429 only after retries are exhausted).
   * @throws {ParsefyError} For timeouts, network failures and unexpected errors.
   */
  async extract(options) {
    const { file, schema } = options;
    const jsonSchema = toJsonSchema(schema);
    const upload = await prepareFile(file);
    const form = new FormData();
    form.append('file', upload);
    form.append('output_schema', JSON.stringify(jsonSchema));
    return this.makeRequestWithRetry(form);
  }

  /**
   * Sends the request, retrying with backoff while the API answers 429.
   * @param {FormData} form - Request body.
   * @param {number} [attempt=0] - Zero-based count of retries already made.
   * @returns {Promise<object>} Normalized extraction result.
   */
  async makeRequestWithRetry(form, attempt = 0) {
    try {
      return await this.makeRequest(form);
    } catch (err) {
      const rateLimited = err instanceof APIError && err.statusCode === 429;
      if (rateLimited && attempt < this.maxRetries) {
        await sleep(backoffDelay(attempt));
        return this.makeRequestWithRetry(form, attempt + 1);
      }
      throw err;
    }
  }

  /**
   * Performs one POST to `/v1/extract` and normalizes the outcome.
   * @param {FormData} form - Request body.
   * @returns {Promise<object>} Normalized extraction result.
   * @throws {APIError} For non-2xx responses.
   * @throws {ParsefyError} With code TIMEOUT, NETWORK_ERROR or UNKNOWN_ERROR.
   */
  async makeRequest(form) {
    const url = `${this.baseUrl}/v1/extract`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);
    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: form,
        signal: controller.signal,
      });
      if (!response.ok) {
        const body = await this.parseErrorResponse(response);
        throw new APIError(
          body.message || `API request failed with status ${response.status}`,
          response.status,
          body,
        );
      }
      return normalizeResponse(await response.json());
    } catch (err) {
      if (err instanceof Error && err.name === 'AbortError') {
        throw new ParsefyError(`Request timed out after ${this.timeout}ms`, 'TIMEOUT');
      }
      if (err instanceof ParsefyError) {
        throw err;
      }
      if (err instanceof TypeError) {
        throw new ParsefyError('Network error: Unable to connect to the Parsefy API', 'NETWORK_ERROR');
      }
      const detail = err instanceof Error ? err.message : String(err);
      throw new ParsefyError(`Unexpected error: ${detail}`, 'UNKNOWN_ERROR');
    } finally {
      // Cleared only here so the timeout also bounds reading the response body.
      clearTimeout(timer);
    }
  }

  /**
   * Reads an error response body without ever throwing.
   *
   * The body is read once as text and then parsed, because a response body can
   * be consumed only a single time: calling `text()` after a failed `json()`
   * would always reject and lose a plain-text error message.
   *
   * @param {Response} response - Non-2xx response.
   * @returns {Promise<object>} Parsed JSON object, or `{ message }` built from
   *   the body text or the status text.
   */
  async parseErrorResponse(response) {
    let raw;
    try {
      raw = await response.text();
    } catch {
      return { message: response.statusText };
    }
    try {
      const parsed = JSON.parse(raw);
      if (parsed !== null && typeof parsed === 'object') {
        return parsed;
      }
    } catch {
      // Not JSON: fall through and report the body as plain text.
    }
    return { message: raw || response.statusText };
  }
}

exports.APIError = APIError;
exports.ExtractionError = ExtractionError;
exports.Parsefy = Parsefy;
exports.ParsefyError = ParsefyError;
exports.ValidationError = ValidationError;
@@ -0,0 +1,183 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Configuration options for the Parsefy client. Every field is optional; a missing or falsy value (empty string, 0) falls back to its default.
5
+ */
6
+ interface ParsefyConfig {
7
+ /** API key for authentication. If not provided, the client reads the PARSEFY_API_KEY environment variable (Node.js only) and throws a ValidationError when no key is found. */
8
+ apiKey?: string;
9
+ /** Base URL for the API; the client appends "/v1/extract" to it. Defaults to https://api.parsefy.io */
10
+ baseUrl?: string;
11
+ /** Request timeout in milliseconds. Defaults to 60000 (60 seconds); 0 is treated as unset and also selects the default. */
12
+ timeout?: number;
13
+ }
14
+ /**
15
+ * Options for the extract method.
16
+ */
17
+ interface ExtractOptions<T extends z.ZodType> {
18
+ /** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). Empty inputs and inputs larger than 10MB are rejected with a ValidationError; File names and file paths must end in .pdf or .docx. */
19
+ file: File | Blob | Buffer | string;
20
+ /** Zod schema defining the structure of data to extract. It is converted to JSON Schema and sent to the API as the output_schema form field. */
21
+ schema: T;
22
+ }
23
+ /**
24
+ * Metadata about the extraction process.
25
+ */
26
+ interface ExtractionMetadata {
27
+ /** Time taken to process the document in milliseconds. */
28
+ processingTimeMs: number;
29
+ /** Number of input tokens used. */
30
+ inputTokens: number;
31
+ /** Number of output tokens generated. */
32
+ outputTokens: number;
33
+ /** Number of credits consumed (1 credit = 1 page). */
34
+ credits: number;
35
+ /** Whether the fallback model was triggered for higher accuracy. */
36
+ fallbackTriggered: boolean;
37
+ }
38
+ /**
39
+ * Error response from the API.
40
+ */
41
+ interface APIErrorResponse {
42
+ /** Error code identifying the type of error. */
43
+ code: string;
44
+ /** Human-readable error message. */
45
+ message: string;
46
+ }
47
+ /**
48
+ * Result of an extraction operation.
49
+ */
50
+ interface ExtractResult<T> {
51
+ /** Extracted data matching the schema, or null if extraction failed. */
52
+ object: T | null;
53
+ /** Metadata about the extraction process. */
54
+ metadata: ExtractionMetadata;
55
+ /** Error details if extraction failed, or null on success. */
56
+ error: APIErrorResponse | null;
57
+ }
58
+
59
+ /**
60
+ * Parsefy client for extracting structured data from documents.
61
+ *
62
+ * @example
63
+ * ```ts
64
+ * import { Parsefy } from 'parsefy';
65
+ * import * as z from 'zod';
66
+ *
67
+ * const client = new Parsefy('pk_your_api_key');
68
+ *
69
+ * const schema = z.object({
70
+ * name: z.string(),
71
+ * total: z.number(),
72
+ * });
73
+ *
74
+ * const { object, error } = await client.extract({
75
+ * file: './invoice.pdf',
76
+ * schema,
77
+ * });
78
+ * ```
79
+ */
80
+ declare class Parsefy {
81
+ private readonly apiKey;
82
+ private readonly baseUrl;
83
+ private readonly timeout;
84
+ private readonly maxRetries;
85
+ /**
86
+ * Creates a new Parsefy client.
87
+ *
88
+ * @param configOrApiKey - API key string or configuration object.
89
+ * If not provided, reads from PARSEFY_API_KEY environment variable.
90
+ *
91
+ * @example
92
+ * ```ts
93
+ * // Using API key directly
94
+ * const client = new Parsefy('pk_your_api_key');
95
+ *
96
+ * // Using configuration object
97
+ * const client = new Parsefy({
98
+ * apiKey: 'pk_your_api_key',
99
+ * timeout: 120000,
100
+ * });
101
+ *
102
+ * // Using environment variable
103
+ * const client = new Parsefy();
104
+ * ```
105
+ */
106
+ constructor(configOrApiKey?: string | ParsefyConfig);
107
+ /**
108
+ * Gets the API key from environment variable.
109
+ */
110
+ private getEnvApiKey;
111
+ /**
112
+ * Extracts structured data from a document using the provided Zod schema.
113
+ *
114
+ * @param options - Extraction options including file and schema.
115
+ * @returns Promise resolving to the extraction result with typed data.
116
+ *
117
+ * @example
118
+ * ```ts
119
+ * const schema = z.object({
120
+ * invoice_number: z.string().describe('The invoice number'),
121
+ * total: z.number().describe('Total amount'),
122
+ * });
123
+ *
124
+ * const { object, metadata, error } = await client.extract({
125
+ * file: './invoice.pdf',
126
+ * schema,
127
+ * });
128
+ *
129
+ * if (!error && object) {
130
+ * console.log(object.invoice_number); // Fully typed!
131
+ * }
132
+ * ```
133
+ */
134
+ extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
135
+ /**
136
+ * Makes a request with retry logic for rate limiting.
137
+ */
138
+ private makeRequestWithRetry;
139
+ /**
140
+ * Makes the actual HTTP request to the API.
141
+ */
142
+ private makeRequest;
143
+ /**
144
+ * Parses error response body safely.
145
+ */
146
+ private parseErrorResponse;
147
+ }
148
+
149
+ /**
150
+ * Base error class for all Parsefy errors.
151
+ */
152
+ declare class ParsefyError extends Error {
153
+ /** Error code, if applicable. The client sets "TIMEOUT", "NETWORK_ERROR" or "UNKNOWN_ERROR" on the request failures it wraps itself. */
154
+ readonly code?: string;
155
+ constructor(message: string, code?: string);
156
+ }
157
+ /**
158
+ * Error thrown when the API returns an HTTP error (4xx/5xx). A 429 response is retried up to 3 times before this error reaches the caller.
159
+ */
160
+ declare class APIError extends ParsefyError {
161
+ /** HTTP status code of the response. */
162
+ readonly statusCode: number;
163
+ /** Error response body, if available: the parsed JSON payload, or an object of the form { message } when the body is not JSON. */
164
+ readonly response?: unknown;
165
+ constructor(message: string, statusCode: number, response?: unknown);
166
+ }
167
+ /**
168
+ * Error describing a failed document extraction (the API reports it in response.error).
169
+ * This is not an HTTP error - the request succeeded but extraction failed. NOTE(review): the bundled client does not appear to construct this class; extract() reports such failures through ExtractResult.error instead - confirm intended usage.
170
+ */
171
+ declare class ExtractionError extends ParsefyError {
172
+ /** Metadata about the extraction attempt. */
173
+ readonly metadata: ExtractionMetadata;
174
+ constructor(message: string, code: string, metadata: ExtractionMetadata);
175
+ }
176
+ /**
177
+ * Error thrown for client-side validation failures.
178
+ */
179
+ declare class ValidationError extends ParsefyError {
180
+ constructor(message: string);
181
+ }
182
+
183
+ export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
@@ -0,0 +1,183 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Configuration options for the Parsefy client. Every field is optional; a missing or falsy value (empty string, 0) falls back to its default.
5
+ */
6
+ interface ParsefyConfig {
7
+ /** API key for authentication. If not provided, the client reads the PARSEFY_API_KEY environment variable (Node.js only) and throws a ValidationError when no key is found. */
8
+ apiKey?: string;
9
+ /** Base URL for the API; the client appends "/v1/extract" to it. Defaults to https://api.parsefy.io */
10
+ baseUrl?: string;
11
+ /** Request timeout in milliseconds. Defaults to 60000 (60 seconds); 0 is treated as unset and also selects the default. */
12
+ timeout?: number;
13
+ }
14
+ /**
15
+ * Options for the extract method.
16
+ */
17
+ interface ExtractOptions<T extends z.ZodType> {
18
+ /** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). Empty inputs and inputs larger than 10MB are rejected with a ValidationError; File names and file paths must end in .pdf or .docx. */
19
+ file: File | Blob | Buffer | string;
20
+ /** Zod schema defining the structure of data to extract. It is converted to JSON Schema and sent to the API as the output_schema form field. */
21
+ schema: T;
22
+ }
23
+ /**
24
+ * Metadata about the extraction process.
25
+ */
26
+ interface ExtractionMetadata {
27
+ /** Time taken to process the document in milliseconds. */
28
+ processingTimeMs: number;
29
+ /** Number of input tokens used. */
30
+ inputTokens: number;
31
+ /** Number of output tokens generated. */
32
+ outputTokens: number;
33
+ /** Number of credits consumed (1 credit = 1 page). */
34
+ credits: number;
35
+ /** Whether the fallback model was triggered for higher accuracy. */
36
+ fallbackTriggered: boolean;
37
+ }
38
+ /**
39
+ * Error response from the API.
40
+ */
41
+ interface APIErrorResponse {
42
+ /** Error code identifying the type of error. */
43
+ code: string;
44
+ /** Human-readable error message. */
45
+ message: string;
46
+ }
47
+ /**
48
+ * Result of an extraction operation.
49
+ */
50
+ interface ExtractResult<T> {
51
+ /** Extracted data matching the schema, or null if extraction failed. */
52
+ object: T | null;
53
+ /** Metadata about the extraction process. */
54
+ metadata: ExtractionMetadata;
55
+ /** Error details if extraction failed, or null on success. */
56
+ error: APIErrorResponse | null;
57
+ }
58
+
59
+ /**
60
+ * Parsefy client for extracting structured data from documents.
61
+ *
62
+ * @example
63
+ * ```ts
64
+ * import { Parsefy } from 'parsefy';
65
+ * import * as z from 'zod';
66
+ *
67
+ * const client = new Parsefy('pk_your_api_key');
68
+ *
69
+ * const schema = z.object({
70
+ * name: z.string(),
71
+ * total: z.number(),
72
+ * });
73
+ *
74
+ * const { object, error } = await client.extract({
75
+ * file: './invoice.pdf',
76
+ * schema,
77
+ * });
78
+ * ```
79
+ */
80
+ declare class Parsefy {
81
+ private readonly apiKey;
82
+ private readonly baseUrl;
83
+ private readonly timeout;
84
+ private readonly maxRetries;
85
+ /**
86
+ * Creates a new Parsefy client.
87
+ *
88
+ * @param configOrApiKey - API key string or configuration object.
89
+ * If not provided, reads from PARSEFY_API_KEY environment variable.
90
+ *
91
+ * @example
92
+ * ```ts
93
+ * // Using API key directly
94
+ * const client = new Parsefy('pk_your_api_key');
95
+ *
96
+ * // Using configuration object
97
+ * const client = new Parsefy({
98
+ * apiKey: 'pk_your_api_key',
99
+ * timeout: 120000,
100
+ * });
101
+ *
102
+ * // Using environment variable
103
+ * const client = new Parsefy();
104
+ * ```
105
+ */
106
+ constructor(configOrApiKey?: string | ParsefyConfig);
107
+ /**
108
+ * Gets the API key from environment variable.
109
+ */
110
+ private getEnvApiKey;
111
+ /**
112
+ * Extracts structured data from a document using the provided Zod schema.
113
+ *
114
+ * @param options - Extraction options including file and schema.
115
+ * @returns Promise resolving to the extraction result with typed data.
116
+ *
117
+ * @example
118
+ * ```ts
119
+ * const schema = z.object({
120
+ * invoice_number: z.string().describe('The invoice number'),
121
+ * total: z.number().describe('Total amount'),
122
+ * });
123
+ *
124
+ * const { object, metadata, error } = await client.extract({
125
+ * file: './invoice.pdf',
126
+ * schema,
127
+ * });
128
+ *
129
+ * if (!error && object) {
130
+ * console.log(object.invoice_number); // Fully typed!
131
+ * }
132
+ * ```
133
+ */
134
+ extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
135
+ /**
136
+ * Makes a request with retry logic for rate limiting.
137
+ */
138
+ private makeRequestWithRetry;
139
+ /**
140
+ * Makes the actual HTTP request to the API.
141
+ */
142
+ private makeRequest;
143
+ /**
144
+ * Parses error response body safely.
145
+ */
146
+ private parseErrorResponse;
147
+ }
148
+
149
+ /**
150
+ * Base error class for all Parsefy errors.
151
+ */
152
+ declare class ParsefyError extends Error {
153
+ /** Error code, if applicable. The client sets "TIMEOUT", "NETWORK_ERROR" or "UNKNOWN_ERROR" on the request failures it wraps itself. */
154
+ readonly code?: string;
155
+ constructor(message: string, code?: string);
156
+ }
157
+ /**
158
+ * Error thrown when the API returns an HTTP error (4xx/5xx). A 429 response is retried up to 3 times before this error reaches the caller.
159
+ */
160
+ declare class APIError extends ParsefyError {
161
+ /** HTTP status code of the response. */
162
+ readonly statusCode: number;
163
+ /** Error response body, if available: the parsed JSON payload, or an object of the form { message } when the body is not JSON. */
164
+ readonly response?: unknown;
165
+ constructor(message: string, statusCode: number, response?: unknown);
166
+ }
167
+ /**
168
+ * Error describing a failed document extraction (the API reports it in response.error).
169
+ * This is not an HTTP error - the request succeeded but extraction failed. NOTE(review): the bundled client does not appear to construct this class; extract() reports such failures through ExtractResult.error instead - confirm intended usage.
170
+ */
171
+ declare class ExtractionError extends ParsefyError {
172
+ /** Metadata about the extraction attempt. */
173
+ readonly metadata: ExtractionMetadata;
174
+ constructor(message: string, code: string, metadata: ExtractionMetadata);
175
+ }
176
+ /**
177
+ * Error thrown for client-side validation failures.
178
+ */
179
+ declare class ValidationError extends ParsefyError {
180
+ constructor(message: string);
181
+ }
182
+
183
+ export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
@@ -0,0 +1,183 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Configuration options for the Parsefy client. Every field is optional; a missing or falsy value (empty string, 0) falls back to its default.
5
+ */
6
+ interface ParsefyConfig {
7
+ /** API key for authentication. If not provided, the client reads the PARSEFY_API_KEY environment variable (Node.js only) and throws a ValidationError when no key is found. */
8
+ apiKey?: string;
9
+ /** Base URL for the API; the client appends "/v1/extract" to it. Defaults to https://api.parsefy.io */
10
+ baseUrl?: string;
11
+ /** Request timeout in milliseconds. Defaults to 60000 (60 seconds); 0 is treated as unset and also selects the default. */
12
+ timeout?: number;
13
+ }
14
+ /**
15
+ * Options for the extract method.
16
+ */
17
+ interface ExtractOptions<T extends z.ZodType> {
18
+ /** The document file to extract data from. Supports File, Blob, Buffer, or file path (Node.js only). Empty inputs and inputs larger than 10MB are rejected with a ValidationError; File names and file paths must end in .pdf or .docx. */
19
+ file: File | Blob | Buffer | string;
20
+ /** Zod schema defining the structure of data to extract. It is converted to JSON Schema and sent to the API as the output_schema form field. */
21
+ schema: T;
22
+ }
23
+ /**
24
+ * Metadata about the extraction process.
25
+ */
26
+ interface ExtractionMetadata {
27
+ /** Time taken to process the document in milliseconds. */
28
+ processingTimeMs: number;
29
+ /** Number of input tokens used. */
30
+ inputTokens: number;
31
+ /** Number of output tokens generated. */
32
+ outputTokens: number;
33
+ /** Number of credits consumed (1 credit = 1 page). */
34
+ credits: number;
35
+ /** Whether the fallback model was triggered for higher accuracy. */
36
+ fallbackTriggered: boolean;
37
+ }
38
+ /**
39
+ * Error response from the API.
40
+ */
41
+ interface APIErrorResponse {
42
+ /** Error code identifying the type of error. */
43
+ code: string;
44
+ /** Human-readable error message. */
45
+ message: string;
46
+ }
47
+ /**
48
+ * Result of an extraction operation.
49
+ */
50
+ interface ExtractResult<T> {
51
+ /** Extracted data matching the schema, or null if extraction failed. */
52
+ object: T | null;
53
+ /** Metadata about the extraction process. */
54
+ metadata: ExtractionMetadata;
55
+ /** Error details if extraction failed, or null on success. */
56
+ error: APIErrorResponse | null;
57
+ }
58
+
59
+ /**
60
+ * Parsefy client for extracting structured data from documents.
61
+ *
62
+ * @example
63
+ * ```ts
64
+ * import { Parsefy } from 'parsefy';
65
+ * import * as z from 'zod';
66
+ *
67
+ * const client = new Parsefy('pk_your_api_key');
68
+ *
69
+ * const schema = z.object({
70
+ * name: z.string(),
71
+ * total: z.number(),
72
+ * });
73
+ *
74
+ * const { object, error } = await client.extract({
75
+ * file: './invoice.pdf',
76
+ * schema,
77
+ * });
78
+ * ```
79
+ */
80
+ declare class Parsefy {
81
+ private readonly apiKey;
82
+ private readonly baseUrl;
83
+ private readonly timeout;
84
+ private readonly maxRetries;
85
+ /**
86
+ * Creates a new Parsefy client.
87
+ *
88
+ * @param configOrApiKey - API key string or configuration object.
89
+ * If not provided, reads from PARSEFY_API_KEY environment variable.
90
+ *
91
+ * @example
92
+ * ```ts
93
+ * // Using API key directly
94
+ * const client = new Parsefy('pk_your_api_key');
95
+ *
96
+ * // Using configuration object
97
+ * const client = new Parsefy({
98
+ * apiKey: 'pk_your_api_key',
99
+ * timeout: 120000,
100
+ * });
101
+ *
102
+ * // Using environment variable
103
+ * const client = new Parsefy();
104
+ * ```
105
+ */
106
+ constructor(configOrApiKey?: string | ParsefyConfig);
107
+ /**
108
+ * Gets the API key from environment variable.
109
+ */
110
+ private getEnvApiKey;
111
+ /**
112
+ * Extracts structured data from a document using the provided Zod schema.
113
+ *
114
+ * @param options - Extraction options including file and schema.
115
+ * @returns Promise resolving to the extraction result with typed data.
116
+ *
117
+ * @example
118
+ * ```ts
119
+ * const schema = z.object({
120
+ * invoice_number: z.string().describe('The invoice number'),
121
+ * total: z.number().describe('Total amount'),
122
+ * });
123
+ *
124
+ * const { object, metadata, error } = await client.extract({
125
+ * file: './invoice.pdf',
126
+ * schema,
127
+ * });
128
+ *
129
+ * if (!error && object) {
130
+ * console.log(object.invoice_number); // Fully typed!
131
+ * }
132
+ * ```
133
+ */
134
+ extract<T extends z.ZodType>(options: ExtractOptions<T>): Promise<ExtractResult<z.infer<T>>>;
135
+ /**
136
+ * Makes a request with retry logic for rate limiting.
137
+ */
138
+ private makeRequestWithRetry;
139
+ /**
140
+ * Makes the actual HTTP request to the API.
141
+ */
142
+ private makeRequest;
143
+ /**
144
+ * Parses error response body safely.
145
+ */
146
+ private parseErrorResponse;
147
+ }
148
+
149
+ /**
150
+ * Base error class for all Parsefy errors.
151
+ */
152
+ declare class ParsefyError extends Error {
153
+ /** Error code, if applicable. The client sets "TIMEOUT", "NETWORK_ERROR" or "UNKNOWN_ERROR" on the request failures it wraps itself. */
154
+ readonly code?: string;
155
+ constructor(message: string, code?: string);
156
+ }
157
+ /**
158
+ * Error thrown when the API returns an HTTP error (4xx/5xx). A 429 response is retried up to 3 times before this error reaches the caller.
159
+ */
160
+ declare class APIError extends ParsefyError {
161
+ /** HTTP status code of the response. */
162
+ readonly statusCode: number;
163
+ /** Error response body, if available: the parsed JSON payload, or an object of the form { message } when the body is not JSON. */
164
+ readonly response?: unknown;
165
+ constructor(message: string, statusCode: number, response?: unknown);
166
+ }
167
+ /**
168
+ * Error describing a failed document extraction (the API reports it in response.error).
169
+ * This is not an HTTP error - the request succeeded but extraction failed. NOTE(review): the bundled client does not appear to construct this class; extract() reports such failures through ExtractResult.error instead - confirm intended usage.
170
+ */
171
+ declare class ExtractionError extends ParsefyError {
172
+ /** Metadata about the extraction attempt. */
173
+ readonly metadata: ExtractionMetadata;
174
+ constructor(message: string, code: string, metadata: ExtractionMetadata);
175
+ }
176
+ /**
177
+ * Error thrown for client-side validation failures.
178
+ */
179
+ declare class ValidationError extends ParsefyError {
180
+ constructor(message: string);
181
+ }
182
+
183
+ export { APIError, type APIErrorResponse, type ExtractOptions, type ExtractResult, ExtractionError, type ExtractionMetadata, Parsefy, type ParsefyConfig, ParsefyError, ValidationError };
package/dist/index.mjs ADDED
@@ -0,0 +1,2 @@
1
import { zodToJsonSchema } from "zod-to-json-schema";

/** Supported file extensions mapped to the MIME type attached to the upload. */
const MIME_TYPES = {
  ".pdf": "application/pdf",
  ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
};

/** Largest accepted file, in bytes (10MB). */
const MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024;

/** API origin used when the config does not provide `baseUrl`. */
const DEFAULT_BASE_URL = "https://api.parsefy.io";

/** Request timeout, in milliseconds, used when the config does not provide `timeout`. */
const DEFAULT_TIMEOUT_MS = 60000;

/** Upper bound for a single retry delay, in milliseconds. */
const MAX_BACKOFF_MS = 30000;

/** Base error class for all Parsefy errors; carries an optional machine-readable `code`. */
class ParsefyError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {string} [code] - Optional machine-readable error code.
   */
  constructor(message, code) {
    super(message);
    this.name = "ParsefyError";
    this.code = code;
    // captureStackTrace is a V8 extension, so it is feature-detected.
    if (typeof Error.captureStackTrace === "function") {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}

/** Error for non-2xx HTTP responses; `code` is not set on these. */
class APIError extends ParsefyError {
  /**
   * @param {string} message - Error description.
   * @param {number} statusCode - HTTP status of the failed response.
   * @param {unknown} [response] - Parsed error body, if any.
   */
  constructor(message, statusCode, response) {
    super(message);
    this.name = "APIError";
    this.statusCode = statusCode;
    this.response = response;
  }
}

/** Error describing a failed extraction; exported for consumers, not thrown by this module. */
class ExtractionError extends ParsefyError {
  /**
   * @param {string} message - Error description.
   * @param {string} code - Machine-readable error code.
   * @param {object} metadata - Metadata of the extraction attempt.
   */
  constructor(message, code, metadata) {
    super(message, code);
    this.name = "ExtractionError";
    this.metadata = metadata;
  }
}

/** Error for invalid client-side input (API key, file type, file size, file input). */
class ValidationError extends ParsefyError {
  /** @param {string} message - Error description. */
  constructor(message) {
    super(message);
    this.name = "ValidationError";
  }
}

/** @returns {boolean} True when running under Node.js (`process.versions.node` is defined). */
function isNode() {
  return typeof process !== "undefined" && process.versions?.node !== undefined;
}

/**
 * Converts a Zod schema to the JSON schema sent as `output_schema`.
 * @param {unknown} schema - Zod schema describing the expected output.
 * @returns {object} JSON schema with inlined refs, targeting OpenAPI 3.
 */
function toJsonSchema(schema) {
  return zodToJsonSchema(schema, { $refStrategy: "none", target: "openApi3" });
}

/**
 * Looks up the MIME type for a file name by its (case-insensitive) extension.
 * @param {string} filename
 * @returns {string | null} The MIME type, or null for unsupported or missing extensions.
 */
function getMimeType(filename) {
  const extension = filename.toLowerCase().match(/\.[^.]+$/)?.[0];
  return (extension && MIME_TYPES[extension]) || null;
}

/**
 * Ensures the file name has a supported extension.
 * @param {string} filename
 * @throws {ValidationError} When the extension is not supported.
 */
function assertSupportedFileType(filename) {
  if (getMimeType(filename)) {
    return;
  }
  const supported = Object.keys(MIME_TYPES).join(", ");
  throw new ValidationError(`Unsupported file type. Supported types: ${supported}`);
}

/**
 * Ensures the file is neither empty nor larger than the size limit.
 * @param {number} size - File size in bytes.
 * @throws {ValidationError} When the size is 0 or exceeds the limit.
 */
function assertValidFileSize(size) {
  if (size === 0) {
    throw new ValidationError("File is empty");
  }
  if (size > MAX_FILE_SIZE_BYTES) {
    const limitMb = MAX_FILE_SIZE_BYTES / 1048576;
    throw new ValidationError(`File size exceeds maximum limit of ${limitMb}MB`);
  }
}

/**
 * Maps the API's snake_case response to the camelCase result returned to callers.
 * @param {object} payload - Parsed JSON body of a successful response.
 * @returns {{object: unknown, metadata: object, error: unknown}}
 */
function transformResponse(payload) {
  const { metadata } = payload;
  return {
    object: payload.object,
    metadata: {
      processingTimeMs: metadata.processing_time_ms,
      inputTokens: metadata.input_tokens,
      outputTokens: metadata.output_tokens,
      credits: metadata.credits,
      fallbackTriggered: metadata.fallback_triggered,
    },
    error: payload.error,
  };
}

/**
 * Wraps a Buffer in a File (or a Blob when `File` is not available).
 * @param {Buffer} buffer - File contents.
 * @param {string} filename - Name used for the File and for MIME type lookup.
 * @returns {File | Blob}
 */
function bufferToUploadable(buffer, filename) {
  const type = getMimeType(filename) || "application/octet-stream";
  // Copy only this view's bytes out of the underlying ArrayBuffer.
  const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
  return typeof File !== "undefined"
    ? new File([bytes], filename, { type })
    : new Blob([bytes], { type });
}

/**
 * Reads and validates a file from disk (Node.js only).
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<{buffer: Buffer, filename: string}>}
 * @throws {ValidationError} Outside Node.js, when the file does not exist, has an
 *   unsupported extension, is empty, or is too large.
 */
async function readFileFromPath(filePath) {
  if (!isNode()) {
    throw new ValidationError(
      "File path strings are only supported in Node.js. Use File or Blob in the browser."
    );
  }
  // Loaded lazily so the module can still be imported in browsers.
  const fs = await import("fs");
  const path = await import("path");
  if (!fs.existsSync(filePath)) {
    throw new ValidationError(`File not found: ${filePath}`);
  }
  const filename = path.basename(filePath);
  assertSupportedFileType(filename);
  const buffer = fs.readFileSync(filePath);
  assertValidFileSize(buffer.length);
  return { buffer, filename };
}

/**
 * Normalizes the supported file inputs into a validated File or Blob.
 *
 * Every global is feature-detected before use: `Buffer` does not exist in
 * browsers and `File` is not a global on Node 18, so an unguarded check would
 * throw a ReferenceError instead of falling through to the next input kind.
 *
 * @param {string | Buffer | File | Blob} file - Path, Buffer, File or Blob.
 * @returns {Promise<File | Blob>}
 * @throws {ValidationError} When the input is invalid or of an unsupported kind.
 */
async function prepareFile(file) {
  if (typeof file === "string") {
    const { buffer, filename } = await readFileFromPath(file);
    return bufferToUploadable(buffer, filename);
  }
  if (typeof Buffer !== "undefined" && Buffer.isBuffer(file)) {
    assertValidFileSize(file.length);
    // A bare Buffer carries no name, so it is uploaded as "document.pdf".
    return bufferToUploadable(file, "document.pdf");
  }
  if (typeof File !== "undefined" && file instanceof File) {
    assertSupportedFileType(file.name);
    assertValidFileSize(file.size);
    return file;
  }
  if (typeof Blob !== "undefined" && file instanceof Blob) {
    assertValidFileSize(file.size);
    return file;
  }
  throw new ValidationError(
    "Invalid file input. Expected File, Blob, Buffer, or file path string."
  );
}

/**
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves after the delay.
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Computes an exponential backoff delay with up to 10% random jitter.
 * @param {number} attempt - Zero-based retry attempt.
 * @param {number} [baseMs=1000] - Delay for attempt 0.
 * @returns {number} Delay in milliseconds, capped at 30000.
 */
function backoffDelay(attempt, baseMs = 1000) {
  const exponential = baseMs * Math.pow(2, attempt);
  const jitter = Math.random() * 0.1 * exponential;
  return Math.min(exponential + jitter, MAX_BACKOFF_MS);
}

/** Client for the Parsefy extraction API. */
class Parsefy {
  /**
   * @param {string | {apiKey?: string, baseUrl?: string, timeout?: number}} [config] -
   *   API key string or config object. Without an API key, the PARSEFY_API_KEY
   *   environment variable is used (Node.js only).
   * @throws {ValidationError} When no API key can be resolved.
   */
  constructor(config) {
    this.maxRetries = 3;
    let options = {};
    if (typeof config === "string") {
      options = { apiKey: config };
    } else if (config) {
      options = config;
    }
    this.apiKey = options.apiKey || this.getEnvApiKey();
    if (!this.apiKey) {
      throw new ValidationError(
        "API key is required. Provide it in the constructor or set the PARSEFY_API_KEY environment variable."
      );
    }
    this.baseUrl = options.baseUrl || DEFAULT_BASE_URL;
    this.timeout = options.timeout || DEFAULT_TIMEOUT_MS;
  }

  /** @returns {string} PARSEFY_API_KEY under Node.js, otherwise an empty string. */
  getEnvApiKey() {
    return (isNode() && process.env.PARSEFY_API_KEY) || "";
  }

  /**
   * Uploads a document with the JSON schema derived from `schema` and returns the result.
   * @param {{file: string | Buffer | File | Blob, schema: unknown}} options
   * @returns {Promise<{object: unknown, metadata: object, error: unknown}>}
   * @throws {ValidationError} For invalid file input.
   * @throws {APIError} For non-2xx responses (429 is retried first).
   * @throws {ParsefyError} For timeouts, network failures and unexpected errors.
   */
  async extract(options) {
    const { file, schema } = options;
    const jsonSchema = toJsonSchema(schema);
    const upload = await prepareFile(file);
    const form = new FormData();
    form.append("file", upload);
    form.append("output_schema", JSON.stringify(jsonSchema));
    return this.makeRequestWithRetry(form);
  }

  /**
   * Sends the request, retrying with backoff while the API answers 429.
   * @param {FormData} form - Multipart body to send.
   * @param {number} [attempt=0] - Zero-based attempt counter.
   * @returns {Promise<object>} The transformed extraction result.
   */
  async makeRequestWithRetry(form, attempt = 0) {
    try {
      return await this.makeRequest(form);
    } catch (error) {
      const rateLimited = error instanceof APIError && error.statusCode === 429;
      if (!rateLimited || attempt >= this.maxRetries) {
        throw error;
      }
      await sleep(backoffDelay(attempt));
      return this.makeRequestWithRetry(form, attempt + 1);
    }
  }

  /**
   * Performs one POST to `/v1/extract` and maps failures to Parsefy errors.
   * @param {FormData} form - Multipart body to send.
   * @returns {Promise<object>} The transformed extraction result.
   * @throws {APIError} When the response status is not 2xx.
   * @throws {ParsefyError} With code "TIMEOUT", "NETWORK_ERROR" or "UNKNOWN_ERROR".
   */
  async makeRequest(form) {
    const url = `${this.baseUrl}/v1/extract`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);
    try {
      const response = await fetch(url, {
        method: "POST",
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: form,
        signal: controller.signal,
      });
      if (!response.ok) {
        const errorBody = await this.parseErrorResponse(response);
        throw new APIError(
          errorBody.message || `API request failed with status ${response.status}`,
          response.status,
          errorBody
        );
      }
      return transformResponse(await response.json());
    } catch (error) {
      if (error instanceof Error && error.name === "AbortError") {
        throw new ParsefyError(`Request timed out after ${this.timeout}ms`, "TIMEOUT");
      }
      if (error instanceof ParsefyError) {
        throw error;
      }
      // fetch reports connection failures as TypeError. A TypeError raised while
      // handling a malformed response body is reported the same way.
      if (error instanceof TypeError) {
        throw new ParsefyError(
          "Network error: Unable to connect to the Parsefy API",
          "NETWORK_ERROR"
        );
      }
      const detail = error instanceof Error ? error.message : String(error);
      throw new ParsefyError(`Unexpected error: ${detail}`, "UNKNOWN_ERROR");
    } finally {
      // Cleared here so the timeout also bounds reading the response body.
      clearTimeout(timer);
    }
  }

  /**
   * Reads an error response body without throwing.
   *
   * The body is read once as text and then parsed, because a response body can
   * only be consumed a single time; calling `text()` after a failed `json()`
   * would reject and lose the plain-text message.
   *
   * @param {Response} response - The non-2xx response.
   * @returns {Promise<object>} The parsed JSON object, or `{ message }` built from
   *   the body text or, failing that, the status text.
   */
  async parseErrorResponse(response) {
    let text;
    try {
      text = await response.text();
    } catch {
      return { message: response.statusText };
    }
    try {
      const parsed = JSON.parse(text);
      if (parsed !== null && typeof parsed === "object") {
        return parsed;
      }
    } catch {
      // Not JSON: fall through to the plain-text message below.
    }
    return { message: text || response.statusText };
  }
}

export { APIError, ExtractionError, Parsefy, ParsefyError, ValidationError };
package/package.json CHANGED
@@ -1,12 +1,57 @@
1
1
  {
2
2
  "name": "parsefy",
3
- "version": "1.0.0",
4
- "description": "High-accuracy document data extraction powered by AI",
5
- "license": "ISC",
6
- "author": "marceloakalopes",
7
- "type": "commonjs",
8
- "main": "index.js",
3
+ "version": "1.0.1",
4
+ "description": "Official TypeScript SDK for Parsefy - AI-powered document data extraction",
5
+ "author": "",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/parsefy/parsefy-js.git"
10
+ },
11
+ "homepage": "https://parsefy.io",
12
+ "bugs": {
13
+ "url": "https://github.com/parsefy/parsefy-js/issues"
14
+ },
15
+ "main": "./dist/index.cjs",
16
+ "module": "./dist/index.mjs",
17
+ "types": "./dist/index.d.cts",
18
+ "exports": {
19
+ ".": {
20
+ "import": {
21
+ "types": "./dist/index.d.mts",
22
+ "default": "./dist/index.mjs"
23
+ },
24
+ "require": {
25
+ "types": "./dist/index.d.cts",
26
+ "default": "./dist/index.cjs"
27
+ }
28
+ }
29
+ },
30
+ "files": [
31
+ "dist",
32
+ "README.md",
33
+ "LICENSE"
34
+ ],
9
35
  "scripts": {
10
- "test": "echo \"Error: no test specified\" && exit 1"
36
+ "build": "tsup && cp dist/index.d.ts dist/index.d.cts",
37
+ "dev": "tsup --watch",
38
+ "typecheck": "tsc --noEmit",
39
+ "prepublishOnly": "npm run build"
40
+ },
41
+ "peerDependencies": {
42
+ "zod": "^3.0.0"
43
+ },
44
+ "dependencies": {
45
+ "zod-to-json-schema": "^3.22.0"
46
+ },
47
+ "devDependencies": {
48
+ "@types/node": "^20.0.0",
49
+ "tsup": "^8.0.0",
50
+ "tsx": "^4.21.0",
51
+ "typescript": "^5.0.0",
52
+ "zod": "^3.22.0"
53
+ },
54
+ "engines": {
55
+ "node": ">=18"
11
56
  }
12
57
  }
package/index.js DELETED
@@ -1 +0,0 @@
1
- console.log("Parsefy SDK");