ocr-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,320 @@
1
+ # extracta-ai
2
+
3
+ Multi-provider AI document extraction for Node.js. Extract text or structured JSON from documents using Gemini, OpenAI, Claude, Grok, or Vertex AI.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install extracta-ai
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ### Using Gemini
14
+
15
+ ```typescript
16
+ import { ExtractaAI } from 'extracta-ai';
17
+
18
+ const extracta = new ExtractaAI({
19
+ provider: 'gemini',
20
+ apiKey: 'YOUR_GEMINI_API_KEY',
21
+ });
22
+
23
+ const result = await extracta.extract('./invoice.png');
24
+
25
+ if (result.success) {
26
+ const text = result.content;
27
+ console.log(text);
28
+ }
29
+ ```
30
+
31
+ ### Using OpenAI
32
+
33
+ ```typescript
34
+ import { ExtractaAI } from 'extracta-ai';
35
+
36
+ const extracta = new ExtractaAI({
37
+ provider: 'openai',
38
+ apiKey: 'YOUR_OPENAI_API_KEY',
39
+ });
40
+
41
+ const result = await extracta.extract('./document.pdf');
42
+
43
+ if (result.success) {
44
+ const text = result.content;
45
+ console.log(text);
46
+ }
47
+ ```
48
+
49
+ ### From URL
50
+
51
+ Extract directly from a URL:
52
+
53
+ ```typescript
54
+ const result = await extracta.extract('https://example.com/invoice.png');
55
+
56
+ if (result.success) {
57
+ console.log(result.content);
58
+ }
59
+ ```
60
+
61
+ ### Custom Instructions
62
+
63
+ You can provide custom instructions to guide the extraction:
64
+
65
+ ```typescript
66
+ const result = await extracta.extract('./receipt.png', {
67
+ prompt: 'Extract only the total amount and date from this receipt',
68
+ });
69
+
70
+ if (result.success) {
71
+ console.log(result.content);
72
+ // Output: "Total: $154.06, Date: 11/02/2019"
73
+ }
74
+ ```
75
+
76
+ ### Output Format
77
+
78
+ By default, extraction returns text. You can also extract structured JSON:
79
+
80
+ ```typescript
81
+ // Text output (default)
82
+ const textResult = await extracta.extract('./invoice.png', {
83
+ format: 'text',
84
+ });
85
+
86
+ if (textResult.success) {
87
+ console.log(textResult.content); // string
88
+ }
89
+
90
+ // JSON output with schema
91
+ const jsonResult = await extracta.extract('./invoice.png', {
92
+ format: 'json',
93
+ schema: {
94
+ invoice_number: 'string',
95
+ date: 'string',
96
+ total: 'number',
97
+ items: [{ name: 'string', quantity: 'number', price: 'number' }],
98
+ },
99
+ });
100
+
101
+ if (jsonResult.success) {
102
+ console.log(jsonResult.data); // { invoice_number: "US-001", date: "11/02/2019", total: 154.06, items: [...] }
103
+ }
104
+ ```
105
+
106
+ ### JSON Schema
107
+
108
+ The schema defines the structure of the data you want to extract. Use a simple object where keys are field names and values are types:
109
+
110
+ **Basic types:**
111
+ - `'string'` - Text values
112
+ - `'number'` - Numeric values
113
+ - `'boolean'` - True/false values
114
+
115
+ **Nested objects:**
116
+ ```typescript
117
+ const schema = {
118
+ company: {
119
+ name: 'string',
120
+ address: 'string',
121
+ phone: 'string',
122
+ },
123
+ customer: {
124
+ name: 'string',
125
+ email: 'string',
126
+ },
127
+ };
128
+ ```
129
+
130
+ **Arrays:**
131
+ ```typescript
132
+ const schema = {
133
+ // Array of objects
134
+ items: [
135
+ {
136
+ description: 'string',
137
+ quantity: 'number',
138
+ unit_price: 'number',
139
+ total: 'number',
140
+ },
141
+ ],
142
+ // Simple array
143
+ tags: ['string'],
144
+ };
145
+ ```
146
+
147
+ **Complete example (invoice):**
148
+ ```typescript
149
+ const invoiceSchema = {
150
+ invoice_number: 'string',
151
+ date: 'string',
152
+ due_date: 'string',
153
+ company: {
154
+ name: 'string',
155
+ address: 'string',
156
+ phone: 'string',
157
+ email: 'string',
158
+ },
159
+ bill_to: {
160
+ name: 'string',
161
+ address: 'string',
162
+ },
163
+ items: [
164
+ {
165
+ description: 'string',
166
+ quantity: 'number',
167
+ unit_price: 'number',
168
+ total: 'number',
169
+ },
170
+ ],
171
+ subtotal: 'number',
172
+ tax: 'number',
173
+ total: 'number',
174
+ };
175
+
176
+ const result = await extracta.extract('./invoice.png', {
177
+ format: 'json',
178
+ schema: invoiceSchema,
179
+ prompt: 'Extract all invoice data from this document.',
180
+ });
181
+ ```
182
+
183
+ ### Model Configuration
184
+
185
+ You can pass model-specific parameters like temperature, max tokens, and more:
186
+
187
+ ```typescript
188
+ // Gemini with model config
189
+ const geminiResult = await extracta.extract('./invoice.png', {
190
+ modelConfig: {
191
+ temperature: 0.2,
192
+ maxTokens: 4096,
193
+ topP: 0.8,
194
+ topK: 40,
195
+ },
196
+ });
197
+
198
+ // OpenAI with model config
199
+ const openaiResult = await extracta.extract('./invoice.png', {
200
+ modelConfig: {
201
+ temperature: 0,
202
+ maxTokens: 2048,
203
+ topP: 1,
204
+ },
205
+ });
206
+ ```
207
+
208
+ Available options:
209
+
210
+ | Option | Description | Supported Providers |
211
+ |--------|-------------|---------------------|
212
+ | temperature | Controls randomness (0.0-1.0+) | All |
213
+ | maxTokens | Maximum tokens to generate | All |
214
+ | topP | Nucleus sampling | All |
215
+ | topK | Top-k sampling | Gemini, Claude, Vertex |
216
+ | stopSequences | Stop generation at these strings | All |
217
+
218
+ ### Token Usage
219
+
220
+ Access token usage information from the metadata:
221
+
222
+ ```typescript
223
+ const result = await extracta.extract('./invoice.png');
224
+
225
+ if (result.success) {
226
+ console.log(result.content);
227
+
228
+ // Access metadata
229
+ console.log(result.metadata.processingTimeMs); // 2351
230
+ console.log(result.metadata.tokens?.inputTokens); // 1855
231
+ console.log(result.metadata.tokens?.outputTokens); // 260
232
+ console.log(result.metadata.tokens?.totalTokens); // 2115
233
+ }
234
+ ```
235
+
236
+ ## Supported Providers
237
+
238
+ | Provider | Default Model | Auth |
239
+ |----------|---------------|------|
240
+ | gemini | gemini-1.5-flash | API Key |
241
+ | openai | gpt-4o | API Key |
242
+ | claude | claude-sonnet-4-20250514 | API Key |
243
+ | grok | grok-2-vision-1212 | API Key |
244
+ | vertex | gemini-2.0-flash | Google Cloud |
245
+
246
+ > **Note:** For enterprise OCR needs, see the [Advanced: Vertex AI](#advanced-vertex-ai-google-cloud) section below.
247
+
248
+ ## Supported Inputs
249
+
250
+ - **Local files**: `./invoice.png`, `./document.pdf`
251
+ - **URLs**: `https://example.com/invoice.png`
252
+
253
+ ## Supported Files
254
+
255
+ - **Images**: jpg, png, gif, webp
256
+ - **Documents**: pdf
257
+ - **Text**: txt, md, csv, json, xml, html
258
+
259
+ ## Advanced: Vertex AI (Google Cloud)
260
+
261
+ The `vertex` provider enables access to Google Cloud's AI infrastructure, which is useful for enterprise scenarios requiring:
262
+
263
+ - **Compliance**: Data residency and regulatory requirements
264
+ - **Integration**: Native integration with Google Cloud services (BigQuery, Cloud Storage, etc.)
265
+ - **Specialized OCR**: Access to Google's Document AI and Vision AI processors
266
+
267
+ ### Basic Setup
268
+
269
+ Vertex AI uses Google Cloud authentication instead of API keys:
270
+
271
+ ```typescript
272
+ import { ExtractaAI } from 'extracta-ai';
273
+
274
+ const extracta = new ExtractaAI({
275
+ provider: 'vertex',
276
+ vertexConfig: {
277
+ project: 'your-gcp-project-id',
278
+ location: 'us-central1',
279
+ },
280
+ });
281
+
282
+ const result = await extracta.extract('./invoice.png');
283
+ ```
284
+
285
+ **Requirements:**
286
+ 1. Install the [gcloud CLI](https://cloud.google.com/sdk/docs/install)
287
+ 2. Run `gcloud auth application-default login`
288
+ 3. Enable the Vertex AI API in your GCP project
289
+
290
+ ### When to Use Vertex AI vs Gemini API
291
+
292
+ | Scenario | Recommended |
293
+ |----------|-------------|
294
+ | Quick prototyping | Gemini (API Key) |
295
+ | Personal projects | Gemini (API Key) |
296
+ | Enterprise/production | Vertex AI |
297
+ | Data residency requirements | Vertex AI |
298
+ | High-volume processing | Vertex AI |
299
+
300
+ ### Related Google Cloud OCR Services
301
+
302
+ For specialized document processing beyond what Gemini models offer, Google Cloud provides dedicated OCR services:
303
+
304
+ **[Document AI](https://cloud.google.com/document-ai)** - Optimized for structured documents:
305
+ - Invoice Parser, Receipt Parser, Form Parser
306
+ - W2, 1040, Bank Statement processors
307
+ - Custom extractors for domain-specific documents
308
+ - Higher accuracy for tables, forms, and handwritten text
309
+
310
+ **[Vision API](https://cloud.google.com/vision/docs/ocr)** - Optimized for images:
311
+ - Real-time OCR with low latency
312
+ - 80+ language support
313
+ - Handwriting detection
314
+ - Simple integration, ~98% accuracy on clean documents
315
+
316
+ These services are separate from extracta-ai but can complement it for enterprise document pipelines.
317
+
318
+ ## License
319
+
320
+ MIT
@@ -0,0 +1,355 @@
1
+ /**
2
+ * Supported AI providers
3
+ */
4
+ type AIProvider = 'gemini' | 'openai' | 'claude' | 'grok' | 'vertex';
5
+ /**
6
+ * Output format for extraction
7
+ */
8
+ type OutputFormat = 'text' | 'json';
9
+ /**
10
+ * Supported file types
11
+ */
12
+ type SupportedFileType = 'pdf' | 'image' | 'text';
13
+ /**
14
+ * Configuration for a specific AI provider
15
+ */
16
+ interface ProviderConfig {
17
+ apiKey: string;
18
+ model?: string;
19
+ }
20
+ /**
21
+ * Vertex AI specific configuration
22
+ */
23
+ interface VertexConfig$1 {
24
+ project: string;
25
+ location: string;
26
+ }
27
+ /**
28
+ * Main configuration for ExtractaAI
29
+ */
30
+ interface ExtractaConfig {
31
+ provider: AIProvider;
32
+ apiKey?: string;
33
+ model?: string;
34
+ /**
35
+ * Vertex AI configuration (required when provider is 'vertex')
36
+ */
37
+ vertexConfig?: VertexConfig$1;
38
+ }
39
+ /**
40
+ * Model-specific configuration parameters
41
+ */
42
+ interface ModelConfig {
43
+ /**
44
+ * Controls randomness (0.0 = deterministic, 1.0+ = more random)
45
+ */
46
+ temperature?: number;
47
+ /**
48
+ * Maximum tokens to generate in the response
49
+ */
50
+ maxTokens?: number;
51
+ /**
52
+ * Top-p (nucleus) sampling
53
+ */
54
+ topP?: number;
55
+ /**
56
+ * Top-k sampling (Gemini, Claude, and Vertex only)
57
+ */
58
+ topK?: number;
59
+ /**
60
+ * Stop sequences to end generation
61
+ */
62
+ stopSequences?: string[];
63
+ }
64
+ /**
65
+ * Options for extraction
66
+ */
67
+ interface ExtractionOptions {
68
+ /**
69
+ * Output format: 'text' for plain text, 'json' for structured JSON
70
+ */
71
+ format?: OutputFormat;
72
+ /**
73
+ * JSON schema to validate/structure the output (only for format: 'json')
74
+ * Can be a JSON Schema object or a simple object describing the structure
75
+ */
76
+ schema?: Record<string, unknown>;
77
+ /**
78
+ * Custom prompt to guide the extraction
79
+ */
80
+ prompt?: string;
81
+ /**
82
+ * Language for extraction (default: 'auto')
83
+ */
84
+ language?: string;
85
+ /**
86
+ * Output file path (if you want to save to disk)
87
+ */
88
+ outputPath?: string;
89
+ /**
90
+ * Model-specific configuration (temperature, maxTokens, etc.)
91
+ */
92
+ modelConfig?: ModelConfig;
93
+ }
94
+ /**
95
+ * Result of text extraction
96
+ */
97
+ interface TextExtractionResult {
98
+ success: true;
99
+ format: 'text';
100
+ content: string;
101
+ metadata: ExtractionMetadata;
102
+ }
103
+ /**
104
+ * Result of JSON extraction
105
+ */
106
+ interface JsonExtractionResult<T = Record<string, unknown>> {
107
+ success: true;
108
+ format: 'json';
109
+ data: T;
110
+ metadata: ExtractionMetadata;
111
+ }
112
+ /**
113
+ * Error result
114
+ */
115
+ interface ExtractionError {
116
+ success: false;
117
+ error: string;
118
+ code: string;
119
+ }
120
+ /**
121
+ * Combined extraction result type
122
+ */
123
+ type ExtractionResult<T = Record<string, unknown>> = TextExtractionResult | JsonExtractionResult<T> | ExtractionError;
124
+ /**
125
+ * Token usage information
126
+ */
127
+ interface TokenUsage {
128
+ inputTokens: number;
129
+ outputTokens: number;
130
+ totalTokens: number;
131
+ }
132
+ /**
133
+ * Metadata about the extraction
134
+ */
135
+ interface ExtractionMetadata {
136
+ provider: AIProvider;
137
+ model: string;
138
+ fileType: SupportedFileType;
139
+ fileName: string;
140
+ processingTimeMs: number;
141
+ tokens?: TokenUsage;
142
+ }
143
+ /**
144
+ * File information after loading
145
+ */
146
+ interface FileInfo {
147
+ path: string;
148
+ name: string;
149
+ type: SupportedFileType;
150
+ mimeType: string;
151
+ size: number;
152
+ content: Buffer;
153
+ base64?: string;
154
+ }
155
+ /**
156
+ * Result from provider extraction including tokens
157
+ */
158
+ interface ProviderResult<T = string> {
159
+ content: T;
160
+ tokens?: TokenUsage;
161
+ }
162
+ /**
163
+ * Interface that all AI providers must implement
164
+ */
165
+ interface IAIProvider {
166
+ readonly name: AIProvider;
167
+ readonly model: string;
168
+ /**
169
+ * Extract text from a file
170
+ */
171
+ extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
172
+ /**
173
+ * Extract structured JSON from a file
174
+ */
175
+ extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
176
+ /**
177
+ * Check if the provider supports the given file type
178
+ */
179
+ supportsFileType(type: SupportedFileType): boolean;
180
+ }
181
+
182
+ /**
183
+ * Main class for document extraction using AI
184
+ */
185
+ declare class ExtractaAI {
186
+ private provider;
187
+ private config;
188
+ constructor(config: ExtractaConfig);
189
+ /**
190
+ * Create a provider instance based on configuration
191
+ */
192
+ private createProvider;
193
+ /**
194
+ * Extract content from a file path or URL
195
+ */
196
+ extract(source: string, options?: ExtractionOptions): Promise<ExtractionResult>;
197
+ /**
198
+ * Extract content from a Buffer
199
+ */
200
+ extractFromBuffer(buffer: Buffer, fileName: string, options?: ExtractionOptions): Promise<ExtractionResult>;
201
+ /**
202
+ * Extract content from a base64 string
203
+ */
204
+ extractFromBase64(base64: string, fileName: string, options?: ExtractionOptions): Promise<ExtractionResult>;
205
+ /**
206
+ * Process the extraction based on format
207
+ */
208
+ private processExtraction;
209
+ /**
210
+ * Create an error result
211
+ */
212
+ private createErrorResult;
213
+ /**
214
+ * Get current provider name
215
+ */
216
+ getProvider(): AIProvider;
217
+ /**
218
+ * Get current model
219
+ */
220
+ getModel(): string;
221
+ /**
222
+ * Change the AI provider
223
+ */
224
+ setProvider(provider: AIProvider, apiKey: string, model?: string): void;
225
+ }
226
+ /**
227
+ * Factory function to create ExtractaAI instance
228
+ */
229
+ declare function createExtractaAI(config: ExtractaConfig): ExtractaAI;
230
+
231
+ /**
232
+ * Load a file from disk and prepare it for AI processing
233
+ */
234
+ declare function loadFile(filePath: string): Promise<FileInfo>;
235
+ /**
236
+ * Load a file from a Buffer
237
+ */
238
+ declare function loadFileFromBuffer(buffer: Buffer, fileName: string, mimeType?: string): FileInfo;
239
+ /**
240
+ * Load a file from base64 string
241
+ */
242
+ declare function loadFileFromBase64(base64: string, fileName: string, mimeType?: string): FileInfo;
243
+ /**
244
+ * Save content to a file
245
+ */
246
+ declare function saveToFile(filePath: string, content: string | Buffer): Promise<void>;
247
+ /**
248
+ * Get supported file extensions
249
+ */
250
+ declare function getSupportedExtensions(): string[];
251
+ /**
252
+ * Check if a file extension is supported
253
+ */
254
+ declare function isExtensionSupported(ext: string): boolean;
255
+ /**
256
+ * Check if a string is a URL
257
+ */
258
+ declare function isUrl(str: string): boolean;
259
+ /**
260
+ * Load a file from a URL
261
+ */
262
+ declare function loadFileFromUrl(url: string): Promise<FileInfo>;
263
+
264
+ /**
265
+ * Base class for AI providers with common functionality
266
+ */
267
+ declare abstract class BaseProvider implements IAIProvider {
268
+ abstract readonly name: AIProvider;
269
+ abstract readonly model: string;
270
+ protected apiKey: string;
271
+ constructor(apiKey: string);
272
+ abstract extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
273
+ abstract extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
274
+ supportsFileType(type: SupportedFileType): boolean;
275
+ /**
276
+ * Build the text extraction prompt
277
+ */
278
+ protected buildTextPrompt(options?: ExtractionOptions): string;
279
+ /**
280
+ * Build the JSON extraction prompt
281
+ */
282
+ protected buildJsonPrompt(schema: Record<string, unknown>, options?: ExtractionOptions): string;
283
+ /**
284
+ * Parse JSON response from AI, handling potential formatting issues
285
+ */
286
+ protected parseJsonResponse<T>(response: string): T;
287
+ }
288
+
289
+ declare class GeminiProvider extends BaseProvider {
290
+ readonly name: AIProvider;
291
+ readonly model: string;
292
+ private client;
293
+ constructor(apiKey: string, model?: string);
294
+ extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
295
+ extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
296
+ private buildGenerationConfig;
297
+ private extractTokenUsage;
298
+ private buildContent;
299
+ }
300
+
301
+ declare class OpenAIProvider extends BaseProvider {
302
+ readonly name: AIProvider;
303
+ readonly model: string;
304
+ private client;
305
+ constructor(apiKey: string, model?: string);
306
+ extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
307
+ extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
308
+ private buildCompletionOptions;
309
+ private extractTokenUsage;
310
+ private buildMessages;
311
+ }
312
+
313
+ declare class ClaudeProvider extends BaseProvider {
314
+ readonly name: AIProvider;
315
+ readonly model: string;
316
+ private client;
317
+ constructor(apiKey: string, model?: string);
318
+ extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
319
+ extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
320
+ private buildMessageOptions;
321
+ supportsFileType(type: SupportedFileType): boolean;
322
+ private extractTokenUsage;
323
+ private buildContent;
324
+ private getMediaType;
325
+ }
326
+
327
+ declare class GrokProvider extends BaseProvider {
328
+ readonly name: AIProvider;
329
+ readonly model: string;
330
+ private client;
331
+ constructor(apiKey: string, model?: string);
332
+ extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
333
+ extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
334
+ private buildCompletionOptions;
335
+ private extractTokenUsage;
336
+ private buildMessages;
337
+ }
338
+
339
+ interface VertexConfig {
340
+ project: string;
341
+ location: string;
342
+ }
343
+ declare class VertexProvider extends BaseProvider {
344
+ readonly name: AIProvider;
345
+ readonly model: string;
346
+ private client;
347
+ constructor(config: VertexConfig, model?: string);
348
+ extractText(file: FileInfo, options?: ExtractionOptions): Promise<ProviderResult<string>>;
349
+ extractJson<T = Record<string, unknown>>(file: FileInfo, schema: Record<string, unknown>, options?: ExtractionOptions): Promise<ProviderResult<T>>;
350
+ private buildGenerationConfig;
351
+ private extractTokenUsage;
352
+ private buildContents;
353
+ }
354
+
355
+ export { type AIProvider, BaseProvider, ClaudeProvider, ExtractaAI, type ExtractaConfig, type ExtractionError, type ExtractionMetadata, type ExtractionOptions, type ExtractionResult, type FileInfo, GeminiProvider, GrokProvider, type IAIProvider, type JsonExtractionResult, type ModelConfig, OpenAIProvider, type OutputFormat, type ProviderConfig, type SupportedFileType, type TextExtractionResult, type TokenUsage, type VertexConfig$1 as VertexConfig, VertexProvider, createExtractaAI, getSupportedExtensions, isExtensionSupported, isUrl, loadFile, loadFileFromBase64, loadFileFromBuffer, loadFileFromUrl, saveToFile };