ocr-ai 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +320 -0
- package/dist/index.d.mts +355 -0
- package/dist/index.d.ts +355 -0
- package/dist/index.js +1013 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +971 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +68 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,971 @@
|
|
|
1
|
+
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
2
|
+
import OpenAI from 'openai';
|
|
3
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
4
|
+
import { GoogleGenAI } from '@google/genai';
|
|
5
|
+
import * as fs from 'fs/promises';
|
|
6
|
+
import * as path from 'path';
|
|
7
|
+
|
|
8
|
+
// src/providers/base.provider.ts
/**
 * Common base for all OCR/extraction providers.
 *
 * Responsibilities shared by every provider:
 *  - API-key validation at construction time
 *  - prompt construction for plain-text and schema-driven JSON extraction
 *  - lenient parsing of model responses that may be wrapped in Markdown fences
 */
var BaseProvider = class {
  apiKey;
  /**
   * @param {string} apiKey - provider API key; must be a non-blank string.
   * @throws {Error} when the key is missing or only whitespace.
   */
  constructor(apiKey) {
    if (!apiKey || apiKey.trim() === "") {
      throw new Error(`API key is required for ${this.constructor.name}`);
    }
    this.apiKey = apiKey;
  }
  /**
   * Whether this provider can process the given logical file type.
   * Subclasses may override to narrow/extend support.
   * @param {string} type - one of "pdf" | "image" | "text"
   * @returns {boolean}
   */
  supportsFileType(type) {
    return ["pdf", "image", "text"].includes(type);
  }
  /**
   * Build the text extraction prompt
   * @param {{prompt?: string, language?: string}} [options]
   * @returns {string}
   */
  buildTextPrompt(options) {
    const basePrompt = options?.prompt || "Extract all text content from this document.";
    // "auto" means let the model pick the response language — no hint appended.
    const languageHint = options?.language && options.language !== "auto" ? ` Respond in ${options.language}.` : "";
    return `${basePrompt}${languageHint}

Please extract and return all the text content from the provided document.
Maintain the original structure and formatting as much as possible.
Return only the extracted text, without any additional commentary or metadata.`;
  }
  /**
   * Build the JSON extraction prompt
   * @param {object} schema - JSON schema the model must follow.
   * @param {{prompt?: string, language?: string}} [options]
   * @returns {string}
   */
  buildJsonPrompt(schema, options) {
    const basePrompt = options?.prompt || "Extract structured data from this document.";
    const languageHint = options?.language && options.language !== "auto" ? ` Text content should be in ${options.language}.` : "";
    return `${basePrompt}${languageHint}

Extract data from the provided document and return it as a JSON object following this schema:

${JSON.stringify(schema, null, 2)}

Important:
- Return ONLY valid JSON, no additional text or markdown formatting
- Follow the schema structure exactly
- If a field cannot be extracted, use null
- Do not include any explanation, just the JSON object`;
  }
  /**
   * Parse JSON response from AI, handling potential formatting issues
   * (leading/trailing whitespace and Markdown ``` / ```json fences).
   * @param {string} response - raw model output.
   * @returns {unknown} the parsed JSON value.
   * @throws {Error} with the original SyntaxError attached as `cause`
   *                 when the cleaned text is still not valid JSON.
   */
  parseJsonResponse(response) {
    let cleaned = response.trim();
    // Strip an opening Markdown fence, if any.
    if (cleaned.startsWith("```json")) {
      cleaned = cleaned.slice(7);
    } else if (cleaned.startsWith("```")) {
      cleaned = cleaned.slice(3);
    }
    // Strip a closing fence, if any.
    if (cleaned.endsWith("```")) {
      cleaned = cleaned.slice(0, -3);
    }
    cleaned = cleaned.trim();
    try {
      return JSON.parse(cleaned);
    } catch (err) {
      // Preserve the underlying SyntaxError so callers can inspect it;
      // the message carries a truncated sample of the offending response.
      throw new Error(`Failed to parse JSON response: ${response.substring(0, 200)}...`, { cause: err });
    }
  }
};
|
|
71
|
+
// Default Gemini model used when none is supplied.
var DEFAULT_MODEL = "gemini-1.5-flash";
/**
 * Provider backed by the Google Generative AI SDK (Gemini API-key flavor).
 * Inherits prompt construction and JSON parsing from BaseProvider.
 */
var GeminiProvider = class extends BaseProvider {
  name = "gemini";
  model;   // model id string, e.g. "gemini-1.5-flash"
  client;  // GoogleGenerativeAI SDK client
  constructor(apiKey, model) {
    super(apiKey);
    this.model = model || DEFAULT_MODEL;
    this.client = new GoogleGenerativeAI(apiKey);
  }
  /**
   * Extract plain text from a file.
   * Returns { content: string, tokens?: {inputTokens, outputTokens, totalTokens} }.
   */
  async extractText(file, options) {
    const generationConfig = this.buildGenerationConfig(options?.modelConfig);
    const model = this.client.getGenerativeModel({
      model: this.model,
      generationConfig
    });
    const prompt = this.buildTextPrompt(options);
    const content = this.buildContent(file, prompt);
    const result = await model.generateContent(content);
    const response = result.response;
    const tokens = this.extractTokenUsage(response);
    return {
      content: response.text(),
      tokens
    };
  }
  /**
   * Extract structured data following `schema`.
   * Requests native JSON output via responseMimeType, then still runs the
   * lenient parser in case the model wraps the payload in fences.
   */
  async extractJson(file, schema, options) {
    const generationConfig = this.buildGenerationConfig(options?.modelConfig, {
      responseMimeType: "application/json"
    });
    const model = this.client.getGenerativeModel({
      model: this.model,
      generationConfig
    });
    const prompt = this.buildJsonPrompt(schema, options);
    const content = this.buildContent(file, prompt);
    const result = await model.generateContent(content);
    const response = result.response;
    const text = response.text();
    const tokens = this.extractTokenUsage(response);
    return {
      content: this.parseJsonResponse(text),
      tokens
    };
  }
  // Map the library-neutral modelConfig onto Gemini's generationConfig keys
  // (maxTokens -> maxOutputTokens); only explicitly-set fields are copied.
  buildGenerationConfig(modelConfig, defaults) {
    const config = { ...defaults };
    if (modelConfig?.temperature !== void 0) {
      config.temperature = modelConfig.temperature;
    }
    if (modelConfig?.maxTokens !== void 0) {
      config.maxOutputTokens = modelConfig.maxTokens;
    }
    if (modelConfig?.topP !== void 0) {
      config.topP = modelConfig.topP;
    }
    if (modelConfig?.topK !== void 0) {
      config.topK = modelConfig.topK;
    }
    if (modelConfig?.stopSequences !== void 0) {
      config.stopSequences = modelConfig.stopSequences;
    }
    return config;
  }
  // Normalize Gemini usage metadata to the shared token-usage shape;
  // undefined when the SDK returned no usageMetadata.
  extractTokenUsage(response) {
    const usage = response.usageMetadata;
    if (!usage) return void 0;
    return {
      inputTokens: usage.promptTokenCount || 0,
      outputTokens: usage.candidatesTokenCount || 0,
      totalTokens: usage.totalTokenCount || 0
    };
  }
  // Text files are inlined into the prompt; binary files (pdf/image) are sent
  // as inlineData base64 parts alongside the prompt text.
  buildContent(file, prompt) {
    if (file.type === "text") {
      return `${prompt}

Document content:
${file.content.toString("utf-8")}`;
    }
    return [
      {
        inlineData: {
          mimeType: file.mimeType,
          data: file.base64 || file.content.toString("base64")
        }
      },
      { text: prompt }
    ];
  }
};
|
|
162
|
+
// Default OpenAI model used when none is supplied.
var DEFAULT_MODEL2 = "gpt-4o";
/**
 * Provider backed by the official OpenAI SDK (Chat Completions API).
 * NOTE(review): PDFs are sent as data-URL "image_url" parts like images —
 * confirm the selected model accepts PDF input this way.
 */
var OpenAIProvider = class extends BaseProvider {
  name = "openai";
  model;   // model id string, e.g. "gpt-4o"
  client;  // OpenAI SDK client
  constructor(apiKey, model) {
    super(apiKey);
    this.model = model || DEFAULT_MODEL2;
    this.client = new OpenAI({ apiKey });
  }
  /** Extract plain text; returns { content, tokens? }. */
  async extractText(file, options) {
    const prompt = this.buildTextPrompt(options);
    const messages = this.buildMessages(file, prompt);
    const completionOptions = this.buildCompletionOptions(options?.modelConfig);
    const response = await this.client.chat.completions.create({
      model: this.model,
      messages,
      ...completionOptions
    });
    const tokens = this.extractTokenUsage(response);
    return {
      content: response.choices[0]?.message?.content || "",
      tokens
    };
  }
  /**
   * Extract structured data following `schema`.
   * Forces JSON output via response_format, then parses leniently.
   */
  async extractJson(file, schema, options) {
    const prompt = this.buildJsonPrompt(schema, options);
    const messages = this.buildMessages(file, prompt);
    const completionOptions = this.buildCompletionOptions(options?.modelConfig);
    const response = await this.client.chat.completions.create({
      model: this.model,
      messages,
      ...completionOptions,
      response_format: { type: "json_object" }
    });
    const text = response.choices[0]?.message?.content || "{}";
    const tokens = this.extractTokenUsage(response);
    return {
      content: this.parseJsonResponse(text),
      tokens
    };
  }
  // Map the neutral modelConfig onto Chat Completions parameters.
  // max_tokens defaults to 16384 unless overridden; topK has no
  // Chat Completions equivalent and is intentionally ignored.
  buildCompletionOptions(modelConfig) {
    const options = {
      max_tokens: 16384
    };
    if (modelConfig?.temperature !== void 0) {
      options.temperature = modelConfig.temperature;
    }
    if (modelConfig?.maxTokens !== void 0) {
      options.max_tokens = modelConfig.maxTokens;
    }
    if (modelConfig?.topP !== void 0) {
      options.top_p = modelConfig.topP;
    }
    if (modelConfig?.stopSequences !== void 0) {
      options.stop = modelConfig.stopSequences;
    }
    return options;
  }
  // Normalize OpenAI usage to the shared token-usage shape; undefined when absent.
  extractTokenUsage(response) {
    const usage = response.usage;
    if (!usage) return void 0;
    return {
      inputTokens: usage.prompt_tokens,
      outputTokens: usage.completion_tokens,
      totalTokens: usage.total_tokens
    };
  }
  // Text files are inlined into a single user message; binary files are sent
  // as a high-detail data-URL image part followed by the prompt text.
  buildMessages(file, prompt) {
    if (file.type === "text") {
      return [
        {
          role: "user",
          content: `${prompt}

Document content:
${file.content.toString("utf-8")}`
        }
      ];
    }
    const base64 = file.base64 || file.content.toString("base64");
    const imageUrl = `data:${file.mimeType};base64,${base64}`;
    return [
      {
        role: "user",
        content: [
          {
            type: "image_url",
            image_url: {
              url: imageUrl,
              detail: "high"
            }
          },
          {
            type: "text",
            text: prompt
          }
        ]
      }
    ];
  }
};
|
|
265
|
+
// Default Anthropic model used when none is supplied.
var DEFAULT_MODEL3 = "claude-sonnet-4-20250514";
/**
 * Provider backed by the Anthropic SDK (Messages API).
 * Supports PDFs natively via "document" content blocks and images via
 * "image" blocks.
 */
var ClaudeProvider = class extends BaseProvider {
  name = "claude";
  model;   // model id string
  client;  // Anthropic SDK client
  constructor(apiKey, model) {
    super(apiKey);
    this.model = model || DEFAULT_MODEL3;
    this.client = new Anthropic({ apiKey });
  }
  /** Extract plain text; returns { content, tokens? }. */
  async extractText(file, options) {
    const prompt = this.buildTextPrompt(options);
    const messageOptions = this.buildMessageOptions(options?.modelConfig);
    const response = await this.client.messages.create({
      model: this.model,
      messages: [
        {
          role: "user",
          content: this.buildContent(file, prompt)
        }
      ],
      // Sampling fields are spelled out because the Messages API rejects
      // unknown keys; undefined values are simply omitted by the SDK.
      max_tokens: messageOptions.max_tokens,
      temperature: messageOptions.temperature,
      top_p: messageOptions.top_p,
      top_k: messageOptions.top_k,
      stop_sequences: messageOptions.stop_sequences
    });
    // The response may interleave block types; take the first text block.
    const textBlock = response.content.find((block) => block.type === "text");
    const tokens = this.extractTokenUsage(response);
    return {
      content: textBlock?.text || "",
      tokens
    };
  }
  /** Extract structured data following `schema`; parses the reply leniently. */
  async extractJson(file, schema, options) {
    const prompt = this.buildJsonPrompt(schema, options);
    const messageOptions = this.buildMessageOptions(options?.modelConfig);
    const response = await this.client.messages.create({
      model: this.model,
      messages: [
        {
          role: "user",
          content: this.buildContent(file, prompt)
        }
      ],
      max_tokens: messageOptions.max_tokens,
      temperature: messageOptions.temperature,
      top_p: messageOptions.top_p,
      top_k: messageOptions.top_k,
      stop_sequences: messageOptions.stop_sequences
    });
    const textBlock = response.content.find((block) => block.type === "text");
    const text = textBlock?.text || "{}";
    const tokens = this.extractTokenUsage(response);
    return {
      content: this.parseJsonResponse(text),
      tokens
    };
  }
  // Map the neutral modelConfig onto Messages API parameters.
  // max_tokens is mandatory for this API, hence the 16384 default.
  buildMessageOptions(modelConfig) {
    const options = {
      max_tokens: 16384
    };
    if (modelConfig?.temperature !== void 0) {
      options.temperature = modelConfig.temperature;
    }
    if (modelConfig?.maxTokens !== void 0) {
      options.max_tokens = modelConfig.maxTokens;
    }
    if (modelConfig?.topP !== void 0) {
      options.top_p = modelConfig.topP;
    }
    if (modelConfig?.topK !== void 0) {
      options.top_k = modelConfig.topK;
    }
    if (modelConfig?.stopSequences !== void 0) {
      options.stop_sequences = modelConfig.stopSequences;
    }
    return options;
  }
  // NOTE(review): identical to the BaseProvider implementation — redundant
  // override kept for explicitness.
  supportsFileType(type) {
    return ["pdf", "image", "text"].includes(type);
  }
  // Anthropic reports input/output separately; total is derived here.
  extractTokenUsage(response) {
    return {
      inputTokens: response.usage.input_tokens,
      outputTokens: response.usage.output_tokens,
      totalTokens: response.usage.input_tokens + response.usage.output_tokens
    };
  }
  // Text files are inlined; PDFs use a "document" block; other binaries use
  // an "image" block. The text prompt always follows the attachment.
  buildContent(file, prompt) {
    if (file.type === "text") {
      return `${prompt}

Document content:
${file.content.toString("utf-8")}`;
    }
    const base64 = file.base64 || file.content.toString("base64");
    if (file.type === "pdf") {
      return [
        {
          type: "document",
          source: {
            type: "base64",
            media_type: "application/pdf",
            data: base64
          }
        },
        {
          type: "text",
          text: prompt
        }
      ];
    }
    return [
      {
        type: "image",
        source: {
          type: "base64",
          media_type: this.getMediaType(file.mimeType),
          data: base64
        }
      },
      {
        type: "text",
        text: prompt
      }
    ];
  }
  // Clamp arbitrary image MIME types to the set Anthropic accepts,
  // falling back to image/jpeg for anything else (e.g. bmp/tiff).
  getMediaType(mimeType) {
    const supportedTypes = [
      "image/jpeg",
      "image/png",
      "image/gif",
      "image/webp"
    ];
    if (supportedTypes.includes(mimeType)) {
      return mimeType;
    }
    return "image/jpeg";
  }
};
|
|
407
|
+
// xAI exposes an OpenAI-compatible endpoint; the OpenAI SDK is reused with
// a custom base URL.
var GROK_BASE_URL = "https://api.x.ai/v1";
// Default xAI model used when none is supplied.
var DEFAULT_MODEL4 = "grok-2-vision-1212";
/**
 * Provider for xAI Grok via the OpenAI-compatible Chat Completions API.
 */
var GrokProvider = class extends BaseProvider {
  name = "grok";
  model;   // model id string
  client;  // OpenAI SDK client pointed at api.x.ai
  constructor(apiKey, model) {
    super(apiKey);
    this.model = model || DEFAULT_MODEL4;
    this.client = new OpenAI({
      apiKey,
      baseURL: GROK_BASE_URL
    });
  }
  /** Extract plain text; returns { content, tokens? }. */
  async extractText(file, options) {
    const prompt = this.buildTextPrompt(options);
    const messages = this.buildMessages(file, prompt);
    const completionOptions = this.buildCompletionOptions(options?.modelConfig);
    const response = await this.client.chat.completions.create({
      model: this.model,
      messages,
      ...completionOptions
    });
    const tokens = this.extractTokenUsage(response);
    return {
      content: response.choices[0]?.message?.content || "",
      tokens
    };
  }
  /**
   * Extract structured data following `schema`.
   * NOTE(review): unlike OpenAIProvider.extractJson, no
   * response_format: { type: "json_object" } is requested here — JSON shape
   * relies on the prompt alone. Confirm whether the xAI endpoint supports
   * response_format before adding it.
   */
  async extractJson(file, schema, options) {
    const prompt = this.buildJsonPrompt(schema, options);
    const messages = this.buildMessages(file, prompt);
    const completionOptions = this.buildCompletionOptions(options?.modelConfig);
    const response = await this.client.chat.completions.create({
      model: this.model,
      messages,
      ...completionOptions
    });
    const text = response.choices[0]?.message?.content || "{}";
    const tokens = this.extractTokenUsage(response);
    return {
      content: this.parseJsonResponse(text),
      tokens
    };
  }
  // Map the neutral modelConfig onto Chat Completions parameters
  // (same shape as OpenAIProvider.buildCompletionOptions).
  buildCompletionOptions(modelConfig) {
    const options = {
      max_tokens: 16384
    };
    if (modelConfig?.temperature !== void 0) {
      options.temperature = modelConfig.temperature;
    }
    if (modelConfig?.maxTokens !== void 0) {
      options.max_tokens = modelConfig.maxTokens;
    }
    if (modelConfig?.topP !== void 0) {
      options.top_p = modelConfig.topP;
    }
    if (modelConfig?.stopSequences !== void 0) {
      options.stop = modelConfig.stopSequences;
    }
    return options;
  }
  // Normalize usage to the shared token-usage shape; undefined when absent.
  extractTokenUsage(response) {
    const usage = response.usage;
    if (!usage) return void 0;
    return {
      inputTokens: usage.prompt_tokens,
      outputTokens: usage.completion_tokens,
      totalTokens: usage.total_tokens
    };
  }
  // Text files are inlined; binaries are sent as a high-detail data-URL
  // image part followed by the prompt text.
  buildMessages(file, prompt) {
    if (file.type === "text") {
      return [
        {
          role: "user",
          content: `${prompt}

Document content:
${file.content.toString("utf-8")}`
        }
      ];
    }
    const base64 = file.base64 || file.content.toString("base64");
    const imageUrl = `data:${file.mimeType};base64,${base64}`;
    return [
      {
        role: "user",
        content: [
          {
            type: "image_url",
            image_url: {
              url: imageUrl,
              detail: "high"
            }
          },
          {
            type: "text",
            text: prompt
          }
        ]
      }
    ];
  }
};
|
|
513
|
+
// Default Vertex AI model used when none is supplied.
var DEFAULT_MODEL5 = "gemini-2.0-flash";
/**
 * Provider backed by @google/genai in Vertex AI mode (project/location
 * authentication via Application Default Credentials — no API key).
 */
var VertexProvider = class extends BaseProvider {
  name = "vertex";
  model;   // model id string
  client;  // GoogleGenAI client configured for Vertex
  constructor(config, model) {
    // "vertex" is a sentinel passed only to satisfy BaseProvider's
    // non-empty-apiKey check; Vertex auth uses project/location + ADC.
    super("vertex");
    this.model = model || DEFAULT_MODEL5;
    this.client = new GoogleGenAI({
      vertexai: true,
      project: config.project,
      location: config.location
    });
  }
  /** Extract plain text; returns { content, tokens? }. */
  async extractText(file, options) {
    const prompt = this.buildTextPrompt(options);
    const generationConfig = this.buildGenerationConfig(options?.modelConfig);
    const response = await this.client.models.generateContent({
      model: this.model,
      contents: this.buildContents(file, prompt),
      config: generationConfig
    });
    const tokens = this.extractTokenUsage(response);
    return {
      // In @google/genai, `text` is a property (not a method as in
      // @google/generative-ai).
      content: response.text || "",
      tokens
    };
  }
  /** Extract structured data following `schema`; requests native JSON output. */
  async extractJson(file, schema, options) {
    const prompt = this.buildJsonPrompt(schema, options);
    const generationConfig = this.buildGenerationConfig(options?.modelConfig, {
      responseMimeType: "application/json"
    });
    const response = await this.client.models.generateContent({
      model: this.model,
      contents: this.buildContents(file, prompt),
      config: generationConfig
    });
    const text = response.text || "{}";
    const tokens = this.extractTokenUsage(response);
    return {
      content: this.parseJsonResponse(text),
      tokens
    };
  }
  // Map the neutral modelConfig onto Gemini generation-config keys
  // (maxTokens -> maxOutputTokens); only explicitly-set fields are copied.
  buildGenerationConfig(modelConfig, defaults) {
    const config = { ...defaults };
    if (modelConfig?.temperature !== void 0) {
      config.temperature = modelConfig.temperature;
    }
    if (modelConfig?.maxTokens !== void 0) {
      config.maxOutputTokens = modelConfig.maxTokens;
    }
    if (modelConfig?.topP !== void 0) {
      config.topP = modelConfig.topP;
    }
    if (modelConfig?.topK !== void 0) {
      config.topK = modelConfig.topK;
    }
    if (modelConfig?.stopSequences !== void 0) {
      config.stopSequences = modelConfig.stopSequences;
    }
    return config;
  }
  // Normalize usage metadata to the shared token-usage shape; undefined when absent.
  extractTokenUsage(response) {
    const usage = response.usageMetadata;
    if (!usage) return void 0;
    return {
      inputTokens: usage.promptTokenCount || 0,
      outputTokens: usage.candidatesTokenCount || 0,
      totalTokens: usage.totalTokenCount || 0
    };
  }
  // Text files are inlined into a single user part; binaries are sent as an
  // inlineData part followed by the prompt text.
  buildContents(file, prompt) {
    if (file.type === "text") {
      return [
        {
          role: "user",
          parts: [
            { text: `${prompt}

Document content:
${file.content.toString("utf-8")}` }
          ]
        }
      ];
    }
    const base64 = file.base64 || file.content.toString("base64");
    return [
      {
        role: "user",
        parts: [
          {
            inlineData: {
              mimeType: file.mimeType,
              data: base64
            }
          },
          { text: prompt }
        ]
      }
    ];
  }
};
|
|
617
|
+
// Maps an HTTP Content-Type (parameters stripped) to the library's logical
// file category ("pdf" | "image" | "text"). Used by loadFileFromUrl.
var MIME_TO_FILE_TYPE = {
  "application/pdf": "pdf",
  "image/jpeg": "image",
  "image/png": "image",
  "image/gif": "image",
  "image/webp": "image",
  "image/bmp": "image",
  "image/tiff": "image",
  "text/plain": "text",
  "text/markdown": "text",
  "text/csv": "text",
  "application/json": "text",
  "application/xml": "text",
  "text/html": "text"
};
// Maps a lowercase file extension (leading dot included) to its MIME type.
// Keys here define the full set of supported extensions.
var MIME_TYPES = {
  // PDF
  ".pdf": "application/pdf",
  // Images
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".png": "image/png",
  ".gif": "image/gif",
  ".webp": "image/webp",
  ".bmp": "image/bmp",
  ".tiff": "image/tiff",
  ".tif": "image/tiff",
  // Text files
  ".txt": "text/plain",
  ".md": "text/markdown",
  ".csv": "text/csv",
  ".json": "application/json",
  ".xml": "application/xml",
  ".html": "text/html",
  ".htm": "text/html"
};
// Maps a lowercase file extension to the logical file category.
// Must stay key-for-key in sync with MIME_TYPES above.
var FILE_TYPE_CATEGORIES = {
  ".pdf": "pdf",
  ".jpg": "image",
  ".jpeg": "image",
  ".png": "image",
  ".gif": "image",
  ".webp": "image",
  ".bmp": "image",
  ".tiff": "image",
  ".tif": "image",
  ".txt": "text",
  ".md": "text",
  ".csv": "text",
  ".json": "text",
  ".xml": "text",
  ".html": "text",
  ".htm": "text"
};
|
|
671
|
+
/**
 * Load a file from disk into the library's normalized file descriptor.
 *
 * @param {string} filePath - absolute or relative path to the file.
 * @returns {Promise<{path, name, type, mimeType, size, content, base64}>}
 *   `base64` is only populated for non-text files; `content` is a Buffer.
 * @throws {Error} when the path does not exist, is not a regular file,
 *   or has an unsupported extension.
 */
async function loadFile(filePath) {
  const absolutePath = path.resolve(filePath);
  let stats;
  try {
    // A single stat() both confirms existence and yields the size — the
    // previous separate access() probe was redundant and widened the
    // check-then-use race window.
    stats = await fs.stat(absolutePath);
  } catch {
    throw new Error(`File not found: ${absolutePath}`);
  }
  if (!stats.isFile()) {
    throw new Error(`Path is not a file: ${absolutePath}`);
  }
  const ext = path.extname(absolutePath).toLowerCase();
  const fileName = path.basename(absolutePath);
  const mimeType = MIME_TYPES[ext];
  const fileType = FILE_TYPE_CATEGORIES[ext];
  if (!mimeType || !fileType) {
    throw new Error(
      `Unsupported file type: ${ext}. Supported types: ${Object.keys(MIME_TYPES).join(", ")}`
    );
  }
  const content = await fs.readFile(absolutePath);
  // Text files are passed around as UTF-8 buffers; only binaries are
  // pre-encoded to base64 for the providers.
  const base64 = fileType !== "text" ? content.toString("base64") : void 0;
  return {
    path: absolutePath,
    name: fileName,
    type: fileType,
    mimeType,
    size: stats.size,
    content,
    base64
  };
}
|
|
703
|
+
/**
 * Build the normalized file descriptor from an in-memory Buffer.
 * The extension of `fileName` drives type detection; an explicit `mimeType`
 * argument overrides the extension-derived one.
 */
function loadFileFromBuffer(buffer, fileName, mimeType) {
  const extension = path.extname(fileName).toLowerCase();
  const resolvedMime = mimeType || MIME_TYPES[extension];
  const category = FILE_TYPE_CATEGORIES[extension];
  if (!resolvedMime || !category) {
    throw new Error(
      `Unsupported file type: ${extension}. Supported types: ${Object.keys(MIME_TYPES).join(", ")}`
    );
  }
  // Only binary categories carry a pre-computed base64 payload.
  const encoded = category === "text" ? void 0 : buffer.toString("base64");
  return {
    path: "",
    name: fileName,
    type: category,
    mimeType: resolvedMime,
    size: buffer.length,
    content: buffer,
    base64: encoded
  };
}
|
|
723
|
+
/**
 * Build the normalized file descriptor from a base64 payload.
 * Accepts both raw base64 and full data URLs ("data:...;base64,<payload>").
 */
function loadFileFromBase64(base64, fileName, mimeType) {
  // Strip a data-URL prefix when present; the base64 alphabet contains no
  // comma, so the first comma (if any) always ends the prefix.
  const commaIndex = base64.indexOf(",");
  const base64Data = commaIndex === -1 ? base64 : base64.slice(commaIndex + 1);
  const buffer = Buffer.from(base64Data, "base64");
  const extension = path.extname(fileName).toLowerCase();
  const resolvedMime = mimeType || MIME_TYPES[extension];
  const category = FILE_TYPE_CATEGORIES[extension];
  if (!resolvedMime || !category) {
    throw new Error(
      `Unsupported file type: ${extension}. Supported types: ${Object.keys(MIME_TYPES).join(", ")}`
    );
  }
  return {
    path: "",
    name: fileName,
    type: category,
    mimeType: resolvedMime,
    size: buffer.length,
    content: buffer,
    base64: base64Data
  };
}
|
|
744
|
+
/**
 * Write `content` to `filePath`, creating any missing parent directories.
 * Strings are written as UTF-8; Buffers are written verbatim.
 */
async function saveToFile(filePath, content) {
  const target = path.resolve(filePath);
  await fs.mkdir(path.dirname(target), { recursive: true });
  const encoding = typeof content === "string" ? "utf-8" : void 0;
  await fs.writeFile(target, content, encoding);
}
|
|
750
|
+
/**
 * List every file extension this library can load (leading dot included),
 * in the order they are declared in MIME_TYPES.
 */
function getSupportedExtensions() {
  return [...Object.keys(MIME_TYPES)];
}
|
|
753
|
+
/**
 * Case-insensitively check whether an extension is supported.
 * Accepts the extension with or without the leading dot.
 */
function isExtensionSupported(ext) {
  const lowered = ext.toLowerCase();
  const normalizedExt = lowered.startsWith(".") ? lowered : `.${lowered}`;
  return normalizedExt in MIME_TYPES;
}
|
|
757
|
+
/**
 * Heuristic check for whether a source string is an HTTP(S) URL rather
 * than a filesystem path.
 */
function isUrl(str) {
  return ["http://", "https://"].some((scheme) => str.startsWith(scheme));
}
|
|
760
|
+
/**
 * Download a file over HTTP(S) and build the normalized file descriptor.
 *
 * Type detection order: response Content-Type first, then the extension of
 * the detected filename (Content-Disposition, falling back to the URL path).
 *
 * @param {string} url - http(s) URL to fetch.
 * @returns {Promise<{path, name, type, mimeType, size, content, base64}>}
 *   `path` holds the source URL; `base64` is populated for non-text files.
 * @throws {Error} on non-2xx responses or when neither the Content-Type nor
 *   the filename extension identifies a supported type.
 */
async function loadFileFromUrl(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`);
  }
  // Drop MIME parameters such as "; charset=utf-8".
  const contentType = response.headers.get("content-type")?.split(";")[0] || "";
  const buffer = Buffer.from(await response.arrayBuffer());
  // Prefer a server-supplied filename from Content-Disposition.
  const contentDisposition = response.headers.get("content-disposition");
  let fileName = "";
  if (contentDisposition) {
    // Captures quoted or bare filename values; quotes are stripped below.
    const match = contentDisposition.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/);
    if (match) {
      fileName = match[1].replace(/['"]/g, "");
    }
  }
  // Fall back to the last URL path segment, then a generic placeholder.
  if (!fileName) {
    const urlPath = new URL(url).pathname;
    fileName = path.basename(urlPath) || "download";
  }
  // Content-Type wins; otherwise derive both MIME type and category from
  // the filename extension.
  let fileType = MIME_TO_FILE_TYPE[contentType];
  let mimeType = contentType;
  if (!fileType) {
    const ext = path.extname(fileName).toLowerCase();
    mimeType = MIME_TYPES[ext];
    fileType = FILE_TYPE_CATEGORIES[ext];
  }
  if (!fileType || !mimeType) {
    throw new Error(
      `Unsupported file type from URL. Content-Type: ${contentType}, Filename: ${fileName}`
    );
  }
  // Only binary categories carry a pre-computed base64 payload.
  const base64 = fileType !== "text" ? buffer.toString("base64") : void 0;
  return {
    path: url,
    name: fileName,
    type: fileType,
    mimeType,
    size: buffer.length,
    content: buffer,
    base64
  };
}
|
|
802
|
+
|
|
803
|
+
// src/extracta.ts
|
|
804
|
+
var ExtractaAI = class {
|
|
805
|
+
provider;
|
|
806
|
+
config;
|
|
807
|
+
constructor(config) {
|
|
808
|
+
this.config = config;
|
|
809
|
+
this.provider = this.createProvider(config);
|
|
810
|
+
}
|
|
811
|
+
/**
|
|
812
|
+
* Create a provider instance based on configuration
|
|
813
|
+
*/
|
|
814
|
+
createProvider(config) {
|
|
815
|
+
switch (config.provider) {
|
|
816
|
+
case "gemini":
|
|
817
|
+
if (!config.apiKey) throw new Error("API key is required for Gemini provider");
|
|
818
|
+
return new GeminiProvider(config.apiKey, config.model);
|
|
819
|
+
case "openai":
|
|
820
|
+
if (!config.apiKey) throw new Error("API key is required for OpenAI provider");
|
|
821
|
+
return new OpenAIProvider(config.apiKey, config.model);
|
|
822
|
+
case "claude":
|
|
823
|
+
if (!config.apiKey) throw new Error("API key is required for Claude provider");
|
|
824
|
+
return new ClaudeProvider(config.apiKey, config.model);
|
|
825
|
+
case "grok":
|
|
826
|
+
if (!config.apiKey) throw new Error("API key is required for Grok provider");
|
|
827
|
+
return new GrokProvider(config.apiKey, config.model);
|
|
828
|
+
case "vertex":
|
|
829
|
+
if (!config.vertexConfig) throw new Error("vertexConfig is required for Vertex AI provider");
|
|
830
|
+
return new VertexProvider(config.vertexConfig, config.model);
|
|
831
|
+
default:
|
|
832
|
+
throw new Error(`Unsupported provider: ${config.provider}`);
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
/**
|
|
836
|
+
* Extract content from a file path or URL
|
|
837
|
+
*/
|
|
838
|
+
async extract(source, options) {
|
|
839
|
+
const startTime = Date.now();
|
|
840
|
+
try {
|
|
841
|
+
const file = isUrl(source) ? await loadFileFromUrl(source) : await loadFile(source);
|
|
842
|
+
return this.processExtraction(file, options, startTime);
|
|
843
|
+
} catch (error) {
|
|
844
|
+
return this.createErrorResult(error);
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
/**
|
|
848
|
+
* Extract content from a Buffer
|
|
849
|
+
*/
|
|
850
|
+
async extractFromBuffer(buffer, fileName, options) {
|
|
851
|
+
const startTime = Date.now();
|
|
852
|
+
try {
|
|
853
|
+
const file = loadFileFromBuffer(buffer, fileName);
|
|
854
|
+
return this.processExtraction(file, options, startTime);
|
|
855
|
+
} catch (error) {
|
|
856
|
+
return this.createErrorResult(error);
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
/**
|
|
860
|
+
* Extract content from a base64 string
|
|
861
|
+
*/
|
|
862
|
+
async extractFromBase64(base64, fileName, options) {
|
|
863
|
+
const startTime = Date.now();
|
|
864
|
+
try {
|
|
865
|
+
const file = loadFileFromBase64(base64, fileName);
|
|
866
|
+
return this.processExtraction(file, options, startTime);
|
|
867
|
+
} catch (error) {
|
|
868
|
+
return this.createErrorResult(error);
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
/**
|
|
872
|
+
* Process the extraction based on format
|
|
873
|
+
*/
|
|
874
|
+
async processExtraction(file, options, startTime) {
|
|
875
|
+
const format = options?.format || "text";
|
|
876
|
+
if (!this.provider.supportsFileType(file.type)) {
|
|
877
|
+
return {
|
|
878
|
+
success: false,
|
|
879
|
+
error: `Provider ${this.provider.name} does not support file type: ${file.type}`,
|
|
880
|
+
code: "UNSUPPORTED_FILE_TYPE"
|
|
881
|
+
};
|
|
882
|
+
}
|
|
883
|
+
try {
|
|
884
|
+
if (format === "json") {
|
|
885
|
+
if (!options?.schema) {
|
|
886
|
+
return {
|
|
887
|
+
success: false,
|
|
888
|
+
error: "Schema is required for JSON extraction",
|
|
889
|
+
code: "MISSING_SCHEMA"
|
|
890
|
+
};
|
|
891
|
+
}
|
|
892
|
+
const providerResult = await this.provider.extractJson(file, options.schema, options);
|
|
893
|
+
const result = {
|
|
894
|
+
success: true,
|
|
895
|
+
format: "json",
|
|
896
|
+
data: providerResult.content,
|
|
897
|
+
metadata: {
|
|
898
|
+
provider: this.provider.name,
|
|
899
|
+
model: this.provider.model,
|
|
900
|
+
fileType: file.type,
|
|
901
|
+
fileName: file.name,
|
|
902
|
+
processingTimeMs: Date.now() - startTime,
|
|
903
|
+
tokens: providerResult.tokens
|
|
904
|
+
}
|
|
905
|
+
};
|
|
906
|
+
if (options.outputPath) {
|
|
907
|
+
await saveToFile(options.outputPath, JSON.stringify(providerResult.content, null, 2));
|
|
908
|
+
}
|
|
909
|
+
return result;
|
|
910
|
+
} else {
|
|
911
|
+
const providerResult = await this.provider.extractText(file, options);
|
|
912
|
+
const result = {
|
|
913
|
+
success: true,
|
|
914
|
+
format: "text",
|
|
915
|
+
content: providerResult.content,
|
|
916
|
+
metadata: {
|
|
917
|
+
provider: this.provider.name,
|
|
918
|
+
model: this.provider.model,
|
|
919
|
+
fileType: file.type,
|
|
920
|
+
fileName: file.name,
|
|
921
|
+
processingTimeMs: Date.now() - startTime,
|
|
922
|
+
tokens: providerResult.tokens
|
|
923
|
+
}
|
|
924
|
+
};
|
|
925
|
+
if (options?.outputPath) {
|
|
926
|
+
await saveToFile(options.outputPath, providerResult.content);
|
|
927
|
+
}
|
|
928
|
+
return result;
|
|
929
|
+
}
|
|
930
|
+
} catch (error) {
|
|
931
|
+
return this.createErrorResult(error);
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
/**
|
|
935
|
+
* Create an error result
|
|
936
|
+
*/
|
|
937
|
+
createErrorResult(error) {
|
|
938
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
939
|
+
return {
|
|
940
|
+
success: false,
|
|
941
|
+
error: message,
|
|
942
|
+
code: "EXTRACTION_ERROR"
|
|
943
|
+
};
|
|
944
|
+
}
|
|
945
|
+
/**
|
|
946
|
+
* Get current provider name
|
|
947
|
+
*/
|
|
948
|
+
getProvider() {
|
|
949
|
+
return this.provider.name;
|
|
950
|
+
}
|
|
951
|
+
/**
|
|
952
|
+
* Get current model
|
|
953
|
+
*/
|
|
954
|
+
getModel() {
|
|
955
|
+
return this.provider.model;
|
|
956
|
+
}
|
|
957
|
+
/**
|
|
958
|
+
* Change the AI provider
|
|
959
|
+
*/
|
|
960
|
+
setProvider(provider, apiKey, model) {
|
|
961
|
+
this.config = { provider, apiKey, model };
|
|
962
|
+
this.provider = this.createProvider(this.config);
|
|
963
|
+
}
|
|
964
|
+
};
|
|
965
|
+
/**
 * Convenience factory for building an ExtractaAI client.
 *
 * @param {object} config - Client configuration (`provider`, `apiKey`,
 *   `model`, and `vertexConfig` for Vertex AI).
 * @returns {ExtractaAI} A configured ExtractaAI instance.
 */
function createExtractaAI(config) {
  const client = new ExtractaAI(config);
  return client;
}
|
|
968
|
+
|
|
969
|
+
export { BaseProvider, ClaudeProvider, ExtractaAI, GeminiProvider, GrokProvider, OpenAIProvider, VertexProvider, createExtractaAI, getSupportedExtensions, isExtensionSupported, isUrl, loadFile, loadFileFromBase64, loadFileFromBuffer, loadFileFromUrl, saveToFile };
|
|
970
|
+
//# sourceMappingURL=index.mjs.map
|