@pandi2352/gemini-ocr 2.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -1
- package/dist/index.js +49 -31
- package/dist/types.d.ts +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -112,6 +112,23 @@ console.log('Extracted Data:', result.entityResult);
|
|
|
112
112
|
*/
|
|
113
113
|
```
|
|
114
114
|
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### 6. Realtime Progress Feedback
|
|
118
|
+
Get granular updates on the processing stages.
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
await processOCR({
|
|
122
|
+
input: ['./large_document.pdf'],
|
|
123
|
+
apiKey: process.env.GEMINI_API_KEY,
|
|
124
|
+
|
|
125
|
+
onProgress: (stage, message) => {
|
|
126
|
+
// stage: 'init' | 'upload' | 'generate_text' | 'enrich' | 'complete'
|
|
127
|
+
console.log(`[${stage}]: ${message}`);
|
|
128
|
+
}
|
|
129
|
+
});
|
|
130
|
+
```
|
|
131
|
+
|
|
115
132
|
---
|
|
116
133
|
|
|
117
134
|
## 🛠️ Configuration Options
|
|
@@ -120,11 +137,12 @@ console.log('Extracted Data:', result.entityResult);
|
|
|
120
137
|
| :--- | :--- | :--- | :--- |
|
|
121
138
|
| `input` | `Array<string \| Buffer \| Object>` | **Required** | Array of file paths, URLs, Buffers, or Base64 strings. |
|
|
122
139
|
| `apiKey` | `string` | **Required** | Your Google Gemini API Key. |
|
|
123
|
-
| `model` | `string` | `gemini-1.5-flash` | The AI model
|
|
140
|
+
| `model` | `string` | `gemini-2.5-flash` | The AI model (use `gemini-1.5-flash-8b` for speed). |
|
|
124
141
|
| `summarize` | `boolean` | `false` | Generate `metadata` (title, desc, thumbnail). |
|
|
125
142
|
| `mindmap` | `boolean` | `false` | Generate Mermaid.js syntax for visual mapping. |
|
|
126
143
|
| `extractEntities`| `boolean` | `false` | Enable structured field extraction. |
|
|
127
144
|
| `entitySchema` | `string[]` | `auto` | Custom fields to extract (optional). |
|
|
145
|
+
| `onProgress` | `(stage, userMsg) => void` | `undefined` | Callback for realtime progress updates. |
|
|
128
146
|
|
|
129
147
|
---
|
|
130
148
|
|
package/dist/index.js
CHANGED
|
@@ -52,15 +52,19 @@ async function processSingleFile(input, options) {
|
|
|
52
52
|
const logger = new utils_1.Logger();
|
|
53
53
|
const requestId = (0, utils_1.generateRequestId)();
|
|
54
54
|
logger.log(`INIT: Processing file. RequestId: ${requestId}`);
|
|
55
|
+
if (options.onProgress)
|
|
56
|
+
options.onProgress('init', 'Initializing processing...');
|
|
55
57
|
try {
|
|
56
58
|
if (!options.apiKey)
|
|
57
59
|
throw new Error('Gemini API key is required.');
|
|
58
60
|
const gemini = new llm_1.GeminiClient(options.apiKey, logger);
|
|
59
|
-
const modelName = options.model || 'gemini-
|
|
61
|
+
const modelName = options.model || 'gemini-2.5-flash';
|
|
60
62
|
// Input Processing
|
|
61
63
|
const inputHandler = new input_handler_1.InputHandler(logger);
|
|
62
64
|
const normalized = await inputHandler.processInput(input);
|
|
63
65
|
const mimeType = normalized.mimeType;
|
|
66
|
+
if (options.onProgress)
|
|
67
|
+
options.onProgress('upload', 'Processing input file...');
|
|
64
68
|
// Strategy
|
|
65
69
|
let strategy = 'MEDIA';
|
|
66
70
|
if (mimeType === 'text/plain' || mimeType === 'text/csv' || normalized.extension === 'txt' || normalized.extension === 'csv') {
|
|
@@ -85,6 +89,8 @@ async function processSingleFile(input, options) {
|
|
|
85
89
|
let analysisText = '';
|
|
86
90
|
let extractedTextDocx = '';
|
|
87
91
|
let fileUri;
|
|
92
|
+
if (options.onProgress)
|
|
93
|
+
options.onProgress('generate_text', 'Generating analysis...');
|
|
88
94
|
if (strategy === 'TEXT') {
|
|
89
95
|
const content = normalized.data.toString('utf-8');
|
|
90
96
|
finalPrompt += `\n\nDOCUMENT CONTENT:\n${content}`;
|
|
@@ -136,48 +142,58 @@ async function processSingleFile(input, options) {
|
|
|
136
142
|
catch (e) { }
|
|
137
143
|
}
|
|
138
144
|
}
|
|
139
|
-
//
|
|
145
|
+
// Parallel Processing for Advanced Features
|
|
146
|
+
// Optimization Note: Future version can combine these into a single "Mega-Prompt" to reduce HTTP round-trips.
|
|
147
|
+
if (options.onProgress && (options.mindmap || options.extractEntities)) {
|
|
148
|
+
options.onProgress('enrich', 'Generating mindmap/entities...');
|
|
149
|
+
}
|
|
150
|
+
const tasks = [];
|
|
140
151
|
let mindmap = null;
|
|
152
|
+
let entityResult = null;
|
|
141
153
|
if (options.mindmap) {
|
|
142
154
|
const enrichPrompt = `${prompts_1.ENRICHMENT_PROMPT}\n\nCONTEXT:\n${mainAnalysis}`;
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
155
|
+
tasks.push((async () => {
|
|
156
|
+
try {
|
|
157
|
+
const enrichRes = await gemini.generateContent(modelName, enrichPrompt);
|
|
158
|
+
const jsonPart = enrichRes.match(/\{[\s\S]*\}/);
|
|
159
|
+
const mermaidPart = enrichRes.match(/```mermaid\n([\s\S]*?)\n```/);
|
|
160
|
+
if (jsonPart) {
|
|
161
|
+
const parsed = JSON.parse(jsonPart[0]);
|
|
162
|
+
mindmap = parsed.mermaid || null;
|
|
163
|
+
}
|
|
164
|
+
else if (mermaidPart) {
|
|
165
|
+
mindmap = mermaidPart[1];
|
|
166
|
+
}
|
|
167
|
+
else {
|
|
168
|
+
mindmap = enrichRes.replace(/```/g, '');
|
|
169
|
+
}
|
|
153
170
|
}
|
|
154
|
-
|
|
155
|
-
|
|
171
|
+
catch (e) {
|
|
172
|
+
logger.log(`Mindmap failed: ${e}`);
|
|
156
173
|
}
|
|
157
|
-
}
|
|
158
|
-
catch (e) {
|
|
159
|
-
logger.log(`Mindmap failed: ${e}`);
|
|
160
|
-
}
|
|
174
|
+
})());
|
|
161
175
|
}
|
|
162
|
-
// Entities
|
|
163
|
-
let entityResult = null;
|
|
164
176
|
if (options.extractEntities) {
|
|
165
177
|
let entityPromptStr = options.entitySchema
|
|
166
178
|
? (0, prompts_1.generateEntityPrompt)(options.entitySchema)
|
|
167
179
|
: prompts_1.AUTO_ENTITY_EXTRACTION_PROMPT;
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
180
|
+
tasks.push((async () => {
|
|
181
|
+
try {
|
|
182
|
+
const context = strategy === 'DOCX' ? extractedTextDocx : mainAnalysis;
|
|
183
|
+
const finalEntityPrompt = `${entityPromptStr}\n\nDATA CONTEXT:\n${context}`;
|
|
184
|
+
const res = await gemini.generateContent(modelName, finalEntityPrompt);
|
|
185
|
+
const json = res.match(/\{[\s\S]*\}/);
|
|
186
|
+
if (json) {
|
|
187
|
+
entityResult = JSON.parse(json[0]);
|
|
188
|
+
}
|
|
175
189
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
}
|
|
190
|
+
catch (e) {
|
|
191
|
+
logger.log(`Entity extraction failed: ${e}`);
|
|
192
|
+
}
|
|
193
|
+
})());
|
|
180
194
|
}
|
|
195
|
+
// Wait for all parallel tasks to complete
|
|
196
|
+
await Promise.all(tasks);
|
|
181
197
|
// Page Count
|
|
182
198
|
let pageCount = 1;
|
|
183
199
|
if (mimeType === 'application/pdf') {
|
|
@@ -188,6 +204,8 @@ async function processSingleFile(input, options) {
|
|
|
188
204
|
catch (e) { }
|
|
189
205
|
}
|
|
190
206
|
const endTime = new Date();
|
|
207
|
+
if (options.onProgress)
|
|
208
|
+
options.onProgress('complete', 'Processing complete.');
|
|
191
209
|
// Success Result
|
|
192
210
|
return {
|
|
193
211
|
status: 'success',
|
package/dist/types.d.ts
CHANGED