pi-ocr 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/mineru.ts +27 -4
- package/package.json +1 -1
package/extensions/mineru.ts
CHANGED
|
@@ -166,7 +166,7 @@ async function mineruProcessFile(
|
|
|
166
166
|
onProgress(`${progressPrefix} uploading…`);
|
|
167
167
|
const { task_id, file_url } = await apiPost(`${BASE_URL}/parse/file`, {
|
|
168
168
|
file_name: fileName,
|
|
169
|
-
language: "
|
|
169
|
+
language: "ch", // Chinese + English (default: ch_server for better handwriting/Japanese)
|
|
170
170
|
enable_table: true,
|
|
171
171
|
enable_formula: true,
|
|
172
172
|
is_ocr: false,
|
|
@@ -186,6 +186,19 @@ async function mineruProcessFile(
|
|
|
186
186
|
return markdown;
|
|
187
187
|
}
|
|
188
188
|
|
|
189
|
+
// ── Image → PDF wrapper (so MinerU applies language=\"ch\" pipeline) ──────────
|
|
190
|
+
|
|
191
|
+
const IMG2PDF_SCRIPT = `
|
|
192
|
+
import sys
|
|
193
|
+
from PIL import Image
|
|
194
|
+
img = Image.open(sys.argv[1])
|
|
195
|
+
img.save(sys.argv[2], "PDF")
|
|
196
|
+
`;
|
|
197
|
+
|
|
198
|
+
async function wrapImageAsPdf(imagePath: string, pdfPath: string): Promise<void> {
|
|
199
|
+
await execPy(IMG2PDF_SCRIPT, [imagePath, pdfPath]);
|
|
200
|
+
}
|
|
201
|
+
|
|
189
202
|
// ── Public API ───────────────────────────────────────────────────────────────
|
|
190
203
|
|
|
191
204
|
export async function mineruOcr(
|
|
@@ -195,13 +208,23 @@ export async function mineruOcr(
|
|
|
195
208
|
const ext = extname(filePath).toLowerCase();
|
|
196
209
|
const fileName = basename(filePath);
|
|
197
210
|
|
|
198
|
-
// For images (non-PDF):
|
|
211
|
+
// For images (non-PDF): wrap in PDF so MinerU applies language=\"ch\" pipeline
|
|
199
212
|
if (ext !== ".pdf") {
|
|
200
213
|
if (![".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif"].includes(ext)) {
|
|
201
214
|
throw new Error(`MinerU does not support this file type: ${ext}. Use PDF, PNG, JPG, Docx, PPTx, or Xlsx.`);
|
|
202
215
|
}
|
|
203
|
-
|
|
204
|
-
|
|
216
|
+
|
|
217
|
+
// Wrap image as 1-page PDF so language/chinese OCR works
|
|
218
|
+
onProgress("[1/1] converting image to PDF…");
|
|
219
|
+
const pdfPath = join(tmpdir(), `pi-mineru-img-${Date.now()}.pdf`);
|
|
220
|
+
try {
|
|
221
|
+
await wrapImageAsPdf(filePath, pdfPath);
|
|
222
|
+
const pdfName = fileName.replace(/\.[^.]+$/, "") + ".pdf";
|
|
223
|
+
const markdown = await mineruProcessFile(pdfPath, pdfName, "[1/1]", onProgress);
|
|
224
|
+
return { text: markdown, details: { backend: "mineru", fileName, pages: 1 } };
|
|
225
|
+
} finally {
|
|
226
|
+
try { unlinkSync(pdfPath); } catch { /* cleanup */ }
|
|
227
|
+
}
|
|
205
228
|
}
|
|
206
229
|
|
|
207
230
|
// ── PDF handling ──
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-ocr",
|
|
3
|
-
"version": "1.1.2",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Pi extension: Zero-setup multi-backend OCR — MinerU (free cloud), Ollama (local GPU, LaTeX formulas), Pix2Text (local Python). Extract text, formulas, and tables from images and PDFs. Default: zero config, works out of the box.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|