pi-ocr 1.1.3 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/index.ts +12 -3
- package/extensions/mineru.ts +26 -3
- package/extensions/ollama.ts +1 -1
- package/package.json +1 -1
package/extensions/index.ts
CHANGED
|
@@ -43,7 +43,7 @@ import { homedir } from "node:os";
|
|
|
43
43
|
|
|
44
44
|
import type { Backend, Task, OcrConfig } from "./types";
|
|
45
45
|
import { TASKS, BACKENDS } from "./types";
|
|
46
|
-
import { isImage, isPdf,
|
|
46
|
+
import { isImage, isPdf, ollamaOcr, ollamaCheckModel, ollamaPullModel } from "./ollama";
|
|
47
47
|
import { mineruOcr } from "./mineru";
|
|
48
48
|
import { tesseractOcr } from "./tesseract";
|
|
49
49
|
import { pix2textOcr } from "./pix2text";
|
|
@@ -55,7 +55,16 @@ const SETTINGS_PATH = join(homedir(), ".pi", "agent", "settings.json");
|
|
|
55
55
|
function loadOcrConfig(): Partial<OcrConfig> {
|
|
56
56
|
try {
|
|
57
57
|
if (!existsSync(SETTINGS_PATH)) return {};
|
|
58
|
-
|
|
58
|
+
const settings = JSON.parse(readFileSync(SETTINGS_PATH, "utf8"));
|
|
59
|
+
// Migrate from old key (pi-minimodel-ocr) to new key (pi-ocr)
|
|
60
|
+
const old = (settings as any).minimodelOcr;
|
|
61
|
+
const current = (settings as any).piOcr;
|
|
62
|
+
if (old && !current) {
|
|
63
|
+
(settings as any).piOcr = old;
|
|
64
|
+
delete (settings as any).minimodelOcr;
|
|
65
|
+
writeFileSync(SETTINGS_PATH, JSON.stringify(settings, null, 2) + "\n", "utf8");
|
|
66
|
+
}
|
|
67
|
+
return (settings as any).piOcr || {};
|
|
59
68
|
} catch { return {}; }
|
|
60
69
|
}
|
|
61
70
|
|
|
@@ -66,7 +75,7 @@ function saveOcrConfig(updates: Partial<OcrConfig>) {
|
|
|
66
75
|
const settings = existsSync(SETTINGS_PATH)
|
|
67
76
|
? JSON.parse(readFileSync(SETTINGS_PATH, "utf8"))
|
|
68
77
|
: {};
|
|
69
|
-
settings.
|
|
78
|
+
settings.piOcr = { ...(settings.piOcr || {}), ...updates };
|
|
70
79
|
writeFileSync(SETTINGS_PATH, JSON.stringify(settings, null, 2) + "\n", "utf8");
|
|
71
80
|
} catch { /* best effort */ }
|
|
72
81
|
}
|
package/extensions/mineru.ts
CHANGED
|
@@ -186,6 +186,19 @@ async function mineruProcessFile(
|
|
|
186
186
|
return markdown;
|
|
187
187
|
}
|
|
188
188
|
|
|
189
|
+
// ── Image → PDF wrapper (so MinerU applies language=\"ch\" pipeline) ──────────
|
|
190
|
+
|
|
191
|
+
const IMG2PDF_SCRIPT = `
|
|
192
|
+
import sys
|
|
193
|
+
from PIL import Image
|
|
194
|
+
img = Image.open(sys.argv[1])
|
|
195
|
+
img.save(sys.argv[2], "PDF")
|
|
196
|
+
`;
|
|
197
|
+
|
|
198
|
+
async function wrapImageAsPdf(imagePath: string, pdfPath: string): Promise<void> {
|
|
199
|
+
await execPy(IMG2PDF_SCRIPT, [imagePath, pdfPath]);
|
|
200
|
+
}
|
|
201
|
+
|
|
189
202
|
// ── Public API ───────────────────────────────────────────────────────────────
|
|
190
203
|
|
|
191
204
|
export async function mineruOcr(
|
|
@@ -195,13 +208,23 @@ export async function mineruOcr(
|
|
|
195
208
|
const ext = extname(filePath).toLowerCase();
|
|
196
209
|
const fileName = basename(filePath);
|
|
197
210
|
|
|
198
|
-
// For images (non-PDF):
|
|
211
|
+
// For images (non-PDF): wrap in PDF so MinerU applies language=\"ch\" pipeline
|
|
199
212
|
if (ext !== ".pdf") {
|
|
200
213
|
if (![".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif"].includes(ext)) {
|
|
201
214
|
throw new Error(`MinerU does not support this file type: ${ext}. Use PDF, PNG, JPG, Docx, PPTx, or Xlsx.`);
|
|
202
215
|
}
|
|
203
|
-
|
|
204
|
-
|
|
216
|
+
|
|
217
|
+
// Wrap image as 1-page PDF so language/chinese OCR works
|
|
218
|
+
onProgress("[1/1] converting image to PDF…");
|
|
219
|
+
const pdfPath = join(tmpdir(), `pi-mineru-img-${Date.now()}.pdf`);
|
|
220
|
+
try {
|
|
221
|
+
await wrapImageAsPdf(filePath, pdfPath);
|
|
222
|
+
const pdfName = fileName.replace(/\.[^.]+$/, "") + ".pdf";
|
|
223
|
+
const markdown = await mineruProcessFile(pdfPath, pdfName, "[1/1]", onProgress);
|
|
224
|
+
return { text: markdown, details: { backend: "mineru", fileName, pages: 1 } };
|
|
225
|
+
} finally {
|
|
226
|
+
try { unlinkSync(pdfPath); } catch { /* cleanup */ }
|
|
227
|
+
}
|
|
205
228
|
}
|
|
206
229
|
|
|
207
230
|
// ── PDF handling ──
|
package/extensions/ollama.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { readFileSync, existsSync, mkdtempSync, readdirSync, unlinkSync, rmdirSync } from "node:fs";
|
|
9
|
-
import {
|
|
9
|
+
import { extname, join } from "node:path";
|
|
10
10
|
import { tmpdir } from "node:os";
|
|
11
11
|
import { spawn } from "node:child_process";
|
|
12
12
|
import type { Task, OcrResult, OcrProgressCallback } from "./types";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-ocr",
|
|
3
|
-
"version": "1.1.3",
|
|
3
|
+
"version": "1.2.1",
|
|
4
4
|
"description": "Pi extension: Zero-setup multi-backend OCR — MinerU (free cloud), Ollama (local GPU, LaTeX formulas), Pix2Text (local Python). Extract text, formulas, and tables from images and PDFs. Default: zero config, works out of the box.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|