npm - pi-ocr - Versions diffs - 1.1.2 → 1.2.0 - Mend

pi-ocr 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/extensions/mineru.ts +27 -4
package/package.json +1 -1

package/extensions/mineru.ts CHANGED Viewed

@@ -166,7 +166,7 @@ async function mineruProcessFile(
   onProgress(`${progressPrefix} uploading…`);
   const { task_id, file_url } = await apiPost(`${BASE_URL}/parse/file`, {
     file_name: fileName,
-    language: "en",
+    language: "ch",   // Chinese + English; "ch_server" is reportedly better for handwriting/Japanese — TODO confirm which value is the API default
     enable_table: true,
     enable_formula: true,
     is_ocr: false,
@@ -186,6 +186,19 @@ async function mineruProcessFile(
   return markdown;
 }
+// ── Image → PDF wrapper (so MinerU applies the language="ch" pipeline) ────────
+const IMG2PDF_SCRIPT = `
+import sys
+from PIL import Image
+img = Image.open(sys.argv[1])
+img.save(sys.argv[2], "PDF")
+`;
+async function wrapImageAsPdf(imagePath: string, pdfPath: string): Promise<void> {
+  await execPy(IMG2PDF_SCRIPT, [imagePath, pdfPath]);
+}
 // ── Public API ───────────────────────────────────────────────────────────────
 export async function mineruOcr(
@@ -195,13 +208,23 @@ export async function mineruOcr(
   const ext = extname(filePath).toLowerCase();
   const fileName = basename(filePath);
-  // For images (non-PDF): process as a single individual request
+  // For images (non-PDF): wrap in a PDF so MinerU applies the language="ch" pipeline
   if (ext !== ".pdf") {
     if (![".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif"].includes(ext)) {
       throw new Error(`MinerU does not support this file type: ${ext}. Use PDF, PNG, JPG, Docx, PPTx, or Xlsx.`);
     }
-    const markdown = await mineruProcessFile(filePath, fileName, "[1/1]", onProgress);
-    return { text: markdown, details: { backend: "mineru", fileName, pages: 1 } };
+    // Wrap the image as a one-page PDF so the language setting (Chinese OCR) takes effect
+    onProgress("[1/1] converting image to PDF…");
+    const pdfPath = join(tmpdir(), `pi-mineru-img-${Date.now()}.pdf`);
+    try {
+      await wrapImageAsPdf(filePath, pdfPath);
+      const pdfName = fileName.replace(/\.[^.]+$/, "") + ".pdf";
+      const markdown = await mineruProcessFile(pdfPath, pdfName, "[1/1]", onProgress);
+      return { text: markdown, details: { backend: "mineru", fileName, pages: 1 } };
+    } finally {
+      try { unlinkSync(pdfPath); } catch { /* cleanup */ }
+    }
   }
   // ── PDF handling ──

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-ocr",
-  "version": "1.1.2",
+  "version": "1.2.0",
   "description": "Pi extension: Zero-setup multi-backend OCR — MinerU (free cloud), Ollama (local GPU, LaTeX formulas), Pix2Text (local Python). Extract text, formulas, and tables from images and PDFs. Default: zero config, works out of the box.",
   "keywords": [
     "pi-package",