pi-ocr 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -166,7 +166,7 @@ async function mineruProcessFile(
166
166
  onProgress(`${progressPrefix} uploading…`);
167
167
  const { task_id, file_url } = await apiPost(`${BASE_URL}/parse/file`, {
168
168
  file_name: fileName,
169
- language: "en",
169
+ language: "ch", // Chinese + English (default: ch_server for better handwriting/Japanese)
170
170
  enable_table: true,
171
171
  enable_formula: true,
172
172
  is_ocr: false,
@@ -186,6 +186,19 @@ async function mineruProcessFile(
186
186
  return markdown;
187
187
  }
188
188
 
189
+ // ── Image → PDF wrapper (so MinerU applies the language="ch" pipeline) ──────────
190
+
191
+ const IMG2PDF_SCRIPT = `
192
+ import sys
193
+ from PIL import Image
194
+ img = Image.open(sys.argv[1])
195
+ img.save(sys.argv[2], "PDF")
196
+ `;
197
+
198
+ async function wrapImageAsPdf(imagePath: string, pdfPath: string): Promise<void> {
199
+ await execPy(IMG2PDF_SCRIPT, [imagePath, pdfPath]);
200
+ }
201
+
189
202
  // ── Public API ───────────────────────────────────────────────────────────────
190
203
 
191
204
  export async function mineruOcr(
@@ -195,13 +208,23 @@ export async function mineruOcr(
195
208
  const ext = extname(filePath).toLowerCase();
196
209
  const fileName = basename(filePath);
197
210
 
198
- // For images (non-PDF): process as a single individual request
211
+ // For images (non-PDF): wrap in a PDF so MinerU applies the language="ch" pipeline
199
212
  if (ext !== ".pdf") {
200
213
  if (![".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif"].includes(ext)) {
201
214
  throw new Error(`MinerU does not support this file type: ${ext}. Use PDF, PNG, JPG, Docx, PPTx, or Xlsx.`);
202
215
  }
203
- const markdown = await mineruProcessFile(filePath, fileName, "[1/1]", onProgress);
204
- return { text: markdown, details: { backend: "mineru", fileName, pages: 1 } };
216
+
217
+ // Wrap the image as a 1-page PDF so the language="ch" (Chinese) OCR pipeline is applied
218
+ onProgress("[1/1] converting image to PDF…");
219
+ const pdfPath = join(tmpdir(), `pi-mineru-img-${Date.now()}.pdf`);
220
+ try {
221
+ await wrapImageAsPdf(filePath, pdfPath);
222
+ const pdfName = fileName.replace(/\.[^.]+$/, "") + ".pdf";
223
+ const markdown = await mineruProcessFile(pdfPath, pdfName, "[1/1]", onProgress);
224
+ return { text: markdown, details: { backend: "mineru", fileName, pages: 1 } };
225
+ } finally {
226
+ try { unlinkSync(pdfPath); } catch { /* cleanup */ }
227
+ }
205
228
  }
206
229
 
207
230
  // ── PDF handling ──
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-ocr",
3
- "version": "1.1.2",
3
+ "version": "1.2.0",
4
4
  "description": "Pi extension: Zero-setup multi-backend OCR — MinerU (free cloud), Ollama (local GPU, LaTeX formulas), Pix2Text (local Python). Extract text, formulas, and tables from images and PDFs. Default: zero config, works out of the box.",
5
5
  "keywords": [
6
6
  "pi-package",