pi-ocr 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -144,7 +144,7 @@ async function pollSingleTask(
144
144
  const state: string = data.state || "unknown";
145
145
 
146
146
  if (state === "done") {
147
- return await downloadAndExtractMd(data.full_zip_url);
147
+ return cleanMarkdown(await downloadAndExtractMd(data.full_zip_url));
148
148
  }
149
149
  if (state === "failed") {
150
150
  throw new Error(`MinerU Pro failed: ${data.err_msg || "unknown"}`);
@@ -202,7 +202,7 @@ async function pollBatch(
202
202
  for (const r of results) {
203
203
  if (r.state === "done" && r.full_zip_url) {
204
204
  onProgress(`${progressPrefix} downloading ${r.file_name}…`);
205
- const md = await downloadAndExtractMd(r.full_zip_url);
205
+ const md = cleanMarkdown(await downloadAndExtractMd(r.full_zip_url));
206
206
  markdowns.push(md);
207
207
  }
208
208
  }
@@ -225,6 +225,13 @@ async function pollBatch(
225
225
  throw new Error(`MinerU Pro batch ${batchId} timed out`);
226
226
  }
227
227
 
228
+ // ── Output cleanup ───────────────────────────────────────────────────────────
229
+
230
+ function cleanMarkdown(md: string): string {
231
+ // Remove MinerU's embedded image references
232
+ return md.replace(/!\[.*?\]\(images\/.*?\)\n*/g, "");
233
+ }
234
+
228
235
  // ── Public API ───────────────────────────────────────────────────────────────
229
236
 
230
237
  export async function mineruProOcr(
@@ -245,5 +252,5 @@ export async function mineruProOcr(
245
252
  const markdown = await processLocalFile(token, filePath, fileName, "[1/1]", onProgress);
246
253
  onProgress("[1/1] done");
247
254
 
248
- return { text: markdown, details: { backend: "mineru-pro", fileName } };
255
+ return { text: cleanMarkdown(markdown), details: { backend: "mineru-pro", fileName } };
249
256
  }
@@ -46,13 +46,26 @@ def progress(payload):
46
46
 
47
47
  progress({"status": "loading", "message": "Initializing Pix2Text models..."})
48
48
 
49
+ # Auto-detect optimal GPU device
50
+ import torch
51
+ if torch.cuda.is_available():
52
+ _device = "cuda"
53
+ elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
54
+ _device = "mps"
55
+ else:
56
+ _device = "cpu"
57
+
58
+ # Prevent MPS memory fragmentation on macOS
59
+ if _device == "mps":
60
+ os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "0.0")
61
+
49
62
  # Suppress model-loading noise on stdout during initialization
50
63
  _real_stdout = sys.stdout
51
64
  sys.stdout = sys.stderr
52
65
 
53
66
  try:
54
67
  from pix2text import Pix2Text
55
- p2t = Pix2Text.from_config(enable_formula=True, enable_table=False)
68
+ p2t = Pix2Text.from_config(enable_formula=True, enable_table=False, device=_device)
56
69
  finally:
57
70
  sys.stdout = _real_stdout
58
71
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-ocr",
3
- "version": "1.3.0",
3
+ "version": "1.3.2",
4
4
  "description": "Pi extension: Zero-setup multi-backend OCR — MinerU (free cloud), Ollama (local GPU, LaTeX formulas), Pix2Text (local Python). Extract text, formulas, and tables from images and PDFs. Default: zero config, works out of the box.",
5
5
  "keywords": [
6
6
  "pi-package",