pi-ocr 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -144,7 +144,7 @@ async function pollSingleTask(
144
144
  const state: string = data.state || "unknown";
145
145
 
146
146
  if (state === "done") {
147
- return await downloadAndExtractMd(data.full_zip_url);
147
+ return cleanMarkdown(await downloadAndExtractMd(data.full_zip_url));
148
148
  }
149
149
  if (state === "failed") {
150
150
  throw new Error(`MinerU Pro failed: ${data.err_msg || "unknown"}`);
@@ -202,7 +202,7 @@ async function pollBatch(
202
202
  for (const r of results) {
203
203
  if (r.state === "done" && r.full_zip_url) {
204
204
  onProgress(`${progressPrefix} downloading ${r.file_name}…`);
205
- const md = await downloadAndExtractMd(r.full_zip_url);
205
+ const md = cleanMarkdown(await downloadAndExtractMd(r.full_zip_url));
206
206
  markdowns.push(md);
207
207
  }
208
208
  }
@@ -225,6 +225,13 @@ async function pollBatch(
225
225
  throw new Error(`MinerU Pro batch ${batchId} timed out`);
226
226
  }
227
227
 
228
+ // ── Output cleanup ───────────────────────────────────────────────────────────
229
+
230
/**
 * Strips MinerU's embedded image references from extracted markdown.
 *
 * Removes every `![alt](images/...)` reference together with any newlines
 * that immediately follow it. Image references whose target does not start
 * with `images/` are left untouched.
 *
 * @param md - Markdown text to clean.
 * @returns The markdown with all `images/` image references removed.
 */
function cleanMarkdown(md: string): string {
  // The alt text is matched with a tempered pattern that may not cross a
  // "](" boundary. A plain lazy `.*?` could start at an earlier, unrelated
  // image on the same line (e.g. `![logo](https://…) … ![](images/x.jpg)`)
  // and delete it along with everything in between.
  const embeddedImageRef = /!\[(?:(?!\]\().)*\]\(images\/.*?\)\n*/g;
  return md.replace(embeddedImageRef, "");
}
234
+
228
235
  // ── Public API ───────────────────────────────────────────────────────────────
229
236
 
230
237
  export async function mineruProOcr(
@@ -245,5 +252,5 @@ export async function mineruProOcr(
245
252
  const markdown = await processLocalFile(token, filePath, fileName, "[1/1]", onProgress);
246
253
  onProgress("[1/1] done");
247
254
 
248
- return { text: markdown, details: { backend: "mineru-pro", fileName } };
255
+ return { text: cleanMarkdown(markdown), details: { backend: "mineru-pro", fileName } };
249
256
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-ocr",
3
- "version": "1.3.0",
3
+ "version": "1.3.1",
4
4
  "description": "Pi extension: Zero-setup multi-backend OCR — MinerU (free cloud), Ollama (local GPU, LaTeX formulas), Pix2Text (local Python). Extract text, formulas, and tables from images and PDFs. Default: zero config, works out of the box.",
5
5
  "keywords": [
6
6
  "pi-package",