npm - pi-ocr - Versions diffs - 1.0.0 → 1.0.1 - Mend

pi-ocr 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED Viewed

@@ -7,7 +7,7 @@
 Multi-backend OCR for [Pi Coding Agent](https://pi.dev) — extract text, LaTeX math formulas, and tables from images and PDFs. Choose the backend that fits your needs: free cloud API, local GPU, or pure Python.
-> Bridges the multimodal gap for non-vision LLMs like **DeepSeek**. When your model can't see images, `minimodel_ocr` acts as its eyes.
+> Bridges the multimodal gap for non-vision LLMs like **DeepSeek**. When your model can't see images, `pi_ocr` acts as its eyes.
 ## Three Backends — One Tool
@@ -158,6 +158,9 @@ Free tier limits:
 - ≤ 20 pages per request
 - IP-based rate limiting
+> 💡 PDFs >20 pages: auto-splitting needs `python3` + `pypdfium2` (`pip install pypdfium2`).
+> Most PDFs are under 20 pages — you'll likely never need this.
 For files >10MB, compress first at [ilovepdf.com/compress_pdf](https://ilovepdf.com/compress_pdf).
 ---
@@ -210,7 +213,7 @@ Opens an interactive `SettingsList` with keyboard navigation:
 ### LLM-invoked (automatic)
-The extension registers a `minimodel_ocr` tool. The agent invokes it automatically:
+The extension registers a `pi_ocr` tool. The agent invokes it automatically:
 ```
 > What formula is written in this screenshot?
@@ -371,7 +374,7 @@ sudo pacman -S poppler
 ```
 ┌──────────────────┐     ┌──────────────────┐     ┌──────────────────────┐
-│  pi (DeepSeek)   │────▶│  minimodel_ocr   │────▶│  Ollama / MinerU    │
+│  pi (DeepSeek)   │────▶│  pi_ocr          │────▶│  Ollama / MinerU    │
 │  (no vision)     │     │  pi extension    │     │  / Pix2Text        │
 └──────────────────┘     └──────────────────┘     └──────────────────────┘
         │                         │                           │

package/extensions/index.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
- * pi-minimodel-ocr — Multi-backend OCR for Pi Coding Agent
+ * pi-ocr — Multi-backend OCR for Pi Coding Agent
  *
- * Registers a `minimodel_ocr` tool that the LLM can call to read images and PDFs
+ * Registers a `pi_ocr` tool that the LLM can call to read images and PDFs
  * using one of three backends:
  *   - Ollama (local vision models like glm-ocr)
  *   - MinerU API (free Agent API, ≤10MB, ≤20 pages)
@@ -19,7 +19,7 @@
  *   Pix2Text:  pip install pix2text
  *   PDF tools:  brew install poppler (macOS multi-page PDF for Ollama)
  *
- * Install: pi install npm:pi-minimodel-ocr
+ * Install: pi install npm:pi-ocr
  */
 import { Type } from "@earendil-works/pi-ai";
@@ -111,7 +111,7 @@ const ocrSchema = Type.Object({
 });
 const ocrTool = defineTool({
-  name: "minimodel_ocr",
+  name: "pi_ocr",
   label: "Minimodel OCR",
   description:
     "Extract text, math formulas (LaTeX), and tables from images or PDFs using local Ollama vision models. " +
@@ -120,9 +120,9 @@ const ocrTool = defineTool({
   promptSnippet:
     "Extract text/formulas/tables from images and PDFs using local Ollama OCR",
   promptGuidelines: [
-    "When the user asks about the content of an image or PDF, use minimodel_ocr to extract the text first.",
-    "For mathematical documents, use minimodel_ocr with task='formula' or task='auto' to get LaTeX output.",
-    "Use minimodel_ocr with task='auto' for general document OCR to extract all text, formulas, tables, and figures.",
+    "When the user asks about the content of an image or PDF, use pi_ocr to extract the text first.",
+    "For mathematical documents, use pi_ocr with task='formula' or task='auto' to get LaTeX output.",
+    "Use pi_ocr with task='auto' for general document OCR to extract all text, formulas, tables, and figures.",
   ],
   parameters: ocrSchema,
   async execute(_toolCallId, params, signal, onUpdate, _ctx) {
@@ -408,7 +408,7 @@ export default function ocrExtension(pi: ExtensionAPI) {
     const text = config.backend === "ollama"
       ? `OCR: ollama ${config.model}`
       : `OCR: ${config.backend}`;
-    ctx.ui.setStatus("minimodel-ocr", text);
+    ctx.ui.setStatus("pi-ocr", text);
   }
   // ── Startup ────────────────────────────────────────────────────────────────
@@ -430,5 +430,5 @@ export default function ocrExtension(pi: ExtensionAPI) {
     }
   });
-  console.log("[pi-ocr] Loaded — /ocr (file or settings), tool: minimodel_ocr, default: mineru");
+  console.log("[pi-ocr] Loaded — /ocr (file or settings), tool: pi_ocr, default: mineru");
 }

package/extensions/mineru.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * pi-minimodel-ocr — MinerU API backend
+ * pi-ocr — MinerU API backend
  *
  * Uses the free Agent Lightweight API (no token required):
  *   - File ≤10MB, ≤20 pages → one free request

package/extensions/ollama.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * pi-minimodel-ocr — Ollama backend
+ * pi-ocr — Ollama backend
  *
  * Uses any locally-running Ollama vision model (default: glm-ocr) to OCR
  * images and PDFs. Converts PDF pages to PNG before sending to Ollama.

package/extensions/pix2text.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * pi-minimodel-ocr — Pix2Text backend
+ * pi-ocr — Pix2Text backend
  *
  * Uses Pix2Text (https://github.com/breezedeus/Pix2Text) — an open-source
  * Python alternative to Mathpix. Recognizes layouts, text, math formulas (LaTeX),

package/extensions/types.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * pi-minimodel-ocr — shared types for OCR backends
+ * pi-ocr — shared types for OCR backends
  */
 export const TASKS = ["text", "formula", "table", "figure", "auto"] as const;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-ocr",
-  "version": "1.0.0",
+  "version": "1.0.1",
   "description": "Pi extension: Zero-setup multi-backend OCR — MinerU (free cloud), Ollama (local GPU, LaTeX formulas), Pix2Text (local Python). Extract text, formulas, and tables from images and PDFs. Default: zero config, works out of the box.",
   "keywords": [
     "pi-package",