npm - @tiens.nguyen/gonext-local-worker - Versions diffs - 1.0.69 → 1.0.72 - Mend

@tiens.nguyen/gonext-local-worker 1.0.69 → 1.0.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/gonext-local-worker.mjs +128 -0
package/package.json +1 -1

package/gonext-local-worker.mjs CHANGED Viewed

@@ -137,6 +137,8 @@ const OCR_PROMPT = "Text Recognition:";
 const OCR_MAX_TOKENS = 2048;
 const OCR_TIMEOUT_MS = 180_000;
 const OCR_DEBUG = String(process.env.GONEXT_OCR_DEBUG ?? "").trim() === "1";
+const OCR_SKIP_CORRECTION = String(process.env.GONEXT_OCR_SKIP_CORRECTION ?? "").trim() === "1"; // Opt-out switch: only the exact value "1" (after trimming) disables the OCR correction pass.
+const OCR_CORRECT_TIMEOUT_MS = 120_000; // 2 minutes; used as the timeout for both the correction-server fetch and the mlx_lm.generate CLI run.
 async function workerFetch(path, init = {}) {
   const url = `${apiBase}${path.startsWith("/") ? path : `/${path}`}`;
@@ -981,6 +983,125 @@ function normalizeOcrOutput(output) {
   return lines.join("\n").trim();
 }
+function resolveOcrCorrectionModelPath(override) {
+  if (override) {
+    const expanded = override.replace(/^~(?=\/)/, homedir());
+    return expanded.startsWith("/") ? expanded : join(homedir(), "mlx-models", expanded);
+  }
+  const fromEnv = String(process.env.GONEXT_OCR_CORRECT_MODEL_PATH ?? "").trim();
+  if (fromEnv) return fromEnv.replace(/^~(?=\/)/, homedir());
+  return join(homedir(), "mlx-models", "translategemma-4b-it-4bit");
+}
+function normalizeCorrection(raw, originalText) {
+  let text = String(raw ?? "").replace(/\r\n/g, "\n");
+  // Strip common mlx_lm.generate noise lines
+  const lines = text
+    .split(/\r?\n/)
+    .map((l) => l.trim())
+    .filter((l) => l.length > 0)
+    .filter((l) =>
+      !l.startsWith("=======") &&
+      !l.startsWith("Prompt:") &&
+      !l.startsWith("Generation:") &&
+      !l.startsWith("Peak memory:") &&
+      !l.startsWith("Files:") &&
+      !l.startsWith("<frozen runpy>:")
+    );
+  const result = lines.join("\n").trim();
+  // Safety: if correction came back empty or too short, keep original
+  if (!result || result.length < originalText.length * 0.3) {
+    return originalText;
+  }
+  return result;
+}
+/**
+ * Call a correction model that's already running as an OpenAI-compatible server
+ * (e.g. mlx_lm.server --model translategemma-4b-it-4bit --port 8083).
+ * baseUrl should be like http://127.0.0.1:8083/v1
+ */
+async function correctOcrTextViaServer(extractedText, baseUrl) {
+  const systemPrompt =
+    "You are a text correction assistant. Fix grammar and language errors in the " +
+    "provided text while preserving the original meaning and language. " +
+    "Return only the corrected text without any explanation.";
+  const res = await fetch(`${baseUrl.replace(/\/+$/, "")}/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "default",
+      messages: [
+        { role: "system", content: systemPrompt },
+        { role: "user", content: `Text:\n${extractedText}` },
+      ],
+      max_tokens: 2048,
+      temperature: 0,
+    }),
+    signal: AbortSignal.timeout(OCR_CORRECT_TIMEOUT_MS),
+  });
+  if (!res.ok) {
+    throw new Error(`Correction server ${baseUrl} returned ${res.status}`);
+  }
+  const json = await res.json();
+  const corrected = json?.choices?.[0]?.message?.content?.trim() || "";
+  return normalizeCorrection(corrected, extractedText);
+}
+async function correctOcrText(extractedText, modelOverride = "") {
+  if (OCR_SKIP_CORRECTION) {
+    console.log("[gonext-worker] OCR correction skipped (GONEXT_OCR_SKIP_CORRECTION=1)");
+    return extractedText;
+  }
+  const override = modelOverride.trim();
+  // If the override looks like a URL, call it as a running mlx_lm.server endpoint.
+  // e.g. "http://127.0.0.1:8083/v1" or "http://127.0.0.1:8083"
+  if (override.startsWith("http://") || override.startsWith("https://")) {
+    const baseUrl = /\/v1\/?$/i.test(override) ? override : `${override}/v1`;
+    console.log(`[gonext-worker] OCR correction via server ${baseUrl}`);
+    try {
+      const corrected = await correctOcrTextViaServer(extractedText, baseUrl);
+      console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
+      return corrected;
+    } catch (e) {
+      const msg = e instanceof Error ? e.message : String(e);
+      console.warn(`[gonext-worker] OCR correction server failed (using raw OCR text): ${msg.slice(0, 200)}`);
+      return extractedText;
+    }
+  }
+  // Otherwise invoke mlx_lm.generate CLI (model loads per job — no server needed).
+  const modelPath = resolveOcrCorrectionModelPath(override);
+  const prompt =
+    "The following text was extracted by an OCR model and may contain grammar errors, " +
+    "wrong language, or garbled characters. " +
+    "Correct any errors while preserving the original meaning and language. " +
+    "Return only the corrected text without any explanation.\n\n" +
+    `Text:\n${extractedText}`;
+  console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);
+  // Write prompt to a temp file so long text / special chars don't break --prompt arg.
+  const promptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.txt`);
+  try {
+    await writeFile(promptFile, prompt, "utf8");
+    const { stdout } = await execFile(
+      "python3",
+      ["-m", "mlx_lm.generate", "--model", modelPath, "--prompt-file", promptFile, "--max-tokens", "2048", "--temp", "0.0"],
+      { timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
+    );
+    const corrected = normalizeCorrection(stdout, extractedText);
+    console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
+    return corrected;
+  } catch (e) {
+    const msg = e instanceof Error ? e.message : String(e);
+    console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
+    return extractedText;
+  } finally {
+    await rm(promptFile, { force: true }).catch(() => {});
+  }
+}
 async function runMlxVlmGenerate(modelPath, imagePath) {
   const sharedArgs = [
     "--model",
@@ -1123,6 +1244,13 @@ async function runOcrJob(job) {
     } finally {
       await rm(tempDir, { recursive: true, force: true }).catch(() => {});
     }
+    // Post-process: correct grammar/language with translategemma-4b-it-4bit
+    // (or the model set by the user in Settings > OCR correction model).
+    // Falls back to raw OCR text on any error so the job never fails here.
+    const correctionModelOverride = typeof payload.correctionModel === "string"
+      ? payload.correctionModel.trim()
+      : "";
+    extractedText = await correctOcrText(extractedText, correctionModelOverride);
     const totalTimeSeconds = (Date.now() - start) / 1000;
     const doneRes = await workerFetch(`/api/worker/jobs/${jobId}`, {
       method: "PATCH",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.69",
+  "version": "1.0.72",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",