npm - @tiens.nguyen/gonext-local-worker - Versions diffs - 1.0.39 → 1.0.41 - Mend

@tiens.nguyen/gonext-local-worker 1.0.39 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/gonext-local-worker.mjs +219 -6
package/package.json +1 -1

package/gonext-local-worker.mjs CHANGED Viewed

@@ -7,10 +7,10 @@
  * - `gonext-local-worker simulate-chat [text]` — claim next chat job, push fake reply like the real worker (needs GONEXT_* env)
  * - `gonext-local-worker` — starts polling loop (claims jobs and runs models)
  */
-import { mkdir, readFile, writeFile } from "node:fs/promises";
+import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
 import { execFile as execFileCallback } from "node:child_process";
-import { homedir, platform } from "node:os";
-import { join } from "node:path";
+import { homedir, platform, tmpdir } from "node:os";
+import { extname, join } from "node:path";
 import { promisify } from "node:util";
 import dotenv from "dotenv";
 import OpenAI from "openai";
@@ -43,6 +43,7 @@ Examples:
 Env (optional):
   GONEXT_SIMULATE_TEXT   default body for simulate-chat when no args
   GONEXT_MLX_LM_PYTHON   Python executable for MLX LM native probe (default: python3)
+  GONEXT_OCR_MODEL_PATH  Local GLM-OCR-bf16 directory (default: ~/mlx-models/GLM-OCR-bf16)
 `);
 }
@@ -101,6 +102,10 @@ const pollMs = 500;
 const localHealthConcurrency = 4;
 const CHUNK_PATH = "/api/worker/job-chunk";
+const OCR_PROMPT =
+  "Extract all readable text from this image. Return plain text only and preserve line breaks.";
+const OCR_MAX_TOKENS = 2048;
+const OCR_TIMEOUT_MS = 180_000;
 async function workerFetch(path, init = {}) {
   const url = `${apiBase}${path.startsWith("/") ? path : `/${path}`}`;
@@ -181,18 +186,27 @@ if (args[0] === "simulate-chat") {
   }
   const isLocalHealth =
     job.jobType === "local_health" || job.modelKey === "local_health";
-  if (isLocalHealth) {
+  const isOcrJob =
+    job.jobType === "ocr" ||
+    (job.payload &&
+      typeof job.payload === "object" &&
+      typeof job.payload.ocrId === "string");
+  if (isLocalHealth || isOcrJob) {
     await workerFetch(`/api/worker/jobs/${jobId}`, {
       method: "PATCH",
       body: JSON.stringify({
         jobStatus: "failed",
         errorMessage:
-          "simulate-chat: claimed a local_health job. Mark failed so you can retry. Queue a chat message (not Settings refresh) and stop the normal worker before simulate-chat.",
+          isLocalHealth
+            ? "simulate-chat: claimed a local_health job. Mark failed so you can retry. Queue a chat message (not Settings refresh) and stop the normal worker before simulate-chat."
+            : "simulate-chat: claimed an OCR job. Mark failed so you can retry. Queue a chat message and stop the normal worker before simulate-chat.",
         totalTimeSeconds: 0,
       }),
     });
     console.error(
-      "[gonext-worker] simulate-chat: claimed local_health instead of chat. Job marked failed. Retry with only a pending chat job."
+      isLocalHealth
+        ? "[gonext-worker] simulate-chat: claimed local_health instead of chat. Job marked failed. Retry with only a pending chat job."
+        : "[gonext-worker] simulate-chat: claimed OCR instead of chat. Job marked failed. Retry with only a pending chat job."
     );
     process.exit(1);
   }
@@ -490,6 +504,195 @@ async function runChatJob(job) {
   }
 }
+function resolveImageExtension(mimeType, fileName) {
+  const byMime = {
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/jpg": ".jpg",
+    "image/webp": ".webp",
+    "image/gif": ".gif",
+    "image/bmp": ".bmp",
+    "image/tiff": ".tiff",
+    "image/heic": ".heic",
+    "image/heif": ".heif",
+  };
+  const byMimeExt = byMime[String(mimeType ?? "").toLowerCase()];
+  if (byMimeExt) return byMimeExt;
+  const ext = fileName ? extname(String(fileName)).toLowerCase() : "";
+  return ext || ".png";
+}
+function resolveOcrModelPath() {
+  const raw = String(process.env.GONEXT_OCR_MODEL_PATH ?? "").trim();
+  if (raw) {
+    return raw.replace(/^~(?=\/)/, homedir());
+  }
+  return join(homedir(), "mlx-models", "GLM-OCR-bf16");
+}
+function normalizeOcrOutput(output) {
+  let text = String(output ?? "").replace(/\r\n/g, "\n");
+  const afterAssistant = text.includes("<|assistant|>")
+    ? text.split("<|assistant|>").pop()
+    : "";
+  if (afterAssistant && afterAssistant.trim()) {
+    text = afterAssistant;
+  }
+  const escapedPrompt = OCR_PROMPT.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  text = text
+    .replace(/<\|[^|>]+\|>/g, " ")
+    .replace(/<think>[\s\S]*?<\/think>/gi, " ")
+    .replace(/<think>|<\/think>/gi, " ")
+    .replace(new RegExp(escapedPrompt, "gi"), " ")
+    .replace(/(?:^|[\s/\\])nothink\b/gi, " ")
+    .replace(/\\n/g, "\n");
+  const lines = text
+    .split(/\r?\n/)
+    .map((line) => line.trim())
+    .filter((line) => line.length > 0)
+    .filter((line) => !line.startsWith("<frozen runpy>:"))
+    .filter(
+      (line) =>
+        !line.startsWith("Files:") &&
+        !line.startsWith("Prompt:") &&
+        !line.startsWith("Generation:") &&
+        !line.startsWith("Calling `python -m mlx_vlm.generate") &&
+        !line.startsWith("Peak memory:") &&
+        !line.startsWith("=======") &&
+        line !== "<think>" &&
+        line !== "</think>" &&
+        line !== "<think></think>" &&
+        line !== "No text generated for this prompt"
+    );
+  return lines.join("\n").trim();
+}
+async function runMlxVlmGenerate(modelPath, imagePath) {
+  const sharedArgs = [
+    "--model",
+    modelPath,
+    "--prompt",
+    OCR_PROMPT,
+    "--image",
+    imagePath,
+    "--temperature",
+    "0.0",
+    "--max-tokens",
+    String(OCR_MAX_TOKENS),
+  ];
+  try {
+    return await execFile(
+      "python3",
+      ["-m", "mlx_vlm.generate", ...sharedArgs],
+      {
+        timeout: OCR_TIMEOUT_MS,
+        maxBuffer: 10 * 1024 * 1024,
+      }
+    );
+  } catch (primaryError) {
+    const stderr =
+      primaryError && typeof primaryError === "object" && "stderr" in primaryError
+        ? String(primaryError.stderr ?? "").toLowerCase()
+        : "";
+    const message =
+      primaryError instanceof Error ? primaryError.message.toLowerCase() : "";
+    const missingLegacyModule =
+      stderr.includes("no module named mlx_vlm.generate") ||
+      message.includes("no module named mlx_vlm.generate");
+    if (!missingLegacyModule) {
+      throw primaryError;
+    }
+    return execFile(
+      "python3",
+      ["-m", "mlx_vlm", "generate", ...sharedArgs],
+      {
+        timeout: OCR_TIMEOUT_MS,
+        maxBuffer: 10 * 1024 * 1024,
+      }
+    );
+  }
+}
+async function runOcrJob(job) {
+  const { jobId, payload } = job;
+  const start = Date.now();
+  const runRes = await workerFetch(`/api/worker/jobs/${jobId}`, {
+    method: "PATCH",
+    body: JSON.stringify({ jobStatus: "running" }),
+  });
+  await ensureWorkerOk(runRes, `mark running OCR jobId=${jobId}`);
+  try {
+    if (!payload || typeof payload !== "object") {
+      throw new Error("Invalid OCR payload.");
+    }
+    const attachment = payload.attachment;
+    const mimeType = typeof attachment?.mimeType === "string" ? attachment.mimeType : "";
+    const data = typeof attachment?.data === "string" ? attachment.data : "";
+    const name = typeof attachment?.name === "string" ? attachment.name : "";
+    if (!mimeType.startsWith("image/")) {
+      throw new Error("OCR job attachment must be an image.");
+    }
+    if (!data) {
+      throw new Error("OCR job attachment is empty.");
+    }
+    const bytes = Buffer.from(data, "base64");
+    if (!bytes.length) {
+      throw new Error("OCR job attachment data is not valid base64.");
+    }
+    const tempDir = await mkdtemp(join(tmpdir(), "gonext-ocr-worker-"));
+    let extractedText = "";
+    try {
+      const imagePath = join(
+        tempDir,
+        `input${resolveImageExtension(mimeType, name)}`
+      );
+      await writeFile(imagePath, bytes);
+      const modelPath = resolveOcrModelPath();
+      const { stdout } = await runMlxVlmGenerate(modelPath, imagePath);
+      extractedText = normalizeOcrOutput(stdout);
+    } finally {
+      await rm(tempDir, { recursive: true, force: true }).catch(() => {});
+    }
+    if (!extractedText) {
+      throw new Error("OCR returned empty text for this image.");
+    }
+    const totalTimeSeconds = (Date.now() - start) / 1000;
+    const doneRes = await workerFetch(`/api/worker/jobs/${jobId}`, {
+      method: "PATCH",
+      body: JSON.stringify({
+        jobStatus: "completed",
+        resultText: extractedText,
+        tokenCount: Math.max(1, Math.ceil(extractedText.length / 4)),
+        totalTimeSeconds,
+      }),
+    });
+    await ensureWorkerOk(doneRes, `complete OCR jobId=${jobId}`);
+    console.log(
+      `[gonext-worker] completed OCR ${jobId} (${totalTimeSeconds.toFixed(1)}s)`
+    );
+  } catch (e) {
+    const message = e instanceof Error ? e.message : String(e);
+    const failRes = await workerFetch(`/api/worker/jobs/${jobId}`, {
+      method: "PATCH",
+      body: JSON.stringify({
+        jobStatus: "failed",
+        errorMessage: message,
+        totalTimeSeconds: (Date.now() - start) / 1000,
+      }),
+    });
+    if (!failRes.ok) {
+      const snippet = (await failRes.text().catch(() => "")).trim().slice(0, 500);
+      console.error(
+        `[gonext-worker] OCR fail PATCH also failed ${failRes.status} jobId=${jobId}` +
+          (snippet ? ` response=${snippet}` : "")
+      );
+    }
+    console.error(`[gonext-worker] failed OCR ${jobId}:`, message);
+  }
+}
 function normalizeBaseUrl(raw) {
   return typeof raw === "string" ? raw.trim().replace(/\/+$/, "") : "";
 }
@@ -794,6 +997,16 @@ async function pollOnce() {
       );
       continue;
     }
+    const isOcrByType = job.jobType === "ocr";
+    const isOcrByPayload =
+      job.payload &&
+      typeof job.payload === "object" &&
+      typeof job.payload.ocrId === "string" &&
+      typeof job.payload.attachment?.data === "string";
+    if (isOcrByType || isOcrByPayload) {
+      await runOcrJob(job);
+      return;
+    }
     await runChatJob(job);
     return;
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.39",
+  "version": "1.0.41",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",