npm - @tiens.nguyen/gonext-local-worker - Versions diffs - 1.0.40 → 1.0.41 - Mend

@tiens.nguyen/gonext-local-worker 1.0.40 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/gonext-local-worker.mjs +64 -23
package/package.json +1 -1

package/gonext-local-worker.mjs CHANGED Viewed

@@ -531,7 +531,22 @@ function resolveOcrModelPath() {
 }
 function normalizeOcrOutput(output) {
-  const lines = String(output ?? "")
+  let text = String(output ?? "").replace(/\r\n/g, "\n");
+  const afterAssistant = text.includes("<|assistant|>")
+    ? text.split("<|assistant|>").pop()
+    : "";
+  if (afterAssistant && afterAssistant.trim()) {
+    text = afterAssistant;
+  }
+  const escapedPrompt = OCR_PROMPT.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  text = text
+    .replace(/<\|[^|>]+\|>/g, " ")
+    .replace(/<think>[\s\S]*?<\/think>/gi, " ")
+    .replace(/<think>|<\/think>/gi, " ")
+    .replace(new RegExp(escapedPrompt, "gi"), " ")
+    .replace(/(?:^|[\s/\\])nothink\b/gi, " ")
+    .replace(/\\n/g, "\n");
+  const lines = text
     .split(/\r?\n/)
     .map((line) => line.trim())
     .filter((line) => line.length > 0)
@@ -541,6 +556,7 @@ function normalizeOcrOutput(output) {
         !line.startsWith("Files:") &&
         !line.startsWith("Prompt:") &&
         !line.startsWith("Generation:") &&
+        !line.startsWith("Calling `python -m mlx_vlm.generate") &&
         !line.startsWith("Peak memory:") &&
         !line.startsWith("=======") &&
         line !== "<think>" &&
@@ -551,6 +567,52 @@ function normalizeOcrOutput(output) {
   return lines.join("\n").trim();
 }
+async function runMlxVlmGenerate(modelPath, imagePath) {
+  const sharedArgs = [
+    "--model",
+    modelPath,
+    "--prompt",
+    OCR_PROMPT,
+    "--image",
+    imagePath,
+    "--temperature",
+    "0.0",
+    "--max-tokens",
+    String(OCR_MAX_TOKENS),
+  ];
+  try {
+    return await execFile(
+      "python3",
+      ["-m", "mlx_vlm.generate", ...sharedArgs],
+      {
+        timeout: OCR_TIMEOUT_MS,
+        maxBuffer: 10 * 1024 * 1024,
+      }
+    );
+  } catch (primaryError) {
+    const stderr =
+      primaryError && typeof primaryError === "object" && "stderr" in primaryError
+        ? String(primaryError.stderr ?? "").toLowerCase()
+        : "";
+    const message =
+      primaryError instanceof Error ? primaryError.message.toLowerCase() : "";
+    const missingLegacyModule =
+      stderr.includes("no module named mlx_vlm.generate") ||
+      message.includes("no module named mlx_vlm.generate");
+    if (!missingLegacyModule) {
+      throw primaryError;
+    }
+    return execFile(
+      "python3",
+      ["-m", "mlx_vlm", "generate", ...sharedArgs],
+      {
+        timeout: OCR_TIMEOUT_MS,
+        maxBuffer: 10 * 1024 * 1024,
+      }
+    );
+  }
+}
 async function runOcrJob(job) {
   const { jobId, payload } = job;
   const start = Date.now();
@@ -587,28 +649,7 @@ async function runOcrJob(job) {
       );
       await writeFile(imagePath, bytes);
       const modelPath = resolveOcrModelPath();
-      const { stdout } = await execFile(
-        "python3",
-        [
-          "-m",
-          "mlx_vlm",
-          "generate",
-          "--model",
-          modelPath,
-          "--prompt",
-          OCR_PROMPT,
-          "--image",
-          imagePath,
-          "--temperature",
-          "0.0",
-          "--max-tokens",
-          String(OCR_MAX_TOKENS),
-        ],
-        {
-          timeout: OCR_TIMEOUT_MS,
-          maxBuffer: 10 * 1024 * 1024,
-        }
-      );
+      const { stdout } = await runMlxVlmGenerate(modelPath, imagePath);
       extractedText = normalizeOcrOutput(stdout);
     } finally {
       await rm(tempDir, { recursive: true, force: true }).catch(() => {});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.40",
+  "version": "1.0.41",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",