@tiens.nguyen/gonext-local-worker 1.0.40 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -531,7 +531,22 @@ function resolveOcrModelPath() {
531
531
  }
532
532
 
533
533
  function normalizeOcrOutput(output) {
534
- const lines = String(output ?? "")
534
+ let text = String(output ?? "").replace(/\r\n/g, "\n");
535
+ const afterAssistant = text.includes("<|assistant|>")
536
+ ? text.split("<|assistant|>").pop()
537
+ : "";
538
+ if (afterAssistant && afterAssistant.trim()) {
539
+ text = afterAssistant;
540
+ }
541
+ const escapedPrompt = OCR_PROMPT.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
542
+ text = text
543
+ .replace(/<\|[^|>]+\|>/g, " ")
544
+ .replace(/<think>[\s\S]*?<\/think>/gi, " ")
545
+ .replace(/<think>|<\/think>/gi, " ")
546
+ .replace(new RegExp(escapedPrompt, "gi"), " ")
547
+ .replace(/(?:^|[\s/\\])nothink\b/gi, " ")
548
+ .replace(/\\n/g, "\n");
549
+ const lines = text
535
550
  .split(/\r?\n/)
536
551
  .map((line) => line.trim())
537
552
  .filter((line) => line.length > 0)
@@ -541,6 +556,7 @@ function normalizeOcrOutput(output) {
541
556
  !line.startsWith("Files:") &&
542
557
  !line.startsWith("Prompt:") &&
543
558
  !line.startsWith("Generation:") &&
559
+ !line.startsWith("Calling `python -m mlx_vlm.generate") &&
544
560
  !line.startsWith("Peak memory:") &&
545
561
  !line.startsWith("=======") &&
546
562
  line !== "<think>" &&
@@ -551,6 +567,52 @@ function normalizeOcrOutput(output) {
551
567
  return lines.join("\n").trim();
552
568
  }
553
569
 
570
/**
 * Runs the mlx_vlm "generate" command through `python3` for a single image.
 *
 * The first attempt invokes the module as `python3 -m mlx_vlm.generate`. If
 * that attempt fails and the error's stderr or message contains
 * "no module named mlx_vlm.generate" (compared case-insensitively), the same
 * flags are retried as `python3 -m mlx_vlm generate`. Any other failure is
 * rethrown untouched.
 *
 * @param {string} modelPath - Value passed to `--model`.
 * @param {string} imagePath - Value passed to `--image`.
 * @returns {Promise<*>} Whatever `execFile` resolves with.
 *   NOTE(review): presumably `{ stdout, stderr }` from a promisified
 *   `child_process.execFile` — confirm against the file's imports.
 * @throws Rethrows the first failure when it is not the missing-module case;
 *   otherwise rejects with whatever the retry fails with.
 */
async function runMlxVlmGenerate(modelPath, imagePath) {
  const missingModuleMarker = "no module named mlx_vlm.generate";

  // Flags that are identical for both invocation styles.
  const cliFlags = [
    "--model",
    modelPath,
    "--prompt",
    OCR_PROMPT,
    "--image",
    imagePath,
    "--temperature",
    "0.0",
    "--max-tokens",
    String(OCR_MAX_TOKENS),
  ];

  // Only the module selector after `-m` differs between the two attempts.
  const invoke = (moduleSelector) =>
    execFile("python3", ["-m", ...moduleSelector, ...cliFlags], {
      timeout: OCR_TIMEOUT_MS,
      maxBuffer: 10 * 1024 * 1024,
    });

  try {
    return await invoke(["mlx_vlm.generate"]);
  } catch (primaryError) {
    let stderrText = "";
    if (
      primaryError &&
      typeof primaryError === "object" &&
      "stderr" in primaryError
    ) {
      stderrText = String(primaryError.stderr ?? "").toLowerCase();
    }

    let messageText = "";
    if (primaryError instanceof Error) {
      messageText = primaryError.message.toLowerCase();
    }

    const shouldRetry = [stderrText, messageText].some((text) =>
      text.includes(missingModuleMarker)
    );
    if (shouldRetry) {
      return invoke(["mlx_vlm", "generate"]);
    }
    throw primaryError;
  }
}
615
+
554
616
  async function runOcrJob(job) {
555
617
  const { jobId, payload } = job;
556
618
  const start = Date.now();
@@ -587,28 +649,7 @@ async function runOcrJob(job) {
587
649
  );
588
650
  await writeFile(imagePath, bytes);
589
651
  const modelPath = resolveOcrModelPath();
590
- const { stdout } = await execFile(
591
- "python3",
592
- [
593
- "-m",
594
- "mlx_vlm",
595
- "generate",
596
- "--model",
597
- modelPath,
598
- "--prompt",
599
- OCR_PROMPT,
600
- "--image",
601
- imagePath,
602
- "--temperature",
603
- "0.0",
604
- "--max-tokens",
605
- String(OCR_MAX_TOKENS),
606
- ],
607
- {
608
- timeout: OCR_TIMEOUT_MS,
609
- maxBuffer: 10 * 1024 * 1024,
610
- }
611
- );
652
+ const { stdout } = await runMlxVlmGenerate(modelPath, imagePath);
612
653
  extractedText = normalizeOcrOutput(stdout);
613
654
  } finally {
614
655
  await rm(tempDir, { recursive: true, force: true }).catch(() => {});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.40",
3
+ "version": "1.0.41",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",