@tiens.nguyen/gonext-local-worker 1.0.40 → 1.0.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext-local-worker.mjs +76 -23
- package/package.json +1 -1
package/gonext-local-worker.mjs
CHANGED
|
@@ -531,7 +531,22 @@ function resolveOcrModelPath() {
|
|
|
531
531
|
}
|
|
532
532
|
|
|
533
533
|
function normalizeOcrOutput(output) {
|
|
534
|
-
|
|
534
|
+
let text = String(output ?? "").replace(/\r\n/g, "\n");
|
|
535
|
+
const afterAssistant = text.includes("<|assistant|>")
|
|
536
|
+
? text.split("<|assistant|>").pop()
|
|
537
|
+
: "";
|
|
538
|
+
if (afterAssistant && afterAssistant.trim()) {
|
|
539
|
+
text = afterAssistant;
|
|
540
|
+
}
|
|
541
|
+
const escapedPrompt = OCR_PROMPT.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
542
|
+
text = text
|
|
543
|
+
.replace(/<\|[^|>]+\|>/g, " ")
|
|
544
|
+
.replace(/<think>[\s\S]*?<\/think>/gi, " ")
|
|
545
|
+
.replace(/<think>|<\/think>/gi, " ")
|
|
546
|
+
.replace(new RegExp(escapedPrompt, "gi"), " ")
|
|
547
|
+
.replace(/(?:^|[\s/\\])nothink\b/gi, " ")
|
|
548
|
+
.replace(/\\n/g, "\n");
|
|
549
|
+
const lines = text
|
|
535
550
|
.split(/\r?\n/)
|
|
536
551
|
.map((line) => line.trim())
|
|
537
552
|
.filter((line) => line.length > 0)
|
|
@@ -541,6 +556,7 @@ function normalizeOcrOutput(output) {
|
|
|
541
556
|
!line.startsWith("Files:") &&
|
|
542
557
|
!line.startsWith("Prompt:") &&
|
|
543
558
|
!line.startsWith("Generation:") &&
|
|
559
|
+
!line.startsWith("Calling `python -m mlx_vlm.generate") &&
|
|
544
560
|
!line.startsWith("Peak memory:") &&
|
|
545
561
|
!line.startsWith("=======") &&
|
|
546
562
|
line !== "<think>" &&
|
|
@@ -551,6 +567,52 @@ function normalizeOcrOutput(output) {
|
|
|
551
567
|
return lines.join("\n").trim();
|
|
552
568
|
}
|
|
553
569
|
|
|
570
|
+
async function runMlxVlmGenerate(modelPath, imagePath) {
|
|
571
|
+
const sharedArgs = [
|
|
572
|
+
"--model",
|
|
573
|
+
modelPath,
|
|
574
|
+
"--prompt",
|
|
575
|
+
OCR_PROMPT,
|
|
576
|
+
"--image",
|
|
577
|
+
imagePath,
|
|
578
|
+
"--temperature",
|
|
579
|
+
"0.0",
|
|
580
|
+
"--max-tokens",
|
|
581
|
+
String(OCR_MAX_TOKENS),
|
|
582
|
+
];
|
|
583
|
+
try {
|
|
584
|
+
return await execFile(
|
|
585
|
+
"python3",
|
|
586
|
+
["-m", "mlx_vlm.generate", ...sharedArgs],
|
|
587
|
+
{
|
|
588
|
+
timeout: OCR_TIMEOUT_MS,
|
|
589
|
+
maxBuffer: 10 * 1024 * 1024,
|
|
590
|
+
}
|
|
591
|
+
);
|
|
592
|
+
} catch (primaryError) {
|
|
593
|
+
const stderr =
|
|
594
|
+
primaryError && typeof primaryError === "object" && "stderr" in primaryError
|
|
595
|
+
? String(primaryError.stderr ?? "").toLowerCase()
|
|
596
|
+
: "";
|
|
597
|
+
const message =
|
|
598
|
+
primaryError instanceof Error ? primaryError.message.toLowerCase() : "";
|
|
599
|
+
const missingLegacyModule =
|
|
600
|
+
stderr.includes("no module named mlx_vlm.generate") ||
|
|
601
|
+
message.includes("no module named mlx_vlm.generate");
|
|
602
|
+
if (!missingLegacyModule) {
|
|
603
|
+
throw primaryError;
|
|
604
|
+
}
|
|
605
|
+
return execFile(
|
|
606
|
+
"python3",
|
|
607
|
+
["-m", "mlx_vlm", "generate", ...sharedArgs],
|
|
608
|
+
{
|
|
609
|
+
timeout: OCR_TIMEOUT_MS,
|
|
610
|
+
maxBuffer: 10 * 1024 * 1024,
|
|
611
|
+
}
|
|
612
|
+
);
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
|
|
554
616
|
async function runOcrJob(job) {
|
|
555
617
|
const { jobId, payload } = job;
|
|
556
618
|
const start = Date.now();
|
|
@@ -587,28 +649,7 @@ async function runOcrJob(job) {
|
|
|
587
649
|
);
|
|
588
650
|
await writeFile(imagePath, bytes);
|
|
589
651
|
const modelPath = resolveOcrModelPath();
|
|
590
|
-
const { stdout } = await execFile(
|
|
591
|
-
"python3",
|
|
592
|
-
[
|
|
593
|
-
"-m",
|
|
594
|
-
"mlx_vlm",
|
|
595
|
-
"generate",
|
|
596
|
-
"--model",
|
|
597
|
-
modelPath,
|
|
598
|
-
"--prompt",
|
|
599
|
-
OCR_PROMPT,
|
|
600
|
-
"--image",
|
|
601
|
-
imagePath,
|
|
602
|
-
"--temperature",
|
|
603
|
-
"0.0",
|
|
604
|
-
"--max-tokens",
|
|
605
|
-
String(OCR_MAX_TOKENS),
|
|
606
|
-
],
|
|
607
|
-
{
|
|
608
|
-
timeout: OCR_TIMEOUT_MS,
|
|
609
|
-
maxBuffer: 10 * 1024 * 1024,
|
|
610
|
-
}
|
|
611
|
-
);
|
|
652
|
+
const { stdout } = await runMlxVlmGenerate(modelPath, imagePath);
|
|
612
653
|
extractedText = normalizeOcrOutput(stdout);
|
|
613
654
|
} finally {
|
|
614
655
|
await rm(tempDir, { recursive: true, force: true }).catch(() => {});
|
|
@@ -890,6 +931,12 @@ async function runLocalHealthJob(job) {
|
|
|
890
931
|
totalTimeSeconds,
|
|
891
932
|
}),
|
|
892
933
|
});
|
|
934
|
+
if (doneRes.status === 404) {
|
|
935
|
+
console.warn(
|
|
936
|
+
`[gonext-worker] local_health ${jobId} disappeared before completion PATCH (404 Job not found). Skipping.`
|
|
937
|
+
);
|
|
938
|
+
return;
|
|
939
|
+
}
|
|
893
940
|
await ensureWorkerOk(doneRes, `complete local_health jobId=${jobId}`);
|
|
894
941
|
const onlineCount = ollamaSources.filter((s) => s.online).length;
|
|
895
942
|
console.log(
|
|
@@ -897,6 +944,12 @@ async function runLocalHealthJob(job) {
|
|
|
897
944
|
);
|
|
898
945
|
} catch (e) {
|
|
899
946
|
const message = e instanceof Error ? e.message : String(e);
|
|
947
|
+
if (/failed 404/i.test(message) || /job not found/i.test(message)) {
|
|
948
|
+
console.warn(
|
|
949
|
+
`[gonext-worker] local_health ${jobId} no longer exists (404). Skipping fail PATCH.`
|
|
950
|
+
);
|
|
951
|
+
return;
|
|
952
|
+
}
|
|
900
953
|
const failRes = await workerFetch(`/api/worker/jobs/${jobId}`, {
|
|
901
954
|
method: "PATCH",
|
|
902
955
|
body: JSON.stringify({
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.40",
|
|
3
|
+
"version": "1.0.42",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|