@tiens.nguyen/gonext-local-worker 1.0.69 → 1.0.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext-local-worker.mjs +128 -0
- package/package.json +1 -1
package/gonext-local-worker.mjs
CHANGED
|
@@ -137,6 +137,8 @@ const OCR_PROMPT = "Text Recognition:";
|
|
|
137
137
|
// NOTE(review): presumably the generation token cap for the OCR model — its use is outside this view.
const OCR_MAX_TOKENS = 2048;
|
|
138
138
|
// NOTE(review): looks like the OCR run timeout in milliseconds (180 s) — confirm at the call site.
const OCR_TIMEOUT_MS = 180_000;
|
|
139
139
|
// True only when GONEXT_OCR_DEBUG, after trimming, is exactly "1".
const OCR_DEBUG = String(process.env.GONEXT_OCR_DEBUG ?? "").trim() === "1";
|
|
140
|
+
// True only when GONEXT_OCR_SKIP_CORRECTION, after trimming, is exactly "1";
// correctOcrText then returns the OCR text untouched.
const OCR_SKIP_CORRECTION = String(process.env.GONEXT_OCR_SKIP_CORRECTION ?? "").trim() === "1";
|
|
141
|
+
// Time budget in milliseconds (120 s) for one correction attempt; used both as the
// fetch abort timeout and as the execFile timeout for the CLI run.
const OCR_CORRECT_TIMEOUT_MS = 120_000;
|
|
140
142
|
|
|
141
143
|
async function workerFetch(path, init = {}) {
|
|
142
144
|
const url = `${apiBase}${path.startsWith("/") ? path : `/${path}`}`;
|
|
@@ -981,6 +983,125 @@ function normalizeOcrOutput(output) {
|
|
|
981
983
|
return lines.join("\n").trim();
|
|
982
984
|
}
|
|
983
985
|
|
|
986
|
+
/**
 * Resolve the filesystem path of the model used for OCR text correction.
 *
 * Resolution order:
 *   1. `override` — a leading `~/` is expanded to the home directory; a value
 *      that still does not start with "/" is treated as a model name under
 *      `~/mlx-models`.
 *   2. `GONEXT_OCR_CORRECT_MODEL_PATH` (trimmed) — only `~/` expansion is applied.
 *   3. The default `~/mlx-models/translategemma-4b-it-4bit`.
 *
 * @param {string} [override] - Model path or bare model name; falsy to fall through.
 * @returns {string} Path of the correction model.
 */
function resolveOcrCorrectionModelPath(override) {
  const home = homedir();
  // A function replacer inserts the home directory literally. A replacement
  // *string* would have `$&`, `$1`, `$$`, … inside the path interpreted as
  // substitution patterns and silently corrupt the result.
  const expandTilde = (value) => value.replace(/^~(?=\/)/, () => home);

  if (override) {
    const expanded = expandTilde(override);
    return expanded.startsWith("/") ? expanded : join(home, "mlx-models", expanded);
  }

  const fromEnv = String(process.env.GONEXT_OCR_CORRECT_MODEL_PATH ?? "").trim();
  if (fromEnv) return expandTilde(fromEnv);

  return join(home, "mlx-models", "translategemma-4b-it-4bit");
}
|
|
995
|
+
|
|
996
|
+
/**
 * Clean up raw correction-model output and decide whether it is usable.
 *
 * Every line is trimmed; blank lines and lines starting with one of the
 * known mlx_lm.generate noise prefixes are dropped; the rest is joined with
 * "\n". If nothing is left, or the result is shorter than 30% of the original
 * text, the original text is returned instead.
 *
 * @param {unknown} raw - Model output (stdout or chat message content); null/undefined count as empty.
 * @param {string} originalText - OCR text that was sent for correction.
 * @returns {string} The cleaned correction, or the original text as a fallback.
 */
function normalizeCorrection(raw, originalText) {
  // Coerce so a missing original cannot throw on `.length`.
  const original = String(originalText ?? "");
  const noisePrefixes = [
    "=======",
    "Prompt:",
    "Generation:",
    "Peak memory:",
    "Files:",
    "<frozen runpy>:",
  ];

  // split(/\r?\n/) already handles CRLF, so no separate normalisation pass is needed.
  const result = String(raw ?? "")
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0 && !noisePrefixes.some((prefix) => line.startsWith(prefix)))
    .join("\n");

  // Safety: an empty or drastically shorter answer is treated as a failed correction.
  if (!result || result.length < original.length * 0.3) {
    return original;
  }
  return result;
}
|
|
1018
|
+
|
|
1019
|
+
/**
 * Call a correction model that's already running as an OpenAI-compatible server
 * (e.g. mlx_lm.server --model translategemma-4b-it-4bit --port 8083).
 *
 * Sends one chat-completions request (system prompt + "Text:\n<ocr text>",
 * temperature 0, max 2048 tokens) bounded by OCR_CORRECT_TIMEOUT_MS, then
 * passes the first choice's message content through normalizeCorrection.
 *
 * @param {string} extractedText - Raw OCR text to correct.
 * @param {string} baseUrl - API root, like http://127.0.0.1:8083/v1 (trailing slashes are ignored).
 * @returns {Promise<string>} Result of normalizeCorrection for the server reply.
 * @throws {Error} When the server answers with a non-2xx status. Rejections from
 *   fetch / res.json() (network failure, abort on timeout, bad JSON) propagate unchanged.
 */
async function correctOcrTextViaServer(extractedText, baseUrl) {
  const systemPrompt =
    "You are a text correction assistant. Fix grammar and language errors in the " +
    "provided text while preserving the original meaning and language. " +
    "Return only the corrected text without any explanation.";
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;

  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "default",
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: `Text:\n${extractedText}` },
      ],
      max_tokens: 2048,
      temperature: 0,
    }),
    signal: AbortSignal.timeout(OCR_CORRECT_TIMEOUT_MS),
  });

  if (!res.ok) {
    // Reading the body consumes the response and gives the caller's log
    // something more useful than a bare status code. Best effort only.
    const detail = (await res.text().catch(() => "")).trim();
    const suffix = detail ? `: ${detail.slice(0, 120)}` : "";
    throw new Error(`Correction server ${baseUrl} returned ${res.status}${suffix}`);
  }

  const json = await res.json();
  const content = json?.choices?.[0]?.message?.content;
  // Non-string content (missing, null, array of parts) counts as "no correction"
  // instead of throwing an opaque TypeError from .trim().
  const corrected = typeof content === "string" ? content.trim() : "";
  return normalizeCorrection(corrected, extractedText);
}
|
|
1050
|
+
|
|
1051
|
+
/**
 * Post-process OCR output with a grammar/language correction model.
 *
 * - Returns the text untouched when OCR_SKIP_CORRECTION is set.
 * - When `modelOverride` starts with http:// or https://, it is treated as a
 *   running OpenAI-compatible server and called via correctOcrTextViaServer
 *   ("/v1" is appended unless already present).
 * - Otherwise the override (or the env/default model) is resolved to a model
 *   path and `python3 -m mlx_lm.generate` is run once for this text.
 *
 * This function never rejects because of a correction failure: any error is
 * logged with console.warn and the raw OCR text is returned.
 *
 * @param {string} extractedText - Raw OCR text.
 * @param {string} [modelOverride=""] - Server URL, model path, or model name.
 * @returns {Promise<string>} Corrected text, or `extractedText` on skip/failure.
 */
async function correctOcrText(extractedText, modelOverride = "") {
  if (OCR_SKIP_CORRECTION) {
    console.log("[gonext-worker] OCR correction skipped (GONEXT_OCR_SKIP_CORRECTION=1)");
    return extractedText;
  }

  // The parameter default only covers `undefined`; coerce so null or a
  // non-string value cannot throw on .trim().
  const override = String(modelOverride ?? "").trim();

  // If the override looks like a URL, call it as a running mlx_lm.server endpoint.
  // e.g. "http://127.0.0.1:8083/v1" or "http://127.0.0.1:8083"
  if (override.startsWith("http://") || override.startsWith("https://")) {
    // Drop trailing slashes first so "http://host:8083/" becomes
    // "http://host:8083/v1" rather than "http://host:8083//v1".
    const root = override.replace(/\/+$/, "");
    const baseUrl = /\/v1$/i.test(root) ? root : `${root}/v1`;
    console.log(`[gonext-worker] OCR correction via server ${baseUrl}`);
    try {
      const corrected = await correctOcrTextViaServer(extractedText, baseUrl);
      console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
      return corrected;
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      console.warn(`[gonext-worker] OCR correction server failed (using raw OCR text): ${msg.slice(0, 200)}`);
      return extractedText;
    }
  }

  // Otherwise invoke mlx_lm.generate CLI (model loads per job — no server needed).
  const modelPath = resolveOcrCorrectionModelPath(override);
  const prompt =
    "The following text was extracted by an OCR model and may contain grammar errors, " +
    "wrong language, or garbled characters. " +
    "Correct any errors while preserving the original meaning and language. " +
    "Return only the corrected text without any explanation.\n\n" +
    `Text:\n${extractedText}`;
  console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);

  // Write prompt to a temp file so long text / special chars don't break --prompt arg.
  // A timestamp alone collides when two jobs start in the same millisecond and is
  // predictable in a shared tmpdir, so add the pid and a random component.
  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  const promptFile = join(tmpdir(), `gonext-ocr-correct-${uniqueSuffix}.txt`);
  try {
    // "wx" refuses to write through a pre-existing file or symlink; 0o600 keeps
    // the OCR text readable by the current user only.
    await writeFile(promptFile, prompt, { encoding: "utf8", flag: "wx", mode: 0o600 });
    // NOTE(review): confirm the installed mlx_lm.generate accepts --prompt-file.
    // If it does not, this branch always ends in the catch below and the raw OCR
    // text is returned; upstream documents `--prompt -` (read from stdin) instead.
    const { stdout } = await execFile(
      "python3",
      ["-m", "mlx_lm.generate", "--model", modelPath, "--prompt-file", promptFile, "--max-tokens", "2048", "--temp", "0.0"],
      { timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
    );
    const corrected = normalizeCorrection(stdout, extractedText);
    console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
    return corrected;
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
    return extractedText;
  } finally {
    // Best-effort cleanup; a leftover temp file must not fail the job.
    await rm(promptFile, { force: true }).catch(() => {});
  }
}
|
|
1104
|
+
|
|
984
1105
|
async function runMlxVlmGenerate(modelPath, imagePath) {
|
|
985
1106
|
const sharedArgs = [
|
|
986
1107
|
"--model",
|
|
@@ -1123,6 +1244,13 @@ async function runOcrJob(job) {
|
|
|
1123
1244
|
} finally {
|
|
1124
1245
|
await rm(tempDir, { recursive: true, force: true }).catch(() => {});
|
|
1125
1246
|
}
|
|
1247
|
+
// Post-process: correct grammar/language with translategemma-4b-it-4bit
|
|
1248
|
+
// (or the model set by the user in Settings > OCR correction model).
|
|
1249
|
+
// Falls back to raw OCR text on any error so the job never fails here.
|
|
1250
|
+
const correctionModelOverride = typeof payload.correctionModel === "string"
|
|
1251
|
+
? payload.correctionModel.trim()
|
|
1252
|
+
: "";
|
|
1253
|
+
extractedText = await correctOcrText(extractedText, correctionModelOverride);
|
|
1126
1254
|
const totalTimeSeconds = (Date.now() - start) / 1000;
|
|
1127
1255
|
const doneRes = await workerFetch(`/api/worker/jobs/${jobId}`, {
|
|
1128
1256
|
method: "PATCH",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.69",
|
|
3
|
+
"version": "1.0.72",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|