@tiens.nguyen/gonext-local-worker 1.0.69 → 1.0.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -137,6 +137,8 @@ const OCR_PROMPT = "Text Recognition:";
137
137
  const OCR_MAX_TOKENS = 2048;
138
138
  const OCR_TIMEOUT_MS = 180_000;
139
139
  const OCR_DEBUG = String(process.env.GONEXT_OCR_DEBUG ?? "").trim() === "1";
140
+ const OCR_SKIP_CORRECTION = String(process.env.GONEXT_OCR_SKIP_CORRECTION ?? "").trim() === "1";
141
+ const OCR_CORRECT_TIMEOUT_MS = 120_000;
140
142
 
141
143
  async function workerFetch(path, init = {}) {
142
144
  const url = `${apiBase}${path.startsWith("/") ? path : `/${path}`}`;
@@ -981,6 +983,125 @@ function normalizeOcrOutput(output) {
981
983
  return lines.join("\n").trim();
982
984
  }
983
985
 
986
/**
 * Resolve the path of the model used for OCR text correction.
 *
 * Resolution order:
 *   1. `override` (when truthy): a leading `~/` is expanded to the home
 *      directory; a result that does not start with `/` is treated as a model
 *      name under `~/mlx-models`.
 *   2. The GONEXT_OCR_CORRECT_MODEL_PATH environment variable (trimmed), with a
 *      leading `~/` expanded; otherwise returned as given.
 *   3. `~/mlx-models/translategemma-4b-it-4bit`.
 *
 * @param {string} [override] - Model path or bare model name; falsy to use the defaults.
 * @returns {string} The resolved model path.
 */
function resolveOcrCorrectionModelPath(override) {
  // Use a function replacer: with a plain string, sequences such as `$&` or
  // `$1` inside the home directory path would be interpreted as replacement
  // patterns and silently corrupt the resulting path.
  const expandHome = (value) => value.replace(/^~(?=\/)/, () => homedir());

  if (override) {
    const expanded = expandHome(override);
    return expanded.startsWith("/") ? expanded : join(homedir(), "mlx-models", expanded);
  }

  const fromEnv = String(process.env.GONEXT_OCR_CORRECT_MODEL_PATH ?? "").trim();
  if (fromEnv) return expandHome(fromEnv);

  return join(homedir(), "mlx-models", "translategemma-4b-it-4bit");
}
995
+
996
/**
 * Clean up raw correction-model output and decide whether it may replace the
 * original OCR text.
 *
 * Each line is trimmed; empty lines and lines starting with a known
 * mlx_lm.generate noise prefix are dropped; the rest is re-joined with "\n".
 *
 * The original text is kept when:
 *   - the original is empty or whitespace only (there is nothing to correct, so
 *     any model output would be invented content);
 *   - the cleaned correction is empty;
 *   - the cleaned correction is shorter than 30% of the original length.
 *
 * @param {unknown} raw - Raw model output (CLI stdout or chat completion content).
 * @param {string} originalText - OCR text the correction was derived from.
 * @returns {string} The cleaned correction, or `originalText` unchanged.
 */
function normalizeCorrection(raw, originalText) {
  // Lines starting with these prefixes are treated as mlx_lm.generate noise.
  const noisePrefixes = [
    "=======",
    "Prompt:",
    "Generation:",
    "Peak memory:",
    "Files:",
    "<frozen runpy>:",
  ];
  // Corrections shorter than this fraction of the original are rejected.
  const minLengthRatio = 0.3;

  // Coerce for measuring only; the caller's value is what gets returned on fallback.
  const original = String(originalText ?? "");
  // Without this guard the length check below is vacuous for an empty
  // original (nothing is shorter than 0), so arbitrary output would be accepted.
  if (original.trim() === "") {
    return originalText;
  }

  const result = String(raw ?? "")
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .filter((line) => !noisePrefixes.some((prefix) => line.startsWith(prefix)))
    .join("\n")
    .trim();

  // Safety: if the correction came back empty or far too short, keep the original.
  if (!result || result.length < original.length * minLengthRatio) {
    return originalText;
  }
  return result;
}
1018
+
1019
/**
 * Ask a correction model that is already running behind an OpenAI-compatible
 * HTTP API to fix the OCR text
 * (e.g. mlx_lm.server --model translategemma-4b-it-4bit --port 8083).
 *
 * @param {string} extractedText - Raw OCR text to correct.
 * @param {string} baseUrl - API root such as http://127.0.0.1:8083/v1; trailing
 *   slashes are removed before `/chat/completions` is appended.
 * @returns {Promise<string>} The first choice's message content passed through
 *   normalizeCorrection together with the original text.
 * @throws {Error} When the response status is not OK. Errors raised by fetch
 *   (including the timeout abort) and by JSON parsing propagate to the caller.
 */
async function correctOcrTextViaServer(extractedText, baseUrl) {
  const systemPrompt = [
    "You are a text correction assistant. Fix grammar and language errors in the ",
    "provided text while preserving the original meaning and language. ",
    "Return only the corrected text without any explanation.",
  ].join("");
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
  const payload = {
    model: "default",
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: `Text:\n${extractedText}` },
    ],
    max_tokens: 2048,
    temperature: 0,
  };

  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
    // Abort the request once the correction time limit elapses.
    signal: AbortSignal.timeout(OCR_CORRECT_TIMEOUT_MS),
  });
  if (!res.ok) {
    throw new Error(`Correction server ${baseUrl} returned ${res.status}`);
  }

  const json = await res.json();
  const content = json?.choices?.[0]?.message?.content;
  // A missing or empty completion becomes "", which normalizeCorrection receives as-is.
  const corrected = content?.trim() || "";
  return normalizeCorrection(corrected, extractedText);
}
1050
+
1051
/**
 * Post-process OCR output with a correction model. Best effort: every failure
 * is logged and the raw OCR text is returned, so this function does not reject
 * because of a model, server or subprocess error.
 *
 * Behavior:
 *   - Returns the input unchanged when GONEXT_OCR_SKIP_CORRECTION=1 or when the
 *     input is empty/whitespace only (there is nothing to correct).
 *   - When `modelOverride` is an http(s) URL, it is used as an OpenAI-compatible
 *     server; `/v1` is appended unless the URL already ends with it.
 *   - Otherwise `python3 -m mlx_lm.generate` is run with the model path given by
 *     resolveOcrCorrectionModelPath(modelOverride) and a temporary prompt file.
 *
 * @param {string} extractedText - Raw OCR text.
 * @param {string} [modelOverride=""] - Server URL, model path or model name; empty for the default.
 * @returns {Promise<string>} Corrected text, or `extractedText` unchanged on skip/failure.
 */
async function correctOcrText(extractedText, modelOverride = "") {
  if (OCR_SKIP_CORRECTION) {
    console.log("[gonext-worker] OCR correction skipped (GONEXT_OCR_SKIP_CORRECTION=1)");
    return extractedText;
  }

  // Nothing to correct: avoid loading a model (or calling a server) only to
  // risk replacing empty OCR output with invented text.
  if (typeof extractedText !== "string" || extractedText.trim() === "") {
    console.log("[gonext-worker] OCR correction skipped (empty OCR text)");
    return extractedText;
  }

  // Tolerate null / non-string overrides instead of throwing on .trim().
  const override = typeof modelOverride === "string" ? modelOverride.trim() : "";

  // If the override looks like a URL, call it as a running mlx_lm.server endpoint.
  // e.g. "http://127.0.0.1:8083/v1" or "http://127.0.0.1:8083"
  if (/^https?:\/\//i.test(override)) {
    // Drop trailing slashes first so "http://host:8083/" does not become "…//v1".
    const trimmedUrl = override.replace(/\/+$/, "");
    const baseUrl = /\/v1$/i.test(trimmedUrl) ? trimmedUrl : `${trimmedUrl}/v1`;
    console.log(`[gonext-worker] OCR correction via server ${baseUrl}`);
    try {
      const corrected = await correctOcrTextViaServer(extractedText, baseUrl);
      console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
      return corrected;
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      console.warn(`[gonext-worker] OCR correction server failed (using raw OCR text): ${msg.slice(0, 200)}`);
      return extractedText;
    }
  }

  // Otherwise invoke mlx_lm.generate CLI (model loads per job — no server needed).
  const modelPath = resolveOcrCorrectionModelPath(override);
  const prompt =
    "The following text was extracted by an OCR model and may contain grammar errors, " +
    "wrong language, or garbled characters. " +
    "Correct any errors while preserving the original meaning and language. " +
    "Return only the corrected text without any explanation.\n\n" +
    `Text:\n${extractedText}`;
  console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);

  // Write prompt to a temp file so long text / special chars don't break --prompt arg.
  // The name combines pid, timestamp and a random suffix so two jobs started in
  // the same millisecond cannot share (and then delete) each other's file.
  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  const promptFile = join(tmpdir(), `gonext-ocr-correct-${uniqueSuffix}.txt`);
  try {
    // "wx" fails instead of overwriting an existing path; 0o600 keeps the OCR
    // text private to the current user while it sits in the shared temp dir.
    await writeFile(promptFile, prompt, { encoding: "utf8", flag: "wx", mode: 0o600 });
    // NOTE(review): confirm the installed mlx_lm.generate accepts --prompt-file;
    // if it does not, this call fails and the raw OCR text is returned below.
    const { stdout } = await execFile(
      "python3",
      ["-m", "mlx_lm.generate", "--model", modelPath, "--prompt-file", promptFile, "--max-tokens", "2048", "--temp", "0.0"],
      { timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
    );
    const corrected = normalizeCorrection(stdout, extractedText);
    console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
    return corrected;
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
    return extractedText;
  } finally {
    // Best-effort cleanup; a missing file is not an error.
    await rm(promptFile, { force: true }).catch(() => {});
  }
}
1104
+
984
1105
  async function runMlxVlmGenerate(modelPath, imagePath) {
985
1106
  const sharedArgs = [
986
1107
  "--model",
@@ -1123,6 +1244,13 @@ async function runOcrJob(job) {
1123
1244
  } finally {
1124
1245
  await rm(tempDir, { recursive: true, force: true }).catch(() => {});
1125
1246
  }
1247
+ // Post-process: correct grammar/language with translategemma-4b-it-4bit
1248
+ // (or the model set by the user in Settings > OCR correction model).
1249
+ // Falls back to raw OCR text on any error so the job never fails here.
1250
+ const correctionModelOverride = typeof payload.correctionModel === "string"
1251
+ ? payload.correctionModel.trim()
1252
+ : "";
1253
+ extractedText = await correctOcrText(extractedText, correctionModelOverride);
1126
1254
  const totalTimeSeconds = (Date.now() - start) / 1000;
1127
1255
  const doneRes = await workerFetch(`/api/worker/jobs/${jobId}`, {
1128
1256
  method: "PATCH",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.69",
3
+ "version": "1.0.72",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",