@tiens.nguyen/gonext-local-worker 1.0.72 → 1.0.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1081,23 +1081,38 @@ async function correctOcrText(extractedText, modelOverride = "") {
1081
1081
  "Return only the corrected text without any explanation.\n\n" +
1082
1082
  `Text:\n${extractedText}`;
1083
1083
  console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);
1084
- // Write prompt to a temp file so long text / special chars don't break --prompt arg.
1084
+ // Write the prompt to a temp text file and run a temp Python script that reads it and calls mlx_lm programmatically,
1085
+ // avoiding CLI arg length/escaping limits and the missing --prompt-file flag.
1086
+ const scriptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.py`);
1085
1087
  const promptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.txt`);
1088
+ const pyScript = `import sys, mlx_lm
1089
+ model, tokenizer = mlx_lm.load(sys.argv[1])
1090
+ with open(sys.argv[2], encoding="utf-8") as f:
1091
+ prompt = f.read()
1092
+ result = mlx_lm.generate(model, tokenizer, prompt=prompt, max_tokens=2048, verbose=False)
1093
+ print(result)
1094
+ `;
1086
1095
  try {
1096
+ await writeFile(scriptFile, pyScript, "utf8");
1087
1097
  await writeFile(promptFile, prompt, "utf8");
1088
- const { stdout } = await execFile(
1098
+ const { stdout, stderr } = await execFile(
1089
1099
  "python3",
1090
- ["-m", "mlx_lm.generate", "--model", modelPath, "--prompt-file", promptFile, "--max-tokens", "2048", "--temp", "0.0"],
1100
+ [scriptFile, modelPath, promptFile],
1091
1101
  { timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
1092
1102
  );
1103
+ if (stderr?.trim()) {
1104
+ console.log(`[gonext-worker] OCR correction CLI stderr: ${stderr.trim().slice(0, 300)}`);
1105
+ }
1093
1106
  const corrected = normalizeCorrection(stdout, extractedText);
1094
1107
  console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
1095
1108
  return corrected;
1096
1109
  } catch (e) {
1110
+ const stderr = e && typeof e === "object" && "stderr" in e ? String(e.stderr ?? "").trim().slice(0, 300) : "";
1097
1111
  const msg = e instanceof Error ? e.message : String(e);
1098
- console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
1112
+ console.warn(`[gonext-worker] OCR correction CLI failed (using raw OCR text): ${msg.slice(0, 200)}${stderr ? ` | stderr: ${stderr}` : ""}`);
1099
1113
  return extractedText;
1100
1114
  } finally {
1115
+ await rm(scriptFile, { force: true }).catch(() => {});
1101
1116
  await rm(promptFile, { force: true }).catch(() => {});
1102
1117
  }
1103
1118
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.72",
3
+ "version": "1.0.73",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",