@tiens.nguyen/gonext-local-worker 1.0.70 → 1.0.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1081,19 +1081,39 @@ async function correctOcrText(extractedText, modelOverride = "") {
1081
1081
  "Return only the corrected text without any explanation.\n\n" +
1082
1082
  `Text:\n${extractedText}`;
1083
1083
  console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);
1084
+ // Write prompt to a temp Python script that calls mlx_lm programmatically,
1085
+ // avoiding CLI arg length/escaping limits and the missing --prompt-file flag.
1086
+ const scriptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.py`);
1087
+ const promptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.txt`);
1088
+ const pyScript = `import sys, mlx_lm
1089
+ model, tokenizer = mlx_lm.load(sys.argv[1])
1090
+ with open(sys.argv[2], encoding="utf-8") as f:
1091
+ prompt = f.read()
1092
+ result = mlx_lm.generate(model, tokenizer, prompt=prompt, max_tokens=2048, verbose=False)
1093
+ print(result)
1094
+ `;
1084
1095
  try {
1085
- const { stdout } = await execFile(
1096
+ await writeFile(scriptFile, pyScript, "utf8");
1097
+ await writeFile(promptFile, prompt, "utf8");
1098
+ const { stdout, stderr } = await execFile(
1086
1099
  "python3",
1087
- ["-m", "mlx_lm.generate", "--model", modelPath, "--prompt", prompt, "--max-tokens", "2048", "--temp", "0.0"],
1100
+ [scriptFile, modelPath, promptFile],
1088
1101
  { timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
1089
1102
  );
1103
+ if (stderr?.trim()) {
1104
+ console.log(`[gonext-worker] OCR correction CLI stderr: ${stderr.trim().slice(0, 300)}`);
1105
+ }
1090
1106
  const corrected = normalizeCorrection(stdout, extractedText);
1091
1107
  console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
1092
1108
  return corrected;
1093
1109
  } catch (e) {
1110
+ const stderr = e && typeof e === "object" && "stderr" in e ? String(e.stderr ?? "").trim().slice(0, 300) : "";
1094
1111
  const msg = e instanceof Error ? e.message : String(e);
1095
- console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
1112
+ console.warn(`[gonext-worker] OCR correction CLI failed (using raw OCR text): ${msg.slice(0, 200)}${stderr ? ` | stderr: ${stderr}` : ""}`);
1096
1113
  return extractedText;
1114
+ } finally {
1115
+ await rm(scriptFile, { force: true }).catch(() => {});
1116
+ await rm(promptFile, { force: true }).catch(() => {});
1097
1117
  }
1098
1118
  }
1099
1119
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.70",
3
+ "version": "1.0.73",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",