@tiens.nguyen/gonext-local-worker 1.0.72 → 1.0.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1061,8 +1061,10 @@ async function correctOcrText(extractedText, modelOverride = "") {
1061
1061
  if (override.startsWith("http://") || override.startsWith("https://")) {
1062
1062
  const baseUrl = /\/v1\/?$/i.test(override) ? override : `${override}/v1`;
1063
1063
  console.log(`[gonext-worker] OCR correction via server ${baseUrl}`);
1064
+ console.log(`[gonext-worker] OCR correction input: ${extractedText.slice(0, 300)}`);
1064
1065
  try {
1065
1066
  const corrected = await correctOcrTextViaServer(extractedText, baseUrl);
1067
+ console.log(`[gonext-worker] OCR correction output: ${corrected.slice(0, 300)}`);
1066
1068
  console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
1067
1069
  return corrected;
1068
1070
  } catch (e) {
@@ -1081,23 +1083,40 @@ async function correctOcrText(extractedText, modelOverride = "") {
1081
1083
  "Return only the corrected text without any explanation.\n\n" +
1082
1084
  `Text:\n${extractedText}`;
1083
1085
  console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);
1084
- // Write prompt to a temp file so long text / special chars don't break --prompt arg.
1086
+ console.log(`[gonext-worker] OCR correction input: ${extractedText.slice(0, 300)}`);
1087
+ // Write the prompt to a temp text file and run a temp Python script that reads it and calls mlx_lm programmatically,
1088
+ // avoiding CLI arg length/escaping limits and the missing --prompt-file flag.
1089
+ const scriptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.py`);
1085
1090
  const promptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.txt`);
1091
+ const pyScript = `import sys, mlx_lm
1092
+ model, tokenizer = mlx_lm.load(sys.argv[1])
1093
+ with open(sys.argv[2], encoding="utf-8") as f:
1094
+ prompt = f.read()
1095
+ result = mlx_lm.generate(model, tokenizer, prompt=prompt, max_tokens=2048, verbose=False)
1096
+ print(result)
1097
+ `;
1086
1098
  try {
1099
+ await writeFile(scriptFile, pyScript, "utf8");
1087
1100
  await writeFile(promptFile, prompt, "utf8");
1088
- const { stdout } = await execFile(
1101
+ const { stdout, stderr } = await execFile(
1089
1102
  "python3",
1090
- ["-m", "mlx_lm.generate", "--model", modelPath, "--prompt-file", promptFile, "--max-tokens", "2048", "--temp", "0.0"],
1103
+ [scriptFile, modelPath, promptFile],
1091
1104
  { timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
1092
1105
  );
1106
+ if (stderr?.trim()) {
1107
+ console.log(`[gonext-worker] OCR correction CLI stderr: ${stderr.trim().slice(0, 300)}`);
1108
+ }
1093
1109
  const corrected = normalizeCorrection(stdout, extractedText);
1110
+ console.log(`[gonext-worker] OCR correction output: ${corrected.slice(0, 300)}`);
1094
1111
  console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
1095
1112
  return corrected;
1096
1113
  } catch (e) {
1114
+ const stderr = e && typeof e === "object" && "stderr" in e ? String(e.stderr ?? "").trim().slice(0, 300) : "";
1097
1115
  const msg = e instanceof Error ? e.message : String(e);
1098
- console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
1116
+ console.warn(`[gonext-worker] OCR correction CLI failed (using raw OCR text): ${msg.slice(0, 200)}${stderr ? ` | stderr: ${stderr}` : ""}`);
1099
1117
  return extractedText;
1100
1118
  } finally {
1119
+ await rm(scriptFile, { force: true }).catch(() => {});
1101
1120
  await rm(promptFile, { force: true }).catch(() => {});
1102
1121
  }
1103
1122
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.72",
3
+ "version": "1.0.74",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",