@tiens.nguyen/gonext-local-worker 1.0.72 → 1.0.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext-local-worker.mjs +19 -4
- package/package.json +1 -1
package/gonext-local-worker.mjs
CHANGED
|
@@ -1081,23 +1081,38 @@ async function correctOcrText(extractedText, modelOverride = "") {
|
|
|
1081
1081
|
"Return only the corrected text without any explanation.\n\n" +
|
|
1082
1082
|
`Text:\n${extractedText}`;
|
|
1083
1083
|
console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);
|
|
1084
|
-
// Write prompt to a temp
|
|
1084
|
+
// Write prompt to a temp Python script that calls mlx_lm programmatically,
|
|
1085
|
+
// avoiding CLI arg length/escaping limits and the missing --prompt-file flag.
|
|
1086
|
+
const scriptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.py`);
|
|
1085
1087
|
const promptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.txt`);
|
|
1088
|
+
const pyScript = `import sys, mlx_lm
|
|
1089
|
+
model, tokenizer = mlx_lm.load(sys.argv[1])
|
|
1090
|
+
with open(sys.argv[2], encoding="utf-8") as f:
|
|
1091
|
+
prompt = f.read()
|
|
1092
|
+
result = mlx_lm.generate(model, tokenizer, prompt=prompt, max_tokens=2048, verbose=False)
|
|
1093
|
+
print(result)
|
|
1094
|
+
`;
|
|
1086
1095
|
try {
|
|
1096
|
+
await writeFile(scriptFile, pyScript, "utf8");
|
|
1087
1097
|
await writeFile(promptFile, prompt, "utf8");
|
|
1088
|
-
const { stdout } = await execFile(
|
|
1098
|
+
const { stdout, stderr } = await execFile(
|
|
1089
1099
|
"python3",
|
|
1090
|
-
[
|
|
1100
|
+
[scriptFile, modelPath, promptFile],
|
|
1091
1101
|
{ timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
|
|
1092
1102
|
);
|
|
1103
|
+
if (stderr?.trim()) {
|
|
1104
|
+
console.log(`[gonext-worker] OCR correction CLI stderr: ${stderr.trim().slice(0, 300)}`);
|
|
1105
|
+
}
|
|
1093
1106
|
const corrected = normalizeCorrection(stdout, extractedText);
|
|
1094
1107
|
console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
|
|
1095
1108
|
return corrected;
|
|
1096
1109
|
} catch (e) {
|
|
1110
|
+
const stderr = e && typeof e === "object" && "stderr" in e ? String(e.stderr ?? "").trim().slice(0, 300) : "";
|
|
1097
1111
|
const msg = e instanceof Error ? e.message : String(e);
|
|
1098
|
-
console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
|
|
1112
|
+
console.warn(`[gonext-worker] OCR correction CLI failed (using raw OCR text): ${msg.slice(0, 200)}${stderr ? ` | stderr: ${stderr}` : ""}`);
|
|
1099
1113
|
return extractedText;
|
|
1100
1114
|
} finally {
|
|
1115
|
+
await rm(scriptFile, { force: true }).catch(() => {});
|
|
1101
1116
|
await rm(promptFile, { force: true }).catch(() => {});
|
|
1102
1117
|
}
|
|
1103
1118
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.72",
|
|
3
|
+
"version": "1.0.73",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|