@tiens.nguyen/gonext-local-worker 1.0.70 → 1.0.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext-local-worker.mjs +23 -3
- package/package.json +1 -1
package/gonext-local-worker.mjs
CHANGED
|
@@ -1081,19 +1081,39 @@ async function correctOcrText(extractedText, modelOverride = "") {
|
|
|
1081
1081
|
"Return only the corrected text without any explanation.\n\n" +
|
|
1082
1082
|
`Text:\n${extractedText}`;
|
|
1083
1083
|
console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);
|
|
1084
|
+
// Write prompt to a temp Python script that calls mlx_lm programmatically,
|
|
1085
|
+
// avoiding CLI arg length/escaping limits and the missing --prompt-file flag.
|
|
1086
|
+
const scriptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.py`);
|
|
1087
|
+
const promptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.txt`);
|
|
1088
|
+
const pyScript = `import sys, mlx_lm
|
|
1089
|
+
model, tokenizer = mlx_lm.load(sys.argv[1])
|
|
1090
|
+
with open(sys.argv[2], encoding="utf-8") as f:
|
|
1091
|
+
prompt = f.read()
|
|
1092
|
+
result = mlx_lm.generate(model, tokenizer, prompt=prompt, max_tokens=2048, verbose=False)
|
|
1093
|
+
print(result)
|
|
1094
|
+
`;
|
|
1084
1095
|
try {
|
|
1085
|
-
|
|
1096
|
+
await writeFile(scriptFile, pyScript, "utf8");
|
|
1097
|
+
await writeFile(promptFile, prompt, "utf8");
|
|
1098
|
+
const { stdout, stderr } = await execFile(
|
|
1086
1099
|
"python3",
|
|
1087
|
-
[
|
|
1100
|
+
[scriptFile, modelPath, promptFile],
|
|
1088
1101
|
{ timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
|
|
1089
1102
|
);
|
|
1103
|
+
if (stderr?.trim()) {
|
|
1104
|
+
console.log(`[gonext-worker] OCR correction CLI stderr: ${stderr.trim().slice(0, 300)}`);
|
|
1105
|
+
}
|
|
1090
1106
|
const corrected = normalizeCorrection(stdout, extractedText);
|
|
1091
1107
|
console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
|
|
1092
1108
|
return corrected;
|
|
1093
1109
|
} catch (e) {
|
|
1110
|
+
const stderr = e && typeof e === "object" && "stderr" in e ? String(e.stderr ?? "").trim().slice(0, 300) : "";
|
|
1094
1111
|
const msg = e instanceof Error ? e.message : String(e);
|
|
1095
|
-
console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
|
|
1112
|
+
console.warn(`[gonext-worker] OCR correction CLI failed (using raw OCR text): ${msg.slice(0, 200)}${stderr ? ` | stderr: ${stderr}` : ""}`);
|
|
1096
1113
|
return extractedText;
|
|
1114
|
+
} finally {
|
|
1115
|
+
await rm(scriptFile, { force: true }).catch(() => {});
|
|
1116
|
+
await rm(promptFile, { force: true }).catch(() => {});
|
|
1097
1117
|
}
|
|
1098
1118
|
}
|
|
1099
1119
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.70",
|
|
3
|
+
"version": "1.0.73",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|