@tiens.nguyen/gonext-local-worker 1.0.72 → 1.0.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext-local-worker.mjs +23 -4
- package/package.json +1 -1
package/gonext-local-worker.mjs
CHANGED
|
@@ -1061,8 +1061,10 @@ async function correctOcrText(extractedText, modelOverride = "") {
|
|
|
1061
1061
|
if (override.startsWith("http://") || override.startsWith("https://")) {
|
|
1062
1062
|
const baseUrl = /\/v1\/?$/i.test(override) ? override : `${override}/v1`;
|
|
1063
1063
|
console.log(`[gonext-worker] OCR correction via server ${baseUrl}`);
|
|
1064
|
+
console.log(`[gonext-worker] OCR correction input: ${extractedText.slice(0, 300)}`);
|
|
1064
1065
|
try {
|
|
1065
1066
|
const corrected = await correctOcrTextViaServer(extractedText, baseUrl);
|
|
1067
|
+
console.log(`[gonext-worker] OCR correction output: ${corrected.slice(0, 300)}`);
|
|
1066
1068
|
console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
|
|
1067
1069
|
return corrected;
|
|
1068
1070
|
} catch (e) {
|
|
@@ -1081,23 +1083,40 @@ async function correctOcrText(extractedText, modelOverride = "") {
|
|
|
1081
1083
|
"Return only the corrected text without any explanation.\n\n" +
|
|
1082
1084
|
`Text:\n${extractedText}`;
|
|
1083
1085
|
console.log(`[gonext-worker] OCR correction via CLI model=${modelPath}`);
|
|
1084
|
-
|
|
1086
|
+
console.log(`[gonext-worker] OCR correction input: ${extractedText.slice(0, 300)}`);
|
|
1087
|
+
// Write prompt to a temp Python script that calls mlx_lm programmatically,
|
|
1088
|
+
// avoiding CLI arg length/escaping limits and the missing --prompt-file flag.
|
|
1089
|
+
const scriptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.py`);
|
|
1085
1090
|
const promptFile = join(tmpdir(), `gonext-ocr-correct-${Date.now()}.txt`);
|
|
1091
|
+
const pyScript = `import sys, mlx_lm
|
|
1092
|
+
model, tokenizer = mlx_lm.load(sys.argv[1])
|
|
1093
|
+
with open(sys.argv[2], encoding="utf-8") as f:
|
|
1094
|
+
prompt = f.read()
|
|
1095
|
+
result = mlx_lm.generate(model, tokenizer, prompt=prompt, max_tokens=2048, verbose=False)
|
|
1096
|
+
print(result)
|
|
1097
|
+
`;
|
|
1086
1098
|
try {
|
|
1099
|
+
await writeFile(scriptFile, pyScript, "utf8");
|
|
1087
1100
|
await writeFile(promptFile, prompt, "utf8");
|
|
1088
|
-
const { stdout } = await execFile(
|
|
1101
|
+
const { stdout, stderr } = await execFile(
|
|
1089
1102
|
"python3",
|
|
1090
|
-
[
|
|
1103
|
+
[scriptFile, modelPath, promptFile],
|
|
1091
1104
|
{ timeout: OCR_CORRECT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024 }
|
|
1092
1105
|
);
|
|
1106
|
+
if (stderr?.trim()) {
|
|
1107
|
+
console.log(`[gonext-worker] OCR correction CLI stderr: ${stderr.trim().slice(0, 300)}`);
|
|
1108
|
+
}
|
|
1093
1109
|
const corrected = normalizeCorrection(stdout, extractedText);
|
|
1110
|
+
console.log(`[gonext-worker] OCR correction output: ${corrected.slice(0, 300)}`);
|
|
1094
1111
|
console.log(`[gonext-worker] OCR correction done: ${extractedText.length} → ${corrected.length} chars`);
|
|
1095
1112
|
return corrected;
|
|
1096
1113
|
} catch (e) {
|
|
1114
|
+
const stderr = e && typeof e === "object" && "stderr" in e ? String(e.stderr ?? "").trim().slice(0, 300) : "";
|
|
1097
1115
|
const msg = e instanceof Error ? e.message : String(e);
|
|
1098
|
-
console.warn(`[gonext-worker] OCR correction failed (using raw OCR text): ${msg.slice(0, 200)}`);
|
|
1116
|
+
console.warn(`[gonext-worker] OCR correction CLI failed (using raw OCR text): ${msg.slice(0, 200)}${stderr ? ` | stderr: ${stderr}` : ""}`);
|
|
1099
1117
|
return extractedText;
|
|
1100
1118
|
} finally {
|
|
1119
|
+
await rm(scriptFile, { force: true }).catch(() => {});
|
|
1101
1120
|
await rm(promptFile, { force: true }).catch(() => {});
|
|
1102
1121
|
}
|
|
1103
1122
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.72",
|
|
3
|
+
"version": "1.0.74",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|