@tiens.nguyen/gonext-local-worker 1.0.38 → 1.0.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext-local-worker.mjs +41 -3
- package/package.json +1 -1
package/gonext-local-worker.mjs
CHANGED
|
@@ -278,6 +278,25 @@ function toOpenAIMessages(messages) {
|
|
|
278
278
|
});
|
|
279
279
|
}
|
|
280
280
|
|
|
281
|
+
/**
 * Extracts a completion-token count from a usage object.
 *
 * Looks at `completion_tokens` first and then `output_tokens`. A field is
 * accepted only when it holds a finite, non-negative number: `NaN` and
 * `Infinity` pass a plain `typeof === "number"` check but are serialized as
 * `null` by `JSON.stringify`, so they are treated as "no usable count" here.
 *
 * @param {unknown} usage - Candidate usage object; any non-object yields null.
 * @returns {number | null} The token count, or null when none is usable.
 */
function parseCompletionTokens(usage) {
  if (!usage || typeof usage !== "object") {
    return null;
  }
  // Order matters: `completion_tokens` takes precedence over `output_tokens`.
  for (const field of ["completion_tokens", "output_tokens"]) {
    const value = usage[field];
    // Number.isFinite does not coerce, so numeric strings are rejected too.
    if (Number.isFinite(value) && value >= 0) {
      return value;
    }
  }
  return null;
}
|
|
293
|
+
|
|
294
|
+
/**
 * Reports whether an error's text mentions the usage-reporting stream option.
 *
 * For `Error` instances the `message` is inspected; any other value is
 * converted with `String()`. Matching is case-insensitive.
 *
 * @param {unknown} err - The thrown value to inspect.
 * @returns {boolean} True when the text contains "stream_options" or
 *   "include_usage".
 */
function shouldRetryWithoutUsage(err) {
  const rawText = err instanceof Error ? err.message : String(err);
  const lowered = rawText.toLowerCase();
  return ["stream_options", "include_usage"].some((needle) =>
    lowered.includes(needle),
  );
}
|
|
299
|
+
|
|
281
300
|
async function runChatJob(job) {
|
|
282
301
|
const { jobId, payload } = job;
|
|
283
302
|
if (!payload || !Array.isArray(payload.messages)) {
|
|
@@ -370,18 +389,34 @@ async function runChatJob(job) {
|
|
|
370
389
|
};
|
|
371
390
|
|
|
372
391
|
try {
|
|
373
|
-
const
|
|
392
|
+
const streamRequest = {
|
|
374
393
|
model: payload.modelId,
|
|
375
394
|
messages: toOpenAIMessages(payload.messages),
|
|
376
395
|
stream: true,
|
|
377
396
|
temperature: 0,
|
|
378
|
-
}
|
|
397
|
+
};
|
|
398
|
+
const stream = await client.chat.completions
|
|
399
|
+
.create({
|
|
400
|
+
...streamRequest,
|
|
401
|
+
stream_options: { include_usage: true },
|
|
402
|
+
})
|
|
403
|
+
.catch(async (e) => {
|
|
404
|
+
if (!shouldRetryWithoutUsage(e)) {
|
|
405
|
+
throw e;
|
|
406
|
+
}
|
|
407
|
+
return client.chat.completions.create(streamRequest);
|
|
408
|
+
});
|
|
379
409
|
|
|
380
410
|
let tokenCount = 0;
|
|
411
|
+
let completionTokensFromUsage = null;
|
|
381
412
|
let isStartThinking = false;
|
|
382
413
|
let isEndThinking = false;
|
|
383
414
|
|
|
384
415
|
for await (const chunk of stream) {
|
|
416
|
+
const usageTokens = parseCompletionTokens(chunk.usage);
|
|
417
|
+
if (usageTokens !== null) {
|
|
418
|
+
completionTokensFromUsage = usageTokens;
|
|
419
|
+
}
|
|
385
420
|
const delta = chunk.choices[0]?.delta;
|
|
386
421
|
const content = delta?.content ?? "";
|
|
387
422
|
const reasoningContent = delta?.reasoning_content;
|
|
@@ -419,7 +454,10 @@ async function runChatJob(job) {
|
|
|
419
454
|
body: JSON.stringify({
|
|
420
455
|
jobStatus: "completed",
|
|
421
456
|
resultText: fullText,
|
|
422
|
-
tokenCount:
|
|
457
|
+
tokenCount:
|
|
458
|
+
completionTokensFromUsage !== null
|
|
459
|
+
? completionTokensFromUsage
|
|
460
|
+
: Math.max(1, tokenCount),
|
|
423
461
|
totalTimeSeconds,
|
|
424
462
|
}),
|
|
425
463
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.38",
|
|
3
|
+
"version": "1.0.39",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|