@tiens.nguyen/gonext-local-worker 1.0.37 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -278,6 +278,25 @@ function toOpenAIMessages(messages) {
278
278
  });
279
279
  }
280
280
 
281
/**
 * Extracts the completion (output) token count from a streamed chunk's
 * `usage` object.
 *
 * `completion_tokens` is preferred; `output_tokens` is used as a fallback
 * when the former is missing or unusable.
 *
 * @param {unknown} usage - The `usage` value taken from a response chunk.
 *   Anything that is not a non-null object yields `null`.
 * @returns {number|null} A finite, non-negative token count, or `null` when
 *   no usable count is present.
 */
function parseCompletionTokens(usage) {
  if (usage === null || typeof usage !== "object") {
    return null;
  }
  for (const key of ["completion_tokens", "output_tokens"]) {
    const value = usage[key];
    // `typeof NaN` and `typeof Infinity` are both "number", yet
    // JSON.stringify serializes them as `null`, and a negative count is
    // meaningless. Reject those so the caller can fall back to its own
    // count instead of reporting garbage.
    if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
      return value;
    }
  }
  return null;
}
293
+
294
/**
 * Decides whether a failed request should be retried without the
 * `stream_options: { include_usage: true }` field.
 *
 * The decision is a case-insensitive substring match for "stream_options"
 * or "include_usage" in the error's message text.
 *
 * @param {unknown} err - The rejection value (an Error, a plain object, a
 *   string, or anything else).
 * @returns {boolean} `true` when the message text mentions the usage
 *   option; `false` otherwise. Never throws.
 */
function shouldRetryWithoutUsage(err) {
  const texts = [];

  if (err !== null && typeof err === "object") {
    // Covers Error instances and plain `{ message }` rejection objects.
    if (typeof err.message === "string") {
      texts.push(err.message);
    }
    // Also look at a nested `error` payload: `{ error: { message } }` or
    // `{ error: "..." }`.
    const nested = err.error;
    if (typeof nested === "string") {
      texts.push(nested);
    } else if (
      nested !== null &&
      typeof nested === "object" &&
      typeof nested.message === "string"
    ) {
      texts.push(nested.message);
    }
  }

  if (texts.length === 0) {
    try {
      texts.push(String(err));
    } catch {
      // String() throws for values that cannot be converted (e.g. objects
      // with a null prototype). This predicate runs inside a rejection
      // handler, so throwing here would mask the original error; report
      // "no retry" and let the caller rethrow the original instead.
      return false;
    }
  }

  const msg = texts.join("\n").toLowerCase();
  return msg.includes("stream_options") || msg.includes("include_usage");
}
299
+
281
300
  async function runChatJob(job) {
282
301
  const { jobId, payload } = job;
283
302
  if (!payload || !Array.isArray(payload.messages)) {
@@ -370,18 +389,34 @@ async function runChatJob(job) {
370
389
  };
371
390
 
372
391
  try {
373
- const stream = await client.chat.completions.create({
392
+ const streamRequest = {
374
393
  model: payload.modelId,
375
394
  messages: toOpenAIMessages(payload.messages),
376
395
  stream: true,
377
396
  temperature: 0,
378
- });
397
+ };
398
+ const stream = await client.chat.completions
399
+ .create({
400
+ ...streamRequest,
401
+ stream_options: { include_usage: true },
402
+ })
403
+ .catch(async (e) => {
404
+ if (!shouldRetryWithoutUsage(e)) {
405
+ throw e;
406
+ }
407
+ return client.chat.completions.create(streamRequest);
408
+ });
379
409
 
380
410
  let tokenCount = 0;
411
+ let completionTokensFromUsage = null;
381
412
  let isStartThinking = false;
382
413
  let isEndThinking = false;
383
414
 
384
415
  for await (const chunk of stream) {
416
+ const usageTokens = parseCompletionTokens(chunk.usage);
417
+ if (usageTokens !== null) {
418
+ completionTokensFromUsage = usageTokens;
419
+ }
385
420
  const delta = chunk.choices[0]?.delta;
386
421
  const content = delta?.content ?? "";
387
422
  const reasoningContent = delta?.reasoning_content;
@@ -419,7 +454,10 @@ async function runChatJob(job) {
419
454
  body: JSON.stringify({
420
455
  jobStatus: "completed",
421
456
  resultText: fullText,
422
- tokenCount: Math.max(1, tokenCount),
457
+ tokenCount:
458
+ completionTokensFromUsage !== null
459
+ ? completionTokensFromUsage
460
+ : Math.max(1, tokenCount),
423
461
  totalTimeSeconds,
424
462
  }),
425
463
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.37",
3
+ "version": "1.0.39",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",