npm - @tiens.nguyen/gonext-local-worker - Versions diffs - 1.0.38 → 1.0.39 - Mend

@tiens.nguyen/gonext-local-worker 1.0.38 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/gonext-local-worker.mjs +41 -3
package/package.json +1 -1

package/gonext-local-worker.mjs CHANGED Viewed

@@ -278,6 +278,25 @@ function toOpenAIMessages(messages) {
   });
 }
+function parseCompletionTokens(usage) { // Pulls a completion-token count out of a usage object; returns null when none is available.
+  if (!usage || typeof usage !== "object") { // Missing or non-object usage: nothing to read.
+    return null;
+  }
+  if (typeof usage.completion_tokens === "number") { // Checked first, so this field wins when both are present. Note: the typeof test also admits 0, negatives and NaN.
+    return usage.completion_tokens;
+  }
+  if (typeof usage.output_tokens === "number") { // Alternate field name, consulted only when completion_tokens is not a number.
+    return usage.output_tokens;
+  }
+  return null; // Neither field holds a number.
+}
+function shouldRetryWithoutUsage(err) { // Returns true when the error text mentions stream_options or include_usage.
+  const msg = // Lower-cased error text: err.message for Error instances, String(err) for anything else.
+    err instanceof Error ? err.message.toLowerCase() : String(err).toLowerCase();
+  return msg.includes("stream_options") || msg.includes("include_usage"); // Plain substring match; no status code or error class is inspected.
+}
 async function runChatJob(job) {
   const { jobId, payload } = job;
   if (!payload || !Array.isArray(payload.messages)) {
@@ -370,18 +389,34 @@ async function runChatJob(job) {
   };
   try {
-    const stream = await client.chat.completions.create({
+    const streamRequest = {
       model: payload.modelId,
       messages: toOpenAIMessages(payload.messages),
       stream: true,
       temperature: 0,
-    });
+    };
+    const stream = await client.chat.completions
+      .create({
+        ...streamRequest,
+        stream_options: { include_usage: true },
+      })
+      .catch(async (e) => {
+        if (!shouldRetryWithoutUsage(e)) {
+          throw e;
+        }
+        return client.chat.completions.create(streamRequest);
+      });
     let tokenCount = 0;
+    let completionTokensFromUsage = null;
     let isStartThinking = false;
     let isEndThinking = false;
     for await (const chunk of stream) {
+      const usageTokens = parseCompletionTokens(chunk.usage);
+      if (usageTokens !== null) {
+        completionTokensFromUsage = usageTokens;
+      }
       const delta = chunk.choices[0]?.delta;
       const content = delta?.content ?? "";
       const reasoningContent = delta?.reasoning_content;
@@ -419,7 +454,10 @@ async function runChatJob(job) {
       body: JSON.stringify({
         jobStatus: "completed",
         resultText: fullText,
-        tokenCount: Math.max(1, tokenCount),
+        tokenCount:
+          completionTokensFromUsage !== null
+            ? completionTokensFromUsage
+            : Math.max(1, tokenCount),
         totalTimeSeconds,
       }),
     });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.38",
+  "version": "1.0.39",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",