@tiens.nguyen/gonext-local-worker 1.0.13 → 1.0.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -145,25 +145,91 @@ async function runChatJob(job) {
145
145
  apiKey: payload.apiKey || "ollama",
146
146
  });
147
147
 
148
+ let buf = "";
149
+ let flushTimer = null;
150
+ let fullText = "";
151
+
152
+ const flushChunks = async () => {
153
+ flushTimer = null;
154
+ const t = buf;
155
+ buf = "";
156
+ if (!t) return;
157
+ const res = await workerFetch(`/api/worker/jobs/${jobId}/chunk`, {
158
+ method: "POST",
159
+ body: JSON.stringify({ text: t }),
160
+ });
161
+ if (!res.ok && res.status !== 204) {
162
+ console.error(`[gonext-worker] chunk POST failed ${res.status} for ${jobId}`);
163
+ }
164
+ };
165
+
166
+ const enqueueText = (s) => {
167
+ if (!s) return;
168
+ fullText += s;
169
+ buf += s;
170
+ if (!flushTimer) {
171
+ flushTimer = setTimeout(() => void flushChunks(), 12);
172
+ }
173
+ };
174
+
148
175
  try {
149
- const completion = await client.chat.completions.create({
176
+ const stream = await client.chat.completions.create({
150
177
  model: payload.modelId,
151
178
  messages: toOpenAIMessages(payload.messages),
179
+ stream: true,
152
180
  temperature: 0,
153
181
  });
154
- const text = completion.choices[0]?.message?.content ?? "";
182
+
183
+ let tokenCount = 0;
184
+ let isStartThinking = false;
185
+ let isEndThinking = false;
186
+
187
+ for await (const chunk of stream) {
188
+ const delta = chunk.choices[0]?.delta;
189
+ const content = delta?.content ?? "";
190
+ const reasoningContent = delta?.reasoning_content;
191
+ tokenCount += 1;
192
+
193
+ if (reasoningContent) {
194
+ if (!isStartThinking) {
195
+ isStartThinking = true;
196
+ enqueueText("<think>");
197
+ }
198
+ enqueueText(reasoningContent);
199
+ } else {
200
+ if (isStartThinking && !isEndThinking) {
201
+ isEndThinking = true;
202
+ enqueueText("</think>");
203
+ }
204
+ if (content) {
205
+ enqueueText(content);
206
+ }
207
+ }
208
+ }
209
+
210
+ if (flushTimer) {
211
+ clearTimeout(flushTimer);
212
+ flushTimer = null;
213
+ }
214
+ await flushChunks();
215
+
155
216
  const totalTimeSeconds = (Date.now() - start) / 1000;
156
217
  await workerFetch(`/api/worker/jobs/${jobId}`, {
157
218
  method: "PATCH",
158
219
  body: JSON.stringify({
159
220
  jobStatus: "completed",
160
- resultText: text,
161
- tokenCount: 1,
221
+ resultText: fullText,
222
+ tokenCount: Math.max(1, tokenCount),
162
223
  totalTimeSeconds,
163
224
  }),
164
225
  });
165
226
  console.log(`[gonext-worker] completed ${jobId} (${totalTimeSeconds.toFixed(1)}s)`);
166
227
  } catch (e) {
228
+ if (flushTimer) {
229
+ clearTimeout(flushTimer);
230
+ flushTimer = null;
231
+ }
232
+ await flushChunks().catch(() => {});
167
233
  const message = e instanceof Error ? e.message : String(e);
168
234
  await workerFetch(`/api/worker/jobs/${jobId}`, {
169
235
  method: "PATCH",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.13",
3
+ "version": "1.0.15",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",