@tiens.nguyen/gonext-local-worker 1.0.10 → 1.0.12

This diff shows the published contents of the two package versions as they appear in their public registry; it is provided for informational purposes only.
@@ -6,8 +6,12 @@
  * - `gonext-local-worker` starts polling loop
  */
 import { mkdir, readFile, writeFile } from "node:fs/promises";
-import { homedir } from "node:os";
+import { homedir, platform } from "node:os";
 import { join } from "node:path";
+import { execFile as execFileCb } from "node:child_process";
+import { promisify } from "node:util";
 import dotenv from "dotenv";
 import OpenAI from "openai";
+// node:child_process has no "/promises" subpath; promisify execFile so it can be awaited.
+const execFile = promisify(execFileCb);
 
@@ -28,6 +29,9 @@ Usage:
 Examples:
   gonext-local-worker set abc123 --api-base https://hwohu56e8d.execute-api.ap-southeast-1.amazonaws.com
   gonext-local-worker
+
+Env (optional):
+  GONEXT_MLX_LM_PYTHON  Python executable for MLX LM native probe (default: python3)
 `);
 }
 
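
Note: the native probe shells out to whatever interpreter GONEXT_MLX_LM_PYTHON names, so pointing it at a virtualenv that already has mlx_lm installed is enough. A hypothetical invocation (the venv path is illustrative):

    GONEXT_MLX_LM_PYTHON=~/.venvs/mlx/bin/python gonext-local-worker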
@@ -179,15 +183,28 @@ function normalizeOpenAiV1Root(raw) {
   return /\/v1$/i.test(base) ? base : `${base}/v1`;
 }
 
+function sourceLabelFromBase(base) {
+  try {
+    return new URL(base).host || base;
+  } catch {
+    return base;
+  }
+}
+
 async function checkOllamaTags(base) {
   const endpoint = `${base}/api/tags`;
   try {
     const res = await fetch(endpoint, { method: "GET" });
     if (!res.ok) return { online: false, endpoint, models: [] };
     const j = await res.json();
+    const source = sourceLabelFromBase(base);
     const models = (j.models ?? []).map((m) => {
       const name = m.name ?? m.model ?? "model";
-      return { id: name, name, value: `ollama:${name}` };
+      return {
+        id: `${name}@@${source}`,
+        name: `${name} (${source})`,
+        value: `ollama:${name}@@${encodeURIComponent(base)}`,
+      };
     });
     return { online: true, endpoint, models };
   } catch {
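
The reworked value string packs both the model name and its source base URL, joined by a literal "@@" (unambiguous because encodeURIComponent escapes "@" as %40, so the marker cannot occur inside the encoded URL). A minimal sketch of how a consumer could split it back apart; parseOllamaValue is illustrative and not part of this package:

    function parseOllamaValue(value) {
      const body = value.replace(/^ollama:/, "");
      const at = body.lastIndexOf("@@");
      if (at === -1) return { name: body }; // legacy "ollama:NAME" form
      return {
        name: body.slice(0, at),
        base: decodeURIComponent(body.slice(at + 2)),
      };
    }

    // parseOllamaValue("ollama:llama3:8b@@http%3A%2F%2Flocalhost%3A11434")
    //   → { name: "llama3:8b", base: "http://localhost:11434" }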
@@ -215,9 +232,59 @@ async function checkOpenAiModels(base, apiKey) {
   }
 }
 
+/** True MLX LM check: import mlx_lm in Python (macOS). Not the OpenAI HTTP surface. */
+async function checkMlxLmNativeImport() {
+  const preferred = (process.env.GONEXT_MLX_LM_PYTHON ?? "").trim() || "python3";
+  const code = [
+    "import sys",
+    "try:",
+    " import mlx_lm",
+    " v = getattr(mlx_lm, '__version__', None)",
+    " print(v or 'ok')",
+    "except Exception:",
+    " sys.exit(1)",
+  ].join("\n");
+
+  const candidates = [preferred];
+  if (preferred === "python3") candidates.push("python");
+
+  const tried = [];
+  for (const exe of [...new Set(candidates)]) {
+    tried.push(exe);
+    try {
+      const { stdout } = await execFile(exe, ["-c", code], {
+        timeout: 15000,
+        maxBuffer: 65536,
+        windowsHide: true,
+      });
+      const version = String(stdout ?? "").trim();
+      return {
+        available: true,
+        python: exe,
+        version: version || undefined,
+        method: "python_import_mlx_lm",
+      };
+    } catch {
+      /* try next */
+    }
+  }
+  return {
+    available: false,
+    python: preferred,
+    method: "python_import_mlx_lm",
+    error: `Could not import mlx_lm (tried: ${tried.join(", ")})`,
+  };
+}
+
 async function runLocalHealthJob(job) {
   const { jobId, payload } = job;
   const start = Date.now();
+  const ollamaPayloadCount = Array.isArray(payload?.ollamaBaseUrls)
+    ? payload.ollamaBaseUrls.length
+    : 0;
+  console.log(
+    `[gonext-worker] local_health ${jobId} start (ollamaUrls=${ollamaPayloadCount}, mlx=${payload?.mlxOpenAiBaseUrl ? "yes" : "no"})`
+  );
   await workerFetch(`/api/worker/jobs/${jobId}`, {
     method: "PATCH",
     body: JSON.stringify({ jobStatus: "running" }),
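
For reference, a sketch of calling the new probe on its own (the result fields are exactly those returned above; the version value is an example):

    const probe = await checkMlxLmNativeImport();
    console.log(
      probe.available
        ? `mlx_lm importable via ${probe.python}${probe.version ? ` v${probe.version}` : ""}`
        : probe.error
    );
    // e.g. "mlx_lm importable via python3 v0.19.2", or on failure
    // "Could not import mlx_lm (tried: python3, python)"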
@@ -229,18 +296,87 @@ async function runLocalHealthJob(job) {
     const dedup = new Map();
     let ollamaOnline = false;
     let ollamaEndpoint = "";
+    const ollamaSources = [];
     for (const base of ollamaBases) {
+      const baseStart = Date.now();
+      console.log(`[gonext-worker] local_health ${jobId} check ollama ${base}`);
       const r = await checkOllamaTags(base);
+      console.log(
+        `[gonext-worker] local_health ${jobId} ollama result ${base} online=${r.online} models=${r.models.length} took=${((Date.now() - baseStart) / 1000).toFixed(2)}s`
+      );
       ollamaOnline = ollamaOnline || r.online;
       if (!ollamaEndpoint) ollamaEndpoint = r.endpoint;
+      ollamaSources.push({
+        base,
+        label: sourceLabelFromBase(base),
+        endpoint: r.endpoint,
+        online: r.online,
+      });
       for (const m of r.models) {
         if (!dedup.has(m.value)) dedup.set(m.value, m);
       }
     }
     const mlxRoot = normalizeOpenAiV1Root(payload?.mlxOpenAiBaseUrl);
-    const mlx = mlxRoot
-      ? await checkOpenAiModels(mlxRoot, payload?.mlxApiKey ?? "")
-      : null;
+    let mlxHttp = null;
+    let mlxNative = null;
+
+    if (mlxRoot) {
+      const mlxStart = Date.now();
+      console.log(`[gonext-worker] local_health ${jobId} check mlx HTTP ${mlxRoot}`);
+      mlxHttp = await checkOpenAiModels(mlxRoot, payload?.mlxApiKey ?? "");
+      console.log(
+        `[gonext-worker] local_health ${jobId} mlx HTTP online=${mlxHttp.online} models=${mlxHttp.models.length} took=${((Date.now() - mlxStart) / 1000).toFixed(2)}s`
+      );
+    }
+
+    const wantNativeFallback =
+      mlxRoot &&
+      payload?.mlxNativeFallback !== false &&
+      platform() === "darwin" &&
+      (!mlxHttp?.online || (mlxHttp?.models?.length ?? 0) === 0);
+
+    if (wantNativeFallback) {
+      const t0 = Date.now();
+      console.log(
+        `[gonext-worker] local_health ${jobId} mlx native probe (Python mlx_lm import)`
+      );
+      mlxNative = await checkMlxLmNativeImport();
+      console.log(
+        `[gonext-worker] local_health ${jobId} mlx native available=${mlxNative.available} took=${((Date.now() - t0) / 1000).toFixed(2)}s`
+      );
+    }
+
+    let mlx = null;
+    if (mlxRoot || mlxNative?.available) {
+      const httpOk = Boolean(mlxHttp?.online && (mlxHttp?.models?.length ?? 0) > 0);
+      const nativeOk = mlxNative?.available === true;
+      mlx = {
+        configured: httpOk || nativeOk,
+        online: httpOk || nativeOk,
+        models: httpOk
+          ? mlxHttp.models
+          : nativeOk
+            ? [
+                {
+                  id: "mlx_lm_native",
+                  name: mlxNative.version
+                    ? `MLX LM (${mlxNative.version})`
+                    : "MLX LM (Python import OK)",
+                  value: "mlx:mlx_lm_native",
+                },
+              ]
+            : [],
+        endpoint: mlxHttp?.endpoint,
+        http: mlxHttp
+          ? {
+              online: mlxHttp.online,
+              endpoint: mlxHttp.endpoint,
+              modelCount: mlxHttp.models.length,
+            }
+          : undefined,
+        native: mlxNative ?? undefined,
+      };
+    }
     const result = {
       ollama:
         ollamaBases.length > 0
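
Putting the branches together: when the HTTP surface is unreachable but the Python import succeeds, the merged mlx object comes out roughly like this (a sketch; endpoint and version values are illustrative):

    {
      configured: true,
      online: true,
      models: [
        { id: "mlx_lm_native", name: "MLX LM (0.19.2)", value: "mlx:mlx_lm_native" },
      ],
      endpoint: "http://127.0.0.1:8080/v1/models",
      http: { online: false, endpoint: "http://127.0.0.1:8080/v1/models", modelCount: 0 },
      native: { available: true, python: "python3", version: "0.19.2", method: "python_import_mlx_lm" },
    }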
@@ -249,16 +385,10 @@ async function runLocalHealthJob(job) {
             online: ollamaOnline,
             models: [...dedup.values()],
             endpoint: ollamaEndpoint,
+            sources: ollamaSources,
           }
         : undefined,
-      mlx: mlx
-        ? {
-            configured: mlx.models.length > 0,
-            online: mlx.online,
-            models: mlx.models,
-            endpoint: mlx.endpoint,
-          }
-        : undefined,
+      mlx,
     };
     const totalTimeSeconds = (Date.now() - start) / 1000;
     await workerFetch(`/api/worker/jobs/${jobId}`, {
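
On the Ollama side, the fields visible in this hunk give a result section of roughly this shape, with one sources entry per probed base URL (values are examples):

    {
      online: true,
      models: [
        {
          id: "llama3:8b@@localhost:11434",
          name: "llama3:8b (localhost:11434)",
          value: "ollama:llama3:8b@@http%3A%2F%2Flocalhost%3A11434",
        },
      ],
      endpoint: "http://localhost:11434/api/tags",
      sources: [
        {
          base: "http://localhost:11434",
          label: "localhost:11434",
          endpoint: "http://localhost:11434/api/tags",
          online: true,
        },
      ],
    }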
@@ -270,8 +400,9 @@ async function runLocalHealthJob(job) {
         totalTimeSeconds,
       }),
     });
+    const onlineCount = ollamaSources.filter((s) => s.online).length;
     console.log(
-      `[gonext-worker] completed local_health ${jobId} (${totalTimeSeconds.toFixed(1)}s)`
+      `[gonext-worker] completed local_health ${jobId} (${totalTimeSeconds.toFixed(1)}s) summary: ollamaOnline=${onlineCount}/${ollamaSources.length}, mlx=${mlx ? (mlx.online ? "online" : "offline") : "n/a"}`
     );
   } catch (e) {
     const message = e instanceof Error ? e.message : String(e);
@@ -296,6 +427,9 @@ async function pollOnce() {
   }
   const job = await res.json();
   if (job?.jobId) {
+    console.log(
+      `[gonext-worker] claimed ${job.jobId} type=${job.jobType ?? "unknown"} modelKey=${job.modelKey ?? "unknown"}`
+    );
     const isLocalHealthByType = job.jobType === "local_health";
     const isLocalHealthByModelKey = job.modelKey === "local_health";
     const isLocalHealthByPayload =
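
The claim log assumes jobId, jobType, and modelKey on the claimed job; combined with the payload fields runLocalHealthJob reads, a local_health job looks roughly like this (a sketch; values are examples):

    {
      jobId: "job_abc123",
      jobType: "local_health",
      modelKey: "local_health",
      payload: {
        ollamaBaseUrls: ["http://localhost:11434"],
        mlxOpenAiBaseUrl: "http://127.0.0.1:8080",
        mlxApiKey: "",
        mlxNativeFallback: true,
      },
    }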
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.10",
+  "version": "1.0.12",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",