@tiens.nguyen/gonext-local-worker 1.0.10 → 1.0.12

This diff shows the published contents of the two package versions as they appear in their public registry; it is provided for informational purposes only.
@@ -6,8 +6,12 @@
  * - `gonext-local-worker` starts polling loop
  */
 import { mkdir, readFile, writeFile } from "node:fs/promises";
-import { homedir } from "node:os";
+import { homedir, platform } from "node:os";
 import { join } from "node:path";
+import { execFile as execFileCb } from "node:child_process";
+import { promisify } from "node:util";
 import dotenv from "dotenv";
 import OpenAI from "openai";
+// node:child_process has no "/promises" subpath; promisify execFile so it can be awaited.
+const execFile = promisify(execFileCb);
 
@@ -28,6 +29,9 @@ Usage:
 Examples:
   gonext-local-worker set abc123 --api-base https://hwohu56e8d.execute-api.ap-southeast-1.amazonaws.com
   gonext-local-worker
+
+Env (optional):
+  GONEXT_MLX_LM_PYTHON  Python executable for MLX LM native probe (default: python3)
 `);
 }
 
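
Note: the native probe shells out to whatever interpreter GONEXT_MLX_LM_PYTHON names, so pointing it at a virtualenv that already has mlx_lm installed is enough. A hypothetical invocation (the venv path is illustrative):

    GONEXT_MLX_LM_PYTHON=~/.venvs/mlx/bin/python gonext-local-worker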
@@ -179,15 +183,28 @@ function normalizeOpenAiV1Root(raw) {
   return /\/v1$/i.test(base) ? base : `${base}/v1`;
 }
 
+function sourceLabelFromBase(base) {
+  try {
+    return new URL(base).host || base;
+  } catch {
+    return base;
+  }
+}
+
 async function checkOllamaTags(base) {
   const endpoint = `${base}/api/tags`;
   try {
     const res = await fetch(endpoint, { method: "GET" });
     if (!res.ok) return { online: false, endpoint, models: [] };
     const j = await res.json();
+    const source = sourceLabelFromBase(base);
     const models = (j.models ?? []).map((m) => {
       const name = m.name ?? m.model ?? "model";
-      return { id: name, name, value: `ollama:${name}` };
+      return {
+        id: `${name}@@${source}`,
+        name: `${name} (${source})`,
+        value: `ollama:${name}@@${encodeURIComponent(base)}`,
+      };
     });
     return { online: true, endpoint, models };
   } catch {
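
The reworked value string packs both the model name and its source base URL, joined by a literal "@@" (unambiguous because encodeURIComponent escapes "@" as %40, so the marker cannot occur inside the encoded URL). A minimal sketch of how a consumer could split it back apart; parseOllamaValue is illustrative and not part of this package:

    function parseOllamaValue(value) {
      const body = value.replace(/^ollama:/, "");
      const at = body.lastIndexOf("@@");
      if (at === -1) return { name: body }; // legacy "ollama:NAME" form
      return {
        name: body.slice(0, at),
        base: decodeURIComponent(body.slice(at + 2)),
      };
    }

    // parseOllamaValue("ollama:llama3:8b@@http%3A%2F%2Flocalhost%3A11434")
    //   → { name: "llama3:8b", base: "http://localhost:11434" }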
@@ -215,9 +232,59 @@ async function checkOpenAiModels(base, apiKey) {
   }
 }
 
+/** True MLX LM check: import mlx_lm in Python (macOS). Not the OpenAI HTTP surface. */
+async function checkMlxLmNativeImport() {
+  const preferred = (process.env.GONEXT_MLX_LM_PYTHON ?? "").trim() || "python3";
+  const code = [
+    "import sys",
+    "try:",
+    " import mlx_lm",
+    " v = getattr(mlx_lm, '__version__', None)",
+    " print(v or 'ok')",
+    "except Exception:",
+    " sys.exit(1)",
+  ].join("\n");
+
+  const candidates = [preferred];
+  if (preferred === "python3") candidates.push("python");
+
+  const tried = [];
+  for (const exe of [...new Set(candidates)]) {
+    tried.push(exe);
+    try {
+      const { stdout } = await execFile(exe, ["-c", code], {
+        timeout: 15000,
+        maxBuffer: 65536,
+        windowsHide: true,
+      });
+      const version = String(stdout ?? "").trim();
+      return {
+        available: true,
+        python: exe,
+        version: version || undefined,
+        method: "python_import_mlx_lm",
+      };
+    } catch {
+      /* try next */
+    }
+  }
+  return {
+    available: false,
+    python: preferred,
+    method: "python_import_mlx_lm",
+    error: `Could not import mlx_lm (tried: ${tried.join(", ")})`,
+  };
+}
+
 async function runLocalHealthJob(job) {
   const { jobId, payload } = job;
   const start = Date.now();
+  const ollamaPayloadCount = Array.isArray(payload?.ollamaBaseUrls)
+    ? payload.ollamaBaseUrls.length
+    : 0;
+  console.log(
+    `[gonext-worker] local_health ${jobId} start (ollamaUrls=${ollamaPayloadCount}, mlx=${payload?.mlxOpenAiBaseUrl ? "yes" : "no"})`
+  );
   await workerFetch(`/api/worker/jobs/${jobId}`, {
     method: "PATCH",
     body: JSON.stringify({ jobStatus: "running" }),
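
For reference, a sketch of calling the new probe on its own (the result fields are exactly those returned above; the version value is an example):

    const probe = await checkMlxLmNativeImport();
    console.log(
      probe.available
        ? `mlx_lm importable via ${probe.python}${probe.version ? ` v${probe.version}` : ""}`
        : probe.error
    );
    // e.g. "mlx_lm importable via python3 v0.19.2", or on failure
    // "Could not import mlx_lm (tried: python3, python)"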
@@ -229,18 +296,87 @@ async function runLocalHealthJob(job) {
     const dedup = new Map();
     let ollamaOnline = false;
     let ollamaEndpoint = "";
+    const ollamaSources = [];
     for (const base of ollamaBases) {
+      const baseStart = Date.now();
+      console.log(`[gonext-worker] local_health ${jobId} check ollama ${base}`);
       const r = await checkOllamaTags(base);
+      console.log(
+        `[gonext-worker] local_health ${jobId} ollama result ${base} online=${r.online} models=${r.models.length} took=${((Date.now() - baseStart) / 1000).toFixed(2)}s`
+      );
       ollamaOnline = ollamaOnline || r.online;
       if (!ollamaEndpoint) ollamaEndpoint = r.endpoint;
+      ollamaSources.push({
+        base,
+        label: sourceLabelFromBase(base),
+        endpoint: r.endpoint,
+        online: r.online,
+      });
       for (const m of r.models) {
         if (!dedup.has(m.value)) dedup.set(m.value, m);
       }
     }
     const mlxRoot = normalizeOpenAiV1Root(payload?.mlxOpenAiBaseUrl);
-    const mlx = mlxRoot
-      ? await checkOpenAiModels(mlxRoot, payload?.mlxApiKey ?? "")
-      : null;
+    let mlxHttp = null;
+    let mlxNative = null;
+
+    if (mlxRoot) {
+      const mlxStart = Date.now();
+      console.log(`[gonext-worker] local_health ${jobId} check mlx HTTP ${mlxRoot}`);
+      mlxHttp = await checkOpenAiModels(mlxRoot, payload?.mlxApiKey ?? "");
+      console.log(
+        `[gonext-worker] local_health ${jobId} mlx HTTP online=${mlxHttp.online} models=${mlxHttp.models.length} took=${((Date.now() - mlxStart) / 1000).toFixed(2)}s`
+      );
+    }
+
+    const wantNativeFallback =
+      mlxRoot &&
+      payload?.mlxNativeFallback !== false &&
+      platform() === "darwin" &&
+      (!mlxHttp?.online || (mlxHttp?.models?.length ?? 0) === 0);
+
+    if (wantNativeFallback) {
+      const t0 = Date.now();
+      console.log(
+        `[gonext-worker] local_health ${jobId} mlx native probe (Python mlx_lm import)`
+      );
+      mlxNative = await checkMlxLmNativeImport();
+      console.log(
+        `[gonext-worker] local_health ${jobId} mlx native available=${mlxNative.available} took=${((Date.now() - t0) / 1000).toFixed(2)}s`
+      );
+    }
+
+    let mlx = null;
+    if (mlxRoot || mlxNative?.available) {
+      const httpOk = Boolean(mlxHttp?.online && (mlxHttp?.models?.length ?? 0) > 0);
+      const nativeOk = mlxNative?.available === true;
+      mlx = {
+        configured: httpOk || nativeOk,
+        online: httpOk || nativeOk,
+        models: httpOk
+          ? mlxHttp.models
+          : nativeOk
+            ? [
+                {
+                  id: "mlx_lm_native",
+                  name: mlxNative.version
+                    ? `MLX LM (${mlxNative.version})`
+                    : "MLX LM (Python import OK)",
+                  value: "mlx:mlx_lm_native",
+                },
+              ]
+            : [],
+        endpoint: mlxHttp?.endpoint,
+        http: mlxHttp
+          ? {
+              online: mlxHttp.online,
+              endpoint: mlxHttp.endpoint,
+              modelCount: mlxHttp.models.length,
+            }
+          : undefined,
+        native: mlxNative ?? undefined,
+      };
+    }
     const result = {
       ollama:
         ollamaBases.length > 0
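
Putting the branches together: when the HTTP surface is unreachable but the Python import succeeds, the merged mlx object comes out roughly like this (a sketch; endpoint and version values are illustrative):

    {
      configured: true,
      online: true,
      models: [
        { id: "mlx_lm_native", name: "MLX LM (0.19.2)", value: "mlx:mlx_lm_native" },
      ],
      endpoint: "http://127.0.0.1:8080/v1/models",
      http: { online: false, endpoint: "http://127.0.0.1:8080/v1/models", modelCount: 0 },
      native: { available: true, python: "python3", version: "0.19.2", method: "python_import_mlx_lm" },
    }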
@@ -249,16 +385,10 @@ async function runLocalHealthJob(job) {
             online: ollamaOnline,
             models: [...dedup.values()],
             endpoint: ollamaEndpoint,
+            sources: ollamaSources,
           }
         : undefined,
-      mlx: mlx
-        ? {
-            configured: mlx.models.length > 0,
-            online: mlx.online,
-            models: mlx.models,
-            endpoint: mlx.endpoint,
-          }
-        : undefined,
+      mlx,
     };
     const totalTimeSeconds = (Date.now() - start) / 1000;
     await workerFetch(`/api/worker/jobs/${jobId}`, {
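
On the Ollama side, the fields visible in this hunk give a result section of roughly this shape, with one sources entry per probed base URL (values are examples):

    {
      online: true,
      models: [
        {
          id: "llama3:8b@@localhost:11434",
          name: "llama3:8b (localhost:11434)",
          value: "ollama:llama3:8b@@http%3A%2F%2Flocalhost%3A11434",
        },
      ],
      endpoint: "http://localhost:11434/api/tags",
      sources: [
        {
          base: "http://localhost:11434",
          label: "localhost:11434",
          endpoint: "http://localhost:11434/api/tags",
          online: true,
        },
      ],
    }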
@@ -270,8 +400,9 @@ async function runLocalHealthJob(job) {
         totalTimeSeconds,
       }),
     });
+    const onlineCount = ollamaSources.filter((s) => s.online).length;
     console.log(
-      `[gonext-worker] completed local_health ${jobId} (${totalTimeSeconds.toFixed(1)}s)`
+      `[gonext-worker] completed local_health ${jobId} (${totalTimeSeconds.toFixed(1)}s) summary: ollamaOnline=${onlineCount}/${ollamaSources.length}, mlx=${mlx ? (mlx.online ? "online" : "offline") : "n/a"}`
     );
   } catch (e) {
     const message = e instanceof Error ? e.message : String(e);
@@ -296,6 +427,9 @@ async function pollOnce() {
   }
   const job = await res.json();
   if (job?.jobId) {
+    console.log(
+      `[gonext-worker] claimed ${job.jobId} type=${job.jobType ?? "unknown"} modelKey=${job.modelKey ?? "unknown"}`
+    );
     const isLocalHealthByType = job.jobType === "local_health";
     const isLocalHealthByModelKey = job.modelKey === "local_health";
     const isLocalHealthByPayload =
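
The claim log assumes jobId, jobType, and modelKey on the claimed job; combined with the payload fields runLocalHealthJob reads, a local_health job looks roughly like this (a sketch; values are examples):

    {
      jobId: "job_abc123",
      jobType: "local_health",
      modelKey: "local_health",
      payload: {
        ollamaBaseUrls: ["http://localhost:11434"],
        mlxOpenAiBaseUrl: "http://127.0.0.1:8080",
        mlxApiKey: "",
        mlxNativeFallback: true,
      },
    }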
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.10",
+  "version": "1.0.12",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",