npm - open-agents-ai - Versions diffs - 0.185.31 → 0.185.33 - Mend

open-agents-ai 0.185.31 → 0.185.33

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (3)

package/dist/index.js +212 -104
package/package.json +1 -1
package/voices/personaplex/dequant-loader.py +174 -0

package/dist/index.js CHANGED Viewed

@@ -8663,7 +8663,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
         const nodeModulesDir = resolve13(this.repoRoot, "node_modules");
         let nexusResolved = false;
         let installedVersion = "";
-        const execAsync2 = (cmd, opts = {}) => new Promise((res, rej) => {
+        const execAsync3 = (cmd, opts = {}) => new Promise((res, rej) => {
           const { exec: ex } = __require("node:child_process");
           ex(cmd, { encoding: "utf8", timeout: opts.timeout ?? 3e4, cwd: opts.cwd, maxBuffer: 10 * 1024 * 1024 }, (err, stdout) => {
             if (err)
@@ -8683,7 +8683,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
             }
           } else {
             try {
-              const globalDir2 = await execAsync2("npm root -g", { timeout: 5e3 });
+              const globalDir2 = await execAsync3("npm root -g", { timeout: 5e3 });
               const globalPkg = join14(globalDir2, "open-agents-nexus", "package.json");
               if (existsSync11(globalPkg)) {
                 nexusResolved = true;
@@ -8700,10 +8700,10 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
         }
         if (nexusResolved && installedVersion) {
           try {
-            const latestRaw = await execAsync2("npm view open-agents-nexus version 2>/dev/null", { timeout: 8e3 });
+            const latestRaw = await execAsync3("npm view open-agents-nexus version 2>/dev/null", { timeout: 8e3 });
             if (latestRaw && latestRaw !== installedVersion) {
               try {
-                await execAsync2(`npm install open-agents-nexus@${latestRaw} --save 2>&1`, {
+                await execAsync3(`npm install open-agents-nexus@${latestRaw} --save 2>&1`, {
                   cwd: this.repoRoot,
                   timeout: 6e4
                 });
@@ -8716,13 +8716,13 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
         }
         if (!nexusResolved) {
           try {
-            await execAsync2("npm install open-agents-nexus@latest 2>&1", {
+            await execAsync3("npm install open-agents-nexus@latest 2>&1", {
               cwd: this.repoRoot,
               timeout: 12e4
             });
           } catch {
             try {
-              await execAsync2("npm install -g open-agents-nexus@latest 2>&1", { timeout: 12e4 });
+              await execAsync3("npm install -g open-agents-nexus@latest 2>&1", { timeout: 12e4 });
             } catch {
               throw new Error("Failed to install open-agents-nexus. Run: npm install open-agents-nexus");
             }
@@ -8767,7 +8767,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
         const agentType = args.agent_type || "general";
         const nodePaths = [nodeModulesDir];
         try {
-          const globalDir2 = await execAsync2("npm root -g", { timeout: 5e3 });
+          const globalDir2 = await execAsync3("npm root -g", { timeout: 5e3 });
           nodePaths.push(globalDir2);
         } catch {
         }
@@ -25041,9 +25041,9 @@ var init_verifierRunner = __esm({
       async executeTests(patch, repoRoot) {
         if (patch.testsToRun.length === 0)
           return "(no tests specified)";
-        const { execFile: execFile7 } = await import("node:child_process");
+        const { execFile: execFile8 } = await import("node:child_process");
         const { promisify: promisify7 } = await import("node:util");
-        const execFileAsync6 = promisify7(execFile7);
+        const execFileAsync6 = promisify7(execFile8);
         const outputs = [];
         const workDir = this.options.workingDir || repoRoot;
         for (const cmd of patch.testsToRun.slice(0, 3)) {
@@ -41281,11 +41281,35 @@ __export(personaplex_exports, {
   startPersonaPlexDaemon: () => startPersonaPlexDaemon,
   stopPersonaPlex: () => stopPersonaPlex
 });
-import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
+import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
 import { join as join54, dirname as dirname18 } from "node:path";
 import { homedir as homedir13 } from "node:os";
-import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
+import { execSync as execSync27, spawn as spawn19, execFile as execFile7 } from "node:child_process";
 import { fileURLToPath as fileURLToPath11 } from "node:url";
+function execAsync(cmd, opts = {}) {
+  return new Promise((resolve36, reject) => {
+    const child = spawn19("bash", ["-c", cmd], {
+      stdio: ["ignore", "pipe", "pipe"],
+      timeout: opts.timeout ?? 3e5,
+      env: opts.env ?? process.env
+    });
+    let stdout = "";
+    let stderr = "";
+    child.stdout?.on("data", (d) => {
+      stdout += d.toString();
+    });
+    child.stderr?.on("data", (d) => {
+      stderr += d.toString();
+    });
+    child.on("close", (code) => {
+      if (code === 0)
+        resolve36(stdout.trim());
+      else
+        reject(new Error(`Exit ${code}: ${stderr.slice(0, 500)}`));
+    });
+    child.on("error", reject);
+  });
+}
 function selectWeightTier(vramGB) {
   if (vramGB >= 48)
     return "original";
@@ -41293,31 +41317,56 @@ function selectWeightTier(vramGB) {
     return "nf4";
   return "turbo2bit";
 }
+function detectJetson() {
+  try {
+    const model = readFileSync28("/proc/device-tree/model", "utf8").replace(/\0/g, "").trim();
+    if (/jetson|orin|tegra/i.test(model)) {
+      const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
+      const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
+      return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
+    }
+  } catch {
+  }
+  return { isJetson: false, model: "", totalMemGB: 0 };
+}
 function detectPersonaPlexCapability() {
+  const fail = (reason) => ({
+    supported: false,
+    reason,
+    gpuName: "",
+    vramGB: 0,
+    weightTier: "turbo2bit",
+    needsHfToken: false
+  });
+  const jetson = detectJetson();
+  if (jetson.isJetson) {
+    const vramGB = jetson.totalMemGB;
+    if (vramGB < 8)
+      return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
+    const tier = selectWeightTier(vramGB);
+    const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
+    const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
+    return {
+      supported: true,
+      reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
+      gpuName: jetson.model,
+      vramGB,
+      weightTier: effectiveTier,
+      needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
+    };
+  }
   try {
     const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
       encoding: "utf8",
       timeout: 5e3,
       stdio: "pipe"
     }).trim();
-    if (!nvsmi) {
-      return { supported: false, reason: "No NVIDIA GPU detected", gpuName: "", vramGB: 0, weightTier: "turbo2bit", needsHfToken: false };
-    }
+    if (!nvsmi)
+      return fail("No NVIDIA GPU detected");
     const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
-    let vramGB = parseInt(vramMB ?? "0", 10) / 1024;
-    const isJetson = /orin|tegra|jetson/i.test(gpuName ?? "");
-    if (isJetson) {
-      try {
-        const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
-        const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
-        const totalGB = memKB / 1024 / 1024;
-        if (totalGB > vramGB)
-          vramGB = totalGB;
-      } catch {
-      }
-    }
+    const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
     if (vramGB < 8) {
-      return { supported: false, reason: `GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB for 2-bit weights)`, gpuName: gpuName ?? "", vramGB, weightTier: "turbo2bit", needsHfToken: false };
+      return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
     }
     try {
       execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
@@ -41326,7 +41375,7 @@ function detectPersonaPlexCapability() {
       });
     } catch {
       const tier2 = selectWeightTier(vramGB);
-      return { supported: false, reason: "PyTorch CUDA not available", gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
+      return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
     }
     const tier = selectWeightTier(vramGB);
     const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
@@ -41340,7 +41389,7 @@ function detectPersonaPlexCapability() {
       needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
     };
   } catch {
-    return { supported: false, reason: "nvidia-smi not found", gpuName: "", vramGB: 0, weightTier: "turbo2bit", needsHfToken: false };
+    return fail("No NVIDIA GPU detected (nvidia-smi not found)");
   }
 }
 function isPersonaPlexRunning() {
@@ -41383,11 +41432,20 @@ async function installPersonaPlex(onInfo, weightTier) {
   const log = onInfo ?? (() => {
   });
   mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
+  let arch2 = "";
+  try {
+    arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
+  } catch {
+  }
+  const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
+  if (isAarch64)
+    log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
   const venvDir = join54(PERSONAPLEX_DIR, "venv");
   if (!existsSync37(venvDir)) {
     log("Creating Python virtual environment...");
     try {
-      execSync27(`python3 -m venv "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
+      const ssp = isAarch64 ? " --system-site-packages" : "";
+      await execAsync(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4 });
     } catch (err) {
       log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
       return false;
@@ -41395,14 +41453,6 @@ async function installPersonaPlex(onInfo, weightTier) {
   }
   const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
   const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
-  let arch2 = "";
-  try {
-    arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
-  } catch {
-  }
-  const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
-  if (isAarch64)
-    log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
   log("Checking system dependencies (libopus)...");
   try {
     if (process.platform === "linux") {
@@ -41419,7 +41469,7 @@ async function installPersonaPlex(onInfo, weightTier) {
     } catch {
       log("ARM64: Installing Rust toolchain (needed for sphn audio codec)...");
       try {
-        execSync27("curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y", { timeout: 12e4, stdio: "pipe" });
+        await execAsync("curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y", { timeout: 12e4 });
       } catch (e) {
         log(`Rust install failed: ${e instanceof Error ? e.message : String(e)}`);
         log("Install Rust manually: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh");
@@ -41427,7 +41477,7 @@ async function installPersonaPlex(onInfo, weightTier) {
       }
     }
     try {
-      execSync27(`"${pip}" install --quiet maturin`, { timeout: 6e4, stdio: "pipe" });
+      await execAsync(`"${pip}" install --quiet maturin`, { timeout: 6e4, stdio: "pipe" });
     } catch {
     }
   }
@@ -41435,13 +41485,13 @@ async function installPersonaPlex(onInfo, weightTier) {
   const repoDir = join54(PERSONAPLEX_DIR, "personaplex-repo");
   try {
     if (!existsSync37(repoDir)) {
-      execSync27(`git clone https://github.com/NVIDIA/personaplex.git "${repoDir}"`, { timeout: 12e4, stdio: "pipe" });
+      await execAsync(`git clone https://github.com/NVIDIA/personaplex.git "${repoDir}"`, { timeout: 12e4 });
     }
     if (isAarch64) {
       log("ARM64: Building sphn from source (Opus codec bindings)...");
       try {
         const rustEnv = `export PATH="$HOME/.cargo/bin:$PATH" &&`;
-        execSync27(`${rustEnv} "${pip}" install --quiet --no-binary sphn sphn`, { timeout: 3e5, stdio: "pipe", shell: "/bin/bash" });
+        await execAsync(`${rustEnv} "${pip}" install --quiet --no-binary sphn sphn`, { timeout: 3e5 });
         log("ARM64: sphn built successfully");
       } catch (e) {
         log(`ARM64: sphn build failed \u2014 ${e instanceof Error ? e.message : String(e)}`);
@@ -41449,11 +41499,11 @@ async function installPersonaPlex(onInfo, weightTier) {
         return false;
       }
     }
-    execSync27(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5, stdio: "pipe" });
+    await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
   } catch (err) {
     log(`Moshi install failed: ${err instanceof Error ? err.message : String(err)}`);
     try {
-      execSync27(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
+      await execAsync(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
     } catch {
     }
     return false;
@@ -41479,12 +41529,12 @@ async function installPersonaPlex(onInfo, weightTier) {
   if (isAarch64) {
     log("ARM64: Installing bitsandbytes for INT4 inference...");
     try {
-      execSync27(`"${pip}" install --quiet bitsandbytes`, { timeout: 12e4, stdio: "pipe" });
+      await execAsync(`"${pip}" install --quiet bitsandbytes`, { timeout: 12e4, stdio: "pipe" });
     } catch {
     }
   }
   try {
-    execSync27(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
+    await execAsync(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
   } catch {
   }
   const tier = weightTier ?? detectPersonaPlexCapability().weightTier;
@@ -41494,29 +41544,26 @@ async function installPersonaPlex(onInfo, weightTier) {
   try {
     const tokenArg = repoInfo.needsToken ? "" : "--token ''";
     const dlCmd = `"${python}" -c "from huggingface_hub import hf_hub_download; f=hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}); print(f)"`;
-    const weightPath = execSync27(dlCmd, {
-      encoding: "utf8",
-      timeout: 6e5,
-      stdio: "pipe",
-      env: { ...process.env }
-    }).trim();
+    const weightPath = (await execAsync(dlCmd, { timeout: 6e5 })).trim();
     log(`Weights downloaded: ${repoInfo.file}`);
     if (tier !== "original") {
-      log("Downloading Mimi codec and tokenizer...");
-      try {
-        const hasToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
-        if (hasToken) {
-          execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer_spm_32k_3.model'); hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer-e351c8d8-checkpoint125.safetensors')"`, {
-            timeout: 3e5,
-            stdio: "pipe"
+      log("Downloading Mimi codec + tokenizer (no token needed)...");
+      const supportFiles = ["tokenizer-e351c8d8-checkpoint125.safetensors", "tokenizer_spm_32k_3.model", "config.json"];
+      for (const sf of supportFiles) {
+        try {
+          await execAsync(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${repoInfo.repo}', '${sf}', token=False)"`, {
+            timeout: 3e5
           });
-          log("Codec + tokenizer downloaded.");
-        } else {
-          log("Note: Mimi codec needs HF_TOKEN on first run (set HF_TOKEN env var).");
-          log("Weights themselves are public \u2014 no token needed for the model.");
+        } catch {
+          try {
+            await execAsync(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('nvidia/personaplex-7b-v1', '${sf}')"`, {
+              timeout: 3e5
+            });
+          } catch {
+          }
         }
-      } catch {
       }
+      log("Codec + tokenizer downloaded.");
     }
   } catch (err) {
     const msg = err instanceof Error ? err.message : String(err);
@@ -41526,9 +41573,8 @@ async function installPersonaPlex(onInfo, weightTier) {
         log("Auto-downgrading to INT4 weights (no token required)...");
         const nf4 = WEIGHT_REPOS["nf4"];
         try {
-          execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${nf4.repo}', '${nf4.file}', token=False)"`, {
-            timeout: 6e5,
-            stdio: "pipe"
+          await execAsync(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${nf4.repo}', '${nf4.file}', token=False)"`, {
+            timeout: 6e5
           });
           writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), "nf4");
           log(`Downloaded INT4 weights instead (${nf4.sizeGB}GB, public).`);
@@ -41566,7 +41612,52 @@ async function startPersonaPlexDaemon(onInfo) {
   const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
   const sslDir = join54(PERSONAPLEX_DIR, "ssl");
   mkdirSync15(sslDir, { recursive: true });
-  log("Starting PersonaPlex daemon (loading ~7B model)...");
+  const tier = getWeightTier();
+  const repoInfo = WEIGHT_REPOS[tier];
+  const extraArgs = [];
+  if (tier !== "original") {
+    log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
+    const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
+    const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
+    if (!existsSync37(dequantScript)) {
+      const shipped = getShippedVoicesDir();
+      if (shipped) {
+        const src = join54(shipped, "dequant-loader.py");
+        if (existsSync37(src))
+          copyFileSync2(src, dequantScript);
+      }
+    }
+    try {
+      const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+      if (existsSync37(dequantScript) && existsSync37(weightPath)) {
+        try {
+          execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
+          if (existsSync37(cachedBf16)) {
+            extraArgs.push("--moshi-weight", cachedBf16);
+            log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
+          }
+        } catch (e) {
+          log(`Dequantization failed \u2014 server will try to load original weights`);
+        }
+      }
+      try {
+        const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+        if (existsSync37(mimiPath))
+          extraArgs.push("--mimi-weight", mimiPath);
+      } catch {
+      }
+      try {
+        const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+        if (existsSync37(tokPath))
+          extraArgs.push("--tokenizer", tokPath);
+      } catch {
+      }
+    } catch {
+      log(`Weight file not found \u2014 server will download on first run`);
+    }
+    extraArgs.push("--hf-repo", repoInfo.repo);
+  }
+  log(`Starting PersonaPlex daemon (${tier} tier)...`);
   const child = spawn19(venvPython2, [
     "-m",
     "moshi.server",
@@ -41577,7 +41668,8 @@ async function startPersonaPlexDaemon(onInfo) {
     "--ssl",
     sslDir,
     "--device",
-    "cuda"
+    "cuda",
+    ...extraArgs
   ], {
     stdio: ["ignore", "pipe", "pipe"],
     detached: true,
@@ -41664,8 +41756,7 @@ function listPersonaPlexVoices() {
   }
   if (existsSync37(CUSTOM_VOICES_DIR)) {
     try {
-      const { readdirSync: readdirSync24 } = __require("node:fs");
-      for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
+      for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
         if (f.endsWith(".pt")) {
           const name = f.replace(/\.pt$/, "");
           voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
@@ -41993,7 +42084,7 @@ async function detectSystemSpecsAsync() {
   let gpuVramGB = 0;
   let gpuName = "";
   try {
-    const { stdout: memInfo } = await execAsync("free -b 2>/dev/null || sysctl -n hw.memsize 2>/dev/null", { timeout: 5e3 });
+    const { stdout: memInfo } = await execAsync2("free -b 2>/dev/null || sysctl -n hw.memsize 2>/dev/null", { timeout: 5e3 });
     if (memInfo.includes("Mem:")) {
       const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
       if (match) {
@@ -42010,7 +42101,7 @@ async function detectSystemSpecsAsync() {
   } catch {
   }
   try {
-    const { stdout: nvidiaSmi } = await execAsync("nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>/dev/null", { timeout: 5e3 });
+    const { stdout: nvidiaSmi } = await execAsync2("nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>/dev/null", { timeout: 5e3 });
     const lines = nvidiaSmi.trim().split("\n");
     if (lines.length > 0) {
       for (const line of lines) {
@@ -43452,7 +43543,7 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
     mkdirSync16(modelDir2, { recursive: true });
     const modelfilePath = join55(modelDir2, `Modelfile.${customName}`);
     writeFileSync17(modelfilePath, modelfileContent + "\n", "utf8");
-    await execAsync(`ollama create ${customName} -f ${modelfilePath}`, {
+    await execAsync2(`ollama create ${customName} -f ${modelfilePath}`, {
       timeout: 12e4
     });
     return customName;
@@ -43610,7 +43701,7 @@ export PATH="${binDir}:$PATH"  # Added by open-agents for nvim
   } catch {
   }
 }
-var execAsync, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
+var execAsync2, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
 var init_setup = __esm({
   "packages/cli/dist/tui/setup.js"() {
     "use strict";
@@ -43619,7 +43710,7 @@ var init_setup = __esm({
     init_config();
     init_dist();
     init_tui_select();
-    execAsync = promisify6(exec2);
+    execAsync2 = promisify6(exec2);
     QWEN_VARIANTS = [
       { tag: "qwen3.5:0.8b", sizeGB: 1, label: "0.8B params (1.0 GB)", cloud: false },
       { tag: "qwen3.5:2b", sizeGB: 2.7, label: "2B params (2.7 GB)", cloud: false },
@@ -45416,7 +45507,7 @@ __export(voice_exports, {
   registerCustomOnnxModel: () => registerCustomOnnxModel,
   resetNarrationContext: () => resetNarrationContext
 });
-import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync13 } from "node:fs";
+import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
 import { join as join58, dirname as dirname19 } from "node:path";
 import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
 import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
@@ -46550,7 +46641,7 @@ var init_voice = __esm({
           const p = join58(dir, f);
           let size = 0;
           try {
-            size = statSync13(p).size;
+            size = statSync14(p).size;
           } catch {
           }
           return {
@@ -48166,7 +48257,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
 // packages/cli/dist/tui/commands.js
 import * as nodeOs from "node:os";
 import { execSync as nodeExecSync } from "node:child_process";
-import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync14, rmSync } from "node:fs";
+import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
 import { join as join59 } from "node:path";
 function safeLog(text) {
   if (isNeovimActive()) {
@@ -48979,7 +49070,7 @@ async function handleSlashCommand(input, ctx) {
           ipfsFiles = files.length;
           for (const f of files) {
             try {
-              ipfsBytes += statSync14(join59(ipfsLocalDir, f)).size;
+              ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
             } catch {
             }
           }
@@ -48993,7 +49084,7 @@ async function handleSlashCommand(input, ctx) {
               else {
                 heliaBlocks++;
                 try {
-                  heliaBytes += statSync14(join59(dir, entry.name)).size;
+                  heliaBytes += statSync15(join59(dir, entry.name)).size;
                 } catch {
                 }
               }
@@ -49086,7 +49177,7 @@ async function handleSlashCommand(input, ctx) {
           const count = memStore.count();
           lines.push(`
   ${c2.bold("Structured Memory (SQLite)")}`);
-          lines.push(`    Memories: ${c2.bold(String(count))}   DB: ${c2.dim(formatFileSize(statSync14(dbPath).size))}`);
+          lines.push(`    Memories: ${c2.bold(String(count))}   DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
           cDb(db);
         }
       } catch {
@@ -49117,7 +49208,7 @@ async function handleSlashCommand(input, ctx) {
               walkStorage(full, subCat);
             } else {
               try {
-                const sz = statSync14(full).size;
+                const sz = statSync15(full).size;
                 totalBytes += sz;
                 if (!categories[category])
                   categories[category] = { files: 0, bytes: 0 };
@@ -49418,29 +49509,46 @@ async function handleSlashCommand(input, ctx) {
           const caps = detectPersonaPlexCapability2();
           if (!caps.supported) {
             renderWarning(`PersonaPlex not available: ${caps.reason}`);
-            renderInfo("Requirements: NVIDIA GPU with \u226516GB VRAM (RTX 3090/4090/A100+), CUDA 12.1+, PyTorch");
+            renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
             return "handled";
           }
-          renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB VRAM) \u2014 PersonaPlex compatible \u2713`);
+          const tierInfo = caps.weightTier;
+          renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB) \u2192 ${tierInfo} tier${caps.needsHfToken ? "" : " (no HF token needed)"}`);
           if (!isPersonaPlexInstalled2()) {
-            renderInfo("Installing PersonaPlex-7B (~14GB download)...");
-            const ok = await installPersonaPlex2((msg2) => renderInfo(msg2));
-            if (!ok) {
-              renderError("PersonaPlex installation failed.");
-              return "handled";
-            }
+            renderInfo("Setting up PersonaPlex in background \u2014 you can keep working...");
+            (async () => {
+              try {
+                const ok = await installPersonaPlex2((msg2) => renderInfo(msg2), caps.weightTier);
+                if (!ok) {
+                  renderError("PersonaPlex installation failed.");
+                  return;
+                }
+                if (!isPersonaPlexRunning2()) {
+                  const url = await startPersonaPlexDaemon2((msg2) => renderInfo(msg2));
+                  if (url) {
+                    renderInfo(`PersonaPlex ready at ${url} \u2014 use /call for full-duplex voice`);
+                  } else {
+                    renderError("PersonaPlex daemon failed to start. Check ~/.open-agents/voice/personaplex/daemon.log");
+                  }
+                }
+              } catch (e) {
+                renderError(`PersonaPlex setup error: ${e instanceof Error ? e.message : String(e)}`);
+              }
+            })();
+            return "handled";
           }
           if (isPersonaPlexRunning2()) {
-            renderInfo("PersonaPlex daemon is running.");
-            renderInfo("Use /call to start a full-duplex voice session.");
+            renderInfo("PersonaPlex daemon is running. Use /call for full-duplex voice.");
           } else {
-            const url = await startPersonaPlexDaemon2((msg2) => renderInfo(msg2));
-            if (url) {
-              renderInfo(`PersonaPlex ready at ${url}`);
-              renderInfo("Use /call to start a full-duplex voice session with PersonaPlex.");
-            } else {
-              renderError("PersonaPlex daemon failed to start.");
-            }
+            renderInfo("Starting PersonaPlex daemon...");
+            startPersonaPlexDaemon2((msg2) => renderInfo(msg2)).then((url) => {
+              if (url) {
+                renderInfo(`PersonaPlex ready at ${url} \u2014 use /call for full-duplex voice`);
+              } else {
+                renderError("PersonaPlex daemon failed to start. Check daemon.log");
+              }
+            }).catch(() => {
+            });
           }
           return "handled";
         }
@@ -51087,7 +51195,7 @@ async function showCohereDashboard(ctx) {
                 const snapItems = snaps.slice(0, 20).map((f) => ({
                   key: f,
                   label: f.replace(".json", ""),
-                  detail: `${formatFileSize(statSync14(join59(snapDir, f)).size)}`
+                  detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
                 }));
                 if (snapItems.length > 0) {
                   await tuiSelect({
@@ -59364,7 +59472,7 @@ var init_tool_policy = __esm({
 });
 // packages/cli/dist/tui/telegram-bridge.js
-import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync15 } from "node:fs";
+import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
 import { join as join68, resolve as resolve30 } from "node:path";
 import { writeFile as writeFileAsync } from "node:fs/promises";
 function convertMarkdownToTelegramHTML(md) {
@@ -71366,7 +71474,7 @@ __export(index_repo_exports, {
   indexRepoCommand: () => indexRepoCommand
 });
 import { resolve as resolve34 } from "node:path";
-import { existsSync as existsSync56, statSync as statSync16 } from "node:fs";
+import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
 import { cwd as cwd2 } from "node:process";
 async function indexRepoCommand(opts, _config) {
   const repoRoot = resolve34(opts.repoPath ?? cwd2());
@@ -71376,7 +71484,7 @@ async function indexRepoCommand(opts, _config) {
     printError(`Path does not exist: ${repoRoot}`);
     process.exit(1);
   }
-  const stat5 = statSync16(repoRoot);
+  const stat5 = statSync17(repoRoot);
   if (!stat5.isDirectory()) {
     printError(`Path is not a directory: ${repoRoot}`);
     process.exit(1);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.185.31",
+  "version": "0.185.33",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",

package/voices/personaplex/dequant-loader.py ADDED Viewed

@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
+For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
+bf16 safetensors file that moshi.server can load natively.
+Usage:
+  python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
+  python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
+The output file can then be passed to moshi.server via --moshi-weight.
+"""
+import os, sys, math, time
+import torch
+from safetensors.torch import load_file, save_file
+# Four reconstruction levels, looked up by 2-bit code (0..3) in dequant_turbo2bit.
+# NOTE(review): values look like the 4-level Lloyd-Max quantizer for a unit
+# Gaussian — confirm against the quantizer that produced the weights.
+NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
+def fast_wht(x):
+    """Normalized Walsh-Hadamard transform along the last dimension.
+    Applies log2(n) butterfly stages and divides by sqrt(n), so applying the
+    function twice returns the original values (the transform is its own inverse).
+    Args:
+        x: tensor whose last dimension has power-of-two length n; any leading
+           dimensions are treated as batch dimensions. Contiguity is not required.
+    Returns:
+        A new tensor with the same shape as ``x``. The input is never modified.
+    Raises:
+        ValueError: if the last dimension is not a power of two.
+    """
+    n = x.shape[-1]
+    # Reject bad sizes up front instead of failing inside a later butterfly stage.
+    if n < 1 or n & (n - 1):
+        raise ValueError(f"fast_wht requires a power-of-two last dimension, got {n}")
+    lead = x.shape[:-1]
+    out = x
+    h = 1
+    while h < n:
+        # Split the last axis into blocks of 2*h: first half is `a`, second half is `b`.
+        pairs = out.reshape(*lead, -1, 2, h)
+        a = pairs[..., 0, :]
+        b = pairs[..., 1, :]
+        # torch.stack builds a fresh tensor, so the caller's storage is never written.
+        out = torch.stack((a + b, a - b), dim=-2).reshape(*lead, n)
+        h *= 2
+    return out / math.sqrt(n)
+def detect_format(state):
+    """Classify a loaded state dict by the key suffixes it contains.
+    Returns "turbo2bit" when any key ends in ".packed", otherwise "nf4" when any
+    key ends in ".__scales__", otherwise "plain". The ".packed" check wins when
+    both kinds of key are present.
+    """
+    keys = tuple(state)
+    if any(key.endswith(".packed") for key in keys):
+        return "turbo2bit"
+    if any(key.endswith(".__scales__") for key in keys):
+        return "nf4"
+    return "plain"
+def dequant_nf4(state, group_size=64):
+    """Dequantize group-scaled 4-bit weights to bfloat16.
+    A tensor ``name`` is treated as quantized when ``name.__scales__`` is present;
+    it must then also have ``name.__shape__`` and ``name.__numel__``. Each byte
+    holds two codes (low nibble first); a code maps to ``code - 8`` and is
+    multiplied by the scale of its group. Tensors without a scales entry are
+    only cast to bfloat16.
+    Args:
+        state: mapping of tensor name to tensor, as returned by ``load_file``.
+        group_size: number of consecutive values sharing one scale (default 64).
+    Returns:
+        dict mapping each weight name to a bfloat16 tensor; metadata keys are dropped.
+    Raises:
+        ValueError: if a packed tensor holds fewer values than its scales require.
+    """
+    result = {}
+    for name, tensor in state.items():
+        if name.endswith((".__scales__", ".__shape__", ".__numel__")):
+            continue
+        scales_key = f"{name}.__scales__"
+        if scales_key not in state:
+            result[name] = tensor.to(torch.bfloat16)
+            continue
+        scales = state[scales_key].float().reshape(-1)
+        # Zero entries in the stored shape are dropped before the final reshape.
+        shape = [s for s in state[f"{name}.__shape__"].tolist() if s > 0]
+        numel = int(state[f"{name}.__numel__"].item())
+        # Flatten first so packed tensors of any rank unpack the same way.
+        packed = tensor.reshape(-1)
+        lo = (packed & 0x0F).to(torch.int8) - 8
+        hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
+        # Interleave as lo0, hi0, lo1, hi1, ... on the same device as the input,
+        # so the multiply by `scales` below never mixes devices.
+        unpacked = torch.stack((lo, hi), dim=1).reshape(-1).float()
+        n_groups = scales.numel()
+        needed = n_groups * group_size
+        if unpacked.numel() < needed:
+            raise ValueError(
+                f"{name}: packed data holds {unpacked.numel()} values but "
+                f"{n_groups} groups of {group_size} need {needed}"
+            )
+        groups = unpacked[:needed].reshape(n_groups, group_size)
+        # Trim group padding back to the original element count.
+        deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
+        result[name] = deq.reshape(shape).to(torch.bfloat16)
+    return result
+def _dequant_turbo2bit_tensor(state, name):
+    """Rebuild one bfloat16 tensor from its ``name.packed/.scales/.shape/.numel/.gs/.np2`` entries."""
+    gs = int(state[f"{name}.gs"].item())
+    gs_pow2 = int(state[f"{name}.np2"].item())
+    numel = int(state[f"{name}.numel"].item())
+    # Zero entries in the stored shape are dropped before the final reshape.
+    shape = [s for s in state[f"{name}.shape"].tolist() if s > 0]
+    scales = state[f"{name}.scales"].float().reshape(-1)
+    n_groups = scales.numel()
+    # Each packed element carries four 2-bit codes, lowest bits first.
+    p = state[f"{name}.packed"].reshape(n_groups, gs // 4)
+    codes = torch.stack([(p >> (2 * i)) & 0x03 for i in range(4)], dim=2)
+    codes = codes.reshape(n_groups, gs).long()
+    # Keep the lookup table and padding on the input's device so that running
+    # with a non-CPU --device does not mix devices.
+    dequant = NF2_CENTROIDS.to(codes.device)[codes]
+    if gs_pow2 > gs:
+        pad = torch.zeros(n_groups, gs_pow2 - gs, dtype=dequant.dtype, device=dequant.device)
+        dequant = torch.cat([dequant, pad], dim=1)
+    # The normalized Walsh-Hadamard transform is its own inverse.
+    dequant = fast_wht(dequant)
+    dequant = dequant[:, :gs] * scales.unsqueeze(1)
+    return dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
+def dequant_turbo2bit(state):
+    """Dequantize TurboQuant 2-bit (NF2 + WHT) weights to bfloat16.
+    A tensor ``name`` is quantized when ``name.packed`` is present; its codes are
+    mapped through NF2_CENTROIDS, zero-padded to ``name.np2`` columns, passed
+    through fast_wht, truncated to ``name.gs`` columns and scaled per group.
+    Tensors without a ``.packed`` entry are only cast to bfloat16.
+    Args:
+        state: mapping of tensor name to tensor, as returned by ``load_file``.
+    Returns:
+        dict mapping each weight name to a bfloat16 tensor; metadata keys are dropped.
+    """
+    meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
+    result = {}
+    for name, tensor in state.items():
+        if name.endswith(".packed"):
+            base = name[: -len(".packed")]
+            # When the bare weight name is not itself a key, this is the only
+            # place the tensor can be emitted; otherwise it is handled below.
+            # NOTE(review): confirm which layout the quantizer actually writes.
+            if base not in state:
+                result[base] = _dequant_turbo2bit_tensor(state, base)
+            continue
+        if name.endswith(meta_suffixes):
+            continue
+        if f"{name}.packed" in state:
+            result[name] = _dequant_turbo2bit_tensor(state, name)
+        else:
+            result[name] = tensor.to(torch.bfloat16)
+    return result
+def main():
+    """Command-line entry point: dequantize ``--input`` into a bf16 file at ``--output``.
+    Exits with status 1 when the input is missing and with status 0, without doing
+    any work, when the output already exists and is newer than the input.
+    """
+    import argparse
+    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
+    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
+    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
+    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
+    args = parser.parse_args()
+    if not os.path.exists(args.input):
+        print(f"Error: {args.input} not found")
+        sys.exit(1)
+    # Skip if output already exists and is newer than input
+    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
+        print(f"Cached: {args.output} is up to date")
+        sys.exit(0)
+    print(f"Loading {args.input}...")
+    t0 = time.time()
+    state = load_file(args.input, device=args.device)
+    fmt = detect_format(state)
+    print(f"Format: {fmt}")
+    if fmt == "nf4":
+        result = dequant_nf4(state)
+    elif fmt == "turbo2bit":
+        result = dequant_turbo2bit(state)
+    else:
+        print("Already plain bf16/fp16 — copying")
+        result = {k: v.to(torch.bfloat16) for k, v in state.items()}
+    t1 = time.time()
+    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")
+    print(f"Saving to {args.output}...")
+    # Write to a sibling temp file and rename it into place. The cache check above
+    # trusts any output newer than the input, so a partially written file left by
+    # an interrupted run must never appear under the final name.
+    tmp_output = f"{args.output}.tmp.{os.getpid()}"
+    try:
+        save_file(result, tmp_output)
+        os.replace(tmp_output, args.output)
+    finally:
+        # Only still present when saving or renaming failed.
+        if os.path.exists(tmp_output):
+            os.remove(tmp_output)
+    size_gb = os.path.getsize(args.output) / 1024**3
+    print(f"Done: {size_gb:.2f} GB")
+if __name__ == "__main__":
+    main()