npm - open-agents-ai - Versions diffs - 0.185.30 → 0.185.32 - Mend

open-agents-ai 0.185.30 → 0.185.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js +201 -54
package/package.json +1 -1
package/voices/personaplex/dequant-loader.py +174 -0

package/dist/index.js CHANGED

@@ -41270,6 +41270,8 @@ __export(personaplex_exports, {
   clonePersonaPlexVoice: () => clonePersonaPlexVoice,
   detectPersonaPlexCapability: () => detectPersonaPlexCapability,
   getPersonaPlexWSUrl: () => getPersonaPlexWSUrl,
+  getWeightRepoInfo: () => getWeightRepoInfo,
+  getWeightTier: () => getWeightTier,
   installPersonaPlex: () => installPersonaPlex,
   isPersonaPlexInstalled: () => isPersonaPlexInstalled,
   isPersonaPlexRunning: () => isPersonaPlexRunning,
@@ -41279,37 +41281,68 @@ __export(personaplex_exports, {
   startPersonaPlexDaemon: () => startPersonaPlexDaemon,
   stopPersonaPlex: () => stopPersonaPlex
 });
-import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
+import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
 import { join as join54, dirname as dirname18 } from "node:path";
 import { homedir as homedir13 } from "node:os";
 import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
 import { fileURLToPath as fileURLToPath11 } from "node:url";
+function selectWeightTier(vramGB) {
+  if (vramGB >= 48)
+    return "original";
+  if (vramGB >= 16)
+    return "nf4";
+  return "turbo2bit";
+}
+function detectJetson() {
+  try {
+    const model = readFileSync28("/proc/device-tree/model", "utf8").replace(/\0/g, "").trim();
+    if (/jetson|orin|tegra/i.test(model)) {
+      const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
+      const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
+      return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
+    }
+  } catch {
+  }
+  return { isJetson: false, model: "", totalMemGB: 0 };
+}
 function detectPersonaPlexCapability() {
+  const fail = (reason) => ({
+    supported: false,
+    reason,
+    gpuName: "",
+    vramGB: 0,
+    weightTier: "turbo2bit",
+    needsHfToken: false
+  });
+  const jetson = detectJetson();
+  if (jetson.isJetson) {
+    const vramGB = jetson.totalMemGB;
+    if (vramGB < 8)
+      return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
+    const tier = selectWeightTier(vramGB);
+    const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
+    const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
+    return {
+      supported: true,
+      reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
+      gpuName: jetson.model,
+      vramGB,
+      weightTier: effectiveTier,
+      needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
+    };
+  }
   try {
     const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
       encoding: "utf8",
       timeout: 5e3,
       stdio: "pipe"
     }).trim();
-    if (!nvsmi) {
-      return { supported: false, reason: "No NVIDIA GPU detected", gpuName: "", vramGB: 0 };
-    }
+    if (!nvsmi)
+      return fail("No NVIDIA GPU detected");
     const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
     const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
-    if (vramGB < 16) {
-      const isJetson = /orin|tegra|jetson/i.test(gpuName ?? "");
-      if (isJetson) {
-        try {
-          const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
-          const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
-          const totalGB = memKB / 1024 / 1024;
-          if (totalGB >= 32) {
-            return { supported: true, reason: `Jetson unified memory (${totalGB.toFixed(0)}GB total)`, gpuName: gpuName ?? "", vramGB: totalGB };
-          }
-        } catch {
-        }
-      }
-      return { supported: false, reason: `GPU has ${vramGB.toFixed(1)}GB VRAM (need \u226516GB)`, gpuName: gpuName ?? "", vramGB };
+    if (vramGB < 8) {
+      return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
     }
     try {
       execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
@@ -41317,11 +41350,22 @@ function detectPersonaPlexCapability() {
         stdio: "pipe"
       });
     } catch {
-      return { supported: false, reason: "PyTorch CUDA not available", gpuName: gpuName ?? "", vramGB };
+      const tier2 = selectWeightTier(vramGB);
+      return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
     }
-    return { supported: true, reason: "OK", gpuName: gpuName ?? "", vramGB };
+    const tier = selectWeightTier(vramGB);
+    const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
+    const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
+    return {
+      supported: true,
+      reason: `OK \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
+      gpuName: gpuName ?? "",
+      vramGB,
+      weightTier: effectiveTier,
+      needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
+    };
   } catch {
-    return { supported: false, reason: "nvidia-smi not found", gpuName: "", vramGB: 0 };
+    return fail("No NVIDIA GPU detected (nvidia-smi not found)");
   }
 }
 function isPersonaPlexRunning() {
@@ -41348,15 +41392,36 @@ function getPersonaPlexWSUrl() {
 function isPersonaPlexInstalled() {
   return existsSync37(join54(PERSONAPLEX_DIR, "model_ready"));
 }
-async function installPersonaPlex(onInfo) {
+function getWeightTier() {
+  const tierFile = join54(PERSONAPLEX_DIR, "weight_tier");
+  if (existsSync37(tierFile)) {
+    const saved = readFileSync28(tierFile, "utf8").trim();
+    if (saved in WEIGHT_REPOS)
+      return saved;
+  }
+  return detectPersonaPlexCapability().weightTier;
+}
+function getWeightRepoInfo(tier) {
+  return WEIGHT_REPOS[tier];
+}
+async function installPersonaPlex(onInfo, weightTier) {
   const log = onInfo ?? (() => {
   });
   mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
+  let arch2 = "";
+  try {
+    arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
+  } catch {
+  }
+  const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
+  if (isAarch64)
+    log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
   const venvDir = join54(PERSONAPLEX_DIR, "venv");
   if (!existsSync37(venvDir)) {
     log("Creating Python virtual environment...");
     try {
-      execSync27(`python3 -m venv "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
+      const ssp = isAarch64 ? " --system-site-packages" : "";
+      execSync27(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
     } catch (err) {
       log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
       return false;
@@ -41364,14 +41429,6 @@ async function installPersonaPlex(onInfo) {
   }
   const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
   const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
-  let arch2 = "";
-  try {
-    arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
-  } catch {
-  }
-  const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
-  if (isAarch64)
-    log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
   log("Checking system dependencies (libopus)...");
   try {
     if (process.platform === "linux") {
@@ -41456,12 +41513,64 @@ async function installPersonaPlex(onInfo) {
     execSync27(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
   } catch {
   }
-  log("PersonaPlex installed. Model will download on first launch (~14GB).");
-  if (isAarch64) {
-    log("ARM64: On first run, weights will load in INT4 mode for real-time performance.");
+  const tier = weightTier ?? detectPersonaPlexCapability().weightTier;
+  const repoInfo = WEIGHT_REPOS[tier];
+  log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 ${repoInfo.needsToken ? "requires HF_TOKEN" : "public, no token needed"}`);
+  log(`Downloading PersonaPlex weights (${repoInfo.sizeGB}GB)...`);
+  try {
+    const tokenArg = repoInfo.needsToken ? "" : "--token ''";
+    const dlCmd = `"${python}" -c "from huggingface_hub import hf_hub_download; f=hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}); print(f)"`;
+    const weightPath = execSync27(dlCmd, {
+      encoding: "utf8",
+      timeout: 6e5,
+      stdio: "pipe",
+      env: { ...process.env }
+    }).trim();
+    log(`Weights downloaded: ${repoInfo.file}`);
+    if (tier !== "original") {
+      log("Downloading Mimi codec and tokenizer...");
+      try {
+        const hasToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
+        if (hasToken) {
+          execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer_spm_32k_3.model'); hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer-e351c8d8-checkpoint125.safetensors')"`, {
+            timeout: 3e5,
+            stdio: "pipe"
+          });
+          log("Codec + tokenizer downloaded.");
+        } else {
+          log("Note: Mimi codec needs HF_TOKEN on first run (set HF_TOKEN env var).");
+          log("Weights themselves are public \u2014 no token needed for the model.");
+        }
+      } catch {
+      }
+    }
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    if (repoInfo.needsToken && /401|403|gated|unauthorized/i.test(msg)) {
+      log(`HF_TOKEN required for ${tier} weights. Set HF_TOKEN or accept license at https://huggingface.co/${repoInfo.repo}`);
+      if (tier === "original") {
+        log("Auto-downgrading to INT4 weights (no token required)...");
+        const nf4 = WEIGHT_REPOS["nf4"];
+        try {
+          execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${nf4.repo}', '${nf4.file}', token=False)"`, {
+            timeout: 6e5,
+            stdio: "pipe"
+          });
+          writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), "nf4");
+          log(`Downloaded INT4 weights instead (${nf4.sizeGB}GB, public).`);
+        } catch {
+          log("Weight download failed.");
+          return false;
+        }
+      }
+    } else {
+      log(`Weight download failed: ${msg}`);
+      log("Weights will download on first server launch.");
+    }
   }
+  writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), tier);
   writeFileSync16(join54(PERSONAPLEX_DIR, "model_ready"), (/* @__PURE__ */ new Date()).toISOString());
-  log("PersonaPlex installed successfully.");
+  log(`PersonaPlex installed (${tier} tier). Use /call to start voice session.`);
   return true;
 }
 async function startPersonaPlexDaemon(onInfo) {
@@ -41483,7 +41592,39 @@ async function startPersonaPlexDaemon(onInfo) {
   const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
   const sslDir = join54(PERSONAPLEX_DIR, "ssl");
   mkdirSync15(sslDir, { recursive: true });
-  log("Starting PersonaPlex daemon (loading ~7B model)...");
+  const tier = getWeightTier();
+  const repoInfo = WEIGHT_REPOS[tier];
+  const extraArgs = [];
+  if (tier !== "original") {
+    log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
+    const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
+    const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
+    if (!existsSync37(dequantScript)) {
+      const shipped = getShippedVoicesDir();
+      if (shipped) {
+        const src = join54(shipped, "dequant-loader.py");
+        if (existsSync37(src))
+          copyFileSync2(src, dequantScript);
+      }
+    }
+    try {
+      const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+      if (existsSync37(dequantScript) && existsSync37(weightPath)) {
+        try {
+          execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
+          if (existsSync37(cachedBf16)) {
+            extraArgs.push("--moshi-weight", cachedBf16);
+            log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
+          }
+        } catch (e) {
+          log(`Dequantization failed \u2014 server will try to load original weights`);
+        }
+      }
+    } catch {
+      log(`Weight file not found \u2014 server will download on first run`);
+    }
+  }
+  log(`Starting PersonaPlex daemon (${tier} tier)...`);
   const child = spawn19(venvPython2, [
     "-m",
     "moshi.server",
@@ -41494,7 +41635,8 @@ async function startPersonaPlexDaemon(onInfo) {
     "--ssl",
     sslDir,
     "--device",
-    "cuda"
+    "cuda",
+    ...extraArgs
   ], {
     stdio: ["ignore", "pipe", "pipe"],
     detached: true,
@@ -41581,8 +41723,7 @@ function listPersonaPlexVoices() {
   }
   if (existsSync37(CUSTOM_VOICES_DIR)) {
     try {
-      const { readdirSync: readdirSync24 } = __require("node:fs");
-      for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
+      for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
         if (f.endsWith(".pt")) {
           const name = f.replace(/\.pt$/, "");
           voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
@@ -41784,10 +41925,11 @@ async function autoSetupPersonaPlex(onInfo) {
     log(`PersonaPlex not available: ${caps.reason}`);
     return null;
   }
-  log(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB) \u2014 PersonaPlex compatible`);
+  const tierInfo = WEIGHT_REPOS[caps.weightTier];
+  log(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB) \u2192 ${caps.weightTier} weights (${tierInfo.sizeGB}GB${caps.needsHfToken ? "" : ", no HF token needed"})`);
   if (!isPersonaPlexInstalled()) {
     log("Installing PersonaPlex (first time setup)...");
-    const ok = await installPersonaPlex(log);
+    const ok = await installPersonaPlex(log, caps.weightTier);
     if (!ok) {
       log("PersonaPlex installation failed.");
       return null;
@@ -41807,11 +41949,16 @@ async function autoSetupPersonaPlex(onInfo) {
   }
   return await startPersonaPlexDaemon(log);
 }
-var PERSONAPLEX_DIR, PID_FILE, PORT_FILE, LOG_FILE, CUSTOM_VOICES_DIR;
+var WEIGHT_REPOS, PERSONAPLEX_DIR, PID_FILE, PORT_FILE, LOG_FILE, CUSTOM_VOICES_DIR;
 var init_personaplex = __esm({
   "packages/cli/dist/tui/personaplex.js"() {
     "use strict";
     init_render();
+    WEIGHT_REPOS = {
+      original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
+      nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
+      turbo2bit: { repo: "cudabenchmarktest/personaplex-7b-turbo2bit", file: "model-turbo2bit.safetensors", sizeGB: 2.1, needsToken: false }
+    };
     PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
     PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
     PORT_FILE = join54(PERSONAPLEX_DIR, "daemon.port");
@@ -45327,7 +45474,7 @@ __export(voice_exports, {
   registerCustomOnnxModel: () => registerCustomOnnxModel,
   resetNarrationContext: () => resetNarrationContext
 });
-import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync13 } from "node:fs";
+import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
 import { join as join58, dirname as dirname19 } from "node:path";
 import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
 import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
@@ -46461,7 +46608,7 @@ var init_voice = __esm({
           const p = join58(dir, f);
           let size = 0;
           try {
-            size = statSync13(p).size;
+            size = statSync14(p).size;
           } catch {
           }
           return {
@@ -48077,7 +48224,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
 // packages/cli/dist/tui/commands.js
 import * as nodeOs from "node:os";
 import { execSync as nodeExecSync } from "node:child_process";
-import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync14, rmSync } from "node:fs";
+import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
 import { join as join59 } from "node:path";
 function safeLog(text) {
   if (isNeovimActive()) {
@@ -48890,7 +49037,7 @@ async function handleSlashCommand(input, ctx) {
           ipfsFiles = files.length;
           for (const f of files) {
             try {
-              ipfsBytes += statSync14(join59(ipfsLocalDir, f)).size;
+              ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
             } catch {
             }
           }
@@ -48904,7 +49051,7 @@ async function handleSlashCommand(input, ctx) {
               else {
                 heliaBlocks++;
                 try {
-                  heliaBytes += statSync14(join59(dir, entry.name)).size;
+                  heliaBytes += statSync15(join59(dir, entry.name)).size;
                 } catch {
                 }
               }
@@ -48997,7 +49144,7 @@ async function handleSlashCommand(input, ctx) {
           const count = memStore.count();
           lines.push(`
   ${c2.bold("Structured Memory (SQLite)")}`);
-          lines.push(`    Memories: ${c2.bold(String(count))}   DB: ${c2.dim(formatFileSize(statSync14(dbPath).size))}`);
+          lines.push(`    Memories: ${c2.bold(String(count))}   DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
           cDb(db);
         }
       } catch {
@@ -49028,7 +49175,7 @@ async function handleSlashCommand(input, ctx) {
               walkStorage(full, subCat);
             } else {
               try {
-                const sz = statSync14(full).size;
+                const sz = statSync15(full).size;
                 totalBytes += sz;
                 if (!categories[category])
                   categories[category] = { files: 0, bytes: 0 };
@@ -49329,7 +49476,7 @@ async function handleSlashCommand(input, ctx) {
           const caps = detectPersonaPlexCapability2();
           if (!caps.supported) {
             renderWarning(`PersonaPlex not available: ${caps.reason}`);
-            renderInfo("Requirements: NVIDIA GPU with \u226516GB VRAM (RTX 3090/4090/A100+), CUDA 12.1+, PyTorch");
+            renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
             return "handled";
           }
           renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB VRAM) \u2014 PersonaPlex compatible \u2713`);
@@ -50998,7 +51145,7 @@ async function showCohereDashboard(ctx) {
                 const snapItems = snaps.slice(0, 20).map((f) => ({
                   key: f,
                   label: f.replace(".json", ""),
-                  detail: `${formatFileSize(statSync14(join59(snapDir, f)).size)}`
+                  detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
                 }));
                 if (snapItems.length > 0) {
                   await tuiSelect({
@@ -59275,7 +59422,7 @@ var init_tool_policy = __esm({
 });
 // packages/cli/dist/tui/telegram-bridge.js
-import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync15 } from "node:fs";
+import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
 import { join as join68, resolve as resolve30 } from "node:path";
 import { writeFile as writeFileAsync } from "node:fs/promises";
 function convertMarkdownToTelegramHTML(md) {
@@ -71277,7 +71424,7 @@ __export(index_repo_exports, {
   indexRepoCommand: () => indexRepoCommand
 });
 import { resolve as resolve34 } from "node:path";
-import { existsSync as existsSync56, statSync as statSync16 } from "node:fs";
+import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
 import { cwd as cwd2 } from "node:process";
 async function indexRepoCommand(opts, _config) {
   const repoRoot = resolve34(opts.repoPath ?? cwd2());
@@ -71287,7 +71434,7 @@ async function indexRepoCommand(opts, _config) {
     printError(`Path does not exist: ${repoRoot}`);
     process.exit(1);
   }
-  const stat5 = statSync16(repoRoot);
+  const stat5 = statSync17(repoRoot);
   if (!stat5.isDirectory()) {
     printError(`Path is not a directory: ${repoRoot}`);
     process.exit(1);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.185.30",
+  "version": "0.185.32",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",

package/voices/personaplex/dequant-loader.py ADDED

@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
+For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
+bf16 safetensors file that moshi.server can load natively.
+Usage:
+  python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
+  python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
+The output file can then be passed to moshi.server via --moshi-weight.
+"""
+import os, sys, math, time
+import torch
+from safetensors.torch import load_file, save_file
+NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
+def fast_wht(x):
+    """Vectorized Walsh-Hadamard Transform."""
+    n = x.shape[-1]
+    h = 1
+    while h < n:
+        x_view = x.view(*x.shape[:-1], -1, 2, h)
+        a = x_view[..., 0, :].clone()
+        b = x_view[..., 1, :].clone()
+        x_view[..., 0, :] = a + b
+        x_view[..., 1, :] = a - b
+        x = x_view.reshape(*x.shape)
+        h *= 2
+    return x / math.sqrt(n)
+def detect_format(state):
+    """Detect if weights are NF4 (INT4), TurboQuant 2-bit, or plain."""
+    has_scales = any(k.endswith(".__scales__") for k in state)
+    has_packed = any(k.endswith(".packed") for k in state)
+    if has_packed:
+        return "turbo2bit"
+    if has_scales:
+        return "nf4"
+    return "plain"
+def dequant_nf4(state):
+    """Dequantize INT4 NF4 weights."""
+    result = {}
+    processed = set()
+    for name in list(state.keys()):
+        if name.endswith(".__scales__") or name.endswith(".__shape__") or name.endswith(".__numel__"):
+            continue
+        if name in processed:
+            continue
+        scales_key = f"{name}.__scales__"
+        if scales_key in state:
+            packed = state[name]
+            scales = state[scales_key].float()
+            shape = state[f"{name}.__shape__"].tolist()
+            numel = state[f"{name}.__numel__"].item()
+            group_size = 64
+            lo = (packed & 0x0F).to(torch.int8) - 8
+            hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
+            unpacked = torch.zeros(packed.numel() * 2, dtype=torch.float32)
+            unpacked[0::2] = lo.float()
+            unpacked[1::2] = hi.float()
+            n_groups = scales.numel()
+            groups = unpacked[:n_groups * group_size].reshape(n_groups, group_size)
+            deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
+            orig_shape = [s for s in shape if s > 0]
+            result[name] = deq.reshape(orig_shape).to(torch.bfloat16)
+            processed.add(name)
+        else:
+            result[name] = state[name].to(torch.bfloat16)
+            processed.add(name)
+    return result
+def dequant_turbo2bit(state):
+    """Dequantize TurboQuant 2-bit (NF2 + WHT) weights."""
+    result = {}
+    processed = set()
+    for name in list(state.keys()):
+        if any(name.endswith(f".{s}") for s in ["packed", "scales", "shape", "numel", "gs", "np2"]):
+            continue
+        if name in processed:
+            continue
+        packed_key = f"{name}.packed"
+        if packed_key in state:
+            gs = state[f"{name}.gs"].item()
+            gs_pow2 = state[f"{name}.np2"].item()
+            numel = state[f"{name}.numel"].item()
+            shape = [s for s in state[f"{name}.shape"].tolist() if s > 0]
+            scales = state[f"{name}.scales"].float()
+            packed = state[packed_key]
+            n_groups = scales.numel()
+            # Unpack 2-bit
+            p = packed.reshape(n_groups, gs // 4)
+            codes = torch.zeros(n_groups, gs, dtype=torch.long)
+            for i in range(4):
+                codes[:, i::4] = (p >> (2 * i)) & 0x03
+            dequant = NF2_CENTROIDS[codes]
+            # Inverse WHT
+            if gs_pow2 > gs:
+                dequant = torch.cat([dequant, torch.zeros(n_groups, gs_pow2 - gs)], dim=1)
+            dequant = fast_wht(dequant)
+            dequant = dequant[:, :gs]
+            dequant = dequant * scales.unsqueeze(1)
+            result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
+            processed.add(name)
+        else:
+            result[name] = state[name].to(torch.bfloat16)
+            processed.add(name)
+    return result
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
+    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
+    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
+    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
+    args = parser.parse_args()
+    if not os.path.exists(args.input):
+        print(f"Error: {args.input} not found")
+        sys.exit(1)
+    # Skip if output already exists and is newer than input
+    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
+        print(f"Cached: {args.output} is up to date")
+        sys.exit(0)
+    print(f"Loading {args.input}...")
+    t0 = time.time()
+    state = load_file(args.input, device=args.device)
+    fmt = detect_format(state)
+    print(f"Format: {fmt}")
+    if fmt == "nf4":
+        result = dequant_nf4(state)
+    elif fmt == "turbo2bit":
+        result = dequant_turbo2bit(state)
+    else:
+        print("Already plain bf16/fp16 — copying")
+        result = {k: v.to(torch.bfloat16) for k, v in state.items()}
+    t1 = time.time()
+    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")
+    print(f"Saving to {args.output}...")
+    save_file(result, args.output)
+    size_gb = os.path.getsize(args.output) / 1024**3
+    print(f"Done: {size_gb:.2f} GB")
+if __name__ == "__main__":
+    main()