npm - open-agents-ai - Versions diffs - 0.185.31 → 0.185.32 - Mend

open-agents-ai 0.185.31 → 0.185.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js +102 -44
package/package.json +1 -1
package/voices/personaplex/dequant-loader.py +174 -0

package/dist/index.js CHANGED

@@ -41281,7 +41281,7 @@ __export(personaplex_exports, {
   startPersonaPlexDaemon: () => startPersonaPlexDaemon,
   stopPersonaPlex: () => stopPersonaPlex
 });
-import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
+import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
 import { join as join54, dirname as dirname18 } from "node:path";
 import { homedir as homedir13 } from "node:os";
 import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
@@ -41293,31 +41293,56 @@ function selectWeightTier(vramGB) {
     return "nf4";
   return "turbo2bit";
 }
+function detectJetson() {
+  try {
+    const model = readFileSync28("/proc/device-tree/model", "utf8").replace(/\0/g, "").trim();
+    if (/jetson|orin|tegra/i.test(model)) {
+      const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
+      const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
+      return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
+    }
+  } catch {
+  }
+  return { isJetson: false, model: "", totalMemGB: 0 };
+}
 function detectPersonaPlexCapability() {
+  const fail = (reason) => ({
+    supported: false,
+    reason,
+    gpuName: "",
+    vramGB: 0,
+    weightTier: "turbo2bit",
+    needsHfToken: false
+  });
+  const jetson = detectJetson();
+  if (jetson.isJetson) {
+    const vramGB = jetson.totalMemGB;
+    if (vramGB < 8)
+      return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
+    const tier = selectWeightTier(vramGB);
+    const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
+    const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
+    return {
+      supported: true,
+      reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
+      gpuName: jetson.model,
+      vramGB,
+      weightTier: effectiveTier,
+      needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
+    };
+  }
   try {
     const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
       encoding: "utf8",
       timeout: 5e3,
       stdio: "pipe"
     }).trim();
-    if (!nvsmi) {
-      return { supported: false, reason: "No NVIDIA GPU detected", gpuName: "", vramGB: 0, weightTier: "turbo2bit", needsHfToken: false };
-    }
+    if (!nvsmi)
+      return fail("No NVIDIA GPU detected");
     const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
-    let vramGB = parseInt(vramMB ?? "0", 10) / 1024;
-    const isJetson = /orin|tegra|jetson/i.test(gpuName ?? "");
-    if (isJetson) {
-      try {
-        const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
-        const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
-        const totalGB = memKB / 1024 / 1024;
-        if (totalGB > vramGB)
-          vramGB = totalGB;
-      } catch {
-      }
-    }
+    const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
     if (vramGB < 8) {
-      return { supported: false, reason: `GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB for 2-bit weights)`, gpuName: gpuName ?? "", vramGB, weightTier: "turbo2bit", needsHfToken: false };
+      return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
     }
     try {
       execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
@@ -41326,7 +41351,7 @@ function detectPersonaPlexCapability() {
       });
     } catch {
       const tier2 = selectWeightTier(vramGB);
-      return { supported: false, reason: "PyTorch CUDA not available", gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
+      return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
     }
     const tier = selectWeightTier(vramGB);
     const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
@@ -41340,7 +41365,7 @@ function detectPersonaPlexCapability() {
       needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
     };
   } catch {
-    return { supported: false, reason: "nvidia-smi not found", gpuName: "", vramGB: 0, weightTier: "turbo2bit", needsHfToken: false };
+    return fail("No NVIDIA GPU detected (nvidia-smi not found)");
   }
 }
 function isPersonaPlexRunning() {
@@ -41383,11 +41408,20 @@ async function installPersonaPlex(onInfo, weightTier) {
   const log = onInfo ?? (() => {
   });
   mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
+  let arch2 = "";
+  try {
+    arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
+  } catch {
+  }
+  const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
+  if (isAarch64)
+    log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
   const venvDir = join54(PERSONAPLEX_DIR, "venv");
   if (!existsSync37(venvDir)) {
     log("Creating Python virtual environment...");
     try {
-      execSync27(`python3 -m venv "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
+      const ssp = isAarch64 ? " --system-site-packages" : "";
+      execSync27(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
     } catch (err) {
       log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
       return false;
@@ -41395,14 +41429,6 @@ async function installPersonaPlex(onInfo, weightTier) {
   }
   const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
   const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
-  let arch2 = "";
-  try {
-    arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
-  } catch {
-  }
-  const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
-  if (isAarch64)
-    log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
   log("Checking system dependencies (libopus)...");
   try {
     if (process.platform === "linux") {
@@ -41566,7 +41592,39 @@ async function startPersonaPlexDaemon(onInfo) {
   const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
   const sslDir = join54(PERSONAPLEX_DIR, "ssl");
   mkdirSync15(sslDir, { recursive: true });
-  log("Starting PersonaPlex daemon (loading ~7B model)...");
+  const tier = getWeightTier();
+  const repoInfo = WEIGHT_REPOS[tier];
+  const extraArgs = [];
+  if (tier !== "original") {
+    log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
+    const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
+    const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
+    if (!existsSync37(dequantScript)) {
+      const shipped = getShippedVoicesDir();
+      if (shipped) {
+        const src = join54(shipped, "dequant-loader.py");
+        if (existsSync37(src))
+          copyFileSync2(src, dequantScript);
+      }
+    }
+    try {
+      const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+      if (existsSync37(dequantScript) && existsSync37(weightPath)) {
+        try {
+          execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
+          if (existsSync37(cachedBf16)) {
+            extraArgs.push("--moshi-weight", cachedBf16);
+            log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
+          }
+        } catch (e) {
+          log(`Dequantization failed \u2014 server will try to load original weights`);
+        }
+      }
+    } catch {
+      log(`Weight file not found \u2014 server will download on first run`);
+    }
+  }
+  log(`Starting PersonaPlex daemon (${tier} tier)...`);
   const child = spawn19(venvPython2, [
     "-m",
     "moshi.server",
@@ -41577,7 +41635,8 @@ async function startPersonaPlexDaemon(onInfo) {
     "--ssl",
     sslDir,
     "--device",
-    "cuda"
+    "cuda",
+    ...extraArgs
   ], {
     stdio: ["ignore", "pipe", "pipe"],
     detached: true,
@@ -41664,8 +41723,7 @@ function listPersonaPlexVoices() {
   }
   if (existsSync37(CUSTOM_VOICES_DIR)) {
     try {
-      const { readdirSync: readdirSync24 } = __require("node:fs");
-      for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
+      for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
         if (f.endsWith(".pt")) {
           const name = f.replace(/\.pt$/, "");
           voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
@@ -45416,7 +45474,7 @@ __export(voice_exports, {
   registerCustomOnnxModel: () => registerCustomOnnxModel,
   resetNarrationContext: () => resetNarrationContext
 });
-import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync13 } from "node:fs";
+import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
 import { join as join58, dirname as dirname19 } from "node:path";
 import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
 import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
@@ -46550,7 +46608,7 @@ var init_voice = __esm({
           const p = join58(dir, f);
           let size = 0;
           try {
-            size = statSync13(p).size;
+            size = statSync14(p).size;
           } catch {
           }
           return {
@@ -48166,7 +48224,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
 // packages/cli/dist/tui/commands.js
 import * as nodeOs from "node:os";
 import { execSync as nodeExecSync } from "node:child_process";
-import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync14, rmSync } from "node:fs";
+import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
 import { join as join59 } from "node:path";
 function safeLog(text) {
   if (isNeovimActive()) {
@@ -48979,7 +49037,7 @@ async function handleSlashCommand(input, ctx) {
           ipfsFiles = files.length;
           for (const f of files) {
             try {
-              ipfsBytes += statSync14(join59(ipfsLocalDir, f)).size;
+              ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
             } catch {
             }
           }
@@ -48993,7 +49051,7 @@ async function handleSlashCommand(input, ctx) {
               else {
                 heliaBlocks++;
                 try {
-                  heliaBytes += statSync14(join59(dir, entry.name)).size;
+                  heliaBytes += statSync15(join59(dir, entry.name)).size;
                 } catch {
                 }
               }
@@ -49086,7 +49144,7 @@ async function handleSlashCommand(input, ctx) {
           const count = memStore.count();
           lines.push(`
   ${c2.bold("Structured Memory (SQLite)")}`);
-          lines.push(`    Memories: ${c2.bold(String(count))}   DB: ${c2.dim(formatFileSize(statSync14(dbPath).size))}`);
+          lines.push(`    Memories: ${c2.bold(String(count))}   DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
           cDb(db);
         }
       } catch {
@@ -49117,7 +49175,7 @@ async function handleSlashCommand(input, ctx) {
               walkStorage(full, subCat);
             } else {
               try {
-                const sz = statSync14(full).size;
+                const sz = statSync15(full).size;
                 totalBytes += sz;
                 if (!categories[category])
                   categories[category] = { files: 0, bytes: 0 };
@@ -49418,7 +49476,7 @@ async function handleSlashCommand(input, ctx) {
           const caps = detectPersonaPlexCapability2();
           if (!caps.supported) {
             renderWarning(`PersonaPlex not available: ${caps.reason}`);
-            renderInfo("Requirements: NVIDIA GPU with \u226516GB VRAM (RTX 3090/4090/A100+), CUDA 12.1+, PyTorch");
+            renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
             return "handled";
           }
           renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB VRAM) \u2014 PersonaPlex compatible \u2713`);
@@ -51087,7 +51145,7 @@ async function showCohereDashboard(ctx) {
                 const snapItems = snaps.slice(0, 20).map((f) => ({
                   key: f,
                   label: f.replace(".json", ""),
-                  detail: `${formatFileSize(statSync14(join59(snapDir, f)).size)}`
+                  detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
                 }));
                 if (snapItems.length > 0) {
                   await tuiSelect({
@@ -59364,7 +59422,7 @@ var init_tool_policy = __esm({
 });
 // packages/cli/dist/tui/telegram-bridge.js
-import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync15 } from "node:fs";
+import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
 import { join as join68, resolve as resolve30 } from "node:path";
 import { writeFile as writeFileAsync } from "node:fs/promises";
 function convertMarkdownToTelegramHTML(md) {
@@ -71366,7 +71424,7 @@ __export(index_repo_exports, {
   indexRepoCommand: () => indexRepoCommand
 });
 import { resolve as resolve34 } from "node:path";
-import { existsSync as existsSync56, statSync as statSync16 } from "node:fs";
+import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
 import { cwd as cwd2 } from "node:process";
 async function indexRepoCommand(opts, _config) {
   const repoRoot = resolve34(opts.repoPath ?? cwd2());
@@ -71376,7 +71434,7 @@ async function indexRepoCommand(opts, _config) {
     printError(`Path does not exist: ${repoRoot}`);
     process.exit(1);
   }
-  const stat5 = statSync16(repoRoot);
+  const stat5 = statSync17(repoRoot);
   if (!stat5.isDirectory()) {
     printError(`Path is not a directory: ${repoRoot}`);
     process.exit(1);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.185.31",
+  "version": "0.185.32",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",

package/voices/personaplex/dequant-loader.py ADDED

@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
+For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
+bf16 safetensors file that moshi.server can load natively.
+Usage:
+  python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
+  python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
+The output file can then be passed to moshi.server via --moshi-weight.
+"""
+import os, sys, math, time
+import torch
+from safetensors.torch import load_file, save_file
+NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
+def fast_wht(x):
+    """Vectorized Walsh-Hadamard Transform."""
+    n = x.shape[-1]
+    h = 1
+    while h < n:
+        x_view = x.view(*x.shape[:-1], -1, 2, h)
+        a = x_view[..., 0, :].clone()
+        b = x_view[..., 1, :].clone()
+        x_view[..., 0, :] = a + b
+        x_view[..., 1, :] = a - b
+        x = x_view.reshape(*x.shape)
+        h *= 2
+    return x / math.sqrt(n)
+def detect_format(state):
+    """Detect if weights are NF4 (INT4), TurboQuant 2-bit, or plain."""
+    has_scales = any(k.endswith(".__scales__") for k in state)
+    has_packed = any(k.endswith(".packed") for k in state)
+    if has_packed:
+        return "turbo2bit"
+    if has_scales:
+        return "nf4"
+    return "plain"
+def dequant_nf4(state):
+    """Dequantize INT4 NF4 weights."""
+    result = {}
+    processed = set()
+    for name in list(state.keys()):
+        if name.endswith(".__scales__") or name.endswith(".__shape__") or name.endswith(".__numel__"):
+            continue
+        if name in processed:
+            continue
+        scales_key = f"{name}.__scales__"
+        if scales_key in state:
+            packed = state[name]
+            scales = state[scales_key].float()
+            shape = state[f"{name}.__shape__"].tolist()
+            numel = state[f"{name}.__numel__"].item()
+            group_size = 64
+            lo = (packed & 0x0F).to(torch.int8) - 8
+            hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
+            unpacked = torch.zeros(packed.numel() * 2, dtype=torch.float32)
+            unpacked[0::2] = lo.float()
+            unpacked[1::2] = hi.float()
+            n_groups = scales.numel()
+            groups = unpacked[:n_groups * group_size].reshape(n_groups, group_size)
+            deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
+            orig_shape = [s for s in shape if s > 0]
+            result[name] = deq.reshape(orig_shape).to(torch.bfloat16)
+            processed.add(name)
+        else:
+            result[name] = state[name].to(torch.bfloat16)
+            processed.add(name)
+    return result
+def dequant_turbo2bit(state):
+    """Dequantize TurboQuant 2-bit (NF2 + WHT) weights."""
+    result = {}
+    processed = set()
+    for name in list(state.keys()):
+        if any(name.endswith(f".{s}") for s in ["packed", "scales", "shape", "numel", "gs", "np2"]):
+            continue
+        if name in processed:
+            continue
+        packed_key = f"{name}.packed"
+        if packed_key in state:
+            gs = state[f"{name}.gs"].item()
+            gs_pow2 = state[f"{name}.np2"].item()
+            numel = state[f"{name}.numel"].item()
+            shape = [s for s in state[f"{name}.shape"].tolist() if s > 0]
+            scales = state[f"{name}.scales"].float()
+            packed = state[packed_key]
+            n_groups = scales.numel()
+            # Unpack 2-bit
+            p = packed.reshape(n_groups, gs // 4)
+            codes = torch.zeros(n_groups, gs, dtype=torch.long)
+            for i in range(4):
+                codes[:, i::4] = (p >> (2 * i)) & 0x03
+            dequant = NF2_CENTROIDS[codes]
+            # Inverse WHT
+            if gs_pow2 > gs:
+                dequant = torch.cat([dequant, torch.zeros(n_groups, gs_pow2 - gs)], dim=1)
+            dequant = fast_wht(dequant)
+            dequant = dequant[:, :gs]
+            dequant = dequant * scales.unsqueeze(1)
+            result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
+            processed.add(name)
+        else:
+            result[name] = state[name].to(torch.bfloat16)
+            processed.add(name)
+    return result
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
+    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
+    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
+    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
+    args = parser.parse_args()
+    if not os.path.exists(args.input):
+        print(f"Error: {args.input} not found")
+        sys.exit(1)
+    # Skip if output already exists and is newer than input
+    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
+        print(f"Cached: {args.output} is up to date")
+        sys.exit(0)
+    print(f"Loading {args.input}...")
+    t0 = time.time()
+    state = load_file(args.input, device=args.device)
+    fmt = detect_format(state)
+    print(f"Format: {fmt}")
+    if fmt == "nf4":
+        result = dequant_nf4(state)
+    elif fmt == "turbo2bit":
+        result = dequant_turbo2bit(state)
+    else:
+        print("Already plain bf16/fp16 — copying")
+        result = {k: v.to(torch.bfloat16) for k, v in state.items()}
+    t1 = time.time()
+    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")
+    print(f"Saving to {args.output}...")
+    save_file(result, args.output)
+    size_gb = os.path.getsize(args.output) / 1024**3
+    print(f"Done: {size_gb:.2f} GB")
+if __name__ == "__main__":
+    main()