npm - open-agents-ai - Versions diffs - 0.185.34 → 0.185.36 - Mend

open-agents-ai 0.185.34 → 0.185.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/index.js +352 -132
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -26404,94 +26404,103 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
               this.emit({ type: "error", content: `Backend error: ${reqErr.message}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
               break;
             }
-            const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
-            if (!recovered) {
-              const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
-              const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
-              this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
-              if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
-                this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+            if (this.handleMaxTokensError(reqErr, chatRequest)) {
+              try {
+                response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
+              } catch (retryErr) {
+                this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
                 break;
               }
-              let imageRecovered = false;
-              if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
-                imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
-                if (imageRecovered) {
-                  try {
-                    const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
-                    response = imgRetry;
-                  } catch (imgRetryErr) {
-                    const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
-                    this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
-                    imageRecovered = false;
-                    break;
-                  }
-                } else {
+            } else {
+              const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
+              if (!recovered) {
+                const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
+                const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
+                this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
+                  this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
                   break;
                 }
-              }
-              if (imageRecovered) {
-              } else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
-                this.emit({
-                  type: "status",
-                  content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
-                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                });
-                const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
-                const toolInjectMsg = [
-                  "\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
-                  "This model does not have native tool-calling. To use tools, output a JSON block:",
-                  "```json",
-                  '{"tool": "tool_name", "args": {"param": "value"}}',
-                  "```",
-                  "\nAvailable tools:",
-                  toolDescriptions,
-                  "\nOutput EXACTLY ONE tool call per response in the JSON format above.",
-                  "After seeing the tool result, continue or call another tool.",
-                  'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
-                ].join("\n");
-                messages.push({ role: "system", content: toolInjectMsg });
-                chatRequest.tools = [];
-                try {
-                  response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
-                  const content = response.choices?.[0]?.message?.content ?? "";
-                  const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
-                  if (jsonMatch) {
+                let imageRecovered = false;
+                if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
+                  imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
+                  if (imageRecovered) {
                     try {
-                      const parsed = JSON.parse(jsonMatch[1]);
-                      if (parsed.tool && this.tools.has(parsed.tool)) {
-                        const tool = this.tools.get(parsed.tool);
-                        const result = await tool.execute(parsed.args ?? {});
-                        messages.push({ role: "assistant", content });
-                        messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
-                        if (parsed.tool === "task_complete") {
-                          completed = true;
-                          summary = String(parsed.args?.summary ?? content);
+                      const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
+                      response = imgRetry;
+                    } catch (imgRetryErr) {
+                      const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
+                      this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                      imageRecovered = false;
+                      break;
+                    }
+                  } else {
+                    break;
+                  }
+                }
+                if (imageRecovered) {
+                } else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
+                  this.emit({
+                    type: "status",
+                    content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
+                    timestamp: (/* @__PURE__ */ new Date()).toISOString()
+                  });
+                  const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
+                  const toolInjectMsg = [
+                    "\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
+                    "This model does not have native tool-calling. To use tools, output a JSON block:",
+                    "```json",
+                    '{"tool": "tool_name", "args": {"param": "value"}}',
+                    "```",
+                    "\nAvailable tools:",
+                    toolDescriptions,
+                    "\nOutput EXACTLY ONE tool call per response in the JSON format above.",
+                    "After seeing the tool result, continue or call another tool.",
+                    'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
+                  ].join("\n");
+                  messages.push({ role: "system", content: toolInjectMsg });
+                  chatRequest.tools = [];
+                  try {
+                    response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
+                    const content = response.choices?.[0]?.message?.content ?? "";
+                    const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
+                    if (jsonMatch) {
+                      try {
+                        const parsed = JSON.parse(jsonMatch[1]);
+                        if (parsed.tool && this.tools.has(parsed.tool)) {
+                          const tool = this.tools.get(parsed.tool);
+                          const result = await tool.execute(parsed.args ?? {});
+                          messages.push({ role: "assistant", content });
+                          messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
+                          if (parsed.tool === "task_complete") {
+                            completed = true;
+                            summary = String(parsed.args?.summary ?? content);
+                          }
+                          toolCallCount++;
+                          continue;
                         }
-                        toolCallCount++;
-                        continue;
+                      } catch {
                       }
-                    } catch {
                     }
+                    messages.push({ role: "assistant", content });
+                    continue;
+                  } catch (retryErr2) {
+                    const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
+                    this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                    break;
                   }
-                  messages.push({ role: "assistant", content });
-                  continue;
-                } catch (retryErr2) {
-                  const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
-                  this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                }
+                if (!imageRecovered) {
+                  this.emit({
+                    type: "error",
+                    content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
+                    timestamp: (/* @__PURE__ */ new Date()).toISOString()
+                  });
                   break;
                 }
               }
-              if (!imageRecovered) {
-                this.emit({
-                  type: "error",
-                  content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
-                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                });
-                break;
-              }
+              response = recovered ?? response;
             }
-            response = recovered ?? response;
           }
           totalTokens += response.usage?.totalTokens ?? 0;
           promptTokens += response.usage?.promptTokens ?? 0;
@@ -27056,15 +27065,24 @@ Integrate this guidance into your current approach. Continue working on the task
                 this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
                 break;
               }
-              const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
-              if (!recovered) {
-                const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
-                const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
-                this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
-                this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
-                break;
+              if (this.handleMaxTokensError(reqErr, chatRequest)) {
+                try {
+                  response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
+                } catch (retryErr) {
+                  this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                  break;
+                }
+              } else {
+                const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
+                if (!recovered) {
+                  const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
+                  const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
+                  this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                  this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
+                  break;
+                }
+                response = recovered;
               }
-              response = recovered;
             }
             totalTokens += response.usage?.totalTokens ?? 0;
             promptTokens += response.usage?.promptTokens ?? 0;
@@ -28443,6 +28461,28 @@ ${transcript}`
       // -------------------------------------------------------------------------
       // Transient error recovery — retry on 502, fetch failed, timeouts
       // -------------------------------------------------------------------------
+      /**
+       * Detect a "max_tokens / max_completion_tokens is too large ... allows up to N" rejection by pattern-matching the error message (the HTTP status itself is not inspected) and adopt N as the new limit.
+       * Returns true when a positive N was parsed and written to both this.options.maxTokens and chatRequest.maxTokens (the caller should retry); returns false, changing nothing, otherwise.
+       */
+      handleMaxTokensError(err, chatRequest) {
+        const msg = err instanceof Error ? err.message : String(err);
+        const match = msg.match(/max_?(?:completion_?)?tokens\s+is\s+too\s+large.*?allows?\s+up\s+to\s+(\d+)/i); // capture group 1 is the limit quoted in the message
+        if (!match)
+          return false;
+        const serverLimit = parseInt(match[1], 10);
+        if (isNaN(serverLimit) || serverLimit <= 0) // ignore a zero or unparsable limit rather than applying it
+          return false;
+        const prev = this.options.maxTokens; // remembered only for the status message below
+        this.options.maxTokens = serverLimit; // update the instance-level option as well as this request
+        chatRequest.maxTokens = serverLimit;
+        this.emit({
+          type: "status",
+          content: `Server max_tokens limit is ${serverLimit} (was ${prev}) \u2014 auto-adjusted`,
+          timestamp: (/* @__PURE__ */ new Date()).toISOString()
+        });
+        return true;
+      }
       /** Detect whether an error is transient (worth retrying) */
       isTransientError(err) {
         if (err instanceof Error && err.fatal)
@@ -41301,11 +41341,11 @@ function execAsync(cmd, opts = {}) {
     child.stderr?.on("data", (d) => {
       stderr += d.toString();
     });
-    child.on("close", (code) => {
+    child.on("close", (code, signal) => {
       if (code === 0)
         resolve36(stdout.trim());
       else
-        reject(new Error(`Exit ${code}: ${stderr.slice(0, 500)}`));
+        reject(new Error(`Exit ${code}${signal ? ` (signal: ${signal})` : ""}: ${stderr.slice(0, 500)}`));
     });
     child.on("error", reject);
   });
@@ -41314,8 +41354,8 @@ function selectWeightTier(vramGB) {
   if (vramGB >= 48)
     return "original";
   if (vramGB >= 16)
-    return "nf4";
-  return "turbo2bit";
+    return "nf4-distilled";
+  return "nf4";
 }
 function detectJetson() {
   try {
@@ -41499,9 +41539,21 @@ async function installPersonaPlex(onInfo, weightTier) {
         return false;
       }
     }
-    await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
+    if (isAarch64) {
+      log("ARM64: Installing moshi (--no-deps to preserve JetPack torch)...");
+      await execAsync(`"${pip}" install --quiet --no-deps "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
+      log("ARM64: Installing remaining moshi dependencies...");
+      await execAsync(`"${pip}" install --quiet "numpy>=1.26,<2.2" "safetensors>=0.4.0,<0.5" "huggingface-hub>=0.24,<0.25" "einops==0.7" "sentencepiece==0.2" "sounddevice==0.5" "aiohttp>=3.10.5,<3.11"`, { timeout: 3e5 });
+    } else {
+      await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
+    }
   } catch (err) {
     log(`Moshi install failed: ${err instanceof Error ? err.message : String(err)}`);
+    if (isAarch64) {
+      log("ARM64: This often means the pip process was OOM-killed.");
+      log("Check: dmesg | grep -i 'oom\\|killed' | tail -5");
+      log("Ensure JetPack PyTorch is installed: pip3 show torch");
+    }
     try {
       await execAsync(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
     } catch {
@@ -41526,6 +41578,104 @@ async function installPersonaPlex(onInfo, weightTier) {
     }
   } catch {
   }
+  try {
+    const sitePackages = execSync27(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
+      encoding: "utf8",
+      timeout: 5e3,
+      stdio: "pipe"
+    }).trim();
+    const loadersFile = join54(sitePackages, "models", "loaders.py");
+    if (existsSync37(loadersFile)) {
+      let src = readFileSync28(loadersFile, "utf8");
+      if (!src.includes("_dequantize_2bit_state_dict")) {
+        const dequantPatch = `
+import math
+# NF2 centroids (Lloyd-Max optimal for Gaussian distribution)
+_NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
+def _is_2bit_quantized(filename):
+    return "turbo2bit" in str(filename).lower() or "2bit" in str(filename).lower()
+def _fast_wht(x):
+    n = x.shape[-1]
+    h = 1
+    while h < n:
+        x_view = x.view(*x.shape[:-1], -1, 2, h)
+        a = x_view[..., 0, :].clone()
+        b = x_view[..., 1, :].clone()
+        x_view[..., 0, :] = a + b
+        x_view[..., 1, :] = a - b
+        x = x_view.reshape(*x.shape)
+        h *= 2
+    return x / math.sqrt(n)
+def _dequantize_2bit_state_dict(state_dict):
+    result = {}
+    processed = set()
+    meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
+    base_names = set()
+    for key in state_dict:
+        if key.endswith(".packed"):
+            base_names.add(key[:-len(".packed")])
+    for name in base_names:
+        packed_key = f"{name}.packed"
+        if packed_key in state_dict:
+            gs = state_dict[f"{name}.gs"].item()
+            gs_pow2 = state_dict[f"{name}.np2"].item()
+            numel = state_dict[f"{name}.numel"].item()
+            shape = [s for s in state_dict[f"{name}.shape"].tolist() if s > 0]
+            scales = state_dict[f"{name}.scales"].float()
+            packed = state_dict[packed_key]
+            n_groups = scales.numel()
+            p = packed.reshape(n_groups, gs // 4)
+            codes = torch.zeros(n_groups, gs, dtype=torch.long)
+            for i in range(4):
+                codes[:, i::4] = (p >> (2 * i)) & 0x03
+            dequant = _NF2_CENTROIDS[codes]
+            if gs_pow2 > gs:
+                dequant = torch.cat([dequant, torch.zeros(n_groups, gs_pow2 - gs)], dim=1)
+            dequant = _fast_wht(dequant)
+            dequant = dequant[:, :gs]
+            dequant = dequant * scales.unsqueeze(1)
+            result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
+            processed.add(name)
+    for name, tensor in state_dict.items():
+        if any(name.endswith(s) for s in meta_suffixes):
+            continue
+        if name not in processed:
+            result[name] = tensor.to(torch.bfloat16)
+    return result
+`;
+        const insertPoint = src.indexOf("\nSAMPLE_RATE");
+        if (insertPoint > 0) {
+          src = src.slice(0, insertPoint) + dequantPatch + src.slice(insertPoint);
+        }
+        src = src.replace(/( +)# Load state_dict\n( +)if filename\.endswith\("\.safetensors"\):/, `$1is_2bit = _is_2bit_quantized(filename)
+$1# Load state_dict \u2014 2-bit must load to CPU for dequant
+$2load_device = "cpu" if is_2bit else dev.type
+$2if filename.endswith(".safetensors"):`);
+        if (src.includes("device=dev.type)")) {
+          src = src.replace("device=dev.type)", "device=load_device)");
+        }
+        const patchPoint = "# Patch 1: expand depformer";
+        if (src.includes(patchPoint) && !src.includes("_dequantize_2bit_state_dict(state_dict)")) {
+          src = src.replace(patchPoint, `# Dequantize 2-bit weights if needed
+    if is_2bit:
+        logger.info("Dequantizing 2-bit TurboQuant weights...")
+        state_dict = _dequantize_2bit_state_dict(state_dict)
+    ${patchPoint}`);
+        }
+        writeFileSync16(loadersFile, src);
+        log("Patched loaders.py with 2-bit TurboQuant native dequant support.");
+      }
+    }
+  } catch {
+  }
   if (isAarch64) {
     log("ARM64: Installing bitsandbytes for INT4 inference...");
     try {
@@ -41533,6 +41683,10 @@ async function installPersonaPlex(onInfo, weightTier) {
     } catch {
     }
   }
+  try {
+    await execAsync(`"${pip}" install --quiet accelerate`, { timeout: 12e4, stdio: "pipe" });
+  } catch {
+  }
   try {
     await execAsync(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
   } catch {
@@ -41616,49 +41770,101 @@ async function startPersonaPlexDaemon(onInfo) {
   const repoInfo = WEIGHT_REPOS[tier];
   const extraArgs = [];
   if (tier !== "original") {
-    log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
-    const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
     const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
-    if (!existsSync37(dequantScript)) {
-      const shipped = getShippedVoicesDir();
-      if (shipped) {
-        const src = join54(shipped, "dequant-loader.py");
-        if (existsSync37(src))
-          copyFileSync2(src, dequantScript);
-      }
-    }
-    try {
-      const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
-      if (existsSync37(dequantScript) && existsSync37(weightPath)) {
-        try {
-          execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
+    if (tier === "nf4-distilled") {
+      log(`Weight tier: ${tier} \u2014 distilled NF4 (90% token match, ${repoInfo.sizeGB}GB)...`);
+      try {
+        const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}', token=False))"`, { encoding: "utf8", timeout: 6e4, stdio: "pipe" }).trim();
+        if (existsSync37(weightPath)) {
+          if (!existsSync37(cachedBf16)) {
+            log("Converting .pt checkpoint to safetensors (one-time)...");
+            execSync27(`"${venvPython2}" -c "
+import torch; from safetensors.torch import save_file
+state = torch.load('${weightPath}', map_location='cpu', weights_only=True)
+state = {k: v.to(torch.bfloat16) if v.is_floating_point() else v for k, v in state.items()}
+save_file(state, '${cachedBf16}')
+print('Converted')
+"`, { timeout: 18e4, stdio: "pipe" });
+          }
           if (existsSync37(cachedBf16)) {
             extraArgs.push("--moshi-weight", cachedBf16);
-            log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
+            log(`Using distilled weights: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
+          } else {
+            extraArgs.push("--moshi-weight", weightPath);
           }
-        } catch (e) {
-          log(`Dequantization failed \u2014 server will try to load original weights`);
         }
+      } catch (e) {
+        log(`Failed to load distilled weights \u2014 falling back to standard NF4`);
       }
-      try {
-        const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
-        if (existsSync37(mimiPath))
-          extraArgs.push("--mimi-weight", mimiPath);
-      } catch {
+    } else {
+      log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
+      const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
+      if (!existsSync37(dequantScript)) {
+        const shipped = getShippedVoicesDir();
+        if (shipped) {
+          const src = join54(shipped, "dequant-loader.py");
+          if (existsSync37(src))
+            copyFileSync2(src, dequantScript);
+        }
       }
       try {
-        const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
-        if (existsSync37(tokPath))
-          extraArgs.push("--tokenizer", tokPath);
+        const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+        if (existsSync37(dequantScript) && existsSync37(weightPath)) {
+          try {
+            execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
+            if (existsSync37(cachedBf16)) {
+              extraArgs.push("--moshi-weight", cachedBf16);
+              log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
+            }
+          } catch (e) {
+            log(`Dequantization failed \u2014 server will try to load original weights`);
+          }
+        }
+        try {
+          const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+          if (existsSync37(mimiPath))
+            extraArgs.push("--mimi-weight", mimiPath);
+        } catch {
+        }
+        try {
+          const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
+          if (existsSync37(tokPath))
+            extraArgs.push("--tokenizer", tokPath);
+        } catch {
+        }
       } catch {
+        log(`Weight file not found \u2014 server will download on first run`);
       }
+      extraArgs.push("--hf-repo", repoInfo.repo);
+    }
+  }
+  let hybridEnabled = false;
+  let ollamaModel = process.env["HYBRID_LLM_MODEL"] || "";
+  if (!ollamaModel) {
+    try {
+      const oaConfig = JSON.parse(readFileSync28(join54(homedir13(), ".open-agents", "config.json"), "utf8"));
+      if (oaConfig.model)
+        ollamaModel = oaConfig.model;
     } catch {
-      log(`Weight file not found \u2014 server will download on first run`);
     }
-    extraArgs.push("--hf-repo", repoInfo.repo);
   }
-  log(`Starting PersonaPlex daemon (${tier} tier)...`);
-  const child = spawn19(venvPython2, [
+  if (!ollamaModel)
+    ollamaModel = "qwen3.5:4b";
+  try {
+    const ollamaCheck = execSync27("curl -s http://localhost:11434/api/tags", {
+      timeout: 3e3,
+      stdio: "pipe",
+      encoding: "utf8"
+    });
+    if (ollamaCheck.includes("models")) {
+      hybridEnabled = true;
+      log(`Hybrid mode: PersonaPlex voice + ${ollamaModel} reasoning`);
+    }
+  } catch {
+    log("Ollama not detected \u2014 running PersonaPlex standalone (no hybrid)");
+  }
+  log(`Starting PersonaPlex daemon (${tier} tier${hybridEnabled ? ", hybrid" : ""})...`);
+  const serverArgs = [
     "-m",
     "moshi.server",
     "--host",
@@ -41670,10 +41876,19 @@ async function startPersonaPlexDaemon(onInfo) {
     "--device",
     "cuda",
     ...extraArgs
-  ], {
+  ];
+  if (hybridEnabled)
+    serverArgs.push("--hybrid");
+  const serverEnv = { ...process.env };
+  if (hybridEnabled) {
+    serverEnv["HYBRID_ENABLED"] = "1";
+    serverEnv["HYBRID_LLM_MODEL"] = ollamaModel;
+    serverEnv["HYBRID_MODEL_FAST"] = "qwen3.5:4b";
+  }
+  const child = spawn19(venvPython2, serverArgs, {
     stdio: ["ignore", "pipe", "pipe"],
     detached: true,
-    env: { ...process.env },
+    env: serverEnv,
     cwd: PERSONAPLEX_DIR
   });
   if (child.pid) {
@@ -41990,7 +42205,7 @@ var init_personaplex = __esm({
     WEIGHT_REPOS = {
       original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
       nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
-      turbo2bit: { repo: "cudabenchmarktest/personaplex-7b-turbo2bit", file: "model-turbo2bit.safetensors", sizeGB: 2.1, needsToken: false }
+      "nf4-distilled": { repo: "cudabenchmarktest/personaplex-7b-nf4-distilled", file: "student_best.pt", sizeGB: 16.7, needsToken: false }
     };
     PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
     PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
@@ -43757,7 +43972,8 @@ function fitToWidth(text, width) {
   return text + " ".repeat(width - visible.length);
 }
 function showDropPanel(opts) {
-  const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl } = opts;
+  const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl, borderColor } = opts;
+  const bc = borderColor ?? dc.cyan;
   return new Promise((resolve_) => {
     const stdin = process.stdin;
     const hadRawMode = stdin.isRaw;
@@ -43809,16 +44025,16 @@ function showDropPanel(opts) {
       const bottomPad = Math.max(0, availableForPadding - topPad);
       const lines = [];
       const borderH = "\u2508".repeat(Math.max(2, cols - 4));
-      const emptyPipe = `  ${dc.cyan("\u250A")}${" ".repeat(innerSpace)}${dc.cyan("\u250A")}`;
-      lines.push(`  ${dc.cyan(borderH)}`);
+      const emptyPipe = `  ${bc("\u250A")}${" ".repeat(innerSpace)}${bc("\u250A")}`;
+      lines.push(`  ${bc(borderH)}`);
       for (let i = 0; i < topPad; i++)
         lines.push(emptyPipe);
       for (const line of content) {
-        lines.push(`  ${dc.cyan("\u250A")}${line}${dc.cyan("\u250A")}`);
+        lines.push(`  ${bc("\u250A")}${line}${bc("\u250A")}`);
       }
       for (let i = 0; i < bottomPad; i++)
         lines.push(emptyPipe);
-      lines.push(`  ${dc.cyan(borderH)}`);
+      lines.push(`  ${bc(borderH)}`);
       lines.push(`  ${dc.dim("Enter confirm  Esc cancel")}`);
       overlayWrite(lines.join("\n") + "\n");
     }
@@ -49644,18 +49860,22 @@ async function handleSlashCommand(input, ctx) {
           if (currentVoiceModel === "personaplex") {
             if (!cloneArg) {
               const dropResult = await showDropPanel({
-                title: "PersonaPlex Voice Clone \u2014 Drop Audio File",
-                instruction: "Drop a WAV file (4-10s clean speech) to clone into PersonaPlex",
+                title: "PersonaPlex Voice Clone",
+                instruction: "Drop an audio file (4-10s clean speech) to clone a voice",
                 allowedExtensions: [".wav", ".mp3", ".ogg", ".flac", ".m4a", ".opus", ".aac"],
                 typeLabel: "Audio files",
-                rl: ctx.rl
+                rl: ctx.rl,
+                borderColor: c2.green
               });
               if (dropResult.confirmed && dropResult.path) {
-                const voiceName2 = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
+                const defaultName = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
+                renderInfo(`File: ${dropResult.path}`);
+                renderInfo(`Voice name: ${defaultName} (derived from filename)`);
+                renderInfo("Cloning voice with preprocessing (denoise + normalize + multi-segment)...");
                 const { clonePersonaPlexVoice: clonePersonaPlexVoice3 } = await Promise.resolve().then(() => (init_personaplex(), personaplex_exports));
-                const result2 = await clonePersonaPlexVoice3(dropResult.path, voiceName2, (m) => renderInfo(m));
+                const result2 = await clonePersonaPlexVoice3(dropResult.path, defaultName, (m) => renderInfo(m));
                 if (result2)
-                  renderInfo(`Voice "${voiceName2}" ready \u2014 use /voice list to see all voices`);
+                  renderInfo(`Voice "${defaultName}" ready \u2014 use /voice list to see all voices`);
               } else {
                 renderInfo("Voice clone cancelled.");
               }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.185.34",
+  "version": "0.185.36",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",