open-agents-ai 0.185.35 → 0.185.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/index.js +355 -134
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -26404,94 +26404,103 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
26404
26404
  this.emit({ type: "error", content: `Backend error: ${reqErr.message}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26405
26405
  break;
26406
26406
  }
26407
- const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
26408
- if (!recovered) {
26409
- const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
26410
- const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
26411
- this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26412
- if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
26413
- this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26407
+ if (this.handleMaxTokensError(reqErr, chatRequest)) {
26408
+ try {
26409
+ response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26410
+ } catch (retryErr) {
26411
+ this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26414
26412
  break;
26415
26413
  }
26416
- let imageRecovered = false;
26417
- if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
26418
- imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
26419
- if (imageRecovered) {
26420
- try {
26421
- const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26422
- response = imgRetry;
26423
- } catch (imgRetryErr) {
26424
- const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
26425
- this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26426
- imageRecovered = false;
26427
- break;
26428
- }
26429
- } else {
26414
+ } else {
26415
+ const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
26416
+ if (!recovered) {
26417
+ const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
26418
+ const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
26419
+ this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26420
+ if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
26421
+ this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26430
26422
  break;
26431
26423
  }
26432
- }
26433
- if (imageRecovered) {
26434
- } else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
26435
- this.emit({
26436
- type: "status",
26437
- content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
26438
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
26439
- });
26440
- const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
26441
- const toolInjectMsg = [
26442
- "\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
26443
- "This model does not have native tool-calling. To use tools, output a JSON block:",
26444
- "```json",
26445
- '{"tool": "tool_name", "args": {"param": "value"}}',
26446
- "```",
26447
- "\nAvailable tools:",
26448
- toolDescriptions,
26449
- "\nOutput EXACTLY ONE tool call per response in the JSON format above.",
26450
- "After seeing the tool result, continue or call another tool.",
26451
- 'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
26452
- ].join("\n");
26453
- messages.push({ role: "system", content: toolInjectMsg });
26454
- chatRequest.tools = [];
26455
- try {
26456
- response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26457
- const content = response.choices?.[0]?.message?.content ?? "";
26458
- const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
26459
- if (jsonMatch) {
26424
+ let imageRecovered = false;
26425
+ if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
26426
+ imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
26427
+ if (imageRecovered) {
26460
26428
  try {
26461
- const parsed = JSON.parse(jsonMatch[1]);
26462
- if (parsed.tool && this.tools.has(parsed.tool)) {
26463
- const tool = this.tools.get(parsed.tool);
26464
- const result = await tool.execute(parsed.args ?? {});
26465
- messages.push({ role: "assistant", content });
26466
- messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
26467
- if (parsed.tool === "task_complete") {
26468
- completed = true;
26469
- summary = String(parsed.args?.summary ?? content);
26429
+ const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26430
+ response = imgRetry;
26431
+ } catch (imgRetryErr) {
26432
+ const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
26433
+ this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26434
+ imageRecovered = false;
26435
+ break;
26436
+ }
26437
+ } else {
26438
+ break;
26439
+ }
26440
+ }
26441
+ if (imageRecovered) {
26442
+ } else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
26443
+ this.emit({
26444
+ type: "status",
26445
+ content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
26446
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
26447
+ });
26448
+ const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
26449
+ const toolInjectMsg = [
26450
+ "\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
26451
+ "This model does not have native tool-calling. To use tools, output a JSON block:",
26452
+ "```json",
26453
+ '{"tool": "tool_name", "args": {"param": "value"}}',
26454
+ "```",
26455
+ "\nAvailable tools:",
26456
+ toolDescriptions,
26457
+ "\nOutput EXACTLY ONE tool call per response in the JSON format above.",
26458
+ "After seeing the tool result, continue or call another tool.",
26459
+ 'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
26460
+ ].join("\n");
26461
+ messages.push({ role: "system", content: toolInjectMsg });
26462
+ chatRequest.tools = [];
26463
+ try {
26464
+ response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26465
+ const content = response.choices?.[0]?.message?.content ?? "";
26466
+ const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
26467
+ if (jsonMatch) {
26468
+ try {
26469
+ const parsed = JSON.parse(jsonMatch[1]);
26470
+ if (parsed.tool && this.tools.has(parsed.tool)) {
26471
+ const tool = this.tools.get(parsed.tool);
26472
+ const result = await tool.execute(parsed.args ?? {});
26473
+ messages.push({ role: "assistant", content });
26474
+ messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
26475
+ if (parsed.tool === "task_complete") {
26476
+ completed = true;
26477
+ summary = String(parsed.args?.summary ?? content);
26478
+ }
26479
+ toolCallCount++;
26480
+ continue;
26470
26481
  }
26471
- toolCallCount++;
26472
- continue;
26482
+ } catch {
26473
26483
  }
26474
- } catch {
26475
26484
  }
26485
+ messages.push({ role: "assistant", content });
26486
+ continue;
26487
+ } catch (retryErr2) {
26488
+ const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
26489
+ this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26490
+ break;
26476
26491
  }
26477
- messages.push({ role: "assistant", content });
26478
- continue;
26479
- } catch (retryErr2) {
26480
- const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
26481
- this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26492
+ }
26493
+ if (!imageRecovered) {
26494
+ this.emit({
26495
+ type: "error",
26496
+ content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
26497
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
26498
+ });
26482
26499
  break;
26483
26500
  }
26484
26501
  }
26485
- if (!imageRecovered) {
26486
- this.emit({
26487
- type: "error",
26488
- content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
26489
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
26490
- });
26491
- break;
26492
- }
26502
+ response = recovered ?? response;
26493
26503
  }
26494
- response = recovered ?? response;
26495
26504
  }
26496
26505
  totalTokens += response.usage?.totalTokens ?? 0;
26497
26506
  promptTokens += response.usage?.promptTokens ?? 0;
@@ -27056,15 +27065,24 @@ Integrate this guidance into your current approach. Continue working on the task
27056
27065
  this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27057
27066
  break;
27058
27067
  }
27059
- const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
27060
- if (!recovered) {
27061
- const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
27062
- const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
27063
- this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27064
- this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27065
- break;
27068
+ if (this.handleMaxTokensError(reqErr, chatRequest)) {
27069
+ try {
27070
+ response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
27071
+ } catch (retryErr) {
27072
+ this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27073
+ break;
27074
+ }
27075
+ } else {
27076
+ const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
27077
+ if (!recovered) {
27078
+ const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
27079
+ const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
27080
+ this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27081
+ this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27082
+ break;
27083
+ }
27084
+ response = recovered;
27066
27085
  }
27067
- response = recovered;
27068
27086
  }
27069
27087
  totalTokens += response.usage?.totalTokens ?? 0;
27070
27088
  promptTokens += response.usage?.promptTokens ?? 0;
@@ -28443,6 +28461,28 @@ ${transcript}`
28443
28461
  // -------------------------------------------------------------------------
28444
28462
  // Transient error recovery — retry on 502, fetch failed, timeouts
28445
28463
  // -------------------------------------------------------------------------
28464
+ /**
28465
+ * Detect max_completion_tokens rejection (HTTP 400) and auto-reduce to the server's limit.
28466
+ * Returns true if maxTokens was reduced and the caller should retry.
28467
+ */
28468
+ handleMaxTokensError(err, chatRequest) {
28469
+ const msg = err instanceof Error ? err.message : String(err);
28470
+ const match = msg.match(/max_?(?:completion_?)?tokens\s+is\s+too\s+large.*?allows?\s+up\s+to\s+(\d+)/i);
28471
+ if (!match)
28472
+ return false;
28473
+ const serverLimit = parseInt(match[1], 10);
28474
+ if (isNaN(serverLimit) || serverLimit <= 0)
28475
+ return false;
28476
+ const prev = this.options.maxTokens;
28477
+ this.options.maxTokens = serverLimit;
28478
+ chatRequest.maxTokens = serverLimit;
28479
+ this.emit({
28480
+ type: "status",
28481
+ content: `Server max_tokens limit is ${serverLimit} (was ${prev}) \u2014 auto-adjusted`,
28482
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
28483
+ });
28484
+ return true;
28485
+ }
28446
28486
  /** Detect whether an error is transient (worth retrying) */
28447
28487
  isTransientError(err) {
28448
28488
  if (err instanceof Error && err.fatal)
@@ -36362,14 +36402,15 @@ var init_voice_session = __esm({
36362
36402
  }
36363
36403
  // ── HTTP handler ──────────────────────────────────────────────────────
36364
36404
  handleHTTP(req, res) {
36365
- if (req.url === "/" || req.url === "/index.html") {
36405
+ const pathname = (req.url ?? "/").split("?")[0];
36406
+ if (pathname === "/" || pathname === "/index.html") {
36366
36407
  res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
36367
36408
  if (this.personaPlexWsUrl) {
36368
36409
  res.end(generatePersonaPlexHTML(this.personaPlexWsUrl, this.personaPlexTextPrompt, this.personaPlexVoicePrompt));
36369
36410
  } else {
36370
36411
  res.end(generateFrontendHTML());
36371
36412
  }
36372
- } else if (req.url === "/health") {
36413
+ } else if (pathname === "/health") {
36373
36414
  res.writeHead(200, { "Content-Type": "application/json" });
36374
36415
  res.end(JSON.stringify({
36375
36416
  active: this.state.active,
@@ -41301,11 +41342,11 @@ function execAsync(cmd, opts = {}) {
41301
41342
  child.stderr?.on("data", (d) => {
41302
41343
  stderr += d.toString();
41303
41344
  });
41304
- child.on("close", (code) => {
41345
+ child.on("close", (code, signal) => {
41305
41346
  if (code === 0)
41306
41347
  resolve36(stdout.trim());
41307
41348
  else
41308
- reject(new Error(`Exit ${code}: ${stderr.slice(0, 500)}`));
41349
+ reject(new Error(`Exit ${code}${signal ? ` (signal: ${signal})` : ""}: ${stderr.slice(0, 500)}`));
41309
41350
  });
41310
41351
  child.on("error", reject);
41311
41352
  });
@@ -41314,8 +41355,8 @@ function selectWeightTier(vramGB) {
41314
41355
  if (vramGB >= 48)
41315
41356
  return "original";
41316
41357
  if (vramGB >= 16)
41317
- return "nf4";
41318
- return "turbo2bit";
41358
+ return "nf4-distilled";
41359
+ return "nf4";
41319
41360
  }
41320
41361
  function detectJetson() {
41321
41362
  try {
@@ -41499,9 +41540,21 @@ async function installPersonaPlex(onInfo, weightTier) {
41499
41540
  return false;
41500
41541
  }
41501
41542
  }
41502
- await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
41543
+ if (isAarch64) {
41544
+ log("ARM64: Installing moshi (--no-deps to preserve JetPack torch)...");
41545
+ await execAsync(`"${pip}" install --quiet --no-deps "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
41546
+ log("ARM64: Installing remaining moshi dependencies...");
41547
+ await execAsync(`"${pip}" install --quiet "numpy>=1.26,<2.2" "safetensors>=0.4.0,<0.5" "huggingface-hub>=0.24,<0.25" "einops==0.7" "sentencepiece==0.2" "sounddevice==0.5" "aiohttp>=3.10.5,<3.11"`, { timeout: 3e5 });
41548
+ } else {
41549
+ await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
41550
+ }
41503
41551
  } catch (err) {
41504
41552
  log(`Moshi install failed: ${err instanceof Error ? err.message : String(err)}`);
41553
+ if (isAarch64) {
41554
+ log("ARM64: This often means the pip process was OOM-killed.");
41555
+ log("Check: dmesg | grep -i 'oom\\|killed' | tail -5");
41556
+ log("Ensure JetPack PyTorch is installed: pip3 show torch");
41557
+ }
41505
41558
  try {
41506
41559
  await execAsync(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
41507
41560
  } catch {
@@ -41526,6 +41579,104 @@ async function installPersonaPlex(onInfo, weightTier) {
41526
41579
  }
41527
41580
  } catch {
41528
41581
  }
41582
+ try {
41583
+ const sitePackages = execSync27(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
41584
+ encoding: "utf8",
41585
+ timeout: 5e3,
41586
+ stdio: "pipe"
41587
+ }).trim();
41588
+ const loadersFile = join54(sitePackages, "models", "loaders.py");
41589
+ if (existsSync37(loadersFile)) {
41590
+ let src = readFileSync28(loadersFile, "utf8");
41591
+ if (!src.includes("_dequantize_2bit_state_dict")) {
41592
+ const dequantPatch = `
41593
+ import math
41594
+
41595
+ # NF2 centroids (Lloyd-Max optimal for Gaussian distribution)
41596
+ _NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
41597
+
41598
+
41599
+ def _is_2bit_quantized(filename):
41600
+ return "turbo2bit" in str(filename).lower() or "2bit" in str(filename).lower()
41601
+
41602
+
41603
+ def _fast_wht(x):
41604
+ n = x.shape[-1]
41605
+ h = 1
41606
+ while h < n:
41607
+ x_view = x.view(*x.shape[:-1], -1, 2, h)
41608
+ a = x_view[..., 0, :].clone()
41609
+ b = x_view[..., 1, :].clone()
41610
+ x_view[..., 0, :] = a + b
41611
+ x_view[..., 1, :] = a - b
41612
+ x = x_view.reshape(*x.shape)
41613
+ h *= 2
41614
+ return x / math.sqrt(n)
41615
+
41616
+
41617
+ def _dequantize_2bit_state_dict(state_dict):
41618
+ result = {}
41619
+ processed = set()
41620
+ meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
41621
+ base_names = set()
41622
+ for key in state_dict:
41623
+ if key.endswith(".packed"):
41624
+ base_names.add(key[:-len(".packed")])
41625
+ for name in base_names:
41626
+ packed_key = f"{name}.packed"
41627
+ if packed_key in state_dict:
41628
+ gs = state_dict[f"{name}.gs"].item()
41629
+ gs_pow2 = state_dict[f"{name}.np2"].item()
41630
+ numel = state_dict[f"{name}.numel"].item()
41631
+ shape = [s for s in state_dict[f"{name}.shape"].tolist() if s > 0]
41632
+ scales = state_dict[f"{name}.scales"].float()
41633
+ packed = state_dict[packed_key]
41634
+ n_groups = scales.numel()
41635
+ p = packed.reshape(n_groups, gs // 4)
41636
+ codes = torch.zeros(n_groups, gs, dtype=torch.long)
41637
+ for i in range(4):
41638
+ codes[:, i::4] = (p >> (2 * i)) & 0x03
41639
+ dequant = _NF2_CENTROIDS[codes]
41640
+ if gs_pow2 > gs:
41641
+ dequant = torch.cat([dequant, torch.zeros(n_groups, gs_pow2 - gs)], dim=1)
41642
+ dequant = _fast_wht(dequant)
41643
+ dequant = dequant[:, :gs]
41644
+ dequant = dequant * scales.unsqueeze(1)
41645
+ result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
41646
+ processed.add(name)
41647
+ for name, tensor in state_dict.items():
41648
+ if any(name.endswith(s) for s in meta_suffixes):
41649
+ continue
41650
+ if name not in processed:
41651
+ result[name] = tensor.to(torch.bfloat16)
41652
+ return result
41653
+ `;
41654
+ const insertPoint = src.indexOf("\nSAMPLE_RATE");
41655
+ if (insertPoint > 0) {
41656
+ src = src.slice(0, insertPoint) + dequantPatch + src.slice(insertPoint);
41657
+ }
41658
+ src = src.replace(/( +)# Load state_dict\n( +)if filename\.endswith\("\.safetensors"\):/, `$1is_2bit = _is_2bit_quantized(filename)
41659
+ $1# Load state_dict \u2014 2-bit must load to CPU for dequant
41660
+ $2load_device = "cpu" if is_2bit else dev.type
41661
+ $2if filename.endswith(".safetensors"):`);
41662
+ if (src.includes("device=dev.type)")) {
41663
+ src = src.replace("device=dev.type)", "device=load_device)");
41664
+ }
41665
+ const patchPoint = "# Patch 1: expand depformer";
41666
+ if (src.includes(patchPoint) && !src.includes("_dequantize_2bit_state_dict(state_dict)")) {
41667
+ src = src.replace(patchPoint, `# Dequantize 2-bit weights if needed
41668
+ if is_2bit:
41669
+ logger.info("Dequantizing 2-bit TurboQuant weights...")
41670
+ state_dict = _dequantize_2bit_state_dict(state_dict)
41671
+
41672
+ ${patchPoint}`);
41673
+ }
41674
+ writeFileSync16(loadersFile, src);
41675
+ log("Patched loaders.py with 2-bit TurboQuant native dequant support.");
41676
+ }
41677
+ }
41678
+ } catch {
41679
+ }
41529
41680
  if (isAarch64) {
41530
41681
  log("ARM64: Installing bitsandbytes for INT4 inference...");
41531
41682
  try {
@@ -41533,6 +41684,10 @@ async function installPersonaPlex(onInfo, weightTier) {
41533
41684
  } catch {
41534
41685
  }
41535
41686
  }
41687
+ try {
41688
+ await execAsync(`"${pip}" install --quiet accelerate`, { timeout: 12e4, stdio: "pipe" });
41689
+ } catch {
41690
+ }
41536
41691
  try {
41537
41692
  await execAsync(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
41538
41693
  } catch {
@@ -41616,49 +41771,101 @@ async function startPersonaPlexDaemon(onInfo) {
41616
41771
  const repoInfo = WEIGHT_REPOS[tier];
41617
41772
  const extraArgs = [];
41618
41773
  if (tier !== "original") {
41619
- log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
41620
- const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
41621
41774
  const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
41622
- if (!existsSync37(dequantScript)) {
41623
- const shipped = getShippedVoicesDir();
41624
- if (shipped) {
41625
- const src = join54(shipped, "dequant-loader.py");
41626
- if (existsSync37(src))
41627
- copyFileSync2(src, dequantScript);
41628
- }
41629
- }
41630
- try {
41631
- const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41632
- if (existsSync37(dequantScript) && existsSync37(weightPath)) {
41633
- try {
41634
- execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
41775
+ if (tier === "nf4-distilled") {
41776
+ log(`Weight tier: ${tier} \u2014 distilled NF4 (90% token match, ${repoInfo.sizeGB}GB)...`);
41777
+ try {
41778
+ const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}', token=False))"`, { encoding: "utf8", timeout: 6e4, stdio: "pipe" }).trim();
41779
+ if (existsSync37(weightPath)) {
41780
+ if (!existsSync37(cachedBf16)) {
41781
+ log("Converting .pt checkpoint to safetensors (one-time)...");
41782
+ execSync27(`"${venvPython2}" -c "
41783
+ import torch; from safetensors.torch import save_file
41784
+ state = torch.load('${weightPath}', map_location='cpu', weights_only=True)
41785
+ state = {k: v.to(torch.bfloat16) if v.is_floating_point() else v for k, v in state.items()}
41786
+ save_file(state, '${cachedBf16}')
41787
+ print('Converted')
41788
+ "`, { timeout: 18e4, stdio: "pipe" });
41789
+ }
41635
41790
  if (existsSync37(cachedBf16)) {
41636
41791
  extraArgs.push("--moshi-weight", cachedBf16);
41637
- log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41792
+ log(`Using distilled weights: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41793
+ } else {
41794
+ extraArgs.push("--moshi-weight", weightPath);
41638
41795
  }
41639
- } catch (e) {
41640
- log(`Dequantization failed \u2014 server will try to load original weights`);
41641
41796
  }
41797
+ } catch (e) {
41798
+ log(`Failed to load distilled weights \u2014 falling back to standard NF4`);
41642
41799
  }
41643
- try {
41644
- const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41645
- if (existsSync37(mimiPath))
41646
- extraArgs.push("--mimi-weight", mimiPath);
41647
- } catch {
41800
+ } else {
41801
+ log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
41802
+ const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
41803
+ if (!existsSync37(dequantScript)) {
41804
+ const shipped = getShippedVoicesDir();
41805
+ if (shipped) {
41806
+ const src = join54(shipped, "dequant-loader.py");
41807
+ if (existsSync37(src))
41808
+ copyFileSync2(src, dequantScript);
41809
+ }
41648
41810
  }
41649
41811
  try {
41650
- const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41651
- if (existsSync37(tokPath))
41652
- extraArgs.push("--tokenizer", tokPath);
41812
+ const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41813
+ if (existsSync37(dequantScript) && existsSync37(weightPath)) {
41814
+ try {
41815
+ execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
41816
+ if (existsSync37(cachedBf16)) {
41817
+ extraArgs.push("--moshi-weight", cachedBf16);
41818
+ log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41819
+ }
41820
+ } catch (e) {
41821
+ log(`Dequantization failed \u2014 server will try to load original weights`);
41822
+ }
41823
+ }
41824
+ try {
41825
+ const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41826
+ if (existsSync37(mimiPath))
41827
+ extraArgs.push("--mimi-weight", mimiPath);
41828
+ } catch {
41829
+ }
41830
+ try {
41831
+ const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41832
+ if (existsSync37(tokPath))
41833
+ extraArgs.push("--tokenizer", tokPath);
41834
+ } catch {
41835
+ }
41653
41836
  } catch {
41837
+ log(`Weight file not found \u2014 server will download on first run`);
41654
41838
  }
41839
+ extraArgs.push("--hf-repo", repoInfo.repo);
41840
+ }
41841
+ }
41842
+ let hybridEnabled = false;
41843
+ let ollamaModel = process.env["HYBRID_LLM_MODEL"] || "";
41844
+ if (!ollamaModel) {
41845
+ try {
41846
+ const oaConfig = JSON.parse(readFileSync28(join54(homedir13(), ".open-agents", "config.json"), "utf8"));
41847
+ if (oaConfig.model)
41848
+ ollamaModel = oaConfig.model;
41655
41849
  } catch {
41656
- log(`Weight file not found \u2014 server will download on first run`);
41657
41850
  }
41658
- extraArgs.push("--hf-repo", repoInfo.repo);
41659
41851
  }
41660
- log(`Starting PersonaPlex daemon (${tier} tier)...`);
41661
- const child = spawn19(venvPython2, [
41852
+ if (!ollamaModel)
41853
+ ollamaModel = "qwen3.5:4b";
41854
+ try {
41855
+ const ollamaCheck = execSync27("curl -s http://localhost:11434/api/tags", {
41856
+ timeout: 3e3,
41857
+ stdio: "pipe",
41858
+ encoding: "utf8"
41859
+ });
41860
+ if (ollamaCheck.includes("models")) {
41861
+ hybridEnabled = true;
41862
+ log(`Hybrid mode: PersonaPlex voice + ${ollamaModel} reasoning`);
41863
+ }
41864
+ } catch {
41865
+ log("Ollama not detected \u2014 running PersonaPlex standalone (no hybrid)");
41866
+ }
41867
+ log(`Starting PersonaPlex daemon (${tier} tier${hybridEnabled ? ", hybrid" : ""})...`);
41868
+ const serverArgs = [
41662
41869
  "-m",
41663
41870
  "moshi.server",
41664
41871
  "--host",
@@ -41670,10 +41877,19 @@ async function startPersonaPlexDaemon(onInfo) {
41670
41877
  "--device",
41671
41878
  "cuda",
41672
41879
  ...extraArgs
41673
- ], {
41880
+ ];
41881
+ if (hybridEnabled)
41882
+ serverArgs.push("--hybrid");
41883
+ const serverEnv = { ...process.env };
41884
+ if (hybridEnabled) {
41885
+ serverEnv["HYBRID_ENABLED"] = "1";
41886
+ serverEnv["HYBRID_LLM_MODEL"] = ollamaModel;
41887
+ serverEnv["HYBRID_MODEL_FAST"] = "qwen3.5:4b";
41888
+ }
41889
+ const child = spawn19(venvPython2, serverArgs, {
41674
41890
  stdio: ["ignore", "pipe", "pipe"],
41675
41891
  detached: true,
41676
- env: { ...process.env },
41892
+ env: serverEnv,
41677
41893
  cwd: PERSONAPLEX_DIR
41678
41894
  });
41679
41895
  if (child.pid) {
@@ -41990,7 +42206,7 @@ var init_personaplex = __esm({
41990
42206
  WEIGHT_REPOS = {
41991
42207
  original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
41992
42208
  nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
41993
- turbo2bit: { repo: "cudabenchmarktest/personaplex-7b-turbo2bit", file: "model-turbo2bit.safetensors", sizeGB: 2.1, needsToken: false }
42209
+ "nf4-distilled": { repo: "cudabenchmarktest/personaplex-7b-nf4-distilled", file: "student_best.pt", sizeGB: 16.7, needsToken: false }
41994
42210
  };
41995
42211
  PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
41996
42212
  PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
@@ -43757,7 +43973,8 @@ function fitToWidth(text, width) {
43757
43973
  return text + " ".repeat(width - visible.length);
43758
43974
  }
43759
43975
  function showDropPanel(opts) {
43760
- const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl } = opts;
43976
+ const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl, borderColor } = opts;
43977
+ const bc = borderColor ?? dc.cyan;
43761
43978
  return new Promise((resolve_) => {
43762
43979
  const stdin = process.stdin;
43763
43980
  const hadRawMode = stdin.isRaw;
@@ -43809,16 +44026,16 @@ function showDropPanel(opts) {
43809
44026
  const bottomPad = Math.max(0, availableForPadding - topPad);
43810
44027
  const lines = [];
43811
44028
  const borderH = "\u2508".repeat(Math.max(2, cols - 4));
43812
- const emptyPipe = ` ${dc.cyan("\u250A")}${" ".repeat(innerSpace)}${dc.cyan("\u250A")}`;
43813
- lines.push(` ${dc.cyan(borderH)}`);
44029
+ const emptyPipe = ` ${bc("\u250A")}${" ".repeat(innerSpace)}${bc("\u250A")}`;
44030
+ lines.push(` ${bc(borderH)}`);
43814
44031
  for (let i = 0; i < topPad; i++)
43815
44032
  lines.push(emptyPipe);
43816
44033
  for (const line of content) {
43817
- lines.push(` ${dc.cyan("\u250A")}${line}${dc.cyan("\u250A")}`);
44034
+ lines.push(` ${bc("\u250A")}${line}${bc("\u250A")}`);
43818
44035
  }
43819
44036
  for (let i = 0; i < bottomPad; i++)
43820
44037
  lines.push(emptyPipe);
43821
- lines.push(` ${dc.cyan(borderH)}`);
44038
+ lines.push(` ${bc(borderH)}`);
43822
44039
  lines.push(` ${dc.dim("Enter confirm Esc cancel")}`);
43823
44040
  overlayWrite(lines.join("\n") + "\n");
43824
44041
  }
@@ -49644,18 +49861,22 @@ async function handleSlashCommand(input, ctx) {
49644
49861
  if (currentVoiceModel === "personaplex") {
49645
49862
  if (!cloneArg) {
49646
49863
  const dropResult = await showDropPanel({
49647
- title: "PersonaPlex Voice Clone \u2014 Drop Audio File",
49648
- instruction: "Drop a WAV file (4-10s clean speech) to clone into PersonaPlex",
49864
+ title: "PersonaPlex Voice Clone",
49865
+ instruction: "Drop an audio file (4-10s clean speech) to clone a voice",
49649
49866
  allowedExtensions: [".wav", ".mp3", ".ogg", ".flac", ".m4a", ".opus", ".aac"],
49650
49867
  typeLabel: "Audio files",
49651
- rl: ctx.rl
49868
+ rl: ctx.rl,
49869
+ borderColor: c2.green
49652
49870
  });
49653
49871
  if (dropResult.confirmed && dropResult.path) {
49654
- const voiceName2 = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
49872
+ const defaultName = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
49873
+ renderInfo(`File: ${dropResult.path}`);
49874
+ renderInfo(`Voice name: ${defaultName} (derived from filename)`);
49875
+ renderInfo("Cloning voice with preprocessing (denoise + normalize + multi-segment)...");
49655
49876
  const { clonePersonaPlexVoice: clonePersonaPlexVoice3 } = await Promise.resolve().then(() => (init_personaplex(), personaplex_exports));
49656
- const result2 = await clonePersonaPlexVoice3(dropResult.path, voiceName2, (m) => renderInfo(m));
49877
+ const result2 = await clonePersonaPlexVoice3(dropResult.path, defaultName, (m) => renderInfo(m));
49657
49878
  if (result2)
49658
- renderInfo(`Voice "${voiceName2}" ready \u2014 use /voice list to see all voices`);
49879
+ renderInfo(`Voice "${defaultName}" ready \u2014 use /voice list to see all voices`);
49659
49880
  } else {
49660
49881
  renderInfo("Voice clone cancelled.");
49661
49882
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.185.35",
3
+ "version": "0.185.37",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",