open-agents-ai 0.185.34 → 0.185.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +352 -132
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -26404,94 +26404,103 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
26404
26404
  this.emit({ type: "error", content: `Backend error: ${reqErr.message}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26405
26405
  break;
26406
26406
  }
26407
- const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
26408
- if (!recovered) {
26409
- const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
26410
- const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
26411
- this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26412
- if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
26413
- this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26407
+ if (this.handleMaxTokensError(reqErr, chatRequest)) {
26408
+ try {
26409
+ response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26410
+ } catch (retryErr) {
26411
+ this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26414
26412
  break;
26415
26413
  }
26416
- let imageRecovered = false;
26417
- if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
26418
- imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
26419
- if (imageRecovered) {
26420
- try {
26421
- const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26422
- response = imgRetry;
26423
- } catch (imgRetryErr) {
26424
- const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
26425
- this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26426
- imageRecovered = false;
26427
- break;
26428
- }
26429
- } else {
26414
+ } else {
26415
+ const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
26416
+ if (!recovered) {
26417
+ const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
26418
+ const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
26419
+ this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26420
+ if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
26421
+ this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26430
26422
  break;
26431
26423
  }
26432
- }
26433
- if (imageRecovered) {
26434
- } else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
26435
- this.emit({
26436
- type: "status",
26437
- content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
26438
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
26439
- });
26440
- const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
26441
- const toolInjectMsg = [
26442
- "\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
26443
- "This model does not have native tool-calling. To use tools, output a JSON block:",
26444
- "```json",
26445
- '{"tool": "tool_name", "args": {"param": "value"}}',
26446
- "```",
26447
- "\nAvailable tools:",
26448
- toolDescriptions,
26449
- "\nOutput EXACTLY ONE tool call per response in the JSON format above.",
26450
- "After seeing the tool result, continue or call another tool.",
26451
- 'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
26452
- ].join("\n");
26453
- messages.push({ role: "system", content: toolInjectMsg });
26454
- chatRequest.tools = [];
26455
- try {
26456
- response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26457
- const content = response.choices?.[0]?.message?.content ?? "";
26458
- const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
26459
- if (jsonMatch) {
26424
+ let imageRecovered = false;
26425
+ if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
26426
+ imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
26427
+ if (imageRecovered) {
26460
26428
  try {
26461
- const parsed = JSON.parse(jsonMatch[1]);
26462
- if (parsed.tool && this.tools.has(parsed.tool)) {
26463
- const tool = this.tools.get(parsed.tool);
26464
- const result = await tool.execute(parsed.args ?? {});
26465
- messages.push({ role: "assistant", content });
26466
- messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
26467
- if (parsed.tool === "task_complete") {
26468
- completed = true;
26469
- summary = String(parsed.args?.summary ?? content);
26429
+ const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26430
+ response = imgRetry;
26431
+ } catch (imgRetryErr) {
26432
+ const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
26433
+ this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26434
+ imageRecovered = false;
26435
+ break;
26436
+ }
26437
+ } else {
26438
+ break;
26439
+ }
26440
+ }
26441
+ if (imageRecovered) {
26442
+ } else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
26443
+ this.emit({
26444
+ type: "status",
26445
+ content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
26446
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
26447
+ });
26448
+ const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
26449
+ const toolInjectMsg = [
26450
+ "\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
26451
+ "This model does not have native tool-calling. To use tools, output a JSON block:",
26452
+ "```json",
26453
+ '{"tool": "tool_name", "args": {"param": "value"}}',
26454
+ "```",
26455
+ "\nAvailable tools:",
26456
+ toolDescriptions,
26457
+ "\nOutput EXACTLY ONE tool call per response in the JSON format above.",
26458
+ "After seeing the tool result, continue or call another tool.",
26459
+ 'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
26460
+ ].join("\n");
26461
+ messages.push({ role: "system", content: toolInjectMsg });
26462
+ chatRequest.tools = [];
26463
+ try {
26464
+ response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
26465
+ const content = response.choices?.[0]?.message?.content ?? "";
26466
+ const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
26467
+ if (jsonMatch) {
26468
+ try {
26469
+ const parsed = JSON.parse(jsonMatch[1]);
26470
+ if (parsed.tool && this.tools.has(parsed.tool)) {
26471
+ const tool = this.tools.get(parsed.tool);
26472
+ const result = await tool.execute(parsed.args ?? {});
26473
+ messages.push({ role: "assistant", content });
26474
+ messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
26475
+ if (parsed.tool === "task_complete") {
26476
+ completed = true;
26477
+ summary = String(parsed.args?.summary ?? content);
26478
+ }
26479
+ toolCallCount++;
26480
+ continue;
26470
26481
  }
26471
- toolCallCount++;
26472
- continue;
26482
+ } catch {
26473
26483
  }
26474
- } catch {
26475
26484
  }
26485
+ messages.push({ role: "assistant", content });
26486
+ continue;
26487
+ } catch (retryErr2) {
26488
+ const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
26489
+ this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26490
+ break;
26476
26491
  }
26477
- messages.push({ role: "assistant", content });
26478
- continue;
26479
- } catch (retryErr2) {
26480
- const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
26481
- this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
26492
+ }
26493
+ if (!imageRecovered) {
26494
+ this.emit({
26495
+ type: "error",
26496
+ content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
26497
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
26498
+ });
26482
26499
  break;
26483
26500
  }
26484
26501
  }
26485
- if (!imageRecovered) {
26486
- this.emit({
26487
- type: "error",
26488
- content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
26489
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
26490
- });
26491
- break;
26492
- }
26502
+ response = recovered ?? response;
26493
26503
  }
26494
- response = recovered ?? response;
26495
26504
  }
26496
26505
  totalTokens += response.usage?.totalTokens ?? 0;
26497
26506
  promptTokens += response.usage?.promptTokens ?? 0;
@@ -27056,15 +27065,24 @@ Integrate this guidance into your current approach. Continue working on the task
27056
27065
  this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27057
27066
  break;
27058
27067
  }
27059
- const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
27060
- if (!recovered) {
27061
- const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
27062
- const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
27063
- this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27064
- this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27065
- break;
27068
+ if (this.handleMaxTokensError(reqErr, chatRequest)) {
27069
+ try {
27070
+ response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
27071
+ } catch (retryErr) {
27072
+ this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27073
+ break;
27074
+ }
27075
+ } else {
27076
+ const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
27077
+ if (!recovered) {
27078
+ const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
27079
+ const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
27080
+ this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27081
+ this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
27082
+ break;
27083
+ }
27084
+ response = recovered;
27066
27085
  }
27067
- response = recovered;
27068
27086
  }
27069
27087
  totalTokens += response.usage?.totalTokens ?? 0;
27070
27088
  promptTokens += response.usage?.promptTokens ?? 0;
@@ -28443,6 +28461,28 @@ ${transcript}`
28443
28461
  // -------------------------------------------------------------------------
28444
28462
  // Transient error recovery — retry on 502, fetch failed, timeouts
28445
28463
  // -------------------------------------------------------------------------
28464
+ /**
28465
+ * Detect max_completion_tokens rejection (HTTP 400) and auto-reduce to the server's limit.
28466
+ * Returns true if maxTokens was reduced and the caller should retry.
28467
+ */
28468
+ handleMaxTokensError(err, chatRequest) {
28469
+ const msg = err instanceof Error ? err.message : String(err);
28470
+ const match = msg.match(/max_?(?:completion_?)?tokens\s+is\s+too\s+large.*?allows?\s+up\s+to\s+(\d+)/i);
28471
+ if (!match)
28472
+ return false;
28473
+ const serverLimit = parseInt(match[1], 10);
28474
+ if (isNaN(serverLimit) || serverLimit <= 0)
28475
+ return false;
28476
+ const prev = this.options.maxTokens;
28477
+ this.options.maxTokens = serverLimit;
28478
+ chatRequest.maxTokens = serverLimit;
28479
+ this.emit({
28480
+ type: "status",
28481
+ content: `Server max_tokens limit is ${serverLimit} (was ${prev}) \u2014 auto-adjusted`,
28482
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
28483
+ });
28484
+ return true;
28485
+ }
28446
28486
  /** Detect whether an error is transient (worth retrying) */
28447
28487
  isTransientError(err) {
28448
28488
  if (err instanceof Error && err.fatal)
@@ -41301,11 +41341,11 @@ function execAsync(cmd, opts = {}) {
41301
41341
  child.stderr?.on("data", (d) => {
41302
41342
  stderr += d.toString();
41303
41343
  });
41304
- child.on("close", (code) => {
41344
+ child.on("close", (code, signal) => {
41305
41345
  if (code === 0)
41306
41346
  resolve36(stdout.trim());
41307
41347
  else
41308
- reject(new Error(`Exit ${code}: ${stderr.slice(0, 500)}`));
41348
+ reject(new Error(`Exit ${code}${signal ? ` (signal: ${signal})` : ""}: ${stderr.slice(0, 500)}`));
41309
41349
  });
41310
41350
  child.on("error", reject);
41311
41351
  });
@@ -41314,8 +41354,8 @@ function selectWeightTier(vramGB) {
41314
41354
  if (vramGB >= 48)
41315
41355
  return "original";
41316
41356
  if (vramGB >= 16)
41317
- return "nf4";
41318
- return "turbo2bit";
41357
+ return "nf4-distilled";
41358
+ return "nf4";
41319
41359
  }
41320
41360
  function detectJetson() {
41321
41361
  try {
@@ -41499,9 +41539,21 @@ async function installPersonaPlex(onInfo, weightTier) {
41499
41539
  return false;
41500
41540
  }
41501
41541
  }
41502
- await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
41542
+ if (isAarch64) {
41543
+ log("ARM64: Installing moshi (--no-deps to preserve JetPack torch)...");
41544
+ await execAsync(`"${pip}" install --quiet --no-deps "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
41545
+ log("ARM64: Installing remaining moshi dependencies...");
41546
+ await execAsync(`"${pip}" install --quiet "numpy>=1.26,<2.2" "safetensors>=0.4.0,<0.5" "huggingface-hub>=0.24,<0.25" "einops==0.7" "sentencepiece==0.2" "sounddevice==0.5" "aiohttp>=3.10.5,<3.11"`, { timeout: 3e5 });
41547
+ } else {
41548
+ await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
41549
+ }
41503
41550
  } catch (err) {
41504
41551
  log(`Moshi install failed: ${err instanceof Error ? err.message : String(err)}`);
41552
+ if (isAarch64) {
41553
+ log("ARM64: This often means the pip process was OOM-killed.");
41554
+ log("Check: dmesg | grep -i 'oom\\|killed' | tail -5");
41555
+ log("Ensure JetPack PyTorch is installed: pip3 show torch");
41556
+ }
41505
41557
  try {
41506
41558
  await execAsync(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
41507
41559
  } catch {
@@ -41526,6 +41578,104 @@ async function installPersonaPlex(onInfo, weightTier) {
41526
41578
  }
41527
41579
  } catch {
41528
41580
  }
41581
+ try {
41582
+ const sitePackages = execSync27(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
41583
+ encoding: "utf8",
41584
+ timeout: 5e3,
41585
+ stdio: "pipe"
41586
+ }).trim();
41587
+ const loadersFile = join54(sitePackages, "models", "loaders.py");
41588
+ if (existsSync37(loadersFile)) {
41589
+ let src = readFileSync28(loadersFile, "utf8");
41590
+ if (!src.includes("_dequantize_2bit_state_dict")) {
41591
+ const dequantPatch = `
41592
+ import math
41593
+
41594
+ # NF2 centroids (Lloyd-Max optimal for Gaussian distribution)
41595
+ _NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
41596
+
41597
+
41598
+ def _is_2bit_quantized(filename):
41599
+ return "turbo2bit" in str(filename).lower() or "2bit" in str(filename).lower()
41600
+
41601
+
41602
+ def _fast_wht(x):
41603
+ n = x.shape[-1]
41604
+ h = 1
41605
+ while h < n:
41606
+ x_view = x.view(*x.shape[:-1], -1, 2, h)
41607
+ a = x_view[..., 0, :].clone()
41608
+ b = x_view[..., 1, :].clone()
41609
+ x_view[..., 0, :] = a + b
41610
+ x_view[..., 1, :] = a - b
41611
+ x = x_view.reshape(*x.shape)
41612
+ h *= 2
41613
+ return x / math.sqrt(n)
41614
+
41615
+
41616
+ def _dequantize_2bit_state_dict(state_dict):
41617
+ result = {}
41618
+ processed = set()
41619
+ meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
41620
+ base_names = set()
41621
+ for key in state_dict:
41622
+ if key.endswith(".packed"):
41623
+ base_names.add(key[:-len(".packed")])
41624
+ for name in base_names:
41625
+ packed_key = f"{name}.packed"
41626
+ if packed_key in state_dict:
41627
+ gs = state_dict[f"{name}.gs"].item()
41628
+ gs_pow2 = state_dict[f"{name}.np2"].item()
41629
+ numel = state_dict[f"{name}.numel"].item()
41630
+ shape = [s for s in state_dict[f"{name}.shape"].tolist() if s > 0]
41631
+ scales = state_dict[f"{name}.scales"].float()
41632
+ packed = state_dict[packed_key]
41633
+ n_groups = scales.numel()
41634
+ p = packed.reshape(n_groups, gs // 4)
41635
+ codes = torch.zeros(n_groups, gs, dtype=torch.long)
41636
+ for i in range(4):
41637
+ codes[:, i::4] = (p >> (2 * i)) & 0x03
41638
+ dequant = _NF2_CENTROIDS[codes]
41639
+ if gs_pow2 > gs:
41640
+ dequant = torch.cat([dequant, torch.zeros(n_groups, gs_pow2 - gs)], dim=1)
41641
+ dequant = _fast_wht(dequant)
41642
+ dequant = dequant[:, :gs]
41643
+ dequant = dequant * scales.unsqueeze(1)
41644
+ result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
41645
+ processed.add(name)
41646
+ for name, tensor in state_dict.items():
41647
+ if any(name.endswith(s) for s in meta_suffixes):
41648
+ continue
41649
+ if name not in processed:
41650
+ result[name] = tensor.to(torch.bfloat16)
41651
+ return result
41652
+ `;
41653
+ const insertPoint = src.indexOf("\nSAMPLE_RATE");
41654
+ if (insertPoint > 0) {
41655
+ src = src.slice(0, insertPoint) + dequantPatch + src.slice(insertPoint);
41656
+ }
41657
+ src = src.replace(/( +)# Load state_dict\n( +)if filename\.endswith\("\.safetensors"\):/, `$1is_2bit = _is_2bit_quantized(filename)
41658
+ $1# Load state_dict \u2014 2-bit must load to CPU for dequant
41659
+ $2load_device = "cpu" if is_2bit else dev.type
41660
+ $2if filename.endswith(".safetensors"):`);
41661
+ if (src.includes("device=dev.type)")) {
41662
+ src = src.replace("device=dev.type)", "device=load_device)");
41663
+ }
41664
+ const patchPoint = "# Patch 1: expand depformer";
41665
+ if (src.includes(patchPoint) && !src.includes("_dequantize_2bit_state_dict(state_dict)")) {
41666
+ src = src.replace(patchPoint, `# Dequantize 2-bit weights if needed
41667
+ if is_2bit:
41668
+ logger.info("Dequantizing 2-bit TurboQuant weights...")
41669
+ state_dict = _dequantize_2bit_state_dict(state_dict)
41670
+
41671
+ ${patchPoint}`);
41672
+ }
41673
+ writeFileSync16(loadersFile, src);
41674
+ log("Patched loaders.py with 2-bit TurboQuant native dequant support.");
41675
+ }
41676
+ }
41677
+ } catch {
41678
+ }
41529
41679
  if (isAarch64) {
41530
41680
  log("ARM64: Installing bitsandbytes for INT4 inference...");
41531
41681
  try {
@@ -41533,6 +41683,10 @@ async function installPersonaPlex(onInfo, weightTier) {
41533
41683
  } catch {
41534
41684
  }
41535
41685
  }
41686
+ try {
41687
+ await execAsync(`"${pip}" install --quiet accelerate`, { timeout: 12e4, stdio: "pipe" });
41688
+ } catch {
41689
+ }
41536
41690
  try {
41537
41691
  await execAsync(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
41538
41692
  } catch {
@@ -41616,49 +41770,101 @@ async function startPersonaPlexDaemon(onInfo) {
41616
41770
  const repoInfo = WEIGHT_REPOS[tier];
41617
41771
  const extraArgs = [];
41618
41772
  if (tier !== "original") {
41619
- log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
41620
- const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
41621
41773
  const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
41622
- if (!existsSync37(dequantScript)) {
41623
- const shipped = getShippedVoicesDir();
41624
- if (shipped) {
41625
- const src = join54(shipped, "dequant-loader.py");
41626
- if (existsSync37(src))
41627
- copyFileSync2(src, dequantScript);
41628
- }
41629
- }
41630
- try {
41631
- const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41632
- if (existsSync37(dequantScript) && existsSync37(weightPath)) {
41633
- try {
41634
- execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
41774
+ if (tier === "nf4-distilled") {
41775
+ log(`Weight tier: ${tier} \u2014 distilled NF4 (90% token match, ${repoInfo.sizeGB}GB)...`);
41776
+ try {
41777
+ const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}', token=False))"`, { encoding: "utf8", timeout: 6e4, stdio: "pipe" }).trim();
41778
+ if (existsSync37(weightPath)) {
41779
+ if (!existsSync37(cachedBf16)) {
41780
+ log("Converting .pt checkpoint to safetensors (one-time)...");
41781
+ execSync27(`"${venvPython2}" -c "
41782
+ import torch; from safetensors.torch import save_file
41783
+ state = torch.load('${weightPath}', map_location='cpu', weights_only=True)
41784
+ state = {k: v.to(torch.bfloat16) if v.is_floating_point() else v for k, v in state.items()}
41785
+ save_file(state, '${cachedBf16}')
41786
+ print('Converted')
41787
+ "`, { timeout: 18e4, stdio: "pipe" });
41788
+ }
41635
41789
  if (existsSync37(cachedBf16)) {
41636
41790
  extraArgs.push("--moshi-weight", cachedBf16);
41637
- log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41791
+ log(`Using distilled weights: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41792
+ } else {
41793
+ extraArgs.push("--moshi-weight", weightPath);
41638
41794
  }
41639
- } catch (e) {
41640
- log(`Dequantization failed \u2014 server will try to load original weights`);
41641
41795
  }
41796
+ } catch (e) {
41797
+ log(`Failed to load distilled weights \u2014 falling back to standard NF4`);
41642
41798
  }
41643
- try {
41644
- const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41645
- if (existsSync37(mimiPath))
41646
- extraArgs.push("--mimi-weight", mimiPath);
41647
- } catch {
41799
+ } else {
41800
+ log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
41801
+ const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
41802
+ if (!existsSync37(dequantScript)) {
41803
+ const shipped = getShippedVoicesDir();
41804
+ if (shipped) {
41805
+ const src = join54(shipped, "dequant-loader.py");
41806
+ if (existsSync37(src))
41807
+ copyFileSync2(src, dequantScript);
41808
+ }
41648
41809
  }
41649
41810
  try {
41650
- const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41651
- if (existsSync37(tokPath))
41652
- extraArgs.push("--tokenizer", tokPath);
41811
+ const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41812
+ if (existsSync37(dequantScript) && existsSync37(weightPath)) {
41813
+ try {
41814
+ execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
41815
+ if (existsSync37(cachedBf16)) {
41816
+ extraArgs.push("--moshi-weight", cachedBf16);
41817
+ log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41818
+ }
41819
+ } catch (e) {
41820
+ log(`Dequantization failed \u2014 server will try to load original weights`);
41821
+ }
41822
+ }
41823
+ try {
41824
+ const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41825
+ if (existsSync37(mimiPath))
41826
+ extraArgs.push("--mimi-weight", mimiPath);
41827
+ } catch {
41828
+ }
41829
+ try {
41830
+ const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41831
+ if (existsSync37(tokPath))
41832
+ extraArgs.push("--tokenizer", tokPath);
41833
+ } catch {
41834
+ }
41653
41835
  } catch {
41836
+ log(`Weight file not found \u2014 server will download on first run`);
41654
41837
  }
41838
+ extraArgs.push("--hf-repo", repoInfo.repo);
41839
+ }
41840
+ }
41841
+ let hybridEnabled = false;
41842
+ let ollamaModel = process.env["HYBRID_LLM_MODEL"] || "";
41843
+ if (!ollamaModel) {
41844
+ try {
41845
+ const oaConfig = JSON.parse(readFileSync28(join54(homedir13(), ".open-agents", "config.json"), "utf8"));
41846
+ if (oaConfig.model)
41847
+ ollamaModel = oaConfig.model;
41655
41848
  } catch {
41656
- log(`Weight file not found \u2014 server will download on first run`);
41657
41849
  }
41658
- extraArgs.push("--hf-repo", repoInfo.repo);
41659
41850
  }
41660
- log(`Starting PersonaPlex daemon (${tier} tier)...`);
41661
- const child = spawn19(venvPython2, [
41851
+ if (!ollamaModel)
41852
+ ollamaModel = "qwen3.5:4b";
41853
+ try {
41854
+ const ollamaCheck = execSync27("curl -s http://localhost:11434/api/tags", {
41855
+ timeout: 3e3,
41856
+ stdio: "pipe",
41857
+ encoding: "utf8"
41858
+ });
41859
+ if (ollamaCheck.includes("models")) {
41860
+ hybridEnabled = true;
41861
+ log(`Hybrid mode: PersonaPlex voice + ${ollamaModel} reasoning`);
41862
+ }
41863
+ } catch {
41864
+ log("Ollama not detected \u2014 running PersonaPlex standalone (no hybrid)");
41865
+ }
41866
+ log(`Starting PersonaPlex daemon (${tier} tier${hybridEnabled ? ", hybrid" : ""})...`);
41867
+ const serverArgs = [
41662
41868
  "-m",
41663
41869
  "moshi.server",
41664
41870
  "--host",
@@ -41670,10 +41876,19 @@ async function startPersonaPlexDaemon(onInfo) {
41670
41876
  "--device",
41671
41877
  "cuda",
41672
41878
  ...extraArgs
41673
- ], {
41879
+ ];
41880
+ if (hybridEnabled)
41881
+ serverArgs.push("--hybrid");
41882
+ const serverEnv = { ...process.env };
41883
+ if (hybridEnabled) {
41884
+ serverEnv["HYBRID_ENABLED"] = "1";
41885
+ serverEnv["HYBRID_LLM_MODEL"] = ollamaModel;
41886
+ serverEnv["HYBRID_MODEL_FAST"] = "qwen3.5:4b";
41887
+ }
41888
+ const child = spawn19(venvPython2, serverArgs, {
41674
41889
  stdio: ["ignore", "pipe", "pipe"],
41675
41890
  detached: true,
41676
- env: { ...process.env },
41891
+ env: serverEnv,
41677
41892
  cwd: PERSONAPLEX_DIR
41678
41893
  });
41679
41894
  if (child.pid) {
@@ -41990,7 +42205,7 @@ var init_personaplex = __esm({
41990
42205
  WEIGHT_REPOS = {
41991
42206
  original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
41992
42207
  nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
41993
- turbo2bit: { repo: "cudabenchmarktest/personaplex-7b-turbo2bit", file: "model-turbo2bit.safetensors", sizeGB: 2.1, needsToken: false }
42208
+ "nf4-distilled": { repo: "cudabenchmarktest/personaplex-7b-nf4-distilled", file: "student_best.pt", sizeGB: 16.7, needsToken: false }
41994
42209
  };
41995
42210
  PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
41996
42211
  PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
@@ -43757,7 +43972,8 @@ function fitToWidth(text, width) {
43757
43972
  return text + " ".repeat(width - visible.length);
43758
43973
  }
43759
43974
  function showDropPanel(opts) {
43760
- const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl } = opts;
43975
+ const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl, borderColor } = opts;
43976
+ const bc = borderColor ?? dc.cyan;
43761
43977
  return new Promise((resolve_) => {
43762
43978
  const stdin = process.stdin;
43763
43979
  const hadRawMode = stdin.isRaw;
@@ -43809,16 +44025,16 @@ function showDropPanel(opts) {
43809
44025
  const bottomPad = Math.max(0, availableForPadding - topPad);
43810
44026
  const lines = [];
43811
44027
  const borderH = "\u2508".repeat(Math.max(2, cols - 4));
43812
- const emptyPipe = ` ${dc.cyan("\u250A")}${" ".repeat(innerSpace)}${dc.cyan("\u250A")}`;
43813
- lines.push(` ${dc.cyan(borderH)}`);
44028
+ const emptyPipe = ` ${bc("\u250A")}${" ".repeat(innerSpace)}${bc("\u250A")}`;
44029
+ lines.push(` ${bc(borderH)}`);
43814
44030
  for (let i = 0; i < topPad; i++)
43815
44031
  lines.push(emptyPipe);
43816
44032
  for (const line of content) {
43817
- lines.push(` ${dc.cyan("\u250A")}${line}${dc.cyan("\u250A")}`);
44033
+ lines.push(` ${bc("\u250A")}${line}${bc("\u250A")}`);
43818
44034
  }
43819
44035
  for (let i = 0; i < bottomPad; i++)
43820
44036
  lines.push(emptyPipe);
43821
- lines.push(` ${dc.cyan(borderH)}`);
44037
+ lines.push(` ${bc(borderH)}`);
43822
44038
  lines.push(` ${dc.dim("Enter confirm Esc cancel")}`);
43823
44039
  overlayWrite(lines.join("\n") + "\n");
43824
44040
  }
@@ -49644,18 +49860,22 @@ async function handleSlashCommand(input, ctx) {
49644
49860
  if (currentVoiceModel === "personaplex") {
49645
49861
  if (!cloneArg) {
49646
49862
  const dropResult = await showDropPanel({
49647
- title: "PersonaPlex Voice Clone \u2014 Drop Audio File",
49648
- instruction: "Drop a WAV file (4-10s clean speech) to clone into PersonaPlex",
49863
+ title: "PersonaPlex Voice Clone",
49864
+ instruction: "Drop an audio file (4-10s clean speech) to clone a voice",
49649
49865
  allowedExtensions: [".wav", ".mp3", ".ogg", ".flac", ".m4a", ".opus", ".aac"],
49650
49866
  typeLabel: "Audio files",
49651
- rl: ctx.rl
49867
+ rl: ctx.rl,
49868
+ borderColor: c2.green
49652
49869
  });
49653
49870
  if (dropResult.confirmed && dropResult.path) {
49654
- const voiceName2 = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
49871
+ const defaultName = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
49872
+ renderInfo(`File: ${dropResult.path}`);
49873
+ renderInfo(`Voice name: ${defaultName} (derived from filename)`);
49874
+ renderInfo("Cloning voice with preprocessing (denoise + normalize + multi-segment)...");
49655
49875
  const { clonePersonaPlexVoice: clonePersonaPlexVoice3 } = await Promise.resolve().then(() => (init_personaplex(), personaplex_exports));
49656
- const result2 = await clonePersonaPlexVoice3(dropResult.path, voiceName2, (m) => renderInfo(m));
49876
+ const result2 = await clonePersonaPlexVoice3(dropResult.path, defaultName, (m) => renderInfo(m));
49657
49877
  if (result2)
49658
- renderInfo(`Voice "${voiceName2}" ready \u2014 use /voice list to see all voices`);
49878
+ renderInfo(`Voice "${defaultName}" ready \u2014 use /voice list to see all voices`);
49659
49879
  } else {
49660
49880
  renderInfo("Voice clone cancelled.");
49661
49881
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.185.34",
3
+ "version": "0.185.36",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",