open-agents-ai 0.185.35 → 0.185.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +352 -132
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -26404,94 +26404,103 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
|
|
|
26404
26404
|
this.emit({ type: "error", content: `Backend error: ${reqErr.message}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26405
26405
|
break;
|
|
26406
26406
|
}
|
|
26407
|
-
|
|
26408
|
-
|
|
26409
|
-
|
|
26410
|
-
|
|
26411
|
-
|
|
26412
|
-
if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
|
|
26413
|
-
this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26407
|
+
if (this.handleMaxTokensError(reqErr, chatRequest)) {
|
|
26408
|
+
try {
|
|
26409
|
+
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26410
|
+
} catch (retryErr) {
|
|
26411
|
+
this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26414
26412
|
break;
|
|
26415
26413
|
}
|
|
26416
|
-
|
|
26417
|
-
|
|
26418
|
-
|
|
26419
|
-
|
|
26420
|
-
|
|
26421
|
-
|
|
26422
|
-
|
|
26423
|
-
|
|
26424
|
-
const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
|
|
26425
|
-
this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26426
|
-
imageRecovered = false;
|
|
26427
|
-
break;
|
|
26428
|
-
}
|
|
26429
|
-
} else {
|
|
26414
|
+
} else {
|
|
26415
|
+
const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
|
|
26416
|
+
if (!recovered) {
|
|
26417
|
+
const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
|
|
26418
|
+
const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
|
|
26419
|
+
this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26420
|
+
if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
|
|
26421
|
+
this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26430
26422
|
break;
|
|
26431
26423
|
}
|
|
26432
|
-
|
|
26433
|
-
|
|
26434
|
-
|
|
26435
|
-
|
|
26436
|
-
type: "status",
|
|
26437
|
-
content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
|
|
26438
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26439
|
-
});
|
|
26440
|
-
const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
|
|
26441
|
-
const toolInjectMsg = [
|
|
26442
|
-
"\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
|
|
26443
|
-
"This model does not have native tool-calling. To use tools, output a JSON block:",
|
|
26444
|
-
"```json",
|
|
26445
|
-
'{"tool": "tool_name", "args": {"param": "value"}}',
|
|
26446
|
-
"```",
|
|
26447
|
-
"\nAvailable tools:",
|
|
26448
|
-
toolDescriptions,
|
|
26449
|
-
"\nOutput EXACTLY ONE tool call per response in the JSON format above.",
|
|
26450
|
-
"After seeing the tool result, continue or call another tool.",
|
|
26451
|
-
'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
|
|
26452
|
-
].join("\n");
|
|
26453
|
-
messages.push({ role: "system", content: toolInjectMsg });
|
|
26454
|
-
chatRequest.tools = [];
|
|
26455
|
-
try {
|
|
26456
|
-
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26457
|
-
const content = response.choices?.[0]?.message?.content ?? "";
|
|
26458
|
-
const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
|
|
26459
|
-
if (jsonMatch) {
|
|
26424
|
+
let imageRecovered = false;
|
|
26425
|
+
if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
|
|
26426
|
+
imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
|
|
26427
|
+
if (imageRecovered) {
|
|
26460
26428
|
try {
|
|
26461
|
-
const
|
|
26462
|
-
|
|
26463
|
-
|
|
26464
|
-
|
|
26465
|
-
|
|
26466
|
-
|
|
26467
|
-
|
|
26468
|
-
|
|
26469
|
-
|
|
26429
|
+
const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26430
|
+
response = imgRetry;
|
|
26431
|
+
} catch (imgRetryErr) {
|
|
26432
|
+
const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
|
|
26433
|
+
this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26434
|
+
imageRecovered = false;
|
|
26435
|
+
break;
|
|
26436
|
+
}
|
|
26437
|
+
} else {
|
|
26438
|
+
break;
|
|
26439
|
+
}
|
|
26440
|
+
}
|
|
26441
|
+
if (imageRecovered) {
|
|
26442
|
+
} else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
|
|
26443
|
+
this.emit({
|
|
26444
|
+
type: "status",
|
|
26445
|
+
content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
|
|
26446
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26447
|
+
});
|
|
26448
|
+
const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
|
|
26449
|
+
const toolInjectMsg = [
|
|
26450
|
+
"\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
|
|
26451
|
+
"This model does not have native tool-calling. To use tools, output a JSON block:",
|
|
26452
|
+
"```json",
|
|
26453
|
+
'{"tool": "tool_name", "args": {"param": "value"}}',
|
|
26454
|
+
"```",
|
|
26455
|
+
"\nAvailable tools:",
|
|
26456
|
+
toolDescriptions,
|
|
26457
|
+
"\nOutput EXACTLY ONE tool call per response in the JSON format above.",
|
|
26458
|
+
"After seeing the tool result, continue or call another tool.",
|
|
26459
|
+
'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
|
|
26460
|
+
].join("\n");
|
|
26461
|
+
messages.push({ role: "system", content: toolInjectMsg });
|
|
26462
|
+
chatRequest.tools = [];
|
|
26463
|
+
try {
|
|
26464
|
+
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26465
|
+
const content = response.choices?.[0]?.message?.content ?? "";
|
|
26466
|
+
const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
|
|
26467
|
+
if (jsonMatch) {
|
|
26468
|
+
try {
|
|
26469
|
+
const parsed = JSON.parse(jsonMatch[1]);
|
|
26470
|
+
if (parsed.tool && this.tools.has(parsed.tool)) {
|
|
26471
|
+
const tool = this.tools.get(parsed.tool);
|
|
26472
|
+
const result = await tool.execute(parsed.args ?? {});
|
|
26473
|
+
messages.push({ role: "assistant", content });
|
|
26474
|
+
messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
|
|
26475
|
+
if (parsed.tool === "task_complete") {
|
|
26476
|
+
completed = true;
|
|
26477
|
+
summary = String(parsed.args?.summary ?? content);
|
|
26478
|
+
}
|
|
26479
|
+
toolCallCount++;
|
|
26480
|
+
continue;
|
|
26470
26481
|
}
|
|
26471
|
-
|
|
26472
|
-
continue;
|
|
26482
|
+
} catch {
|
|
26473
26483
|
}
|
|
26474
|
-
} catch {
|
|
26475
26484
|
}
|
|
26485
|
+
messages.push({ role: "assistant", content });
|
|
26486
|
+
continue;
|
|
26487
|
+
} catch (retryErr2) {
|
|
26488
|
+
const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
|
|
26489
|
+
this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26490
|
+
break;
|
|
26476
26491
|
}
|
|
26477
|
-
|
|
26478
|
-
|
|
26479
|
-
|
|
26480
|
-
|
|
26481
|
-
|
|
26492
|
+
}
|
|
26493
|
+
if (!imageRecovered) {
|
|
26494
|
+
this.emit({
|
|
26495
|
+
type: "error",
|
|
26496
|
+
content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
|
|
26497
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26498
|
+
});
|
|
26482
26499
|
break;
|
|
26483
26500
|
}
|
|
26484
26501
|
}
|
|
26485
|
-
|
|
26486
|
-
this.emit({
|
|
26487
|
-
type: "error",
|
|
26488
|
-
content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
|
|
26489
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26490
|
-
});
|
|
26491
|
-
break;
|
|
26492
|
-
}
|
|
26502
|
+
response = recovered ?? response;
|
|
26493
26503
|
}
|
|
26494
|
-
response = recovered ?? response;
|
|
26495
26504
|
}
|
|
26496
26505
|
totalTokens += response.usage?.totalTokens ?? 0;
|
|
26497
26506
|
promptTokens += response.usage?.promptTokens ?? 0;
|
|
@@ -27056,15 +27065,24 @@ Integrate this guidance into your current approach. Continue working on the task
|
|
|
27056
27065
|
this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27057
27066
|
break;
|
|
27058
27067
|
}
|
|
27059
|
-
|
|
27060
|
-
|
|
27061
|
-
|
|
27062
|
-
|
|
27063
|
-
|
|
27064
|
-
|
|
27065
|
-
|
|
27068
|
+
if (this.handleMaxTokensError(reqErr, chatRequest)) {
|
|
27069
|
+
try {
|
|
27070
|
+
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
27071
|
+
} catch (retryErr) {
|
|
27072
|
+
this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27073
|
+
break;
|
|
27074
|
+
}
|
|
27075
|
+
} else {
|
|
27076
|
+
const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
|
|
27077
|
+
if (!recovered) {
|
|
27078
|
+
const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
|
|
27079
|
+
const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
|
|
27080
|
+
this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27081
|
+
this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27082
|
+
break;
|
|
27083
|
+
}
|
|
27084
|
+
response = recovered;
|
|
27066
27085
|
}
|
|
27067
|
-
response = recovered;
|
|
27068
27086
|
}
|
|
27069
27087
|
totalTokens += response.usage?.totalTokens ?? 0;
|
|
27070
27088
|
promptTokens += response.usage?.promptTokens ?? 0;
|
|
@@ -28443,6 +28461,28 @@ ${transcript}`
|
|
|
28443
28461
|
// -------------------------------------------------------------------------
|
|
28444
28462
|
// Transient error recovery — retry on 502, fetch failed, timeouts
|
|
28445
28463
|
// -------------------------------------------------------------------------
|
|
28464
|
+
/**
|
|
28465
|
+
* Detect max_completion_tokens rejection (HTTP 400) and auto-reduce to the server's limit.
|
|
28466
|
+
* Returns true if maxTokens was reduced and the caller should retry.
|
|
28467
|
+
*/
|
|
28468
|
+
handleMaxTokensError(err, chatRequest) {
|
|
28469
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
28470
|
+
const match = msg.match(/max_?(?:completion_?)?tokens\s+is\s+too\s+large.*?allows?\s+up\s+to\s+(\d+)/i);
|
|
28471
|
+
if (!match)
|
|
28472
|
+
return false;
|
|
28473
|
+
const serverLimit = parseInt(match[1], 10);
|
|
28474
|
+
if (isNaN(serverLimit) || serverLimit <= 0)
|
|
28475
|
+
return false;
|
|
28476
|
+
const prev = this.options.maxTokens;
|
|
28477
|
+
this.options.maxTokens = serverLimit;
|
|
28478
|
+
chatRequest.maxTokens = serverLimit;
|
|
28479
|
+
this.emit({
|
|
28480
|
+
type: "status",
|
|
28481
|
+
content: `Server max_tokens limit is ${serverLimit} (was ${prev}) \u2014 auto-adjusted`,
|
|
28482
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
28483
|
+
});
|
|
28484
|
+
return true;
|
|
28485
|
+
}
|
|
28446
28486
|
/** Detect whether an error is transient (worth retrying) */
|
|
28447
28487
|
isTransientError(err) {
|
|
28448
28488
|
if (err instanceof Error && err.fatal)
|
|
@@ -41301,11 +41341,11 @@ function execAsync(cmd, opts = {}) {
|
|
|
41301
41341
|
child.stderr?.on("data", (d) => {
|
|
41302
41342
|
stderr += d.toString();
|
|
41303
41343
|
});
|
|
41304
|
-
child.on("close", (code) => {
|
|
41344
|
+
child.on("close", (code, signal) => {
|
|
41305
41345
|
if (code === 0)
|
|
41306
41346
|
resolve36(stdout.trim());
|
|
41307
41347
|
else
|
|
41308
|
-
reject(new Error(`Exit ${code}: ${stderr.slice(0, 500)}`));
|
|
41348
|
+
reject(new Error(`Exit ${code}${signal ? ` (signal: ${signal})` : ""}: ${stderr.slice(0, 500)}`));
|
|
41309
41349
|
});
|
|
41310
41350
|
child.on("error", reject);
|
|
41311
41351
|
});
|
|
@@ -41314,8 +41354,8 @@ function selectWeightTier(vramGB) {
|
|
|
41314
41354
|
if (vramGB >= 48)
|
|
41315
41355
|
return "original";
|
|
41316
41356
|
if (vramGB >= 16)
|
|
41317
|
-
return "nf4";
|
|
41318
|
-
return "
|
|
41357
|
+
return "nf4-distilled";
|
|
41358
|
+
return "nf4";
|
|
41319
41359
|
}
|
|
41320
41360
|
function detectJetson() {
|
|
41321
41361
|
try {
|
|
@@ -41499,9 +41539,21 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41499
41539
|
return false;
|
|
41500
41540
|
}
|
|
41501
41541
|
}
|
|
41502
|
-
|
|
41542
|
+
if (isAarch64) {
|
|
41543
|
+
log("ARM64: Installing moshi (--no-deps to preserve JetPack torch)...");
|
|
41544
|
+
await execAsync(`"${pip}" install --quiet --no-deps "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
|
|
41545
|
+
log("ARM64: Installing remaining moshi dependencies...");
|
|
41546
|
+
await execAsync(`"${pip}" install --quiet "numpy>=1.26,<2.2" "safetensors>=0.4.0,<0.5" "huggingface-hub>=0.24,<0.25" "einops==0.7" "sentencepiece==0.2" "sounddevice==0.5" "aiohttp>=3.10.5,<3.11"`, { timeout: 3e5 });
|
|
41547
|
+
} else {
|
|
41548
|
+
await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
|
|
41549
|
+
}
|
|
41503
41550
|
} catch (err) {
|
|
41504
41551
|
log(`Moshi install failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
41552
|
+
if (isAarch64) {
|
|
41553
|
+
log("ARM64: This often means the pip process was OOM-killed.");
|
|
41554
|
+
log("Check: dmesg | grep -i 'oom\\|killed' | tail -5");
|
|
41555
|
+
log("Ensure JetPack PyTorch is installed: pip3 show torch");
|
|
41556
|
+
}
|
|
41505
41557
|
try {
|
|
41506
41558
|
await execAsync(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
|
|
41507
41559
|
} catch {
|
|
@@ -41526,6 +41578,104 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41526
41578
|
}
|
|
41527
41579
|
} catch {
|
|
41528
41580
|
}
|
|
41581
|
+
try {
|
|
41582
|
+
const sitePackages = execSync27(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
|
|
41583
|
+
encoding: "utf8",
|
|
41584
|
+
timeout: 5e3,
|
|
41585
|
+
stdio: "pipe"
|
|
41586
|
+
}).trim();
|
|
41587
|
+
const loadersFile = join54(sitePackages, "models", "loaders.py");
|
|
41588
|
+
if (existsSync37(loadersFile)) {
|
|
41589
|
+
let src = readFileSync28(loadersFile, "utf8");
|
|
41590
|
+
if (!src.includes("_dequantize_2bit_state_dict")) {
|
|
41591
|
+
const dequantPatch = `
|
|
41592
|
+
import math
|
|
41593
|
+
|
|
41594
|
+
# NF2 centroids (Lloyd-Max optimal for Gaussian distribution)
|
|
41595
|
+
_NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
|
|
41596
|
+
|
|
41597
|
+
|
|
41598
|
+
def _is_2bit_quantized(filename):
|
|
41599
|
+
return "turbo2bit" in str(filename).lower() or "2bit" in str(filename).lower()
|
|
41600
|
+
|
|
41601
|
+
|
|
41602
|
+
def _fast_wht(x):
|
|
41603
|
+
n = x.shape[-1]
|
|
41604
|
+
h = 1
|
|
41605
|
+
while h < n:
|
|
41606
|
+
x_view = x.view(*x.shape[:-1], -1, 2, h)
|
|
41607
|
+
a = x_view[..., 0, :].clone()
|
|
41608
|
+
b = x_view[..., 1, :].clone()
|
|
41609
|
+
x_view[..., 0, :] = a + b
|
|
41610
|
+
x_view[..., 1, :] = a - b
|
|
41611
|
+
x = x_view.reshape(*x.shape)
|
|
41612
|
+
h *= 2
|
|
41613
|
+
return x / math.sqrt(n)
|
|
41614
|
+
|
|
41615
|
+
|
|
41616
|
+
def _dequantize_2bit_state_dict(state_dict):
|
|
41617
|
+
result = {}
|
|
41618
|
+
processed = set()
|
|
41619
|
+
meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
|
|
41620
|
+
base_names = set()
|
|
41621
|
+
for key in state_dict:
|
|
41622
|
+
if key.endswith(".packed"):
|
|
41623
|
+
base_names.add(key[:-len(".packed")])
|
|
41624
|
+
for name in base_names:
|
|
41625
|
+
packed_key = f"{name}.packed"
|
|
41626
|
+
if packed_key in state_dict:
|
|
41627
|
+
gs = state_dict[f"{name}.gs"].item()
|
|
41628
|
+
gs_pow2 = state_dict[f"{name}.np2"].item()
|
|
41629
|
+
numel = state_dict[f"{name}.numel"].item()
|
|
41630
|
+
shape = [s for s in state_dict[f"{name}.shape"].tolist() if s > 0]
|
|
41631
|
+
scales = state_dict[f"{name}.scales"].float()
|
|
41632
|
+
packed = state_dict[packed_key]
|
|
41633
|
+
n_groups = scales.numel()
|
|
41634
|
+
p = packed.reshape(n_groups, gs // 4)
|
|
41635
|
+
codes = torch.zeros(n_groups, gs, dtype=torch.long)
|
|
41636
|
+
for i in range(4):
|
|
41637
|
+
codes[:, i::4] = (p >> (2 * i)) & 0x03
|
|
41638
|
+
dequant = _NF2_CENTROIDS[codes]
|
|
41639
|
+
if gs_pow2 > gs:
|
|
41640
|
+
dequant = torch.cat([dequant, torch.zeros(n_groups, gs_pow2 - gs)], dim=1)
|
|
41641
|
+
dequant = _fast_wht(dequant)
|
|
41642
|
+
dequant = dequant[:, :gs]
|
|
41643
|
+
dequant = dequant * scales.unsqueeze(1)
|
|
41644
|
+
result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
|
|
41645
|
+
processed.add(name)
|
|
41646
|
+
for name, tensor in state_dict.items():
|
|
41647
|
+
if any(name.endswith(s) for s in meta_suffixes):
|
|
41648
|
+
continue
|
|
41649
|
+
if name not in processed:
|
|
41650
|
+
result[name] = tensor.to(torch.bfloat16)
|
|
41651
|
+
return result
|
|
41652
|
+
`;
|
|
41653
|
+
const insertPoint = src.indexOf("\nSAMPLE_RATE");
|
|
41654
|
+
if (insertPoint > 0) {
|
|
41655
|
+
src = src.slice(0, insertPoint) + dequantPatch + src.slice(insertPoint);
|
|
41656
|
+
}
|
|
41657
|
+
src = src.replace(/( +)# Load state_dict\n( +)if filename\.endswith\("\.safetensors"\):/, `$1is_2bit = _is_2bit_quantized(filename)
|
|
41658
|
+
$1# Load state_dict \u2014 2-bit must load to CPU for dequant
|
|
41659
|
+
$2load_device = "cpu" if is_2bit else dev.type
|
|
41660
|
+
$2if filename.endswith(".safetensors"):`);
|
|
41661
|
+
if (src.includes("device=dev.type)")) {
|
|
41662
|
+
src = src.replace("device=dev.type)", "device=load_device)");
|
|
41663
|
+
}
|
|
41664
|
+
const patchPoint = "# Patch 1: expand depformer";
|
|
41665
|
+
if (src.includes(patchPoint) && !src.includes("_dequantize_2bit_state_dict(state_dict)")) {
|
|
41666
|
+
src = src.replace(patchPoint, `# Dequantize 2-bit weights if needed
|
|
41667
|
+
if is_2bit:
|
|
41668
|
+
logger.info("Dequantizing 2-bit TurboQuant weights...")
|
|
41669
|
+
state_dict = _dequantize_2bit_state_dict(state_dict)
|
|
41670
|
+
|
|
41671
|
+
${patchPoint}`);
|
|
41672
|
+
}
|
|
41673
|
+
writeFileSync16(loadersFile, src);
|
|
41674
|
+
log("Patched loaders.py with 2-bit TurboQuant native dequant support.");
|
|
41675
|
+
}
|
|
41676
|
+
}
|
|
41677
|
+
} catch {
|
|
41678
|
+
}
|
|
41529
41679
|
if (isAarch64) {
|
|
41530
41680
|
log("ARM64: Installing bitsandbytes for INT4 inference...");
|
|
41531
41681
|
try {
|
|
@@ -41533,6 +41683,10 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41533
41683
|
} catch {
|
|
41534
41684
|
}
|
|
41535
41685
|
}
|
|
41686
|
+
try {
|
|
41687
|
+
await execAsync(`"${pip}" install --quiet accelerate`, { timeout: 12e4, stdio: "pipe" });
|
|
41688
|
+
} catch {
|
|
41689
|
+
}
|
|
41536
41690
|
try {
|
|
41537
41691
|
await execAsync(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
|
|
41538
41692
|
} catch {
|
|
@@ -41616,49 +41770,101 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41616
41770
|
const repoInfo = WEIGHT_REPOS[tier];
|
|
41617
41771
|
const extraArgs = [];
|
|
41618
41772
|
if (tier !== "original") {
|
|
41619
|
-
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
|
|
41620
|
-
const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
|
|
41621
41773
|
const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
|
|
41622
|
-
if (
|
|
41623
|
-
|
|
41624
|
-
|
|
41625
|
-
const
|
|
41626
|
-
if (existsSync37(
|
|
41627
|
-
|
|
41628
|
-
|
|
41629
|
-
|
|
41630
|
-
|
|
41631
|
-
|
|
41632
|
-
|
|
41633
|
-
|
|
41634
|
-
|
|
41774
|
+
if (tier === "nf4-distilled") {
|
|
41775
|
+
log(`Weight tier: ${tier} \u2014 distilled NF4 (90% token match, ${repoInfo.sizeGB}GB)...`);
|
|
41776
|
+
try {
|
|
41777
|
+
const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}', token=False))"`, { encoding: "utf8", timeout: 6e4, stdio: "pipe" }).trim();
|
|
41778
|
+
if (existsSync37(weightPath)) {
|
|
41779
|
+
if (!existsSync37(cachedBf16)) {
|
|
41780
|
+
log("Converting .pt checkpoint to safetensors (one-time)...");
|
|
41781
|
+
execSync27(`"${venvPython2}" -c "
|
|
41782
|
+
import torch; from safetensors.torch import save_file
|
|
41783
|
+
state = torch.load('${weightPath}', map_location='cpu', weights_only=True)
|
|
41784
|
+
state = {k: v.to(torch.bfloat16) if v.is_floating_point() else v for k, v in state.items()}
|
|
41785
|
+
save_file(state, '${cachedBf16}')
|
|
41786
|
+
print('Converted')
|
|
41787
|
+
"`, { timeout: 18e4, stdio: "pipe" });
|
|
41788
|
+
}
|
|
41635
41789
|
if (existsSync37(cachedBf16)) {
|
|
41636
41790
|
extraArgs.push("--moshi-weight", cachedBf16);
|
|
41637
|
-
log(`Using
|
|
41791
|
+
log(`Using distilled weights: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
|
|
41792
|
+
} else {
|
|
41793
|
+
extraArgs.push("--moshi-weight", weightPath);
|
|
41638
41794
|
}
|
|
41639
|
-
} catch (e) {
|
|
41640
|
-
log(`Dequantization failed \u2014 server will try to load original weights`);
|
|
41641
41795
|
}
|
|
41796
|
+
} catch (e) {
|
|
41797
|
+
log(`Failed to load distilled weights \u2014 falling back to standard NF4`);
|
|
41642
41798
|
}
|
|
41643
|
-
|
|
41644
|
-
|
|
41645
|
-
|
|
41646
|
-
|
|
41647
|
-
|
|
41799
|
+
} else {
|
|
41800
|
+
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
|
|
41801
|
+
const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
|
|
41802
|
+
if (!existsSync37(dequantScript)) {
|
|
41803
|
+
const shipped = getShippedVoicesDir();
|
|
41804
|
+
if (shipped) {
|
|
41805
|
+
const src = join54(shipped, "dequant-loader.py");
|
|
41806
|
+
if (existsSync37(src))
|
|
41807
|
+
copyFileSync2(src, dequantScript);
|
|
41808
|
+
}
|
|
41648
41809
|
}
|
|
41649
41810
|
try {
|
|
41650
|
-
const
|
|
41651
|
-
if (existsSync37(
|
|
41652
|
-
|
|
41811
|
+
const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41812
|
+
if (existsSync37(dequantScript) && existsSync37(weightPath)) {
|
|
41813
|
+
try {
|
|
41814
|
+
execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
|
|
41815
|
+
if (existsSync37(cachedBf16)) {
|
|
41816
|
+
extraArgs.push("--moshi-weight", cachedBf16);
|
|
41817
|
+
log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
|
|
41818
|
+
}
|
|
41819
|
+
} catch (e) {
|
|
41820
|
+
log(`Dequantization failed \u2014 server will try to load original weights`);
|
|
41821
|
+
}
|
|
41822
|
+
}
|
|
41823
|
+
try {
|
|
41824
|
+
const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41825
|
+
if (existsSync37(mimiPath))
|
|
41826
|
+
extraArgs.push("--mimi-weight", mimiPath);
|
|
41827
|
+
} catch {
|
|
41828
|
+
}
|
|
41829
|
+
try {
|
|
41830
|
+
const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41831
|
+
if (existsSync37(tokPath))
|
|
41832
|
+
extraArgs.push("--tokenizer", tokPath);
|
|
41833
|
+
} catch {
|
|
41834
|
+
}
|
|
41653
41835
|
} catch {
|
|
41836
|
+
log(`Weight file not found \u2014 server will download on first run`);
|
|
41654
41837
|
}
|
|
41838
|
+
extraArgs.push("--hf-repo", repoInfo.repo);
|
|
41839
|
+
}
|
|
41840
|
+
}
|
|
41841
|
+
let hybridEnabled = false;
|
|
41842
|
+
let ollamaModel = process.env["HYBRID_LLM_MODEL"] || "";
|
|
41843
|
+
if (!ollamaModel) {
|
|
41844
|
+
try {
|
|
41845
|
+
const oaConfig = JSON.parse(readFileSync28(join54(homedir13(), ".open-agents", "config.json"), "utf8"));
|
|
41846
|
+
if (oaConfig.model)
|
|
41847
|
+
ollamaModel = oaConfig.model;
|
|
41655
41848
|
} catch {
|
|
41656
|
-
log(`Weight file not found \u2014 server will download on first run`);
|
|
41657
41849
|
}
|
|
41658
|
-
extraArgs.push("--hf-repo", repoInfo.repo);
|
|
41659
41850
|
}
|
|
41660
|
-
|
|
41661
|
-
|
|
41851
|
+
if (!ollamaModel)
|
|
41852
|
+
ollamaModel = "qwen3.5:4b";
|
|
41853
|
+
try {
|
|
41854
|
+
const ollamaCheck = execSync27("curl -s http://localhost:11434/api/tags", {
|
|
41855
|
+
timeout: 3e3,
|
|
41856
|
+
stdio: "pipe",
|
|
41857
|
+
encoding: "utf8"
|
|
41858
|
+
});
|
|
41859
|
+
if (ollamaCheck.includes("models")) {
|
|
41860
|
+
hybridEnabled = true;
|
|
41861
|
+
log(`Hybrid mode: PersonaPlex voice + ${ollamaModel} reasoning`);
|
|
41862
|
+
}
|
|
41863
|
+
} catch {
|
|
41864
|
+
log("Ollama not detected \u2014 running PersonaPlex standalone (no hybrid)");
|
|
41865
|
+
}
|
|
41866
|
+
log(`Starting PersonaPlex daemon (${tier} tier${hybridEnabled ? ", hybrid" : ""})...`);
|
|
41867
|
+
const serverArgs = [
|
|
41662
41868
|
"-m",
|
|
41663
41869
|
"moshi.server",
|
|
41664
41870
|
"--host",
|
|
@@ -41670,10 +41876,19 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41670
41876
|
"--device",
|
|
41671
41877
|
"cuda",
|
|
41672
41878
|
...extraArgs
|
|
41673
|
-
]
|
|
41879
|
+
];
|
|
41880
|
+
if (hybridEnabled)
|
|
41881
|
+
serverArgs.push("--hybrid");
|
|
41882
|
+
const serverEnv = { ...process.env };
|
|
41883
|
+
if (hybridEnabled) {
|
|
41884
|
+
serverEnv["HYBRID_ENABLED"] = "1";
|
|
41885
|
+
serverEnv["HYBRID_LLM_MODEL"] = ollamaModel;
|
|
41886
|
+
serverEnv["HYBRID_MODEL_FAST"] = "qwen3.5:4b";
|
|
41887
|
+
}
|
|
41888
|
+
const child = spawn19(venvPython2, serverArgs, {
|
|
41674
41889
|
stdio: ["ignore", "pipe", "pipe"],
|
|
41675
41890
|
detached: true,
|
|
41676
|
-
env:
|
|
41891
|
+
env: serverEnv,
|
|
41677
41892
|
cwd: PERSONAPLEX_DIR
|
|
41678
41893
|
});
|
|
41679
41894
|
if (child.pid) {
|
|
@@ -41990,7 +42205,7 @@ var init_personaplex = __esm({
|
|
|
41990
42205
|
WEIGHT_REPOS = {
|
|
41991
42206
|
original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
|
|
41992
42207
|
nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
|
|
41993
|
-
|
|
42208
|
+
"nf4-distilled": { repo: "cudabenchmarktest/personaplex-7b-nf4-distilled", file: "student_best.pt", sizeGB: 16.7, needsToken: false }
|
|
41994
42209
|
};
|
|
41995
42210
|
PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
|
|
41996
42211
|
PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
|
|
@@ -43757,7 +43972,8 @@ function fitToWidth(text, width) {
|
|
|
43757
43972
|
return text + " ".repeat(width - visible.length);
|
|
43758
43973
|
}
|
|
43759
43974
|
function showDropPanel(opts) {
|
|
43760
|
-
const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl } = opts;
|
|
43975
|
+
const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl, borderColor } = opts;
|
|
43976
|
+
const bc = borderColor ?? dc.cyan;
|
|
43761
43977
|
return new Promise((resolve_) => {
|
|
43762
43978
|
const stdin = process.stdin;
|
|
43763
43979
|
const hadRawMode = stdin.isRaw;
|
|
@@ -43809,16 +44025,16 @@ function showDropPanel(opts) {
|
|
|
43809
44025
|
const bottomPad = Math.max(0, availableForPadding - topPad);
|
|
43810
44026
|
const lines = [];
|
|
43811
44027
|
const borderH = "\u2508".repeat(Math.max(2, cols - 4));
|
|
43812
|
-
const emptyPipe = ` ${
|
|
43813
|
-
lines.push(` ${
|
|
44028
|
+
const emptyPipe = ` ${bc("\u250A")}${" ".repeat(innerSpace)}${bc("\u250A")}`;
|
|
44029
|
+
lines.push(` ${bc(borderH)}`);
|
|
43814
44030
|
for (let i = 0; i < topPad; i++)
|
|
43815
44031
|
lines.push(emptyPipe);
|
|
43816
44032
|
for (const line of content) {
|
|
43817
|
-
lines.push(` ${
|
|
44033
|
+
lines.push(` ${bc("\u250A")}${line}${bc("\u250A")}`);
|
|
43818
44034
|
}
|
|
43819
44035
|
for (let i = 0; i < bottomPad; i++)
|
|
43820
44036
|
lines.push(emptyPipe);
|
|
43821
|
-
lines.push(` ${
|
|
44037
|
+
lines.push(` ${bc(borderH)}`);
|
|
43822
44038
|
lines.push(` ${dc.dim("Enter confirm Esc cancel")}`);
|
|
43823
44039
|
overlayWrite(lines.join("\n") + "\n");
|
|
43824
44040
|
}
|
|
@@ -49644,18 +49860,22 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49644
49860
|
if (currentVoiceModel === "personaplex") {
|
|
49645
49861
|
if (!cloneArg) {
|
|
49646
49862
|
const dropResult = await showDropPanel({
|
|
49647
|
-
title: "PersonaPlex Voice Clone
|
|
49648
|
-
instruction: "Drop
|
|
49863
|
+
title: "PersonaPlex Voice Clone",
|
|
49864
|
+
instruction: "Drop an audio file (4-10s clean speech) to clone a voice",
|
|
49649
49865
|
allowedExtensions: [".wav", ".mp3", ".ogg", ".flac", ".m4a", ".opus", ".aac"],
|
|
49650
49866
|
typeLabel: "Audio files",
|
|
49651
|
-
rl: ctx.rl
|
|
49867
|
+
rl: ctx.rl,
|
|
49868
|
+
borderColor: c2.green
|
|
49652
49869
|
});
|
|
49653
49870
|
if (dropResult.confirmed && dropResult.path) {
|
|
49654
|
-
const
|
|
49871
|
+
const defaultName = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
|
|
49872
|
+
renderInfo(`File: ${dropResult.path}`);
|
|
49873
|
+
renderInfo(`Voice name: ${defaultName} (derived from filename)`);
|
|
49874
|
+
renderInfo("Cloning voice with preprocessing (denoise + normalize + multi-segment)...");
|
|
49655
49875
|
const { clonePersonaPlexVoice: clonePersonaPlexVoice3 } = await Promise.resolve().then(() => (init_personaplex(), personaplex_exports));
|
|
49656
|
-
const result2 = await clonePersonaPlexVoice3(dropResult.path,
|
|
49876
|
+
const result2 = await clonePersonaPlexVoice3(dropResult.path, defaultName, (m) => renderInfo(m));
|
|
49657
49877
|
if (result2)
|
|
49658
|
-
renderInfo(`Voice "${
|
|
49878
|
+
renderInfo(`Voice "${defaultName}" ready \u2014 use /voice list to see all voices`);
|
|
49659
49879
|
} else {
|
|
49660
49880
|
renderInfo("Voice clone cancelled.");
|
|
49661
49881
|
}
|
package/package.json
CHANGED