open-agents-ai 0.185.35 → 0.185.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +355 -134
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -26404,94 +26404,103 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
|
|
|
26404
26404
|
this.emit({ type: "error", content: `Backend error: ${reqErr.message}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26405
26405
|
break;
|
|
26406
26406
|
}
|
|
26407
|
-
|
|
26408
|
-
|
|
26409
|
-
|
|
26410
|
-
|
|
26411
|
-
|
|
26412
|
-
if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
|
|
26413
|
-
this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26407
|
+
if (this.handleMaxTokensError(reqErr, chatRequest)) {
|
|
26408
|
+
try {
|
|
26409
|
+
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26410
|
+
} catch (retryErr) {
|
|
26411
|
+
this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26414
26412
|
break;
|
|
26415
26413
|
}
|
|
26416
|
-
|
|
26417
|
-
|
|
26418
|
-
|
|
26419
|
-
|
|
26420
|
-
|
|
26421
|
-
|
|
26422
|
-
|
|
26423
|
-
|
|
26424
|
-
const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
|
|
26425
|
-
this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26426
|
-
imageRecovered = false;
|
|
26427
|
-
break;
|
|
26428
|
-
}
|
|
26429
|
-
} else {
|
|
26414
|
+
} else {
|
|
26415
|
+
const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
|
|
26416
|
+
if (!recovered) {
|
|
26417
|
+
const errMsg = reqErr instanceof Error ? reqErr.message : String(reqErr);
|
|
26418
|
+
const cause = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
|
|
26419
|
+
this.emit({ type: "error", content: `Backend error: ${errMsg}${cause}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26420
|
+
if (/HTTP 404|not found|model.*not found/i.test(errMsg)) {
|
|
26421
|
+
this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26430
26422
|
break;
|
|
26431
26423
|
}
|
|
26432
|
-
|
|
26433
|
-
|
|
26434
|
-
|
|
26435
|
-
|
|
26436
|
-
type: "status",
|
|
26437
|
-
content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
|
|
26438
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26439
|
-
});
|
|
26440
|
-
const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
|
|
26441
|
-
const toolInjectMsg = [
|
|
26442
|
-
"\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
|
|
26443
|
-
"This model does not have native tool-calling. To use tools, output a JSON block:",
|
|
26444
|
-
"```json",
|
|
26445
|
-
'{"tool": "tool_name", "args": {"param": "value"}}',
|
|
26446
|
-
"```",
|
|
26447
|
-
"\nAvailable tools:",
|
|
26448
|
-
toolDescriptions,
|
|
26449
|
-
"\nOutput EXACTLY ONE tool call per response in the JSON format above.",
|
|
26450
|
-
"After seeing the tool result, continue or call another tool.",
|
|
26451
|
-
'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
|
|
26452
|
-
].join("\n");
|
|
26453
|
-
messages.push({ role: "system", content: toolInjectMsg });
|
|
26454
|
-
chatRequest.tools = [];
|
|
26455
|
-
try {
|
|
26456
|
-
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26457
|
-
const content = response.choices?.[0]?.message?.content ?? "";
|
|
26458
|
-
const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
|
|
26459
|
-
if (jsonMatch) {
|
|
26424
|
+
let imageRecovered = false;
|
|
26425
|
+
if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
|
|
26426
|
+
imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
|
|
26427
|
+
if (imageRecovered) {
|
|
26460
26428
|
try {
|
|
26461
|
-
const
|
|
26462
|
-
|
|
26463
|
-
|
|
26464
|
-
|
|
26465
|
-
|
|
26466
|
-
|
|
26467
|
-
|
|
26468
|
-
|
|
26469
|
-
|
|
26429
|
+
const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26430
|
+
response = imgRetry;
|
|
26431
|
+
} catch (imgRetryErr) {
|
|
26432
|
+
const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
|
|
26433
|
+
this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26434
|
+
imageRecovered = false;
|
|
26435
|
+
break;
|
|
26436
|
+
}
|
|
26437
|
+
} else {
|
|
26438
|
+
break;
|
|
26439
|
+
}
|
|
26440
|
+
}
|
|
26441
|
+
if (imageRecovered) {
|
|
26442
|
+
} else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
|
|
26443
|
+
this.emit({
|
|
26444
|
+
type: "status",
|
|
26445
|
+
content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
|
|
26446
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26447
|
+
});
|
|
26448
|
+
const toolDescriptions = Array.from(this.tools.values()).map((t) => `- ${t.name}: ${t.description}`).join("\n");
|
|
26449
|
+
const toolInjectMsg = [
|
|
26450
|
+
"\n\n[TOOL MODE \u2014 PROMPT INJECTION]",
|
|
26451
|
+
"This model does not have native tool-calling. To use tools, output a JSON block:",
|
|
26452
|
+
"```json",
|
|
26453
|
+
'{"tool": "tool_name", "args": {"param": "value"}}',
|
|
26454
|
+
"```",
|
|
26455
|
+
"\nAvailable tools:",
|
|
26456
|
+
toolDescriptions,
|
|
26457
|
+
"\nOutput EXACTLY ONE tool call per response in the JSON format above.",
|
|
26458
|
+
"After seeing the tool result, continue or call another tool.",
|
|
26459
|
+
'When done, output: {"tool": "task_complete", "args": {"summary": "what you did"}}'
|
|
26460
|
+
].join("\n");
|
|
26461
|
+
messages.push({ role: "system", content: toolInjectMsg });
|
|
26462
|
+
chatRequest.tools = [];
|
|
26463
|
+
try {
|
|
26464
|
+
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26465
|
+
const content = response.choices?.[0]?.message?.content ?? "";
|
|
26466
|
+
const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)```/);
|
|
26467
|
+
if (jsonMatch) {
|
|
26468
|
+
try {
|
|
26469
|
+
const parsed = JSON.parse(jsonMatch[1]);
|
|
26470
|
+
if (parsed.tool && this.tools.has(parsed.tool)) {
|
|
26471
|
+
const tool = this.tools.get(parsed.tool);
|
|
26472
|
+
const result = await tool.execute(parsed.args ?? {});
|
|
26473
|
+
messages.push({ role: "assistant", content });
|
|
26474
|
+
messages.push({ role: "user", content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}` });
|
|
26475
|
+
if (parsed.tool === "task_complete") {
|
|
26476
|
+
completed = true;
|
|
26477
|
+
summary = String(parsed.args?.summary ?? content);
|
|
26478
|
+
}
|
|
26479
|
+
toolCallCount++;
|
|
26480
|
+
continue;
|
|
26470
26481
|
}
|
|
26471
|
-
|
|
26472
|
-
continue;
|
|
26482
|
+
} catch {
|
|
26473
26483
|
}
|
|
26474
|
-
} catch {
|
|
26475
26484
|
}
|
|
26485
|
+
messages.push({ role: "assistant", content });
|
|
26486
|
+
continue;
|
|
26487
|
+
} catch (retryErr2) {
|
|
26488
|
+
const msg2 = retryErr2 instanceof Error ? retryErr2.message : String(retryErr2);
|
|
26489
|
+
this.emit({ type: "error", content: `Prompt-injected tool mode also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26490
|
+
break;
|
|
26476
26491
|
}
|
|
26477
|
-
|
|
26478
|
-
|
|
26479
|
-
|
|
26480
|
-
|
|
26481
|
-
|
|
26492
|
+
}
|
|
26493
|
+
if (!imageRecovered) {
|
|
26494
|
+
this.emit({
|
|
26495
|
+
type: "error",
|
|
26496
|
+
content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
|
|
26497
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26498
|
+
});
|
|
26482
26499
|
break;
|
|
26483
26500
|
}
|
|
26484
26501
|
}
|
|
26485
|
-
|
|
26486
|
-
this.emit({
|
|
26487
|
-
type: "error",
|
|
26488
|
-
content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
|
|
26489
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26490
|
-
});
|
|
26491
|
-
break;
|
|
26492
|
-
}
|
|
26502
|
+
response = recovered ?? response;
|
|
26493
26503
|
}
|
|
26494
|
-
response = recovered ?? response;
|
|
26495
26504
|
}
|
|
26496
26505
|
totalTokens += response.usage?.totalTokens ?? 0;
|
|
26497
26506
|
promptTokens += response.usage?.promptTokens ?? 0;
|
|
@@ -27056,15 +27065,24 @@ Integrate this guidance into your current approach. Continue working on the task
|
|
|
27056
27065
|
this.emit({ type: "error", content: "Task aborted by user", timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27057
27066
|
break;
|
|
27058
27067
|
}
|
|
27059
|
-
|
|
27060
|
-
|
|
27061
|
-
|
|
27062
|
-
|
|
27063
|
-
|
|
27064
|
-
|
|
27065
|
-
|
|
27068
|
+
if (this.handleMaxTokensError(reqErr, chatRequest)) {
|
|
27069
|
+
try {
|
|
27070
|
+
response = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
27071
|
+
} catch (retryErr) {
|
|
27072
|
+
this.emit({ type: "error", content: `Retry with reduced max_tokens also failed: ${retryErr instanceof Error ? retryErr.message : String(retryErr)}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27073
|
+
break;
|
|
27074
|
+
}
|
|
27075
|
+
} else {
|
|
27076
|
+
const recovered = await this.retryOnTransient(reqErr, chatRequest, turn);
|
|
27077
|
+
if (!recovered) {
|
|
27078
|
+
const errMsg2 = reqErr instanceof Error ? reqErr.message : String(reqErr);
|
|
27079
|
+
const cause2 = reqErr instanceof Error && reqErr.cause ? ` (${reqErr.cause.message ?? ""} ${reqErr.cause?.code ?? ""})` : "";
|
|
27080
|
+
this.emit({ type: "error", content: `Backend error: ${errMsg2}${cause2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27081
|
+
this.emit({ type: "error", content: `Backend unavailable \u2014 stopping task.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
27082
|
+
break;
|
|
27083
|
+
}
|
|
27084
|
+
response = recovered;
|
|
27066
27085
|
}
|
|
27067
|
-
response = recovered;
|
|
27068
27086
|
}
|
|
27069
27087
|
totalTokens += response.usage?.totalTokens ?? 0;
|
|
27070
27088
|
promptTokens += response.usage?.promptTokens ?? 0;
|
|
@@ -28443,6 +28461,28 @@ ${transcript}`
|
|
|
28443
28461
|
// -------------------------------------------------------------------------
|
|
28444
28462
|
// Transient error recovery — retry on 502, fetch failed, timeouts
|
|
28445
28463
|
// -------------------------------------------------------------------------
|
|
28464
|
+
/**
|
|
28465
|
+
* Detect max_completion_tokens rejection (HTTP 400) and auto-reduce to the server's limit.
|
|
28466
|
+
* Returns true if maxTokens was reduced and the caller should retry.
|
|
28467
|
+
*/
|
|
28468
|
+
handleMaxTokensError(err, chatRequest) {
|
|
28469
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
28470
|
+
const match = msg.match(/max_?(?:completion_?)?tokens\s+is\s+too\s+large.*?allows?\s+up\s+to\s+(\d+)/i);
|
|
28471
|
+
if (!match)
|
|
28472
|
+
return false;
|
|
28473
|
+
const serverLimit = parseInt(match[1], 10);
|
|
28474
|
+
if (isNaN(serverLimit) || serverLimit <= 0)
|
|
28475
|
+
return false;
|
|
28476
|
+
const prev = this.options.maxTokens;
|
|
28477
|
+
this.options.maxTokens = serverLimit;
|
|
28478
|
+
chatRequest.maxTokens = serverLimit;
|
|
28479
|
+
this.emit({
|
|
28480
|
+
type: "status",
|
|
28481
|
+
content: `Server max_tokens limit is ${serverLimit} (was ${prev}) \u2014 auto-adjusted`,
|
|
28482
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
28483
|
+
});
|
|
28484
|
+
return true;
|
|
28485
|
+
}
|
|
28446
28486
|
/** Detect whether an error is transient (worth retrying) */
|
|
28447
28487
|
isTransientError(err) {
|
|
28448
28488
|
if (err instanceof Error && err.fatal)
|
|
@@ -36362,14 +36402,15 @@ var init_voice_session = __esm({
|
|
|
36362
36402
|
}
|
|
36363
36403
|
// ── HTTP handler ──────────────────────────────────────────────────────
|
|
36364
36404
|
handleHTTP(req, res) {
|
|
36365
|
-
|
|
36405
|
+
const pathname = (req.url ?? "/").split("?")[0];
|
|
36406
|
+
if (pathname === "/" || pathname === "/index.html") {
|
|
36366
36407
|
res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
|
|
36367
36408
|
if (this.personaPlexWsUrl) {
|
|
36368
36409
|
res.end(generatePersonaPlexHTML(this.personaPlexWsUrl, this.personaPlexTextPrompt, this.personaPlexVoicePrompt));
|
|
36369
36410
|
} else {
|
|
36370
36411
|
res.end(generateFrontendHTML());
|
|
36371
36412
|
}
|
|
36372
|
-
} else if (
|
|
36413
|
+
} else if (pathname === "/health") {
|
|
36373
36414
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
36374
36415
|
res.end(JSON.stringify({
|
|
36375
36416
|
active: this.state.active,
|
|
@@ -41301,11 +41342,11 @@ function execAsync(cmd, opts = {}) {
|
|
|
41301
41342
|
child.stderr?.on("data", (d) => {
|
|
41302
41343
|
stderr += d.toString();
|
|
41303
41344
|
});
|
|
41304
|
-
child.on("close", (code) => {
|
|
41345
|
+
child.on("close", (code, signal) => {
|
|
41305
41346
|
if (code === 0)
|
|
41306
41347
|
resolve36(stdout.trim());
|
|
41307
41348
|
else
|
|
41308
|
-
reject(new Error(`Exit ${code}: ${stderr.slice(0, 500)}`));
|
|
41349
|
+
reject(new Error(`Exit ${code}${signal ? ` (signal: ${signal})` : ""}: ${stderr.slice(0, 500)}`));
|
|
41309
41350
|
});
|
|
41310
41351
|
child.on("error", reject);
|
|
41311
41352
|
});
|
|
@@ -41314,8 +41355,8 @@ function selectWeightTier(vramGB) {
|
|
|
41314
41355
|
if (vramGB >= 48)
|
|
41315
41356
|
return "original";
|
|
41316
41357
|
if (vramGB >= 16)
|
|
41317
|
-
return "nf4";
|
|
41318
|
-
return "
|
|
41358
|
+
return "nf4-distilled";
|
|
41359
|
+
return "nf4";
|
|
41319
41360
|
}
|
|
41320
41361
|
function detectJetson() {
|
|
41321
41362
|
try {
|
|
@@ -41499,9 +41540,21 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41499
41540
|
return false;
|
|
41500
41541
|
}
|
|
41501
41542
|
}
|
|
41502
|
-
|
|
41543
|
+
if (isAarch64) {
|
|
41544
|
+
log("ARM64: Installing moshi (--no-deps to preserve JetPack torch)...");
|
|
41545
|
+
await execAsync(`"${pip}" install --quiet --no-deps "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
|
|
41546
|
+
log("ARM64: Installing remaining moshi dependencies...");
|
|
41547
|
+
await execAsync(`"${pip}" install --quiet "numpy>=1.26,<2.2" "safetensors>=0.4.0,<0.5" "huggingface-hub>=0.24,<0.25" "einops==0.7" "sentencepiece==0.2" "sounddevice==0.5" "aiohttp>=3.10.5,<3.11"`, { timeout: 3e5 });
|
|
41548
|
+
} else {
|
|
41549
|
+
await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
|
|
41550
|
+
}
|
|
41503
41551
|
} catch (err) {
|
|
41504
41552
|
log(`Moshi install failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
41553
|
+
if (isAarch64) {
|
|
41554
|
+
log("ARM64: This often means the pip process was OOM-killed.");
|
|
41555
|
+
log("Check: dmesg | grep -i 'oom\\|killed' | tail -5");
|
|
41556
|
+
log("Ensure JetPack PyTorch is installed: pip3 show torch");
|
|
41557
|
+
}
|
|
41505
41558
|
try {
|
|
41506
41559
|
await execAsync(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
|
|
41507
41560
|
} catch {
|
|
@@ -41526,6 +41579,104 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41526
41579
|
}
|
|
41527
41580
|
} catch {
|
|
41528
41581
|
}
|
|
41582
|
+
try {
|
|
41583
|
+
const sitePackages = execSync27(`"${python}" -c "import moshi, os; print(os.path.dirname(moshi.__file__))"`, {
|
|
41584
|
+
encoding: "utf8",
|
|
41585
|
+
timeout: 5e3,
|
|
41586
|
+
stdio: "pipe"
|
|
41587
|
+
}).trim();
|
|
41588
|
+
const loadersFile = join54(sitePackages, "models", "loaders.py");
|
|
41589
|
+
if (existsSync37(loadersFile)) {
|
|
41590
|
+
let src = readFileSync28(loadersFile, "utf8");
|
|
41591
|
+
if (!src.includes("_dequantize_2bit_state_dict")) {
|
|
41592
|
+
const dequantPatch = `
|
|
41593
|
+
import math
|
|
41594
|
+
|
|
41595
|
+
# NF2 centroids (Lloyd-Max optimal for Gaussian distribution)
|
|
41596
|
+
_NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
|
|
41597
|
+
|
|
41598
|
+
|
|
41599
|
+
def _is_2bit_quantized(filename):
|
|
41600
|
+
return "turbo2bit" in str(filename).lower() or "2bit" in str(filename).lower()
|
|
41601
|
+
|
|
41602
|
+
|
|
41603
|
+
def _fast_wht(x):
|
|
41604
|
+
n = x.shape[-1]
|
|
41605
|
+
h = 1
|
|
41606
|
+
while h < n:
|
|
41607
|
+
x_view = x.view(*x.shape[:-1], -1, 2, h)
|
|
41608
|
+
a = x_view[..., 0, :].clone()
|
|
41609
|
+
b = x_view[..., 1, :].clone()
|
|
41610
|
+
x_view[..., 0, :] = a + b
|
|
41611
|
+
x_view[..., 1, :] = a - b
|
|
41612
|
+
x = x_view.reshape(*x.shape)
|
|
41613
|
+
h *= 2
|
|
41614
|
+
return x / math.sqrt(n)
|
|
41615
|
+
|
|
41616
|
+
|
|
41617
|
+
def _dequantize_2bit_state_dict(state_dict):
|
|
41618
|
+
result = {}
|
|
41619
|
+
processed = set()
|
|
41620
|
+
meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
|
|
41621
|
+
base_names = set()
|
|
41622
|
+
for key in state_dict:
|
|
41623
|
+
if key.endswith(".packed"):
|
|
41624
|
+
base_names.add(key[:-len(".packed")])
|
|
41625
|
+
for name in base_names:
|
|
41626
|
+
packed_key = f"{name}.packed"
|
|
41627
|
+
if packed_key in state_dict:
|
|
41628
|
+
gs = state_dict[f"{name}.gs"].item()
|
|
41629
|
+
gs_pow2 = state_dict[f"{name}.np2"].item()
|
|
41630
|
+
numel = state_dict[f"{name}.numel"].item()
|
|
41631
|
+
shape = [s for s in state_dict[f"{name}.shape"].tolist() if s > 0]
|
|
41632
|
+
scales = state_dict[f"{name}.scales"].float()
|
|
41633
|
+
packed = state_dict[packed_key]
|
|
41634
|
+
n_groups = scales.numel()
|
|
41635
|
+
p = packed.reshape(n_groups, gs // 4)
|
|
41636
|
+
codes = torch.zeros(n_groups, gs, dtype=torch.long)
|
|
41637
|
+
for i in range(4):
|
|
41638
|
+
codes[:, i::4] = (p >> (2 * i)) & 0x03
|
|
41639
|
+
dequant = _NF2_CENTROIDS[codes]
|
|
41640
|
+
if gs_pow2 > gs:
|
|
41641
|
+
dequant = torch.cat([dequant, torch.zeros(n_groups, gs_pow2 - gs)], dim=1)
|
|
41642
|
+
dequant = _fast_wht(dequant)
|
|
41643
|
+
dequant = dequant[:, :gs]
|
|
41644
|
+
dequant = dequant * scales.unsqueeze(1)
|
|
41645
|
+
result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)
|
|
41646
|
+
processed.add(name)
|
|
41647
|
+
for name, tensor in state_dict.items():
|
|
41648
|
+
if any(name.endswith(s) for s in meta_suffixes):
|
|
41649
|
+
continue
|
|
41650
|
+
if name not in processed:
|
|
41651
|
+
result[name] = tensor.to(torch.bfloat16)
|
|
41652
|
+
return result
|
|
41653
|
+
`;
|
|
41654
|
+
const insertPoint = src.indexOf("\nSAMPLE_RATE");
|
|
41655
|
+
if (insertPoint > 0) {
|
|
41656
|
+
src = src.slice(0, insertPoint) + dequantPatch + src.slice(insertPoint);
|
|
41657
|
+
}
|
|
41658
|
+
src = src.replace(/( +)# Load state_dict\n( +)if filename\.endswith\("\.safetensors"\):/, `$1is_2bit = _is_2bit_quantized(filename)
|
|
41659
|
+
$1# Load state_dict \u2014 2-bit must load to CPU for dequant
|
|
41660
|
+
$2load_device = "cpu" if is_2bit else dev.type
|
|
41661
|
+
$2if filename.endswith(".safetensors"):`);
|
|
41662
|
+
if (src.includes("device=dev.type)")) {
|
|
41663
|
+
src = src.replace("device=dev.type)", "device=load_device)");
|
|
41664
|
+
}
|
|
41665
|
+
const patchPoint = "# Patch 1: expand depformer";
|
|
41666
|
+
if (src.includes(patchPoint) && !src.includes("_dequantize_2bit_state_dict(state_dict)")) {
|
|
41667
|
+
src = src.replace(patchPoint, `# Dequantize 2-bit weights if needed
|
|
41668
|
+
if is_2bit:
|
|
41669
|
+
logger.info("Dequantizing 2-bit TurboQuant weights...")
|
|
41670
|
+
state_dict = _dequantize_2bit_state_dict(state_dict)
|
|
41671
|
+
|
|
41672
|
+
${patchPoint}`);
|
|
41673
|
+
}
|
|
41674
|
+
writeFileSync16(loadersFile, src);
|
|
41675
|
+
log("Patched loaders.py with 2-bit TurboQuant native dequant support.");
|
|
41676
|
+
}
|
|
41677
|
+
}
|
|
41678
|
+
} catch {
|
|
41679
|
+
}
|
|
41529
41680
|
if (isAarch64) {
|
|
41530
41681
|
log("ARM64: Installing bitsandbytes for INT4 inference...");
|
|
41531
41682
|
try {
|
|
@@ -41533,6 +41684,10 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41533
41684
|
} catch {
|
|
41534
41685
|
}
|
|
41535
41686
|
}
|
|
41687
|
+
try {
|
|
41688
|
+
await execAsync(`"${pip}" install --quiet accelerate`, { timeout: 12e4, stdio: "pipe" });
|
|
41689
|
+
} catch {
|
|
41690
|
+
}
|
|
41536
41691
|
try {
|
|
41537
41692
|
await execAsync(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
|
|
41538
41693
|
} catch {
|
|
@@ -41616,49 +41771,101 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41616
41771
|
const repoInfo = WEIGHT_REPOS[tier];
|
|
41617
41772
|
const extraArgs = [];
|
|
41618
41773
|
if (tier !== "original") {
|
|
41619
|
-
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
|
|
41620
|
-
const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
|
|
41621
41774
|
const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
|
|
41622
|
-
if (
|
|
41623
|
-
|
|
41624
|
-
|
|
41625
|
-
const
|
|
41626
|
-
if (existsSync37(
|
|
41627
|
-
|
|
41628
|
-
|
|
41629
|
-
|
|
41630
|
-
|
|
41631
|
-
|
|
41632
|
-
|
|
41633
|
-
|
|
41634
|
-
|
|
41775
|
+
if (tier === "nf4-distilled") {
|
|
41776
|
+
log(`Weight tier: ${tier} \u2014 distilled NF4 (90% token match, ${repoInfo.sizeGB}GB)...`);
|
|
41777
|
+
try {
|
|
41778
|
+
const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}', token=False))"`, { encoding: "utf8", timeout: 6e4, stdio: "pipe" }).trim();
|
|
41779
|
+
if (existsSync37(weightPath)) {
|
|
41780
|
+
if (!existsSync37(cachedBf16)) {
|
|
41781
|
+
log("Converting .pt checkpoint to safetensors (one-time)...");
|
|
41782
|
+
execSync27(`"${venvPython2}" -c "
|
|
41783
|
+
import torch; from safetensors.torch import save_file
|
|
41784
|
+
state = torch.load('${weightPath}', map_location='cpu', weights_only=True)
|
|
41785
|
+
state = {k: v.to(torch.bfloat16) if v.is_floating_point() else v for k, v in state.items()}
|
|
41786
|
+
save_file(state, '${cachedBf16}')
|
|
41787
|
+
print('Converted')
|
|
41788
|
+
"`, { timeout: 18e4, stdio: "pipe" });
|
|
41789
|
+
}
|
|
41635
41790
|
if (existsSync37(cachedBf16)) {
|
|
41636
41791
|
extraArgs.push("--moshi-weight", cachedBf16);
|
|
41637
|
-
log(`Using
|
|
41792
|
+
log(`Using distilled weights: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
|
|
41793
|
+
} else {
|
|
41794
|
+
extraArgs.push("--moshi-weight", weightPath);
|
|
41638
41795
|
}
|
|
41639
|
-
} catch (e) {
|
|
41640
|
-
log(`Dequantization failed \u2014 server will try to load original weights`);
|
|
41641
41796
|
}
|
|
41797
|
+
} catch (e) {
|
|
41798
|
+
log(`Failed to load distilled weights \u2014 falling back to standard NF4`);
|
|
41642
41799
|
}
|
|
41643
|
-
|
|
41644
|
-
|
|
41645
|
-
|
|
41646
|
-
|
|
41647
|
-
|
|
41800
|
+
} else {
|
|
41801
|
+
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
|
|
41802
|
+
const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
|
|
41803
|
+
if (!existsSync37(dequantScript)) {
|
|
41804
|
+
const shipped = getShippedVoicesDir();
|
|
41805
|
+
if (shipped) {
|
|
41806
|
+
const src = join54(shipped, "dequant-loader.py");
|
|
41807
|
+
if (existsSync37(src))
|
|
41808
|
+
copyFileSync2(src, dequantScript);
|
|
41809
|
+
}
|
|
41648
41810
|
}
|
|
41649
41811
|
try {
|
|
41650
|
-
const
|
|
41651
|
-
if (existsSync37(
|
|
41652
|
-
|
|
41812
|
+
const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41813
|
+
if (existsSync37(dequantScript) && existsSync37(weightPath)) {
|
|
41814
|
+
try {
|
|
41815
|
+
execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
|
|
41816
|
+
if (existsSync37(cachedBf16)) {
|
|
41817
|
+
extraArgs.push("--moshi-weight", cachedBf16);
|
|
41818
|
+
log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
|
|
41819
|
+
}
|
|
41820
|
+
} catch (e) {
|
|
41821
|
+
log(`Dequantization failed \u2014 server will try to load original weights`);
|
|
41822
|
+
}
|
|
41823
|
+
}
|
|
41824
|
+
try {
|
|
41825
|
+
const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41826
|
+
if (existsSync37(mimiPath))
|
|
41827
|
+
extraArgs.push("--mimi-weight", mimiPath);
|
|
41828
|
+
} catch {
|
|
41829
|
+
}
|
|
41830
|
+
try {
|
|
41831
|
+
const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41832
|
+
if (existsSync37(tokPath))
|
|
41833
|
+
extraArgs.push("--tokenizer", tokPath);
|
|
41834
|
+
} catch {
|
|
41835
|
+
}
|
|
41653
41836
|
} catch {
|
|
41837
|
+
log(`Weight file not found \u2014 server will download on first run`);
|
|
41654
41838
|
}
|
|
41839
|
+
extraArgs.push("--hf-repo", repoInfo.repo);
|
|
41840
|
+
}
|
|
41841
|
+
}
|
|
41842
|
+
let hybridEnabled = false;
|
|
41843
|
+
let ollamaModel = process.env["HYBRID_LLM_MODEL"] || "";
|
|
41844
|
+
if (!ollamaModel) {
|
|
41845
|
+
try {
|
|
41846
|
+
const oaConfig = JSON.parse(readFileSync28(join54(homedir13(), ".open-agents", "config.json"), "utf8"));
|
|
41847
|
+
if (oaConfig.model)
|
|
41848
|
+
ollamaModel = oaConfig.model;
|
|
41655
41849
|
} catch {
|
|
41656
|
-
log(`Weight file not found \u2014 server will download on first run`);
|
|
41657
41850
|
}
|
|
41658
|
-
extraArgs.push("--hf-repo", repoInfo.repo);
|
|
41659
41851
|
}
|
|
41660
|
-
|
|
41661
|
-
|
|
41852
|
+
if (!ollamaModel)
|
|
41853
|
+
ollamaModel = "qwen3.5:4b";
|
|
41854
|
+
try {
|
|
41855
|
+
const ollamaCheck = execSync27("curl -s http://localhost:11434/api/tags", {
|
|
41856
|
+
timeout: 3e3,
|
|
41857
|
+
stdio: "pipe",
|
|
41858
|
+
encoding: "utf8"
|
|
41859
|
+
});
|
|
41860
|
+
if (ollamaCheck.includes("models")) {
|
|
41861
|
+
hybridEnabled = true;
|
|
41862
|
+
log(`Hybrid mode: PersonaPlex voice + ${ollamaModel} reasoning`);
|
|
41863
|
+
}
|
|
41864
|
+
} catch {
|
|
41865
|
+
log("Ollama not detected \u2014 running PersonaPlex standalone (no hybrid)");
|
|
41866
|
+
}
|
|
41867
|
+
log(`Starting PersonaPlex daemon (${tier} tier${hybridEnabled ? ", hybrid" : ""})...`);
|
|
41868
|
+
const serverArgs = [
|
|
41662
41869
|
"-m",
|
|
41663
41870
|
"moshi.server",
|
|
41664
41871
|
"--host",
|
|
@@ -41670,10 +41877,19 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41670
41877
|
"--device",
|
|
41671
41878
|
"cuda",
|
|
41672
41879
|
...extraArgs
|
|
41673
|
-
]
|
|
41880
|
+
];
|
|
41881
|
+
if (hybridEnabled)
|
|
41882
|
+
serverArgs.push("--hybrid");
|
|
41883
|
+
const serverEnv = { ...process.env };
|
|
41884
|
+
if (hybridEnabled) {
|
|
41885
|
+
serverEnv["HYBRID_ENABLED"] = "1";
|
|
41886
|
+
serverEnv["HYBRID_LLM_MODEL"] = ollamaModel;
|
|
41887
|
+
serverEnv["HYBRID_MODEL_FAST"] = "qwen3.5:4b";
|
|
41888
|
+
}
|
|
41889
|
+
const child = spawn19(venvPython2, serverArgs, {
|
|
41674
41890
|
stdio: ["ignore", "pipe", "pipe"],
|
|
41675
41891
|
detached: true,
|
|
41676
|
-
env:
|
|
41892
|
+
env: serverEnv,
|
|
41677
41893
|
cwd: PERSONAPLEX_DIR
|
|
41678
41894
|
});
|
|
41679
41895
|
if (child.pid) {
|
|
@@ -41990,7 +42206,7 @@ var init_personaplex = __esm({
|
|
|
41990
42206
|
WEIGHT_REPOS = {
|
|
41991
42207
|
original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
|
|
41992
42208
|
nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
|
|
41993
|
-
|
|
42209
|
+
"nf4-distilled": { repo: "cudabenchmarktest/personaplex-7b-nf4-distilled", file: "student_best.pt", sizeGB: 16.7, needsToken: false }
|
|
41994
42210
|
};
|
|
41995
42211
|
PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
|
|
41996
42212
|
PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
|
|
@@ -43757,7 +43973,8 @@ function fitToWidth(text, width) {
|
|
|
43757
43973
|
return text + " ".repeat(width - visible.length);
|
|
43758
43974
|
}
|
|
43759
43975
|
function showDropPanel(opts) {
|
|
43760
|
-
const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl } = opts;
|
|
43976
|
+
const { title, instruction = "Drag and drop a file here, or type/paste a path", allowedExtensions = [], typeLabel, rl, borderColor } = opts;
|
|
43977
|
+
const bc = borderColor ?? dc.cyan;
|
|
43761
43978
|
return new Promise((resolve_) => {
|
|
43762
43979
|
const stdin = process.stdin;
|
|
43763
43980
|
const hadRawMode = stdin.isRaw;
|
|
@@ -43809,16 +44026,16 @@ function showDropPanel(opts) {
|
|
|
43809
44026
|
const bottomPad = Math.max(0, availableForPadding - topPad);
|
|
43810
44027
|
const lines = [];
|
|
43811
44028
|
const borderH = "\u2508".repeat(Math.max(2, cols - 4));
|
|
43812
|
-
const emptyPipe = ` ${
|
|
43813
|
-
lines.push(` ${
|
|
44029
|
+
const emptyPipe = ` ${bc("\u250A")}${" ".repeat(innerSpace)}${bc("\u250A")}`;
|
|
44030
|
+
lines.push(` ${bc(borderH)}`);
|
|
43814
44031
|
for (let i = 0; i < topPad; i++)
|
|
43815
44032
|
lines.push(emptyPipe);
|
|
43816
44033
|
for (const line of content) {
|
|
43817
|
-
lines.push(` ${
|
|
44034
|
+
lines.push(` ${bc("\u250A")}${line}${bc("\u250A")}`);
|
|
43818
44035
|
}
|
|
43819
44036
|
for (let i = 0; i < bottomPad; i++)
|
|
43820
44037
|
lines.push(emptyPipe);
|
|
43821
|
-
lines.push(` ${
|
|
44038
|
+
lines.push(` ${bc(borderH)}`);
|
|
43822
44039
|
lines.push(` ${dc.dim("Enter confirm Esc cancel")}`);
|
|
43823
44040
|
overlayWrite(lines.join("\n") + "\n");
|
|
43824
44041
|
}
|
|
@@ -49644,18 +49861,22 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49644
49861
|
if (currentVoiceModel === "personaplex") {
|
|
49645
49862
|
if (!cloneArg) {
|
|
49646
49863
|
const dropResult = await showDropPanel({
|
|
49647
|
-
title: "PersonaPlex Voice Clone
|
|
49648
|
-
instruction: "Drop
|
|
49864
|
+
title: "PersonaPlex Voice Clone",
|
|
49865
|
+
instruction: "Drop an audio file (4-10s clean speech) to clone a voice",
|
|
49649
49866
|
allowedExtensions: [".wav", ".mp3", ".ogg", ".flac", ".m4a", ".opus", ".aac"],
|
|
49650
49867
|
typeLabel: "Audio files",
|
|
49651
|
-
rl: ctx.rl
|
|
49868
|
+
rl: ctx.rl,
|
|
49869
|
+
borderColor: c2.green
|
|
49652
49870
|
});
|
|
49653
49871
|
if (dropResult.confirmed && dropResult.path) {
|
|
49654
|
-
const
|
|
49872
|
+
const defaultName = dropResult.path.replace(/.*[\\/]/, "").replace(/\.[^.]+$/, "").replace(/[^a-zA-Z0-9_-]/g, "_");
|
|
49873
|
+
renderInfo(`File: ${dropResult.path}`);
|
|
49874
|
+
renderInfo(`Voice name: ${defaultName} (derived from filename)`);
|
|
49875
|
+
renderInfo("Cloning voice with preprocessing (denoise + normalize + multi-segment)...");
|
|
49655
49876
|
const { clonePersonaPlexVoice: clonePersonaPlexVoice3 } = await Promise.resolve().then(() => (init_personaplex(), personaplex_exports));
|
|
49656
|
-
const result2 = await clonePersonaPlexVoice3(dropResult.path,
|
|
49877
|
+
const result2 = await clonePersonaPlexVoice3(dropResult.path, defaultName, (m) => renderInfo(m));
|
|
49657
49878
|
if (result2)
|
|
49658
|
-
renderInfo(`Voice "${
|
|
49879
|
+
renderInfo(`Voice "${defaultName}" ready \u2014 use /voice list to see all voices`);
|
|
49659
49880
|
} else {
|
|
49660
49881
|
renderInfo("Voice clone cancelled.");
|
|
49661
49882
|
}
|
package/package.json
CHANGED