open-agents-ai 0.185.30 → 0.185.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +201 -54
- package/package.json +1 -1
- package/voices/personaplex/dequant-loader.py +174 -0
package/dist/index.js
CHANGED
|
@@ -41270,6 +41270,8 @@ __export(personaplex_exports, {
|
|
|
41270
41270
|
clonePersonaPlexVoice: () => clonePersonaPlexVoice,
|
|
41271
41271
|
detectPersonaPlexCapability: () => detectPersonaPlexCapability,
|
|
41272
41272
|
getPersonaPlexWSUrl: () => getPersonaPlexWSUrl,
|
|
41273
|
+
getWeightRepoInfo: () => getWeightRepoInfo,
|
|
41274
|
+
getWeightTier: () => getWeightTier,
|
|
41273
41275
|
installPersonaPlex: () => installPersonaPlex,
|
|
41274
41276
|
isPersonaPlexInstalled: () => isPersonaPlexInstalled,
|
|
41275
41277
|
isPersonaPlexRunning: () => isPersonaPlexRunning,
|
|
@@ -41279,37 +41281,68 @@ __export(personaplex_exports, {
|
|
|
41279
41281
|
startPersonaPlexDaemon: () => startPersonaPlexDaemon,
|
|
41280
41282
|
stopPersonaPlex: () => stopPersonaPlex
|
|
41281
41283
|
});
|
|
41282
|
-
import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
|
|
41284
|
+
import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
|
|
41283
41285
|
import { join as join54, dirname as dirname18 } from "node:path";
|
|
41284
41286
|
import { homedir as homedir13 } from "node:os";
|
|
41285
41287
|
import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
|
|
41286
41288
|
import { fileURLToPath as fileURLToPath11 } from "node:url";
|
|
41289
|
+
/**
 * Map the amount of GPU memory to the PersonaPlex weight variant to use.
 *
 * @param {number} vramGB - Memory available to the GPU, in gigabytes.
 * @returns {"original"|"nf4"|"turbo2bit"} "original" at 48 GB and above,
 *   "nf4" from 16 GB up to (but excluding) 48 GB, otherwise "turbo2bit".
 */
function selectWeightTier(vramGB) {
  // Ordered from the most to the least demanding tier; the first threshold
  // that the available memory satisfies wins.
  const thresholds = [
    [48, "original"],
    [16, "nf4"],
  ];
  for (const [minimumGB, tierName] of thresholds) {
    if (vramGB >= minimumGB) {
      return tierName;
    }
  }
  return "turbo2bit";
}
|
|
41296
|
+
/**
 * Detect whether the host is an NVIDIA Jetson-class board and report its
 * total system memory.
 *
 * The board name is read from /proc/device-tree/model and matched against
 * "jetson", "orin" or "tegra" (case-insensitive). Total memory comes from the
 * MemTotal line of /proc/meminfo, which is read directly instead of spawning
 * a shell and `grep`, so detection does not depend on PATH or a subprocess.
 *
 * @returns {{isJetson: boolean, model: string, totalMemGB: number}}
 *   `isJetson: true` with the model string and memory in GiB when both files
 *   could be read and the model matched; otherwise
 *   `{ isJetson: false, model: "", totalMemGB: 0 }`.
 */
function detectJetson() {
  const notJetson = { isJetson: false, model: "", totalMemGB: 0 };
  try {
    // The device-tree string is NUL-terminated; strip that before matching.
    const model = readFileSync28("/proc/device-tree/model", "utf8").replace(/\0/g, "").trim();
    if (!/jetson|orin|tegra/i.test(model)) {
      return notJetson;
    }
    const memInfo = readFileSync28("/proc/meminfo", "utf8");
    const memTotal = /^MemTotal:\s*(\d+)/m.exec(memInfo);
    if (!memTotal) {
      // Same outcome as the previous `grep` exiting non-zero on no match.
      return notJetson;
    }
    const memKB = Number.parseInt(memTotal[1], 10);
    return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
  } catch {
    // Files are absent on non-Linux / non-device-tree hosts: not a Jetson.
    return notJetson;
  }
}
|
|
41287
41308
|
function detectPersonaPlexCapability() {
|
|
41309
|
+
const fail = (reason) => ({
|
|
41310
|
+
supported: false,
|
|
41311
|
+
reason,
|
|
41312
|
+
gpuName: "",
|
|
41313
|
+
vramGB: 0,
|
|
41314
|
+
weightTier: "turbo2bit",
|
|
41315
|
+
needsHfToken: false
|
|
41316
|
+
});
|
|
41317
|
+
const jetson = detectJetson();
|
|
41318
|
+
if (jetson.isJetson) {
|
|
41319
|
+
const vramGB = jetson.totalMemGB;
|
|
41320
|
+
if (vramGB < 8)
|
|
41321
|
+
return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
|
|
41322
|
+
const tier = selectWeightTier(vramGB);
|
|
41323
|
+
const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
|
|
41324
|
+
const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
|
|
41325
|
+
return {
|
|
41326
|
+
supported: true,
|
|
41327
|
+
reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
|
|
41328
|
+
gpuName: jetson.model,
|
|
41329
|
+
vramGB,
|
|
41330
|
+
weightTier: effectiveTier,
|
|
41331
|
+
needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
|
|
41332
|
+
};
|
|
41333
|
+
}
|
|
41288
41334
|
try {
|
|
41289
41335
|
const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
|
|
41290
41336
|
encoding: "utf8",
|
|
41291
41337
|
timeout: 5e3,
|
|
41292
41338
|
stdio: "pipe"
|
|
41293
41339
|
}).trim();
|
|
41294
|
-
if (!nvsmi)
|
|
41295
|
-
return
|
|
41296
|
-
}
|
|
41340
|
+
if (!nvsmi)
|
|
41341
|
+
return fail("No NVIDIA GPU detected");
|
|
41297
41342
|
const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
|
|
41298
41343
|
const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
|
|
41299
|
-
if (vramGB <
|
|
41300
|
-
|
|
41301
|
-
if (isJetson) {
|
|
41302
|
-
try {
|
|
41303
|
-
const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
|
|
41304
|
-
const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
|
|
41305
|
-
const totalGB = memKB / 1024 / 1024;
|
|
41306
|
-
if (totalGB >= 32) {
|
|
41307
|
-
return { supported: true, reason: `Jetson unified memory (${totalGB.toFixed(0)}GB total)`, gpuName: gpuName ?? "", vramGB: totalGB };
|
|
41308
|
-
}
|
|
41309
|
-
} catch {
|
|
41310
|
-
}
|
|
41311
|
-
}
|
|
41312
|
-
return { supported: false, reason: `GPU has ${vramGB.toFixed(1)}GB VRAM (need \u226516GB)`, gpuName: gpuName ?? "", vramGB };
|
|
41344
|
+
if (vramGB < 8) {
|
|
41345
|
+
return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
|
|
41313
41346
|
}
|
|
41314
41347
|
try {
|
|
41315
41348
|
execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
|
|
@@ -41317,11 +41350,22 @@ function detectPersonaPlexCapability() {
|
|
|
41317
41350
|
stdio: "pipe"
|
|
41318
41351
|
});
|
|
41319
41352
|
} catch {
|
|
41320
|
-
|
|
41353
|
+
const tier2 = selectWeightTier(vramGB);
|
|
41354
|
+
return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
|
|
41321
41355
|
}
|
|
41322
|
-
|
|
41356
|
+
const tier = selectWeightTier(vramGB);
|
|
41357
|
+
const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
|
|
41358
|
+
const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
|
|
41359
|
+
return {
|
|
41360
|
+
supported: true,
|
|
41361
|
+
reason: `OK \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
|
|
41362
|
+
gpuName: gpuName ?? "",
|
|
41363
|
+
vramGB,
|
|
41364
|
+
weightTier: effectiveTier,
|
|
41365
|
+
needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
|
|
41366
|
+
};
|
|
41323
41367
|
} catch {
|
|
41324
|
-
return
|
|
41368
|
+
return fail("No NVIDIA GPU detected (nvidia-smi not found)");
|
|
41325
41369
|
}
|
|
41326
41370
|
}
|
|
41327
41371
|
function isPersonaPlexRunning() {
|
|
@@ -41348,15 +41392,36 @@ function getPersonaPlexWSUrl() {
|
|
|
41348
41392
|
function isPersonaPlexInstalled() {
|
|
41349
41393
|
return existsSync37(join54(PERSONAPLEX_DIR, "model_ready"));
|
|
41350
41394
|
}
|
|
41351
|
-
|
|
41395
|
+
/**
 * Resolve the weight tier to use for the installed PersonaPlex model.
 *
 * Prefers the tier name persisted in `<PERSONAPLEX_DIR>/weight_tier`. The
 * saved value is accepted only when it is an own key of WEIGHT_REPOS; the
 * `in` operator would also accept inherited names such as "constructor" or
 * "toString". If the file is missing, unreadable, or holds an unknown name,
 * the tier is taken from a fresh capability detection.
 *
 * @returns {string} A key of WEIGHT_REPOS.
 */
function getWeightTier() {
  const tierFile = join54(PERSONAPLEX_DIR, "weight_tier");
  if (existsSync37(tierFile)) {
    try {
      const saved = readFileSync28(tierFile, "utf8").trim();
      if (Object.prototype.hasOwnProperty.call(WEIGHT_REPOS, saved)) {
        return saved;
      }
    } catch {
      // Deliberate best-effort: an unreadable tier file is treated like a
      // missing one and we fall back to hardware detection below.
    }
  }
  return detectPersonaPlexCapability().weightTier;
}
|
|
41404
|
+
/**
 * Look up the download descriptor for a weight tier.
 *
 * Only own entries of WEIGHT_REPOS are returned. A bare `WEIGHT_REPOS[tier]`
 * would hand back inherited Object.prototype members for names such as
 * "constructor" or "toString".
 *
 * @param {string} tier - Tier name, e.g. "original", "nf4" or "turbo2bit".
 * @returns {object|undefined} The WEIGHT_REPOS entry for `tier`, or
 *   `undefined` when the tier is unknown.
 */
function getWeightRepoInfo(tier) {
  if (!Object.prototype.hasOwnProperty.call(WEIGHT_REPOS, tier)) {
    return undefined;
  }
  return WEIGHT_REPOS[tier];
}
|
|
41407
|
+
async function installPersonaPlex(onInfo, weightTier) {
|
|
41352
41408
|
const log = onInfo ?? (() => {
|
|
41353
41409
|
});
|
|
41354
41410
|
mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
|
|
41411
|
+
let arch2 = "";
|
|
41412
|
+
try {
|
|
41413
|
+
arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
|
|
41414
|
+
} catch {
|
|
41415
|
+
}
|
|
41416
|
+
const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
|
|
41417
|
+
if (isAarch64)
|
|
41418
|
+
log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
|
|
41355
41419
|
const venvDir = join54(PERSONAPLEX_DIR, "venv");
|
|
41356
41420
|
if (!existsSync37(venvDir)) {
|
|
41357
41421
|
log("Creating Python virtual environment...");
|
|
41358
41422
|
try {
|
|
41359
|
-
|
|
41423
|
+
const ssp = isAarch64 ? " --system-site-packages" : "";
|
|
41424
|
+
execSync27(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
|
|
41360
41425
|
} catch (err) {
|
|
41361
41426
|
log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
|
|
41362
41427
|
return false;
|
|
@@ -41364,14 +41429,6 @@ async function installPersonaPlex(onInfo) {
|
|
|
41364
41429
|
}
|
|
41365
41430
|
const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
|
|
41366
41431
|
const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
|
|
41367
|
-
let arch2 = "";
|
|
41368
|
-
try {
|
|
41369
|
-
arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
|
|
41370
|
-
} catch {
|
|
41371
|
-
}
|
|
41372
|
-
const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
|
|
41373
|
-
if (isAarch64)
|
|
41374
|
-
log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
|
|
41375
41432
|
log("Checking system dependencies (libopus)...");
|
|
41376
41433
|
try {
|
|
41377
41434
|
if (process.platform === "linux") {
|
|
@@ -41456,12 +41513,64 @@ async function installPersonaPlex(onInfo) {
|
|
|
41456
41513
|
execSync27(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
|
|
41457
41514
|
} catch {
|
|
41458
41515
|
}
|
|
41459
|
-
|
|
41460
|
-
|
|
41461
|
-
|
|
41516
|
+
const tier = weightTier ?? detectPersonaPlexCapability().weightTier;
|
|
41517
|
+
const repoInfo = WEIGHT_REPOS[tier];
|
|
41518
|
+
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 ${repoInfo.needsToken ? "requires HF_TOKEN" : "public, no token needed"}`);
|
|
41519
|
+
log(`Downloading PersonaPlex weights (${repoInfo.sizeGB}GB)...`);
|
|
41520
|
+
try {
|
|
41521
|
+
const tokenArg = repoInfo.needsToken ? "" : "--token ''";
|
|
41522
|
+
const dlCmd = `"${python}" -c "from huggingface_hub import hf_hub_download; f=hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}); print(f)"`;
|
|
41523
|
+
const weightPath = execSync27(dlCmd, {
|
|
41524
|
+
encoding: "utf8",
|
|
41525
|
+
timeout: 6e5,
|
|
41526
|
+
stdio: "pipe",
|
|
41527
|
+
env: { ...process.env }
|
|
41528
|
+
}).trim();
|
|
41529
|
+
log(`Weights downloaded: ${repoInfo.file}`);
|
|
41530
|
+
if (tier !== "original") {
|
|
41531
|
+
log("Downloading Mimi codec and tokenizer...");
|
|
41532
|
+
try {
|
|
41533
|
+
const hasToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
|
|
41534
|
+
if (hasToken) {
|
|
41535
|
+
execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer_spm_32k_3.model'); hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer-e351c8d8-checkpoint125.safetensors')"`, {
|
|
41536
|
+
timeout: 3e5,
|
|
41537
|
+
stdio: "pipe"
|
|
41538
|
+
});
|
|
41539
|
+
log("Codec + tokenizer downloaded.");
|
|
41540
|
+
} else {
|
|
41541
|
+
log("Note: Mimi codec needs HF_TOKEN on first run (set HF_TOKEN env var).");
|
|
41542
|
+
log("Weights themselves are public \u2014 no token needed for the model.");
|
|
41543
|
+
}
|
|
41544
|
+
} catch {
|
|
41545
|
+
}
|
|
41546
|
+
}
|
|
41547
|
+
} catch (err) {
|
|
41548
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
41549
|
+
if (repoInfo.needsToken && /401|403|gated|unauthorized/i.test(msg)) {
|
|
41550
|
+
log(`HF_TOKEN required for ${tier} weights. Set HF_TOKEN or accept license at https://huggingface.co/${repoInfo.repo}`);
|
|
41551
|
+
if (tier === "original") {
|
|
41552
|
+
log("Auto-downgrading to INT4 weights (no token required)...");
|
|
41553
|
+
const nf4 = WEIGHT_REPOS["nf4"];
|
|
41554
|
+
try {
|
|
41555
|
+
execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${nf4.repo}', '${nf4.file}', token=False)"`, {
|
|
41556
|
+
timeout: 6e5,
|
|
41557
|
+
stdio: "pipe"
|
|
41558
|
+
});
|
|
41559
|
+
writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), "nf4");
|
|
41560
|
+
log(`Downloaded INT4 weights instead (${nf4.sizeGB}GB, public).`);
|
|
41561
|
+
} catch {
|
|
41562
|
+
log("Weight download failed.");
|
|
41563
|
+
return false;
|
|
41564
|
+
}
|
|
41565
|
+
}
|
|
41566
|
+
} else {
|
|
41567
|
+
log(`Weight download failed: ${msg}`);
|
|
41568
|
+
log("Weights will download on first server launch.");
|
|
41569
|
+
}
|
|
41462
41570
|
}
|
|
41571
|
+
writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), tier);
|
|
41463
41572
|
writeFileSync16(join54(PERSONAPLEX_DIR, "model_ready"), (/* @__PURE__ */ new Date()).toISOString());
|
|
41464
|
-
log(
|
|
41573
|
+
log(`PersonaPlex installed (${tier} tier). Use /call to start voice session.`);
|
|
41465
41574
|
return true;
|
|
41466
41575
|
}
|
|
41467
41576
|
async function startPersonaPlexDaemon(onInfo) {
|
|
@@ -41483,7 +41592,39 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41483
41592
|
const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
|
|
41484
41593
|
const sslDir = join54(PERSONAPLEX_DIR, "ssl");
|
|
41485
41594
|
mkdirSync15(sslDir, { recursive: true });
|
|
41486
|
-
|
|
41595
|
+
const tier = getWeightTier();
|
|
41596
|
+
const repoInfo = WEIGHT_REPOS[tier];
|
|
41597
|
+
const extraArgs = [];
|
|
41598
|
+
if (tier !== "original") {
|
|
41599
|
+
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
|
|
41600
|
+
const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
|
|
41601
|
+
const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
|
|
41602
|
+
if (!existsSync37(dequantScript)) {
|
|
41603
|
+
const shipped = getShippedVoicesDir();
|
|
41604
|
+
if (shipped) {
|
|
41605
|
+
const src = join54(shipped, "dequant-loader.py");
|
|
41606
|
+
if (existsSync37(src))
|
|
41607
|
+
copyFileSync2(src, dequantScript);
|
|
41608
|
+
}
|
|
41609
|
+
}
|
|
41610
|
+
try {
|
|
41611
|
+
const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41612
|
+
if (existsSync37(dequantScript) && existsSync37(weightPath)) {
|
|
41613
|
+
try {
|
|
41614
|
+
execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
|
|
41615
|
+
if (existsSync37(cachedBf16)) {
|
|
41616
|
+
extraArgs.push("--moshi-weight", cachedBf16);
|
|
41617
|
+
log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
|
|
41618
|
+
}
|
|
41619
|
+
} catch (e) {
|
|
41620
|
+
log(`Dequantization failed \u2014 server will try to load original weights`);
|
|
41621
|
+
}
|
|
41622
|
+
}
|
|
41623
|
+
} catch {
|
|
41624
|
+
log(`Weight file not found \u2014 server will download on first run`);
|
|
41625
|
+
}
|
|
41626
|
+
}
|
|
41627
|
+
log(`Starting PersonaPlex daemon (${tier} tier)...`);
|
|
41487
41628
|
const child = spawn19(venvPython2, [
|
|
41488
41629
|
"-m",
|
|
41489
41630
|
"moshi.server",
|
|
@@ -41494,7 +41635,8 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41494
41635
|
"--ssl",
|
|
41495
41636
|
sslDir,
|
|
41496
41637
|
"--device",
|
|
41497
|
-
"cuda"
|
|
41638
|
+
"cuda",
|
|
41639
|
+
...extraArgs
|
|
41498
41640
|
], {
|
|
41499
41641
|
stdio: ["ignore", "pipe", "pipe"],
|
|
41500
41642
|
detached: true,
|
|
@@ -41581,8 +41723,7 @@ function listPersonaPlexVoices() {
|
|
|
41581
41723
|
}
|
|
41582
41724
|
if (existsSync37(CUSTOM_VOICES_DIR)) {
|
|
41583
41725
|
try {
|
|
41584
|
-
const
|
|
41585
|
-
for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
|
|
41726
|
+
for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
|
|
41586
41727
|
if (f.endsWith(".pt")) {
|
|
41587
41728
|
const name = f.replace(/\.pt$/, "");
|
|
41588
41729
|
voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
|
|
@@ -41784,10 +41925,11 @@ async function autoSetupPersonaPlex(onInfo) {
|
|
|
41784
41925
|
log(`PersonaPlex not available: ${caps.reason}`);
|
|
41785
41926
|
return null;
|
|
41786
41927
|
}
|
|
41787
|
-
|
|
41928
|
+
const tierInfo = WEIGHT_REPOS[caps.weightTier];
|
|
41929
|
+
log(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB) \u2192 ${caps.weightTier} weights (${tierInfo.sizeGB}GB${caps.needsHfToken ? "" : ", no HF token needed"})`);
|
|
41788
41930
|
if (!isPersonaPlexInstalled()) {
|
|
41789
41931
|
log("Installing PersonaPlex (first time setup)...");
|
|
41790
|
-
const ok = await installPersonaPlex(log);
|
|
41932
|
+
const ok = await installPersonaPlex(log, caps.weightTier);
|
|
41791
41933
|
if (!ok) {
|
|
41792
41934
|
log("PersonaPlex installation failed.");
|
|
41793
41935
|
return null;
|
|
@@ -41807,11 +41949,16 @@ async function autoSetupPersonaPlex(onInfo) {
|
|
|
41807
41949
|
}
|
|
41808
41950
|
return await startPersonaPlexDaemon(log);
|
|
41809
41951
|
}
|
|
41810
|
-
var PERSONAPLEX_DIR, PID_FILE, PORT_FILE, LOG_FILE, CUSTOM_VOICES_DIR;
|
|
41952
|
+
var WEIGHT_REPOS, PERSONAPLEX_DIR, PID_FILE, PORT_FILE, LOG_FILE, CUSTOM_VOICES_DIR;
|
|
41811
41953
|
var init_personaplex = __esm({
|
|
41812
41954
|
"packages/cli/dist/tui/personaplex.js"() {
|
|
41813
41955
|
"use strict";
|
|
41814
41956
|
init_render();
|
|
41957
|
+
WEIGHT_REPOS = {
|
|
41958
|
+
original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
|
|
41959
|
+
nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
|
|
41960
|
+
turbo2bit: { repo: "cudabenchmarktest/personaplex-7b-turbo2bit", file: "model-turbo2bit.safetensors", sizeGB: 2.1, needsToken: false }
|
|
41961
|
+
};
|
|
41815
41962
|
PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
|
|
41816
41963
|
PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
|
|
41817
41964
|
PORT_FILE = join54(PERSONAPLEX_DIR, "daemon.port");
|
|
@@ -45327,7 +45474,7 @@ __export(voice_exports, {
|
|
|
45327
45474
|
registerCustomOnnxModel: () => registerCustomOnnxModel,
|
|
45328
45475
|
resetNarrationContext: () => resetNarrationContext
|
|
45329
45476
|
});
|
|
45330
|
-
import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as
|
|
45477
|
+
import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
|
|
45331
45478
|
import { join as join58, dirname as dirname19 } from "node:path";
|
|
45332
45479
|
import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
|
|
45333
45480
|
import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
|
|
@@ -46461,7 +46608,7 @@ var init_voice = __esm({
|
|
|
46461
46608
|
const p = join58(dir, f);
|
|
46462
46609
|
let size = 0;
|
|
46463
46610
|
try {
|
|
46464
|
-
size =
|
|
46611
|
+
size = statSync14(p).size;
|
|
46465
46612
|
} catch {
|
|
46466
46613
|
}
|
|
46467
46614
|
return {
|
|
@@ -48077,7 +48224,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
|
|
|
48077
48224
|
// packages/cli/dist/tui/commands.js
|
|
48078
48225
|
import * as nodeOs from "node:os";
|
|
48079
48226
|
import { execSync as nodeExecSync } from "node:child_process";
|
|
48080
|
-
import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as
|
|
48227
|
+
import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
|
|
48081
48228
|
import { join as join59 } from "node:path";
|
|
48082
48229
|
function safeLog(text) {
|
|
48083
48230
|
if (isNeovimActive()) {
|
|
@@ -48890,7 +49037,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
48890
49037
|
ipfsFiles = files.length;
|
|
48891
49038
|
for (const f of files) {
|
|
48892
49039
|
try {
|
|
48893
|
-
ipfsBytes +=
|
|
49040
|
+
ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
|
|
48894
49041
|
} catch {
|
|
48895
49042
|
}
|
|
48896
49043
|
}
|
|
@@ -48904,7 +49051,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
48904
49051
|
else {
|
|
48905
49052
|
heliaBlocks++;
|
|
48906
49053
|
try {
|
|
48907
|
-
heliaBytes +=
|
|
49054
|
+
heliaBytes += statSync15(join59(dir, entry.name)).size;
|
|
48908
49055
|
} catch {
|
|
48909
49056
|
}
|
|
48910
49057
|
}
|
|
@@ -48997,7 +49144,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
48997
49144
|
const count = memStore.count();
|
|
48998
49145
|
lines.push(`
|
|
48999
49146
|
${c2.bold("Structured Memory (SQLite)")}`);
|
|
49000
|
-
lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(
|
|
49147
|
+
lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
|
|
49001
49148
|
cDb(db);
|
|
49002
49149
|
}
|
|
49003
49150
|
} catch {
|
|
@@ -49028,7 +49175,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49028
49175
|
walkStorage(full, subCat);
|
|
49029
49176
|
} else {
|
|
49030
49177
|
try {
|
|
49031
|
-
const sz =
|
|
49178
|
+
const sz = statSync15(full).size;
|
|
49032
49179
|
totalBytes += sz;
|
|
49033
49180
|
if (!categories[category])
|
|
49034
49181
|
categories[category] = { files: 0, bytes: 0 };
|
|
@@ -49329,7 +49476,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49329
49476
|
const caps = detectPersonaPlexCapability2();
|
|
49330
49477
|
if (!caps.supported) {
|
|
49331
49478
|
renderWarning(`PersonaPlex not available: ${caps.reason}`);
|
|
49332
|
-
renderInfo("Requirements: NVIDIA GPU with \
|
|
49479
|
+
renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
|
|
49333
49480
|
return "handled";
|
|
49334
49481
|
}
|
|
49335
49482
|
renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB VRAM) \u2014 PersonaPlex compatible \u2713`);
|
|
@@ -50998,7 +51145,7 @@ async function showCohereDashboard(ctx) {
|
|
|
50998
51145
|
const snapItems = snaps.slice(0, 20).map((f) => ({
|
|
50999
51146
|
key: f,
|
|
51000
51147
|
label: f.replace(".json", ""),
|
|
51001
|
-
detail: `${formatFileSize(
|
|
51148
|
+
detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
|
|
51002
51149
|
}));
|
|
51003
51150
|
if (snapItems.length > 0) {
|
|
51004
51151
|
await tuiSelect({
|
|
@@ -59275,7 +59422,7 @@ var init_tool_policy = __esm({
|
|
|
59275
59422
|
});
|
|
59276
59423
|
|
|
59277
59424
|
// packages/cli/dist/tui/telegram-bridge.js
|
|
59278
|
-
import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as
|
|
59425
|
+
import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
|
|
59279
59426
|
import { join as join68, resolve as resolve30 } from "node:path";
|
|
59280
59427
|
import { writeFile as writeFileAsync } from "node:fs/promises";
|
|
59281
59428
|
function convertMarkdownToTelegramHTML(md) {
|
|
@@ -71277,7 +71424,7 @@ __export(index_repo_exports, {
|
|
|
71277
71424
|
indexRepoCommand: () => indexRepoCommand
|
|
71278
71425
|
});
|
|
71279
71426
|
import { resolve as resolve34 } from "node:path";
|
|
71280
|
-
import { existsSync as existsSync56, statSync as
|
|
71427
|
+
import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
|
|
71281
71428
|
import { cwd as cwd2 } from "node:process";
|
|
71282
71429
|
async function indexRepoCommand(opts, _config) {
|
|
71283
71430
|
const repoRoot = resolve34(opts.repoPath ?? cwd2());
|
|
@@ -71287,7 +71434,7 @@ async function indexRepoCommand(opts, _config) {
|
|
|
71287
71434
|
printError(`Path does not exist: ${repoRoot}`);
|
|
71288
71435
|
process.exit(1);
|
|
71289
71436
|
}
|
|
71290
|
-
const stat5 =
|
|
71437
|
+
const stat5 = statSync17(repoRoot);
|
|
71291
71438
|
if (!stat5.isDirectory()) {
|
|
71292
71439
|
printError(`Path is not a directory: ${repoRoot}`);
|
|
71293
71440
|
process.exit(1);
|
package/package.json
CHANGED
package/voices/personaplex/dequant-loader.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
|
|
4
|
+
|
|
5
|
+
For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
|
|
6
|
+
bf16 safetensors file that moshi.server can load natively.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
|
|
10
|
+
python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
|
|
11
|
+
|
|
12
|
+
The output file can then be passed to moshi.server via --moshi-weight.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os, sys, math, time
|
|
16
|
+
import torch
|
|
17
|
+
from safetensors.torch import load_file, save_file
|
|
18
|
+
|
|
19
|
+
NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def fast_wht(x):
    """Normalized Walsh-Hadamard transform along the last dimension.

    Runs log2(n) butterfly stages (pairing elements ``h`` apart inside blocks
    of ``2 * h``) and divides the result by ``sqrt(n)``.

    Unlike an in-place butterfly over ``x.view(...)``, this never writes into
    the caller's tensor and also accepts non-contiguous input.

    Args:
        x: Tensor whose last dimension has a power-of-two length.

    Returns:
        A new tensor with the same shape as ``x``.

    Raises:
        ValueError: If the last dimension is not a power of two.
    """
    n = x.shape[-1]
    if (n & (n - 1)) != 0:
        raise ValueError(f"fast_wht needs a power-of-two last dimension, got {n}")

    lead = x.shape[:-1]
    out = x
    h = 1
    while h < n:
        # (..., blocks, 2, h): index 0 / 1 on the middle axis are the two
        # halves of every 2*h block.
        pairs = out.reshape(*lead, -1, 2, h)
        a = pairs[..., 0, :]
        b = pairs[..., 1, :]
        # stack allocates fresh storage, so the input is left untouched.
        out = torch.stack((a + b, a - b), dim=-2).reshape(*lead, n)
        h *= 2
    return out / math.sqrt(n)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def detect_format(state):
    """Classify a loaded state dict by the key suffixes it contains.

    Returns "turbo2bit" if any key ends with ".packed", otherwise "nf4" if
    any key ends with ".__scales__", otherwise "plain".
    """
    # Checked in priority order: a ".packed" key wins over a ".__scales__" key.
    suffix_to_format = (
        (".packed", "turbo2bit"),
        (".__scales__", "nf4"),
    )
    for suffix, fmt in suffix_to_format:
        if any(key.endswith(suffix) for key in state):
            return fmt
    return "plain"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def dequant_nf4(state, group_size=64):
    """Dequantize group-scaled 4-bit weights to bfloat16.

    A tensor ``name`` is treated as quantized when ``name.__scales__`` is also
    present. Its bytes hold two 4-bit codes each (low nibble first); a code
    ``c`` decodes to ``(c - 8) * scale`` with one scale per ``group_size``
    consecutive values. ``name.__shape__`` (entries <= 0 are ignored) and
    ``name.__numel__`` give the original shape and element count. Tensors
    without a scales companion are only cast to bfloat16.

    All intermediate tensors stay on the device of the packed data, so a
    state dict loaded on a non-CPU device works, and packed tensors of any
    shape are flattened before unpacking.

    Args:
        state: Mapping of tensor name to tensor, as returned by ``load_file``.
        group_size: Number of values sharing one scale (default 64).

    Returns:
        Dict of tensor name to bfloat16 tensor, in the key order of ``state``
        with the ``__scales__`` / ``__shape__`` / ``__numel__`` entries removed.
    """
    meta_suffixes = (".__scales__", ".__shape__", ".__numel__")
    result = {}

    for name, tensor in state.items():
        if name.endswith(meta_suffixes):
            continue

        scales_key = f"{name}.__scales__"
        if scales_key not in state:
            result[name] = tensor.to(torch.bfloat16)
            continue

        packed = tensor.reshape(-1)
        scales = state[scales_key].float().reshape(-1)
        shape = [s for s in state[f"{name}.__shape__"].tolist() if s > 0]
        numel = int(state[f"{name}.__numel__"].item())

        # Each byte carries two codes; subtracting 8 recentres 0..15 to -8..7.
        lo = (packed & 0x0F).to(torch.int8) - 8
        hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
        # Interleave as lo0, hi0, lo1, hi1, ... on the packed tensor's device.
        unpacked = torch.stack((lo, hi), dim=1).reshape(-1).float()

        n_groups = scales.numel()
        groups = unpacked[: n_groups * group_size].reshape(n_groups, group_size)
        # Trailing padding beyond the original element count is dropped.
        deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
        result[name] = deq.reshape(shape).to(torch.bfloat16)

    return result
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def dequant_turbo2bit(state):
    """Dequantize TurboQuant 2-bit (NF2 + WHT) weights to bfloat16.

    A tensor ``name`` is quantized when ``name.packed`` exists. Its companions
    are ``name.scales`` (one scale per group), ``name.gs`` (group size),
    ``name.np2`` (group size padded to a power of two), ``name.numel`` and
    ``name.shape`` (entries <= 0 are ignored). Each packed byte holds four
    2-bit codes, lowest bits first; codes index ``NF2_CENTROIDS``, the group is
    zero-padded to ``np2``, passed through ``fast_wht``, cut back to ``gs`` and
    multiplied by its scale.

    Quantized tensors are discovered from their ``.packed`` key, so they are
    decoded whether or not the bare ``name`` is itself stored in the file
    (NOTE(review): the on-disk layout is not visible here; previously a tensor
    was only decoded if the bare name was also a key, otherwise it was
    silently dropped). A key that ends in a metadata suffix but has no
    ``.packed`` sibling is treated as an ordinary tensor and cast to bfloat16.

    All intermediates stay on the device of the packed data.

    Args:
        state: Mapping of tensor name to tensor, as returned by ``load_file``.

    Returns:
        Dict of tensor name to bfloat16 tensor.
    """
    packed_suffix = ".packed"
    meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
    result = {}

    for key in state:
        name = key
        if key.endswith(meta_suffixes):
            owner = key.rsplit(".", 1)[0]
            if f"{owner}{packed_suffix}" in state:
                if not key.endswith(packed_suffix):
                    continue  # pure metadata of a quantized tensor
                name = owner
        if name in result:
            continue

        packed_key = f"{name}{packed_suffix}"
        if packed_key not in state:
            result[name] = state[name].to(torch.bfloat16)
            continue

        packed = state[packed_key]
        device = packed.device
        gs = int(state[f"{name}.gs"].item())
        gs_pow2 = int(state[f"{name}.np2"].item())
        numel = int(state[f"{name}.numel"].item())
        shape = [s for s in state[f"{name}.shape"].tolist() if s > 0]
        scales = state[f"{name}.scales"].float().reshape(-1)
        n_groups = scales.numel()

        # Unpack 2-bit codes: byte j of a group yields values 4j .. 4j+3.
        p = packed.reshape(n_groups, gs // 4)
        codes = torch.stack([(p >> (2 * i)) & 0x03 for i in range(4)], dim=2)
        codes = codes.reshape(n_groups, gs).long()

        dequant = NF2_CENTROIDS.to(device)[codes]

        # Inverse WHT (the normalized transform is its own inverse); it needs
        # a power-of-two width, hence the zero padding.
        if gs_pow2 > gs:
            pad = torch.zeros(n_groups, gs_pow2 - gs, dtype=dequant.dtype, device=device)
            dequant = torch.cat([dequant, pad], dim=1)
        dequant = fast_wht(dequant)[:, :gs]

        dequant = dequant * scales.unsqueeze(1)
        result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)

    return result
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def main():
    """Command-line entry point: dequantize a safetensors file to bf16.

    Flags: ``--input/-i`` (required), ``--output/-o`` (required) and
    ``--device/-d`` (default "cpu", passed to ``load_file``).

    Exits with status 1 if the input does not exist and with status 0 without
    doing any work if the output already exists and is newer than the input.

    The result is first written next to the destination and then moved into
    place with ``os.replace``. An interrupted run therefore never leaves a
    truncated output file whose fresh mtime would make the cache check above
    accept it on the next invocation.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(f"Error: {args.input} not found", file=sys.stderr)
        sys.exit(1)

    # Skip if output already exists and is newer than input
    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
        print(f"Cached: {args.output} is up to date")
        sys.exit(0)

    print(f"Loading {args.input}...")
    t0 = time.time()
    state = load_file(args.input, device=args.device)

    fmt = detect_format(state)
    print(f"Format: {fmt}")

    if fmt == "nf4":
        result = dequant_nf4(state)
    elif fmt == "turbo2bit":
        result = dequant_turbo2bit(state)
    else:
        print("Already plain bf16/fp16 — copying")
        result = {k: v.to(torch.bfloat16) for k, v in state.items()}

    # The quantized tensors are no longer needed; drop the reference before
    # serializing the (much larger) bf16 result.
    del state

    t1 = time.time()
    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")

    print(f"Saving to {args.output}...")
    partial_output = f"{args.output}.partial"
    try:
        save_file(result, partial_output)
        # Atomic on the same filesystem: readers see the old file or the
        # complete new one, never a half-written one.
        os.replace(partial_output, args.output)
    finally:
        if os.path.exists(partial_output):
            os.remove(partial_output)
    size_gb = os.path.getsize(args.output) / 1024**3
    print(f"Done: {size_gb:.2f} GB")
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
if __name__ == "__main__":
|
|
174
|
+
main()
|