open-agents-ai 0.185.31 → 0.185.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +212 -104
- package/package.json +1 -1
- package/voices/personaplex/dequant-loader.py +174 -0
package/dist/index.js
CHANGED
|
@@ -8663,7 +8663,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
|
|
|
8663
8663
|
const nodeModulesDir = resolve13(this.repoRoot, "node_modules");
|
|
8664
8664
|
let nexusResolved = false;
|
|
8665
8665
|
let installedVersion = "";
|
|
8666
|
-
const
|
|
8666
|
+
const execAsync3 = (cmd, opts = {}) => new Promise((res, rej) => {
|
|
8667
8667
|
const { exec: ex } = __require("node:child_process");
|
|
8668
8668
|
ex(cmd, { encoding: "utf8", timeout: opts.timeout ?? 3e4, cwd: opts.cwd, maxBuffer: 10 * 1024 * 1024 }, (err, stdout) => {
|
|
8669
8669
|
if (err)
|
|
@@ -8683,7 +8683,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
|
|
|
8683
8683
|
}
|
|
8684
8684
|
} else {
|
|
8685
8685
|
try {
|
|
8686
|
-
const globalDir2 = await
|
|
8686
|
+
const globalDir2 = await execAsync3("npm root -g", { timeout: 5e3 });
|
|
8687
8687
|
const globalPkg = join14(globalDir2, "open-agents-nexus", "package.json");
|
|
8688
8688
|
if (existsSync11(globalPkg)) {
|
|
8689
8689
|
nexusResolved = true;
|
|
@@ -8700,10 +8700,10 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
|
|
|
8700
8700
|
}
|
|
8701
8701
|
if (nexusResolved && installedVersion) {
|
|
8702
8702
|
try {
|
|
8703
|
-
const latestRaw = await
|
|
8703
|
+
const latestRaw = await execAsync3("npm view open-agents-nexus version 2>/dev/null", { timeout: 8e3 });
|
|
8704
8704
|
if (latestRaw && latestRaw !== installedVersion) {
|
|
8705
8705
|
try {
|
|
8706
|
-
await
|
|
8706
|
+
await execAsync3(`npm install open-agents-nexus@${latestRaw} --save 2>&1`, {
|
|
8707
8707
|
cwd: this.repoRoot,
|
|
8708
8708
|
timeout: 6e4
|
|
8709
8709
|
});
|
|
@@ -8716,13 +8716,13 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
|
|
|
8716
8716
|
}
|
|
8717
8717
|
if (!nexusResolved) {
|
|
8718
8718
|
try {
|
|
8719
|
-
await
|
|
8719
|
+
await execAsync3("npm install open-agents-nexus@latest 2>&1", {
|
|
8720
8720
|
cwd: this.repoRoot,
|
|
8721
8721
|
timeout: 12e4
|
|
8722
8722
|
});
|
|
8723
8723
|
} catch {
|
|
8724
8724
|
try {
|
|
8725
|
-
await
|
|
8725
|
+
await execAsync3("npm install -g open-agents-nexus@latest 2>&1", { timeout: 12e4 });
|
|
8726
8726
|
} catch {
|
|
8727
8727
|
throw new Error("Failed to install open-agents-nexus. Run: npm install open-agents-nexus");
|
|
8728
8728
|
}
|
|
@@ -8767,7 +8767,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
|
|
|
8767
8767
|
const agentType = args.agent_type || "general";
|
|
8768
8768
|
const nodePaths = [nodeModulesDir];
|
|
8769
8769
|
try {
|
|
8770
|
-
const globalDir2 = await
|
|
8770
|
+
const globalDir2 = await execAsync3("npm root -g", { timeout: 5e3 });
|
|
8771
8771
|
nodePaths.push(globalDir2);
|
|
8772
8772
|
} catch {
|
|
8773
8773
|
}
|
|
@@ -25041,9 +25041,9 @@ var init_verifierRunner = __esm({
|
|
|
25041
25041
|
async executeTests(patch, repoRoot) {
|
|
25042
25042
|
if (patch.testsToRun.length === 0)
|
|
25043
25043
|
return "(no tests specified)";
|
|
25044
|
-
const { execFile:
|
|
25044
|
+
const { execFile: execFile8 } = await import("node:child_process");
|
|
25045
25045
|
const { promisify: promisify7 } = await import("node:util");
|
|
25046
|
-
const execFileAsync6 = promisify7(
|
|
25046
|
+
const execFileAsync6 = promisify7(execFile8);
|
|
25047
25047
|
const outputs = [];
|
|
25048
25048
|
const workDir = this.options.workingDir || repoRoot;
|
|
25049
25049
|
for (const cmd of patch.testsToRun.slice(0, 3)) {
|
|
@@ -41281,11 +41281,35 @@ __export(personaplex_exports, {
|
|
|
41281
41281
|
startPersonaPlexDaemon: () => startPersonaPlexDaemon,
|
|
41282
41282
|
stopPersonaPlex: () => stopPersonaPlex
|
|
41283
41283
|
});
|
|
41284
|
-
import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
|
|
41284
|
+
import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
|
|
41285
41285
|
import { join as join54, dirname as dirname18 } from "node:path";
|
|
41286
41286
|
import { homedir as homedir13 } from "node:os";
|
|
41287
|
-
import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
|
|
41287
|
+
import { execSync as execSync27, spawn as spawn19, execFile as execFile7 } from "node:child_process";
|
|
41288
41288
|
import { fileURLToPath as fileURLToPath11 } from "node:url";
|
|
41289
|
+
function execAsync(cmd, opts = {}) {
|
|
41290
|
+
return new Promise((resolve36, reject) => {
|
|
41291
|
+
const child = spawn19("bash", ["-c", cmd], {
|
|
41292
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
41293
|
+
timeout: opts.timeout ?? 3e5,
|
|
41294
|
+
env: opts.env ?? process.env
|
|
41295
|
+
});
|
|
41296
|
+
let stdout = "";
|
|
41297
|
+
let stderr = "";
|
|
41298
|
+
child.stdout?.on("data", (d) => {
|
|
41299
|
+
stdout += d.toString();
|
|
41300
|
+
});
|
|
41301
|
+
child.stderr?.on("data", (d) => {
|
|
41302
|
+
stderr += d.toString();
|
|
41303
|
+
});
|
|
41304
|
+
child.on("close", (code) => {
|
|
41305
|
+
if (code === 0)
|
|
41306
|
+
resolve36(stdout.trim());
|
|
41307
|
+
else
|
|
41308
|
+
reject(new Error(`Exit ${code}: ${stderr.slice(0, 500)}`));
|
|
41309
|
+
});
|
|
41310
|
+
child.on("error", reject);
|
|
41311
|
+
});
|
|
41312
|
+
}
|
|
41289
41313
|
function selectWeightTier(vramGB) {
|
|
41290
41314
|
if (vramGB >= 48)
|
|
41291
41315
|
return "original";
|
|
@@ -41293,31 +41317,56 @@ function selectWeightTier(vramGB) {
|
|
|
41293
41317
|
return "nf4";
|
|
41294
41318
|
return "turbo2bit";
|
|
41295
41319
|
}
|
|
41320
|
+
function detectJetson() {
|
|
41321
|
+
try {
|
|
41322
|
+
const model = readFileSync28("/proc/device-tree/model", "utf8").replace(/\0/g, "").trim();
|
|
41323
|
+
if (/jetson|orin|tegra/i.test(model)) {
|
|
41324
|
+
const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
|
|
41325
|
+
const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
|
|
41326
|
+
return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
|
|
41327
|
+
}
|
|
41328
|
+
} catch {
|
|
41329
|
+
}
|
|
41330
|
+
return { isJetson: false, model: "", totalMemGB: 0 };
|
|
41331
|
+
}
|
|
41296
41332
|
function detectPersonaPlexCapability() {
|
|
41333
|
+
const fail = (reason) => ({
|
|
41334
|
+
supported: false,
|
|
41335
|
+
reason,
|
|
41336
|
+
gpuName: "",
|
|
41337
|
+
vramGB: 0,
|
|
41338
|
+
weightTier: "turbo2bit",
|
|
41339
|
+
needsHfToken: false
|
|
41340
|
+
});
|
|
41341
|
+
const jetson = detectJetson();
|
|
41342
|
+
if (jetson.isJetson) {
|
|
41343
|
+
const vramGB = jetson.totalMemGB;
|
|
41344
|
+
if (vramGB < 8)
|
|
41345
|
+
return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
|
|
41346
|
+
const tier = selectWeightTier(vramGB);
|
|
41347
|
+
const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
|
|
41348
|
+
const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
|
|
41349
|
+
return {
|
|
41350
|
+
supported: true,
|
|
41351
|
+
reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
|
|
41352
|
+
gpuName: jetson.model,
|
|
41353
|
+
vramGB,
|
|
41354
|
+
weightTier: effectiveTier,
|
|
41355
|
+
needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
|
|
41356
|
+
};
|
|
41357
|
+
}
|
|
41297
41358
|
try {
|
|
41298
41359
|
const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
|
|
41299
41360
|
encoding: "utf8",
|
|
41300
41361
|
timeout: 5e3,
|
|
41301
41362
|
stdio: "pipe"
|
|
41302
41363
|
}).trim();
|
|
41303
|
-
if (!nvsmi)
|
|
41304
|
-
return
|
|
41305
|
-
}
|
|
41364
|
+
if (!nvsmi)
|
|
41365
|
+
return fail("No NVIDIA GPU detected");
|
|
41306
41366
|
const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
|
|
41307
|
-
|
|
41308
|
-
const isJetson = /orin|tegra|jetson/i.test(gpuName ?? "");
|
|
41309
|
-
if (isJetson) {
|
|
41310
|
-
try {
|
|
41311
|
-
const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
|
|
41312
|
-
const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
|
|
41313
|
-
const totalGB = memKB / 1024 / 1024;
|
|
41314
|
-
if (totalGB > vramGB)
|
|
41315
|
-
vramGB = totalGB;
|
|
41316
|
-
} catch {
|
|
41317
|
-
}
|
|
41318
|
-
}
|
|
41367
|
+
const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
|
|
41319
41368
|
if (vramGB < 8) {
|
|
41320
|
-
return {
|
|
41369
|
+
return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
|
|
41321
41370
|
}
|
|
41322
41371
|
try {
|
|
41323
41372
|
execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
|
|
@@ -41326,7 +41375,7 @@ function detectPersonaPlexCapability() {
|
|
|
41326
41375
|
});
|
|
41327
41376
|
} catch {
|
|
41328
41377
|
const tier2 = selectWeightTier(vramGB);
|
|
41329
|
-
return {
|
|
41378
|
+
return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
|
|
41330
41379
|
}
|
|
41331
41380
|
const tier = selectWeightTier(vramGB);
|
|
41332
41381
|
const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
|
|
@@ -41340,7 +41389,7 @@ function detectPersonaPlexCapability() {
|
|
|
41340
41389
|
needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
|
|
41341
41390
|
};
|
|
41342
41391
|
} catch {
|
|
41343
|
-
return
|
|
41392
|
+
return fail("No NVIDIA GPU detected (nvidia-smi not found)");
|
|
41344
41393
|
}
|
|
41345
41394
|
}
|
|
41346
41395
|
function isPersonaPlexRunning() {
|
|
@@ -41383,11 +41432,20 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41383
41432
|
const log = onInfo ?? (() => {
|
|
41384
41433
|
});
|
|
41385
41434
|
mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
|
|
41435
|
+
let arch2 = "";
|
|
41436
|
+
try {
|
|
41437
|
+
arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
|
|
41438
|
+
} catch {
|
|
41439
|
+
}
|
|
41440
|
+
const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
|
|
41441
|
+
if (isAarch64)
|
|
41442
|
+
log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
|
|
41386
41443
|
const venvDir = join54(PERSONAPLEX_DIR, "venv");
|
|
41387
41444
|
if (!existsSync37(venvDir)) {
|
|
41388
41445
|
log("Creating Python virtual environment...");
|
|
41389
41446
|
try {
|
|
41390
|
-
|
|
41447
|
+
const ssp = isAarch64 ? " --system-site-packages" : "";
|
|
41448
|
+
await execAsync(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4 });
|
|
41391
41449
|
} catch (err) {
|
|
41392
41450
|
log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
|
|
41393
41451
|
return false;
|
|
@@ -41395,14 +41453,6 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41395
41453
|
}
|
|
41396
41454
|
const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
|
|
41397
41455
|
const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
|
|
41398
|
-
let arch2 = "";
|
|
41399
|
-
try {
|
|
41400
|
-
arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
|
|
41401
|
-
} catch {
|
|
41402
|
-
}
|
|
41403
|
-
const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
|
|
41404
|
-
if (isAarch64)
|
|
41405
|
-
log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
|
|
41406
41456
|
log("Checking system dependencies (libopus)...");
|
|
41407
41457
|
try {
|
|
41408
41458
|
if (process.platform === "linux") {
|
|
@@ -41419,7 +41469,7 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41419
41469
|
} catch {
|
|
41420
41470
|
log("ARM64: Installing Rust toolchain (needed for sphn audio codec)...");
|
|
41421
41471
|
try {
|
|
41422
|
-
|
|
41472
|
+
await execAsync("curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y", { timeout: 12e4 });
|
|
41423
41473
|
} catch (e) {
|
|
41424
41474
|
log(`Rust install failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
41425
41475
|
log("Install Rust manually: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh");
|
|
@@ -41427,7 +41477,7 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41427
41477
|
}
|
|
41428
41478
|
}
|
|
41429
41479
|
try {
|
|
41430
|
-
|
|
41480
|
+
await execAsync(`"${pip}" install --quiet maturin`, { timeout: 6e4, stdio: "pipe" });
|
|
41431
41481
|
} catch {
|
|
41432
41482
|
}
|
|
41433
41483
|
}
|
|
@@ -41435,13 +41485,13 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41435
41485
|
const repoDir = join54(PERSONAPLEX_DIR, "personaplex-repo");
|
|
41436
41486
|
try {
|
|
41437
41487
|
if (!existsSync37(repoDir)) {
|
|
41438
|
-
|
|
41488
|
+
await execAsync(`git clone https://github.com/NVIDIA/personaplex.git "${repoDir}"`, { timeout: 12e4 });
|
|
41439
41489
|
}
|
|
41440
41490
|
if (isAarch64) {
|
|
41441
41491
|
log("ARM64: Building sphn from source (Opus codec bindings)...");
|
|
41442
41492
|
try {
|
|
41443
41493
|
const rustEnv = `export PATH="$HOME/.cargo/bin:$PATH" &&`;
|
|
41444
|
-
|
|
41494
|
+
await execAsync(`${rustEnv} "${pip}" install --quiet --no-binary sphn sphn`, { timeout: 3e5 });
|
|
41445
41495
|
log("ARM64: sphn built successfully");
|
|
41446
41496
|
} catch (e) {
|
|
41447
41497
|
log(`ARM64: sphn build failed \u2014 ${e instanceof Error ? e.message : String(e)}`);
|
|
@@ -41449,11 +41499,11 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41449
41499
|
return false;
|
|
41450
41500
|
}
|
|
41451
41501
|
}
|
|
41452
|
-
|
|
41502
|
+
await execAsync(`"${pip}" install --quiet "${join54(repoDir, "moshi")}/."`, { timeout: 3e5 });
|
|
41453
41503
|
} catch (err) {
|
|
41454
41504
|
log(`Moshi install failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
41455
41505
|
try {
|
|
41456
|
-
|
|
41506
|
+
await execAsync(`"${pip}" install --quiet torch torchaudio websockets soundfile huggingface_hub`, { timeout: 3e5, stdio: "pipe" });
|
|
41457
41507
|
} catch {
|
|
41458
41508
|
}
|
|
41459
41509
|
return false;
|
|
@@ -41479,12 +41529,12 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41479
41529
|
if (isAarch64) {
|
|
41480
41530
|
log("ARM64: Installing bitsandbytes for INT4 inference...");
|
|
41481
41531
|
try {
|
|
41482
|
-
|
|
41532
|
+
await execAsync(`"${pip}" install --quiet bitsandbytes`, { timeout: 12e4, stdio: "pipe" });
|
|
41483
41533
|
} catch {
|
|
41484
41534
|
}
|
|
41485
41535
|
}
|
|
41486
41536
|
try {
|
|
41487
|
-
|
|
41537
|
+
await execAsync(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
|
|
41488
41538
|
} catch {
|
|
41489
41539
|
}
|
|
41490
41540
|
const tier = weightTier ?? detectPersonaPlexCapability().weightTier;
|
|
@@ -41494,29 +41544,26 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41494
41544
|
try {
|
|
41495
41545
|
const tokenArg = repoInfo.needsToken ? "" : "--token ''";
|
|
41496
41546
|
const dlCmd = `"${python}" -c "from huggingface_hub import hf_hub_download; f=hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}); print(f)"`;
|
|
41497
|
-
const weightPath =
|
|
41498
|
-
encoding: "utf8",
|
|
41499
|
-
timeout: 6e5,
|
|
41500
|
-
stdio: "pipe",
|
|
41501
|
-
env: { ...process.env }
|
|
41502
|
-
}).trim();
|
|
41547
|
+
const weightPath = (await execAsync(dlCmd, { timeout: 6e5 })).trim();
|
|
41503
41548
|
log(`Weights downloaded: ${repoInfo.file}`);
|
|
41504
41549
|
if (tier !== "original") {
|
|
41505
|
-
log("Downloading Mimi codec
|
|
41506
|
-
|
|
41507
|
-
|
|
41508
|
-
|
|
41509
|
-
|
|
41510
|
-
timeout: 3e5
|
|
41511
|
-
stdio: "pipe"
|
|
41550
|
+
log("Downloading Mimi codec + tokenizer (no token needed)...");
|
|
41551
|
+
const supportFiles = ["tokenizer-e351c8d8-checkpoint125.safetensors", "tokenizer_spm_32k_3.model", "config.json"];
|
|
41552
|
+
for (const sf of supportFiles) {
|
|
41553
|
+
try {
|
|
41554
|
+
await execAsync(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${repoInfo.repo}', '${sf}', token=False)"`, {
|
|
41555
|
+
timeout: 3e5
|
|
41512
41556
|
});
|
|
41513
|
-
|
|
41514
|
-
|
|
41515
|
-
|
|
41516
|
-
|
|
41557
|
+
} catch {
|
|
41558
|
+
try {
|
|
41559
|
+
await execAsync(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('nvidia/personaplex-7b-v1', '${sf}')"`, {
|
|
41560
|
+
timeout: 3e5
|
|
41561
|
+
});
|
|
41562
|
+
} catch {
|
|
41563
|
+
}
|
|
41517
41564
|
}
|
|
41518
|
-
} catch {
|
|
41519
41565
|
}
|
|
41566
|
+
log("Codec + tokenizer downloaded.");
|
|
41520
41567
|
}
|
|
41521
41568
|
} catch (err) {
|
|
41522
41569
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -41526,9 +41573,8 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41526
41573
|
log("Auto-downgrading to INT4 weights (no token required)...");
|
|
41527
41574
|
const nf4 = WEIGHT_REPOS["nf4"];
|
|
41528
41575
|
try {
|
|
41529
|
-
|
|
41530
|
-
timeout: 6e5
|
|
41531
|
-
stdio: "pipe"
|
|
41576
|
+
await execAsync(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${nf4.repo}', '${nf4.file}', token=False)"`, {
|
|
41577
|
+
timeout: 6e5
|
|
41532
41578
|
});
|
|
41533
41579
|
writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), "nf4");
|
|
41534
41580
|
log(`Downloaded INT4 weights instead (${nf4.sizeGB}GB, public).`);
|
|
@@ -41566,7 +41612,52 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41566
41612
|
const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
|
|
41567
41613
|
const sslDir = join54(PERSONAPLEX_DIR, "ssl");
|
|
41568
41614
|
mkdirSync15(sslDir, { recursive: true });
|
|
41569
|
-
|
|
41615
|
+
const tier = getWeightTier();
|
|
41616
|
+
const repoInfo = WEIGHT_REPOS[tier];
|
|
41617
|
+
const extraArgs = [];
|
|
41618
|
+
if (tier !== "original") {
|
|
41619
|
+
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
|
|
41620
|
+
const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
|
|
41621
|
+
const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
|
|
41622
|
+
if (!existsSync37(dequantScript)) {
|
|
41623
|
+
const shipped = getShippedVoicesDir();
|
|
41624
|
+
if (shipped) {
|
|
41625
|
+
const src = join54(shipped, "dequant-loader.py");
|
|
41626
|
+
if (existsSync37(src))
|
|
41627
|
+
copyFileSync2(src, dequantScript);
|
|
41628
|
+
}
|
|
41629
|
+
}
|
|
41630
|
+
try {
|
|
41631
|
+
const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41632
|
+
if (existsSync37(dequantScript) && existsSync37(weightPath)) {
|
|
41633
|
+
try {
|
|
41634
|
+
execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
|
|
41635
|
+
if (existsSync37(cachedBf16)) {
|
|
41636
|
+
extraArgs.push("--moshi-weight", cachedBf16);
|
|
41637
|
+
log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
|
|
41638
|
+
}
|
|
41639
|
+
} catch (e) {
|
|
41640
|
+
log(`Dequantization failed \u2014 server will try to load original weights`);
|
|
41641
|
+
}
|
|
41642
|
+
}
|
|
41643
|
+
try {
|
|
41644
|
+
const mimiPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer-e351c8d8-checkpoint125.safetensors', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41645
|
+
if (existsSync37(mimiPath))
|
|
41646
|
+
extraArgs.push("--mimi-weight", mimiPath);
|
|
41647
|
+
} catch {
|
|
41648
|
+
}
|
|
41649
|
+
try {
|
|
41650
|
+
const tokPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', 'tokenizer_spm_32k_3.model', token=False))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41651
|
+
if (existsSync37(tokPath))
|
|
41652
|
+
extraArgs.push("--tokenizer", tokPath);
|
|
41653
|
+
} catch {
|
|
41654
|
+
}
|
|
41655
|
+
} catch {
|
|
41656
|
+
log(`Weight file not found \u2014 server will download on first run`);
|
|
41657
|
+
}
|
|
41658
|
+
extraArgs.push("--hf-repo", repoInfo.repo);
|
|
41659
|
+
}
|
|
41660
|
+
log(`Starting PersonaPlex daemon (${tier} tier)...`);
|
|
41570
41661
|
const child = spawn19(venvPython2, [
|
|
41571
41662
|
"-m",
|
|
41572
41663
|
"moshi.server",
|
|
@@ -41577,7 +41668,8 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41577
41668
|
"--ssl",
|
|
41578
41669
|
sslDir,
|
|
41579
41670
|
"--device",
|
|
41580
|
-
"cuda"
|
|
41671
|
+
"cuda",
|
|
41672
|
+
...extraArgs
|
|
41581
41673
|
], {
|
|
41582
41674
|
stdio: ["ignore", "pipe", "pipe"],
|
|
41583
41675
|
detached: true,
|
|
@@ -41664,8 +41756,7 @@ function listPersonaPlexVoices() {
|
|
|
41664
41756
|
}
|
|
41665
41757
|
if (existsSync37(CUSTOM_VOICES_DIR)) {
|
|
41666
41758
|
try {
|
|
41667
|
-
const
|
|
41668
|
-
for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
|
|
41759
|
+
for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
|
|
41669
41760
|
if (f.endsWith(".pt")) {
|
|
41670
41761
|
const name = f.replace(/\.pt$/, "");
|
|
41671
41762
|
voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
|
|
@@ -41993,7 +42084,7 @@ async function detectSystemSpecsAsync() {
|
|
|
41993
42084
|
let gpuVramGB = 0;
|
|
41994
42085
|
let gpuName = "";
|
|
41995
42086
|
try {
|
|
41996
|
-
const { stdout: memInfo } = await
|
|
42087
|
+
const { stdout: memInfo } = await execAsync2("free -b 2>/dev/null || sysctl -n hw.memsize 2>/dev/null", { timeout: 5e3 });
|
|
41997
42088
|
if (memInfo.includes("Mem:")) {
|
|
41998
42089
|
const match = memInfo.match(/^Mem:\s+(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)/m);
|
|
41999
42090
|
if (match) {
|
|
@@ -42010,7 +42101,7 @@ async function detectSystemSpecsAsync() {
|
|
|
42010
42101
|
} catch {
|
|
42011
42102
|
}
|
|
42012
42103
|
try {
|
|
42013
|
-
const { stdout: nvidiaSmi } = await
|
|
42104
|
+
const { stdout: nvidiaSmi } = await execAsync2("nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>/dev/null", { timeout: 5e3 });
|
|
42014
42105
|
const lines = nvidiaSmi.trim().split("\n");
|
|
42015
42106
|
if (lines.length > 0) {
|
|
42016
42107
|
for (const line of lines) {
|
|
@@ -43452,7 +43543,7 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
|
|
|
43452
43543
|
mkdirSync16(modelDir2, { recursive: true });
|
|
43453
43544
|
const modelfilePath = join55(modelDir2, `Modelfile.${customName}`);
|
|
43454
43545
|
writeFileSync17(modelfilePath, modelfileContent + "\n", "utf8");
|
|
43455
|
-
await
|
|
43546
|
+
await execAsync2(`ollama create ${customName} -f ${modelfilePath}`, {
|
|
43456
43547
|
timeout: 12e4
|
|
43457
43548
|
});
|
|
43458
43549
|
return customName;
|
|
@@ -43610,7 +43701,7 @@ export PATH="${binDir}:$PATH" # Added by open-agents for nvim
|
|
|
43610
43701
|
} catch {
|
|
43611
43702
|
}
|
|
43612
43703
|
}
|
|
43613
|
-
var
|
|
43704
|
+
var execAsync2, QWEN_VARIANTS, _toolSupportCache, _cloudflaredInstallPromise;
|
|
43614
43705
|
var init_setup = __esm({
|
|
43615
43706
|
"packages/cli/dist/tui/setup.js"() {
|
|
43616
43707
|
"use strict";
|
|
@@ -43619,7 +43710,7 @@ var init_setup = __esm({
|
|
|
43619
43710
|
init_config();
|
|
43620
43711
|
init_dist();
|
|
43621
43712
|
init_tui_select();
|
|
43622
|
-
|
|
43713
|
+
execAsync2 = promisify6(exec2);
|
|
43623
43714
|
QWEN_VARIANTS = [
|
|
43624
43715
|
{ tag: "qwen3.5:0.8b", sizeGB: 1, label: "0.8B params (1.0 GB)", cloud: false },
|
|
43625
43716
|
{ tag: "qwen3.5:2b", sizeGB: 2.7, label: "2B params (2.7 GB)", cloud: false },
|
|
@@ -45416,7 +45507,7 @@ __export(voice_exports, {
|
|
|
45416
45507
|
registerCustomOnnxModel: () => registerCustomOnnxModel,
|
|
45417
45508
|
resetNarrationContext: () => resetNarrationContext
|
|
45418
45509
|
});
|
|
45419
|
-
import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as
|
|
45510
|
+
import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
|
|
45420
45511
|
import { join as join58, dirname as dirname19 } from "node:path";
|
|
45421
45512
|
import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
|
|
45422
45513
|
import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
|
|
@@ -46550,7 +46641,7 @@ var init_voice = __esm({
|
|
|
46550
46641
|
const p = join58(dir, f);
|
|
46551
46642
|
let size = 0;
|
|
46552
46643
|
try {
|
|
46553
|
-
size =
|
|
46644
|
+
size = statSync14(p).size;
|
|
46554
46645
|
} catch {
|
|
46555
46646
|
}
|
|
46556
46647
|
return {
|
|
@@ -48166,7 +48257,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
|
|
|
48166
48257
|
// packages/cli/dist/tui/commands.js
|
|
48167
48258
|
import * as nodeOs from "node:os";
|
|
48168
48259
|
import { execSync as nodeExecSync } from "node:child_process";
|
|
48169
|
-
import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as
|
|
48260
|
+
import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
|
|
48170
48261
|
import { join as join59 } from "node:path";
|
|
48171
48262
|
function safeLog(text) {
|
|
48172
48263
|
if (isNeovimActive()) {
|
|
@@ -48979,7 +49070,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
48979
49070
|
ipfsFiles = files.length;
|
|
48980
49071
|
for (const f of files) {
|
|
48981
49072
|
try {
|
|
48982
|
-
ipfsBytes +=
|
|
49073
|
+
ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
|
|
48983
49074
|
} catch {
|
|
48984
49075
|
}
|
|
48985
49076
|
}
|
|
@@ -48993,7 +49084,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
48993
49084
|
else {
|
|
48994
49085
|
heliaBlocks++;
|
|
48995
49086
|
try {
|
|
48996
|
-
heliaBytes +=
|
|
49087
|
+
heliaBytes += statSync15(join59(dir, entry.name)).size;
|
|
48997
49088
|
} catch {
|
|
48998
49089
|
}
|
|
48999
49090
|
}
|
|
@@ -49086,7 +49177,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49086
49177
|
const count = memStore.count();
|
|
49087
49178
|
lines.push(`
|
|
49088
49179
|
${c2.bold("Structured Memory (SQLite)")}`);
|
|
49089
|
-
lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(
|
|
49180
|
+
lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
|
|
49090
49181
|
cDb(db);
|
|
49091
49182
|
}
|
|
49092
49183
|
} catch {
|
|
@@ -49117,7 +49208,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49117
49208
|
walkStorage(full, subCat);
|
|
49118
49209
|
} else {
|
|
49119
49210
|
try {
|
|
49120
|
-
const sz =
|
|
49211
|
+
const sz = statSync15(full).size;
|
|
49121
49212
|
totalBytes += sz;
|
|
49122
49213
|
if (!categories[category])
|
|
49123
49214
|
categories[category] = { files: 0, bytes: 0 };
|
|
@@ -49418,29 +49509,46 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49418
49509
|
const caps = detectPersonaPlexCapability2();
|
|
49419
49510
|
if (!caps.supported) {
|
|
49420
49511
|
renderWarning(`PersonaPlex not available: ${caps.reason}`);
|
|
49421
|
-
renderInfo("Requirements: NVIDIA GPU with \
|
|
49512
|
+
renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
|
|
49422
49513
|
return "handled";
|
|
49423
49514
|
}
|
|
49424
|
-
|
|
49515
|
+
const tierInfo = caps.weightTier;
|
|
49516
|
+
renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB) \u2192 ${tierInfo} tier${caps.needsHfToken ? "" : " (no HF token needed)"}`);
|
|
49425
49517
|
if (!isPersonaPlexInstalled2()) {
|
|
49426
|
-
renderInfo("
|
|
49427
|
-
|
|
49428
|
-
|
|
49429
|
-
|
|
49430
|
-
|
|
49431
|
-
|
|
49518
|
+
renderInfo("Setting up PersonaPlex in background \u2014 you can keep working...");
|
|
49519
|
+
(async () => {
|
|
49520
|
+
try {
|
|
49521
|
+
const ok = await installPersonaPlex2((msg2) => renderInfo(msg2), caps.weightTier);
|
|
49522
|
+
if (!ok) {
|
|
49523
|
+
renderError("PersonaPlex installation failed.");
|
|
49524
|
+
return;
|
|
49525
|
+
}
|
|
49526
|
+
if (!isPersonaPlexRunning2()) {
|
|
49527
|
+
const url = await startPersonaPlexDaemon2((msg2) => renderInfo(msg2));
|
|
49528
|
+
if (url) {
|
|
49529
|
+
renderInfo(`PersonaPlex ready at ${url} \u2014 use /call for full-duplex voice`);
|
|
49530
|
+
} else {
|
|
49531
|
+
renderError("PersonaPlex daemon failed to start. Check ~/.open-agents/voice/personaplex/daemon.log");
|
|
49532
|
+
}
|
|
49533
|
+
}
|
|
49534
|
+
} catch (e) {
|
|
49535
|
+
renderError(`PersonaPlex setup error: ${e instanceof Error ? e.message : String(e)}`);
|
|
49536
|
+
}
|
|
49537
|
+
})();
|
|
49538
|
+
return "handled";
|
|
49432
49539
|
}
|
|
49433
49540
|
if (isPersonaPlexRunning2()) {
|
|
49434
|
-
renderInfo("PersonaPlex daemon is running.");
|
|
49435
|
-
renderInfo("Use /call to start a full-duplex voice session.");
|
|
49541
|
+
renderInfo("PersonaPlex daemon is running. Use /call for full-duplex voice.");
|
|
49436
49542
|
} else {
|
|
49437
|
-
|
|
49438
|
-
|
|
49439
|
-
|
|
49440
|
-
|
|
49441
|
-
|
|
49442
|
-
|
|
49443
|
-
|
|
49543
|
+
renderInfo("Starting PersonaPlex daemon...");
|
|
49544
|
+
startPersonaPlexDaemon2((msg2) => renderInfo(msg2)).then((url) => {
|
|
49545
|
+
if (url) {
|
|
49546
|
+
renderInfo(`PersonaPlex ready at ${url} \u2014 use /call for full-duplex voice`);
|
|
49547
|
+
} else {
|
|
49548
|
+
renderError("PersonaPlex daemon failed to start. Check daemon.log");
|
|
49549
|
+
}
|
|
49550
|
+
}).catch(() => {
|
|
49551
|
+
});
|
|
49444
49552
|
}
|
|
49445
49553
|
return "handled";
|
|
49446
49554
|
}
|
|
@@ -51087,7 +51195,7 @@ async function showCohereDashboard(ctx) {
|
|
|
51087
51195
|
const snapItems = snaps.slice(0, 20).map((f) => ({
|
|
51088
51196
|
key: f,
|
|
51089
51197
|
label: f.replace(".json", ""),
|
|
51090
|
-
detail: `${formatFileSize(
|
|
51198
|
+
detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
|
|
51091
51199
|
}));
|
|
51092
51200
|
if (snapItems.length > 0) {
|
|
51093
51201
|
await tuiSelect({
|
|
@@ -59364,7 +59472,7 @@ var init_tool_policy = __esm({
|
|
|
59364
59472
|
});
|
|
59365
59473
|
|
|
59366
59474
|
// packages/cli/dist/tui/telegram-bridge.js
|
|
59367
|
-
import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as
|
|
59475
|
+
import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
|
|
59368
59476
|
import { join as join68, resolve as resolve30 } from "node:path";
|
|
59369
59477
|
import { writeFile as writeFileAsync } from "node:fs/promises";
|
|
59370
59478
|
function convertMarkdownToTelegramHTML(md) {
|
|
@@ -71366,7 +71474,7 @@ __export(index_repo_exports, {
|
|
|
71366
71474
|
indexRepoCommand: () => indexRepoCommand
|
|
71367
71475
|
});
|
|
71368
71476
|
import { resolve as resolve34 } from "node:path";
|
|
71369
|
-
import { existsSync as existsSync56, statSync as
|
|
71477
|
+
import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
|
|
71370
71478
|
import { cwd as cwd2 } from "node:process";
|
|
71371
71479
|
async function indexRepoCommand(opts, _config) {
|
|
71372
71480
|
const repoRoot = resolve34(opts.repoPath ?? cwd2());
|
|
@@ -71376,7 +71484,7 @@ async function indexRepoCommand(opts, _config) {
|
|
|
71376
71484
|
printError(`Path does not exist: ${repoRoot}`);
|
|
71377
71485
|
process.exit(1);
|
|
71378
71486
|
}
|
|
71379
|
-
const stat5 =
|
|
71487
|
+
const stat5 = statSync17(repoRoot);
|
|
71380
71488
|
if (!stat5.isDirectory()) {
|
|
71381
71489
|
printError(`Path is not a directory: ${repoRoot}`);
|
|
71382
71490
|
process.exit(1);
|
package/package.json
CHANGED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
|
|
4
|
+
|
|
5
|
+
For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
|
|
6
|
+
bf16 safetensors file that moshi.server can load natively.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
|
|
10
|
+
python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
|
|
11
|
+
|
|
12
|
+
The output file can then be passed to moshi.server via --moshi-weight.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os, sys, math, time
|
|
16
|
+
import torch
|
|
17
|
+
from safetensors.torch import load_file, save_file
|
|
18
|
+
|
|
19
|
+
# Four-entry codebook for the 2-bit format: dequant_turbo2bit indexes this
# tensor with the unpacked 2-bit codes (0-3) to look up per-element values.
NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def fast_wht(x):
    """Normalized fast Walsh-Hadamard transform along the last dimension.

    The result is scaled by 1/sqrt(n), which makes the transform its own
    inverse: fast_wht(fast_wht(x)) == x up to rounding.

    Args:
        x: Tensor whose last dimension has a power-of-two length n. Any
            leading dimensions are treated as batch dimensions. The tensor
            may be non-contiguous.

    Returns:
        A new tensor with the same shape as ``x``. The input is never
        modified (the butterflies build fresh tensors instead of writing
        through views of the caller's storage).

    Raises:
        ValueError: if the last dimension is not a power of two.
    """
    shape = tuple(x.shape)
    n = shape[-1]
    if n < 1 or n & (n - 1):
        raise ValueError(
            f"fast_wht requires a power-of-two last dimension, got {n}"
        )

    lead = shape[:-1]
    out = x
    h = 1
    while h < n:
        # Split the last axis into blocks of 2*h and combine the two halves
        # of every block: (a, b) -> (a + b, a - b).
        pairs = out.reshape(*lead, n // (2 * h), 2, h)
        a = pairs[..., 0, :]
        b = pairs[..., 1, :]
        out = torch.stack((a + b, a - b), dim=-2).reshape(shape)
        h *= 2
    return out / math.sqrt(n)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def detect_format(state):
    """Classify a state dict by the key suffixes it contains.

    Returns "turbo2bit" when any key ends with ".packed", otherwise "nf4"
    when any key ends with ".__scales__", otherwise "plain". Only the keys
    of ``state`` are inspected.
    """
    suffix_present = {
        suffix: any(key.endswith(suffix) for key in state)
        for suffix in (".__scales__", ".packed")
    }
    # ".packed" takes precedence over ".__scales__".
    if suffix_present[".packed"]:
        return "turbo2bit"
    return "nf4" if suffix_present[".__scales__"] else "plain"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def dequant_nf4(state, group_size=64):
    """Dequantize 4-bit group-quantized weights to bfloat16.

    Layout read from ``state`` for a quantized tensor ``name``:
      * ``name``             - packed bytes, two 4-bit codes per byte
                               (low nibble first, then high nibble)
      * ``name.__scales__``  - one scale per group of ``group_size`` codes
      * ``name.__shape__``   - target shape; non-positive entries are dropped
      * ``name.__numel__``   - number of valid elements (trailing codes are
                               discarded)

    Each code is decoded linearly as ``(code - 8) * scale``. Entries without
    a ``.__scales__`` companion are simply cast to bfloat16.

    Args:
        state: Mapping of tensor name to torch.Tensor.
        group_size: Number of codes that share one scale (default 64).

    Returns:
        dict mapping each non-metadata name to a bfloat16 tensor.

    Raises:
        KeyError: if a quantized tensor lacks its shape/numel metadata.
        ValueError: if the packed data is too short for the given scales.
    """
    aux_suffixes = (".__scales__", ".__shape__", ".__numel__")
    result = {}

    for name, tensor in state.items():
        if name.endswith(aux_suffixes):
            continue

        scales_key = f"{name}.__scales__"
        if scales_key not in state:
            result[name] = tensor.to(torch.bfloat16)
            continue

        # Flatten so packed/scales may be stored with any shape.
        packed = tensor.reshape(-1)
        scales = state[scales_key].float().reshape(-1).to(packed.device)
        shape = [dim for dim in state[f"{name}.__shape__"].tolist() if dim > 0]
        numel = int(state[f"{name}.__numel__"].item())

        lo = (packed & 0x0F).to(torch.int8) - 8
        hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
        # Interleave as lo0, hi0, lo1, hi1, ... on the same device as the
        # packed data (a CPU-only scratch buffer would break --device cuda).
        unpacked = torch.stack((lo, hi), dim=1).reshape(-1).float()

        n_groups = scales.numel()
        needed = n_groups * group_size
        if unpacked.numel() < needed:
            raise ValueError(
                f"{name}: {unpacked.numel()} unpacked values cannot fill "
                f"{n_groups} groups of {group_size}"
            )

        groups = unpacked[:needed].reshape(n_groups, group_size)
        deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
        result[name] = deq.reshape(shape).to(torch.bfloat16)

    return result
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def dequant_turbo2bit(state):
    """Dequantize TurboQuant 2-bit (NF2 codebook + WHT) weights to bfloat16.

    A tensor ``name`` is treated as quantized when ``name.packed`` is present.
    Its companions are read as:
      * ``name.packed`` - bytes holding four 2-bit codes each (lowest bits
                          first)
      * ``name.scales`` - one scale per group
      * ``name.gs``     - group size (codes per group, multiple of 4)
      * ``name.np2``    - padded group length used for the WHT
      * ``name.shape``  - target shape; non-positive entries are dropped
      * ``name.numel``  - number of valid elements

    Quantized tensors are recovered whether or not a bare ``name`` entry is
    also present in ``state``. Keys that merely end in one of the metadata
    suffixes but do not belong to a quantized tensor are kept and cast to
    bfloat16 like any other plain tensor.

    Args:
        state: Mapping of tensor name to torch.Tensor.

    Returns:
        dict mapping tensor names to bfloat16 tensors.

    Raises:
        KeyError: if a quantized tensor lacks one of its metadata entries.
        ValueError: if a group size is not a multiple of 4.
    """
    aux_fields = ("packed", "scales", "shape", "numel", "gs", "np2")
    packed_suffix = ".packed"
    quantized = {
        key[: -len(packed_suffix)] for key in state if key.endswith(packed_suffix)
    }

    def _dequant(name):
        packed = state[f"{name}.packed"]
        device = packed.device
        gs = int(state[f"{name}.gs"].item())
        gs_pow2 = int(state[f"{name}.np2"].item())
        numel = int(state[f"{name}.numel"].item())
        shape = [dim for dim in state[f"{name}.shape"].tolist() if dim > 0]
        scales = state[f"{name}.scales"].float().reshape(-1).to(device)
        n_groups = scales.numel()

        if gs <= 0 or gs % 4:
            raise ValueError(
                f"{name}: group size must be a positive multiple of 4, got {gs}"
            )

        # Unpack 2-bit codes: byte j of a group yields codes 4j .. 4j+3.
        p = packed.reshape(n_groups, gs // 4).long()
        codes = torch.stack(
            [(p >> (2 * i)) & 0x03 for i in range(4)], dim=2
        ).reshape(n_groups, gs)

        # Keep every intermediate on the device of the packed data.
        dequant = NF2_CENTROIDS.to(device)[codes]

        # Inverse WHT over the zero-padded group, then drop the padding.
        if gs_pow2 > gs:
            pad = dequant.new_zeros(n_groups, gs_pow2 - gs)
            dequant = torch.cat([dequant, pad], dim=1)
        dequant = fast_wht(dequant)[:, :gs]

        dequant = dequant * scales.unsqueeze(1)
        return dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)

    result = {}
    for key, tensor in state.items():
        base, _, field = key.rpartition(".")
        if key in quantized:
            # Bare entry stored next to name.packed; the dequantized tensor
            # below takes its place.
            continue
        if field in aux_fields and base in quantized:
            continue
        result[key] = tensor.to(torch.bfloat16)

    for name in sorted(quantized):
        result[name] = _dequant(name)

    return result
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def main():
    """Command-line entry point: dequantize a safetensors file to bf16.

    Steps:
      1. Parse --input / --output / --device.
      2. Exit with status 1 if the input file does not exist.
      3. Exit with status 0 if the output exists and is newer than the input.
      4. Load the input, detect its format, and dequantize (or cast) every
         tensor to bfloat16.
      5. Save the result to a temporary file next to the output and move it
         into place, so an interrupted run never leaves a truncated file that
         the freshness check in step 3 would later accept as a valid cache.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(f"Error: {args.input} not found")
        sys.exit(1)

    # Skip if output already exists and is newer than input
    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
        print(f"Cached: {args.output} is up to date")
        sys.exit(0)

    print(f"Loading {args.input}...")
    t0 = time.time()
    state = load_file(args.input, device=args.device)

    fmt = detect_format(state)
    print(f"Format: {fmt}")

    if fmt == "nf4":
        result = dequant_nf4(state)
    elif fmt == "turbo2bit":
        result = dequant_turbo2bit(state)
    else:
        print("Already plain bf16/fp16 — copying")
        result = {k: v.to(torch.bfloat16) for k, v in state.items()}

    t1 = time.time()
    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")

    print(f"Saving to {args.output}...")
    out_dir = os.path.dirname(os.path.abspath(args.output))
    os.makedirs(out_dir, exist_ok=True)
    # Temp file lives in the output directory so os.replace stays on one
    # filesystem and swaps the finished file in atomically.
    tmp_path = f"{args.output}.tmp.{os.getpid()}"
    try:
        save_file(result, tmp_path)
        os.replace(tmp_path, args.output)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    size_gb = os.path.getsize(args.output) / 1024**3
    print(f"Done: {size_gb:.2f} GB")
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
if __name__ == "__main__":
|
|
174
|
+
main()
|