open-agents-ai 0.185.31 → 0.185.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +102 -44
- package/package.json +1 -1
- package/voices/personaplex/dequant-loader.py +174 -0
package/dist/index.js
CHANGED
|
@@ -41281,7 +41281,7 @@ __export(personaplex_exports, {
|
|
|
41281
41281
|
startPersonaPlexDaemon: () => startPersonaPlexDaemon,
|
|
41282
41282
|
stopPersonaPlex: () => stopPersonaPlex
|
|
41283
41283
|
});
|
|
41284
|
-
import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
|
|
41284
|
+
import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
|
|
41285
41285
|
import { join as join54, dirname as dirname18 } from "node:path";
|
|
41286
41286
|
import { homedir as homedir13 } from "node:os";
|
|
41287
41287
|
import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
|
|
@@ -41293,31 +41293,56 @@ function selectWeightTier(vramGB) {
|
|
|
41293
41293
|
return "nf4";
|
|
41294
41294
|
return "turbo2bit";
|
|
41295
41295
|
}
|
|
41296
|
+
/**
 * Detect an NVIDIA Jetson-class board and report its total system memory.
 *
 * The board name is read from the device-tree model file and matched
 * case-insensitively against "jetson", "orin" or "tegra". On a match the
 * `MemTotal` entry of the meminfo file is converted from kB to GB.
 *
 * @param {string} [modelPath="/proc/device-tree/model"] File holding the board model string.
 * @param {string} [memInfoPath="/proc/meminfo"] File containing a `MemTotal: <n> kB` line.
 * @returns {{ isJetson: boolean, model: string, totalMemGB: number }}
 *   `{ isJetson: false, model: "", totalMemGB: 0 }` when the model does not
 *   match or either file cannot be read. `totalMemGB` is 0 when the model
 *   matches but no `MemTotal` line is found.
 */
function detectJetson(modelPath = "/proc/device-tree/model", memInfoPath = "/proc/meminfo") {
  try {
    // Strip any NUL bytes from the model string before matching.
    const model = readFileSync28(modelPath, "utf8").replace(/\0/g, "").trim();
    if (/jetson|orin|tegra/i.test(model)) {
      // Read meminfo directly rather than spawning a shell + grep; the regex is
      // anchored on the MemTotal line because the whole file is now in scope.
      const memInfo = readFileSync28(memInfoPath, "utf8");
      const memKB = parseInt(memInfo.match(/^MemTotal:\s*(\d+)/m)?.[1] ?? "0", 10);
      return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
    }
  } catch {
    // Best effort: an unreadable file means "not a Jetson" so the caller can
    // continue with its other detection path.
  }
  return { isJetson: false, model: "", totalMemGB: 0 };
}
|
|
41296
41308
|
function detectPersonaPlexCapability() {
|
|
41309
|
+
const fail = (reason) => ({
|
|
41310
|
+
supported: false,
|
|
41311
|
+
reason,
|
|
41312
|
+
gpuName: "",
|
|
41313
|
+
vramGB: 0,
|
|
41314
|
+
weightTier: "turbo2bit",
|
|
41315
|
+
needsHfToken: false
|
|
41316
|
+
});
|
|
41317
|
+
const jetson = detectJetson();
|
|
41318
|
+
if (jetson.isJetson) {
|
|
41319
|
+
const vramGB = jetson.totalMemGB;
|
|
41320
|
+
if (vramGB < 8)
|
|
41321
|
+
return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
|
|
41322
|
+
const tier = selectWeightTier(vramGB);
|
|
41323
|
+
const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
|
|
41324
|
+
const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
|
|
41325
|
+
return {
|
|
41326
|
+
supported: true,
|
|
41327
|
+
reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
|
|
41328
|
+
gpuName: jetson.model,
|
|
41329
|
+
vramGB,
|
|
41330
|
+
weightTier: effectiveTier,
|
|
41331
|
+
needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
|
|
41332
|
+
};
|
|
41333
|
+
}
|
|
41297
41334
|
try {
|
|
41298
41335
|
const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
|
|
41299
41336
|
encoding: "utf8",
|
|
41300
41337
|
timeout: 5e3,
|
|
41301
41338
|
stdio: "pipe"
|
|
41302
41339
|
}).trim();
|
|
41303
|
-
if (!nvsmi)
|
|
41304
|
-
return
|
|
41305
|
-
}
|
|
41340
|
+
if (!nvsmi)
|
|
41341
|
+
return fail("No NVIDIA GPU detected");
|
|
41306
41342
|
const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
|
|
41307
|
-
|
|
41308
|
-
const isJetson = /orin|tegra|jetson/i.test(gpuName ?? "");
|
|
41309
|
-
if (isJetson) {
|
|
41310
|
-
try {
|
|
41311
|
-
const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
|
|
41312
|
-
const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
|
|
41313
|
-
const totalGB = memKB / 1024 / 1024;
|
|
41314
|
-
if (totalGB > vramGB)
|
|
41315
|
-
vramGB = totalGB;
|
|
41316
|
-
} catch {
|
|
41317
|
-
}
|
|
41318
|
-
}
|
|
41343
|
+
const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
|
|
41319
41344
|
if (vramGB < 8) {
|
|
41320
|
-
return {
|
|
41345
|
+
return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
|
|
41321
41346
|
}
|
|
41322
41347
|
try {
|
|
41323
41348
|
execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
|
|
@@ -41326,7 +41351,7 @@ function detectPersonaPlexCapability() {
|
|
|
41326
41351
|
});
|
|
41327
41352
|
} catch {
|
|
41328
41353
|
const tier2 = selectWeightTier(vramGB);
|
|
41329
|
-
return {
|
|
41354
|
+
return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
|
|
41330
41355
|
}
|
|
41331
41356
|
const tier = selectWeightTier(vramGB);
|
|
41332
41357
|
const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
|
|
@@ -41340,7 +41365,7 @@ function detectPersonaPlexCapability() {
|
|
|
41340
41365
|
needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
|
|
41341
41366
|
};
|
|
41342
41367
|
} catch {
|
|
41343
|
-
return
|
|
41368
|
+
return fail("No NVIDIA GPU detected (nvidia-smi not found)");
|
|
41344
41369
|
}
|
|
41345
41370
|
}
|
|
41346
41371
|
function isPersonaPlexRunning() {
|
|
@@ -41383,11 +41408,20 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41383
41408
|
const log = onInfo ?? (() => {
|
|
41384
41409
|
});
|
|
41385
41410
|
mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
|
|
41411
|
+
let arch2 = "";
|
|
41412
|
+
try {
|
|
41413
|
+
arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
|
|
41414
|
+
} catch {
|
|
41415
|
+
}
|
|
41416
|
+
const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
|
|
41417
|
+
if (isAarch64)
|
|
41418
|
+
log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
|
|
41386
41419
|
const venvDir = join54(PERSONAPLEX_DIR, "venv");
|
|
41387
41420
|
if (!existsSync37(venvDir)) {
|
|
41388
41421
|
log("Creating Python virtual environment...");
|
|
41389
41422
|
try {
|
|
41390
|
-
|
|
41423
|
+
const ssp = isAarch64 ? " --system-site-packages" : "";
|
|
41424
|
+
execSync27(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
|
|
41391
41425
|
} catch (err) {
|
|
41392
41426
|
log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
|
|
41393
41427
|
return false;
|
|
@@ -41395,14 +41429,6 @@ async function installPersonaPlex(onInfo, weightTier) {
|
|
|
41395
41429
|
}
|
|
41396
41430
|
const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
|
|
41397
41431
|
const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
|
|
41398
|
-
let arch2 = "";
|
|
41399
|
-
try {
|
|
41400
|
-
arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
|
|
41401
|
-
} catch {
|
|
41402
|
-
}
|
|
41403
|
-
const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
|
|
41404
|
-
if (isAarch64)
|
|
41405
|
-
log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
|
|
41406
41432
|
log("Checking system dependencies (libopus)...");
|
|
41407
41433
|
try {
|
|
41408
41434
|
if (process.platform === "linux") {
|
|
@@ -41566,7 +41592,39 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41566
41592
|
const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
|
|
41567
41593
|
const sslDir = join54(PERSONAPLEX_DIR, "ssl");
|
|
41568
41594
|
mkdirSync15(sslDir, { recursive: true });
|
|
41569
|
-
|
|
41595
|
+
const tier = getWeightTier();
|
|
41596
|
+
const repoInfo = WEIGHT_REPOS[tier];
|
|
41597
|
+
const extraArgs = [];
|
|
41598
|
+
if (tier !== "original") {
|
|
41599
|
+
log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
|
|
41600
|
+
const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
|
|
41601
|
+
const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
|
|
41602
|
+
if (!existsSync37(dequantScript)) {
|
|
41603
|
+
const shipped = getShippedVoicesDir();
|
|
41604
|
+
if (shipped) {
|
|
41605
|
+
const src = join54(shipped, "dequant-loader.py");
|
|
41606
|
+
if (existsSync37(src))
|
|
41607
|
+
copyFileSync2(src, dequantScript);
|
|
41608
|
+
}
|
|
41609
|
+
}
|
|
41610
|
+
try {
|
|
41611
|
+
const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
|
|
41612
|
+
if (existsSync37(dequantScript) && existsSync37(weightPath)) {
|
|
41613
|
+
try {
|
|
41614
|
+
execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
|
|
41615
|
+
if (existsSync37(cachedBf16)) {
|
|
41616
|
+
extraArgs.push("--moshi-weight", cachedBf16);
|
|
41617
|
+
log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
|
|
41618
|
+
}
|
|
41619
|
+
} catch (e) {
|
|
41620
|
+
log(`Dequantization failed \u2014 server will try to load original weights`);
|
|
41621
|
+
}
|
|
41622
|
+
}
|
|
41623
|
+
} catch {
|
|
41624
|
+
log(`Weight file not found \u2014 server will download on first run`);
|
|
41625
|
+
}
|
|
41626
|
+
}
|
|
41627
|
+
log(`Starting PersonaPlex daemon (${tier} tier)...`);
|
|
41570
41628
|
const child = spawn19(venvPython2, [
|
|
41571
41629
|
"-m",
|
|
41572
41630
|
"moshi.server",
|
|
@@ -41577,7 +41635,8 @@ async function startPersonaPlexDaemon(onInfo) {
|
|
|
41577
41635
|
"--ssl",
|
|
41578
41636
|
sslDir,
|
|
41579
41637
|
"--device",
|
|
41580
|
-
"cuda"
|
|
41638
|
+
"cuda",
|
|
41639
|
+
...extraArgs
|
|
41581
41640
|
], {
|
|
41582
41641
|
stdio: ["ignore", "pipe", "pipe"],
|
|
41583
41642
|
detached: true,
|
|
@@ -41664,8 +41723,7 @@ function listPersonaPlexVoices() {
|
|
|
41664
41723
|
}
|
|
41665
41724
|
if (existsSync37(CUSTOM_VOICES_DIR)) {
|
|
41666
41725
|
try {
|
|
41667
|
-
const
|
|
41668
|
-
for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
|
|
41726
|
+
for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
|
|
41669
41727
|
if (f.endsWith(".pt")) {
|
|
41670
41728
|
const name = f.replace(/\.pt$/, "");
|
|
41671
41729
|
voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
|
|
@@ -45416,7 +45474,7 @@ __export(voice_exports, {
|
|
|
45416
45474
|
registerCustomOnnxModel: () => registerCustomOnnxModel,
|
|
45417
45475
|
resetNarrationContext: () => resetNarrationContext
|
|
45418
45476
|
});
|
|
45419
|
-
import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as
|
|
45477
|
+
import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
|
|
45420
45478
|
import { join as join58, dirname as dirname19 } from "node:path";
|
|
45421
45479
|
import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
|
|
45422
45480
|
import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
|
|
@@ -46550,7 +46608,7 @@ var init_voice = __esm({
|
|
|
46550
46608
|
const p = join58(dir, f);
|
|
46551
46609
|
let size = 0;
|
|
46552
46610
|
try {
|
|
46553
|
-
size =
|
|
46611
|
+
size = statSync14(p).size;
|
|
46554
46612
|
} catch {
|
|
46555
46613
|
}
|
|
46556
46614
|
return {
|
|
@@ -48166,7 +48224,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
|
|
|
48166
48224
|
// packages/cli/dist/tui/commands.js
|
|
48167
48225
|
import * as nodeOs from "node:os";
|
|
48168
48226
|
import { execSync as nodeExecSync } from "node:child_process";
|
|
48169
|
-
import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as
|
|
48227
|
+
import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
|
|
48170
48228
|
import { join as join59 } from "node:path";
|
|
48171
48229
|
function safeLog(text) {
|
|
48172
48230
|
if (isNeovimActive()) {
|
|
@@ -48979,7 +49037,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
48979
49037
|
ipfsFiles = files.length;
|
|
48980
49038
|
for (const f of files) {
|
|
48981
49039
|
try {
|
|
48982
|
-
ipfsBytes +=
|
|
49040
|
+
ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
|
|
48983
49041
|
} catch {
|
|
48984
49042
|
}
|
|
48985
49043
|
}
|
|
@@ -48993,7 +49051,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
48993
49051
|
else {
|
|
48994
49052
|
heliaBlocks++;
|
|
48995
49053
|
try {
|
|
48996
|
-
heliaBytes +=
|
|
49054
|
+
heliaBytes += statSync15(join59(dir, entry.name)).size;
|
|
48997
49055
|
} catch {
|
|
48998
49056
|
}
|
|
48999
49057
|
}
|
|
@@ -49086,7 +49144,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49086
49144
|
const count = memStore.count();
|
|
49087
49145
|
lines.push(`
|
|
49088
49146
|
${c2.bold("Structured Memory (SQLite)")}`);
|
|
49089
|
-
lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(
|
|
49147
|
+
lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
|
|
49090
49148
|
cDb(db);
|
|
49091
49149
|
}
|
|
49092
49150
|
} catch {
|
|
@@ -49117,7 +49175,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49117
49175
|
walkStorage(full, subCat);
|
|
49118
49176
|
} else {
|
|
49119
49177
|
try {
|
|
49120
|
-
const sz =
|
|
49178
|
+
const sz = statSync15(full).size;
|
|
49121
49179
|
totalBytes += sz;
|
|
49122
49180
|
if (!categories[category])
|
|
49123
49181
|
categories[category] = { files: 0, bytes: 0 };
|
|
@@ -49418,7 +49476,7 @@ async function handleSlashCommand(input, ctx) {
|
|
|
49418
49476
|
const caps = detectPersonaPlexCapability2();
|
|
49419
49477
|
if (!caps.supported) {
|
|
49420
49478
|
renderWarning(`PersonaPlex not available: ${caps.reason}`);
|
|
49421
|
-
renderInfo("Requirements: NVIDIA GPU with \
|
|
49479
|
+
renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
|
|
49422
49480
|
return "handled";
|
|
49423
49481
|
}
|
|
49424
49482
|
renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB VRAM) \u2014 PersonaPlex compatible \u2713`);
|
|
@@ -51087,7 +51145,7 @@ async function showCohereDashboard(ctx) {
|
|
|
51087
51145
|
const snapItems = snaps.slice(0, 20).map((f) => ({
|
|
51088
51146
|
key: f,
|
|
51089
51147
|
label: f.replace(".json", ""),
|
|
51090
|
-
detail: `${formatFileSize(
|
|
51148
|
+
detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
|
|
51091
51149
|
}));
|
|
51092
51150
|
if (snapItems.length > 0) {
|
|
51093
51151
|
await tuiSelect({
|
|
@@ -59364,7 +59422,7 @@ var init_tool_policy = __esm({
|
|
|
59364
59422
|
});
|
|
59365
59423
|
|
|
59366
59424
|
// packages/cli/dist/tui/telegram-bridge.js
|
|
59367
|
-
import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as
|
|
59425
|
+
import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
|
|
59368
59426
|
import { join as join68, resolve as resolve30 } from "node:path";
|
|
59369
59427
|
import { writeFile as writeFileAsync } from "node:fs/promises";
|
|
59370
59428
|
function convertMarkdownToTelegramHTML(md) {
|
|
@@ -71366,7 +71424,7 @@ __export(index_repo_exports, {
|
|
|
71366
71424
|
indexRepoCommand: () => indexRepoCommand
|
|
71367
71425
|
});
|
|
71368
71426
|
import { resolve as resolve34 } from "node:path";
|
|
71369
|
-
import { existsSync as existsSync56, statSync as
|
|
71427
|
+
import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
|
|
71370
71428
|
import { cwd as cwd2 } from "node:process";
|
|
71371
71429
|
async function indexRepoCommand(opts, _config) {
|
|
71372
71430
|
const repoRoot = resolve34(opts.repoPath ?? cwd2());
|
|
@@ -71376,7 +71434,7 @@ async function indexRepoCommand(opts, _config) {
|
|
|
71376
71434
|
printError(`Path does not exist: ${repoRoot}`);
|
|
71377
71435
|
process.exit(1);
|
|
71378
71436
|
}
|
|
71379
|
-
const stat5 =
|
|
71437
|
+
const stat5 = statSync17(repoRoot);
|
|
71380
71438
|
if (!stat5.isDirectory()) {
|
|
71381
71439
|
printError(`Path is not a directory: ${repoRoot}`);
|
|
71382
71440
|
process.exit(1);
|
package/package.json
CHANGED
(+1 -1; hunk not captured in this extract)
package/voices/personaplex/dequant-loader.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
|
|
4
|
+
|
|
5
|
+
For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
|
|
6
|
+
bf16 safetensors file that moshi.server can load natively.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
|
|
10
|
+
python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
|
|
11
|
+
|
|
12
|
+
The output file can then be passed to moshi.server via --moshi-weight.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os, sys, math, time
|
|
16
|
+
import torch
|
|
17
|
+
from safetensors.torch import load_file, save_file
|
|
18
|
+
|
|
19
|
+
# Lookup table of the four reconstruction levels for 2-bit codes 0..3;
# dequant_turbo2bit indexes it with the unpacked codes.
NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def fast_wht(x):
    """Vectorized Walsh-Hadamard Transform along the last dimension.

    The result is scaled by 1/sqrt(n), so applying the transform twice
    returns the input (up to floating-point error).

    Args:
        x: Tensor whose last dimension has a power-of-two length ``n``.
            Leading dimensions are treated as batch dimensions. The input
            is never modified and need not be contiguous.

    Returns:
        A new tensor with the same shape as ``x``.

    Raises:
        ValueError: If the last dimension is not a power of two.
    """
    n = x.shape[-1]
    if n & (n - 1):
        raise ValueError(f"fast_wht needs a power-of-two last dimension, got {n}")

    lead = x.shape[:-1]
    out = x
    h = 1
    while h < n:
        # Group the last axis into (blocks, 2, h): the two halves of each
        # block are the butterfly operands at this stage.
        pairs = out.reshape(*lead, -1, 2, h)
        a = pairs[..., 0, :]
        b = pairs[..., 1, :]
        # Build a fresh tensor instead of writing through a view, so the
        # caller's tensor is left untouched.
        out = torch.stack((a + b, a - b), dim=-2).reshape(*lead, n)
        h *= 2
    return out / math.sqrt(n)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def detect_format(state):
    """Classify a loaded state dict by the key suffixes it contains.

    Returns "turbo2bit" if any key ends with ".packed", otherwise "nf4" if
    any key ends with ".__scales__", otherwise "plain".
    """
    # Checked in priority order: the ".packed" marker wins over ".__scales__".
    markers = ((".packed", "turbo2bit"), (".__scales__", "nf4"))
    for suffix, fmt in markers:
        for key in state:
            if key.endswith(suffix):
                return fmt
    return "plain"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def dequant_nf4(state, group_size=64):
    """Dequantize INT4 NF4 weights to bfloat16.

    A tensor ``name`` is treated as quantized when ``name.__scales__`` is
    present. Its bytes each hold two 4-bit values (low nibble first), stored
    with an offset of 8; ``name.__shape__`` and ``name.__numel__`` describe
    the original tensor. Every other non-metadata tensor is cast to bfloat16.

    Args:
        state: Mapping of tensor name to tensor, as loaded from safetensors.
        group_size: Number of values that share one scale (default 64).

    Returns:
        dict mapping each non-metadata name to a bfloat16 tensor.

    Raises:
        ValueError: If the packed data, scales and element count disagree.
        KeyError: If a quantized tensor lacks its shape or numel entry.
    """
    meta_suffixes = (".__scales__", ".__shape__", ".__numel__")
    result = {}

    for name, tensor in state.items():
        if name.endswith(meta_suffixes):
            continue

        scales_key = f"{name}.__scales__"
        if scales_key not in state:
            result[name] = tensor.to(torch.bfloat16)
            continue

        # Flatten so multi-dimensional packed/scale tensors are handled too.
        packed = tensor.reshape(-1)
        scales = state[scales_key].float().reshape(-1)
        # Non-positive entries in the stored shape are dropped (presumably
        # padding of the shape vector -- verify against the quantizer).
        shape = [s for s in state[f"{name}.__shape__"].tolist() if s > 0]
        numel = int(state[f"{name}.__numel__"].item())

        lo = (packed & 0x0F).to(torch.int8) - 8
        hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
        # Interleave lo0, hi0, lo1, hi1, ... on the tensors' own device
        # rather than in a CPU scratch buffer, so non-CPU loads work.
        unpacked = torch.stack((lo, hi), dim=1).reshape(-1)
        unpacked = unpacked.to(device=scales.device, dtype=torch.float32)

        n_groups = scales.numel()
        needed = n_groups * group_size
        if unpacked.numel() < needed or needed < numel:
            raise ValueError(
                f"{name}: {unpacked.numel()} packed values and {n_groups} scales "
                f"(group size {group_size}) cannot hold {numel} elements"
            )

        groups = unpacked[:needed].reshape(n_groups, group_size)
        deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
        result[name] = deq.reshape(shape).to(torch.bfloat16)

    return result
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def dequant_turbo2bit(state):
    """Dequantize TurboQuant 2-bit (NF2 + WHT) weights to bfloat16.

    A tensor ``name`` is treated as quantized when ``name.packed`` is
    present; its companions ``.scales``, ``.shape``, ``.numel``, ``.gs``
    (group size) and ``.np2`` (group size padded for the transform) are read
    alongside it. Every other non-metadata tensor is cast to bfloat16.

    Args:
        state: Mapping of tensor name to tensor, as loaded from safetensors.

    Returns:
        dict mapping each non-metadata name to a bfloat16 tensor.

    Raises:
        ValueError: If the group size is not a multiple of 4 or the packed
            data does not match the number of scales.
        KeyError: If a quantized tensor lacks one of its companion entries.
    """
    meta_suffixes = tuple(f".{s}" for s in ("packed", "scales", "shape", "numel", "gs", "np2"))
    result = {}

    # NOTE(review): quantized tensors are only discovered through a bare
    # `name` key that sits next to `name.packed`; confirm the quantizer writes
    # that key, otherwise packed tensors are never visited here.
    for name, tensor in state.items():
        if name.endswith(meta_suffixes):
            continue

        packed_key = f"{name}.packed"
        if packed_key not in state:
            result[name] = tensor.to(torch.bfloat16)
            continue

        gs = int(state[f"{name}.gs"].item())
        gs_pow2 = int(state[f"{name}.np2"].item())
        numel = int(state[f"{name}.numel"].item())
        shape = [s for s in state[f"{name}.shape"].tolist() if s > 0]
        scales = state[f"{name}.scales"].float().reshape(-1)
        packed = state[packed_key]
        n_groups = scales.numel()
        device = scales.device

        if gs % 4 != 0 or packed.numel() != n_groups * (gs // 4):
            raise ValueError(
                f"{name}: {packed.numel()} packed bytes do not match "
                f"{n_groups} groups of size {gs}"
            )

        # Unpack four 2-bit codes per byte, lowest bits first, so that
        # codes[g, 4 * j + i] == (p[g, j] >> (2 * i)) & 3.
        p = packed.reshape(n_groups, gs // 4)
        codes = torch.stack([(p >> (2 * i)) & 0x03 for i in range(4)], dim=2)
        codes = codes.reshape(n_groups, gs).long().to(device)

        # Keep the lookup table and padding on the tensors' device instead of
        # the CPU so a non-CPU load does not end in a device mismatch.
        dequant = NF2_CENTROIDS.to(device)[codes]

        # Inverse WHT: zero-pad each group up to the padded length first.
        if gs_pow2 > gs:
            pad = dequant.new_zeros(n_groups, gs_pow2 - gs)
            dequant = torch.cat([dequant, pad], dim=1)
        dequant = fast_wht(dequant)[:, :gs]

        dequant = dequant * scales.unsqueeze(1)
        result[name] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)

    return result
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def main():
    """Command-line entry point: dequantize a safetensors file to bf16.

    Reads ``--input``, detects its format with ``detect_format``, converts
    every tensor to bfloat16 and writes the result to ``--output``.

    Exits with status 1 when the input file is missing, and with status 0
    without doing any work when the output already exists and is newer than
    the input.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(f"Error: {args.input} not found", file=sys.stderr)
        sys.exit(1)

    # Skip if output already exists and is newer than input
    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
        print(f"Cached: {args.output} is up to date")
        sys.exit(0)

    print(f"Loading {args.input}...")
    t0 = time.time()
    state = load_file(args.input, device=args.device)

    fmt = detect_format(state)
    print(f"Format: {fmt}")

    if fmt == "nf4":
        result = dequant_nf4(state)
    elif fmt == "turbo2bit":
        result = dequant_turbo2bit(state)
    else:
        print("Already plain bf16/fp16 — copying")
        result = {k: v.to(torch.bfloat16) for k, v in state.items()}

    t1 = time.time()
    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")

    print(f"Saving to {args.output}...")
    # Write to a sibling temp file and rename it into place. If the process is
    # interrupted mid-write, no truncated file is left at the final path for
    # the freshness check above to mistake for a valid cache on the next run.
    tmp_output = args.output + ".tmp"
    try:
        save_file(result, tmp_output)
        os.replace(tmp_output, args.output)
    finally:
        if os.path.exists(tmp_output):
            os.remove(tmp_output)
    size_gb = os.path.getsize(args.output) / 1024**3
    print(f"Done: {size_gb:.2f} GB")


if __name__ == "__main__":
    main()
|