open-agents-ai 0.185.31 → 0.185.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -41281,7 +41281,7 @@ __export(personaplex_exports, {
41281
41281
  startPersonaPlexDaemon: () => startPersonaPlexDaemon,
41282
41282
  stopPersonaPlex: () => stopPersonaPlex
41283
41283
  });
41284
- import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
41284
+ import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
41285
41285
  import { join as join54, dirname as dirname18 } from "node:path";
41286
41286
  import { homedir as homedir13 } from "node:os";
41287
41287
  import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
@@ -41293,31 +41293,56 @@ function selectWeightTier(vramGB) {
41293
41293
  return "nf4";
41294
41294
  return "turbo2bit";
41295
41295
  }
41296
/**
 * Detect whether the host identifies itself as an NVIDIA Jetson-class board.
 *
 * Reads the device-tree model string and, when it mentions Jetson/Orin/Tegra,
 * also reports the total system memory taken from the `MemTotal` line of
 * /proc/meminfo (the caller treats that figure as the GPU memory budget).
 *
 * Any read failure (file missing, non-Linux host, permission error) is
 * treated as "not a Jetson" so callers can fall back to other detection.
 *
 * @returns {{ isJetson: boolean, model: string, totalMemGB: number }}
 *   `model` is "" and `totalMemGB` is 0 when `isJetson` is false.
 */
function detectJetson() {
  try {
    // The device-tree string is NUL-terminated; strip the terminator(s).
    const model = readFileSync28("/proc/device-tree/model", "utf8").replace(/\0/g, "").trim();
    if (/jetson|orin|tegra/i.test(model)) {
      // Read /proc/meminfo directly instead of spawning `grep` through a shell:
      // no subprocess, no PATH dependency, no timeout to manage.
      const memInfo = readFileSync28("/proc/meminfo", "utf8");
      // MemTotal is reported in kB; anchor on the label so no other line can match.
      const memKB = parseInt(memInfo.match(/^MemTotal:\s*(\d+)/m)?.[1] ?? "0", 10);
      return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
    }
  } catch {
    // Deliberate best-effort: an unreadable file simply means "not a Jetson".
  }
  return { isJetson: false, model: "", totalMemGB: 0 };
}
41296
41308
  function detectPersonaPlexCapability() {
41309
+ const fail = (reason) => ({
41310
+ supported: false,
41311
+ reason,
41312
+ gpuName: "",
41313
+ vramGB: 0,
41314
+ weightTier: "turbo2bit",
41315
+ needsHfToken: false
41316
+ });
41317
+ const jetson = detectJetson();
41318
+ if (jetson.isJetson) {
41319
+ const vramGB = jetson.totalMemGB;
41320
+ if (vramGB < 8)
41321
+ return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
41322
+ const tier = selectWeightTier(vramGB);
41323
+ const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
41324
+ const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
41325
+ return {
41326
+ supported: true,
41327
+ reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
41328
+ gpuName: jetson.model,
41329
+ vramGB,
41330
+ weightTier: effectiveTier,
41331
+ needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
41332
+ };
41333
+ }
41297
41334
  try {
41298
41335
  const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
41299
41336
  encoding: "utf8",
41300
41337
  timeout: 5e3,
41301
41338
  stdio: "pipe"
41302
41339
  }).trim();
41303
- if (!nvsmi) {
41304
- return { supported: false, reason: "No NVIDIA GPU detected", gpuName: "", vramGB: 0, weightTier: "turbo2bit", needsHfToken: false };
41305
- }
41340
+ if (!nvsmi)
41341
+ return fail("No NVIDIA GPU detected");
41306
41342
  const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
41307
- let vramGB = parseInt(vramMB ?? "0", 10) / 1024;
41308
- const isJetson = /orin|tegra|jetson/i.test(gpuName ?? "");
41309
- if (isJetson) {
41310
- try {
41311
- const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
41312
- const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
41313
- const totalGB = memKB / 1024 / 1024;
41314
- if (totalGB > vramGB)
41315
- vramGB = totalGB;
41316
- } catch {
41317
- }
41318
- }
41343
+ const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
41319
41344
  if (vramGB < 8) {
41320
- return { supported: false, reason: `GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB for 2-bit weights)`, gpuName: gpuName ?? "", vramGB, weightTier: "turbo2bit", needsHfToken: false };
41345
+ return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
41321
41346
  }
41322
41347
  try {
41323
41348
  execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
@@ -41326,7 +41351,7 @@ function detectPersonaPlexCapability() {
41326
41351
  });
41327
41352
  } catch {
41328
41353
  const tier2 = selectWeightTier(vramGB);
41329
- return { supported: false, reason: "PyTorch CUDA not available", gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
41354
+ return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
41330
41355
  }
41331
41356
  const tier = selectWeightTier(vramGB);
41332
41357
  const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
@@ -41340,7 +41365,7 @@ function detectPersonaPlexCapability() {
41340
41365
  needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
41341
41366
  };
41342
41367
  } catch {
41343
- return { supported: false, reason: "nvidia-smi not found", gpuName: "", vramGB: 0, weightTier: "turbo2bit", needsHfToken: false };
41368
+ return fail("No NVIDIA GPU detected (nvidia-smi not found)");
41344
41369
  }
41345
41370
  }
41346
41371
  function isPersonaPlexRunning() {
@@ -41383,11 +41408,20 @@ async function installPersonaPlex(onInfo, weightTier) {
41383
41408
  const log = onInfo ?? (() => {
41384
41409
  });
41385
41410
  mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
41411
+ let arch2 = "";
41412
+ try {
41413
+ arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
41414
+ } catch {
41415
+ }
41416
+ const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
41417
+ if (isAarch64)
41418
+ log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
41386
41419
  const venvDir = join54(PERSONAPLEX_DIR, "venv");
41387
41420
  if (!existsSync37(venvDir)) {
41388
41421
  log("Creating Python virtual environment...");
41389
41422
  try {
41390
- execSync27(`python3 -m venv "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
41423
+ const ssp = isAarch64 ? " --system-site-packages" : "";
41424
+ execSync27(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
41391
41425
  } catch (err) {
41392
41426
  log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
41393
41427
  return false;
@@ -41395,14 +41429,6 @@ async function installPersonaPlex(onInfo, weightTier) {
41395
41429
  }
41396
41430
  const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
41397
41431
  const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
41398
- let arch2 = "";
41399
- try {
41400
- arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
41401
- } catch {
41402
- }
41403
- const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
41404
- if (isAarch64)
41405
- log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
41406
41432
  log("Checking system dependencies (libopus)...");
41407
41433
  try {
41408
41434
  if (process.platform === "linux") {
@@ -41566,7 +41592,39 @@ async function startPersonaPlexDaemon(onInfo) {
41566
41592
  const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
41567
41593
  const sslDir = join54(PERSONAPLEX_DIR, "ssl");
41568
41594
  mkdirSync15(sslDir, { recursive: true });
41569
- log("Starting PersonaPlex daemon (loading ~7B model)...");
41595
+ const tier = getWeightTier();
41596
+ const repoInfo = WEIGHT_REPOS[tier];
41597
+ const extraArgs = [];
41598
+ if (tier !== "original") {
41599
+ log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
41600
+ const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
41601
+ const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
41602
+ if (!existsSync37(dequantScript)) {
41603
+ const shipped = getShippedVoicesDir();
41604
+ if (shipped) {
41605
+ const src = join54(shipped, "dequant-loader.py");
41606
+ if (existsSync37(src))
41607
+ copyFileSync2(src, dequantScript);
41608
+ }
41609
+ }
41610
+ try {
41611
+ const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41612
+ if (existsSync37(dequantScript) && existsSync37(weightPath)) {
41613
+ try {
41614
+ execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
41615
+ if (existsSync37(cachedBf16)) {
41616
+ extraArgs.push("--moshi-weight", cachedBf16);
41617
+ log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41618
+ }
41619
+ } catch (e) {
41620
+ log(`Dequantization failed \u2014 server will try to load original weights`);
41621
+ }
41622
+ }
41623
+ } catch {
41624
+ log(`Weight file not found \u2014 server will download on first run`);
41625
+ }
41626
+ }
41627
+ log(`Starting PersonaPlex daemon (${tier} tier)...`);
41570
41628
  const child = spawn19(venvPython2, [
41571
41629
  "-m",
41572
41630
  "moshi.server",
@@ -41577,7 +41635,8 @@ async function startPersonaPlexDaemon(onInfo) {
41577
41635
  "--ssl",
41578
41636
  sslDir,
41579
41637
  "--device",
41580
- "cuda"
41638
+ "cuda",
41639
+ ...extraArgs
41581
41640
  ], {
41582
41641
  stdio: ["ignore", "pipe", "pipe"],
41583
41642
  detached: true,
@@ -41664,8 +41723,7 @@ function listPersonaPlexVoices() {
41664
41723
  }
41665
41724
  if (existsSync37(CUSTOM_VOICES_DIR)) {
41666
41725
  try {
41667
- const { readdirSync: readdirSync24 } = __require("node:fs");
41668
- for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
41726
+ for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
41669
41727
  if (f.endsWith(".pt")) {
41670
41728
  const name = f.replace(/\.pt$/, "");
41671
41729
  voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
@@ -45416,7 +45474,7 @@ __export(voice_exports, {
45416
45474
  registerCustomOnnxModel: () => registerCustomOnnxModel,
45417
45475
  resetNarrationContext: () => resetNarrationContext
45418
45476
  });
45419
- import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync13 } from "node:fs";
45477
+ import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
45420
45478
  import { join as join58, dirname as dirname19 } from "node:path";
45421
45479
  import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
45422
45480
  import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
@@ -46550,7 +46608,7 @@ var init_voice = __esm({
46550
46608
  const p = join58(dir, f);
46551
46609
  let size = 0;
46552
46610
  try {
46553
- size = statSync13(p).size;
46611
+ size = statSync14(p).size;
46554
46612
  } catch {
46555
46613
  }
46556
46614
  return {
@@ -48166,7 +48224,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
48166
48224
  // packages/cli/dist/tui/commands.js
48167
48225
  import * as nodeOs from "node:os";
48168
48226
  import { execSync as nodeExecSync } from "node:child_process";
48169
- import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync14, rmSync } from "node:fs";
48227
+ import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
48170
48228
  import { join as join59 } from "node:path";
48171
48229
  function safeLog(text) {
48172
48230
  if (isNeovimActive()) {
@@ -48979,7 +49037,7 @@ async function handleSlashCommand(input, ctx) {
48979
49037
  ipfsFiles = files.length;
48980
49038
  for (const f of files) {
48981
49039
  try {
48982
- ipfsBytes += statSync14(join59(ipfsLocalDir, f)).size;
49040
+ ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
48983
49041
  } catch {
48984
49042
  }
48985
49043
  }
@@ -48993,7 +49051,7 @@ async function handleSlashCommand(input, ctx) {
48993
49051
  else {
48994
49052
  heliaBlocks++;
48995
49053
  try {
48996
- heliaBytes += statSync14(join59(dir, entry.name)).size;
49054
+ heliaBytes += statSync15(join59(dir, entry.name)).size;
48997
49055
  } catch {
48998
49056
  }
48999
49057
  }
@@ -49086,7 +49144,7 @@ async function handleSlashCommand(input, ctx) {
49086
49144
  const count = memStore.count();
49087
49145
  lines.push(`
49088
49146
  ${c2.bold("Structured Memory (SQLite)")}`);
49089
- lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(statSync14(dbPath).size))}`);
49147
+ lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
49090
49148
  cDb(db);
49091
49149
  }
49092
49150
  } catch {
@@ -49117,7 +49175,7 @@ async function handleSlashCommand(input, ctx) {
49117
49175
  walkStorage(full, subCat);
49118
49176
  } else {
49119
49177
  try {
49120
- const sz = statSync14(full).size;
49178
+ const sz = statSync15(full).size;
49121
49179
  totalBytes += sz;
49122
49180
  if (!categories[category])
49123
49181
  categories[category] = { files: 0, bytes: 0 };
@@ -49418,7 +49476,7 @@ async function handleSlashCommand(input, ctx) {
49418
49476
  const caps = detectPersonaPlexCapability2();
49419
49477
  if (!caps.supported) {
49420
49478
  renderWarning(`PersonaPlex not available: ${caps.reason}`);
49421
- renderInfo("Requirements: NVIDIA GPU with \u226516GB VRAM (RTX 3090/4090/A100+), CUDA 12.1+, PyTorch");
49479
+ renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
49422
49480
  return "handled";
49423
49481
  }
49424
49482
  renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB VRAM) \u2014 PersonaPlex compatible \u2713`);
@@ -51087,7 +51145,7 @@ async function showCohereDashboard(ctx) {
51087
51145
  const snapItems = snaps.slice(0, 20).map((f) => ({
51088
51146
  key: f,
51089
51147
  label: f.replace(".json", ""),
51090
- detail: `${formatFileSize(statSync14(join59(snapDir, f)).size)}`
51148
+ detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
51091
51149
  }));
51092
51150
  if (snapItems.length > 0) {
51093
51151
  await tuiSelect({
@@ -59364,7 +59422,7 @@ var init_tool_policy = __esm({
59364
59422
  });
59365
59423
 
59366
59424
  // packages/cli/dist/tui/telegram-bridge.js
59367
- import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync15 } from "node:fs";
59425
+ import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
59368
59426
  import { join as join68, resolve as resolve30 } from "node:path";
59369
59427
  import { writeFile as writeFileAsync } from "node:fs/promises";
59370
59428
  function convertMarkdownToTelegramHTML(md) {
@@ -71366,7 +71424,7 @@ __export(index_repo_exports, {
71366
71424
  indexRepoCommand: () => indexRepoCommand
71367
71425
  });
71368
71426
  import { resolve as resolve34 } from "node:path";
71369
- import { existsSync as existsSync56, statSync as statSync16 } from "node:fs";
71427
+ import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
71370
71428
  import { cwd as cwd2 } from "node:process";
71371
71429
  async function indexRepoCommand(opts, _config) {
71372
71430
  const repoRoot = resolve34(opts.repoPath ?? cwd2());
@@ -71376,7 +71434,7 @@ async function indexRepoCommand(opts, _config) {
71376
71434
  printError(`Path does not exist: ${repoRoot}`);
71377
71435
  process.exit(1);
71378
71436
  }
71379
- const stat5 = statSync16(repoRoot);
71437
+ const stat5 = statSync17(repoRoot);
71380
71438
  if (!stat5.isDirectory()) {
71381
71439
  printError(`Path is not a directory: ${repoRoot}`);
71382
71440
  process.exit(1);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.185.31",
3
+ "version": "0.185.32",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -0,0 +1,174 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
4
+
5
+ For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
6
+ bf16 safetensors file that moshi.server can load natively.
7
+
8
+ Usage:
9
+ python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
10
+ python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
11
+
12
+ The output file can then be passed to moshi.server via --moshi-weight.
13
+ """
14
+
15
+ import os, sys, math, time
16
+ import torch
17
+ from safetensors.torch import load_file, save_file
18
+
19
+ NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
20
+
21
+
22
def fast_wht(x):
    """Normalized Walsh-Hadamard transform along the last dimension.

    Runs the butterfly passes over blocks of width 1, 2, 4, ... and divides
    the result by sqrt(n), where n is the size of the last dimension. With
    that normalization, applying the transform twice returns the input.

    The input tensor is never modified; a new tensor is returned.

    Args:
        x: tensor whose last dimension has a power-of-two length.

    Returns:
        A tensor with the same shape as ``x``.

    Raises:
        ValueError: if the last dimension is not a power of two.
    """
    n = x.shape[-1]
    # The butterfly pairs up halves of 2h-wide blocks, so n must be 2**k.
    if n & (n - 1):
        raise ValueError(f"fast_wht needs a power-of-two last dimension, got {n}")

    full_shape = x.shape
    lead = full_shape[:-1]
    h = 1
    while h < n:
        # Split the last axis into (n / 2h) blocks, each holding two h-wide halves.
        blocks = x.reshape(*lead, n // (2 * h), 2, h)
        first = blocks[..., 0, :]
        second = blocks[..., 1, :]
        # Build the next stage out-of-place so the caller's tensor stays intact.
        x = torch.stack((first + second, first - second), dim=-2).reshape(full_shape)
        h *= 2
    return x / math.sqrt(n)
35
+
36
+
37
def detect_format(state):
    """Classify a loaded state dict by the key suffixes it contains.

    Returns:
        "turbo2bit" if any key ends with ".packed";
        otherwise "nf4" if any key ends with ".__scales__";
        otherwise "plain".
    """
    saw_nf4_scales = False
    for key in state:
        # A packed entry takes precedence over everything else.
        if key.endswith(".packed"):
            return "turbo2bit"
        if key.endswith(".__scales__"):
            saw_nf4_scales = True
    return "nf4" if saw_nf4_scales else "plain"
46
+
47
+
48
def dequant_nf4(state, group_size=64):
    """Dequantize 4-bit packed weights (the "nf4" tier) to bfloat16.

    A tensor ``name`` is treated as quantized when ``name.__scales__`` is also
    present; ``name.__shape__`` and ``name.__numel__`` must then exist too.
    Each packed byte holds two codes (low nibble first); a code maps to
    ``code - 8`` and is multiplied by the scale of its group.

    Tensors without a ``.__scales__`` companion are passed through, converted
    to bfloat16. The three sidecar keys themselves are not copied to the output.

    Args:
        state: mapping of tensor name -> tensor, as loaded from safetensors.
        group_size: number of consecutive values sharing one scale.

    Returns:
        dict of tensor name -> bfloat16 tensor.

    Raises:
        KeyError: if a quantized tensor lacks its ``__shape__``/``__numel__`` entry.
    """
    sidecar_suffixes = (".__scales__", ".__shape__", ".__numel__")
    result = {}

    for name, tensor in state.items():
        if name.endswith(sidecar_suffixes):
            continue

        scales_key = f"{name}.__scales__"
        if scales_key not in state:
            result[name] = tensor.to(torch.bfloat16)
            continue

        # Flatten so packed tensors of any rank unpack the same way.
        packed = tensor.reshape(-1)
        # Keep everything on the packed tensor's device so a non-CPU load works.
        scales = state[scales_key].float().reshape(-1).to(packed.device)
        # Stored shape may carry zero entries; only positive dims are kept.
        shape = [s for s in state[f"{name}.__shape__"].tolist() if s > 0]
        numel = int(state[f"{name}.__numel__"].item())

        lo = (packed & 0x0F).to(torch.int8) - 8
        hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
        # Interleave as lo0, hi0, lo1, hi1, ... without a CPU-side scratch buffer.
        unpacked = torch.stack((lo, hi), dim=1).reshape(-1).float()

        n_groups = scales.numel()
        groups = unpacked[: n_groups * group_size].reshape(n_groups, group_size)
        # Drop any padding past the original element count before reshaping.
        deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
        result[name] = deq.reshape(shape).to(torch.bfloat16)

    return result
85
+
86
+
87
def dequant_turbo2bit(state):
    """Dequantize 2-bit packed weights (the "turbo2bit" tier) to bfloat16.

    A quantized tensor ``name`` is described by the keys ``name.packed``,
    ``name.scales``, ``name.shape``, ``name.numel``, ``name.gs`` (group size)
    and ``name.np2`` (group size padded for the Walsh-Hadamard transform).
    Each packed byte holds four 2-bit codes, lowest bits first. Codes index
    NF2_CENTROIDS, each group is run through fast_wht, cut back to ``gs``
    values and multiplied by its scale.

    Quantized tensors are discovered from the ``.packed`` keys, so they are
    decoded whether or not a bare ``name`` entry is also stored.
    NOTE(review): the on-disk layout is not visible from this file; confirm
    whether the quantizer writes a bare ``name`` key alongside the sidecars.

    Every other tensor is passed through, converted to bfloat16. Only keys
    that belong to a quantized tensor are left out of the result.

    Args:
        state: mapping of tensor name -> tensor, as loaded from safetensors.

    Returns:
        dict of tensor name -> bfloat16 tensor.

    Raises:
        KeyError: if a ``.packed`` entry is missing one of its sidecar keys.
    """
    sidecar_names = ("packed", "scales", "shape", "numel", "gs", "np2")
    packed_suffix = ".packed"

    result = {}
    consumed = set()

    for key in list(state.keys()):
        if not key.endswith(packed_suffix):
            continue
        base = key[: -len(packed_suffix)]
        consumed.add(base)
        consumed.update(f"{base}.{s}" for s in sidecar_names)

        packed = state[key]
        # Keep all intermediates on the packed tensor's device so a non-CPU load works.
        device = packed.device
        gs = int(state[f"{base}.gs"].item())
        gs_pow2 = int(state[f"{base}.np2"].item())
        numel = int(state[f"{base}.numel"].item())
        # Stored shape may carry zero entries; only positive dims are kept.
        shape = [s for s in state[f"{base}.shape"].tolist() if s > 0]
        scales = state[f"{base}.scales"].float().reshape(-1).to(device)
        n_groups = scales.numel()

        # Unpack 2-bit codes: value 4*j + i comes from bits [2i, 2i+1] of byte j.
        p = packed.reshape(n_groups, gs // 4).to(torch.long)
        shifts = torch.arange(4, device=device) * 2
        codes = ((p.unsqueeze(-1) >> shifts) & 0x03).reshape(n_groups, gs)

        dequant = NF2_CENTROIDS.to(device)[codes]

        # Inverse WHT, zero-padded up to the transform length when needed.
        if gs_pow2 > gs:
            padding = torch.zeros(n_groups, gs_pow2 - gs, dtype=dequant.dtype, device=device)
            dequant = torch.cat([dequant, padding], dim=1)
        dequant = fast_wht(dequant)[:, :gs]

        dequant = dequant * scales.unsqueeze(1)
        # Drop any padding past the original element count before reshaping.
        result[base] = dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)

    for name, tensor in state.items():
        if name in consumed:
            continue
        result[name] = tensor.to(torch.bfloat16)

    return result
130
+
131
+
132
def main(argv=None):
    """Command-line entry point: dequantize a safetensors file to bf16.

    Loads ``--input``, detects its format with detect_format, dequantizes it
    (or just converts to bfloat16 when it is already plain) and writes the
    result to ``--output``.

    The run is skipped when ``--output`` already exists and is newer than
    ``--input``. Because that check trusts whatever file is at the output
    path, the result is first written to a temporary sibling file and only
    moved into place once saving has finished, so an interrupted run cannot
    leave a truncated file that later passes as a valid cache.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with status 1 if the input file does not exist, and status 0 when
    the cached output is up to date.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
    args = parser.parse_args(argv)

    if not os.path.exists(args.input):
        print(f"Error: {args.input} not found")
        sys.exit(1)

    # Skip if output already exists and is newer than input
    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
        print(f"Cached: {args.output} is up to date")
        sys.exit(0)

    print(f"Loading {args.input}...")
    t0 = time.time()
    state = load_file(args.input, device=args.device)

    fmt = detect_format(state)
    print(f"Format: {fmt}")

    if fmt == "nf4":
        result = dequant_nf4(state)
    elif fmt == "turbo2bit":
        result = dequant_turbo2bit(state)
    else:
        print("Already plain bf16/fp16 — copying")
        result = {k: v.to(torch.bfloat16) for k, v in state.items()}

    t1 = time.time()
    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")

    print(f"Saving to {args.output}...")
    # Write next to the destination (same filesystem) so os.replace is atomic.
    tmp_output = f"{args.output}.{os.getpid()}.tmp"
    try:
        save_file(result, tmp_output)
        os.replace(tmp_output, args.output)
    except BaseException:
        # Do not leave a half-written temp file behind on failure or Ctrl-C.
        if os.path.exists(tmp_output):
            os.remove(tmp_output)
        raise
    size_gb = os.path.getsize(args.output) / 1024**3
    print(f"Done: {size_gb:.2f} GB")


if __name__ == "__main__":
    main()