open-agents-ai 0.185.30 → 0.185.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -41270,6 +41270,8 @@ __export(personaplex_exports, {
41270
41270
  clonePersonaPlexVoice: () => clonePersonaPlexVoice,
41271
41271
  detectPersonaPlexCapability: () => detectPersonaPlexCapability,
41272
41272
  getPersonaPlexWSUrl: () => getPersonaPlexWSUrl,
41273
+ getWeightRepoInfo: () => getWeightRepoInfo,
41274
+ getWeightTier: () => getWeightTier,
41273
41275
  installPersonaPlex: () => installPersonaPlex,
41274
41276
  isPersonaPlexInstalled: () => isPersonaPlexInstalled,
41275
41277
  isPersonaPlexRunning: () => isPersonaPlexRunning,
@@ -41279,37 +41281,68 @@ __export(personaplex_exports, {
41279
41281
  startPersonaPlexDaemon: () => startPersonaPlexDaemon,
41280
41282
  stopPersonaPlex: () => stopPersonaPlex
41281
41283
  });
41282
- import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11 } from "node:fs";
41284
+ import { existsSync as existsSync37, writeFileSync as writeFileSync16, readFileSync as readFileSync28, mkdirSync as mkdirSync15, copyFileSync as copyFileSync2, readdirSync as readdirSync11, statSync as statSync13 } from "node:fs";
41283
41285
  import { join as join54, dirname as dirname18 } from "node:path";
41284
41286
  import { homedir as homedir13 } from "node:os";
41285
41287
  import { execSync as execSync27, spawn as spawn19 } from "node:child_process";
41286
41288
  import { fileURLToPath as fileURLToPath11 } from "node:url";
41289
/**
 * Map a memory figure (in GB) to a weight tier name.
 *
 * Thresholds are checked from largest to smallest: 48 and above selects
 * "original", 16 and above selects "nf4", anything else (including NaN)
 * selects "turbo2bit".
 *
 * @param {number} vramGB - memory size in GB to classify.
 * @returns {"original" | "nf4" | "turbo2bit"} the selected tier name.
 */
function selectWeightTier(vramGB) {
  const ladder = [
    [48, "original"],
    [16, "nf4"],
  ];
  for (const [minimumGB, tierName] of ladder) {
    if (vramGB >= minimumGB) {
      return tierName;
    }
  }
  return "turbo2bit";
}
41296
/**
 * Detect a Jetson-class board from the device-tree model string.
 *
 * Reads `/proc/device-tree/model`; when the text matches jetson/orin/tegra
 * (case-insensitive) the `MemTotal` figure from `/proc/meminfo` is reported
 * in GB (kB / 1024 / 1024). Any read failure, a non-matching model, or a
 * missing `MemTotal` line yields the "not a Jetson" result.
 *
 * @returns {{isJetson: boolean, model: string, totalMemGB: number}}
 */
function detectJetson() {
  const notJetson = { isJetson: false, model: "", totalMemGB: 0 };
  try {
    // Strip NUL bytes from the model string before matching.
    const model = readFileSync28("/proc/device-tree/model", "utf8").replace(/\0/g, "").trim();
    if (!/jetson|orin|tegra/i.test(model)) {
      return notJetson;
    }
    // Read /proc/meminfo directly rather than spawning `grep` through a shell:
    // no dependency on PATH, no subprocess, and nothing to time out.
    const memTotal = /^MemTotal:\s*(\d+)/m.exec(readFileSync28("/proc/meminfo", "utf8"));
    if (memTotal === null) {
      return notJetson;
    }
    const memKB = Number.parseInt(memTotal[1], 10);
    return { isJetson: true, model, totalMemGB: memKB / 1024 / 1024 };
  } catch {
    // Missing files (non-Linux or non-device-tree hosts) simply mean "not a Jetson".
    return notJetson;
  }
}
41287
41308
  function detectPersonaPlexCapability() {
41309
+ const fail = (reason) => ({
41310
+ supported: false,
41311
+ reason,
41312
+ gpuName: "",
41313
+ vramGB: 0,
41314
+ weightTier: "turbo2bit",
41315
+ needsHfToken: false
41316
+ });
41317
+ const jetson = detectJetson();
41318
+ if (jetson.isJetson) {
41319
+ const vramGB = jetson.totalMemGB;
41320
+ if (vramGB < 8)
41321
+ return { ...fail(`Jetson has ${vramGB.toFixed(0)}GB unified memory (need \u22658GB)`), gpuName: jetson.model, vramGB };
41322
+ const tier = selectWeightTier(vramGB);
41323
+ const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
41324
+ const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
41325
+ return {
41326
+ supported: true,
41327
+ reason: `Jetson ${jetson.model} \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
41328
+ gpuName: jetson.model,
41329
+ vramGB,
41330
+ weightTier: effectiveTier,
41331
+ needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
41332
+ };
41333
+ }
41288
41334
  try {
41289
41335
  const nvsmi = execSync27("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits", {
41290
41336
  encoding: "utf8",
41291
41337
  timeout: 5e3,
41292
41338
  stdio: "pipe"
41293
41339
  }).trim();
41294
- if (!nvsmi) {
41295
- return { supported: false, reason: "No NVIDIA GPU detected", gpuName: "", vramGB: 0 };
41296
- }
41340
+ if (!nvsmi)
41341
+ return fail("No NVIDIA GPU detected");
41297
41342
  const [gpuName, vramMB] = nvsmi.split("\n")[0].split(", ");
41298
41343
  const vramGB = parseInt(vramMB ?? "0", 10) / 1024;
41299
- if (vramGB < 16) {
41300
- const isJetson = /orin|tegra|jetson/i.test(gpuName ?? "");
41301
- if (isJetson) {
41302
- try {
41303
- const memInfo = execSync27("grep MemTotal /proc/meminfo", { encoding: "utf8", timeout: 3e3, stdio: "pipe" });
41304
- const memKB = parseInt(memInfo.match(/(\d+)/)?.[1] ?? "0", 10);
41305
- const totalGB = memKB / 1024 / 1024;
41306
- if (totalGB >= 32) {
41307
- return { supported: true, reason: `Jetson unified memory (${totalGB.toFixed(0)}GB total)`, gpuName: gpuName ?? "", vramGB: totalGB };
41308
- }
41309
- } catch {
41310
- }
41311
- }
41312
- return { supported: false, reason: `GPU has ${vramGB.toFixed(1)}GB VRAM (need \u226516GB)`, gpuName: gpuName ?? "", vramGB };
41344
+ if (vramGB < 8) {
41345
+ return { ...fail(`GPU has ${vramGB.toFixed(1)}GB VRAM (need \u22658GB)`), gpuName: gpuName ?? "", vramGB };
41313
41346
  }
41314
41347
  try {
41315
41348
  execSync27('python3 -c "import torch; assert torch.cuda.is_available()"', {
@@ -41317,11 +41350,22 @@ function detectPersonaPlexCapability() {
41317
41350
  stdio: "pipe"
41318
41351
  });
41319
41352
  } catch {
41320
- return { supported: false, reason: "PyTorch CUDA not available", gpuName: gpuName ?? "", vramGB };
41353
+ const tier2 = selectWeightTier(vramGB);
41354
+ return { ...fail("PyTorch CUDA not available"), gpuName: gpuName ?? "", vramGB, weightTier: tier2, needsHfToken: WEIGHT_REPOS[tier2].needsToken };
41321
41355
  }
41322
- return { supported: true, reason: "OK", gpuName: gpuName ?? "", vramGB };
41356
+ const tier = selectWeightTier(vramGB);
41357
+ const hasHfToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
41358
+ const effectiveTier = tier === "original" && !hasHfToken ? "nf4" : tier;
41359
+ return {
41360
+ supported: true,
41361
+ reason: `OK \u2014 ${effectiveTier} weights (${WEIGHT_REPOS[effectiveTier].sizeGB}GB)`,
41362
+ gpuName: gpuName ?? "",
41363
+ vramGB,
41364
+ weightTier: effectiveTier,
41365
+ needsHfToken: WEIGHT_REPOS[effectiveTier].needsToken
41366
+ };
41323
41367
  } catch {
41324
- return { supported: false, reason: "nvidia-smi not found", gpuName: "", vramGB: 0 };
41368
+ return fail("No NVIDIA GPU detected (nvidia-smi not found)");
41325
41369
  }
41326
41370
  }
41327
41371
  function isPersonaPlexRunning() {
@@ -41348,15 +41392,36 @@ function getPersonaPlexWSUrl() {
41348
41392
  function isPersonaPlexInstalled() {
41349
41393
  return existsSync37(join54(PERSONAPLEX_DIR, "model_ready"));
41350
41394
  }
41351
- async function installPersonaPlex(onInfo) {
41395
/**
 * Resolve the weight tier to use.
 *
 * Returns the tier name stored in the `weight_tier` file under
 * PERSONAPLEX_DIR when that name is an own key of WEIGHT_REPOS; otherwise
 * returns the `weightTier` reported by detectPersonaPlexCapability().
 *
 * @returns {string} a key of WEIGHT_REPOS, or whatever detection reports.
 */
function getWeightTier() {
  const tierFile = join54(PERSONAPLEX_DIR, "weight_tier");
  if (existsSync37(tierFile)) {
    const saved = readFileSync28(tierFile, "utf8").trim();
    // Own-property check on purpose: the `in` operator also walks the prototype
    // chain, so file contents such as "toString" or "constructor" would be
    // accepted as a tier and later index WEIGHT_REPOS to a non-repo value.
    if (Object.prototype.hasOwnProperty.call(WEIGHT_REPOS, saved)) {
      return saved;
    }
  }
  return detectPersonaPlexCapability().weightTier;
}
41404
/**
 * Look up the WEIGHT_REPOS entry stored under a tier name.
 *
 * @param {string} tier - key into WEIGHT_REPOS.
 * @returns {*} the value found under that key, or undefined when there is none.
 */
function getWeightRepoInfo(tier) {
  const { [tier]: repoInfo } = WEIGHT_REPOS;
  return repoInfo;
}
41407
+ async function installPersonaPlex(onInfo, weightTier) {
41352
41408
  const log = onInfo ?? (() => {
41353
41409
  });
41354
41410
  mkdirSync15(PERSONAPLEX_DIR, { recursive: true });
41411
+ let arch2 = "";
41412
+ try {
41413
+ arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
41414
+ } catch {
41415
+ }
41416
+ const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
41417
+ if (isAarch64)
41418
+ log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
41355
41419
  const venvDir = join54(PERSONAPLEX_DIR, "venv");
41356
41420
  if (!existsSync37(venvDir)) {
41357
41421
  log("Creating Python virtual environment...");
41358
41422
  try {
41359
- execSync27(`python3 -m venv "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
41423
+ const ssp = isAarch64 ? " --system-site-packages" : "";
41424
+ execSync27(`python3 -m venv${ssp} "${venvDir}"`, { timeout: 6e4, stdio: "pipe" });
41360
41425
  } catch (err) {
41361
41426
  log(`Failed to create venv: ${err instanceof Error ? err.message : String(err)}`);
41362
41427
  return false;
@@ -41364,14 +41429,6 @@ async function installPersonaPlex(onInfo) {
41364
41429
  }
41365
41430
  const pip = process.platform === "win32" ? join54(venvDir, "Scripts", "pip.exe") : join54(venvDir, "bin", "pip");
41366
41431
  const python = process.platform === "win32" ? join54(venvDir, "Scripts", "python.exe") : join54(venvDir, "bin", "python3");
41367
- let arch2 = "";
41368
- try {
41369
- arch2 = execSync27("uname -m", { encoding: "utf8", timeout: 3e3, stdio: "pipe" }).trim();
41370
- } catch {
41371
- }
41372
- const isAarch64 = arch2 === "aarch64" || arch2 === "arm64";
41373
- if (isAarch64)
41374
- log(`Detected ARM64 platform (${arch2}) \u2014 Jetson/ARM install path`);
41375
41432
  log("Checking system dependencies (libopus)...");
41376
41433
  try {
41377
41434
  if (process.platform === "linux") {
@@ -41456,12 +41513,64 @@ async function installPersonaPlex(onInfo) {
41456
41513
  execSync27(`"${pip}" install --quiet pyloudnorm noisereduce torchaudio`, { timeout: 12e4, stdio: "pipe" });
41457
41514
  } catch {
41458
41515
  }
41459
- log("PersonaPlex installed. Model will download on first launch (~14GB).");
41460
- if (isAarch64) {
41461
- log("ARM64: On first run, weights will load in INT4 mode for real-time performance.");
41516
+ const tier = weightTier ?? detectPersonaPlexCapability().weightTier;
41517
+ const repoInfo = WEIGHT_REPOS[tier];
41518
+ log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 ${repoInfo.needsToken ? "requires HF_TOKEN" : "public, no token needed"}`);
41519
+ log(`Downloading PersonaPlex weights (${repoInfo.sizeGB}GB)...`);
41520
+ try {
41521
+ const tokenArg = repoInfo.needsToken ? "" : "--token ''";
41522
+ const dlCmd = `"${python}" -c "from huggingface_hub import hf_hub_download; f=hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}); print(f)"`;
41523
+ const weightPath = execSync27(dlCmd, {
41524
+ encoding: "utf8",
41525
+ timeout: 6e5,
41526
+ stdio: "pipe",
41527
+ env: { ...process.env }
41528
+ }).trim();
41529
+ log(`Weights downloaded: ${repoInfo.file}`);
41530
+ if (tier !== "original") {
41531
+ log("Downloading Mimi codec and tokenizer...");
41532
+ try {
41533
+ const hasToken = !!(process.env["HF_TOKEN"] || process.env["HUGGING_FACE_HUB_TOKEN"]);
41534
+ if (hasToken) {
41535
+ execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer_spm_32k_3.model'); hf_hub_download('nvidia/personaplex-7b-v1', 'tokenizer-e351c8d8-checkpoint125.safetensors')"`, {
41536
+ timeout: 3e5,
41537
+ stdio: "pipe"
41538
+ });
41539
+ log("Codec + tokenizer downloaded.");
41540
+ } else {
41541
+ log("Note: Mimi codec needs HF_TOKEN on first run (set HF_TOKEN env var).");
41542
+ log("Weights themselves are public \u2014 no token needed for the model.");
41543
+ }
41544
+ } catch {
41545
+ }
41546
+ }
41547
+ } catch (err) {
41548
+ const msg = err instanceof Error ? err.message : String(err);
41549
+ if (repoInfo.needsToken && /401|403|gated|unauthorized/i.test(msg)) {
41550
+ log(`HF_TOKEN required for ${tier} weights. Set HF_TOKEN or accept license at https://huggingface.co/${repoInfo.repo}`);
41551
+ if (tier === "original") {
41552
+ log("Auto-downgrading to INT4 weights (no token required)...");
41553
+ const nf4 = WEIGHT_REPOS["nf4"];
41554
+ try {
41555
+ execSync27(`"${python}" -c "from huggingface_hub import hf_hub_download; hf_hub_download('${nf4.repo}', '${nf4.file}', token=False)"`, {
41556
+ timeout: 6e5,
41557
+ stdio: "pipe"
41558
+ });
41559
+ writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), "nf4");
41560
+ log(`Downloaded INT4 weights instead (${nf4.sizeGB}GB, public).`);
41561
+ } catch {
41562
+ log("Weight download failed.");
41563
+ return false;
41564
+ }
41565
+ }
41566
+ } else {
41567
+ log(`Weight download failed: ${msg}`);
41568
+ log("Weights will download on first server launch.");
41569
+ }
41462
41570
  }
41571
+ writeFileSync16(join54(PERSONAPLEX_DIR, "weight_tier"), tier);
41463
41572
  writeFileSync16(join54(PERSONAPLEX_DIR, "model_ready"), (/* @__PURE__ */ new Date()).toISOString());
41464
- log("PersonaPlex installed successfully.");
41573
+ log(`PersonaPlex installed (${tier} tier). Use /call to start voice session.`);
41465
41574
  return true;
41466
41575
  }
41467
41576
  async function startPersonaPlexDaemon(onInfo) {
@@ -41483,7 +41592,39 @@ async function startPersonaPlexDaemon(onInfo) {
41483
41592
  const venvPython2 = process.platform === "win32" ? join54(PERSONAPLEX_DIR, "venv", "Scripts", "python.exe") : join54(PERSONAPLEX_DIR, "venv", "bin", "python3");
41484
41593
  const sslDir = join54(PERSONAPLEX_DIR, "ssl");
41485
41594
  mkdirSync15(sslDir, { recursive: true });
41486
- log("Starting PersonaPlex daemon (loading ~7B model)...");
41595
+ const tier = getWeightTier();
41596
+ const repoInfo = WEIGHT_REPOS[tier];
41597
+ const extraArgs = [];
41598
+ if (tier !== "original") {
41599
+ log(`Weight tier: ${tier} (${repoInfo.sizeGB}GB) \u2014 dequantizing to bf16 cache...`);
41600
+ const dequantScript = join54(PERSONAPLEX_DIR, "dequant-loader.py");
41601
+ const cachedBf16 = join54(PERSONAPLEX_DIR, "model-bf16-cache.safetensors");
41602
+ if (!existsSync37(dequantScript)) {
41603
+ const shipped = getShippedVoicesDir();
41604
+ if (shipped) {
41605
+ const src = join54(shipped, "dequant-loader.py");
41606
+ if (existsSync37(src))
41607
+ copyFileSync2(src, dequantScript);
41608
+ }
41609
+ }
41610
+ try {
41611
+ const weightPath = execSync27(`"${venvPython2}" -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${repoInfo.repo}', '${repoInfo.file}'${repoInfo.needsToken ? "" : ", token=False"}))"`, { encoding: "utf8", timeout: 3e4, stdio: "pipe" }).trim();
41612
+ if (existsSync37(dequantScript) && existsSync37(weightPath)) {
41613
+ try {
41614
+ execSync27(`"${venvPython2}" "${dequantScript}" --input "${weightPath}" --output "${cachedBf16}"`, { timeout: 3e5, stdio: "pipe" });
41615
+ if (existsSync37(cachedBf16)) {
41616
+ extraArgs.push("--moshi-weight", cachedBf16);
41617
+ log(`Using dequantized cache: ${(statSync13(cachedBf16).size / 1024 ** 3).toFixed(1)}GB`);
41618
+ }
41619
+ } catch (e) {
41620
+ log(`Dequantization failed \u2014 server will try to load original weights`);
41621
+ }
41622
+ }
41623
+ } catch {
41624
+ log(`Weight file not found \u2014 server will download on first run`);
41625
+ }
41626
+ }
41627
+ log(`Starting PersonaPlex daemon (${tier} tier)...`);
41487
41628
  const child = spawn19(venvPython2, [
41488
41629
  "-m",
41489
41630
  "moshi.server",
@@ -41494,7 +41635,8 @@ async function startPersonaPlexDaemon(onInfo) {
41494
41635
  "--ssl",
41495
41636
  sslDir,
41496
41637
  "--device",
41497
- "cuda"
41638
+ "cuda",
41639
+ ...extraArgs
41498
41640
  ], {
41499
41641
  stdio: ["ignore", "pipe", "pipe"],
41500
41642
  detached: true,
@@ -41581,8 +41723,7 @@ function listPersonaPlexVoices() {
41581
41723
  }
41582
41724
  if (existsSync37(CUSTOM_VOICES_DIR)) {
41583
41725
  try {
41584
- const { readdirSync: readdirSync24 } = __require("node:fs");
41585
- for (const f of readdirSync24(CUSTOM_VOICES_DIR)) {
41726
+ for (const f of readdirSync11(CUSTOM_VOICES_DIR)) {
41586
41727
  if (f.endsWith(".pt")) {
41587
41728
  const name = f.replace(/\.pt$/, "");
41588
41729
  voices.push({ name, type: "custom", path: join54(CUSTOM_VOICES_DIR, f) });
@@ -41784,10 +41925,11 @@ async function autoSetupPersonaPlex(onInfo) {
41784
41925
  log(`PersonaPlex not available: ${caps.reason}`);
41785
41926
  return null;
41786
41927
  }
41787
- log(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB) \u2014 PersonaPlex compatible`);
41928
+ const tierInfo = WEIGHT_REPOS[caps.weightTier];
41929
+ log(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB) \u2192 ${caps.weightTier} weights (${tierInfo.sizeGB}GB${caps.needsHfToken ? "" : ", no HF token needed"})`);
41788
41930
  if (!isPersonaPlexInstalled()) {
41789
41931
  log("Installing PersonaPlex (first time setup)...");
41790
- const ok = await installPersonaPlex(log);
41932
+ const ok = await installPersonaPlex(log, caps.weightTier);
41791
41933
  if (!ok) {
41792
41934
  log("PersonaPlex installation failed.");
41793
41935
  return null;
@@ -41807,11 +41949,16 @@ async function autoSetupPersonaPlex(onInfo) {
41807
41949
  }
41808
41950
  return await startPersonaPlexDaemon(log);
41809
41951
  }
41810
- var PERSONAPLEX_DIR, PID_FILE, PORT_FILE, LOG_FILE, CUSTOM_VOICES_DIR;
41952
+ var WEIGHT_REPOS, PERSONAPLEX_DIR, PID_FILE, PORT_FILE, LOG_FILE, CUSTOM_VOICES_DIR;
41811
41953
  var init_personaplex = __esm({
41812
41954
  "packages/cli/dist/tui/personaplex.js"() {
41813
41955
  "use strict";
41814
41956
  init_render();
41957
+ WEIGHT_REPOS = {
41958
+ original: { repo: "nvidia/personaplex-7b-v1", file: "model.safetensors", sizeGB: 15.6, needsToken: true },
41959
+ nf4: { repo: "cudabenchmarktest/personaplex-7b-nf4", file: "model-nf4.safetensors", sizeGB: 4.1, needsToken: false },
41960
+ turbo2bit: { repo: "cudabenchmarktest/personaplex-7b-turbo2bit", file: "model-turbo2bit.safetensors", sizeGB: 2.1, needsToken: false }
41961
+ };
41815
41962
  PERSONAPLEX_DIR = join54(homedir13(), ".open-agents", "voice", "personaplex");
41816
41963
  PID_FILE = join54(PERSONAPLEX_DIR, "daemon.pid");
41817
41964
  PORT_FILE = join54(PERSONAPLEX_DIR, "daemon.port");
@@ -45327,7 +45474,7 @@ __export(voice_exports, {
45327
45474
  registerCustomOnnxModel: () => registerCustomOnnxModel,
45328
45475
  resetNarrationContext: () => resetNarrationContext
45329
45476
  });
45330
- import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync13 } from "node:fs";
45477
+ import { existsSync as existsSync42, mkdirSync as mkdirSync18, writeFileSync as writeFileSync19, readFileSync as readFileSync31, unlinkSync as unlinkSync9, readdirSync as readdirSync12, renameSync, statSync as statSync14 } from "node:fs";
45331
45478
  import { join as join58, dirname as dirname19 } from "node:path";
45332
45479
  import { homedir as homedir15, tmpdir as tmpdir9, platform as platform3 } from "node:os";
45333
45480
  import { execSync as execSync30, spawn as nodeSpawn } from "node:child_process";
@@ -46461,7 +46608,7 @@ var init_voice = __esm({
46461
46608
  const p = join58(dir, f);
46462
46609
  let size = 0;
46463
46610
  try {
46464
- size = statSync13(p).size;
46611
+ size = statSync14(p).size;
46465
46612
  } catch {
46466
46613
  }
46467
46614
  return {
@@ -48077,7 +48224,7 @@ Error: ${err instanceof Error ? err.message : String(err)}`);
48077
48224
  // packages/cli/dist/tui/commands.js
48078
48225
  import * as nodeOs from "node:os";
48079
48226
  import { execSync as nodeExecSync } from "node:child_process";
48080
- import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync14, rmSync } from "node:fs";
48227
+ import { existsSync as existsSync43, readFileSync as readFileSync32, writeFileSync as writeFileSync20, mkdirSync as mkdirSync19, readdirSync as readdirSync13, statSync as statSync15, rmSync } from "node:fs";
48081
48228
  import { join as join59 } from "node:path";
48082
48229
  function safeLog(text) {
48083
48230
  if (isNeovimActive()) {
@@ -48890,7 +49037,7 @@ async function handleSlashCommand(input, ctx) {
48890
49037
  ipfsFiles = files.length;
48891
49038
  for (const f of files) {
48892
49039
  try {
48893
- ipfsBytes += statSync14(join59(ipfsLocalDir, f)).size;
49040
+ ipfsBytes += statSync15(join59(ipfsLocalDir, f)).size;
48894
49041
  } catch {
48895
49042
  }
48896
49043
  }
@@ -48904,7 +49051,7 @@ async function handleSlashCommand(input, ctx) {
48904
49051
  else {
48905
49052
  heliaBlocks++;
48906
49053
  try {
48907
- heliaBytes += statSync14(join59(dir, entry.name)).size;
49054
+ heliaBytes += statSync15(join59(dir, entry.name)).size;
48908
49055
  } catch {
48909
49056
  }
48910
49057
  }
@@ -48997,7 +49144,7 @@ async function handleSlashCommand(input, ctx) {
48997
49144
  const count = memStore.count();
48998
49145
  lines.push(`
48999
49146
  ${c2.bold("Structured Memory (SQLite)")}`);
49000
- lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(statSync14(dbPath).size))}`);
49147
+ lines.push(` Memories: ${c2.bold(String(count))} DB: ${c2.dim(formatFileSize(statSync15(dbPath).size))}`);
49001
49148
  cDb(db);
49002
49149
  }
49003
49150
  } catch {
@@ -49028,7 +49175,7 @@ async function handleSlashCommand(input, ctx) {
49028
49175
  walkStorage(full, subCat);
49029
49176
  } else {
49030
49177
  try {
49031
- const sz = statSync14(full).size;
49178
+ const sz = statSync15(full).size;
49032
49179
  totalBytes += sz;
49033
49180
  if (!categories[category])
49034
49181
  categories[category] = { files: 0, bytes: 0 };
@@ -49329,7 +49476,7 @@ async function handleSlashCommand(input, ctx) {
49329
49476
  const caps = detectPersonaPlexCapability2();
49330
49477
  if (!caps.supported) {
49331
49478
  renderWarning(`PersonaPlex not available: ${caps.reason}`);
49332
- renderInfo("Requirements: NVIDIA GPU with \u226516GB VRAM (RTX 3090/4090/A100+), CUDA 12.1+, PyTorch");
49479
+ renderInfo("Requirements: NVIDIA GPU with \u22658GB VRAM (RTX 3060+, Jetson AGX Orin), CUDA, PyTorch");
49333
49480
  return "handled";
49334
49481
  }
49335
49482
  renderInfo(`GPU: ${caps.gpuName} (${caps.vramGB.toFixed(0)}GB VRAM) \u2014 PersonaPlex compatible \u2713`);
@@ -50998,7 +51145,7 @@ async function showCohereDashboard(ctx) {
50998
51145
  const snapItems = snaps.slice(0, 20).map((f) => ({
50999
51146
  key: f,
51000
51147
  label: f.replace(".json", ""),
51001
- detail: `${formatFileSize(statSync14(join59(snapDir, f)).size)}`
51148
+ detail: `${formatFileSize(statSync15(join59(snapDir, f)).size)}`
51002
51149
  }));
51003
51150
  if (snapItems.length > 0) {
51004
51151
  await tuiSelect({
@@ -59275,7 +59422,7 @@ var init_tool_policy = __esm({
59275
59422
  });
59276
59423
 
59277
59424
  // packages/cli/dist/tui/telegram-bridge.js
59278
- import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync15 } from "node:fs";
59425
+ import { mkdirSync as mkdirSync25, existsSync as existsSync51, unlinkSync as unlinkSync11, readdirSync as readdirSync19, statSync as statSync16 } from "node:fs";
59279
59426
  import { join as join68, resolve as resolve30 } from "node:path";
59280
59427
  import { writeFile as writeFileAsync } from "node:fs/promises";
59281
59428
  function convertMarkdownToTelegramHTML(md) {
@@ -71277,7 +71424,7 @@ __export(index_repo_exports, {
71277
71424
  indexRepoCommand: () => indexRepoCommand
71278
71425
  });
71279
71426
  import { resolve as resolve34 } from "node:path";
71280
- import { existsSync as existsSync56, statSync as statSync16 } from "node:fs";
71427
+ import { existsSync as existsSync56, statSync as statSync17 } from "node:fs";
71281
71428
  import { cwd as cwd2 } from "node:process";
71282
71429
  async function indexRepoCommand(opts, _config) {
71283
71430
  const repoRoot = resolve34(opts.repoPath ?? cwd2());
@@ -71287,7 +71434,7 @@ async function indexRepoCommand(opts, _config) {
71287
71434
  printError(`Path does not exist: ${repoRoot}`);
71288
71435
  process.exit(1);
71289
71436
  }
71290
- const stat5 = statSync16(repoRoot);
71437
+ const stat5 = statSync17(repoRoot);
71291
71438
  if (!stat5.isDirectory()) {
71292
71439
  printError(`Path is not a directory: ${repoRoot}`);
71293
71440
  process.exit(1);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.185.30",
3
+ "version": "0.185.32",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -0,0 +1,174 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ dequant-loader.py — Pre-dequantize quantized PersonaPlex weights to bf16 cache.
4
+
5
+ For NF4 (INT4) or TurboQuant 2-bit weights, dequantizes to a temporary
6
+ bf16 safetensors file that moshi.server can load natively.
7
+
8
+ Usage:
9
+ python dequant-loader.py --input model-nf4.safetensors --output /tmp/model-bf16.safetensors
10
+ python dequant-loader.py --input model-turbo2bit.safetensors --output /tmp/model-bf16.safetensors
11
+
12
+ The output file can then be passed to moshi.server via --moshi-weight.
13
+ """
14
+
15
+ import os, sys, math, time
16
+ import torch
17
+ from safetensors.torch import load_file, save_file
18
+
19
+ NF2_CENTROIDS = torch.tensor([-1.5104, -0.4528, 0.4528, 1.5104])
20
+
21
+
22
def fast_wht(x):
    """Normalized Walsh-Hadamard transform along the last dimension.

    Runs butterfly stages with strides 1, 2, 4, ... and divides the result by
    sqrt(n), where n is the size of the last dimension. With that scaling,
    applying the transform twice gives back the input.

    The input tensor is never modified: every stage builds a new tensor, so
    callers may keep using ``x`` afterwards, and non-contiguous inputs work.

    Args:
        x: tensor whose last dimension has a power-of-two size.

    Returns:
        A new tensor with the same shape as ``x``.

    Raises:
        ValueError: if the last dimension is not a power of two.
    """
    n = x.shape[-1]
    if n & (n - 1):
        raise ValueError(f"fast_wht needs a power-of-two last dimension, got {n}")

    lead = x.shape[:-1]
    out = x
    h = 1
    while h < n:
        # Group the last dimension into (blocks, 2, h) and combine each pair of halves.
        pairs = out.reshape(*lead, -1, 2, h)
        a = pairs[..., 0, :]
        b = pairs[..., 1, :]
        # stack() allocates fresh storage, so nothing is written back into `x`.
        out = torch.stack((a + b, a - b), dim=-2).reshape(*lead, n)
        h *= 2
    return out / math.sqrt(n)
35
+
36
+
37
def detect_format(state):
    """Classify a checkpoint by the suffixes of its keys.

    Returns "turbo2bit" when any key ends with ".packed", otherwise "nf4" when
    any key ends with ".__scales__", otherwise "plain". The ".packed" check
    takes priority when both kinds of key are present.
    """
    suffix_formats = (
        (".packed", "turbo2bit"),
        (".__scales__", "nf4"),
    )
    for suffix, fmt in suffix_formats:
        if any(key.endswith(suffix) for key in state):
            return fmt
    return "plain"
46
+
47
+
48
def dequant_nf4(state, group_size=64):
    """Dequantize 4-bit packed weights to bf16.

    For every key ``name`` that has a ``name.__scales__`` sibling, the tensor
    under ``name`` is read as bytes holding two 4-bit codes each (low nibble
    first). Each code is shifted by -8, multiplied by the scale of its group,
    truncated to ``name.__numel__`` values and reshaped to ``name.__shape__``.
    Keys without a scales sibling are cast to bf16 unchanged; the
    ``.__scales__`` / ``.__shape__`` / ``.__numel__`` entries are not copied.

    All intermediate tensors stay on the device of the packed tensor, and the
    packed and scales tensors are flattened first, so multi-dimensional packed
    tensors and non-CPU devices both work.

    Args:
        state: mapping of tensor name to tensor, as loaded from the checkpoint.
        group_size: number of codes sharing one scale (default 64).

    Returns:
        dict mapping each weight name to a bf16 tensor.
    """
    meta_suffixes = (".__scales__", ".__shape__", ".__numel__")
    result = {}

    for name, tensor in state.items():
        if name.endswith(meta_suffixes):
            continue

        scales_key = f"{name}.__scales__"
        if scales_key not in state:
            result[name] = tensor.to(torch.bfloat16)
            continue

        scales = state[scales_key].float().reshape(-1)
        # Non-positive entries are dropped; presumably zero padding of a
        # fixed-length shape vector (TODO confirm against the quantizer).
        shape = [s for s in state[f"{name}.__shape__"].tolist() if s > 0]
        numel = int(state[f"{name}.__numel__"].item())

        packed = tensor.reshape(-1)
        lo = (packed & 0x0F).to(torch.int8) - 8
        hi = ((packed >> 4) & 0x0F).to(torch.int8) - 8
        # Interleave as lo0, hi0, lo1, hi1, ... on the same device as `packed`.
        unpacked = torch.stack((lo, hi), dim=1).reshape(-1).float()

        n_groups = scales.numel()
        groups = unpacked[: n_groups * group_size].reshape(n_groups, group_size)
        deq = (groups * scales.unsqueeze(1)).reshape(-1)[:numel]
        result[name] = deq.reshape(shape).to(torch.bfloat16)

    return result
85
+
86
+
87
def _dequant_turbo2bit_tensor(state, base):
    """Rebuild one bf16 tensor from the `<base>.packed` entry and its metadata siblings."""
    gs = int(state[f"{base}.gs"].item())
    gs_pow2 = int(state[f"{base}.np2"].item())
    numel = int(state[f"{base}.numel"].item())
    # Non-positive entries are dropped; presumably zero padding of a
    # fixed-length shape vector (TODO confirm against the quantizer).
    shape = [s for s in state[f"{base}.shape"].tolist() if s > 0]
    scales = state[f"{base}.scales"].float().reshape(-1)
    packed = state[f"{base}.packed"]
    n_groups = scales.numel()
    device = packed.device

    # Unpack 2-bit codes: byte j of a group holds codes 4j .. 4j+3, lowest bits first.
    p = packed.reshape(n_groups, gs // 4)
    codes = torch.zeros(n_groups, gs, dtype=torch.long, device=device)
    for i in range(4):
        codes[:, i::4] = (p >> (2 * i)) & 0x03

    dequant = NF2_CENTROIDS.to(device)[codes]

    # Inverse WHT: pad each group with zeros up to gs_pow2, transform, trim back to gs.
    if gs_pow2 > gs:
        pad = torch.zeros(n_groups, gs_pow2 - gs, device=device)
        dequant = torch.cat([dequant, pad], dim=1)
    dequant = fast_wht(dequant)[:, :gs]

    dequant = dequant * scales.unsqueeze(1)
    return dequant.reshape(-1)[:numel].reshape(shape).to(torch.bfloat16)


def dequant_turbo2bit(state):
    """Dequantize TurboQuant 2-bit (NF2 + WHT) weights to bf16.

    A quantized tensor is recognised by its ``<name>.packed`` entry and is
    rebuilt from ``<name>.packed``, ``.scales``, ``.shape``, ``.numel``,
    ``.gs`` and ``.np2``; the result is stored under ``<name>``. This works
    whether or not the checkpoint also carries an entry under the bare
    ``<name>`` key (NOTE(review): the on-disk layout is not visible here;
    both variants are accepted).

    Metadata entries are skipped only when they belong to a quantized tensor,
    so a plain tensor whose name merely ends in e.g. ``.scales`` is kept.
    Every other tensor is cast to bf16 unchanged.

    Args:
        state: mapping of tensor name to tensor, as loaded from the checkpoint.

    Returns:
        dict mapping each weight name to a bf16 tensor.
    """
    meta_suffixes = (".packed", ".scales", ".shape", ".numel", ".gs", ".np2")
    result = {}

    for key, tensor in state.items():
        if key.endswith(".packed"):
            base = key[: -len(".packed")]
            result[base] = _dequant_turbo2bit_tensor(state, base)
            continue

        if f"{key}.packed" in state:
            # Bare-name entry of a quantized tensor; it is rebuilt via its ".packed" key.
            continue

        owner = key.rsplit(".", 1)[0]
        if key.endswith(meta_suffixes) and f"{owner}.packed" in state:
            # Metadata belonging to a quantized tensor; consumed by the helper.
            continue

        result[key] = tensor.to(torch.bfloat16)

    return result
130
+
131
+
132
def main():
    """Command-line entry point: dequantize a safetensors file to a bf16 cache.

    Flags: ``--input/-i`` (required), ``--output/-o`` (required) and
    ``--device/-d`` (default "cpu", passed to ``load_file``).

    Exits with status 1 when the input file does not exist, and with status 0
    without doing any work when the output exists and is newer than the input.
    Otherwise the input is loaded, dequantized according to ``detect_format``
    and written to the output path.

    The result is written to a temporary sibling file and renamed into place
    only after the save succeeded. That way an interrupted run cannot leave a
    truncated output that the "newer than input" check would later accept.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Dequantize PersonaPlex weights to bf16")
    parser.add_argument("--input", "-i", required=True, help="Quantized safetensors file")
    parser.add_argument("--output", "-o", required=True, help="Output bf16 safetensors file")
    parser.add_argument("--device", "-d", default="cpu", help="Device for dequantization")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(f"Error: {args.input} not found", file=sys.stderr)
        sys.exit(1)

    # Skip if output already exists and is newer than input
    if os.path.exists(args.output) and os.path.getmtime(args.output) > os.path.getmtime(args.input):
        print(f"Cached: {args.output} is up to date")
        sys.exit(0)

    print(f"Loading {args.input}...")
    t0 = time.time()
    state = load_file(args.input, device=args.device)

    fmt = detect_format(state)
    print(f"Format: {fmt}")

    if fmt == "nf4":
        result = dequant_nf4(state)
    elif fmt == "turbo2bit":
        result = dequant_turbo2bit(state)
    else:
        print("Already plain bf16/fp16 — copying")
        result = {k: v.to(torch.bfloat16) for k, v in state.items()}

    t1 = time.time()
    print(f"Dequantized {len(result)} tensors in {t1-t0:.1f}s")

    print(f"Saving to {args.output}...")
    tmp_path = f"{args.output}.tmp{os.getpid()}"
    try:
        save_file(result, tmp_path)
        # Atomic within one filesystem: readers see the old file or the complete new one.
        os.replace(tmp_path, args.output)
    finally:
        # Only present if the save or rename failed; do not leave debris behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    size_gb = os.path.getsize(args.output) / 1024**3
    print(f"Done: {size_gb:.2f} GB")


if __name__ == "__main__":
    main()