omnius 1.0.108 → 1.0.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -563485,6 +563485,537 @@ var init_task_complete_box = __esm({
563485
563485
  }
563486
563486
  });
563487
563487
 
563488
+ // packages/cli/src/tui/model-picker.ts
563489
+ import { totalmem as totalmem3 } from "node:os";
563490
/**
 * Reports whether a model looks like an image-generation model.
 *
 * A model qualifies when any entry of IMAGE_GEN_PATTERNS matches its name or,
 * when a family is supplied, its family string.
 *
 * @param {string} name10 - Model name as listed by the server.
 * @param {string} [family] - Optional model family; skipped when falsy.
 * @returns {boolean} True when at least one pattern matches.
 */
function isImageGenModel(name10, family) {
  for (const pattern of IMAGE_GEN_PATTERNS) {
    if (pattern.test(name10)) return true;
    if (family && pattern.test(family)) return true;
  }
  return false;
}
563493
/**
 * Extracts an explicitly configured `num_ctx` value from an `/api/show` payload.
 *
 * `show.parameters` is searched first, then `show.modelfile`; the first match wins.
 *
 * @param {{parameters?: string, modelfile?: string}} show - Show response body.
 * @returns {number|null} The parsed context size, or null when neither text mentions it.
 */
function parseShowNumCtx(show) {
  const numCtxPattern = /\b(?:PARAMETER\s+)?num_ctx\s+(\d+)/i;
  for (const text of [show.parameters, show.modelfile]) {
    const found = text ? text.match(numCtxPattern) : null;
    if (found) return parseInt(found[1], 10);
  }
  return null;
}
563502
/**
 * Lists the models of an Ollama server and enriches each entry with a context
 * length and capability flags taken from `/api/show`.
 *
 * Steps:
 *  1. GET `/api/tags` and build one entry per model, sorted by size (largest first).
 *  2. POST `/api/show` for every model in parallel; failures of individual
 *     requests are tolerated (the entry simply keeps `contextLength`/`caps` unset).
 *  3. For each successful show response, pick an explicit `num_ctx` if present,
 *     otherwise estimate the context from `model_info`, then derive capabilities.
 *
 * Compared with the previous revision, an explicit `num_ctx` no longer skips
 * capability detection, and `vision.embedding_length` keys count as vision
 * markers (matching queryModelCapabilities).
 *
 * @param {string} baseUrl - Server base URL (passed through normalizeBaseUrl).
 * @returns {Promise<Array<object>>} Model entries for the picker.
 * @throws {Error} When `/api/tags` answers with a non-2xx status or the request fails.
 */
async function fetchOllamaModels(baseUrl) {
  const normalized = normalizeBaseUrl(baseUrl);
  const resp = await fetch(`${normalized}/api/tags`, {
    signal: AbortSignal.timeout(1e4)
  });
  if (!resp.ok) {
    throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
  }
  const data = await resp.json();
  const result = (data.models ?? []).map((m2) => {
    const family = m2.details?.family;
    return {
      name: m2.name,
      size: formatBytes3(m2.size),
      sizeBytes: m2.size,
      modified: formatRelativeTime(m2.modified_at),
      parameterSize: m2.details?.parameter_size,
      contextLength: void 0,
      caps: void 0,
      isImageGen: isImageGenModel(m2.name, family),
      family
    };
  }).sort((a2, b) => b.sizeBytes - a2.sizeBytes);

  // allSettled: one slow or failing /api/show must not hide the whole list.
  const showResults = await Promise.allSettled(
    result.map(
      (m2) => fetch(`${normalized}/api/show`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name: m2.name }),
        signal: AbortSignal.timeout(5e3)
      }).then((r2) => r2.ok ? r2.json() : null)
    )
  );

  const visionKeyMarkers = ["vision.block_count", "clip.", "image_token_id", "projector", "vision.embedding_length"];
  for (let i2 = 0; i2 < result.length; i2++) {
    const sr = showResults[i2];
    if (sr?.status !== "fulfilled" || !sr.value) continue;
    const show = sr.value;
    const entry = result[i2];

    // --- context length -------------------------------------------------
    const explicitNumCtx = parseShowNumCtx(show);
    if (explicitNumCtx) {
      // An explicit setting wins over any estimate, but capability detection
      // below must still run for this model.
      entry.contextLength = explicitNumCtx;
    } else if (show.model_info) {
      const info = show.model_info;
      const arch3 = info["general.architecture"];
      const paramCount = info["general.parameter_count"];
      // Prefer the on-disk size; fall back to a parameter-count guess, then to 4.
      const fileSizeGB = entry.sizeBytes > 0 ? entry.sizeBytes / 1024 ** 3 : paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;
      if (arch3) {
        const archMax = info[`${arch3}.context_length`];
        const nLayers = info[`${arch3}.block_count`];
        const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
        const keyDim = info[`${arch3}.attention.key_length`];
        const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
        if (archMax && nLayers && nKVHeads && keyDim && valDim) {
          const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
          entry.contextLength = estimateRealisticContext(kvBytesPerToken, archMax, fileSizeGB);
        } else if (archMax) {
          // Attention geometry unknown: use a size-tiered per-token estimate.
          const kvEstimate = fileSizeGB <= 5 ? 524288 : fileSizeGB <= 20 ? 1048576 : 1572864;
          entry.contextLength = estimateRealisticContext(kvEstimate, archMax, fileSizeGB);
        }
      }
    }

    // --- capabilities ---------------------------------------------------
    const modelCaps = { vision: false, toolUse: false, thinking: false };
    if (Array.isArray(show.capabilities)) {
      if (show.capabilities.includes("vision")) modelCaps.vision = true;
      if (show.capabilities.includes("tools")) modelCaps.toolUse = true;
      if (show.capabilities.includes("thinking")) modelCaps.thinking = true;
    }
    if (show.model_info) {
      for (const key of Object.keys(show.model_info)) {
        const k = key.toLowerCase();
        if (!visionKeyMarkers.some((marker) => k.includes(marker))) continue;
        const val = show.model_info[key];
        // Ignore placeholder values; only a real value signals a vision tower.
        if (val !== null && val !== void 0 && val !== 0 && val !== "") {
          modelCaps.vision = true;
        }
      }
    }
    // Name-based heuristic for model families treated as tool-capable.
    if (/qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/.test(entry.name.toLowerCase())) {
      modelCaps.toolUse = true;
    }
    if (show.template && (show.template.includes("<think>") || show.template.includes("thinking"))) {
      modelCaps.thinking = true;
    }
    entry.caps = modelCaps;
  }
  return result;
}
563594
/**
 * Lists models from an OpenAI-compatible `/v1/models` endpoint.
 *
 * When an API key is given it is sent as `x-api-key` (plus `anthropic-version`)
 * for api.anthropic.com URLs and as a Bearer token everywhere else.
 *
 * @param {string} baseUrl - Provider base URL (passed through normalizeBaseUrl).
 * @param {string} [apiKey] - Optional credential.
 * @returns {Promise<Array<object>>} Entries sorted alphabetically by name.
 * @throws {Error} When the endpoint answers with a non-2xx status or the request fails.
 */
async function fetchOpenAIModels(baseUrl, apiKey) {
  const endpoint = `${normalizeBaseUrl(baseUrl)}/v1/models`;
  const headers = {};
  if (apiKey) {
    if (/api\.anthropic\.com/i.test(baseUrl)) {
      headers["x-api-key"] = apiKey;
      headers["anthropic-version"] = "2023-06-01";
    } else {
      headers["Authorization"] = `Bearer ${apiKey}`;
    }
  }
  const resp = await fetch(endpoint, { headers, signal: AbortSignal.timeout(15e3) });
  if (!resp.ok) {
    throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
  }
  const payload = await resp.json();
  // `created` is a Unix timestamp in seconds; the endpoint reports no size.
  const toEntry = (m2) => {
    const modified = m2.created ? formatRelativeTime(new Date(m2.created * 1e3).toISOString()) : "";
    return {
      name: m2.id,
      size: "",
      sizeBytes: 0,
      modified,
      parameterSize: m2.owned_by ?? void 0,
      contextLength: m2.context_length ?? m2.max_model_len ?? void 0
    };
  };
  const entries = (payload.data ?? []).map(toEntry);
  entries.sort((left, right) => left.name.localeCompare(right.name));
  return entries;
}
563625
/**
 * Builds the picker entry shape shared by every peer-hosted model.
 * Peers report no byte size or modification time, so those fields are fixed.
 */
function toPeerModelEntry(name, size, parameterSize) {
  return { name, size, modified: "", sizeBytes: 0, parameterSize };
}

/** Turns an `inference:` capability suffix into a model tag, e.g. `qwen3_8b` -> `qwen3:8b`. */
function inferenceCapToModelName(capName) {
  return capName.replace(/_(\d+[bBmMkK])$/, ":$1").replace(/_latest$/, ":latest");
}

/** Collects one entry for every string capability that starts with `inference:`. */
function peerModelsFromCaps(capabilities) {
  const models = [];
  for (const cap of capabilities) {
    if (typeof cap === "string" && cap.startsWith("inference:")) {
      models.push(toPeerModelEntry(inferenceCapToModelName(cap.slice(10)), "", "remote"));
    }
  }
  return models;
}

/**
 * Resolves the model list offered by a Nexus peer.
 *
 * Sources are tried in order and the first non-empty answer wins:
 *  1. `pricing.json` in the Nexus directory, when `status.json` says the peer is this node;
 *  2. `peer-models-cache.json`, when it belongs to this peer and is younger than five minutes;
 *  3. the `query_peer_caps` action (explicit `models`, then `inference:` capabilities);
 *  4. the `discover_peer_caps` action (`inference:` capabilities);
 *  5. the `find_agent` action (scanning its text output for `inference:` tokens);
 *  6. `pricing.json` again for the local peer.
 *
 * Every source is best-effort: a failure in one is swallowed so the next can run,
 * and any unexpected error yields an empty list.
 *
 * @param {string} peerId - Identifier of the peer to query.
 * @param {string} [authKey] - Optional key forwarded as `auth_key` to `query_peer_caps`.
 * @returns {Promise<Array<object>>} Model entries; empty when nothing could be resolved.
 */
async function fetchPeerModels(peerId, authKey) {
  try {
    const { NexusTool: NexusTool2 } = await Promise.resolve().then(() => (init_dist5(), dist_exports));
    const { existsSync, readFileSync } = await import("node:fs");
    const { join } = await import("node:path");
    const nexusTool = new NexusTool2(process.cwd());
    const nexusDir = nexusTool.getNexusDir();

    const readJson = (filePath) => JSON.parse(readFileSync(filePath, "utf8"));
    const readPricingModels = (pricingPath) => (readJson(pricingPath).models || []).map(
      (m2) => toPeerModelEntry(m2.model || "unknown", m2.parameterSize || "", m2.parameterSize || "remote")
    );
    // Runs a Nexus action and returns its JSON output, or null when the action
    // failed, produced no output, or produced something that is not JSON.
    const queryJson = async (params) => {
      const outcome = await nexusTool.execute(params);
      if (!(outcome.success && outcome.output)) return null;
      try {
        return JSON.parse(outcome.output);
      } catch {
        return null;
      }
    };

    let isLocalPeer = false;
    try {
      const statusPath = join(nexusDir, "status.json");
      if (existsSync(statusPath) && readJson(statusPath).peerId === peerId) isLocalPeer = true;
    } catch {
      // Unreadable status file: treat the peer as remote.
    }

    if (isLocalPeer) {
      const pricingPath = join(nexusDir, "pricing.json");
      if (existsSync(pricingPath)) {
        try {
          const localModels = readPricingModels(pricingPath);
          if (localModels.length > 0) return localModels;
        } catch {
          // Fall through to the other sources.
        }
      }
    }

    const cachePath = join(nexusDir, "peer-models-cache.json");
    if (existsSync(cachePath)) {
      try {
        const cache8 = readJson(cachePath);
        if (cache8.peerId === peerId && cache8.models?.length > 0) {
          const age = Date.now() - new Date(cache8.cachedAt).getTime();
          if (age < 5 * 60 * 1e3) {
            return cache8.models.map(
              (m2) => toPeerModelEntry(m2.name || "unknown", m2.size || m2.parameterSize || "", m2.parameterSize || "remote")
            );
          }
        }
      } catch {
        // Corrupt cache: ignore it.
      }
    }

    try {
      const capsData = await queryJson({
        action: "query_peer_caps",
        peer_id: peerId,
        ...authKey ? { auth_key: authKey } : {}
      });
      if (capsData?.models && capsData.models.length > 0) {
        return capsData.models.map(
          (m2) => toPeerModelEntry(m2.name || "unknown", m2.parameterSize || "", m2.parameterSize || "remote")
        );
      }
      if (capsData?.capabilities && capsData.capabilities.length > 0) {
        const models = peerModelsFromCaps(capsData.capabilities);
        if (models.length > 0) return models;
      }
    } catch {
      // Try the next source.
    }

    try {
      const natsPeer = await queryJson({ action: "discover_peer_caps", peer_id: peerId });
      if (natsPeer?.capabilities && natsPeer.capabilities.length > 0) {
        const models = peerModelsFromCaps(natsPeer.capabilities);
        if (models.length > 0) return models;
      }
    } catch {
      // Try the next source.
    }

    try {
      const result = await nexusTool.execute({ action: "find_agent", peer_id: peerId });
      if (result.success && result.output) {
        // find_agent output is scanned as text rather than parsed as JSON.
        const models = [];
        for (const m2 of result.output.matchAll(/inference:([^\s,\]]+)/g)) {
          models.push(toPeerModelEntry(inferenceCapToModelName(m2[1]), "", "remote"));
        }
        if (models.length > 0) return models;
      }
    } catch {
      // Try the last source.
    }

    if (isLocalPeer) {
      const pricingPath = join(nexusDir, "pricing.json");
      if (existsSync(pricingPath)) {
        try {
          return readPricingModels(pricingPath);
        } catch {
          // Nothing left to try.
        }
      }
    }
    return [];
  } catch {
    return [];
  }
}
563794
/**
 * Fetches the model list for any supported endpoint.
 *
 * - `peer://<id>` URLs are delegated to fetchPeerModels (the API key doubles as auth key).
 * - Endpoints detected as Ollama try the Ollama API first, then the OpenAI-compatible API.
 * - Everything else tries the OpenAI-compatible API twice (one second apart),
 *   then the Ollama API as a last resort.
 *
 * The error thrown after all attempts fail reports the first API's failure in
 * its message and carries that error as `cause`, so the root failure (status
 * code, timeout, refused connection) stays inspectable.
 *
 * @param {string} baseUrl - Endpoint URL or `peer://` address.
 * @param {string} [apiKey] - Optional credential.
 * @returns {Promise<Array<object>>} Model entries.
 * @throws {Error} When no API could be reached.
 */
async function fetchModels(baseUrl, apiKey) {
  if (baseUrl.startsWith("peer://")) {
    return fetchPeerModels(baseUrl.slice(7), apiKey);
  }
  const toError = (err) => err instanceof Error ? err : new Error(String(err));
  const provider = detectProvider(baseUrl);
  if (provider.id === "ollama") {
    try {
      return await fetchOllamaModels(baseUrl);
    } catch (err) {
      const ollamaErr = toError(err);
      try {
        return await fetchOpenAIModels(baseUrl, apiKey);
      } catch {
        // Report the native-API failure; the compatibility fallback is secondary.
        throw new Error(`Cannot reach Ollama at ${baseUrl}: ${ollamaErr.message}`, { cause: ollamaErr });
      }
    }
  }
  let lastErr;
  for (let attempt = 0; attempt < 2; attempt++) {
    try {
      return await fetchOpenAIModels(baseUrl, apiKey);
    } catch (err) {
      lastErr = toError(err);
      // Pause only between attempts, not after the final one.
      if (attempt === 0) await new Promise((r2) => setTimeout(r2, 1e3));
    }
  }
  try {
    return await fetchOllamaModels(baseUrl);
  } catch {
    throw new Error(
      `Cannot fetch models from ${provider.label} at ${baseUrl}: ${lastErr?.message ?? "unknown error"}`,
      { cause: lastErr }
    );
  }
}
563827
/**
 * Removes a trailing `:latest` tag (case-insensitive) from a model name so that
 * `llama3` and `llama3:latest` compare equal.
 *
 * Non-string input (for example an unset current model) is returned unchanged
 * instead of throwing, so callers can compare the result directly.
 *
 * @param {string} modelName - Model name, possibly tagged.
 * @returns {string} The name without a trailing `:latest`.
 */
function stripLatest(modelName) {
  if (typeof modelName !== "string") return modelName;
  return modelName.replace(/:latest$/i, "");
}
563830
/**
 * Looks up a model by name with decreasing strictness: exact match first,
 * then a name starting with the query, then a name containing it.
 *
 * @param {Array<{name: string}>} models - Candidate entries.
 * @param {string} query - Name or fragment typed by the user.
 * @returns {object|undefined} The first entry of the strictest tier that matches.
 */
function findModel(models, query) {
  const firstWhere = (nameMatches) => models.find((entry) => nameMatches(entry.name));
  return firstWhere((name) => name === query)
    || firstWhere((name) => name.startsWith(query))
    || firstWhere((name) => name.includes(query));
}
563838
/**
 * Asks an Ollama-style `/api/show` endpoint for a model's usable context size.
 *
 * An explicit `num_ctx` (see parseShowNumCtx) is returned as-is. Otherwise the
 * size is estimated from `model_info`: with full attention geometry the
 * per-token KV size is computed, with only a context maximum a size-tiered
 * guess is used.
 *
 * @param {string} baseUrl - Server base URL (passed through normalizeBaseUrl).
 * @param {string} modelName - Model to inspect.
 * @returns {Promise<number|null>} Context size in tokens, or null when unknown
 *   or when the request fails for any reason.
 */
async function queryModelContextSize(baseUrl, modelName) {
  try {
    const res = await fetch(`${normalizeBaseUrl(baseUrl)}/api/show`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name: modelName }),
      signal: AbortSignal.timeout(1e4)
    });
    if (!res.ok) return null;
    const data = await res.json();

    const explicitNumCtx = parseShowNumCtx(data);
    if (explicitNumCtx) return explicitNumCtx;

    const info = data.model_info;
    if (!info) return null;
    const arch3 = info["general.architecture"];
    if (!arch3) return null;
    const archMax = info[`${arch3}.context_length`];
    if (!archMax) return null;

    // No file size is available here, so approximate it from the parameter count.
    const paramCount = info["general.parameter_count"];
    const modelSizeGB2 = paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;

    const nLayers = info[`${arch3}.block_count`];
    const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
    const keyDim = info[`${arch3}.attention.key_length`];
    const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
    const hasGeometry = nLayers && nKVHeads && keyDim && valDim;

    let kvBytesPerToken;
    if (hasGeometry) {
      kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
    } else if (modelSizeGB2 <= 5) {
      kvBytesPerToken = 524288;
    } else if (modelSizeGB2 <= 20) {
      kvBytesPerToken = 1048576;
    } else {
      kvBytesPerToken = 1572864;
    }
    return estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2);
  } catch {
    return null;
  }
}
563877
/**
 * Estimates a context size (in tokens) that this machine can realistically hold.
 *
 * The result is the smallest of:
 *  - the tokens that fit in 70% of total system memory at `kvBytesPerToken`,
 *  - a hard ceiling of 131072 tokens,
 *  - the architecture maximum `archMax`,
 *  - when a positive model size is given, the tokens whose KV data would take
 *    four times the model size.
 * Memory-derived values are rounded down to a multiple of 1024 and never drop
 * below 2048.
 *
 * @param {number} kvBytesPerToken - Estimated KV-cache bytes per token.
 * @param {number} archMax - Maximum context the architecture declares.
 * @param {number} [modelSizeGB2] - Model size in GiB; skipped when falsy or not positive.
 * @returns {number} Estimated context size in tokens.
 */
function estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2) {
  const GIB = 1024 ** 3;
  const floorTo1K = (tokens) => Math.max(2048, Math.floor(tokens / 1024) * 1024);

  const usableBytes = totalmem3() / GIB * 0.7 * GIB;
  const limits = [floorTo1K(Math.floor(usableBytes / kvBytesPerToken)), 131072, archMax];
  if (modelSizeGB2 && modelSizeGB2 > 0) {
    limits.push(floorTo1K(modelSizeGB2 * 4 * GIB / kvBytesPerToken));
  }
  return Math.min(...limits);
}
563890
/**
 * Looks up a model's context size through the OpenAI-compatible model list.
 *
 * Uses the entry's `contextLength` when set; otherwise tries to read a
 * "<n>K ctx" fragment out of the entry's `size` text.
 *
 * @param {string} baseUrl - Provider base URL.
 * @param {string} modelName - Exact model name to look for.
 * @param {string} [apiKey] - Optional credential.
 * @returns {Promise<number|null>} Context size in tokens, or null when unknown
 *   or when the listing fails.
 */
async function queryOpenAIContextSize(baseUrl, modelName, apiKey) {
  try {
    const catalogue = await fetchOpenAIModels(baseUrl, apiKey);
    const entry = catalogue.find((candidate) => candidate.name === modelName);
    if (!entry) return null;
    if (entry.contextLength) return entry.contextLength;
    const sized = entry.size ? entry.size.match(/(\d+)K ctx/) : null;
    return sized ? parseInt(sized[1], 10) * 1024 : null;
  } catch {
    return null;
  }
}
563904
/**
 * Resolves the context size for a model on any endpoint.
 *
 * Peer endpoints get a fixed 32768. Otherwise the Ollama-style lookup is tried
 * first and the OpenAI-compatible lookup is used when it yields nothing.
 *
 * @param {string} baseUrl - Endpoint URL or `peer://` address.
 * @param {string} modelName - Model to inspect.
 * @param {string} [apiKey] - Optional credential for the OpenAI-compatible lookup.
 * @returns {Promise<number|null>} Context size in tokens, or null when unknown.
 */
async function queryContextSize(baseUrl, modelName, apiKey) {
  if (baseUrl.startsWith("peer://")) return 32768;
  const fromOllama = await queryModelContextSize(baseUrl, modelName);
  return fromOllama || queryOpenAIContextSize(baseUrl, modelName, apiKey);
}
563910
/**
 * Determines whether a model supports vision, tool use and thinking.
 *
 * Peer endpoints cannot be queried, so only name heuristics are applied there.
 * For other endpoints `/api/show` is consulted: its `capabilities` list,
 * vision-related `model_info` keys, the model name, and thinking markers in
 * the prompt template all contribute. Any request failure returns whatever
 * flags were established up to that point (all false if nothing was).
 *
 * @param {string} baseUrl - Endpoint URL or `peer://` address.
 * @param {string} modelName - Model to inspect.
 * @returns {Promise<{vision: boolean, toolUse: boolean, thinking: boolean}>} Capability flags.
 */
async function queryModelCapabilities(baseUrl, modelName) {
  const caps = { vision: false, toolUse: false, thinking: false };
  const toolCapableName = /qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/;

  if (baseUrl.startsWith("peer://")) {
    const peerModel = modelName.toLowerCase();
    caps.toolUse = toolCapableName.test(peerModel);
    caps.thinking = /qwen3|deepseek-r1/.test(peerModel);
    return caps;
  }

  try {
    const res = await fetch(`${normalizeBaseUrl(baseUrl)}/api/show`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name: modelName }),
      signal: AbortSignal.timeout(1e4)
    });
    if (!res.ok) return caps;
    const data = await res.json();

    // 1. Capabilities the server states explicitly.
    const advertised = Array.isArray(data.capabilities) ? data.capabilities : [];
    if (advertised.includes("vision")) caps.vision = true;
    if (advertised.includes("tools")) caps.toolUse = true;
    if (advertised.includes("thinking")) caps.thinking = true;

    // 2. Metadata keys that indicate a vision component, ignoring placeholder values.
    const visionMarkers = ["vision.block_count", "clip.", "image_token_id", "projector", "vision.embedding_length"];
    for (const [key, val] of Object.entries(data.model_info || {})) {
      const loweredKey = key.toLowerCase();
      const isVisionKey = visionMarkers.some((marker) => loweredKey.includes(marker));
      if (isVisionKey && val !== null && val !== void 0 && val !== 0 && val !== "") {
        caps.vision = true;
      }
    }

    // 3. Model families treated as tool-capable by name.
    if (toolCapableName.test(modelName.toLowerCase())) {
      caps.toolUse = true;
    }

    // 4. Templates that mention thinking.
    const template = data.template;
    if (template && (template.includes("<think>") || template.includes("thinking"))) {
      caps.thinking = true;
    }
    return caps;
  } catch {
    return caps;
  }
}
563962
/**
 * Formats a byte count for display using 1024-based units.
 *
 * Values below 1024 are shown as whole bytes ("512 B"); larger values get one
 * decimal and the largest fitting unit up to TB ("1.5 KB", "1024.0 TB").
 * A missing or non-numeric size yields an empty string instead of throwing or
 * printing "NaN B", so an incomplete model record cannot break the whole list.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Human-readable size, or "" when `bytes` is not a finite number.
 */
function formatBytes3(bytes) {
  const value = bytes == null ? NaN : Number(bytes);
  if (!Number.isFinite(value)) return "";
  if (value < 1024) return `${bytes} B`;
  const units = ["KB", "MB", "GB", "TB"];
  let size = value / 1024;
  let unitIndex = 0;
  // Stop at the last unit so very large values stay in TB.
  while (size >= 1024 && unitIndex < units.length - 1) {
    size /= 1024;
    unitIndex++;
  }
  return `${size.toFixed(1)} ${units[unitIndex]}`;
}
563973
/**
 * Formats a token count as a short context label.
 *
 * One million tokens or more are shown in millions with one decimal,
 * 1024 or more as rounded multiples of 1024, and anything smaller verbatim.
 *
 * @param {number} tokens - Context size in tokens.
 * @returns {string} Label such as "1.0M ctx", "128K ctx" or "512 ctx".
 */
function formatContextLength(tokens) {
  const amount = tokens >= 1e6
    ? `${(tokens / 1e6).toFixed(1)}M`
    : tokens >= 1024
      ? `${Math.round(tokens / 1024)}K`
      : `${tokens}`;
  return `${amount} ctx`;
}
563978
/**
 * Renders capability flags as a compact tag string.
 *
 * @param {{vision?: boolean, toolUse?: boolean, thinking?: boolean}} caps - Capability flags.
 * @returns {string} Enabled tags joined by "+" in the order vision, tools, think;
 *   empty when none are set.
 */
function formatCaps(caps) {
  const tagByFlag = [
    ["vision", "vision"],
    ["toolUse", "tools"],
    ["thinking", "think"]
  ];
  return tagByFlag
    .filter(([flag]) => caps[flag])
    .map(([, tag]) => tag)
    .join("+");
}
563985
/**
 * Formats a timestamp as a coarse "time ago" label relative to now.
 *
 * Buckets: "just now" (under a minute, including future timestamps), minutes,
 * hours, days (under a week), weeks (under five), then 30-day months.
 * A missing or unparsable timestamp yields an empty string rather than
 * the "NaNmo ago" the arithmetic would otherwise produce.
 *
 * @param {string} iso2 - Timestamp accepted by the Date constructor (ISO 8601 expected).
 * @returns {string} Label such as "5m ago", "3h ago", "2w ago", or "" when invalid.
 */
function formatRelativeTime(iso2) {
  const then = new Date(iso2).getTime();
  // getTime() is NaN for an invalid date; every comparison below would be false.
  if (Number.isNaN(then)) return "";
  const minutes = Math.floor((Date.now() - then) / 6e4);
  if (minutes < 1) return "just now";
  if (minutes < 60) return `${minutes}m ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;
  const days = Math.floor(hours / 24);
  if (days < 7) return `${days}d ago`;
  const weeks = Math.floor(days / 7);
  if (weeks < 5) return `${weeks}w ago`;
  return `${Math.floor(days / 30)}mo ago`;
}
564001
// Case-insensitive name/family patterns that mark a model as an image generator.
// Assigned lazily by init_model_picker and read by isImageGenModel.
var IMAGE_GEN_PATTERNS;

// Bundler-generated lazy initializer for packages/cli/src/tui/model-picker.ts:
// runs the module's dependency initializer, then fills in the module-level state.
var init_model_picker = __esm({
  "packages/cli/src/tui/model-picker.ts"() {
    "use strict";
    init_dist();
    const imageGenKeywords = [
      "flux",
      "z-image",
      "stable-diffusion",
      "sdxl",
      "dall",
      "kandinsky",
      "midjourney",
      "imagen"
    ];
    // Each keyword becomes an unanchored, case-insensitive pattern.
    IMAGE_GEN_PATTERNS = imageGenKeywords.map((keyword) => new RegExp(keyword, "i"));
  }
});
564018
+
563488
564019
  // packages/cli/src/tui/render.ts
563489
564020
  var render_exports = {};
563490
564021
  __export(render_exports, {
@@ -564353,8 +564884,9 @@ function renderModelList(models, current) {
564353
564884
  ${c3.bold("Available models:")}
564354
564885
 
564355
564886
  `);
564887
+ const currentKey = stripLatest(current);
564356
564888
  for (const m2 of models) {
564357
- const isCurrent = m2.name === current;
564889
+ const isCurrent = stripLatest(m2.name) === currentKey;
564358
564890
  const marker = isCurrent ? c3.green("●") : c3.dim("○");
564359
564891
  const name10 = isCurrent ? c3.bold(c3.green(m2.name)) : m2.name;
564360
564892
  const size = c3.dim(m2.size);
@@ -564413,6 +564945,7 @@ var init_render = __esm({
564413
564945
  init_config();
564414
564946
  init_text_selection();
564415
564947
  init_task_complete_box();
564948
+ init_model_picker();
564416
564949
  isTTY2 = process.stdout.isTTY ?? false;
564417
564950
  c3 = {
564418
564951
  bold: (t2) => ansi2("1", t2),
@@ -566137,7 +566670,7 @@ import { spawn as spawn24, exec as exec2 } from "node:child_process";
566137
566670
  import { EventEmitter as EventEmitter7 } from "node:events";
566138
566671
  import { randomBytes as randomBytes18, timingSafeEqual } from "node:crypto";
566139
566672
  import { URL as URL2 } from "node:url";
566140
- import { loadavg, cpus as cpus2, totalmem as totalmem3, freemem as freemem3 } from "node:os";
566673
+ import { loadavg, cpus as cpus2, totalmem as totalmem4, freemem as freemem3 } from "node:os";
566141
566674
  import { existsSync as existsSync84, readFileSync as readFileSync66, writeFileSync as writeFileSync42, unlinkSync as unlinkSync13, mkdirSync as mkdirSync47, readdirSync as readdirSync28, statSync as statSync31, statfsSync as statfsSync4 } from "node:fs";
566142
566675
  import { join as join99 } from "node:path";
566143
566676
  function cleanForwardHeaders(raw, targetHost) {
@@ -566243,7 +566776,7 @@ function parseRateLimitHeaders(headers) {
566243
566776
  async function collectSystemMetricsAsync() {
566244
566777
  const [l1, l5, l15] = loadavg();
566245
566778
  const cores = cpus2().length;
566246
- const totalMem = totalmem3();
566779
+ const totalMem = totalmem4();
566247
566780
  const freeMem = freemem3();
566248
566781
  const usedMem = totalMem - freeMem;
566249
566782
  let disk = {
@@ -568835,716 +569368,188 @@ var init_call_agent = __esm({
568835
569368
  feed.push({
568836
569369
  ts: Date.now(),
568837
569370
  source: "call",
568838
- sourceId: this.clientId,
568839
- summary: content,
568840
- toolName,
568841
- success
568842
- });
568843
- }
568844
- if (event.type === "model_response" && event.content) {
568845
- this.emit("response", event.content);
568846
- }
568847
- });
568848
- }
568849
- /** Process a voice transcript — queues if already processing */
568850
- handleTranscript(text) {
568851
- if (this.disposed) return;
568852
- this.conversationHistory.push({ role: "user", text });
568853
- if (this.processing) {
568854
- this.pendingTranscripts.push(text);
568855
- return;
568856
- }
568857
- this.processTranscript(text).catch((err) => {
568858
- this.emit("error", err instanceof Error ? err : new Error(String(err)));
568859
- });
568860
- }
568861
- /** Dispose and clean up */
568862
- dispose() {
568863
- this.disposed = true;
568864
- this.pendingTranscripts.length = 0;
568865
- this.runner = null;
568866
- }
568867
- // ── Private ──────────────────────────────────────────────────────────
568868
- async processTranscript(text) {
568869
- if (!this.runner || this.disposed) return;
568870
- this.processing = true;
568871
- try {
568872
- const historyContext = this.conversationHistory.slice(-10).map((h) => `${h.role === "user" ? "User" : "You"}: ${h.text}`).join("\n");
568873
- const feed = getActivityFeed();
568874
- const activitySummary = feed.getSummary(
568875
- this.tier === "admin" ? 20 : 10,
568876
- this.tier === "admin"
568877
- );
568878
- const wantsAction = /\b(read|open|show|run|execute|check|look at|find|search|grep|edit|write|fix|test|build|deploy|install|create|delete|remove|update|change|modify|commit|push|pull)\b/i.test(text) && !/\b(how are you|what's up|hello|hi|hey|can you hear|stop|quit|bye|thanks|thank you|ok|okay|sure|yeah|yes|no)\b/i.test(text);
568879
- if (!wantsAction) {
568880
- try {
568881
- const chatMessages = [
568882
- { role: "system", content: this.buildSystemPrompt() },
568883
- ...this.conversationHistory.slice(-6).map((h) => ({
568884
- role: h.role === "user" ? "user" : "assistant",
568885
- content: h.text
568886
- })),
568887
- { role: "user", content: text }
568888
- ];
568889
- const chatResult = await this.backend.chatCompletion({
568890
- messages: chatMessages,
568891
- tools: [],
568892
- temperature: 0.4,
568893
- maxTokens: 256,
568894
- timeoutMs: 15e3
568895
- });
568896
- const reply = (chatResult.choices[0]?.message?.content ?? "").trim();
568897
- if (!reply) return;
568898
- this.conversationHistory.push({ role: "assistant", text: reply });
568899
- this.emit("response", reply);
568900
- } catch {
568901
- this.emit("response", "Sorry, I couldn't process that.");
568902
- }
568903
- } else {
568904
- const taskPrompt = [
568905
- `User said: "${text}"`,
568906
- "",
568907
- historyContext ? `Conversation so far:
568908
- ${historyContext}
568909
- ` : "",
568910
- `Background activity:
568911
- ${activitySummary}
568912
- `,
568913
- "The user is requesting an action. Use tools as needed, then call task_complete with a brief spoken summary of what you did (1-2 sentences)."
568914
- ].join("\n");
568915
- const result = await this.runner.run(taskPrompt, `Working directory: ${this.repoRoot}`);
568916
- if (result.summary) {
568917
- this.conversationHistory.push({ role: "assistant", text: result.summary });
568918
- }
568919
- }
568920
- } catch (err) {
568921
- this.emit("error", err instanceof Error ? err : new Error(String(err)));
568922
- } finally {
568923
- this.processing = false;
568924
- this.emit("done");
568925
- if (this.pendingTranscripts.length > 0) {
568926
- const next = this.pendingTranscripts.shift();
568927
- this.processTranscript(next).catch((err) => {
568928
- this.emit("error", err instanceof Error ? err : new Error(String(err)));
568929
- });
568930
- }
568931
- }
568932
- }
568933
- buildSystemPrompt() {
568934
- const base3 = [
568935
- "You are a voice assistant on a LIVE AUDIO CALL. This is a real-time conversation.",
568936
- "",
568937
- "CRITICAL RULES FOR VOICE CALLS:",
568938
- "1. ALWAYS respond IMMEDIATELY with speech. Do NOT use tools before responding.",
568939
- "2. Your response goes through text-to-speech — keep it SHORT (1-3 sentences).",
568940
- "3. NEVER use code blocks, markdown, or long technical text.",
568941
- "4. Be conversational and natural, like talking to a colleague.",
568942
- "5. Call task_complete with your spoken response as the summary.",
568943
- "6. Only use tools (file_read, grep, shell, etc.) if the user EXPLICITLY asks you to look something up, run a command, or make a change. For normal conversation, NEVER call tools.",
568944
- "7. If the user asks what's happening, summarize from the activity context below — do NOT run tools to find out."
568945
- ];
568946
- if (this.opts.emotionContext) {
568947
- base3.push("", "Mood:", this.opts.emotionContext);
568948
- }
568949
- if (this.tier === "admin") {
568950
- base3.push(
568951
- "",
568952
- "ADMIN call — you CAN use tools IF the user explicitly requests an action (e.g. 'read that file', 'run the tests').",
568953
- "But for general chat, status questions, or greetings — respond immediately WITHOUT tools."
568954
- );
568955
- } else {
568956
- base3.push(
568957
- "",
568958
- "PUBLIC call — read-only access. Answer questions about the project conversationally."
568959
- );
568960
- }
568961
- return base3.join("\n");
568962
- }
568963
- buildTools() {
568964
- if (this.tier === "admin") {
568965
- return this.buildAdminTools();
568966
- }
568967
- return this.buildPublicTools();
568968
- }
568969
- buildAdminTools() {
568970
- const debateAdapter = async (prompt) => {
568971
- const r2 = await this.backend.chatCompletion({
568972
- messages: [{ role: "user", content: prompt }],
568973
- tools: [],
568974
- temperature: 0.7,
568975
- maxTokens: 800,
568976
- timeoutMs: 12e4
568977
- });
568978
- return r2.choices[0]?.message?.content ?? "";
568979
- };
568980
- const replayAdapter = async (prompt) => {
568981
- const r2 = await this.backend.chatCompletion({
568982
- messages: [{ role: "user", content: prompt }],
568983
- tools: [],
568984
- temperature: 0,
568985
- maxTokens: 1500,
568986
- timeoutMs: 12e4
568987
- });
568988
- return r2.choices[0]?.message?.content ?? "";
568989
- };
568990
- const tools = [
568991
- new FileReadTool(this.repoRoot),
568992
- new FileWriteTool(this.repoRoot),
568993
- new FileEditTool(this.repoRoot),
568994
- new ShellTool(this.repoRoot),
568995
- new GrepSearchTool(this.repoRoot),
568996
- new GlobFindTool(this.repoRoot),
568997
- new ListDirectoryTool(this.repoRoot),
568998
- new WebSearchTool(),
568999
- new WebFetchTool(),
569000
- new MemoryReadTool(this.repoRoot),
569001
- new MemoryWriteTool(this.repoRoot),
569002
- new MemorySearchTool(this.repoRoot),
569003
- new DebateTool(debateAdapter),
569004
- new ReplayWithInterventionTool({ workingDir: this.repoRoot, callable: replayAdapter })
569005
- ];
569006
- return tools.map(adaptTool);
569007
- }
569008
- buildPublicTools() {
569009
- const tools = [
569010
- new FileReadTool(this.repoRoot),
569011
- new GrepSearchTool(this.repoRoot),
569012
- new GlobFindTool(this.repoRoot),
569013
- new ListDirectoryTool(this.repoRoot),
569014
- new MemoryReadTool(this.repoRoot),
569015
- new MemorySearchTool(this.repoRoot)
569016
- ];
569017
- return tools.map(adaptTool);
569018
- }
569019
- };
569020
- }
569021
- });
569022
-
569023
- // packages/cli/src/tui/model-picker.ts
569024
- import { totalmem as totalmem4 } from "node:os";
569025
- function isImageGenModel(name10, family) {
569026
- return IMAGE_GEN_PATTERNS.some((p2) => p2.test(name10) || family && p2.test(family));
569027
- }
569028
- function parseShowNumCtx(show) {
569029
- const sources = [show.parameters, show.modelfile];
569030
- for (const source of sources) {
569031
- if (!source) continue;
569032
- const match = source.match(/\b(?:PARAMETER\s+)?num_ctx\s+(\d+)/i);
569033
- if (match) return parseInt(match[1], 10);
569034
- }
569035
- return null;
569036
- }
569037
- async function fetchOllamaModels(baseUrl) {
569038
- const url = `${normalizeBaseUrl(baseUrl)}/api/tags`;
569039
- const resp = await fetch(url, {
569040
- signal: AbortSignal.timeout(1e4)
569041
- });
569042
- if (!resp.ok) {
569043
- throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
569044
- }
569045
- const data = await resp.json();
569046
- const models = data.models ?? [];
569047
- const result = models.map((m2) => {
569048
- const family = m2.details?.family;
569049
- return {
569050
- name: m2.name,
569051
- size: formatBytes3(m2.size),
569052
- sizeBytes: m2.size,
569053
- modified: formatRelativeTime(m2.modified_at),
569054
- parameterSize: m2.details?.parameter_size,
569055
- contextLength: void 0,
569056
- caps: void 0,
569057
- isImageGen: isImageGenModel(m2.name, family),
569058
- family
569059
- };
569060
- }).sort((a2, b) => b.sizeBytes - a2.sizeBytes);
569061
- const normalized = normalizeBaseUrl(baseUrl);
569062
- const showResults = await Promise.allSettled(
569063
- result.map(
569064
- (m2) => fetch(`${normalized}/api/show`, {
569065
- method: "POST",
569066
- headers: { "Content-Type": "application/json" },
569067
- body: JSON.stringify({ name: m2.name }),
569068
- signal: AbortSignal.timeout(5e3)
569069
- }).then((r2) => r2.ok ? r2.json() : null)
569070
- )
569071
- );
569072
- for (let i2 = 0; i2 < result.length; i2++) {
569073
- const sr = showResults[i2];
569074
- if (sr?.status !== "fulfilled" || !sr.value) continue;
569075
- const show = sr.value;
569076
- const explicitNumCtx = parseShowNumCtx(show);
569077
- if (explicitNumCtx) {
569078
- result[i2].contextLength = explicitNumCtx;
569079
- continue;
569080
- }
569081
- if (show.model_info) {
569082
- const info = show.model_info;
569083
- const arch3 = info["general.architecture"];
569084
- const paramCount = info["general.parameter_count"];
569085
- const fileSizeGB = result[i2].sizeBytes > 0 ? result[i2].sizeBytes / 1024 ** 3 : paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;
569086
- if (arch3) {
569087
- const archMax = info[`${arch3}.context_length`];
569088
- const nLayers = info[`${arch3}.block_count`];
569089
- const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
569090
- const keyDim = info[`${arch3}.attention.key_length`];
569091
- const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
569092
- if (archMax && nLayers && nKVHeads && keyDim && valDim) {
569093
- const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
569094
- result[i2].contextLength = estimateRealisticContext(kvBytesPerToken, archMax, fileSizeGB);
569095
- } else if (archMax) {
569096
- const kvEstimate = fileSizeGB <= 5 ? 524288 : fileSizeGB <= 20 ? 1048576 : 1572864;
569097
- result[i2].contextLength = estimateRealisticContext(kvEstimate, archMax, fileSizeGB);
569098
- }
569099
- }
569100
- }
569101
- const modelCaps = { vision: false, toolUse: false, thinking: false };
569102
- const nameLower = result[i2].name.toLowerCase();
569103
- if (Array.isArray(show.capabilities)) {
569104
- if (show.capabilities.includes("vision")) modelCaps.vision = true;
569105
- if (show.capabilities.includes("tools")) modelCaps.toolUse = true;
569106
- if (show.capabilities.includes("thinking")) modelCaps.thinking = true;
569107
- }
569108
- if (show.model_info) {
569109
- for (const key of Object.keys(show.model_info)) {
569110
- const k = key.toLowerCase();
569111
- if (k.includes("vision.block_count") || k.includes("clip.") || k.includes("image_token_id") || k.includes("projector")) {
569112
- const val = show.model_info[key];
569113
- if (val !== null && val !== void 0 && val !== 0 && val !== "") {
569114
- modelCaps.vision = true;
569115
- }
569116
- }
569117
- }
569118
- }
569119
- if (/qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/.test(nameLower)) {
569120
- modelCaps.toolUse = true;
569121
- }
569122
- if (show.template && (show.template.includes("<think>") || show.template.includes("thinking"))) {
569123
- modelCaps.thinking = true;
569124
- }
569125
- result[i2].caps = modelCaps;
569126
- }
569127
- return result;
569128
- }
569129
- async function fetchOpenAIModels(baseUrl, apiKey) {
569130
- const normalized = normalizeBaseUrl(baseUrl);
569131
- const url = `${normalized}/v1/models`;
569132
- const isAnthropic = /api\.anthropic\.com/i.test(baseUrl);
569133
- const headers = {};
569134
- if (apiKey) {
569135
- if (isAnthropic) {
569136
- headers["x-api-key"] = apiKey;
569137
- headers["anthropic-version"] = "2023-06-01";
569138
- } else {
569139
- headers["Authorization"] = `Bearer ${apiKey}`;
569140
- }
569141
- }
569142
- const resp = await fetch(url, {
569143
- headers,
569144
- signal: AbortSignal.timeout(15e3)
569145
- });
569146
- if (!resp.ok) {
569147
- throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
569148
- }
569149
- const data = await resp.json();
569150
- const models = data.data ?? [];
569151
- return models.map((m2) => ({
569152
- name: m2.id,
569153
- size: "",
569154
- sizeBytes: 0,
569155
- modified: m2.created ? formatRelativeTime(new Date(m2.created * 1e3).toISOString()) : "",
569156
- parameterSize: m2.owned_by ?? void 0,
569157
- contextLength: m2.context_length ?? m2.max_model_len ?? void 0
569158
- })).sort((a2, b) => a2.name.localeCompare(b.name));
569159
- }
569160
- async function fetchPeerModels(peerId, authKey) {
569161
- try {
569162
- const { NexusTool: NexusTool2 } = await Promise.resolve().then(() => (init_dist5(), dist_exports));
569163
- const { existsSync: existsSync131, readFileSync: readFileSync107 } = await import("node:fs");
569164
- const { join: join148 } = await import("node:path");
569165
- const cwd4 = process.cwd();
569166
- const nexusTool = new NexusTool2(cwd4);
569167
- const nexusDir = nexusTool.getNexusDir();
569168
- let isLocalPeer = false;
569169
- try {
569170
- const statusPath = join148(nexusDir, "status.json");
569171
- if (existsSync131(statusPath)) {
569172
- const status = JSON.parse(readFileSync107(statusPath, "utf8"));
569173
- if (status.peerId === peerId) isLocalPeer = true;
569174
- }
569175
- } catch {
569176
- }
569177
- if (isLocalPeer) {
569178
- const pricingPath = join148(nexusDir, "pricing.json");
569179
- if (existsSync131(pricingPath)) {
569180
- try {
569181
- const pricing = JSON.parse(readFileSync107(pricingPath, "utf8"));
569182
- const localModels = (pricing.models || []).map((m2) => ({
569183
- name: m2.model || "unknown",
569184
- size: m2.parameterSize || "",
569185
- modified: "",
569186
- sizeBytes: 0,
569187
- parameterSize: m2.parameterSize || "remote"
569188
- }));
569189
- if (localModels.length > 0) return localModels;
569190
- } catch {
569191
- }
569192
- }
569193
- }
569194
- const cachePath = join148(nexusDir, "peer-models-cache.json");
569195
- if (existsSync131(cachePath)) {
569196
- try {
569197
- const cache8 = JSON.parse(readFileSync107(cachePath, "utf8"));
569198
- if (cache8.peerId === peerId && cache8.models?.length > 0) {
569199
- const age = Date.now() - new Date(cache8.cachedAt).getTime();
569200
- if (age < 5 * 60 * 1e3) {
569201
- return cache8.models.map((m2) => ({
569202
- name: m2.name || "unknown",
569203
- size: m2.size || m2.parameterSize || "",
569204
- modified: "",
569205
- sizeBytes: 0,
569206
- parameterSize: m2.parameterSize || "remote"
569207
- }));
569371
+ sourceId: this.clientId,
569372
+ summary: content,
569373
+ toolName,
569374
+ success
569375
+ });
569208
569376
  }
569209
- }
569210
- } catch {
569211
- }
569212
- }
569213
- try {
569214
- const capsResult = await nexusTool.execute({
569215
- action: "query_peer_caps",
569216
- peer_id: peerId,
569217
- ...authKey ? { auth_key: authKey } : {}
569218
- });
569219
- if (capsResult.success && capsResult.output) {
569220
- let capsData = null;
569221
- try {
569222
- capsData = JSON.parse(capsResult.output);
569223
- } catch {
569224
- }
569225
- if (capsData?.models && capsData.models.length > 0) {
569226
- return capsData.models.map((m2) => ({
569227
- name: m2.name || "unknown",
569228
- size: m2.parameterSize || "",
569229
- modified: "",
569230
- sizeBytes: 0,
569231
- parameterSize: m2.parameterSize || "remote"
569232
- }));
569233
- }
569234
- if (capsData?.capabilities && capsData.capabilities.length > 0) {
569235
- const models = [];
569236
- for (const cap of capsData.capabilities) {
569237
- if (typeof cap === "string" && cap.startsWith("inference:")) {
569238
- const capName = cap.slice(10);
569239
- const modelName = capName.replace(/_(\d+[bBmMkK])$/, ":$1").replace(/_latest$/, ":latest");
569240
- models.push({
569241
- name: modelName,
569242
- size: "",
569243
- modified: "",
569244
- sizeBytes: 0,
569245
- parameterSize: "remote"
569246
- });
569247
- }
569377
+ if (event.type === "model_response" && event.content) {
569378
+ this.emit("response", event.content);
569248
569379
  }
569249
- if (models.length > 0) return models;
569380
+ });
569381
+ }
569382
+ /** Process a voice transcript — queues if already processing */
569383
+ handleTranscript(text) {
569384
+ if (this.disposed) return;
569385
+ this.conversationHistory.push({ role: "user", text });
569386
+ if (this.processing) {
569387
+ this.pendingTranscripts.push(text);
569388
+ return;
569250
569389
  }
569390
+ this.processTranscript(text).catch((err) => {
569391
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
569392
+ });
569251
569393
  }
569252
- } catch {
569253
- }
569254
- try {
569255
- const natsResult = await nexusTool.execute({
569256
- action: "discover_peer_caps",
569257
- peer_id: peerId
569258
- });
569259
- if (natsResult.success && natsResult.output) {
569260
- let natsPeer = null;
569394
+ /** Dispose and clean up */
569395
+ dispose() {
569396
+ this.disposed = true;
569397
+ this.pendingTranscripts.length = 0;
569398
+ this.runner = null;
569399
+ }
569400
+ // ── Private ──────────────────────────────────────────────────────────
569401
+ async processTranscript(text) {
569402
+ if (!this.runner || this.disposed) return;
569403
+ this.processing = true;
569261
569404
  try {
569262
- natsPeer = JSON.parse(natsResult.output);
569263
- } catch {
569264
- }
569265
- if (natsPeer?.capabilities && natsPeer.capabilities.length > 0) {
569266
- const models = [];
569267
- for (const cap of natsPeer.capabilities) {
569268
- if (typeof cap === "string" && cap.startsWith("inference:")) {
569269
- const capName = cap.slice(10);
569270
- const modelName = capName.replace(/_(\d+[bBmMkK])$/, ":$1").replace(/_latest$/, ":latest");
569271
- models.push({
569272
- name: modelName,
569273
- size: "",
569274
- modified: "",
569275
- sizeBytes: 0,
569276
- parameterSize: "remote"
569405
+ const historyContext = this.conversationHistory.slice(-10).map((h) => `${h.role === "user" ? "User" : "You"}: ${h.text}`).join("\n");
569406
+ const feed = getActivityFeed();
569407
+ const activitySummary = feed.getSummary(
569408
+ this.tier === "admin" ? 20 : 10,
569409
+ this.tier === "admin"
569410
+ );
569411
+ const wantsAction = /\b(read|open|show|run|execute|check|look at|find|search|grep|edit|write|fix|test|build|deploy|install|create|delete|remove|update|change|modify|commit|push|pull)\b/i.test(text) && !/\b(how are you|what's up|hello|hi|hey|can you hear|stop|quit|bye|thanks|thank you|ok|okay|sure|yeah|yes|no)\b/i.test(text);
569412
+ if (!wantsAction) {
569413
+ try {
569414
+ const chatMessages = [
569415
+ { role: "system", content: this.buildSystemPrompt() },
569416
+ ...this.conversationHistory.slice(-6).map((h) => ({
569417
+ role: h.role === "user" ? "user" : "assistant",
569418
+ content: h.text
569419
+ })),
569420
+ { role: "user", content: text }
569421
+ ];
569422
+ const chatResult = await this.backend.chatCompletion({
569423
+ messages: chatMessages,
569424
+ tools: [],
569425
+ temperature: 0.4,
569426
+ maxTokens: 256,
569427
+ timeoutMs: 15e3
569277
569428
  });
569429
+ const reply = (chatResult.choices[0]?.message?.content ?? "").trim();
569430
+ if (!reply) return;
569431
+ this.conversationHistory.push({ role: "assistant", text: reply });
569432
+ this.emit("response", reply);
569433
+ } catch {
569434
+ this.emit("response", "Sorry, I couldn't process that.");
569435
+ }
569436
+ } else {
569437
+ const taskPrompt = [
569438
+ `User said: "${text}"`,
569439
+ "",
569440
+ historyContext ? `Conversation so far:
569441
+ ${historyContext}
569442
+ ` : "",
569443
+ `Background activity:
569444
+ ${activitySummary}
569445
+ `,
569446
+ "The user is requesting an action. Use tools as needed, then call task_complete with a brief spoken summary of what you did (1-2 sentences)."
569447
+ ].join("\n");
569448
+ const result = await this.runner.run(taskPrompt, `Working directory: ${this.repoRoot}`);
569449
+ if (result.summary) {
569450
+ this.conversationHistory.push({ role: "assistant", text: result.summary });
569278
569451
  }
569279
569452
  }
569280
- if (models.length > 0) return models;
569453
+ } catch (err) {
569454
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
569455
+ } finally {
569456
+ this.processing = false;
569457
+ this.emit("done");
569458
+ if (this.pendingTranscripts.length > 0) {
569459
+ const next = this.pendingTranscripts.shift();
569460
+ this.processTranscript(next).catch((err) => {
569461
+ this.emit("error", err instanceof Error ? err : new Error(String(err)));
569462
+ });
569463
+ }
569281
569464
  }
569282
569465
  }
569283
- } catch {
569284
- }
569285
- try {
569286
- const result = await nexusTool.execute({
569287
- action: "find_agent",
569288
- peer_id: peerId
569289
- });
569290
- if (result.success && result.output) {
569291
- const models = [];
569292
- const capMatches = result.output.matchAll(/inference:([^\s,\]]+)/g);
569293
- for (const m2 of capMatches) {
569294
- const capName = m2[1];
569295
- const modelName = capName.replace(/_(\d+[bBmMkK])$/, ":$1").replace(/_latest$/, ":latest");
569296
- models.push({
569297
- name: modelName,
569298
- size: "",
569299
- modified: "",
569300
- sizeBytes: 0,
569301
- parameterSize: "remote"
569302
- });
569466
+ buildSystemPrompt() {
569467
+ const base3 = [
569468
+ "You are a voice assistant on a LIVE AUDIO CALL. This is a real-time conversation.",
569469
+ "",
569470
+ "CRITICAL RULES FOR VOICE CALLS:",
569471
+ "1. ALWAYS respond IMMEDIATELY with speech. Do NOT use tools before responding.",
569472
+ "2. Your response goes through text-to-speech — keep it SHORT (1-3 sentences).",
569473
+ "3. NEVER use code blocks, markdown, or long technical text.",
569474
+ "4. Be conversational and natural, like talking to a colleague.",
569475
+ "5. Call task_complete with your spoken response as the summary.",
569476
+ "6. Only use tools (file_read, grep, shell, etc.) if the user EXPLICITLY asks you to look something up, run a command, or make a change. For normal conversation, NEVER call tools.",
569477
+ "7. If the user asks what's happening, summarize from the activity context below — do NOT run tools to find out."
569478
+ ];
569479
+ if (this.opts.emotionContext) {
569480
+ base3.push("", "Mood:", this.opts.emotionContext);
569303
569481
  }
569304
- if (models.length > 0) return models;
569305
- }
569306
- } catch {
569307
- }
569308
- if (isLocalPeer) {
569309
- const pricingPath = join148(nexusDir, "pricing.json");
569310
- if (existsSync131(pricingPath)) {
569311
- try {
569312
- const pricing = JSON.parse(readFileSync107(pricingPath, "utf8"));
569313
- return (pricing.models || []).map((m2) => ({
569314
- name: m2.model || "unknown",
569315
- size: m2.parameterSize || "",
569316
- modified: "",
569317
- sizeBytes: 0,
569318
- parameterSize: m2.parameterSize || "remote"
569319
- }));
569320
- } catch {
569482
+ if (this.tier === "admin") {
569483
+ base3.push(
569484
+ "",
569485
+ "ADMIN call — you CAN use tools IF the user explicitly requests an action (e.g. 'read that file', 'run the tests').",
569486
+ "But for general chat, status questions, or greetings — respond immediately WITHOUT tools."
569487
+ );
569488
+ } else {
569489
+ base3.push(
569490
+ "",
569491
+ "PUBLIC call read-only access. Answer questions about the project conversationally."
569492
+ );
569321
569493
  }
569494
+ return base3.join("\n");
569322
569495
  }
569323
- }
569324
- return [];
569325
- } catch {
569326
- return [];
569327
- }
569328
- }
569329
- async function fetchModels(baseUrl, apiKey) {
569330
- if (baseUrl.startsWith("peer://")) {
569331
- return fetchPeerModels(baseUrl.slice(7), apiKey);
569332
- }
569333
- const provider = detectProvider(baseUrl);
569334
- if (provider.id === "ollama") {
569335
- let ollamaErr;
569336
- try {
569337
- return await fetchOllamaModels(baseUrl);
569338
- } catch (err) {
569339
- ollamaErr = err instanceof Error ? err : new Error(String(err));
569340
- try {
569341
- return await fetchOpenAIModels(baseUrl, apiKey);
569342
- } catch {
569343
- throw new Error(`Cannot reach Ollama at ${baseUrl}: ${ollamaErr.message}`);
569344
- }
569345
- }
569346
- }
569347
- let lastErr;
569348
- for (let attempt = 0; attempt < 2; attempt++) {
569349
- try {
569350
- return await fetchOpenAIModels(baseUrl, apiKey);
569351
- } catch (err) {
569352
- lastErr = err instanceof Error ? err : new Error(String(err));
569353
- if (attempt === 0) await new Promise((r2) => setTimeout(r2, 1e3));
569354
- }
569355
- }
569356
- try {
569357
- return await fetchOllamaModels(baseUrl);
569358
- } catch {
569359
- throw new Error(`Cannot fetch models from ${provider.label} at ${baseUrl}: ${lastErr?.message ?? "unknown error"}`);
569360
- }
569361
- }
569362
- function findModel(models, query) {
569363
- const exact = models.find((m2) => m2.name === query);
569364
- if (exact) return exact;
569365
- const partial = models.find((m2) => m2.name.startsWith(query));
569366
- if (partial) return partial;
569367
- const fuzzy = models.find((m2) => m2.name.includes(query));
569368
- return fuzzy;
569369
- }
569370
- async function queryModelContextSize(baseUrl, modelName) {
569371
- try {
569372
- const normalized = normalizeBaseUrl(baseUrl);
569373
- const res = await fetch(`${normalized}/api/show`, {
569374
- method: "POST",
569375
- headers: { "Content-Type": "application/json" },
569376
- body: JSON.stringify({ name: modelName }),
569377
- signal: AbortSignal.timeout(1e4)
569378
- });
569379
- if (!res.ok) return null;
569380
- const data = await res.json();
569381
- const explicitNumCtx = parseShowNumCtx(data);
569382
- if (explicitNumCtx) return explicitNumCtx;
569383
- if (data.model_info) {
569384
- const info = data.model_info;
569385
- const arch3 = info["general.architecture"];
569386
- const paramCount = info["general.parameter_count"];
569387
- const modelSizeGB2 = paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;
569388
- if (arch3) {
569389
- const archMax = info[`${arch3}.context_length`];
569390
- const nLayers = info[`${arch3}.block_count`];
569391
- const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
569392
- const keyDim = info[`${arch3}.attention.key_length`];
569393
- const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
569394
- if (archMax && nLayers && nKVHeads && keyDim && valDim) {
569395
- const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
569396
- return estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2);
569397
- }
569398
- if (archMax) {
569399
- const kvEstimate = modelSizeGB2 <= 5 ? 524288 : modelSizeGB2 <= 20 ? 1048576 : 1572864;
569400
- return estimateRealisticContext(kvEstimate, archMax, modelSizeGB2);
569496
+ buildTools() {
569497
+ if (this.tier === "admin") {
569498
+ return this.buildAdminTools();
569401
569499
  }
569500
+ return this.buildPublicTools();
569402
569501
  }
569403
- }
569404
- return null;
569405
- } catch {
569406
- return null;
569407
- }
569408
- }
569409
- function estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2) {
569410
- const totalMemGB = totalmem4() / 1024 ** 3;
569411
- const usableBytes = totalMemGB * 0.7 * 1024 ** 3;
569412
- const maxTokens = Math.floor(usableBytes / kvBytesPerToken);
569413
- let numCtx = Math.max(2048, Math.floor(maxTokens / 1024) * 1024);
569414
- numCtx = Math.min(numCtx, 131072, archMax);
569415
- if (modelSizeGB2 && modelSizeGB2 > 0) {
569416
- const maxKVBytes = modelSizeGB2 * 4 * 1024 ** 3;
569417
- const budgetCap = Math.max(2048, Math.floor(maxKVBytes / kvBytesPerToken / 1024) * 1024);
569418
- numCtx = Math.min(numCtx, budgetCap);
569419
- }
569420
- return numCtx;
569421
- }
569422
- async function queryOpenAIContextSize(baseUrl, modelName, apiKey) {
569423
- try {
569424
- const models = await fetchOpenAIModels(baseUrl, apiKey);
569425
- const model = models.find((m2) => m2.name === modelName);
569426
- if (model?.contextLength) return model.contextLength;
569427
- if (model?.size) {
569428
- const match = model.size.match(/(\d+)K ctx/);
569429
- if (match) return parseInt(match[1], 10) * 1024;
569430
- }
569431
- return null;
569432
- } catch {
569433
- return null;
569434
- }
569435
- }
569436
- async function queryContextSize(baseUrl, modelName, apiKey) {
569437
- if (baseUrl.startsWith("peer://")) return 32768;
569438
- const ollamaSize = await queryModelContextSize(baseUrl, modelName);
569439
- if (ollamaSize) return ollamaSize;
569440
- return queryOpenAIContextSize(baseUrl, modelName, apiKey);
569441
- }
569442
- async function queryModelCapabilities(baseUrl, modelName) {
569443
- const caps = { vision: false, toolUse: false, thinking: false };
569444
- if (baseUrl.startsWith("peer://")) {
569445
- const nameLower = modelName.toLowerCase();
569446
- if (/qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/.test(nameLower)) {
569447
- caps.toolUse = true;
569448
- }
569449
- if (/qwen3|deepseek-r1/.test(nameLower)) {
569450
- caps.thinking = true;
569451
- }
569452
- return caps;
569453
- }
569454
- try {
569455
- const normalized = normalizeBaseUrl(baseUrl);
569456
- const res = await fetch(`${normalized}/api/show`, {
569457
- method: "POST",
569458
- headers: { "Content-Type": "application/json" },
569459
- body: JSON.stringify({ name: modelName }),
569460
- signal: AbortSignal.timeout(1e4)
569461
- });
569462
- if (!res.ok) return caps;
569463
- const data = await res.json();
569464
- if (Array.isArray(data.capabilities)) {
569465
- if (data.capabilities.includes("vision")) caps.vision = true;
569466
- if (data.capabilities.includes("tools")) caps.toolUse = true;
569467
- if (data.capabilities.includes("thinking")) caps.thinking = true;
569468
- }
569469
- if (data.model_info) {
569470
- for (const key of Object.keys(data.model_info)) {
569471
- const k = key.toLowerCase();
569472
- if (k.includes("vision.block_count") || k.includes("clip.") || k.includes("image_token_id") || k.includes("projector") || k.includes("vision.embedding_length")) {
569473
- const val = data.model_info[key];
569474
- if (val !== null && val !== void 0 && val !== 0 && val !== "") {
569475
- caps.vision = true;
569476
- }
569477
- }
569502
+ buildAdminTools() {
569503
+ const debateAdapter = async (prompt) => {
569504
+ const r2 = await this.backend.chatCompletion({
569505
+ messages: [{ role: "user", content: prompt }],
569506
+ tools: [],
569507
+ temperature: 0.7,
569508
+ maxTokens: 800,
569509
+ timeoutMs: 12e4
569510
+ });
569511
+ return r2.choices[0]?.message?.content ?? "";
569512
+ };
569513
+ const replayAdapter = async (prompt) => {
569514
+ const r2 = await this.backend.chatCompletion({
569515
+ messages: [{ role: "user", content: prompt }],
569516
+ tools: [],
569517
+ temperature: 0,
569518
+ maxTokens: 1500,
569519
+ timeoutMs: 12e4
569520
+ });
569521
+ return r2.choices[0]?.message?.content ?? "";
569522
+ };
569523
+ const tools = [
569524
+ new FileReadTool(this.repoRoot),
569525
+ new FileWriteTool(this.repoRoot),
569526
+ new FileEditTool(this.repoRoot),
569527
+ new ShellTool(this.repoRoot),
569528
+ new GrepSearchTool(this.repoRoot),
569529
+ new GlobFindTool(this.repoRoot),
569530
+ new ListDirectoryTool(this.repoRoot),
569531
+ new WebSearchTool(),
569532
+ new WebFetchTool(),
569533
+ new MemoryReadTool(this.repoRoot),
569534
+ new MemoryWriteTool(this.repoRoot),
569535
+ new MemorySearchTool(this.repoRoot),
569536
+ new DebateTool(debateAdapter),
569537
+ new ReplayWithInterventionTool({ workingDir: this.repoRoot, callable: replayAdapter })
569538
+ ];
569539
+ return tools.map(adaptTool);
569478
569540
  }
569479
- }
569480
- const nameLower = modelName.toLowerCase();
569481
- if (/qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/.test(nameLower)) {
569482
- caps.toolUse = true;
569483
- }
569484
- if (data.template) {
569485
- if (data.template.includes("<think>") || data.template.includes("thinking")) {
569486
- caps.thinking = true;
569541
+ buildPublicTools() {
569542
+ const tools = [
569543
+ new FileReadTool(this.repoRoot),
569544
+ new GrepSearchTool(this.repoRoot),
569545
+ new GlobFindTool(this.repoRoot),
569546
+ new ListDirectoryTool(this.repoRoot),
569547
+ new MemoryReadTool(this.repoRoot),
569548
+ new MemorySearchTool(this.repoRoot)
569549
+ ];
569550
+ return tools.map(adaptTool);
569487
569551
  }
569488
- }
569489
- return caps;
569490
- } catch {
569491
- return caps;
569492
- }
569493
- }
569494
- function formatBytes3(bytes) {
569495
- if (bytes < 1024) return `${bytes} B`;
569496
- const units = ["KB", "MB", "GB", "TB"];
569497
- let size = bytes;
569498
- let i2 = -1;
569499
- while (size >= 1024 && i2 < units.length - 1) {
569500
- size /= 1024;
569501
- i2++;
569502
- }
569503
- return `${size.toFixed(1)} ${units[i2] ?? "B"}`;
569504
- }
569505
- function formatContextLength(tokens) {
569506
- if (tokens >= 1e6) return `${(tokens / 1e6).toFixed(1)}M ctx`;
569507
- if (tokens >= 1024) return `${Math.round(tokens / 1024)}K ctx`;
569508
- return `${tokens} ctx`;
569509
- }
569510
- function formatCaps(caps) {
569511
- const tags = [];
569512
- if (caps.vision) tags.push("vision");
569513
- if (caps.toolUse) tags.push("tools");
569514
- if (caps.thinking) tags.push("think");
569515
- return tags.join("+");
569516
- }
569517
- function formatRelativeTime(iso2) {
569518
- const now = Date.now();
569519
- const then = new Date(iso2).getTime();
569520
- const diffMs = now - then;
569521
- const minutes = Math.floor(diffMs / 6e4);
569522
- if (minutes < 1) return "just now";
569523
- if (minutes < 60) return `${minutes}m ago`;
569524
- const hours = Math.floor(minutes / 60);
569525
- if (hours < 24) return `${hours}h ago`;
569526
- const days = Math.floor(hours / 24);
569527
- if (days < 7) return `${days}d ago`;
569528
- const weeks = Math.floor(days / 7);
569529
- if (weeks < 5) return `${weeks}w ago`;
569530
- const months = Math.floor(days / 30);
569531
- return `${months}mo ago`;
569532
- }
569533
- var IMAGE_GEN_PATTERNS;
569534
- var init_model_picker = __esm({
569535
- "packages/cli/src/tui/model-picker.ts"() {
569536
- "use strict";
569537
- init_dist();
569538
- IMAGE_GEN_PATTERNS = [
569539
- /flux/i,
569540
- /z-image/i,
569541
- /stable-diffusion/i,
569542
- /sdxl/i,
569543
- /dall/i,
569544
- /kandinsky/i,
569545
- /midjourney/i,
569546
- /imagen/i
569547
- ];
569552
+ };
569548
569553
  }
569549
569554
  });
569550
569555
 
@@ -577955,6 +577960,7 @@ __export(setup_exports, {
577955
577960
  ensurePythonVenv: () => ensurePythonVenv,
577956
577961
  ensureVisionDeps: () => ensureVisionDeps,
577957
577962
  expandedModelName: () => expandedModelName,
577963
+ formatExpandedContextDiagnostic: () => formatExpandedContextDiagnostic,
577958
577964
  getLatestOllamaVersion: () => getLatestOllamaVersion,
577959
577965
  getOllamaVersion: () => getOllamaVersion,
577960
577966
  hasCmd: () => hasCmd,
@@ -578020,7 +578026,7 @@ async function needsTextToolMode(modelName, backendUrl2) {
578020
578026
  const hasTools = await checkToolSupport(modelName, backendUrl2);
578021
578027
  return !hasTools;
578022
578028
  }
578023
- function detectUnifiedMemory() {
578029
+ function detectUnifiedMemory(hasDiscreteGpu = false) {
578024
578030
  if (process.platform === "darwin" && process.arch === "arm64") return true;
578025
578031
  if (process.platform === "linux") {
578026
578032
  try {
@@ -578034,6 +578040,16 @@ function detectUnifiedMemory() {
578034
578040
  }
578035
578041
  } catch {
578036
578042
  }
578043
+ try {
578044
+ if (existsSync91("/proc/device-tree/model")) {
578045
+ const model = readFileSync74("/proc/device-tree/model", "utf8").replace(/\0+$/, "").toLowerCase();
578046
+ if (/jetson|tegra|orin|xavier|nano|raspberry|rockchip|rk\d{4}|mt\d{4}/.test(model)) {
578047
+ return true;
578048
+ }
578049
+ }
578050
+ } catch {
578051
+ }
578052
+ if (process.arch === "arm64" && !hasDiscreteGpu) return true;
578037
578053
  }
578038
578054
  return false;
578039
578055
  }
@@ -578127,11 +578143,15 @@ function detectSystemSpecs() {
578127
578143
  } catch {
578128
578144
  }
578129
578145
  }
578130
- const unifiedMemory = detectUnifiedMemory();
578146
+ const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
578131
578147
  if (unifiedMemory && totalRamGB > 0) {
578132
578148
  const floorGB = totalRamGB * 0.8;
578133
578149
  if (availableRamGB < floorGB) availableRamGB = floorGB;
578134
578150
  }
578151
+ if (!unifiedMemory && totalRamGB > 0) {
578152
+ const floorGB = totalRamGB * 0.75;
578153
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578154
+ }
578135
578155
  return {
578136
578156
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578137
578157
  availableRamGB: Math.round(availableRamGB * 10) / 10,
@@ -578191,11 +578211,15 @@ async function detectSystemSpecsAsync() {
578191
578211
  } catch {
578192
578212
  }
578193
578213
  }
578194
- const unifiedMemory = detectUnifiedMemory();
578214
+ const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
578195
578215
  if (unifiedMemory && totalRamGB > 0) {
578196
578216
  const floorGB = totalRamGB * 0.8;
578197
578217
  if (availableRamGB < floorGB) availableRamGB = floorGB;
578198
578218
  }
578219
+ if (!unifiedMemory && totalRamGB > 0) {
578220
+ const floorGB = totalRamGB * 0.75;
578221
+ if (availableRamGB < floorGB) availableRamGB = floorGB;
578222
+ }
578199
578223
  return {
578200
578224
  totalRamGB: Math.round(totalRamGB * 10) / 10,
578201
578225
  availableRamGB: Math.round(availableRamGB * 10) / 10,
@@ -578259,17 +578283,41 @@ function formatContextLabel(numCtx) {
578259
578283
  return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
578260
578284
  }
578261
578285
  function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
578262
- const memoryFit = calculateMemoryBoundedNumCtx(
578263
- specs,
578264
- modelSizeGB2,
578265
- kvBytesPerToken,
578266
- archMax
578267
- );
578268
- const archCtx = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : Number.POSITIVE_INFINITY;
578286
+ const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
578287
+ const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
578288
+ const totalAvail = Math.max(vramBudget, ramBudget);
578289
+ const remaining = Math.max(0, totalAvail - modelSizeGB2);
578290
+ const usableGB = remaining * 0.85;
578291
+ const memoryFit = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
578292
+ const archCtxValue = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : null;
578293
+ const archCtx = archCtxValue ?? Number.POSITIVE_INFINITY;
578269
578294
  const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
578270
578295
  const fits = Math.min(memoryFit, archCtx);
578271
578296
  const numCtx = Math.max(floor, fits);
578272
- return { numCtx, label: formatContextLabel(numCtx) };
578297
+ let limitedBy;
578298
+ if (numCtx === floor && fits < floor) limitedBy = "floor";
578299
+ else if (archCtxValue !== null && numCtx === archCtxValue) limitedBy = "arch";
578300
+ else limitedBy = "memory";
578301
+ const effectiveKvBpt = kvBytesPerToken && kvBytesPerToken > 0 ? kvBytesPerToken : (modelSizeGB2 <= 5 ? 64 : modelSizeGB2 <= 12 ? 160 : modelSizeGB2 <= 25 ? 256 : 384) * 1024;
578302
+ return {
578303
+ numCtx,
578304
+ label: formatContextLabel(numCtx),
578305
+ math: {
578306
+ numCtx,
578307
+ label: formatContextLabel(numCtx),
578308
+ modelSizeGB: modelSizeGB2,
578309
+ kvBytesPerToken: effectiveKvBpt,
578310
+ kvSource: kvBytesPerToken && kvBytesPerToken > 0 ? "model_info" : "fallback",
578311
+ archMax: archMax && archMax > 0 ? archMax : null,
578312
+ ramBudgetGB: ramBudget,
578313
+ vramBudgetGB: vramBudget,
578314
+ usableGB,
578315
+ memoryFit,
578316
+ archCtx: archCtxValue,
578317
+ floor,
578318
+ limitedBy
578319
+ }
578320
+ };
578273
578321
  }
578274
578322
  function ask(rl, question) {
578275
578323
  return new Promise((resolve52) => {
@@ -580064,7 +580112,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
580064
580112
  const arch3 = info["general.architecture"];
580065
580113
  if (!arch3) return null;
580066
580114
  const nLayersRaw = info[`${arch3}.block_count`];
580067
- const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
580115
+ const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`];
580068
580116
  const keyDimRaw = info[`${arch3}.attention.key_length`];
580069
580117
  const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
580070
580118
  const archMax = info[`${arch3}.context_length`];
@@ -580072,7 +580120,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
580072
580120
  const keyDim = keyDimRaw ?? 128;
580073
580121
  const valDim = valDimRaw ?? 128;
580074
580122
  const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
580075
- const nKVHeads = nKVHeadsRaw ?? 32;
580123
+ const nKVHeads = nKVHeadsRaw ?? 8;
580076
580124
  if (!nLayers) return { archMax };
580077
580125
  const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
580078
580126
  return { kvBytesPerToken, archMax };
@@ -580204,6 +580252,21 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
580204
580252
  archMax
580205
580253
  );
580206
580254
  }
580255
/**
 * Build the one-line, bracketed diagnostic that explains how an expanded
 * context size was chosen: memory readings, model size, KV-cache cost per
 * token, the candidate limits, and which limit decided the result.
 *
 * The "available" RAM/VRAM figures fall back to the total when the reading is
 * missing or not positive, which is the same rule the context-size
 * calculation uses for its RAM/VRAM budgets. This keeps the printed numbers
 * in line with the math and prevents a missing reading from throwing here.
 *
 * @param {object} specs - Hardware readings. Reads `gpuVramGB`,
 *   `availableVramGB`, `availableRamGB`, `totalRamGB` and `unifiedMemory`.
 * @param {object} math - Calculation breakdown. Reads `kvBytesPerToken`,
 *   `kvSource`, `memoryFit`, `archCtx`, `floor`, `numCtx`, `limitedBy` and
 *   `modelSizeGB`.
 * @returns {string} Diagnostic text of the form
 *   `[<memory> | model <size> | KV <n>KB/tok (<source>) | fit …, arch …, floor … → <ctx> (<reason>)]`.
 */
function formatExpandedContextDiagnostic(specs, math) {
  // A diagnostic line must never abort the caller, so non-numeric values
  // render as "n/a" instead of throwing inside toFixed().
  const fmtGB = (n2) => (Number.isFinite(n2) ? `${n2.toFixed(1)}GB` : "n/a");
  const fmtKB = (n2) => `${Math.round(n2 / 1024)}KB`;
  const fmtK = (n2) => (n2 >= 1024 ? `${Math.floor(n2 / 1024)}K` : String(n2));
  // Use the total whenever the "available" reading is absent or not positive.
  const availableOrTotal = (available, total) => (available > 0 ? available : total);

  const memBits = [];
  if (specs.gpuVramGB > 0) {
    const vramNow = availableOrTotal(specs.availableVramGB, specs.gpuVramGB);
    memBits.push(`VRAM ${fmtGB(vramNow)}/${fmtGB(specs.gpuVramGB)}`);
  }
  const ramNow = availableOrTotal(specs.availableRamGB, specs.totalRamGB);
  memBits.push(`RAM ${fmtGB(ramNow)}/${fmtGB(specs.totalRamGB)}${specs.unifiedMemory ? " unified" : ""}`);
  const mem = memBits.join(", ");

  const kv = `KV ${fmtKB(math.kvBytesPerToken)}/tok (${math.kvSource})`;
  // `!= null` treats both null and undefined as "no architecture limit known".
  const archText = math.archCtx != null ? fmtK(math.archCtx) : "n/a";
  const fit2 = `fit ${fmtK(math.memoryFit)}, arch ${archText}, floor ${fmtK(math.floor)}`;

  let reason = "memory-fit";
  if (math.limitedBy === "floor") {
    reason = "min floor";
  } else if (math.limitedBy === "arch") {
    reason = "arch-capped";
  }
  const limit = `→ ${fmtK(math.numCtx)} (${reason})`;

  return `[${mem} | model ${fmtGB(math.modelSizeGB)} | ${kv} | ${fit2} ${limit}]`;
}
580207
580270
  async function ensureExpandedContext(modelName, backendUrl2) {
580208
580271
  if (modelName.includes("cloud") || modelName.includes(":cloud")) {
580209
580272
  return { model: modelName, created: false, contextLabel: "remote", numCtx: 0 };
@@ -580230,11 +580293,11 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580230
580293
  kvInfo?.kvBytesPerToken,
580231
580294
  kvInfo?.archMax
580232
580295
  ).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: modelName }));
580233
- return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580296
+ return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580234
580297
  }
580235
580298
  const existing = await checkExpandedVariant(modelName, backendUrl2);
580236
580299
  if (existing === null) {
580237
- return { model: modelName, created: false, contextLabel: "", numCtx: 0 };
580300
+ return { model: modelName, created: false, contextLabel: "", numCtx: 0, specs, math: ctx3.math };
580238
580301
  }
580239
580302
  if (typeof existing === "string") {
580240
580303
  const lostTools = await wrapperLacksToolsCapability(backendUrl2, existing).catch(() => false);
@@ -580249,7 +580312,7 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580249
580312
  kvInfo?.archMax
580250
580313
  );
580251
580314
  if (rebuilt) {
580252
- return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580315
+ return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580253
580316
  }
580254
580317
  } catch {
580255
580318
  }
@@ -580264,13 +580327,13 @@ async function ensureExpandedContext(modelName, backendUrl2) {
580264
580327
  kvInfo?.kvBytesPerToken,
580265
580328
  kvInfo?.archMax
580266
580329
  ).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: existing }));
580267
- return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580330
+ return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580268
580331
  }
580269
580332
  const created = await createExpandedVariantAsync(modelName, specs, sizeGB, kvInfo?.kvBytesPerToken, kvInfo?.archMax);
580270
580333
  if (created) {
580271
- return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580334
+ return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580272
580335
  }
580273
- return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
580336
+ return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
580274
580337
  }
580275
580338
  function guessBaseFromVariant(variantName, models) {
580276
580339
  const stripped = stripVariantTag(variantName);
@@ -598640,8 +598703,8 @@ async function showModelPicker(ctx3, local = false) {
598640
598703
  }
598641
598704
  const items = [];
598642
598705
  const history = loadUsageHistory("model", ctx3.repoRoot);
598643
- const liveModelNames = new Set(models.map((m2) => m2.name));
598644
- const modelMap = new Map(models.map((m2) => [m2.name, m2]));
598706
+ const liveModelNames = new Set(models.map((m2) => stripLatest(m2.name)));
598707
+ const modelMap = new Map(models.map((m2) => [stripLatest(m2.name), m2]));
598645
598708
  if (history.length > 0) {
598646
598709
  items.push({
598647
598710
  key: "__header_recent__",
@@ -598650,8 +598713,9 @@ async function showModelPicker(ctx3, local = false) {
598650
598713
  });
598651
598714
  for (const h of history.slice(0, 8)) {
598652
598715
  const uses = h.localUses > 0 ? `${h.useCount} uses (${h.localUses} local)` : `${h.useCount} uses`;
598653
- const available = liveModelNames.has(h.value) ? "" : c3.yellow(" [offline]");
598654
- const meta = modelMap.get(h.value);
598716
+ const hKey = stripLatest(h.value);
598717
+ const available = liveModelNames.has(hKey) ? "" : c3.yellow(" [offline]");
598718
+ const meta = modelMap.get(hKey);
598655
598719
  const ctx4 = meta?.contextLength ? ` ${formatContextLength(meta.contextLength)}` : "";
598656
598720
  const capStr = meta?.caps ? ` ${formatCaps(meta.caps)}` : "";
598657
598721
  items.push({
@@ -598666,9 +598730,9 @@ async function showModelPicker(ctx3, local = false) {
598666
598730
  detail: ""
598667
598731
  });
598668
598732
  }
598669
- const historyKeys = new Set(history.map((h) => h.value));
598733
+ const historyKeys = new Set(history.map((h) => stripLatest(h.value)));
598670
598734
  for (const m2 of models) {
598671
- if (history.length > 0 && historyKeys.has(m2.name)) continue;
598735
+ if (history.length > 0 && historyKeys.has(stripLatest(m2.name))) continue;
598672
598736
  const ctx4 = m2.contextLength ? formatContextLength(m2.contextLength) : "";
598673
598737
  const capStr = m2.caps ? formatCaps(m2.caps) : "";
598674
598738
  items.push({
@@ -598679,7 +598743,10 @@ async function showModelPicker(ctx3, local = false) {
598679
598743
  }
598680
598744
  const result = await tuiSelect({
598681
598745
  items,
598682
- activeKey: ctx3.config.model,
598746
+ // `activeKey` is the key of the currently selected row. The picker stores
598747
+ // history/recent entries with tag-less keys, so normalize the active
598748
+ // model from config to match.
598749
+ activeKey: stripLatest(ctx3.config.model),
598683
598750
  title: "Select Model",
598684
598751
  rl: ctx3.rl,
598685
598752
  // Skip header rows
@@ -598690,7 +598757,7 @@ async function showModelPicker(ctx3, local = false) {
598690
598757
  renderInfo("Model selection cancelled.");
598691
598758
  return;
598692
598759
  }
598693
- await switchModel(result.key, ctx3, local);
598760
+ await switchModel(stripLatest(result.key), ctx3, local);
598694
598761
  } catch (err) {
598695
598762
  renderError(
598696
598763
  `Failed to fetch models: ${err instanceof Error ? err.message : String(err)}`
@@ -602241,14 +602308,15 @@ async function switchModel(query, ctx3, local = false) {
602241
602308
  match.name,
602242
602309
  ctx3.config.backendUrl
602243
602310
  );
602311
+ const diag = result.specs && result.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(result.specs, result.math)) : "";
602244
602312
  if (result.created) {
602245
602313
  renderInfo(
602246
- `Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)`
602314
+ `Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)${diag}`
602247
602315
  );
602248
602316
  finalModel = result.model;
602249
602317
  } else if (result.model !== match.name) {
602250
602318
  renderInfo(
602251
- `Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})`
602319
+ `Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})${diag}`
602252
602320
  );
602253
602321
  finalModel = result.model;
602254
602322
  }
@@ -653796,13 +653864,14 @@ This is an independent background session started from /background.`
653796
653864
  currentConfig.model,
653797
653865
  currentConfig.backendUrl
653798
653866
  );
653867
+ const diag = expandResult.specs && expandResult.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(expandResult.specs, expandResult.math)) : "";
653799
653868
  if (expandResult.created) {
653800
653869
  config = { ...config, model: expandResult.model };
653801
653870
  currentConfig = { ...currentConfig, model: expandResult.model };
653802
653871
  statusBar.setModelName(expandResult.model);
653803
653872
  writeContent(
653804
653873
  () => renderInfo(
653805
- `Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)`
653874
+ `Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)${diag}`
653806
653875
  )
653807
653876
  );
653808
653877
  } else if (expandResult.model !== currentConfig.model) {
@@ -653811,7 +653880,7 @@ This is an independent background session started from /background.`
653811
653880
  statusBar.setModelName(expandResult.model);
653812
653881
  writeContent(
653813
653882
  () => renderInfo(
653814
- `Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})`
653883
+ `Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})${diag}`
653815
653884
  )
653816
653885
  );
653817
653886
  }