omnius 1.0.108 → 1.0.110
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +796 -727
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -563485,6 +563485,537 @@ var init_task_complete_box = __esm({
|
|
|
563485
563485
|
}
|
|
563486
563486
|
});
|
|
563487
563487
|
|
|
563488
|
+
// packages/cli/src/tui/model-picker.ts
|
|
563489
|
+
import { totalmem as totalmem3 } from "node:os";
|
|
563490
|
+
/**
 * Reports whether a model looks like an image-generation model.
 *
 * Every pattern in IMAGE_GEN_PATTERNS is tried against the model name and,
 * when a family is supplied, against the family as well.
 *
 * @param {string} name10 - Model name.
 * @param {string} [family] - Optional model family.
 * @returns {boolean} True as soon as any pattern matches either value.
 */
function isImageGenModel(name10, family) {
  for (const pattern of IMAGE_GEN_PATTERNS) {
    if (pattern.test(name10)) return true;
    if (family && pattern.test(family)) return true;
  }
  return false;
}
|
|
563493
|
+
/**
 * Extracts an explicitly configured `num_ctx` value from an `/api/show` payload.
 *
 * `show.parameters` is inspected first, then `show.modelfile`; the first one
 * containing `num_ctx <digits>` (optionally prefixed by `PARAMETER`,
 * case-insensitive) wins.
 *
 * @param {{parameters?: unknown, modelfile?: unknown} | null | undefined} show
 *   Parsed response body; a missing payload is tolerated.
 * @returns {number | null} The configured context size, or null when none is declared.
 */
function parseShowNumCtx(show) {
  if (!show) return null;
  const numCtxPattern = /\b(?:PARAMETER\s+)?num_ctx\s+(\d+)/i;
  for (const source of [show.parameters, show.modelfile]) {
    // Only strings can be searched; skip anything else rather than throwing
    // on a missing .match().
    if (typeof source !== "string" || source === "") continue;
    const match = numCtxPattern.exec(source);
    if (match) return Number.parseInt(match[1], 10);
  }
  return null;
}
|
|
563502
|
+
/**
 * Lists the models exposed by `<baseUrl>/api/tags` and enriches each entry with
 * a context-length figure and capability flags taken from `<baseUrl>/api/show`.
 *
 * The `/api/show` lookups run in parallel and are best-effort: a model whose
 * lookup fails keeps `contextLength` and `caps` undefined.
 *
 * @param {string} baseUrl - Server base URL (normalized via normalizeBaseUrl).
 * @returns {Promise<Array<object>>} Model entries sorted by size, largest first.
 * @throws {Error} When the `/api/tags` request does not return an OK status.
 */
async function fetchOllamaModels(baseUrl) {
  const normalized = normalizeBaseUrl(baseUrl);
  const resp = await fetch(`${normalized}/api/tags`, {
    signal: AbortSignal.timeout(1e4)
  });
  if (!resp.ok) {
    throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
  }
  const data = await resp.json();
  const models = Array.isArray(data?.models) ? data.models : [];

  const result = models.map((m2) => {
    const family = m2.details?.family;
    // A missing/invalid size must not crash formatting or poison the sort with NaN.
    const sizeBytes = typeof m2.size === "number" && Number.isFinite(m2.size) ? m2.size : 0;
    return {
      name: m2.name,
      size: formatBytes3(sizeBytes),
      sizeBytes,
      modified: m2.modified_at ? formatRelativeTime(m2.modified_at) : "",
      parameterSize: m2.details?.parameter_size,
      contextLength: void 0,
      caps: void 0,
      isImageGen: isImageGenModel(m2.name, family),
      family
    };
  }).sort((a2, b) => b.sizeBytes - a2.sizeBytes);

  // Derives a context length from model_info, or undefined when the
  // architecture or its maximum context length is not reported.
  const estimateFromModelInfo = (info, sizeBytes) => {
    if (!info) return void 0;
    const arch3 = info["general.architecture"];
    if (!arch3) return void 0;
    const archMax = info[`${arch3}.context_length`];
    if (!archMax) return void 0;
    const paramCount = info["general.parameter_count"];
    const fileSizeGB = sizeBytes > 0 ? sizeBytes / 1024 ** 3 : paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;
    const nLayers = info[`${arch3}.block_count`];
    const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
    const keyDim = info[`${arch3}.attention.key_length`];
    const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
    if (nLayers && nKVHeads && keyDim && valDim) {
      const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
      return estimateRealisticContext(kvBytesPerToken, archMax, fileSizeGB);
    }
    // Attention geometry unavailable: fall back to a per-token figure tiered by file size.
    const kvEstimate = fileSizeGB <= 5 ? 524288 : fileSizeGB <= 20 ? 1048576 : 1572864;
    return estimateRealisticContext(kvEstimate, archMax, fileSizeGB);
  };

  // Same key hints queryModelCapabilities checks, so both paths agree on vision support.
  const visionKeyHints = ["vision.block_count", "clip.", "image_token_id", "projector", "vision.embedding_length"];
  const toolUseNames = /qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/;

  // Builds the capability flags from the declared list, model_info keys, the
  // model name and the prompt template.
  const detectCaps = (show, name) => {
    const modelCaps = { vision: false, toolUse: false, thinking: false };
    if (Array.isArray(show.capabilities)) {
      if (show.capabilities.includes("vision")) modelCaps.vision = true;
      if (show.capabilities.includes("tools")) modelCaps.toolUse = true;
      if (show.capabilities.includes("thinking")) modelCaps.thinking = true;
    }
    if (show.model_info) {
      for (const [key, val] of Object.entries(show.model_info)) {
        const k = key.toLowerCase();
        if (!visionKeyHints.some((hint) => k.includes(hint))) continue;
        if (val !== null && val !== void 0 && val !== 0 && val !== "") {
          modelCaps.vision = true;
        }
      }
    }
    if (toolUseNames.test(String(name ?? "").toLowerCase())) {
      modelCaps.toolUse = true;
    }
    if (typeof show.template === "string" && (show.template.includes("<think>") || show.template.includes("thinking"))) {
      modelCaps.thinking = true;
    }
    return modelCaps;
  };

  const showResults = await Promise.allSettled(
    result.map(
      (m2) => fetch(`${normalized}/api/show`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name: m2.name }),
        signal: AbortSignal.timeout(5e3)
      }).then((r2) => r2.ok ? r2.json() : null)
    )
  );

  result.forEach((entry, i2) => {
    const sr = showResults[i2];
    if (sr?.status !== "fulfilled" || !sr.value) return;
    const show = sr.value;
    // An explicit num_ctx wins over the estimate, but capabilities are still
    // detected afterwards (previously an explicit num_ctx skipped them).
    entry.contextLength = parseShowNumCtx(show) || estimateFromModelInfo(show.model_info, entry.sizeBytes);
    entry.caps = detectCaps(show, entry.name);
  });
  return result;
}
|
|
563594
|
+
/**
 * Lists models from `<baseUrl>/v1/models`.
 *
 * Authentication follows the host: `api.anthropic.com` gets `x-api-key` plus
 * `anthropic-version`, every other host gets a Bearer token. No auth header is
 * sent when `apiKey` is empty.
 *
 * @param {string} baseUrl - Server base URL (normalized via normalizeBaseUrl).
 * @param {string} [apiKey] - Optional API key.
 * @returns {Promise<Array<object>>} Model entries sorted by name.
 * @throws {Error} When the request does not return an OK status.
 */
async function fetchOpenAIModels(baseUrl, apiKey) {
  const normalized = normalizeBaseUrl(baseUrl);
  const isAnthropic = /api\.anthropic\.com/i.test(baseUrl);
  const headers = {};
  if (apiKey) {
    if (isAnthropic) {
      headers["x-api-key"] = apiKey;
      headers["anthropic-version"] = "2023-06-01";
    } else {
      headers["Authorization"] = `Bearer ${apiKey}`;
    }
  }
  const resp = await fetch(`${normalized}/v1/models`, {
    headers,
    signal: AbortSignal.timeout(15e3)
  });
  if (!resp.ok) {
    throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
  }
  const data = await resp.json();
  const models = Array.isArray(data?.data) ? data.data : [];

  // `created` is epoch seconds; Anthropic's listing carries an ISO `created_at` instead.
  const describeCreated = (m2) => {
    if (m2.created) {
      const createdDate = new Date(Number(m2.created) * 1e3);
      if (!Number.isNaN(createdDate.getTime())) {
        return formatRelativeTime(createdDate.toISOString());
      }
    }
    if (typeof m2.created_at === "string" && m2.created_at !== "") {
      return formatRelativeTime(m2.created_at);
    }
    return "";
  };

  return models
    // Entries without an id cannot be selected and would crash the name sort.
    .filter((m2) => typeof m2?.id === "string" && m2.id !== "")
    .map((m2) => ({
      name: m2.id,
      size: "",
      sizeBytes: 0,
      modified: describeCreated(m2),
      parameterSize: m2.owned_by ?? void 0,
      contextLength: m2.context_length ?? m2.max_model_len ?? void 0
    }))
    .sort((a2, b) => a2.name.localeCompare(b.name));
}
|
|
563625
|
+
/**
 * Resolves the models offered by a peer, trying several sources in order and
 * returning the first non-empty answer:
 *
 *   1. `pricing.json` in the Nexus directory, when `status.json` says the peer is this node;
 *   2. `peer-models-cache.json`, when it is for this peer and younger than five minutes;
 *   3. the Nexus tool action `query_peer_caps` (model list, else `inference:*` capabilities);
 *   4. the Nexus tool action `discover_peer_caps` (`inference:*` capabilities);
 *   5. the Nexus tool action `find_agent` (`inference:*` tokens scanned from its text output);
 *   6. `pricing.json` once more for the local peer.
 *
 * Every source is best-effort: a failure moves on to the next one and the
 * function never rejects.
 *
 * @param {string} peerId - Peer identifier.
 * @param {string} [authKey] - Optional key forwarded as `auth_key` to `query_peer_caps`.
 * @returns {Promise<Array<{name: string, size: string, modified: string, sizeBytes: number, parameterSize: string}>>}
 *   Model entries, or an empty array when no source yields any.
 */
async function fetchPeerModels(peerId, authKey) {
  try {
    const { NexusTool: NexusTool2 } = await Promise.resolve().then(() => (init_dist5(), dist_exports));
    const { existsSync: existsSync131, readFileSync: readFileSync107 } = await import("node:fs");
    const { join: join148 } = await import("node:path");
    const nexusTool = new NexusTool2(process.cwd());
    const nexusDir = nexusTool.getNexusDir();

    // Shapes one picker entry; remote models carry no byte size or timestamp.
    const toEntry = (name, size, parameterSize) => ({
      name: name || "unknown",
      size: size || "",
      modified: "",
      sizeBytes: 0,
      parameterSize: parameterSize || "remote"
    });

    // Turns a capability suffix such as "qwen3_8b" / "llama3_latest" into "qwen3:8b" / "llama3:latest".
    const entryFromCapability = (capName) => ({
      name: capName.replace(/_(\d+[bBmMkK])$/, ":$1").replace(/_latest$/, ":latest"),
      size: "",
      modified: "",
      sizeBytes: 0,
      parameterSize: "remote"
    });

    // Parses a JSON file from the Nexus directory; undefined when absent. May throw on bad JSON.
    const readJsonIfPresent = (fileName) => {
      const filePath = join148(nexusDir, fileName);
      return existsSync131(filePath) ? JSON.parse(readFileSync107(filePath, "utf8")) : void 0;
    };

    // Reads the pricing file's model list; null when the file is absent or unreadable.
    const readPricingModels = () => {
      try {
        const pricing = readJsonIfPresent("pricing.json");
        if (pricing === void 0) return null;
        return (pricing.models || []).map((m2) => toEntry(m2.model, m2.parameterSize, m2.parameterSize));
      } catch {
        return null;
      }
    };

    // Collects entries for every "inference:<name>" string in a capability list.
    const entriesFromCapabilityList = (capabilities) => {
      const models = [];
      for (const cap of capabilities) {
        if (typeof cap === "string" && cap.startsWith("inference:")) {
          models.push(entryFromCapability(cap.slice(10)));
        }
      }
      return models;
    };

    // Interprets a JSON capabilities payload; null when it yields no models.
    const entriesFromCapsPayload = (output, acceptModelList) => {
      let payload = null;
      try {
        payload = JSON.parse(output);
      } catch {
        // Not JSON: this source has nothing to offer.
      }
      if (acceptModelList && payload?.models && payload.models.length > 0) {
        return payload.models.map((m2) => toEntry(m2.name, m2.parameterSize, m2.parameterSize));
      }
      if (payload?.capabilities && payload.capabilities.length > 0) {
        const models = entriesFromCapabilityList(payload.capabilities);
        if (models.length > 0) return models;
      }
      return null;
    };

    let isLocalPeer = false;
    try {
      const status = readJsonIfPresent("status.json");
      isLocalPeer = status !== void 0 && status.peerId === peerId;
    } catch {
      // Unreadable status file: treat the peer as remote.
    }

    if (isLocalPeer) {
      const localModels = readPricingModels();
      if (localModels && localModels.length > 0) return localModels;
    }

    try {
      const cache8 = readJsonIfPresent("peer-models-cache.json");
      if (cache8 !== void 0 && cache8.peerId === peerId && cache8.models?.length > 0) {
        const age = Date.now() - new Date(cache8.cachedAt).getTime();
        if (age < 5 * 60 * 1e3) {
          return cache8.models.map((m2) => toEntry(m2.name, m2.size || m2.parameterSize, m2.parameterSize));
        }
      }
    } catch {
      // Corrupt cache: fall through to the live queries.
    }

    try {
      const capsResult = await nexusTool.execute({
        action: "query_peer_caps",
        peer_id: peerId,
        ...authKey ? { auth_key: authKey } : {}
      });
      if (capsResult.success && capsResult.output) {
        const models = entriesFromCapsPayload(capsResult.output, true);
        if (models) return models;
      }
    } catch {
      // Query failed: try the next source.
    }

    try {
      const natsResult = await nexusTool.execute({
        action: "discover_peer_caps",
        peer_id: peerId
      });
      if (natsResult.success && natsResult.output) {
        const models = entriesFromCapsPayload(natsResult.output, false);
        if (models) return models;
      }
    } catch {
      // Discovery failed: try the next source.
    }

    try {
      const result = await nexusTool.execute({
        action: "find_agent",
        peer_id: peerId
      });
      if (result.success && result.output) {
        // This output is scanned as free text rather than parsed as JSON.
        const models = Array.from(
          result.output.matchAll(/inference:([^\s,\]]+)/g),
          (m2) => entryFromCapability(m2[1])
        );
        if (models.length > 0) return models;
      }
    } catch {
      // Lookup failed: fall through to the final local fallback.
    }

    if (isLocalPeer) {
      const localModels = readPricingModels();
      if (localModels) return localModels;
    }
    return [];
  } catch {
    // Setup itself failed (module load, tool construction): report no models.
    return [];
  }
}
|
|
563794
|
+
/**
 * Fetches the model list for a backend, choosing the protocol from the URL.
 *
 * - `peer://<id>` URLs are delegated to fetchPeerModels.
 * - When detectProvider identifies Ollama, the Ollama listing is tried first
 *   and the OpenAI-style listing second.
 * - Otherwise the OpenAI-style listing is tried twice (one second apart) and
 *   the Ollama listing is used as a last resort.
 *
 * @param {string} baseUrl - Backend URL or `peer://<id>`.
 * @param {string} [apiKey] - API key (or peer auth key for `peer://` URLs).
 * @returns {Promise<Array<object>>} Model entries.
 * @throws {Error} When every attempt fails; the first underlying failure is
 *   attached as `cause`.
 */
async function fetchModels(baseUrl, apiKey) {
  if (baseUrl.startsWith("peer://")) {
    return fetchPeerModels(baseUrl.slice(7), apiKey);
  }
  const toError = (err) => err instanceof Error ? err : new Error(String(err));
  const provider = detectProvider(baseUrl);
  if (provider.id === "ollama") {
    try {
      return await fetchOllamaModels(baseUrl);
    } catch (err) {
      const ollamaErr = toError(err);
      try {
        return await fetchOpenAIModels(baseUrl, apiKey);
      } catch {
        // Report the primary (Ollama) failure; the fallback error is secondary.
        throw new Error(`Cannot reach Ollama at ${baseUrl}: ${ollamaErr.message}`, { cause: ollamaErr });
      }
    }
  }
  let lastErr;
  for (let attempt = 0; attempt < 2; attempt++) {
    try {
      return await fetchOpenAIModels(baseUrl, apiKey);
    } catch (err) {
      lastErr = toError(err);
      // Brief pause before the single retry.
      if (attempt === 0) await new Promise((r2) => setTimeout(r2, 1e3));
    }
  }
  try {
    return await fetchOllamaModels(baseUrl);
  } catch {
    throw new Error(
      `Cannot fetch models from ${provider.label} at ${baseUrl}: ${lastErr?.message ?? "unknown error"}`,
      { cause: lastErr }
    );
  }
}
|
|
563827
|
+
/**
 * Removes a trailing `:latest` tag (case-insensitive) from a model name so
 * that `llama3` and `llama3:latest` compare equal.
 *
 * @param {string | null | undefined} modelName - Model name; a missing name
 *   is treated as the empty string instead of throwing.
 * @returns {string} The name without a trailing `:latest`.
 */
function stripLatest(modelName) {
  return String(modelName ?? "").replace(/:latest$/i, "");
}
|
|
563830
|
+
/**
 * Finds the model that best matches a user-supplied query.
 *
 * Matching tiers, first hit wins:
 *   1. exact name, 2. name prefix, 3. name substring (all case-sensitive);
 *   4-6. the same three tiers again, ignoring case and a trailing `:latest`.
 *
 * @param {Array<{name: string}>} models - Candidate models.
 * @param {string} query - Name or name fragment.
 * @returns {object | undefined} The matching entry, or undefined when nothing
 *   matches or the query is empty.
 */
function findModel(models, query) {
  // An empty query would otherwise prefix-match the first model.
  if (typeof query !== "string" || query === "") return void 0;

  const strictHit = models.find((m2) => m2.name === query)
    ?? models.find((m2) => m2.name.startsWith(query))
    ?? models.find((m2) => m2.name.includes(query));
  if (strictHit) return strictHit;

  const fold = (name) => name.toLowerCase().replace(/:latest$/, "");
  const wanted = fold(query);
  if (wanted === "") return void 0;
  return models.find((m2) => fold(m2.name) === wanted)
    ?? models.find((m2) => fold(m2.name).startsWith(wanted))
    ?? models.find((m2) => fold(m2.name).includes(wanted));
}
|
|
563838
|
+
/**
 * Asks `<baseUrl>/api/show` for a model's usable context size.
 *
 * An explicit `num_ctx` in the response is returned as-is; otherwise the size
 * is estimated from `model_info` through estimateRealisticContext.
 *
 * @param {string} baseUrl - Server base URL (normalized via normalizeBaseUrl).
 * @param {string} modelName - Model to look up.
 * @returns {Promise<number | null>} Context size in tokens, or null when the
 *   request fails or the response lacks the needed fields.
 */
async function queryModelContextSize(baseUrl, modelName) {
  try {
    const endpoint = `${normalizeBaseUrl(baseUrl)}/api/show`;
    const res = await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name: modelName }),
      signal: AbortSignal.timeout(1e4)
    });
    if (!res.ok) return null;

    const data = await res.json();
    const declared = parseShowNumCtx(data);
    if (declared) return declared;

    const info = data.model_info;
    if (!info) return null;
    const arch3 = info["general.architecture"];
    if (!arch3) return null;
    const archMax = info[`${arch3}.context_length`];
    if (!archMax) return null;

    // No file size is known here, so model size comes from the parameter count (4 when absent).
    const paramCount = info["general.parameter_count"];
    const modelSizeGB2 = paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;

    const nLayers = info[`${arch3}.block_count`];
    const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
    const keyDim = info[`${arch3}.attention.key_length`];
    const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;

    let kvBytesPerToken;
    if (nLayers && nKVHeads && keyDim && valDim) {
      kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
    } else if (modelSizeGB2 <= 5) {
      kvBytesPerToken = 524288;
    } else if (modelSizeGB2 <= 20) {
      kvBytesPerToken = 1048576;
    } else {
      kvBytesPerToken = 1572864;
    }
    return estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2);
  } catch {
    return null;
  }
}
|
|
563877
|
+
/**
 * Estimates a context length in tokens from a per-token byte cost.
 *
 * The estimate is 70% of total system memory divided by `kvBytesPerToken`,
 * rounded down to a multiple of 1024 with a floor of 2048, then capped at
 * 131072 tokens and at `archMax`. When a positive model size is given it is
 * further capped at four times that size (same rounding and floor).
 *
 * @param {number} kvBytesPerToken - Bytes consumed per token.
 * @param {number} archMax - Maximum context length reported for the architecture.
 * @param {number} [modelSizeGB2] - Model size in GiB.
 * @returns {number} Estimated context length in tokens.
 */
function estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2) {
  const floorToBlock = (tokens) => Math.max(2048, Math.floor(tokens / 1024) * 1024);

  const totalMemGB = totalmem3() / 1024 ** 3;
  const usableBytes = totalMemGB * 0.7 * 1024 ** 3;
  const memoryBound = floorToBlock(Math.floor(usableBytes / kvBytesPerToken));
  const capped = Math.min(memoryBound, 131072, archMax);

  if (!(modelSizeGB2 && modelSizeGB2 > 0)) {
    return capped;
  }
  const maxKVBytes = modelSizeGB2 * 4 * 1024 ** 3;
  const budgetCap = floorToBlock(maxKVBytes / kvBytesPerToken);
  return Math.min(capped, budgetCap);
}
|
|
563890
|
+
/**
 * Looks up a model's context size through the OpenAI-style model listing.
 *
 * Uses the listing's `contextLength` when present; otherwise parses a
 * `"<n>K ctx"` label from the entry's `size` text (n × 1024).
 *
 * @param {string} baseUrl - Server base URL.
 * @param {string} modelName - Exact model name to find.
 * @param {string} [apiKey] - Optional API key.
 * @returns {Promise<number | null>} Context size in tokens, or null when
 *   unknown or when the listing cannot be fetched.
 */
async function queryOpenAIContextSize(baseUrl, modelName, apiKey) {
  try {
    const listing = await fetchOpenAIModels(baseUrl, apiKey);
    const model = listing.find((candidate) => candidate.name === modelName);
    if (model?.contextLength) {
      return model.contextLength;
    }
    const labelled = model?.size ? model.size.match(/(\d+)K ctx/) : null;
    return labelled ? parseInt(labelled[1], 10) * 1024 : null;
  } catch {
    return null;
  }
}
|
|
563904
|
+
/**
 * Resolves a model's context size for any backend.
 *
 * `peer://` URLs always report 32768. Otherwise the `/api/show` lookup is
 * tried first and the OpenAI-style listing is the fallback.
 *
 * @param {string} baseUrl - Backend URL or `peer://<id>`.
 * @param {string} modelName - Model to look up.
 * @param {string} [apiKey] - Optional API key for the fallback lookup.
 * @returns {Promise<number | null>} Context size in tokens, or null when unknown.
 */
async function queryContextSize(baseUrl, modelName, apiKey) {
  if (baseUrl.startsWith("peer://")) {
    return 32768;
  }
  const viaShow = await queryModelContextSize(baseUrl, modelName);
  return viaShow || queryOpenAIContextSize(baseUrl, modelName, apiKey);
}
|
|
563910
|
+
/**
 * Determines whether a model supports vision, tool use and thinking.
 *
 * For `peer://` URLs the flags come from the model name alone. Otherwise
 * `<baseUrl>/api/show` is queried and the flags are combined from its
 * `capabilities` list, vision-related `model_info` keys, the model name and
 * the prompt template. A failed request yields whatever flags were set
 * before the failure (all false when the request itself fails).
 *
 * @param {string} baseUrl - Backend URL or `peer://<id>`.
 * @param {string} modelName - Model to inspect.
 * @returns {Promise<{vision: boolean, toolUse: boolean, thinking: boolean}>}
 */
async function queryModelCapabilities(baseUrl, modelName) {
  const toolUseNames = /qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/;
  const caps = { vision: false, toolUse: false, thinking: false };

  if (baseUrl.startsWith("peer://")) {
    // Nothing to query for a peer: decide from the name only.
    const lowered = modelName.toLowerCase();
    if (toolUseNames.test(lowered)) caps.toolUse = true;
    if (/qwen3|deepseek-r1/.test(lowered)) caps.thinking = true;
    return caps;
  }

  try {
    const res = await fetch(`${normalizeBaseUrl(baseUrl)}/api/show`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name: modelName }),
      signal: AbortSignal.timeout(1e4)
    });
    if (!res.ok) return caps;
    const data = await res.json();

    if (Array.isArray(data.capabilities)) {
      const declared = data.capabilities;
      if (declared.includes("vision")) caps.vision = true;
      if (declared.includes("tools")) caps.toolUse = true;
      if (declared.includes("thinking")) caps.thinking = true;
    }

    if (data.model_info) {
      const visionHints = ["vision.block_count", "clip.", "image_token_id", "projector", "vision.embedding_length"];
      for (const [key, val] of Object.entries(data.model_info)) {
        const k = key.toLowerCase();
        const looksVisual = visionHints.some((hint) => k.includes(hint));
        // A vision key only counts when it carries a real value.
        const hasValue = val !== null && val !== void 0 && val !== 0 && val !== "";
        if (looksVisual && hasValue) caps.vision = true;
      }
    }

    if (toolUseNames.test(modelName.toLowerCase())) {
      caps.toolUse = true;
    }

    const template = data.template;
    if (template && (template.includes("<think>") || template.includes("thinking"))) {
      caps.thinking = true;
    }
    return caps;
  } catch {
    return caps;
  }
}
|
|
563962
|
+
/**
 * Formats a byte count for display using 1024-based units.
 *
 * Values below 1024 are printed as whole bytes ("512 B"); larger values use
 * one decimal place in KB, MB, GB or TB ("1.5 KB"). TB is the largest unit.
 *
 * @param {number} bytes - Byte count.
 * @returns {string} The formatted size, or "" when the value is missing,
 *   not a finite number, or negative.
 */
function formatBytes3(bytes) {
  if (bytes === null || bytes === void 0) return "";
  const value = Number(bytes);
  if (!Number.isFinite(value) || value < 0) return "";
  if (value < 1024) return `${value} B`;

  const units = ["KB", "MB", "GB", "TB"];
  let size = value / 1024;
  let unitIndex = 0;
  // Compare the rounded figure so 1048575 bytes prints "1.0 MB", not "1024.0 KB".
  while (unitIndex < units.length - 1 && Number(size.toFixed(1)) >= 1024) {
    size /= 1024;
    unitIndex++;
  }
  return `${size.toFixed(1)} ${units[unitIndex]}`;
}
|
|
563973
|
+
/**
 * Formats a token count as a short context-size label.
 *
 * One million tokens or more are shown in millions with one decimal
 * ("1.0M ctx"), 1024 or more as rounded multiples of 1024 ("128K ctx"),
 * and anything smaller as the raw number ("512 ctx").
 *
 * @param {number} tokens - Context length in tokens.
 * @returns {string} Display label.
 */
function formatContextLength(tokens) {
  let amount;
  if (tokens >= 1e6) {
    amount = `${(tokens / 1e6).toFixed(1)}M`;
  } else if (tokens >= 1024) {
    amount = `${Math.round(tokens / 1024)}K`;
  } else {
    amount = `${tokens}`;
  }
  return `${amount} ctx`;
}
|
|
563978
|
+
/**
 * Renders capability flags as a compact "+"-joined tag string,
 * e.g. "vision+tools+think". Flags that are off are omitted.
 *
 * @param {{vision?: boolean, toolUse?: boolean, thinking?: boolean}} caps
 * @returns {string} Joined tags, or "" when no flag is set.
 */
function formatCaps(caps) {
  const tagByFlag = [
    ["vision", "vision"],
    ["toolUse", "tools"],
    ["thinking", "think"]
  ];
  return tagByFlag
    .filter(([flag]) => caps[flag])
    .map(([, tag]) => tag)
    .join("+");
}
|
|
563985
|
+
/**
 * Formats a timestamp as a short "time ago" label relative to now.
 *
 * Output steps: "just now" (under a minute, including future timestamps),
 * "<n>m ago", "<n>h ago", "<n>d ago" (under 7 days), "<n>w ago" (under
 * 5 weeks), then "<n>mo ago" using 30-day months.
 *
 * @param {string} iso2 - Timestamp accepted by the Date constructor.
 * @returns {string} The label, or "" when the timestamp is missing or
 *   cannot be parsed (previously rendered as "NaNmo ago").
 */
function formatRelativeTime(iso2) {
  const then = new Date(iso2).getTime();
  if (Number.isNaN(then)) return "";

  const minutes = Math.floor((Date.now() - then) / 6e4);
  if (minutes < 1) return "just now";
  if (minutes < 60) return `${minutes}m ago`;
  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;
  const days = Math.floor(hours / 24);
  if (days < 7) return `${days}d ago`;
  const weeks = Math.floor(days / 7);
  if (weeks < 5) return `${weeks}w ago`;
  return `${Math.floor(days / 30)}mo ago`;
}
|
|
564001
|
+
// Case-insensitive patterns that mark a model name or family as image-generating.
// Left unassigned until init_model_picker runs.
var IMAGE_GEN_PATTERNS;

// Initializer for packages/cli/src/tui/model-picker.ts, registered through the
// __esm wrapper (presumably the bundler's lazy module-init helper — confirm
// against its definition elsewhere in this bundle).
var init_model_picker = __esm({
  "packages/cli/src/tui/model-picker.ts"() {
    "use strict";
    init_dist();
    IMAGE_GEN_PATTERNS = [
      "flux",
      "z-image",
      "stable-diffusion",
      "sdxl",
      "dall",
      "kandinsky",
      "midjourney",
      "imagen"
    ].map((fragment) => new RegExp(fragment, "i"));
  }
});
|
|
564018
|
+
|
|
563488
564019
|
// packages/cli/src/tui/render.ts
|
|
563489
564020
|
var render_exports = {};
|
|
563490
564021
|
__export(render_exports, {
|
|
@@ -564353,8 +564884,9 @@ function renderModelList(models, current) {
|
|
|
564353
564884
|
${c3.bold("Available models:")}
|
|
564354
564885
|
|
|
564355
564886
|
`);
|
|
564887
|
+
const currentKey = stripLatest(current);
|
|
564356
564888
|
for (const m2 of models) {
|
|
564357
|
-
const isCurrent = m2.name ===
|
|
564889
|
+
const isCurrent = stripLatest(m2.name) === currentKey;
|
|
564358
564890
|
const marker = isCurrent ? c3.green("●") : c3.dim("○");
|
|
564359
564891
|
const name10 = isCurrent ? c3.bold(c3.green(m2.name)) : m2.name;
|
|
564360
564892
|
const size = c3.dim(m2.size);
|
|
@@ -564413,6 +564945,7 @@ var init_render = __esm({
|
|
|
564413
564945
|
init_config();
|
|
564414
564946
|
init_text_selection();
|
|
564415
564947
|
init_task_complete_box();
|
|
564948
|
+
init_model_picker();
|
|
564416
564949
|
isTTY2 = process.stdout.isTTY ?? false;
|
|
564417
564950
|
c3 = {
|
|
564418
564951
|
bold: (t2) => ansi2("1", t2),
|
|
@@ -566137,7 +566670,7 @@ import { spawn as spawn24, exec as exec2 } from "node:child_process";
|
|
|
566137
566670
|
import { EventEmitter as EventEmitter7 } from "node:events";
|
|
566138
566671
|
import { randomBytes as randomBytes18, timingSafeEqual } from "node:crypto";
|
|
566139
566672
|
import { URL as URL2 } from "node:url";
|
|
566140
|
-
import { loadavg, cpus as cpus2, totalmem as
|
|
566673
|
+
import { loadavg, cpus as cpus2, totalmem as totalmem4, freemem as freemem3 } from "node:os";
|
|
566141
566674
|
import { existsSync as existsSync84, readFileSync as readFileSync66, writeFileSync as writeFileSync42, unlinkSync as unlinkSync13, mkdirSync as mkdirSync47, readdirSync as readdirSync28, statSync as statSync31, statfsSync as statfsSync4 } from "node:fs";
|
|
566142
566675
|
import { join as join99 } from "node:path";
|
|
566143
566676
|
function cleanForwardHeaders(raw, targetHost) {
|
|
@@ -566243,7 +566776,7 @@ function parseRateLimitHeaders(headers) {
|
|
|
566243
566776
|
async function collectSystemMetricsAsync() {
|
|
566244
566777
|
const [l1, l5, l15] = loadavg();
|
|
566245
566778
|
const cores = cpus2().length;
|
|
566246
|
-
const totalMem =
|
|
566779
|
+
const totalMem = totalmem4();
|
|
566247
566780
|
const freeMem = freemem3();
|
|
566248
566781
|
const usedMem = totalMem - freeMem;
|
|
566249
566782
|
let disk = {
|
|
@@ -568835,716 +569368,188 @@ var init_call_agent = __esm({
|
|
|
568835
569368
|
feed.push({
|
|
568836
569369
|
ts: Date.now(),
|
|
568837
569370
|
source: "call",
|
|
568838
|
-
sourceId: this.clientId,
|
|
568839
|
-
summary: content,
|
|
568840
|
-
toolName,
|
|
568841
|
-
success
|
|
568842
|
-
});
|
|
568843
|
-
}
|
|
568844
|
-
if (event.type === "model_response" && event.content) {
|
|
568845
|
-
this.emit("response", event.content);
|
|
568846
|
-
}
|
|
568847
|
-
});
|
|
568848
|
-
}
|
|
568849
|
-
/** Process a voice transcript — queues if already processing */
|
|
568850
|
-
handleTranscript(text) {
|
|
568851
|
-
if (this.disposed) return;
|
|
568852
|
-
this.conversationHistory.push({ role: "user", text });
|
|
568853
|
-
if (this.processing) {
|
|
568854
|
-
this.pendingTranscripts.push(text);
|
|
568855
|
-
return;
|
|
568856
|
-
}
|
|
568857
|
-
this.processTranscript(text).catch((err) => {
|
|
568858
|
-
this.emit("error", err instanceof Error ? err : new Error(String(err)));
|
|
568859
|
-
});
|
|
568860
|
-
}
|
|
568861
|
-
/** Dispose and clean up */
|
|
568862
|
-
dispose() {
|
|
568863
|
-
this.disposed = true;
|
|
568864
|
-
this.pendingTranscripts.length = 0;
|
|
568865
|
-
this.runner = null;
|
|
568866
|
-
}
|
|
568867
|
-
// ── Private ──────────────────────────────────────────────────────────
|
|
568868
|
-
async processTranscript(text) {
|
|
568869
|
-
if (!this.runner || this.disposed) return;
|
|
568870
|
-
this.processing = true;
|
|
568871
|
-
try {
|
|
568872
|
-
const historyContext = this.conversationHistory.slice(-10).map((h) => `${h.role === "user" ? "User" : "You"}: ${h.text}`).join("\n");
|
|
568873
|
-
const feed = getActivityFeed();
|
|
568874
|
-
const activitySummary = feed.getSummary(
|
|
568875
|
-
this.tier === "admin" ? 20 : 10,
|
|
568876
|
-
this.tier === "admin"
|
|
568877
|
-
);
|
|
568878
|
-
const wantsAction = /\b(read|open|show|run|execute|check|look at|find|search|grep|edit|write|fix|test|build|deploy|install|create|delete|remove|update|change|modify|commit|push|pull)\b/i.test(text) && !/\b(how are you|what's up|hello|hi|hey|can you hear|stop|quit|bye|thanks|thank you|ok|okay|sure|yeah|yes|no)\b/i.test(text);
|
|
568879
|
-
if (!wantsAction) {
|
|
568880
|
-
try {
|
|
568881
|
-
const chatMessages = [
|
|
568882
|
-
{ role: "system", content: this.buildSystemPrompt() },
|
|
568883
|
-
...this.conversationHistory.slice(-6).map((h) => ({
|
|
568884
|
-
role: h.role === "user" ? "user" : "assistant",
|
|
568885
|
-
content: h.text
|
|
568886
|
-
})),
|
|
568887
|
-
{ role: "user", content: text }
|
|
568888
|
-
];
|
|
568889
|
-
const chatResult = await this.backend.chatCompletion({
|
|
568890
|
-
messages: chatMessages,
|
|
568891
|
-
tools: [],
|
|
568892
|
-
temperature: 0.4,
|
|
568893
|
-
maxTokens: 256,
|
|
568894
|
-
timeoutMs: 15e3
|
|
568895
|
-
});
|
|
568896
|
-
const reply = (chatResult.choices[0]?.message?.content ?? "").trim();
|
|
568897
|
-
if (!reply) return;
|
|
568898
|
-
this.conversationHistory.push({ role: "assistant", text: reply });
|
|
568899
|
-
this.emit("response", reply);
|
|
568900
|
-
} catch {
|
|
568901
|
-
this.emit("response", "Sorry, I couldn't process that.");
|
|
568902
|
-
}
|
|
568903
|
-
} else {
|
|
568904
|
-
const taskPrompt = [
|
|
568905
|
-
`User said: "${text}"`,
|
|
568906
|
-
"",
|
|
568907
|
-
historyContext ? `Conversation so far:
|
|
568908
|
-
${historyContext}
|
|
568909
|
-
` : "",
|
|
568910
|
-
`Background activity:
|
|
568911
|
-
${activitySummary}
|
|
568912
|
-
`,
|
|
568913
|
-
"The user is requesting an action. Use tools as needed, then call task_complete with a brief spoken summary of what you did (1-2 sentences)."
|
|
568914
|
-
].join("\n");
|
|
568915
|
-
const result = await this.runner.run(taskPrompt, `Working directory: ${this.repoRoot}`);
|
|
568916
|
-
if (result.summary) {
|
|
568917
|
-
this.conversationHistory.push({ role: "assistant", text: result.summary });
|
|
568918
|
-
}
|
|
568919
|
-
}
|
|
568920
|
-
} catch (err) {
|
|
568921
|
-
this.emit("error", err instanceof Error ? err : new Error(String(err)));
|
|
568922
|
-
} finally {
|
|
568923
|
-
this.processing = false;
|
|
568924
|
-
this.emit("done");
|
|
568925
|
-
if (this.pendingTranscripts.length > 0) {
|
|
568926
|
-
const next = this.pendingTranscripts.shift();
|
|
568927
|
-
this.processTranscript(next).catch((err) => {
|
|
568928
|
-
this.emit("error", err instanceof Error ? err : new Error(String(err)));
|
|
568929
|
-
});
|
|
568930
|
-
}
|
|
568931
|
-
}
|
|
568932
|
-
}
|
|
568933
|
-
buildSystemPrompt() {
|
|
568934
|
-
const base3 = [
|
|
568935
|
-
"You are a voice assistant on a LIVE AUDIO CALL. This is a real-time conversation.",
|
|
568936
|
-
"",
|
|
568937
|
-
"CRITICAL RULES FOR VOICE CALLS:",
|
|
568938
|
-
"1. ALWAYS respond IMMEDIATELY with speech. Do NOT use tools before responding.",
|
|
568939
|
-
"2. Your response goes through text-to-speech — keep it SHORT (1-3 sentences).",
|
|
568940
|
-
"3. NEVER use code blocks, markdown, or long technical text.",
|
|
568941
|
-
"4. Be conversational and natural, like talking to a colleague.",
|
|
568942
|
-
"5. Call task_complete with your spoken response as the summary.",
|
|
568943
|
-
"6. Only use tools (file_read, grep, shell, etc.) if the user EXPLICITLY asks you to look something up, run a command, or make a change. For normal conversation, NEVER call tools.",
|
|
568944
|
-
"7. If the user asks what's happening, summarize from the activity context below — do NOT run tools to find out."
|
|
568945
|
-
];
|
|
568946
|
-
if (this.opts.emotionContext) {
|
|
568947
|
-
base3.push("", "Mood:", this.opts.emotionContext);
|
|
568948
|
-
}
|
|
568949
|
-
if (this.tier === "admin") {
|
|
568950
|
-
base3.push(
|
|
568951
|
-
"",
|
|
568952
|
-
"ADMIN call — you CAN use tools IF the user explicitly requests an action (e.g. 'read that file', 'run the tests').",
|
|
568953
|
-
"But for general chat, status questions, or greetings — respond immediately WITHOUT tools."
|
|
568954
|
-
);
|
|
568955
|
-
} else {
|
|
568956
|
-
base3.push(
|
|
568957
|
-
"",
|
|
568958
|
-
"PUBLIC call — read-only access. Answer questions about the project conversationally."
|
|
568959
|
-
);
|
|
568960
|
-
}
|
|
568961
|
-
return base3.join("\n");
|
|
568962
|
-
}
|
|
568963
|
-
buildTools() {
|
|
568964
|
-
if (this.tier === "admin") {
|
|
568965
|
-
return this.buildAdminTools();
|
|
568966
|
-
}
|
|
568967
|
-
return this.buildPublicTools();
|
|
568968
|
-
}
|
|
568969
|
-
buildAdminTools() {
|
|
568970
|
-
const debateAdapter = async (prompt) => {
|
|
568971
|
-
const r2 = await this.backend.chatCompletion({
|
|
568972
|
-
messages: [{ role: "user", content: prompt }],
|
|
568973
|
-
tools: [],
|
|
568974
|
-
temperature: 0.7,
|
|
568975
|
-
maxTokens: 800,
|
|
568976
|
-
timeoutMs: 12e4
|
|
568977
|
-
});
|
|
568978
|
-
return r2.choices[0]?.message?.content ?? "";
|
|
568979
|
-
};
|
|
568980
|
-
const replayAdapter = async (prompt) => {
|
|
568981
|
-
const r2 = await this.backend.chatCompletion({
|
|
568982
|
-
messages: [{ role: "user", content: prompt }],
|
|
568983
|
-
tools: [],
|
|
568984
|
-
temperature: 0,
|
|
568985
|
-
maxTokens: 1500,
|
|
568986
|
-
timeoutMs: 12e4
|
|
568987
|
-
});
|
|
568988
|
-
return r2.choices[0]?.message?.content ?? "";
|
|
568989
|
-
};
|
|
568990
|
-
const tools = [
|
|
568991
|
-
new FileReadTool(this.repoRoot),
|
|
568992
|
-
new FileWriteTool(this.repoRoot),
|
|
568993
|
-
new FileEditTool(this.repoRoot),
|
|
568994
|
-
new ShellTool(this.repoRoot),
|
|
568995
|
-
new GrepSearchTool(this.repoRoot),
|
|
568996
|
-
new GlobFindTool(this.repoRoot),
|
|
568997
|
-
new ListDirectoryTool(this.repoRoot),
|
|
568998
|
-
new WebSearchTool(),
|
|
568999
|
-
new WebFetchTool(),
|
|
569000
|
-
new MemoryReadTool(this.repoRoot),
|
|
569001
|
-
new MemoryWriteTool(this.repoRoot),
|
|
569002
|
-
new MemorySearchTool(this.repoRoot),
|
|
569003
|
-
new DebateTool(debateAdapter),
|
|
569004
|
-
new ReplayWithInterventionTool({ workingDir: this.repoRoot, callable: replayAdapter })
|
|
569005
|
-
];
|
|
569006
|
-
return tools.map(adaptTool);
|
|
569007
|
-
}
|
|
569008
|
-
buildPublicTools() {
|
|
569009
|
-
const tools = [
|
|
569010
|
-
new FileReadTool(this.repoRoot),
|
|
569011
|
-
new GrepSearchTool(this.repoRoot),
|
|
569012
|
-
new GlobFindTool(this.repoRoot),
|
|
569013
|
-
new ListDirectoryTool(this.repoRoot),
|
|
569014
|
-
new MemoryReadTool(this.repoRoot),
|
|
569015
|
-
new MemorySearchTool(this.repoRoot)
|
|
569016
|
-
];
|
|
569017
|
-
return tools.map(adaptTool);
|
|
569018
|
-
}
|
|
569019
|
-
};
|
|
569020
|
-
}
|
|
569021
|
-
});
|
|
569022
|
-
|
|
569023
|
-
// packages/cli/src/tui/model-picker.ts
|
|
569024
|
-
import { totalmem as totalmem4 } from "node:os";
|
|
569025
|
-
function isImageGenModel(name10, family) {
|
|
569026
|
-
return IMAGE_GEN_PATTERNS.some((p2) => p2.test(name10) || family && p2.test(family));
|
|
569027
|
-
}
|
|
569028
|
-
function parseShowNumCtx(show) {
|
|
569029
|
-
const sources = [show.parameters, show.modelfile];
|
|
569030
|
-
for (const source of sources) {
|
|
569031
|
-
if (!source) continue;
|
|
569032
|
-
const match = source.match(/\b(?:PARAMETER\s+)?num_ctx\s+(\d+)/i);
|
|
569033
|
-
if (match) return parseInt(match[1], 10);
|
|
569034
|
-
}
|
|
569035
|
-
return null;
|
|
569036
|
-
}
|
|
569037
|
-
async function fetchOllamaModels(baseUrl) {
|
|
569038
|
-
const url = `${normalizeBaseUrl(baseUrl)}/api/tags`;
|
|
569039
|
-
const resp = await fetch(url, {
|
|
569040
|
-
signal: AbortSignal.timeout(1e4)
|
|
569041
|
-
});
|
|
569042
|
-
if (!resp.ok) {
|
|
569043
|
-
throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
|
|
569044
|
-
}
|
|
569045
|
-
const data = await resp.json();
|
|
569046
|
-
const models = data.models ?? [];
|
|
569047
|
-
const result = models.map((m2) => {
|
|
569048
|
-
const family = m2.details?.family;
|
|
569049
|
-
return {
|
|
569050
|
-
name: m2.name,
|
|
569051
|
-
size: formatBytes3(m2.size),
|
|
569052
|
-
sizeBytes: m2.size,
|
|
569053
|
-
modified: formatRelativeTime(m2.modified_at),
|
|
569054
|
-
parameterSize: m2.details?.parameter_size,
|
|
569055
|
-
contextLength: void 0,
|
|
569056
|
-
caps: void 0,
|
|
569057
|
-
isImageGen: isImageGenModel(m2.name, family),
|
|
569058
|
-
family
|
|
569059
|
-
};
|
|
569060
|
-
}).sort((a2, b) => b.sizeBytes - a2.sizeBytes);
|
|
569061
|
-
const normalized = normalizeBaseUrl(baseUrl);
|
|
569062
|
-
const showResults = await Promise.allSettled(
|
|
569063
|
-
result.map(
|
|
569064
|
-
(m2) => fetch(`${normalized}/api/show`, {
|
|
569065
|
-
method: "POST",
|
|
569066
|
-
headers: { "Content-Type": "application/json" },
|
|
569067
|
-
body: JSON.stringify({ name: m2.name }),
|
|
569068
|
-
signal: AbortSignal.timeout(5e3)
|
|
569069
|
-
}).then((r2) => r2.ok ? r2.json() : null)
|
|
569070
|
-
)
|
|
569071
|
-
);
|
|
569072
|
-
for (let i2 = 0; i2 < result.length; i2++) {
|
|
569073
|
-
const sr = showResults[i2];
|
|
569074
|
-
if (sr?.status !== "fulfilled" || !sr.value) continue;
|
|
569075
|
-
const show = sr.value;
|
|
569076
|
-
const explicitNumCtx = parseShowNumCtx(show);
|
|
569077
|
-
if (explicitNumCtx) {
|
|
569078
|
-
result[i2].contextLength = explicitNumCtx;
|
|
569079
|
-
continue;
|
|
569080
|
-
}
|
|
569081
|
-
if (show.model_info) {
|
|
569082
|
-
const info = show.model_info;
|
|
569083
|
-
const arch3 = info["general.architecture"];
|
|
569084
|
-
const paramCount = info["general.parameter_count"];
|
|
569085
|
-
const fileSizeGB = result[i2].sizeBytes > 0 ? result[i2].sizeBytes / 1024 ** 3 : paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;
|
|
569086
|
-
if (arch3) {
|
|
569087
|
-
const archMax = info[`${arch3}.context_length`];
|
|
569088
|
-
const nLayers = info[`${arch3}.block_count`];
|
|
569089
|
-
const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
|
|
569090
|
-
const keyDim = info[`${arch3}.attention.key_length`];
|
|
569091
|
-
const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
|
|
569092
|
-
if (archMax && nLayers && nKVHeads && keyDim && valDim) {
|
|
569093
|
-
const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
|
|
569094
|
-
result[i2].contextLength = estimateRealisticContext(kvBytesPerToken, archMax, fileSizeGB);
|
|
569095
|
-
} else if (archMax) {
|
|
569096
|
-
const kvEstimate = fileSizeGB <= 5 ? 524288 : fileSizeGB <= 20 ? 1048576 : 1572864;
|
|
569097
|
-
result[i2].contextLength = estimateRealisticContext(kvEstimate, archMax, fileSizeGB);
|
|
569098
|
-
}
|
|
569099
|
-
}
|
|
569100
|
-
}
|
|
569101
|
-
const modelCaps = { vision: false, toolUse: false, thinking: false };
|
|
569102
|
-
const nameLower = result[i2].name.toLowerCase();
|
|
569103
|
-
if (Array.isArray(show.capabilities)) {
|
|
569104
|
-
if (show.capabilities.includes("vision")) modelCaps.vision = true;
|
|
569105
|
-
if (show.capabilities.includes("tools")) modelCaps.toolUse = true;
|
|
569106
|
-
if (show.capabilities.includes("thinking")) modelCaps.thinking = true;
|
|
569107
|
-
}
|
|
569108
|
-
if (show.model_info) {
|
|
569109
|
-
for (const key of Object.keys(show.model_info)) {
|
|
569110
|
-
const k = key.toLowerCase();
|
|
569111
|
-
if (k.includes("vision.block_count") || k.includes("clip.") || k.includes("image_token_id") || k.includes("projector")) {
|
|
569112
|
-
const val = show.model_info[key];
|
|
569113
|
-
if (val !== null && val !== void 0 && val !== 0 && val !== "") {
|
|
569114
|
-
modelCaps.vision = true;
|
|
569115
|
-
}
|
|
569116
|
-
}
|
|
569117
|
-
}
|
|
569118
|
-
}
|
|
569119
|
-
if (/qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/.test(nameLower)) {
|
|
569120
|
-
modelCaps.toolUse = true;
|
|
569121
|
-
}
|
|
569122
|
-
if (show.template && (show.template.includes("<think>") || show.template.includes("thinking"))) {
|
|
569123
|
-
modelCaps.thinking = true;
|
|
569124
|
-
}
|
|
569125
|
-
result[i2].caps = modelCaps;
|
|
569126
|
-
}
|
|
569127
|
-
return result;
|
|
569128
|
-
}
|
|
569129
|
-
async function fetchOpenAIModels(baseUrl, apiKey) {
|
|
569130
|
-
const normalized = normalizeBaseUrl(baseUrl);
|
|
569131
|
-
const url = `${normalized}/v1/models`;
|
|
569132
|
-
const isAnthropic = /api\.anthropic\.com/i.test(baseUrl);
|
|
569133
|
-
const headers = {};
|
|
569134
|
-
if (apiKey) {
|
|
569135
|
-
if (isAnthropic) {
|
|
569136
|
-
headers["x-api-key"] = apiKey;
|
|
569137
|
-
headers["anthropic-version"] = "2023-06-01";
|
|
569138
|
-
} else {
|
|
569139
|
-
headers["Authorization"] = `Bearer ${apiKey}`;
|
|
569140
|
-
}
|
|
569141
|
-
}
|
|
569142
|
-
const resp = await fetch(url, {
|
|
569143
|
-
headers,
|
|
569144
|
-
signal: AbortSignal.timeout(15e3)
|
|
569145
|
-
});
|
|
569146
|
-
if (!resp.ok) {
|
|
569147
|
-
throw new Error(`Failed to fetch models: HTTP ${resp.status}`);
|
|
569148
|
-
}
|
|
569149
|
-
const data = await resp.json();
|
|
569150
|
-
const models = data.data ?? [];
|
|
569151
|
-
return models.map((m2) => ({
|
|
569152
|
-
name: m2.id,
|
|
569153
|
-
size: "",
|
|
569154
|
-
sizeBytes: 0,
|
|
569155
|
-
modified: m2.created ? formatRelativeTime(new Date(m2.created * 1e3).toISOString()) : "",
|
|
569156
|
-
parameterSize: m2.owned_by ?? void 0,
|
|
569157
|
-
contextLength: m2.context_length ?? m2.max_model_len ?? void 0
|
|
569158
|
-
})).sort((a2, b) => a2.name.localeCompare(b.name));
|
|
569159
|
-
}
|
|
569160
|
-
async function fetchPeerModels(peerId, authKey) {
|
|
569161
|
-
try {
|
|
569162
|
-
const { NexusTool: NexusTool2 } = await Promise.resolve().then(() => (init_dist5(), dist_exports));
|
|
569163
|
-
const { existsSync: existsSync131, readFileSync: readFileSync107 } = await import("node:fs");
|
|
569164
|
-
const { join: join148 } = await import("node:path");
|
|
569165
|
-
const cwd4 = process.cwd();
|
|
569166
|
-
const nexusTool = new NexusTool2(cwd4);
|
|
569167
|
-
const nexusDir = nexusTool.getNexusDir();
|
|
569168
|
-
let isLocalPeer = false;
|
|
569169
|
-
try {
|
|
569170
|
-
const statusPath = join148(nexusDir, "status.json");
|
|
569171
|
-
if (existsSync131(statusPath)) {
|
|
569172
|
-
const status = JSON.parse(readFileSync107(statusPath, "utf8"));
|
|
569173
|
-
if (status.peerId === peerId) isLocalPeer = true;
|
|
569174
|
-
}
|
|
569175
|
-
} catch {
|
|
569176
|
-
}
|
|
569177
|
-
if (isLocalPeer) {
|
|
569178
|
-
const pricingPath = join148(nexusDir, "pricing.json");
|
|
569179
|
-
if (existsSync131(pricingPath)) {
|
|
569180
|
-
try {
|
|
569181
|
-
const pricing = JSON.parse(readFileSync107(pricingPath, "utf8"));
|
|
569182
|
-
const localModels = (pricing.models || []).map((m2) => ({
|
|
569183
|
-
name: m2.model || "unknown",
|
|
569184
|
-
size: m2.parameterSize || "",
|
|
569185
|
-
modified: "",
|
|
569186
|
-
sizeBytes: 0,
|
|
569187
|
-
parameterSize: m2.parameterSize || "remote"
|
|
569188
|
-
}));
|
|
569189
|
-
if (localModels.length > 0) return localModels;
|
|
569190
|
-
} catch {
|
|
569191
|
-
}
|
|
569192
|
-
}
|
|
569193
|
-
}
|
|
569194
|
-
const cachePath = join148(nexusDir, "peer-models-cache.json");
|
|
569195
|
-
if (existsSync131(cachePath)) {
|
|
569196
|
-
try {
|
|
569197
|
-
const cache8 = JSON.parse(readFileSync107(cachePath, "utf8"));
|
|
569198
|
-
if (cache8.peerId === peerId && cache8.models?.length > 0) {
|
|
569199
|
-
const age = Date.now() - new Date(cache8.cachedAt).getTime();
|
|
569200
|
-
if (age < 5 * 60 * 1e3) {
|
|
569201
|
-
return cache8.models.map((m2) => ({
|
|
569202
|
-
name: m2.name || "unknown",
|
|
569203
|
-
size: m2.size || m2.parameterSize || "",
|
|
569204
|
-
modified: "",
|
|
569205
|
-
sizeBytes: 0,
|
|
569206
|
-
parameterSize: m2.parameterSize || "remote"
|
|
569207
|
-
}));
|
|
569371
|
+
sourceId: this.clientId,
|
|
569372
|
+
summary: content,
|
|
569373
|
+
toolName,
|
|
569374
|
+
success
|
|
569375
|
+
});
|
|
569208
569376
|
}
|
|
569209
|
-
|
|
569210
|
-
|
|
569211
|
-
}
|
|
569212
|
-
}
|
|
569213
|
-
try {
|
|
569214
|
-
const capsResult = await nexusTool.execute({
|
|
569215
|
-
action: "query_peer_caps",
|
|
569216
|
-
peer_id: peerId,
|
|
569217
|
-
...authKey ? { auth_key: authKey } : {}
|
|
569218
|
-
});
|
|
569219
|
-
if (capsResult.success && capsResult.output) {
|
|
569220
|
-
let capsData = null;
|
|
569221
|
-
try {
|
|
569222
|
-
capsData = JSON.parse(capsResult.output);
|
|
569223
|
-
} catch {
|
|
569224
|
-
}
|
|
569225
|
-
if (capsData?.models && capsData.models.length > 0) {
|
|
569226
|
-
return capsData.models.map((m2) => ({
|
|
569227
|
-
name: m2.name || "unknown",
|
|
569228
|
-
size: m2.parameterSize || "",
|
|
569229
|
-
modified: "",
|
|
569230
|
-
sizeBytes: 0,
|
|
569231
|
-
parameterSize: m2.parameterSize || "remote"
|
|
569232
|
-
}));
|
|
569233
|
-
}
|
|
569234
|
-
if (capsData?.capabilities && capsData.capabilities.length > 0) {
|
|
569235
|
-
const models = [];
|
|
569236
|
-
for (const cap of capsData.capabilities) {
|
|
569237
|
-
if (typeof cap === "string" && cap.startsWith("inference:")) {
|
|
569238
|
-
const capName = cap.slice(10);
|
|
569239
|
-
const modelName = capName.replace(/_(\d+[bBmMkK])$/, ":$1").replace(/_latest$/, ":latest");
|
|
569240
|
-
models.push({
|
|
569241
|
-
name: modelName,
|
|
569242
|
-
size: "",
|
|
569243
|
-
modified: "",
|
|
569244
|
-
sizeBytes: 0,
|
|
569245
|
-
parameterSize: "remote"
|
|
569246
|
-
});
|
|
569247
|
-
}
|
|
569377
|
+
if (event.type === "model_response" && event.content) {
|
|
569378
|
+
this.emit("response", event.content);
|
|
569248
569379
|
}
|
|
569249
|
-
|
|
569380
|
+
});
|
|
569381
|
+
}
|
|
569382
|
+
/** Process a voice transcript — queues if already processing */
|
|
569383
|
+
handleTranscript(text) {
|
|
569384
|
+
if (this.disposed) return;
|
|
569385
|
+
this.conversationHistory.push({ role: "user", text });
|
|
569386
|
+
if (this.processing) {
|
|
569387
|
+
this.pendingTranscripts.push(text);
|
|
569388
|
+
return;
|
|
569250
569389
|
}
|
|
569390
|
+
this.processTranscript(text).catch((err) => {
|
|
569391
|
+
this.emit("error", err instanceof Error ? err : new Error(String(err)));
|
|
569392
|
+
});
|
|
569251
569393
|
}
|
|
569252
|
-
|
|
569253
|
-
|
|
569254
|
-
|
|
569255
|
-
|
|
569256
|
-
|
|
569257
|
-
|
|
569258
|
-
|
|
569259
|
-
|
|
569260
|
-
|
|
569394
|
+
/** Dispose and clean up */
|
|
569395
|
+
dispose() {
|
|
569396
|
+
this.disposed = true;
|
|
569397
|
+
this.pendingTranscripts.length = 0;
|
|
569398
|
+
this.runner = null;
|
|
569399
|
+
}
|
|
569400
|
+
// ── Private ──────────────────────────────────────────────────────────
|
|
569401
|
+
async processTranscript(text) {
|
|
569402
|
+
if (!this.runner || this.disposed) return;
|
|
569403
|
+
this.processing = true;
|
|
569261
569404
|
try {
|
|
569262
|
-
|
|
569263
|
-
|
|
569264
|
-
|
|
569265
|
-
|
|
569266
|
-
|
|
569267
|
-
|
|
569268
|
-
|
|
569269
|
-
|
|
569270
|
-
|
|
569271
|
-
|
|
569272
|
-
|
|
569273
|
-
|
|
569274
|
-
|
|
569275
|
-
|
|
569276
|
-
|
|
569405
|
+
const historyContext = this.conversationHistory.slice(-10).map((h) => `${h.role === "user" ? "User" : "You"}: ${h.text}`).join("\n");
|
|
569406
|
+
const feed = getActivityFeed();
|
|
569407
|
+
const activitySummary = feed.getSummary(
|
|
569408
|
+
this.tier === "admin" ? 20 : 10,
|
|
569409
|
+
this.tier === "admin"
|
|
569410
|
+
);
|
|
569411
|
+
const wantsAction = /\b(read|open|show|run|execute|check|look at|find|search|grep|edit|write|fix|test|build|deploy|install|create|delete|remove|update|change|modify|commit|push|pull)\b/i.test(text) && !/\b(how are you|what's up|hello|hi|hey|can you hear|stop|quit|bye|thanks|thank you|ok|okay|sure|yeah|yes|no)\b/i.test(text);
|
|
569412
|
+
if (!wantsAction) {
|
|
569413
|
+
try {
|
|
569414
|
+
const chatMessages = [
|
|
569415
|
+
{ role: "system", content: this.buildSystemPrompt() },
|
|
569416
|
+
...this.conversationHistory.slice(-6).map((h) => ({
|
|
569417
|
+
role: h.role === "user" ? "user" : "assistant",
|
|
569418
|
+
content: h.text
|
|
569419
|
+
})),
|
|
569420
|
+
{ role: "user", content: text }
|
|
569421
|
+
];
|
|
569422
|
+
const chatResult = await this.backend.chatCompletion({
|
|
569423
|
+
messages: chatMessages,
|
|
569424
|
+
tools: [],
|
|
569425
|
+
temperature: 0.4,
|
|
569426
|
+
maxTokens: 256,
|
|
569427
|
+
timeoutMs: 15e3
|
|
569277
569428
|
});
|
|
569429
|
+
const reply = (chatResult.choices[0]?.message?.content ?? "").trim();
|
|
569430
|
+
if (!reply) return;
|
|
569431
|
+
this.conversationHistory.push({ role: "assistant", text: reply });
|
|
569432
|
+
this.emit("response", reply);
|
|
569433
|
+
} catch {
|
|
569434
|
+
this.emit("response", "Sorry, I couldn't process that.");
|
|
569435
|
+
}
|
|
569436
|
+
} else {
|
|
569437
|
+
const taskPrompt = [
|
|
569438
|
+
`User said: "${text}"`,
|
|
569439
|
+
"",
|
|
569440
|
+
historyContext ? `Conversation so far:
|
|
569441
|
+
${historyContext}
|
|
569442
|
+
` : "",
|
|
569443
|
+
`Background activity:
|
|
569444
|
+
${activitySummary}
|
|
569445
|
+
`,
|
|
569446
|
+
"The user is requesting an action. Use tools as needed, then call task_complete with a brief spoken summary of what you did (1-2 sentences)."
|
|
569447
|
+
].join("\n");
|
|
569448
|
+
const result = await this.runner.run(taskPrompt, `Working directory: ${this.repoRoot}`);
|
|
569449
|
+
if (result.summary) {
|
|
569450
|
+
this.conversationHistory.push({ role: "assistant", text: result.summary });
|
|
569278
569451
|
}
|
|
569279
569452
|
}
|
|
569280
|
-
|
|
569453
|
+
} catch (err) {
|
|
569454
|
+
this.emit("error", err instanceof Error ? err : new Error(String(err)));
|
|
569455
|
+
} finally {
|
|
569456
|
+
this.processing = false;
|
|
569457
|
+
this.emit("done");
|
|
569458
|
+
if (this.pendingTranscripts.length > 0) {
|
|
569459
|
+
const next = this.pendingTranscripts.shift();
|
|
569460
|
+
this.processTranscript(next).catch((err) => {
|
|
569461
|
+
this.emit("error", err instanceof Error ? err : new Error(String(err)));
|
|
569462
|
+
});
|
|
569463
|
+
}
|
|
569281
569464
|
}
|
|
569282
569465
|
}
|
|
569283
|
-
|
|
569284
|
-
|
|
569285
|
-
|
|
569286
|
-
|
|
569287
|
-
|
|
569288
|
-
|
|
569289
|
-
|
|
569290
|
-
|
|
569291
|
-
|
|
569292
|
-
|
|
569293
|
-
|
|
569294
|
-
|
|
569295
|
-
|
|
569296
|
-
|
|
569297
|
-
|
|
569298
|
-
size: "",
|
|
569299
|
-
modified: "",
|
|
569300
|
-
sizeBytes: 0,
|
|
569301
|
-
parameterSize: "remote"
|
|
569302
|
-
});
|
|
569466
|
+
buildSystemPrompt() {
|
|
569467
|
+
const base3 = [
|
|
569468
|
+
"You are a voice assistant on a LIVE AUDIO CALL. This is a real-time conversation.",
|
|
569469
|
+
"",
|
|
569470
|
+
"CRITICAL RULES FOR VOICE CALLS:",
|
|
569471
|
+
"1. ALWAYS respond IMMEDIATELY with speech. Do NOT use tools before responding.",
|
|
569472
|
+
"2. Your response goes through text-to-speech — keep it SHORT (1-3 sentences).",
|
|
569473
|
+
"3. NEVER use code blocks, markdown, or long technical text.",
|
|
569474
|
+
"4. Be conversational and natural, like talking to a colleague.",
|
|
569475
|
+
"5. Call task_complete with your spoken response as the summary.",
|
|
569476
|
+
"6. Only use tools (file_read, grep, shell, etc.) if the user EXPLICITLY asks you to look something up, run a command, or make a change. For normal conversation, NEVER call tools.",
|
|
569477
|
+
"7. If the user asks what's happening, summarize from the activity context below — do NOT run tools to find out."
|
|
569478
|
+
];
|
|
569479
|
+
if (this.opts.emotionContext) {
|
|
569480
|
+
base3.push("", "Mood:", this.opts.emotionContext);
|
|
569303
569481
|
}
|
|
569304
|
-
if (
|
|
569305
|
-
|
|
569306
|
-
|
|
569307
|
-
|
|
569308
|
-
|
|
569309
|
-
|
|
569310
|
-
|
|
569311
|
-
|
|
569312
|
-
|
|
569313
|
-
|
|
569314
|
-
|
|
569315
|
-
size: m2.parameterSize || "",
|
|
569316
|
-
modified: "",
|
|
569317
|
-
sizeBytes: 0,
|
|
569318
|
-
parameterSize: m2.parameterSize || "remote"
|
|
569319
|
-
}));
|
|
569320
|
-
} catch {
|
|
569482
|
+
if (this.tier === "admin") {
|
|
569483
|
+
base3.push(
|
|
569484
|
+
"",
|
|
569485
|
+
"ADMIN call — you CAN use tools IF the user explicitly requests an action (e.g. 'read that file', 'run the tests').",
|
|
569486
|
+
"But for general chat, status questions, or greetings — respond immediately WITHOUT tools."
|
|
569487
|
+
);
|
|
569488
|
+
} else {
|
|
569489
|
+
base3.push(
|
|
569490
|
+
"",
|
|
569491
|
+
"PUBLIC call — read-only access. Answer questions about the project conversationally."
|
|
569492
|
+
);
|
|
569321
569493
|
}
|
|
569494
|
+
return base3.join("\n");
|
|
569322
569495
|
}
|
|
569323
|
-
|
|
569324
|
-
|
|
569325
|
-
|
|
569326
|
-
return [];
|
|
569327
|
-
}
|
|
569328
|
-
}
|
|
569329
|
-
async function fetchModels(baseUrl, apiKey) {
|
|
569330
|
-
if (baseUrl.startsWith("peer://")) {
|
|
569331
|
-
return fetchPeerModels(baseUrl.slice(7), apiKey);
|
|
569332
|
-
}
|
|
569333
|
-
const provider = detectProvider(baseUrl);
|
|
569334
|
-
if (provider.id === "ollama") {
|
|
569335
|
-
let ollamaErr;
|
|
569336
|
-
try {
|
|
569337
|
-
return await fetchOllamaModels(baseUrl);
|
|
569338
|
-
} catch (err) {
|
|
569339
|
-
ollamaErr = err instanceof Error ? err : new Error(String(err));
|
|
569340
|
-
try {
|
|
569341
|
-
return await fetchOpenAIModels(baseUrl, apiKey);
|
|
569342
|
-
} catch {
|
|
569343
|
-
throw new Error(`Cannot reach Ollama at ${baseUrl}: ${ollamaErr.message}`);
|
|
569344
|
-
}
|
|
569345
|
-
}
|
|
569346
|
-
}
|
|
569347
|
-
let lastErr;
|
|
569348
|
-
for (let attempt = 0; attempt < 2; attempt++) {
|
|
569349
|
-
try {
|
|
569350
|
-
return await fetchOpenAIModels(baseUrl, apiKey);
|
|
569351
|
-
} catch (err) {
|
|
569352
|
-
lastErr = err instanceof Error ? err : new Error(String(err));
|
|
569353
|
-
if (attempt === 0) await new Promise((r2) => setTimeout(r2, 1e3));
|
|
569354
|
-
}
|
|
569355
|
-
}
|
|
569356
|
-
try {
|
|
569357
|
-
return await fetchOllamaModels(baseUrl);
|
|
569358
|
-
} catch {
|
|
569359
|
-
throw new Error(`Cannot fetch models from ${provider.label} at ${baseUrl}: ${lastErr?.message ?? "unknown error"}`);
|
|
569360
|
-
}
|
|
569361
|
-
}
|
|
569362
|
-
function findModel(models, query) {
|
|
569363
|
-
const exact = models.find((m2) => m2.name === query);
|
|
569364
|
-
if (exact) return exact;
|
|
569365
|
-
const partial = models.find((m2) => m2.name.startsWith(query));
|
|
569366
|
-
if (partial) return partial;
|
|
569367
|
-
const fuzzy = models.find((m2) => m2.name.includes(query));
|
|
569368
|
-
return fuzzy;
|
|
569369
|
-
}
|
|
569370
|
-
async function queryModelContextSize(baseUrl, modelName) {
|
|
569371
|
-
try {
|
|
569372
|
-
const normalized = normalizeBaseUrl(baseUrl);
|
|
569373
|
-
const res = await fetch(`${normalized}/api/show`, {
|
|
569374
|
-
method: "POST",
|
|
569375
|
-
headers: { "Content-Type": "application/json" },
|
|
569376
|
-
body: JSON.stringify({ name: modelName }),
|
|
569377
|
-
signal: AbortSignal.timeout(1e4)
|
|
569378
|
-
});
|
|
569379
|
-
if (!res.ok) return null;
|
|
569380
|
-
const data = await res.json();
|
|
569381
|
-
const explicitNumCtx = parseShowNumCtx(data);
|
|
569382
|
-
if (explicitNumCtx) return explicitNumCtx;
|
|
569383
|
-
if (data.model_info) {
|
|
569384
|
-
const info = data.model_info;
|
|
569385
|
-
const arch3 = info["general.architecture"];
|
|
569386
|
-
const paramCount = info["general.parameter_count"];
|
|
569387
|
-
const modelSizeGB2 = paramCount ? paramCount * 0.6 / 1024 ** 3 : 4;
|
|
569388
|
-
if (arch3) {
|
|
569389
|
-
const archMax = info[`${arch3}.context_length`];
|
|
569390
|
-
const nLayers = info[`${arch3}.block_count`];
|
|
569391
|
-
const nKVHeads = info[`${arch3}.attention.head_count_kv`] ?? info[`${arch3}.attention.head_count`];
|
|
569392
|
-
const keyDim = info[`${arch3}.attention.key_length`];
|
|
569393
|
-
const valDim = info[`${arch3}.attention.value_length`] ?? keyDim;
|
|
569394
|
-
if (archMax && nLayers && nKVHeads && keyDim && valDim) {
|
|
569395
|
-
const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
|
|
569396
|
-
return estimateRealisticContext(kvBytesPerToken, archMax, modelSizeGB2);
|
|
569397
|
-
}
|
|
569398
|
-
if (archMax) {
|
|
569399
|
-
const kvEstimate = modelSizeGB2 <= 5 ? 524288 : modelSizeGB2 <= 20 ? 1048576 : 1572864;
|
|
569400
|
-
return estimateRealisticContext(kvEstimate, archMax, modelSizeGB2);
|
|
569496
|
+
buildTools() {
|
|
569497
|
+
if (this.tier === "admin") {
|
|
569498
|
+
return this.buildAdminTools();
|
|
569401
569499
|
}
|
|
569500
|
+
return this.buildPublicTools();
|
|
569402
569501
|
}
|
|
569403
|
-
|
|
569404
|
-
|
|
569405
|
-
|
|
569406
|
-
|
|
569407
|
-
|
|
569408
|
-
|
|
569409
|
-
|
|
569410
|
-
|
|
569411
|
-
|
|
569412
|
-
|
|
569413
|
-
|
|
569414
|
-
|
|
569415
|
-
|
|
569416
|
-
|
|
569417
|
-
|
|
569418
|
-
|
|
569419
|
-
|
|
569420
|
-
|
|
569421
|
-
}
|
|
569422
|
-
|
|
569423
|
-
|
|
569424
|
-
|
|
569425
|
-
|
|
569426
|
-
|
|
569427
|
-
|
|
569428
|
-
|
|
569429
|
-
|
|
569430
|
-
|
|
569431
|
-
|
|
569432
|
-
|
|
569433
|
-
|
|
569434
|
-
|
|
569435
|
-
|
|
569436
|
-
|
|
569437
|
-
|
|
569438
|
-
|
|
569439
|
-
|
|
569440
|
-
|
|
569441
|
-
}
|
|
569442
|
-
async function queryModelCapabilities(baseUrl, modelName) {
|
|
569443
|
-
const caps = { vision: false, toolUse: false, thinking: false };
|
|
569444
|
-
if (baseUrl.startsWith("peer://")) {
|
|
569445
|
-
const nameLower = modelName.toLowerCase();
|
|
569446
|
-
if (/qwen3|qwen2\.5|llama3\.[13]|mistral|mixtral|command-r|gemma3|devstral|deepseek/.test(nameLower)) {
|
|
569447
|
-
caps.toolUse = true;
|
|
569448
|
-
}
|
|
569449
|
-
if (/qwen3|deepseek-r1/.test(nameLower)) {
|
|
569450
|
-
caps.thinking = true;
|
|
569451
|
-
}
|
|
569452
|
-
return caps;
|
|
569453
|
-
}
|
|
569454
|
-
try {
|
|
569455
|
-
const normalized = normalizeBaseUrl(baseUrl);
|
|
569456
|
-
const res = await fetch(`${normalized}/api/show`, {
|
|
569457
|
-
method: "POST",
|
|
569458
|
-
headers: { "Content-Type": "application/json" },
|
|
569459
|
-
body: JSON.stringify({ name: modelName }),
|
|
569460
|
-
signal: AbortSignal.timeout(1e4)
|
|
569461
|
-
});
|
|
569462
|
-
if (!res.ok) return caps;
|
|
569463
|
-
const data = await res.json();
|
|
569464
|
-
if (Array.isArray(data.capabilities)) {
|
|
569465
|
-
if (data.capabilities.includes("vision")) caps.vision = true;
|
|
569466
|
-
if (data.capabilities.includes("tools")) caps.toolUse = true;
|
|
569467
|
-
if (data.capabilities.includes("thinking")) caps.thinking = true;
|
|
569468
|
-
}
|
|
569469
|
-
if (data.model_info) {
|
|
569470
|
-
for (const key of Object.keys(data.model_info)) {
|
|
569471
|
-
const k = key.toLowerCase();
|
|
569472
|
-
if (k.includes("vision.block_count") || k.includes("clip.") || k.includes("image_token_id") || k.includes("projector") || k.includes("vision.embedding_length")) {
|
|
569473
|
-
const val = data.model_info[key];
|
|
569474
|
-
if (val !== null && val !== void 0 && val !== 0 && val !== "") {
|
|
569475
|
-
caps.vision = true;
|
|
569476
|
-
}
|
|
569477
|
-
}
|
|
569502
|
+
buildAdminTools() {
|
|
569503
|
+
const debateAdapter = async (prompt) => {
|
|
569504
|
+
const r2 = await this.backend.chatCompletion({
|
|
569505
|
+
messages: [{ role: "user", content: prompt }],
|
|
569506
|
+
tools: [],
|
|
569507
|
+
temperature: 0.7,
|
|
569508
|
+
maxTokens: 800,
|
|
569509
|
+
timeoutMs: 12e4
|
|
569510
|
+
});
|
|
569511
|
+
return r2.choices[0]?.message?.content ?? "";
|
|
569512
|
+
};
|
|
569513
|
+
const replayAdapter = async (prompt) => {
|
|
569514
|
+
const r2 = await this.backend.chatCompletion({
|
|
569515
|
+
messages: [{ role: "user", content: prompt }],
|
|
569516
|
+
tools: [],
|
|
569517
|
+
temperature: 0,
|
|
569518
|
+
maxTokens: 1500,
|
|
569519
|
+
timeoutMs: 12e4
|
|
569520
|
+
});
|
|
569521
|
+
return r2.choices[0]?.message?.content ?? "";
|
|
569522
|
+
};
|
|
569523
|
+
const tools = [
|
|
569524
|
+
new FileReadTool(this.repoRoot),
|
|
569525
|
+
new FileWriteTool(this.repoRoot),
|
|
569526
|
+
new FileEditTool(this.repoRoot),
|
|
569527
|
+
new ShellTool(this.repoRoot),
|
|
569528
|
+
new GrepSearchTool(this.repoRoot),
|
|
569529
|
+
new GlobFindTool(this.repoRoot),
|
|
569530
|
+
new ListDirectoryTool(this.repoRoot),
|
|
569531
|
+
new WebSearchTool(),
|
|
569532
|
+
new WebFetchTool(),
|
|
569533
|
+
new MemoryReadTool(this.repoRoot),
|
|
569534
|
+
new MemoryWriteTool(this.repoRoot),
|
|
569535
|
+
new MemorySearchTool(this.repoRoot),
|
|
569536
|
+
new DebateTool(debateAdapter),
|
|
569537
|
+
new ReplayWithInterventionTool({ workingDir: this.repoRoot, callable: replayAdapter })
|
|
569538
|
+
];
|
|
569539
|
+
return tools.map(adaptTool);
|
|
569478
569540
|
}
|
|
569479
|
-
|
|
569480
|
-
|
|
569481
|
-
|
|
569482
|
-
|
|
569483
|
-
|
|
569484
|
-
|
|
569485
|
-
|
|
569486
|
-
|
|
569541
|
+
buildPublicTools() {
|
|
569542
|
+
const tools = [
|
|
569543
|
+
new FileReadTool(this.repoRoot),
|
|
569544
|
+
new GrepSearchTool(this.repoRoot),
|
|
569545
|
+
new GlobFindTool(this.repoRoot),
|
|
569546
|
+
new ListDirectoryTool(this.repoRoot),
|
|
569547
|
+
new MemoryReadTool(this.repoRoot),
|
|
569548
|
+
new MemorySearchTool(this.repoRoot)
|
|
569549
|
+
];
|
|
569550
|
+
return tools.map(adaptTool);
|
|
569487
569551
|
}
|
|
569488
|
-
}
|
|
569489
|
-
return caps;
|
|
569490
|
-
} catch {
|
|
569491
|
-
return caps;
|
|
569492
|
-
}
|
|
569493
|
-
}
|
|
569494
|
-
function formatBytes3(bytes) {
|
|
569495
|
-
if (bytes < 1024) return `${bytes} B`;
|
|
569496
|
-
const units = ["KB", "MB", "GB", "TB"];
|
|
569497
|
-
let size = bytes;
|
|
569498
|
-
let i2 = -1;
|
|
569499
|
-
while (size >= 1024 && i2 < units.length - 1) {
|
|
569500
|
-
size /= 1024;
|
|
569501
|
-
i2++;
|
|
569502
|
-
}
|
|
569503
|
-
return `${size.toFixed(1)} ${units[i2] ?? "B"}`;
|
|
569504
|
-
}
|
|
569505
|
-
function formatContextLength(tokens) {
|
|
569506
|
-
if (tokens >= 1e6) return `${(tokens / 1e6).toFixed(1)}M ctx`;
|
|
569507
|
-
if (tokens >= 1024) return `${Math.round(tokens / 1024)}K ctx`;
|
|
569508
|
-
return `${tokens} ctx`;
|
|
569509
|
-
}
|
|
569510
|
-
function formatCaps(caps) {
|
|
569511
|
-
const tags = [];
|
|
569512
|
-
if (caps.vision) tags.push("vision");
|
|
569513
|
-
if (caps.toolUse) tags.push("tools");
|
|
569514
|
-
if (caps.thinking) tags.push("think");
|
|
569515
|
-
return tags.join("+");
|
|
569516
|
-
}
|
|
569517
|
-
function formatRelativeTime(iso2) {
|
|
569518
|
-
const now = Date.now();
|
|
569519
|
-
const then = new Date(iso2).getTime();
|
|
569520
|
-
const diffMs = now - then;
|
|
569521
|
-
const minutes = Math.floor(diffMs / 6e4);
|
|
569522
|
-
if (minutes < 1) return "just now";
|
|
569523
|
-
if (minutes < 60) return `${minutes}m ago`;
|
|
569524
|
-
const hours = Math.floor(minutes / 60);
|
|
569525
|
-
if (hours < 24) return `${hours}h ago`;
|
|
569526
|
-
const days = Math.floor(hours / 24);
|
|
569527
|
-
if (days < 7) return `${days}d ago`;
|
|
569528
|
-
const weeks = Math.floor(days / 7);
|
|
569529
|
-
if (weeks < 5) return `${weeks}w ago`;
|
|
569530
|
-
const months = Math.floor(days / 30);
|
|
569531
|
-
return `${months}mo ago`;
|
|
569532
|
-
}
|
|
569533
|
-
var IMAGE_GEN_PATTERNS;
|
|
569534
|
-
var init_model_picker = __esm({
|
|
569535
|
-
"packages/cli/src/tui/model-picker.ts"() {
|
|
569536
|
-
"use strict";
|
|
569537
|
-
init_dist();
|
|
569538
|
-
IMAGE_GEN_PATTERNS = [
|
|
569539
|
-
/flux/i,
|
|
569540
|
-
/z-image/i,
|
|
569541
|
-
/stable-diffusion/i,
|
|
569542
|
-
/sdxl/i,
|
|
569543
|
-
/dall/i,
|
|
569544
|
-
/kandinsky/i,
|
|
569545
|
-
/midjourney/i,
|
|
569546
|
-
/imagen/i
|
|
569547
|
-
];
|
|
569552
|
+
};
|
|
569548
569553
|
}
|
|
569549
569554
|
});
|
|
569550
569555
|
|
|
@@ -577955,6 +577960,7 @@ __export(setup_exports, {
|
|
|
577955
577960
|
ensurePythonVenv: () => ensurePythonVenv,
|
|
577956
577961
|
ensureVisionDeps: () => ensureVisionDeps,
|
|
577957
577962
|
expandedModelName: () => expandedModelName,
|
|
577963
|
+
formatExpandedContextDiagnostic: () => formatExpandedContextDiagnostic,
|
|
577958
577964
|
getLatestOllamaVersion: () => getLatestOllamaVersion,
|
|
577959
577965
|
getOllamaVersion: () => getOllamaVersion,
|
|
577960
577966
|
hasCmd: () => hasCmd,
|
|
@@ -578020,7 +578026,7 @@ async function needsTextToolMode(modelName, backendUrl2) {
|
|
|
578020
578026
|
const hasTools = await checkToolSupport(modelName, backendUrl2);
|
|
578021
578027
|
return !hasTools;
|
|
578022
578028
|
}
|
|
578023
|
-
function detectUnifiedMemory() {
|
|
578029
|
+
function detectUnifiedMemory(hasDiscreteGpu = false) {
|
|
578024
578030
|
if (process.platform === "darwin" && process.arch === "arm64") return true;
|
|
578025
578031
|
if (process.platform === "linux") {
|
|
578026
578032
|
try {
|
|
@@ -578034,6 +578040,16 @@ function detectUnifiedMemory() {
|
|
|
578034
578040
|
}
|
|
578035
578041
|
} catch {
|
|
578036
578042
|
}
|
|
578043
|
+
try {
|
|
578044
|
+
if (existsSync91("/proc/device-tree/model")) {
|
|
578045
|
+
const model = readFileSync74("/proc/device-tree/model", "utf8").replace(/\0+$/, "").toLowerCase();
|
|
578046
|
+
if (/jetson|tegra|orin|xavier|nano|raspberry|rockchip|rk\d{4}|mt\d{4}/.test(model)) {
|
|
578047
|
+
return true;
|
|
578048
|
+
}
|
|
578049
|
+
}
|
|
578050
|
+
} catch {
|
|
578051
|
+
}
|
|
578052
|
+
if (process.arch === "arm64" && !hasDiscreteGpu) return true;
|
|
578037
578053
|
}
|
|
578038
578054
|
return false;
|
|
578039
578055
|
}
|
|
@@ -578127,11 +578143,15 @@ function detectSystemSpecs() {
|
|
|
578127
578143
|
} catch {
|
|
578128
578144
|
}
|
|
578129
578145
|
}
|
|
578130
|
-
const unifiedMemory = detectUnifiedMemory();
|
|
578146
|
+
const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
|
|
578131
578147
|
if (unifiedMemory && totalRamGB > 0) {
|
|
578132
578148
|
const floorGB = totalRamGB * 0.8;
|
|
578133
578149
|
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578134
578150
|
}
|
|
578151
|
+
if (!unifiedMemory && totalRamGB > 0) {
|
|
578152
|
+
const floorGB = totalRamGB * 0.75;
|
|
578153
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578154
|
+
}
|
|
578135
578155
|
return {
|
|
578136
578156
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578137
578157
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
@@ -578191,11 +578211,15 @@ async function detectSystemSpecsAsync() {
|
|
|
578191
578211
|
} catch {
|
|
578192
578212
|
}
|
|
578193
578213
|
}
|
|
578194
|
-
const unifiedMemory = detectUnifiedMemory();
|
|
578214
|
+
const unifiedMemory = detectUnifiedMemory(gpuVramGB > 0);
|
|
578195
578215
|
if (unifiedMemory && totalRamGB > 0) {
|
|
578196
578216
|
const floorGB = totalRamGB * 0.8;
|
|
578197
578217
|
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578198
578218
|
}
|
|
578219
|
+
if (!unifiedMemory && totalRamGB > 0) {
|
|
578220
|
+
const floorGB = totalRamGB * 0.75;
|
|
578221
|
+
if (availableRamGB < floorGB) availableRamGB = floorGB;
|
|
578222
|
+
}
|
|
578199
578223
|
return {
|
|
578200
578224
|
totalRamGB: Math.round(totalRamGB * 10) / 10,
|
|
578201
578225
|
availableRamGB: Math.round(availableRamGB * 10) / 10,
|
|
@@ -578259,17 +578283,41 @@ function formatContextLabel(numCtx) {
|
|
|
578259
578283
|
return numCtx >= 1024 ? `${Math.floor(numCtx / 1024)}K` : String(numCtx);
|
|
578260
578284
|
}
|
|
578261
578285
|
function calculateExpandedVariantContextWindow(specs, modelSizeGB2, kvBytesPerToken, archMax) {
|
|
578262
|
-
const
|
|
578263
|
-
|
|
578264
|
-
|
|
578265
|
-
|
|
578266
|
-
|
|
578267
|
-
);
|
|
578268
|
-
const
|
|
578286
|
+
const ramBudget = specs.availableRamGB > 0 ? specs.availableRamGB : specs.totalRamGB;
|
|
578287
|
+
const vramBudget = specs.availableVramGB > 0 ? specs.availableVramGB : specs.gpuVramGB;
|
|
578288
|
+
const totalAvail = Math.max(vramBudget, ramBudget);
|
|
578289
|
+
const remaining = Math.max(0, totalAvail - modelSizeGB2);
|
|
578290
|
+
const usableGB = remaining * 0.85;
|
|
578291
|
+
const memoryFit = calculateMemoryBoundedNumCtx(specs, modelSizeGB2, kvBytesPerToken, archMax);
|
|
578292
|
+
const archCtxValue = archMax && archMax > 0 ? Math.max(2048, Math.floor(archMax / 1024) * 1024) : null;
|
|
578293
|
+
const archCtx = archCtxValue ?? Number.POSITIVE_INFINITY;
|
|
578269
578294
|
const floor = Math.min(EXPANDED_VARIANT_MIN_NUM_CTX, archCtx);
|
|
578270
578295
|
const fits = Math.min(memoryFit, archCtx);
|
|
578271
578296
|
const numCtx = Math.max(floor, fits);
|
|
578272
|
-
|
|
578297
|
+
let limitedBy;
|
|
578298
|
+
if (numCtx === floor && fits < floor) limitedBy = "floor";
|
|
578299
|
+
else if (archCtxValue !== null && numCtx === archCtxValue) limitedBy = "arch";
|
|
578300
|
+
else limitedBy = "memory";
|
|
578301
|
+
const effectiveKvBpt = kvBytesPerToken && kvBytesPerToken > 0 ? kvBytesPerToken : (modelSizeGB2 <= 5 ? 64 : modelSizeGB2 <= 12 ? 160 : modelSizeGB2 <= 25 ? 256 : 384) * 1024;
|
|
578302
|
+
return {
|
|
578303
|
+
numCtx,
|
|
578304
|
+
label: formatContextLabel(numCtx),
|
|
578305
|
+
math: {
|
|
578306
|
+
numCtx,
|
|
578307
|
+
label: formatContextLabel(numCtx),
|
|
578308
|
+
modelSizeGB: modelSizeGB2,
|
|
578309
|
+
kvBytesPerToken: effectiveKvBpt,
|
|
578310
|
+
kvSource: kvBytesPerToken && kvBytesPerToken > 0 ? "model_info" : "fallback",
|
|
578311
|
+
archMax: archMax && archMax > 0 ? archMax : null,
|
|
578312
|
+
ramBudgetGB: ramBudget,
|
|
578313
|
+
vramBudgetGB: vramBudget,
|
|
578314
|
+
usableGB,
|
|
578315
|
+
memoryFit,
|
|
578316
|
+
archCtx: archCtxValue,
|
|
578317
|
+
floor,
|
|
578318
|
+
limitedBy
|
|
578319
|
+
}
|
|
578320
|
+
};
|
|
578273
578321
|
}
|
|
578274
578322
|
function ask(rl, question) {
|
|
578275
578323
|
return new Promise((resolve52) => {
|
|
@@ -580064,7 +580112,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
|
|
|
580064
580112
|
const arch3 = info["general.architecture"];
|
|
580065
580113
|
if (!arch3) return null;
|
|
580066
580114
|
const nLayersRaw = info[`${arch3}.block_count`];
|
|
580067
|
-
const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`]
|
|
580115
|
+
const nKVHeadsRaw = info[`${arch3}.attention.head_count_kv`];
|
|
580068
580116
|
const keyDimRaw = info[`${arch3}.attention.key_length`];
|
|
580069
580117
|
const valDimRaw = info[`${arch3}.attention.value_length`] ?? keyDimRaw;
|
|
580070
580118
|
const archMax = info[`${arch3}.context_length`];
|
|
@@ -580072,7 +580120,7 @@ async function queryModelKVInfo(backendUrl2, modelName) {
|
|
|
580072
580120
|
const keyDim = keyDimRaw ?? 128;
|
|
580073
580121
|
const valDim = valDimRaw ?? 128;
|
|
580074
580122
|
const nLayers = nLayersRaw ?? defaultLayersForArch(arch3);
|
|
580075
|
-
const nKVHeads = nKVHeadsRaw ??
|
|
580123
|
+
const nKVHeads = nKVHeadsRaw ?? 8;
|
|
580076
580124
|
if (!nLayers) return { archMax };
|
|
580077
580125
|
const kvBytesPerToken = nLayers * nKVHeads * (keyDim + valDim) * 2;
|
|
580078
580126
|
return { kvBytesPerToken, archMax };
|
|
@@ -580204,6 +580252,21 @@ async function createExpandedVariantAsync(baseModel, specs, sizeGB, kvBytesPerTo
|
|
|
580204
580252
|
archMax
|
|
580205
580253
|
);
|
|
580206
580254
|
}
|
|
580255
|
+
function formatExpandedContextDiagnostic(specs, math) {
|
|
580256
|
+
const fmtGB = (n2) => `${n2.toFixed(1)}GB`;
|
|
580257
|
+
const fmtKB = (n2) => `${Math.round(n2 / 1024)}KB`;
|
|
580258
|
+
const fmtK = (n2) => n2 >= 1024 ? `${Math.floor(n2 / 1024)}K` : String(n2);
|
|
580259
|
+
const memBits = [];
|
|
580260
|
+
if (specs.gpuVramGB > 0) {
|
|
580261
|
+
memBits.push(`VRAM ${fmtGB(specs.availableVramGB || specs.gpuVramGB)}/${fmtGB(specs.gpuVramGB)}`);
|
|
580262
|
+
}
|
|
580263
|
+
memBits.push(`RAM ${fmtGB(specs.availableRamGB)}/${fmtGB(specs.totalRamGB)}${specs.unifiedMemory ? " unified" : ""}`);
|
|
580264
|
+
const mem = memBits.join(", ");
|
|
580265
|
+
const kv = `KV ${fmtKB(math.kvBytesPerToken)}/tok (${math.kvSource})`;
|
|
580266
|
+
const fit2 = `fit ${fmtK(math.memoryFit)}, arch ${math.archCtx !== null ? fmtK(math.archCtx) : "n/a"}, floor ${fmtK(math.floor)}`;
|
|
580267
|
+
const limit = `→ ${fmtK(math.numCtx)} (${math.limitedBy === "floor" ? "min floor" : math.limitedBy === "arch" ? "arch-capped" : "memory-fit"})`;
|
|
580268
|
+
return `[${mem} | model ${fmtGB(math.modelSizeGB)} | ${kv} | ${fit2} ${limit}]`;
|
|
580269
|
+
}
|
|
580207
580270
|
async function ensureExpandedContext(modelName, backendUrl2) {
|
|
580208
580271
|
if (modelName.includes("cloud") || modelName.includes(":cloud")) {
|
|
580209
580272
|
return { model: modelName, created: false, contextLabel: "remote", numCtx: 0 };
|
|
@@ -580230,11 +580293,11 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580230
580293
|
kvInfo?.kvBytesPerToken,
|
|
580231
580294
|
kvInfo?.archMax
|
|
580232
580295
|
).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: modelName }));
|
|
580233
|
-
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580296
|
+
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580234
580297
|
}
|
|
580235
580298
|
const existing = await checkExpandedVariant(modelName, backendUrl2);
|
|
580236
580299
|
if (existing === null) {
|
|
580237
|
-
return { model: modelName, created: false, contextLabel: "", numCtx: 0 };
|
|
580300
|
+
return { model: modelName, created: false, contextLabel: "", numCtx: 0, specs, math: ctx3.math };
|
|
580238
580301
|
}
|
|
580239
580302
|
if (typeof existing === "string") {
|
|
580240
580303
|
const lostTools = await wrapperLacksToolsCapability(backendUrl2, existing).catch(() => false);
|
|
@@ -580249,7 +580312,7 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580249
580312
|
kvInfo?.archMax
|
|
580250
580313
|
);
|
|
580251
580314
|
if (rebuilt) {
|
|
580252
|
-
return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580315
|
+
return { model: rebuilt, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580253
580316
|
}
|
|
580254
580317
|
} catch {
|
|
580255
580318
|
}
|
|
@@ -580264,13 +580327,13 @@ async function ensureExpandedContext(modelName, backendUrl2) {
|
|
|
580264
580327
|
kvInfo?.kvBytesPerToken,
|
|
580265
580328
|
kvInfo?.archMax
|
|
580266
580329
|
).catch(() => ({ repaired: false, currentNumCtx: 0, baseModel: null, resolvedModel: existing }));
|
|
580267
|
-
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580330
|
+
return { model: repair.resolvedModel, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580268
580331
|
}
|
|
580269
580332
|
const created = await createExpandedVariantAsync(modelName, specs, sizeGB, kvInfo?.kvBytesPerToken, kvInfo?.archMax);
|
|
580270
580333
|
if (created) {
|
|
580271
|
-
return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580334
|
+
return { model: created, created: true, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580272
580335
|
}
|
|
580273
|
-
return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx };
|
|
580336
|
+
return { model: modelName, created: false, contextLabel: ctx3.label, numCtx: ctx3.numCtx, specs, math: ctx3.math };
|
|
580274
580337
|
}
|
|
580275
580338
|
function guessBaseFromVariant(variantName, models) {
|
|
580276
580339
|
const stripped = stripVariantTag(variantName);
|
|
@@ -598640,8 +598703,8 @@ async function showModelPicker(ctx3, local = false) {
|
|
|
598640
598703
|
}
|
|
598641
598704
|
const items = [];
|
|
598642
598705
|
const history = loadUsageHistory("model", ctx3.repoRoot);
|
|
598643
|
-
const liveModelNames = new Set(models.map((m2) => m2.name));
|
|
598644
|
-
const modelMap = new Map(models.map((m2) => [m2.name, m2]));
|
|
598706
|
+
const liveModelNames = new Set(models.map((m2) => stripLatest(m2.name)));
|
|
598707
|
+
const modelMap = new Map(models.map((m2) => [stripLatest(m2.name), m2]));
|
|
598645
598708
|
if (history.length > 0) {
|
|
598646
598709
|
items.push({
|
|
598647
598710
|
key: "__header_recent__",
|
|
@@ -598650,8 +598713,9 @@ async function showModelPicker(ctx3, local = false) {
|
|
|
598650
598713
|
});
|
|
598651
598714
|
for (const h of history.slice(0, 8)) {
|
|
598652
598715
|
const uses = h.localUses > 0 ? `${h.useCount} uses (${h.localUses} local)` : `${h.useCount} uses`;
|
|
598653
|
-
const
|
|
598654
|
-
const
|
|
598716
|
+
const hKey = stripLatest(h.value);
|
|
598717
|
+
const available = liveModelNames.has(hKey) ? "" : c3.yellow(" [offline]");
|
|
598718
|
+
const meta = modelMap.get(hKey);
|
|
598655
598719
|
const ctx4 = meta?.contextLength ? ` ${formatContextLength(meta.contextLength)}` : "";
|
|
598656
598720
|
const capStr = meta?.caps ? ` ${formatCaps(meta.caps)}` : "";
|
|
598657
598721
|
items.push({
|
|
@@ -598666,9 +598730,9 @@ async function showModelPicker(ctx3, local = false) {
|
|
|
598666
598730
|
detail: ""
|
|
598667
598731
|
});
|
|
598668
598732
|
}
|
|
598669
|
-
const historyKeys = new Set(history.map((h) => h.value));
|
|
598733
|
+
const historyKeys = new Set(history.map((h) => stripLatest(h.value)));
|
|
598670
598734
|
for (const m2 of models) {
|
|
598671
|
-
if (history.length > 0 && historyKeys.has(m2.name)) continue;
|
|
598735
|
+
if (history.length > 0 && historyKeys.has(stripLatest(m2.name))) continue;
|
|
598672
598736
|
const ctx4 = m2.contextLength ? formatContextLength(m2.contextLength) : "";
|
|
598673
598737
|
const capStr = m2.caps ? formatCaps(m2.caps) : "";
|
|
598674
598738
|
items.push({
|
|
@@ -598679,7 +598743,10 @@ async function showModelPicker(ctx3, local = false) {
|
|
|
598679
598743
|
}
|
|
598680
598744
|
const result = await tuiSelect({
|
|
598681
598745
|
items,
|
|
598682
|
-
activeKey
|
|
598746
|
+
// `activeKey` is the keyed currently-selected row. The picker stores
|
|
598747
|
+
// history/recent entries with tag-less keys, so normalize the active
|
|
598748
|
+
// model from config to match.
|
|
598749
|
+
activeKey: stripLatest(ctx3.config.model),
|
|
598683
598750
|
title: "Select Model",
|
|
598684
598751
|
rl: ctx3.rl,
|
|
598685
598752
|
// Skip header rows
|
|
@@ -598690,7 +598757,7 @@ async function showModelPicker(ctx3, local = false) {
|
|
|
598690
598757
|
renderInfo("Model selection cancelled.");
|
|
598691
598758
|
return;
|
|
598692
598759
|
}
|
|
598693
|
-
await switchModel(result.key, ctx3, local);
|
|
598760
|
+
await switchModel(stripLatest(result.key), ctx3, local);
|
|
598694
598761
|
} catch (err) {
|
|
598695
598762
|
renderError(
|
|
598696
598763
|
`Failed to fetch models: ${err instanceof Error ? err.message : String(err)}`
|
|
@@ -602241,14 +602308,15 @@ async function switchModel(query, ctx3, local = false) {
|
|
|
602241
602308
|
match.name,
|
|
602242
602309
|
ctx3.config.backendUrl
|
|
602243
602310
|
);
|
|
602311
|
+
const diag = result.specs && result.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(result.specs, result.math)) : "";
|
|
602244
602312
|
if (result.created) {
|
|
602245
602313
|
renderInfo(
|
|
602246
|
-
`Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)`
|
|
602314
|
+
`Created expanded context variant: ${c3.bold(result.model)} (${result.contextLabel}, ${result.numCtx} tokens)${diag}`
|
|
602247
602315
|
);
|
|
602248
602316
|
finalModel = result.model;
|
|
602249
602317
|
} else if (result.model !== match.name) {
|
|
602250
602318
|
renderInfo(
|
|
602251
|
-
`Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})`
|
|
602319
|
+
`Using expanded context variant: ${c3.bold(result.model)} (${result.contextLabel})${diag}`
|
|
602252
602320
|
);
|
|
602253
602321
|
finalModel = result.model;
|
|
602254
602322
|
}
|
|
@@ -653796,13 +653864,14 @@ This is an independent background session started from /background.`
|
|
|
653796
653864
|
currentConfig.model,
|
|
653797
653865
|
currentConfig.backendUrl
|
|
653798
653866
|
);
|
|
653867
|
+
const diag = expandResult.specs && expandResult.math ? "\n " + c3.dim(formatExpandedContextDiagnostic(expandResult.specs, expandResult.math)) : "";
|
|
653799
653868
|
if (expandResult.created) {
|
|
653800
653869
|
config = { ...config, model: expandResult.model };
|
|
653801
653870
|
currentConfig = { ...currentConfig, model: expandResult.model };
|
|
653802
653871
|
statusBar.setModelName(expandResult.model);
|
|
653803
653872
|
writeContent(
|
|
653804
653873
|
() => renderInfo(
|
|
653805
|
-
`Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)`
|
|
653874
|
+
`Created expanded context model: ${expandResult.model} (${expandResult.contextLabel}, ${expandResult.numCtx} tokens)${diag}`
|
|
653806
653875
|
)
|
|
653807
653876
|
);
|
|
653808
653877
|
} else if (expandResult.model !== currentConfig.model) {
|
|
@@ -653811,7 +653880,7 @@ This is an independent background session started from /background.`
|
|
|
653811
653880
|
statusBar.setModelName(expandResult.model);
|
|
653812
653881
|
writeContent(
|
|
653813
653882
|
() => renderInfo(
|
|
653814
|
-
`Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})`
|
|
653883
|
+
`Using expanded context model: ${expandResult.model} (${expandResult.contextLabel})${diag}`
|
|
653815
653884
|
)
|
|
653816
653885
|
);
|
|
653817
653886
|
}
|