codemaxxing 0.3.1 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -16
- package/dist/config.d.ts +3 -0
- package/dist/config.js +13 -0
- package/dist/index.js +457 -8
- package/dist/utils/hardware.d.ts +17 -0
- package/dist/utils/hardware.js +120 -0
- package/dist/utils/models.d.ts +24 -0
- package/dist/utils/models.js +177 -0
- package/dist/utils/ollama.d.ts +42 -0
- package/dist/utils/ollama.js +213 -0
- package/package.json +1 -1
- package/src/config.ts +15 -0
- package/src/index.tsx +589 -6
- package/src/utils/hardware.ts +131 -0
- package/src/utils/models.ts +217 -0
- package/src/utils/ollama.ts +232 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import os from "os";
|
|
2
|
+
import { execSync } from "child_process";
|
|
3
|
+
// Normalize Node's process.platform to the three OS names used in this package.
function getOS() {
    if (process.platform === "darwin")
        return "macos";
    if (process.platform === "win32")
        return "windows";
    return "linux";
}
|
|
10
|
+
// Summarize the machine's CPU from os.cpus(): model name of the first
// logical core, total logical core count, and reported clock speed.
function getCPU() {
    const logicalCores = os.cpus();
    const first = logicalCores[0];
    return {
        name: first?.model?.trim() ?? "Unknown CPU",
        cores: logicalCores.length,
        speed: first?.speed ?? 0, // MHz
    };
}
|
|
18
|
+
/**
 * Best-effort GPU detection for the given platform ("macos" | "linux" |
 * "windows"). Returns { name, vram } (vram in bytes, 0 when unknown) or
 * null when nothing could be detected. All probes shell out with short
 * timeouts and any failure falls through to the next strategy.
 */
function getGPU(platform) {
    try {
        if (platform === "macos") {
            // system_profiler emits JSON with a SPDisplaysDataType array.
            const raw = execSync("system_profiler SPDisplaysDataType -json", {
                encoding: "utf-8",
                timeout: 5000,
                stdio: ["pipe", "pipe", "pipe"],
            });
            const data = JSON.parse(raw);
            const displays = data?.SPDisplaysDataType;
            if (Array.isArray(displays) && displays.length > 0) {
                // Only the first display adapter is considered.
                const gpu = displays[0];
                const name = gpu.sppci_model ?? gpu._name ?? "Unknown GPU";
                // On Apple Silicon, VRAM is shared (unified memory) — report total RAM
                const vramStr = gpu["spdisplays_vram"] ?? gpu["spdisplays_vram_shared"] ?? "";
                let vram = 0;
                if (vramStr) {
                    // e.g. "8 GB" / "1536 MB" — convert to bytes.
                    const match = vramStr.match(/(\d+)\s*(GB|MB)/i);
                    if (match) {
                        vram = parseInt(match[1]) * (match[2].toUpperCase() === "GB" ? 1024 * 1024 * 1024 : 1024 * 1024);
                    }
                }
                // Apple Silicon unified memory — use total RAM as VRAM
                if (vram === 0 && name.toLowerCase().includes("apple")) {
                    vram = os.totalmem();
                }
                return { name, vram };
            }
        }
        if (platform === "linux") {
            // Try NVIDIA first
            try {
                const raw = execSync("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader", {
                    encoding: "utf-8",
                    timeout: 5000,
                    stdio: ["pipe", "pipe", "pipe"],
                });
                // Only the first GPU line is used on multi-GPU machines.
                const line = raw.trim().split("\n")[0];
                if (line) {
                    const parts = line.split(",").map(s => s.trim());
                    const name = parts[0] ?? "NVIDIA GPU";
                    const memMatch = (parts[1] ?? "").match(/(\d+)/);
                    const vram = memMatch ? parseInt(memMatch[1]) * 1024 * 1024 : 0; // MiB to bytes
                    return { name, vram };
                }
            }
            catch {
                // No NVIDIA, try lspci
                try {
                    const raw = execSync("lspci | grep -i vga", {
                        encoding: "utf-8",
                        timeout: 5000,
                        stdio: ["pipe", "pipe", "pipe"],
                    });
                    const line = raw.trim().split("\n")[0];
                    if (line) {
                        // lspci line is "addr: class: device"; keep everything after
                        // the second colon as the adapter name. No VRAM info here.
                        const name = line.split(":").slice(2).join(":").trim() || "Unknown GPU";
                        return { name, vram: 0 };
                    }
                }
                catch { /* no lspci */ }
            }
        }
        if (platform === "windows") {
            try {
                // CSV columns come out alphabetically: Node,AdapterRAM,Name.
                // NOTE(review): wmic's AdapterRAM is a 32-bit field and may cap
                // around 4 GB on modern cards — confirm if exact VRAM matters.
                const raw = execSync("wmic path win32_VideoController get Name,AdapterRAM /format:csv", {
                    encoding: "utf-8",
                    timeout: 5000,
                    stdio: ["pipe", "pipe", "pipe"],
                });
                const lines = raw.trim().split("\n").filter(l => l.trim() && !l.startsWith("Node"));
                if (lines.length > 0) {
                    const parts = lines[0].split(",");
                    const adapterRAM = parseInt(parts[1] ?? "0"); // already bytes
                    const name = parts[2]?.trim() ?? "Unknown GPU";
                    return { name, vram: isNaN(adapterRAM) ? 0 : adapterRAM };
                }
            }
            catch { /* no wmic */ }
        }
    }
    catch {
        // GPU detection failed
    }
    return null;
}
|
|
104
|
+
/**
 * Collect a snapshot of the host hardware: CPU summary, total RAM in bytes,
 * best-effort GPU info (may be null), normalized OS name, and a flag for
 * Apple Silicon (unified-memory) machines.
 */
export function detectHardware() {
    const osName = getOS();
    const cpuInfo = getCPU();
    // "Apple M1/M2/…" in the CPU model string identifies Apple Silicon.
    const isAppleSilicon = osName === "macos" && /apple\s+m/i.test(cpuInfo.name);
    return {
        cpu: cpuInfo,
        ram: os.totalmem(),
        gpu: getGPU(osName),
        os: osName,
        appleSilicon: isAppleSilicon,
    };
}
|
|
113
|
+
/**
 * Format a byte count as a human-readable string ("12 GB", "512 MB", "3 KB").
 * Fixes the sub-kilobyte edge case: values below 1 KiB used to round to
 * "0 KB"/"1 KB"; they are now reported in bytes (e.g. "512 B").
 */
export function formatBytes(bytes) {
    const KB = 1024;
    const MB = 1024 * KB;
    const GB = 1024 * MB;
    if (bytes >= GB)
        return `${Math.round(bytes / GB)} GB`;
    if (bytes >= MB)
        return `${Math.round(bytes / MB)} MB`;
    if (bytes >= KB)
        return `${Math.round(bytes / KB)} KB`;
    return `${Math.round(bytes)} B`;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { HardwareInfo } from "./hardware.js";
/** A local-model suggestion shown to the user (curated or llmfit-derived). */
export interface RecommendedModel {
    /** Human-readable display name, e.g. "Qwen 2.5 Coder 7B". */
    name: string;
    /** Identifier/tag passed to `ollama pull`. */
    ollamaId: string;
    /** Download size — presumably GB; confirm against the models list. */
    size: number;
    /** Minimum system RAM in GB (compared against detected RAM). */
    ramRequired: number;
    /** VRAM in GB for comfortable inference (compared against detected VRAM). */
    vramOptimal: number;
    /** One-line blurb shown alongside the model. */
    description: string;
    /** Rough throughput estimate, e.g. "~45 tok/s on M1". */
    speed: string;
    /** Coarse quality tier used as a sorting tiebreaker. */
    quality: "good" | "great" | "best";
}
/** How well a model fits the detected hardware, best to worst. */
export type ModelFit = "perfect" | "good" | "tight" | "skip";
/** A RecommendedModel annotated with its hardware-fit rating. */
export interface ScoredModel extends RecommendedModel {
    fit: ModelFit;
}
/** Score the curated model list against the hardware; sorted best-fit first. */
export declare function getRecommendations(hardware: HardwareInfo): ScoredModel[];
/** Emoji badge for a fit rating. */
export declare function getFitIcon(fit: ModelFit): string;
/** Check if llmfit binary is available */
export declare function isLlmfitAvailable(): boolean;
/** Get recommendations using llmfit if available, otherwise fall back to hardcoded list */
export declare function getRecommendationsWithLlmfit(hardware: HardwareInfo): {
    models: ScoredModel[];
    usedLlmfit: boolean;
};
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import { execSync } from "child_process";
|
|
2
|
+
// Curated fallback catalog of local coding models, used when the `llmfit`
// tool is not installed. ramRequired/vramOptimal are in GB (they are compared
// against detected RAM/VRAM in getRecommendations); `size` is presumably the
// download size in GB — confirm against the Ollama registry. Speed strings
// are rough, hand-written estimates.
const MODELS = [
    {
        name: "Qwen 2.5 Coder 3B",
        ollamaId: "qwen2.5-coder:3b",
        size: 2,
        ramRequired: 8,
        vramOptimal: 4,
        description: "Lightweight, fast coding model",
        speed: "~60 tok/s on M1",
        quality: "good",
    },
    {
        name: "Qwen 2.5 Coder 7B",
        ollamaId: "qwen2.5-coder:7b",
        size: 5,
        ramRequired: 16,
        vramOptimal: 8,
        description: "Sweet spot for most machines",
        speed: "~45 tok/s on M1",
        quality: "great",
    },
    {
        name: "Qwen 2.5 Coder 14B",
        ollamaId: "qwen2.5-coder:14b",
        size: 9,
        ramRequired: 32,
        vramOptimal: 16,
        description: "High quality coding",
        speed: "~25 tok/s on M1 Pro",
        quality: "best",
    },
    {
        name: "Qwen 2.5 Coder 32B",
        ollamaId: "qwen2.5-coder:32b",
        size: 20,
        ramRequired: 48,
        vramOptimal: 32,
        description: "Premium quality, needs lots of RAM",
        speed: "~12 tok/s on M1 Max",
        quality: "best",
    },
    {
        name: "DeepSeek Coder V2 16B",
        ollamaId: "deepseek-coder-v2:16b",
        size: 9,
        ramRequired: 32,
        vramOptimal: 16,
        description: "Strong alternative for coding",
        speed: "~30 tok/s on M1 Pro",
        quality: "great",
    },
    {
        name: "CodeLlama 7B",
        ollamaId: "codellama:7b",
        size: 4,
        ramRequired: 16,
        vramOptimal: 8,
        description: "Meta's coding model",
        speed: "~40 tok/s on M1",
        quality: "good",
    },
    {
        name: "StarCoder2 7B",
        ollamaId: "starcoder2:7b",
        size: 4,
        ramRequired: 16,
        vramOptimal: 8,
        description: "Good for code completion",
        speed: "~40 tok/s on M1",
        quality: "good",
    },
];
|
|
74
|
+
/**
 * Rate how well one model fits the machine.
 * ramGB/vramGB are gigabytes; returns "perfect" | "good" | "tight" | "skip".
 */
function scoreModel(model, ramGB, vramGB) {
    const headroom = ramGB - model.ramRequired;
    // Not enough RAM at all — don't offer this model.
    if (headroom < 0)
        return "skip";
    const vramOk = vramGB >= model.vramOptimal;
    if (vramOk && headroom >= 4)
        return "perfect";
    if (vramOk || headroom >= 8)
        return "good";
    // Fits in RAM but with little margin and/or insufficient VRAM.
    return "tight";
}
|
|
87
|
+
// Sort weights for getRecommendations: higher value sorts earlier.
const qualityOrder = { best: 3, great: 2, good: 1 };
const fitOrder = { perfect: 4, good: 3, tight: 2, skip: 1 };
|
|
89
|
+
/**
 * Score every entry in MODELS against the detected hardware and return the
 * list sorted best-fit first (fit rank, then quality rank as tiebreaker).
 */
export function getRecommendations(hardware) {
    const BYTES_PER_GB = 1024 * 1024 * 1024;
    const ramGB = hardware.ram / BYTES_PER_GB;
    const vramGB = hardware.gpu?.vram ? hardware.gpu.vram / BYTES_PER_GB : 0;
    // Apple Silicon uses unified memory, so all RAM is usable as VRAM.
    const usableVRAM = hardware.appleSilicon ? ramGB : vramGB;
    const ranked = MODELS.map((model) => ({
        ...model,
        fit: scoreModel(model, ramGB, usableVRAM),
    }));
    ranked.sort((a, b) => {
        const byFit = (fitOrder[b.fit] ?? 0) - (fitOrder[a.fit] ?? 0);
        return byFit !== 0
            ? byFit
            : (qualityOrder[b.quality] ?? 0) - (qualityOrder[a.quality] ?? 0);
    });
    return ranked;
}
|
|
107
|
+
/** Emoji badge for a fit rating. */
export function getFitIcon(fit) {
    const icons = {
        perfect: "\u2B50", // ⭐
        good: "\u2705", // ✅
        tight: "\u26A0\uFE0F", // ⚠️
        skip: "\u274C", // ❌
    };
    return icons[fit];
}
|
|
115
|
+
/** Check whether the `llmfit` binary can be found on PATH. */
export function isLlmfitAvailable() {
    const probe = process.platform === "win32" ? "where llmfit" : "which llmfit";
    try {
        execSync(probe, { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
    }
    catch {
        // Lookup command failed or returned non-zero — binary not available.
        return false;
    }
    return true;
}
|
|
126
|
+
// Translate llmfit's fit labels into this package's ModelFit vocabulary.
// "marginal" becomes "tight"; anything unrecognized becomes "skip".
function mapLlmfitFit(fit) {
    if (fit === "perfect" || fit === "good")
        return fit;
    return fit === "marginal" ? "tight" : "skip";
}
|
|
134
|
+
// Bucket a parameter count (in billions) into a coarse quality tier:
// >=14B → "best", >=7B → "great", otherwise "good".
function mapLlmfitQuality(params_b) {
    return params_b >= 14 ? "best" : params_b >= 7 ? "great" : "good";
}
|
|
141
|
+
/**
 * Get model recommendations, preferring the external `llmfit` tool when it is
 * installed; any failure (missing binary, bad JSON, empty result, no ollama
 * entries) falls back to the hardcoded MODELS list via getRecommendations.
 *
 * NOTE(review): `hardware` is only used for the fallback path — llmfit
 * presumably probes the machine itself; confirm its CLI behavior.
 */
export function getRecommendationsWithLlmfit(hardware) {
    if (!isLlmfitAvailable()) {
        return { models: getRecommendations(hardware), usedLlmfit: false };
    }
    try {
        const raw = execSync("llmfit recommend --use-case coding --format json --limit 10", {
            encoding: "utf-8",
            timeout: 15000,
            stdio: ["pipe", "pipe", "pipe"],
        });
        // Expected output: a JSON array of objects with at least
        // { provider, name, params_b, ram_gb, vram_gb, quant, estimated_tps, fit }
        // — inferred from the field accesses below; confirm against llmfit docs.
        const llmfitModels = JSON.parse(raw.trim());
        if (!Array.isArray(llmfitModels) || llmfitModels.length === 0) {
            return { models: getRecommendations(hardware), usedLlmfit: false };
        }
        const scored = llmfitModels
            .filter((m) => m.provider === "ollama")
            .map((m) => ({
                // "qwen2.5-coder:7b" → "Qwen2.5 Coder 7B" (title-case the base tag).
                name: m.name.split(":")[0]?.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase()) + ` ${m.params_b}B`,
                ollamaId: m.name,
                size: Math.ceil(m.vram_gb),
                ramRequired: Math.ceil(m.ram_gb),
                vramOptimal: Math.ceil(m.vram_gb),
                description: `${m.quant} · ~${m.estimated_tps.toFixed(0)} tok/s`,
                speed: `~${m.estimated_tps.toFixed(0)} tok/s`,
                quality: mapLlmfitQuality(m.params_b),
                fit: mapLlmfitFit(m.fit),
            }));
        // All results may have been filtered out (non-ollama providers only).
        if (scored.length === 0) {
            return { models: getRecommendations(hardware), usedLlmfit: false };
        }
        return { models: scored, usedLlmfit: true };
    }
    catch {
        // llmfit crashed, timed out, or emitted unparsable output — fall back.
        return { models: getRecommendations(hardware), usedLlmfit: false };
    }
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/** Check if ollama binary exists on PATH */
export declare function isOllamaInstalled(): boolean;
/** Check if ollama server is responding */
export declare function isOllamaRunning(): Promise<boolean>;
/** Get the install command for the user's OS */
export declare function getOllamaInstallCommand(os: "macos" | "linux" | "windows"): string;
/** Start ollama serve in background */
export declare function startOllama(): void;
/** One progress snapshot parsed from `ollama pull` console output. */
export interface PullProgress {
    /** Current phase, e.g. "pulling manifest", "downloading", "success". */
    status: string;
    /** Total bytes to download, when the output reports sizes. */
    total?: number;
    /** Bytes downloaded so far, when the output reports sizes. */
    completed?: number;
    /** Percent complete, 0–100. */
    percent: number;
}
/**
 * Pull a model from Ollama registry.
 * Calls onProgress with download updates.
 * Returns a promise that resolves when complete.
 */
export declare function pullModel(modelId: string, onProgress?: (progress: PullProgress) => void): Promise<void>;
/** One installed model as reported by Ollama's /api/tags endpoint. */
export interface OllamaModelInfo {
    name: string;
    /** Size as reported by the API — presumably bytes on disk; confirm. */
    size: number;
    modified_at: string;
    digest: string;
}
/** List models installed in Ollama with detailed info */
export declare function listInstalledModelsDetailed(): Promise<OllamaModelInfo[]>;
/** List models installed in Ollama */
export declare function listInstalledModels(): Promise<string[]>;
/** Stop all loaded models (frees VRAM) and kill the Ollama server process */
export declare function stopOllama(): Promise<{
    ok: boolean;
    message: string;
}>;
/** Delete a model from disk */
export declare function deleteModel(modelId: string): {
    ok: boolean;
    message: string;
};
/** Get GPU memory usage info (best-effort) */
export declare function getGPUMemoryUsage(): string | null;
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { execSync, spawn } from "child_process";
|
|
2
|
+
/** Check whether the `ollama` binary can be found on PATH. */
export function isOllamaInstalled() {
    const probe = process.platform === "win32" ? "where ollama" : "which ollama";
    try {
        execSync(probe, { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
    }
    catch {
        // Lookup command failed or returned non-zero — not installed.
        return false;
    }
    return true;
}
|
|
13
|
+
/** Probe the local Ollama HTTP server; resolves false on any failure/timeout. */
export async function isOllamaRunning() {
    try {
        const controller = new AbortController();
        // Abort the probe if the server doesn't answer within 2 seconds.
        const killSwitch = setTimeout(() => controller.abort(), 2000);
        const response = await fetch("http://localhost:11434/api/tags", { signal: controller.signal });
        clearTimeout(killSwitch);
        return response.ok;
    }
    catch {
        return false;
    }
}
|
|
26
|
+
/** Return the shell command that installs Ollama on the given OS. */
export function getOllamaInstallCommand(os) {
    const commands = {
        macos: "brew install ollama",
        linux: "curl -fsSL https://ollama.com/install.sh | sh",
        windows: "winget install Ollama.Ollama",
    };
    return commands[os];
}
|
|
34
|
+
/**
 * Launch `ollama serve` as a detached background process; unref lets the
 * current process exit without waiting for the server.
 */
export function startOllama() {
    const server = spawn("ollama", ["serve"], {
        detached: true,
        stdio: "ignore",
    });
    server.unref();
}
|
|
42
|
+
/**
 * Pull a model from Ollama registry.
 * Calls onProgress with download updates.
 * Returns a promise that resolves when complete.
 */
export function pullModel(modelId, onProgress) {
    return new Promise((resolve, reject) => {
        const child = spawn("ollama", ["pull", modelId], {
            stdio: ["pipe", "pipe", "pipe"],
        });
        // Kept for the error message if the pull fails.
        let lastOutput = "";
        // Parse one output chunk and translate it to a PullProgress callback.
        // NOTE(review): chunks are not guaranteed to be single lines; the
        // regexes scan the whole chunk, so the first match wins.
        const parseLine = (data) => {
            lastOutput = data;
            // Ollama pull output looks like:
            // pulling manifest
            // pulling abc123... 58% ▕██████████░░░░░░░░░░▏ 2.9 GB/5.0 GB
            // verifying sha256 digest
            // writing manifest
            // success
            // Try to parse percentage
            const pctMatch = data.match(/(\d+)%/);
            const sizeMatch = data.match(/([\d.]+)\s*GB\s*\/\s*([\d.]+)\s*GB/);
            if (pctMatch) {
                const percent = parseInt(pctMatch[1]);
                let completed;
                let total;
                if (sizeMatch) {
                    // "2.9 GB/5.0 GB" → bytes. Only GB/GB pairs are parsed;
                    // smaller downloads report percent only.
                    completed = parseFloat(sizeMatch[1]) * 1024 * 1024 * 1024;
                    total = parseFloat(sizeMatch[2]) * 1024 * 1024 * 1024;
                }
                onProgress?.({ status: "downloading", total, completed, percent });
            }
            else if (data.includes("pulling manifest")) {
                onProgress?.({ status: "pulling manifest", percent: 0 });
            }
            else if (data.includes("verifying")) {
                onProgress?.({ status: "verifying", percent: 100 });
            }
            else if (data.includes("writing manifest")) {
                onProgress?.({ status: "writing manifest", percent: 100 });
            }
            else if (data.includes("success")) {
                onProgress?.({ status: "success", percent: 100 });
            }
        };
        child.stdout?.on("data", (data) => {
            parseLine(data.toString().trim());
        });
        child.stderr?.on("data", (data) => {
            // Ollama writes progress to stderr
            parseLine(data.toString().trim());
        });
        // Resolve/reject on process exit; non-zero exit includes the last
        // output chunk to aid debugging.
        child.on("close", (code) => {
            if (code === 0) {
                resolve();
            }
            else {
                reject(new Error(`ollama pull failed (exit ${code}): ${lastOutput}`));
            }
        });
        // Spawn failure (e.g. ollama binary missing).
        child.on("error", (err) => {
            reject(new Error(`Failed to run ollama pull: ${err.message}`));
        });
    });
}
|
|
107
|
+
/**
 * Query the local Ollama server's /api/tags endpoint and return the installed
 * models with name/size/modified_at/digest. Returns [] if the server is not
 * reachable within 3 seconds or answers with an error status.
 */
export async function listInstalledModelsDetailed() {
    try {
        const controller = new AbortController();
        const killSwitch = setTimeout(() => controller.abort(), 3000);
        const res = await fetch("http://localhost:11434/api/tags", { signal: controller.signal });
        clearTimeout(killSwitch);
        if (!res.ok)
            return [];
        const payload = (await res.json());
        const entries = payload.models ?? [];
        return entries.map(({ name, size, modified_at, digest }) => ({
            name,
            size,
            modified_at,
            digest,
        }));
    }
    catch {
        // Server not running or request aborted — report an empty library.
        return [];
    }
}
|
|
127
|
+
/** Return only the names of the models installed in Ollama. */
export async function listInstalledModels() {
    const detailed = await listInstalledModelsDetailed();
    return detailed.map(({ name }) => name);
}
|
|
132
|
+
/**
 * Stop Ollama: first ask it to unload any loaded models (frees VRAM), then
 * kill the server process with a platform-appropriate cascade, and finally
 * verify via the HTTP probe that it is really down.
 * Returns { ok, message } rather than throwing.
 */
export async function stopOllama() {
    try {
        // First unload all models from memory
        try {
            execSync("ollama stop", { stdio: ["pipe", "pipe", "pipe"], timeout: 5000 });
        }
        catch { /* may fail if no models loaded */ }
        // Kill the server process
        if (process.platform === "win32") {
            execSync("taskkill /f /im ollama.exe", { stdio: ["pipe", "pipe", "pipe"], timeout: 5000 });
        }
        else if (process.platform === "darwin") {
            // Try launchctl first (Ollama app), then pkill
            try {
                execSync("launchctl stop com.ollama.ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
            }
            catch {
                try {
                    execSync("pkill ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
                }
                catch { /* already stopped */ }
            }
        }
        else {
            // Linux: prefer the systemd unit, fall back to pkill.
            try {
                execSync("systemctl stop ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
            }
            catch {
                try {
                    execSync("pkill ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
                }
                catch { /* already stopped */ }
            }
        }
        // Verify it stopped — give the process 500 ms to wind down first.
        await new Promise(r => setTimeout(r, 500));
        const stillRunning = await isOllamaRunning();
        if (stillRunning) {
            return { ok: false, message: "Ollama is still running. Try killing it manually." };
        }
        return { ok: true, message: "Ollama stopped." };
    }
    catch (err) {
        // Unexpected failure in one of the non-wrapped kill commands.
        return { ok: false, message: `Failed to stop Ollama: ${err.message}` };
    }
}
|
|
180
|
+
/**
 * Delete a model from local disk via `ollama rm`.
 * Returns { ok, message } instead of throwing.
 *
 * Security: the model id is interpolated into a shell command line, so ids
 * are validated against the characters legal in an Ollama model reference
 * (letters, digits, ".", "_", "-", "/", ":") to prevent shell injection.
 */
export function deleteModel(modelId) {
    if (!/^[A-Za-z0-9._/:-]+$/.test(modelId)) {
        return { ok: false, message: `Invalid model id: ${modelId}` };
    }
    try {
        execSync(`ollama rm ${modelId}`, { stdio: ["pipe", "pipe", "pipe"], timeout: 30000 });
        return { ok: true, message: `Deleted ${modelId}` };
    }
    catch (err) {
        // execSync errors carry the child's stderr; fall back to the message.
        return { ok: false, message: `Failed to delete ${modelId}: ${err.stderr?.toString().trim() || err.message}` };
    }
}
|
|
190
|
+
/**
 * Best-effort GPU memory usage summary. On macOS it reports system-wide
 * memory pressure (unified memory); elsewhere it queries nvidia-smi.
 * Returns a human-readable string or null when nothing could be determined.
 */
export function getGPUMemoryUsage() {
    try {
        if (process.platform === "darwin") {
            const output = execSync("memory_pressure", { encoding: "utf-8", timeout: 3000, stdio: ["pipe", "pipe", "pipe"] });
            const free = output.match(/System-wide memory free percentage:\s*(\d+)%/);
            if (!free)
                return null;
            // Report "in use" as the complement of the free percentage.
            return `${100 - parseInt(free[1])}% system memory in use`;
        }
        // Non-macOS: try an NVIDIA card via nvidia-smi.
        const output = execSync("nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits", {
            encoding: "utf-8", timeout: 3000, stdio: ["pipe", "pipe", "pipe"],
        });
        const fields = output.trim().split(",").map(f => f.trim());
        if (fields.length === 2) {
            return `${fields[0]} MiB / ${fields[1]} MiB GPU memory`;
        }
    }
    catch { /* no GPU info available */ }
    return null;
}
|
package/package.json
CHANGED
package/src/config.ts
CHANGED
|
@@ -24,6 +24,7 @@ export interface CodemaxxingConfig {
|
|
|
24
24
|
contextCompressionThreshold?: number;
|
|
25
25
|
architectModel?: string;
|
|
26
26
|
autoLint?: boolean;
|
|
27
|
+
stopOllamaOnExit?: boolean;
|
|
27
28
|
};
|
|
28
29
|
}
|
|
29
30
|
|
|
@@ -138,6 +139,20 @@ export function loadConfig(): CodemaxxingConfig {
|
|
|
138
139
|
}
|
|
139
140
|
}
|
|
140
141
|
|
|
142
|
+
/** Save config to disk (merges with existing) */
|
|
143
|
+
export function saveConfig(updates: Partial<CodemaxxingConfig>): void {
|
|
144
|
+
const current = loadConfig();
|
|
145
|
+
const merged = {
|
|
146
|
+
...current,
|
|
147
|
+
...updates,
|
|
148
|
+
defaults: { ...current.defaults, ...(updates.defaults ?? {}) },
|
|
149
|
+
};
|
|
150
|
+
if (!existsSync(CONFIG_DIR)) {
|
|
151
|
+
mkdirSync(CONFIG_DIR, { recursive: true });
|
|
152
|
+
}
|
|
153
|
+
writeFileSync(CONFIG_FILE, JSON.stringify(merged, null, 2));
|
|
154
|
+
}
|
|
155
|
+
|
|
141
156
|
/**
|
|
142
157
|
* Apply CLI overrides to a provider config
|
|
143
158
|
*/
|