codemaxxing 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
+ import os from "os";
+ import { execSync } from "child_process";
+
+ export interface HardwareInfo {
+   cpu: { name: string; cores: number; speed: number };
+   ram: number; // bytes
+   gpu: { name: string; vram: number } | null; // vram in bytes, null if no GPU
+   os: "macos" | "linux" | "windows";
+   appleSilicon: boolean;
+ }
+
+ function getOS(): "macos" | "linux" | "windows" {
+   switch (process.platform) {
+     case "darwin": return "macos";
+     case "win32": return "windows";
+     default: return "linux";
+   }
+ }
+
+ function getCPU(): { name: string; cores: number; speed: number } {
+   const cpus = os.cpus();
+   return {
+     name: cpus[0]?.model?.trim() ?? "Unknown CPU",
+     cores: cpus.length,
+     speed: cpus[0]?.speed ?? 0, // MHz
+   };
+ }
+
+ function getGPU(platform: "macos" | "linux" | "windows"): { name: string; vram: number } | null {
+   try {
+     if (platform === "macos") {
+       const raw = execSync("system_profiler SPDisplaysDataType -json", {
+         encoding: "utf-8",
+         timeout: 5000,
+         stdio: ["pipe", "pipe", "pipe"],
+       });
+       const data = JSON.parse(raw);
+       const displays = data?.SPDisplaysDataType;
+       if (Array.isArray(displays) && displays.length > 0) {
+         const gpu = displays[0];
+         const name: string = gpu.sppci_model ?? gpu._name ?? "Unknown GPU";
+         // Discrete GPUs report spdisplays_vram; integrated GPUs report spdisplays_vram_shared
+         const vramStr: string = gpu["spdisplays_vram"] ?? gpu["spdisplays_vram_shared"] ?? "";
+         let vram = 0;
+         if (vramStr) {
+           const match = vramStr.match(/(\d+)\s*(GB|MB)/i);
+           if (match) {
+             vram = parseInt(match[1]) * (match[2].toUpperCase() === "GB" ? 1024 * 1024 * 1024 : 1024 * 1024);
+           }
+         }
+         // Apple Silicon unified memory — use total RAM as VRAM
+         if (vram === 0 && name.toLowerCase().includes("apple")) {
+           vram = os.totalmem();
+         }
+         return { name, vram };
+       }
+     }
+
+     if (platform === "linux") {
+       // Try NVIDIA first
+       try {
+         const raw = execSync("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader", {
+           encoding: "utf-8",
+           timeout: 5000,
+           stdio: ["pipe", "pipe", "pipe"],
+         });
+         const line = raw.trim().split("\n")[0];
+         if (line) {
+           const parts = line.split(",").map(s => s.trim());
+           const name = parts[0] ?? "NVIDIA GPU";
+           const memMatch = (parts[1] ?? "").match(/(\d+)/);
+           const vram = memMatch ? parseInt(memMatch[1]) * 1024 * 1024 : 0; // MiB to bytes
+           return { name, vram };
+         }
+       } catch {
+         // No NVIDIA, try lspci
+         try {
+           const raw = execSync("lspci | grep -i vga", {
+             encoding: "utf-8",
+             timeout: 5000,
+             stdio: ["pipe", "pipe", "pipe"],
+           });
+           const line = raw.trim().split("\n")[0];
+           if (line) {
+             const name = line.split(":").slice(2).join(":").trim() || "Unknown GPU";
+             return { name, vram: 0 };
+           }
+         } catch { /* no lspci */ }
+       }
+     }
+
+     if (platform === "windows") {
+       try {
+         const raw = execSync("wmic path win32_VideoController get Name,AdapterRAM /format:csv", {
+           encoding: "utf-8",
+           timeout: 5000,
+           stdio: ["pipe", "pipe", "pipe"],
+         });
+         // wmic CSV columns are alphabetical after Node: Node,AdapterRAM,Name
+         const lines = raw.trim().split("\n").filter(l => l.trim() && !l.startsWith("Node"));
+         if (lines.length > 0) {
+           const parts = lines[0].split(",");
+           const adapterRAM = parseInt(parts[1] ?? "0");
+           const name = parts[2]?.trim() ?? "Unknown GPU";
+           return { name, vram: isNaN(adapterRAM) ? 0 : adapterRAM };
+         }
+       } catch { /* no wmic */ }
+     }
+   } catch {
+     // GPU detection failed
+   }
+   return null;
+ }
+
+ export function detectHardware(): HardwareInfo {
+   const platform = getOS();
+   const cpu = getCPU();
+   const ram = os.totalmem();
+   const gpu = getGPU(platform);
+
+   // Detect Apple Silicon (CPU model reads "Apple M1", "Apple M2", ...)
+   const appleSilicon = platform === "macos" && /apple\s+m/i.test(cpu.name);
+
+   return { cpu, ram, gpu, os: platform, appleSilicon };
+ }
+
+ /** Format bytes to human-readable string */
+ export function formatBytes(bytes: number): string {
+   if (bytes >= 1024 * 1024 * 1024) return `${Math.round(bytes / (1024 * 1024 * 1024))} GB`;
+   if (bytes >= 1024 * 1024) return `${Math.round(bytes / (1024 * 1024))} MB`;
+   return `${Math.round(bytes / 1024)} KB`;
+ }
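
For orientation, here is a minimal usage sketch of the new hardware module (hypothetical consumer code, not part of the package; the ./hardware.js specifier is assumed from the import in the next file):

import { detectHardware, formatBytes } from "./hardware.js";

const hw = detectHardware();
console.log(`CPU: ${hw.cpu.name} (${hw.cpu.cores} cores @ ${hw.cpu.speed} MHz)`);
console.log(`RAM: ${formatBytes(hw.ram)}`);
console.log(hw.gpu ? `GPU: ${hw.gpu.name} (${formatBytes(hw.gpu.vram)} VRAM)` : "GPU: none detected");
console.log(`Apple Silicon: ${hw.appleSilicon}`);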
@@ -0,0 +1,217 @@
+ import { execSync } from "child_process";
+ import type { HardwareInfo } from "./hardware.js";
+
+ export interface RecommendedModel {
+   name: string; // Display name
+   ollamaId: string; // Ollama model ID
+   size: number; // Download size in GB
+   ramRequired: number; // Minimum RAM in GB
+   vramOptimal: number; // Optimal VRAM in GB (0 = CPU fine)
+   description: string; // One-liner
+   speed: string; // e.g., "~45 tok/s on M1"
+   quality: "good" | "great" | "best";
+ }
+
+ export type ModelFit = "perfect" | "good" | "tight" | "skip";
+
+ export interface ScoredModel extends RecommendedModel {
+   fit: ModelFit;
+ }
+
+ const MODELS: RecommendedModel[] = [
+   {
+     name: "Qwen 2.5 Coder 3B",
+     ollamaId: "qwen2.5-coder:3b",
+     size: 2,
+     ramRequired: 8,
+     vramOptimal: 4,
+     description: "Lightweight, fast coding model",
+     speed: "~60 tok/s on M1",
+     quality: "good",
+   },
+   {
+     name: "Qwen 2.5 Coder 7B",
+     ollamaId: "qwen2.5-coder:7b",
+     size: 5,
+     ramRequired: 16,
+     vramOptimal: 8,
+     description: "Sweet spot for most machines",
+     speed: "~45 tok/s on M1",
+     quality: "great",
+   },
+   {
+     name: "Qwen 2.5 Coder 14B",
+     ollamaId: "qwen2.5-coder:14b",
+     size: 9,
+     ramRequired: 32,
+     vramOptimal: 16,
+     description: "High quality coding",
+     speed: "~25 tok/s on M1 Pro",
+     quality: "best",
+   },
+   {
+     name: "Qwen 2.5 Coder 32B",
+     ollamaId: "qwen2.5-coder:32b",
+     size: 20,
+     ramRequired: 48,
+     vramOptimal: 32,
+     description: "Premium quality, needs lots of RAM",
+     speed: "~12 tok/s on M1 Max",
+     quality: "best",
+   },
+   {
+     name: "DeepSeek Coder V2 16B",
+     ollamaId: "deepseek-coder-v2:16b",
+     size: 9,
+     ramRequired: 32,
+     vramOptimal: 16,
+     description: "Strong alternative for coding",
+     speed: "~30 tok/s on M1 Pro",
+     quality: "great",
+   },
+   {
+     name: "CodeLlama 7B",
+     ollamaId: "codellama:7b",
+     size: 4,
+     ramRequired: 16,
+     vramOptimal: 8,
+     description: "Meta's coding model",
+     speed: "~40 tok/s on M1",
+     quality: "good",
+   },
+   {
+     name: "StarCoder2 7B",
+     ollamaId: "starcoder2:7b",
+     size: 4,
+     ramRequired: 16,
+     vramOptimal: 8,
+     description: "Good for code completion",
+     speed: "~40 tok/s on M1",
+     quality: "good",
+   },
+ ];
+
+ function scoreModel(model: RecommendedModel, ramGB: number, vramGB: number): ModelFit {
+   if (ramGB < model.ramRequired) return "skip";
+
+   const ramHeadroom = ramGB - model.ramRequired;
+   const hasGoodVRAM = vramGB >= model.vramOptimal;
+
+   if (hasGoodVRAM && ramHeadroom >= 4) return "perfect";
+   if (hasGoodVRAM || ramHeadroom >= 8) return "good";
+   if (ramHeadroom >= 0) return "tight";
+   return "skip";
+ }
+
+ const qualityOrder: Record<string, number> = { best: 3, great: 2, good: 1 };
+ const fitOrder: Record<string, number> = { perfect: 4, good: 3, tight: 2, skip: 1 };
+
+ export function getRecommendations(hardware: HardwareInfo): ScoredModel[] {
+   const ramGB = hardware.ram / (1024 * 1024 * 1024);
+   const vramGB = hardware.gpu?.vram ? hardware.gpu.vram / (1024 * 1024 * 1024) : 0;
+
+   // Apple Silicon uses unified memory — VRAM = RAM
+   const effectiveVRAM = hardware.appleSilicon ? ramGB : vramGB;
+
+   const scored: ScoredModel[] = MODELS.map((m) => ({
+     ...m,
+     fit: scoreModel(m, ramGB, effectiveVRAM),
+   }));
+
+   // Sort: perfect first, then by quality descending
+   scored.sort((a, b) => {
+     const fitDiff = (fitOrder[b.fit] ?? 0) - (fitOrder[a.fit] ?? 0);
+     if (fitDiff !== 0) return fitDiff;
+     return (qualityOrder[b.quality] ?? 0) - (qualityOrder[a.quality] ?? 0);
+   });
+
+   return scored;
+ }
+
+ export function getFitIcon(fit: ModelFit): string {
+   switch (fit) {
+     case "perfect": return "\u2B50"; // ⭐
+     case "good": return "\u2705"; // ✅
+     case "tight": return "\u26A0\uFE0F"; // ⚠️
+     case "skip": return "\u274C"; // ❌
+   }
+ }
+
+ /** Check if llmfit binary is available */
+ export function isLlmfitAvailable(): boolean {
+   try {
+     const cmd = process.platform === "win32" ? "where llmfit" : "which llmfit";
+     execSync(cmd, { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
+     return true;
+   } catch {
+     return false;
+   }
+ }
+
+ interface LlmfitModel {
+   name: string;
+   provider: string;
+   params_b: number;
+   quant: string;
+   fit: string;
+   estimated_tps: number;
+   vram_gb: number;
+   ram_gb: number;
+ }
+
+ function mapLlmfitFit(fit: string): ModelFit {
+   switch (fit) {
+     case "perfect": return "perfect";
+     case "good": return "good";
+     case "marginal": return "tight";
+     default: return "skip";
+   }
+ }
+
+ function mapLlmfitQuality(params_b: number): "good" | "great" | "best" {
+   if (params_b >= 14) return "best";
+   if (params_b >= 7) return "great";
+   return "good";
+ }
+
+ /** Get recommendations using llmfit if available, otherwise fall back to hardcoded list */
+ export function getRecommendationsWithLlmfit(hardware: HardwareInfo): { models: ScoredModel[]; usedLlmfit: boolean } {
+   if (!isLlmfitAvailable()) {
+     return { models: getRecommendations(hardware), usedLlmfit: false };
+   }
+
+   try {
+     const raw = execSync("llmfit recommend --use-case coding --format json --limit 10", {
+       encoding: "utf-8",
+       timeout: 15000,
+       stdio: ["pipe", "pipe", "pipe"],
+     });
+
+     const llmfitModels: LlmfitModel[] = JSON.parse(raw.trim());
+     if (!Array.isArray(llmfitModels) || llmfitModels.length === 0) {
+       return { models: getRecommendations(hardware), usedLlmfit: false };
+     }
+
+     const scored: ScoredModel[] = llmfitModels
+       .filter((m) => m.provider === "ollama")
+       .map((m) => ({
+         name: m.name.split(":")[0]?.replace(/-/g, " ").replace(/\b\w/g, c => c.toUpperCase()) + ` ${m.params_b}B`,
+         ollamaId: m.name,
+         size: Math.ceil(m.vram_gb),
+         ramRequired: Math.ceil(m.ram_gb),
+         vramOptimal: Math.ceil(m.vram_gb),
+         description: `${m.quant} · ~${m.estimated_tps.toFixed(0)} tok/s`,
+         speed: `~${m.estimated_tps.toFixed(0)} tok/s`,
+         quality: mapLlmfitQuality(m.params_b),
+         fit: mapLlmfitFit(m.fit),
+       }));
+
+     if (scored.length === 0) {
+       return { models: getRecommendations(hardware), usedLlmfit: false };
+     }
+
+     return { models: scored, usedLlmfit: true };
+   } catch {
+     return { models: getRecommendations(hardware), usedLlmfit: false };
+   }
+ }
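
A sketch of how the recommendation API composes with the hardware module (hypothetical caller; the ./models.js path is a guess, since the diff does not show file names):

import { detectHardware } from "./hardware.js";
import { getRecommendationsWithLlmfit, getFitIcon } from "./models.js"; // path assumed

const hw = detectHardware();
const { models, usedLlmfit } = getRecommendationsWithLlmfit(hw);
console.log(usedLlmfit ? "Recommendations via llmfit:" : "Built-in recommendations:");
for (const m of models) {
  if (m.fit === "skip") continue; // hide models this machine can't run
  console.log(`${getFitIcon(m.fit)} ${m.name} (${m.ollamaId}): ${m.description}`);
}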
@@ -0,0 +1,232 @@
+ import { execSync, spawn } from "child_process";
+
+ /** Check if ollama binary exists on PATH */
+ export function isOllamaInstalled(): boolean {
+   try {
+     const cmd = process.platform === "win32" ? "where ollama" : "which ollama";
+     execSync(cmd, { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
+     return true;
+   } catch {
+     return false;
+   }
+ }
+
+ /** Check if ollama server is responding */
+ export async function isOllamaRunning(): Promise<boolean> {
+   try {
+     const controller = new AbortController();
+     const timeout = setTimeout(() => controller.abort(), 2000);
+     const res = await fetch("http://localhost:11434/api/tags", { signal: controller.signal });
+     clearTimeout(timeout);
+     return res.ok;
+   } catch {
+     return false;
+   }
+ }
+
+ /** Get the install command for the user's OS */
+ export function getOllamaInstallCommand(os: "macos" | "linux" | "windows"): string {
+   switch (os) {
+     case "macos": return "brew install ollama";
+     case "linux": return "curl -fsSL https://ollama.com/install.sh | sh";
+     case "windows": return "winget install Ollama.Ollama";
+   }
+ }
+
+ /** Start ollama serve in background */
+ export function startOllama(): void {
+   const child = spawn("ollama", ["serve"], {
+     detached: true,
+     stdio: "ignore",
+   });
+   child.unref();
+ }
+
+ export interface PullProgress {
+   status: string;
+   total?: number;
+   completed?: number;
+   percent: number;
+ }
+
+ /**
+  * Pull a model from Ollama registry.
+  * Calls onProgress with download updates.
+  * Returns a promise that resolves when complete.
+  */
+ export function pullModel(
+   modelId: string,
+   onProgress?: (progress: PullProgress) => void
+ ): Promise<void> {
+   return new Promise((resolve, reject) => {
+     const child = spawn("ollama", ["pull", modelId], {
+       stdio: ["pipe", "pipe", "pipe"],
+     });
+
+     let lastOutput = "";
+
+     const parseLine = (data: string) => {
+       lastOutput = data;
+       // Ollama pull output looks like:
+       //   pulling manifest
+       //   pulling abc123... 58% ▕██████████░░░░░░░░░░▏ 2.9 GB/5.0 GB
+       //   verifying sha256 digest
+       //   writing manifest
+       //   success
+
+       // Try to parse percentage
+       const pctMatch = data.match(/(\d+)%/);
+       const sizeMatch = data.match(/([\d.]+)\s*GB\s*\/\s*([\d.]+)\s*GB/);
+
+       if (pctMatch) {
+         const percent = parseInt(pctMatch[1]);
+         let completed: number | undefined;
+         let total: number | undefined;
+         if (sizeMatch) {
+           completed = parseFloat(sizeMatch[1]) * 1024 * 1024 * 1024;
+           total = parseFloat(sizeMatch[2]) * 1024 * 1024 * 1024;
+         }
+         onProgress?.({ status: "downloading", total, completed, percent });
+       } else if (data.includes("pulling manifest")) {
+         onProgress?.({ status: "pulling manifest", percent: 0 });
+       } else if (data.includes("verifying")) {
+         onProgress?.({ status: "verifying", percent: 100 });
+       } else if (data.includes("writing manifest")) {
+         onProgress?.({ status: "writing manifest", percent: 100 });
+       } else if (data.includes("success")) {
+         onProgress?.({ status: "success", percent: 100 });
+       }
+     };
+
+     child.stdout?.on("data", (data: Buffer) => {
+       parseLine(data.toString().trim());
+     });
+
+     child.stderr?.on("data", (data: Buffer) => {
+       // Ollama writes progress to stderr
+       parseLine(data.toString().trim());
+     });
+
+     child.on("close", (code) => {
+       if (code === 0) {
+         resolve();
+       } else {
+         reject(new Error(`ollama pull failed (exit ${code}): ${lastOutput}`));
+       }
+     });
+
+     child.on("error", (err) => {
+       reject(new Error(`Failed to run ollama pull: ${err.message}`));
+     });
+   });
+ }
+
+ export interface OllamaModelInfo {
+   name: string;
+   size: number; // bytes
+   modified_at: string;
+   digest: string;
+ }
+
+ /** List models installed in Ollama with detailed info */
+ export async function listInstalledModelsDetailed(): Promise<OllamaModelInfo[]> {
+   try {
+     const controller = new AbortController();
+     const timeout = setTimeout(() => controller.abort(), 3000);
+     const res = await fetch("http://localhost:11434/api/tags", { signal: controller.signal });
+     clearTimeout(timeout);
+     if (res.ok) {
+       const data = (await res.json()) as { models?: Array<{ name: string; size: number; modified_at: string; digest: string }> };
+       return (data.models ?? []).map((m) => ({
+         name: m.name,
+         size: m.size,
+         modified_at: m.modified_at,
+         digest: m.digest,
+       }));
+     }
+   } catch { /* not running */ }
+   return [];
+ }
+
+ /** List models installed in Ollama */
+ export async function listInstalledModels(): Promise<string[]> {
+   const models = await listInstalledModelsDetailed();
+   return models.map((m) => m.name);
+ }
+
+ /** Stop all loaded models (frees VRAM) and kill the Ollama server process */
+ export async function stopOllama(): Promise<{ ok: boolean; message: string }> {
+   try {
+     // First unload all models from memory
+     try {
+       execSync("ollama stop", { stdio: ["pipe", "pipe", "pipe"], timeout: 5000 });
+     } catch { /* may fail if no models loaded */ }
+
+     // Kill the server process
+     if (process.platform === "win32") {
+       execSync("taskkill /f /im ollama.exe", { stdio: ["pipe", "pipe", "pipe"], timeout: 5000 });
+     } else if (process.platform === "darwin") {
+       // Try launchctl first (Ollama app), then pkill
+       try {
+         execSync("launchctl stop com.ollama.ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
+       } catch {
+         try {
+           execSync("pkill ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
+         } catch { /* already stopped */ }
+       }
+     } else {
+       // Linux
+       try {
+         execSync("systemctl stop ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
+       } catch {
+         try {
+           execSync("pkill ollama", { stdio: ["pipe", "pipe", "pipe"], timeout: 3000 });
+         } catch { /* already stopped */ }
+       }
+     }
+
+     // Verify it stopped
+     await new Promise(r => setTimeout(r, 500));
+     const stillRunning = await isOllamaRunning();
+     if (stillRunning) {
+       return { ok: false, message: "Ollama is still running. Try killing it manually." };
+     }
+     return { ok: true, message: "Ollama stopped." };
+   } catch (err: any) {
+     return { ok: false, message: `Failed to stop Ollama: ${err.message}` };
+   }
+ }
+
+ /** Delete a model from disk */
+ export function deleteModel(modelId: string): { ok: boolean; message: string } {
+   try {
+     execSync(`ollama rm ${modelId}`, { stdio: ["pipe", "pipe", "pipe"], timeout: 30000 });
+     return { ok: true, message: `Deleted ${modelId}` };
+   } catch (err: any) {
+     return { ok: false, message: `Failed to delete ${modelId}: ${err.stderr?.toString().trim() || err.message}` };
+   }
+ }
+
+ /** Get GPU memory usage info (best-effort) */
+ export function getGPUMemoryUsage(): string | null {
+   try {
+     if (process.platform === "darwin") {
+       // Apple Silicon — check memory pressure
+       const raw = execSync("memory_pressure", { encoding: "utf-8", timeout: 3000, stdio: ["pipe", "pipe", "pipe"] });
+       const match = raw.match(/System-wide memory free percentage:\s*(\d+)%/);
+       if (match) {
+         return `${100 - parseInt(match[1])}% system memory in use`;
+       }
+       return null;
+     }
+     // NVIDIA GPU
+     const raw = execSync("nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits", {
+       encoding: "utf-8", timeout: 3000, stdio: ["pipe", "pipe", "pipe"],
+     });
+     const parts = raw.trim().split(",").map(s => s.trim());
+     if (parts.length === 2) {
+       return `${parts[0]} MiB / ${parts[1]} MiB GPU memory`;
+     }
+   } catch { /* no GPU info available */ }
+   return null;
+ }
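
Finally, a sketch of the install-check and pull flow these helpers enable (hypothetical caller; the ./ollama.js path and the one-second wait after startOllama() are assumptions, since the module exposes no readiness hook beyond isOllamaRunning()):

import { isOllamaInstalled, isOllamaRunning, startOllama, pullModel } from "./ollama.js"; // path assumed

async function ensureModel(modelId: string): Promise<void> {
  if (!isOllamaInstalled()) throw new Error("Ollama is not installed");
  if (!(await isOllamaRunning())) {
    startOllama();
    await new Promise((r) => setTimeout(r, 1000)); // assumed: give the server time to bind
  }
  await pullModel(modelId, (p) => {
    process.stdout.write(`\r${p.status} ${p.percent}%`);
  });
  console.log("\nModel ready.");
}

ensureModel("qwen2.5-coder:7b").catch(console.error);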