localcoder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
localcoder/cli.py ADDED
@@ -0,0 +1,827 @@
1
+ """localcoder CLI — main entry point."""
2
+ import argparse, json, os, sys, time
3
+
4
+ from rich.console import Console
5
+
6
+ console = Console()
7
+
8
+
9
# NOTE(review): the reviewed copy of this file had runs of whitespace collapsed
# inside string literals (help epilog, logo art, menu lines). Literals below are
# reproduced exactly as seen — confirm spacing against the original source.

_OLLAMA_API_BASE = "http://127.0.0.1:11434/v1"
_LLAMACPP_API_BASE = "http://127.0.0.1:8089/v1"
_INTERACTIVE = "__interactive__"

# Exit codes returned by the Textual health dashboard to request a follow-up action.
_TUI_CLEANUP = 10
_TUI_DEBLOAT = 11
_TUI_SIMULATE = 12

_LOGO_BORDER = "#e07a5f"
_LOGO_ACCENT = "#81b29a"
_LOGO_WIDTH = 48
_LOGO_TOP = f" [{_LOGO_BORDER}]┌──────────────────────────────────────────────────┐[/]"
_LOGO_BOTTOM = f" [{_LOGO_BORDER}]└──────────────────────────────────────────────────┘[/]"
_LOGO_SUBTITLE = (
    f" [{_LOGO_BORDER}]✦[/] [dim]Command-line interface[/] [bold {_LOGO_ACCENT}]✓ offline[/]"
)
# (colour, art) per logo row; rendered as "[bold <colour>]<art>[/]".
_LOGO_ROWS = [
    ("#e07a5f", "██╗ ██████╗ ██████╗ █████╗ ██╗ "),
    ("#d4725a", "██║ ██╔═══██╗██╔════╝██╔══██╗██║ "),
    ("#c96a55", "██║ ██║ ██║██║ ███████║██║ "),
    ("#be6250", "██║ ██║ ██║██║ ██╔══██║██║ "),
    ("#b35a4b", "███████╗╚██████╔╝╚██████╗██║ ██║███████╗"),
    ("#a85246", "╚══════╝ ╚═════╝ ╚═════╝╚═╝ ╚═╝╚══════╝"),
    ("#81b29a", " ██████╗ ██████╗ ██████╗ ███████╗██████╗ "),
    ("#76a890", "██╔════╝██╔═══██╗██╔══██╗██╔════╝██╔══██╗"),
    ("#6b9e86", "██║ ██║ ██║██║ ██║█████╗ ██████╔╝"),
    ("#60947c", "██║ ██║ ██║██║ ██║██╔══╝ ██╔══██╗"),
    ("#558a72", "╚██████╗╚██████╔╝██████╔╝███████╗██║ ██║"),
    ("#4a8068", " ╚═════╝ ╚═════╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝"),
]

_STATUS_COLORS = {"healthy": "green", "degraded": "yellow", "critical": "red"}


# ── Small pure helpers ──────────────────────────────────────────────────────

def _format_size(mb):
    """Render a size given in MB as ``"1.5GB"`` (>= 1024 MB) or ``"512MB"``."""
    return f"{mb / 1024:.1f}GB" if mb >= 1024 else f"{mb}MB"


def _format_downloads(count):
    """Render a download count as ``"45K"`` or ``"2.5M"``."""
    return f"{count // 1000}K" if count < 1_000_000 else f"{count / 1_000_000:.1f}M"


def _display_name(proc):
    """Return the process name, suffixed with ``×N`` when it groups N > 1 processes."""
    count = proc.get("count", 1)
    return proc["name"] + (f" ×{count}" if count > 1 else "")


def _parse_selection(answer, items):
    """Translate a menu answer (``"a"`` or ``"1,3"``) into the selected items.

    Unparseable or out-of-range parts are ignored; duplicates are dropped so a
    target is never signalled twice.
    """
    if answer == "a":
        return list(items)
    picked = []
    for part in answer.replace(" ", "").split(","):
        try:
            idx = int(part) - 1
        except ValueError:
            continue
        if 0 <= idx < len(items) and items[idx] not in picked:
            picked.append(items[idx])
    return picked


def _resolve_model_choice(choice, n_known, n_community):
    """Classify a picker answer.

    Returns ``("known", i)``, ``("community", i)``, ``("invalid", None)`` for a
    number outside both lists, or ``("text", None)`` when it is not a number.
    """
    try:
        idx = int(choice) - 1
    except ValueError:
        return ("text", None)
    if 0 <= idx < n_known:
        return ("known", idx)
    # Lower bound matters: without it "0" or a negative number would index the
    # community list from the end.
    if n_known <= idx < n_known + n_community:
        return ("community", idx - n_known)
    return ("invalid", None)


def _resolve_backend(args, cfg):
    """Pick ``(backend_id, api_base, model)`` from CLI overrides and saved config."""
    api_base = args.api or cfg.get("api_base", _LLAMACPP_API_BASE)
    model = args.model or cfg.get("model", "gemma4-26b")
    backend_id = cfg.get("backend", "llamacpp")

    # An explicit --api pointing at a known port selects that backend.
    if args.api and "11434" in args.api:
        backend_id = "ollama"
    elif args.api and "8089" in args.api:
        backend_id = "llamacpp"
    # An Ollama-style "name:tag" model forces the Ollama backend.
    if args.model and ":" in args.model:
        backend_id = "ollama"
        api_base = _OLLAMA_API_BASE
    return backend_id, api_base, model


def _bar(fraction, color, width=30):
    """Return Rich markup for a horizontal bar filled to ``fraction`` (0..1)."""
    filled = int(fraction * width)
    return f"[{color}]{'━' * filled}[/{color}][dim]{'─' * (width - filled)}[/]"


# ── Shared interactive helpers ──────────────────────────────────────────────

def _clear_screen():
    """Clear the terminal using the platform's command."""
    os.system("clear" if os.name != "nt" else "cls")


def _pause(prompt=" "):
    """Wait for Enter; Ctrl-C / EOF simply continue."""
    try:
        input(prompt)
    except (EOFError, KeyboardInterrupt):
        pass


def _heavy_killable_processes():
    """Return killable app/bloat processes above 300 MB, as reported by the backend."""
    from localcoder.backends import get_top_memory_processes

    procs = get_top_memory_processes(min_mb=200)
    return [
        p for p in procs
        if p["killable"] and p["category"] in ("app", "bloat") and p["mb"] > 300
    ]


def _terminate(targets, done_message):
    """Send SIGTERM to every PID of each target and print ``done_message`` per target."""
    import signal

    for target in targets:
        pids = target["pids"] if "pids" in target else [target["pid"]]
        for pid in pids:
            try:
                os.kill(pid, signal.SIGTERM)
            except (ProcessLookupError, PermissionError):
                pass  # already gone or not ours — best effort
        console.print(done_message.format(name=target["name"]))


def _report_ml_cleanup(result, unloaded_label):
    """Print what ``cleanup_gpu_memory`` unloaded/killed; return True if anything was."""
    if result["ollama_unloaded"]:
        console.print(f" [green]{unloaded_label}: {', '.join(result['ollama_unloaded'])}[/]")
    for killed in result["processes_killed"] or []:
        console.print(f" [green]Killed llama-server PID {killed['pid']}[/]")
    return bool(result["ollama_unloaded"] or result["processes_killed"])


# ── Sub-commands ────────────────────────────────────────────────────────────

def _run_cleanup():
    """GPU cleanup wizard: optionally quit heavy apps, then unload ML backends."""
    from localcoder.backends import (
        cleanup_gpu_memory, print_health_dashboard, print_machine_specs,
    )

    print_health_dashboard()

    killable = _heavy_killable_processes()
    if not killable:
        console.print("\n [dim]No heavy apps to clean up.[/]")
    else:
        console.print("\n [bold]Quick cleanup — select apps to quit:[/]\n")
        for i, proc in enumerate(killable, 1):
            console.print(
                f" [bold]{i}.[/] {_display_name(proc)} [dim]({_format_size(proc['mb'])})[/]"
            )
        console.print(" [bold]a.[/] All of the above")
        console.print(" [bold]0.[/] Skip\n")

        try:
            ans = input(" Choose (e.g. 1,3 or a): ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            ans = "0"

        if ans and ans != "0":
            _terminate(_parse_selection(ans, killable), " [green]✓ Quit {name}[/]")
            time.sleep(2)  # give the apps a moment to exit before re-measuring

    # Also clean ML processes
    console.print("\n [bold yellow]Cleaning ML backends...[/]")
    if not _report_ml_cleanup(cleanup_gpu_memory(force=True), "Unloaded Ollama"):
        console.print(" [dim]No ML backends to clean.[/]")

    console.print()
    print_machine_specs()


def _run_simulate(target):
    """Check whether a model fits; ``target`` is a spec or the interactive sentinel."""
    from localcoder.backends import simulate_model_fit

    if target != _INTERACTIVE:
        simulate_model_fit(target)
        return

    from localcoder.backends import (
        COMMUNITY_CODING_MODELS, MODELS, get_machine_specs, simulate_hf_model,
    )

    specs = get_machine_specs()
    gpu_mb = specs["gpu_total_mb"]
    _clear_screen()
    console.print(f"\n [bold]Will it fit?[/] · {specs['chip']} · {specs['ram_gb']}GB RAM\n")

    # Known models
    known = list(MODELS.items())
    for i, (_, m) in enumerate(known, 1):
        size = m["size_gb"]
        icon = "[green]✓[/]" if size * 1024 < gpu_mb else "[red]✗[/]"
        console.print(
            f" {icon} [bold]{i:>2}.[/] {m['name']:<30} {size:>5}GB"
            f" [dim]{m.get('description', '')[:45]}[/]"
        )

    # Community coding models, numbered after the known ones
    console.print("\n [dim]── r/LocalLLaMA top coding models ──[/]")
    community = list(COMMUNITY_CODING_MODELS.items())
    for j, (_, m) in enumerate(community, len(known) + 1):
        console.print(
            f" [bold]{j:>2}.[/] {m['name']:<28}"
            f" [dim]{m['vram']:>8}[/]"
            f" [dim]{m['note'][:40]}[/]"
        )

    console.print("\n [bold] s.[/] Search HuggingFace [dim](paste URL or search term)[/]")
    console.print(" [bold] c.[/] Custom [dim](type size like '13b q4')[/]")
    console.print(" [bold] q.[/] Quit\n")

    try:
        choice = input(" > ").strip().lower()
        if choice == "q" or not choice:
            return
        if choice == "s":
            query = input(" Search or paste URL: ").strip()
            if query:
                simulate_hf_model(query)
            return
        if choice == "c":
            custom = input(" Model (e.g. '70b q4'): ").strip()
            if custom:
                simulate_model_fit(custom)
            return
    except (EOFError, KeyboardInterrupt):
        return

    kind, idx = _resolve_model_choice(choice, len(known), len(community))
    if kind == "known":
        simulate_model_fit(known[idx][0])
    elif kind == "community":
        simulate_hf_model(community[idx][1]["hf"])
    elif kind == "text":
        # Maybe they typed a model spec or URL
        if "/" in choice or "huggingface" in choice:
            simulate_hf_model(choice)
        else:
            simulate_model_fit(choice)
    else:
        console.print(" [dim]Invalid number.[/]")


def _run_health():
    """GPU health diagnostic: Textual TUI when available, Rich dashboard otherwise."""
    try:
        from localcoder.tui import run_tui_dashboard
        result = run_tui_dashboard()
    except Exception:
        # TUI missing or failed to start — deliberate best-effort fallback below.
        pass
    else:
        # Dispatch directly: these handlers are not reachable by "falling through".
        if result == _TUI_CLEANUP:
            _run_cleanup()
        elif result == _TUI_DEBLOAT:
            from localcoder.backends import debloat_wizard
            debloat_wizard()
        elif result == _TUI_SIMULATE:
            _run_simulate(_INTERACTIVE)
        return

    from localcoder.backends import cleanup_gpu_memory, print_health_dashboard

    diag = print_health_dashboard()

    # Interactive kill prompt, only when the machine is actually struggling
    killable = _heavy_killable_processes()
    if not (killable and diag["status"] in ("critical", "degraded")):
        return

    console.print(" [bold]Kill processes to free memory?[/]")
    for i, proc in enumerate(killable, 1):
        tag = "[red]bloat[/]" if proc["category"] == "bloat" else "[yellow]app[/]"
        console.print(
            f" [bold]{i}[/]. {_display_name(proc)} {tag} [dim]{_format_size(proc['mb'])}[/]"
        )
    console.print(" [bold]a[/]. All [bold]m[/]. ML backends only [bold]q[/]. Quit\n")

    try:
        ans = input(" > ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        ans = "q"

    if not ans or ans == "q":
        return
    if ans == "m":
        _report_ml_cleanup(cleanup_gpu_memory(force=True), "Unloaded")
    else:
        _terminate(_parse_selection(ans, killable), " [green]✓[/] Killed {name}")

    time.sleep(2)  # let memory settle before re-rendering
    console.print()
    print_health_dashboard()


def _run_status():
    """Print a table of backends with install/run state and their first models."""
    from localcoder.backends import discover_all
    from rich.table import Table

    table = Table(title="Backend Status", show_header=True, header_style="bold cyan")
    for heading in ("Backend", "Installed", "Running", "Models"):
        table.add_column(heading)
    for d in discover_all():
        installed = "[green]✓[/]" if d["installed"] else "[red]✗[/]"
        running = f"[green]:{d['port']}[/]" if d["running"] else "[dim]—[/]"
        models = ", ".join(d["models"][:5]) or "[dim]none[/]"
        table.add_row(d["name"], installed, running, models)
    console.print(table)


def _run_models():
    """List every model each discovered backend reports."""
    from localcoder.backends import discover_all

    discovery = discover_all()
    for d in discovery:
        if d["models"]:
            console.print(f"\n [bold]{d['name']}[/] [dim](:{d['port']})[/]")
            for m in d["models"]:
                console.print(f" [cyan]{m}[/]")
    if not any(d["models"] for d in discovery):
        console.print(" [dim]No models found. Run: localcoder --setup[/]")


# ── Backend start-up ────────────────────────────────────────────────────────

def _ensure_backend_running(cfg, backend_id, api_base, model):
    """Start the chosen backend if needed, falling back to the other one.

    Returns the (possibly updated) ``(api_base, model)``, or ``None`` when the
    user cancelled or no backend could be used.
    """
    from localcoder.backends import (
        BACKENDS, MODELS, can_run_simultaneously, check_backend_installed,
        check_backend_running, get_gpu_memory_info, get_system_ram_gb,
        start_llama_server, start_ollama_serve, stop_conflicting_backends,
    )

    if check_backend_running(backend_id):
        return api_base, model

    ram = get_system_ram_gb()
    model_size = MODELS.get(cfg.get("model_id", ""), {}).get("size_gb", 12)

    # Check if another backend is hogging GPU
    other = "ollama" if backend_id == "llamacpp" else "llamacpp"
    if check_backend_running(other) and not can_run_simultaneously(ram, model_size, 0):
        gpu = get_gpu_memory_info()
        other_name = BACKENDS[other]["name"]
        console.print("\n [yellow]GPU memory conflict detected[/]")
        console.print(
            f" [dim]RAM: {ram}GB · GPU limit: ~{gpu['total_mb']//1024}GB"
            f" · {other_name} is using GPU[/]"
        )
        console.print(
            f" [dim]Need ~{model_size}GB for model — not enough with {other_name} loaded.[/]"
        )
        console.print()
        console.print(
            f" [bold]1.[/] Stop {other_name} and use {BACKENDS[backend_id]['name']}"
            " [dim](recommended)[/]"
        )
        console.print(" [bold]2.[/] Try anyway [dim](will be very slow — swap thrashing)[/]")
        console.print(" [bold]3.[/] Cancel")
        try:
            ans = input("\n Choose (1/2/3): ").strip()
        except (EOFError, KeyboardInterrupt):
            ans = "1"
        if ans == "1":
            stop_conflicting_backends(backend_id)
        elif ans == "3":
            return None
        # any other answer ("2") continues without stopping

    console.print(f" [yellow]Starting {BACKENDS[backend_id]['name']}...[/]")
    if backend_id == "llamacpp":
        model_id = cfg.get("model_id", "gemma4-26b")
        if start_llama_server(model_id):
            return api_base, model
        console.print(" [red]Failed to start llama-server.[/]")
        if not check_backend_installed("ollama"):
            console.print(" [red]No backend available. Run: localcoder --setup[/]")
            return None
        console.print(" [yellow]Falling back to Ollama...[/]")
        if not check_backend_running("ollama"):
            start_ollama_serve()
        ollama_model = MODELS.get(model_id, {}).get("ollama_tag")
        return _OLLAMA_API_BASE, (ollama_model or model)

    if not check_backend_running("ollama") and not start_ollama_serve():
        console.print(" [red]Failed to start Ollama.[/]")
        if not check_backend_running("llamacpp"):
            console.print(" [red]No backend available. Run: localcoder --setup[/]")
            return None
        console.print(" [yellow]Falling back to llama.cpp...[/]")
        api_base = _LLAMACPP_API_BASE
    return api_base, model


# ── Boot screen ─────────────────────────────────────────────────────────────

def _logo_lines(cols=99, scan=False):
    """Return markup lines for the boxed logo, revealing ``cols`` columns of art.

    With ``scan`` set, a cursor glyph trails the partially revealed text.
    """
    lines = [_LOGO_TOP]
    for color, art in _LOGO_ROWS:
        shown = art[:cols]
        cursor = "[white bold]▌[/]" if scan and cols < len(art) else ""
        pad = " " * max(0, _LOGO_WIDTH - len(shown) - (1 if cursor else 0))
        lines.append(
            f" [{_LOGO_BORDER}]│[/][bold {color}]{shown}[/]{cursor}{pad}[{_LOGO_BORDER}]│[/]"
        )
    lines.append(_LOGO_BOTTOM)
    return lines


def _play_logo_animation():
    """Play the transient logo reveal; any rendering failure is ignored."""
    try:
        from rich.console import Group
        from rich.live import Live
        from rich.text import Text

        def frame(lines):
            return Group(*(Text.from_markup(line) for line in lines))

        _clear_screen()
        with Live(console=console, refresh_per_second=20, transient=True) as live:
            # Empty border (as written, every step renders the same 48-wide box)
            empty = [f" [{_LOGO_BORDER}]┌{'─' * _LOGO_WIDTH}┐[/]"]
            empty += [
                f" [{_LOGO_BORDER}]│[/]{' ' * 48}[{_LOGO_BORDER}]│[/]" for _ in range(12)
            ]
            empty.append(f" [{_LOGO_BORDER}]└{'─' * _LOGO_WIDTH}┘[/]")
            for _ in range(4):
                live.update(frame(empty))
                time.sleep(0.04)

            # Logo reveals with cursor
            for col in range(0, _LOGO_WIDTH, 3):
                live.update(frame(_logo_lines(cols=col, scan=True)))
                time.sleep(0.04)

            # Full logo + subtitle
            live.update(frame(_logo_lines() + [_LOGO_SUBTITLE]))
            time.sleep(0.3)
    except Exception:
        pass  # purely cosmetic — never block start-up on it


def _print_logo():
    """Print the static logo (the animation is transient and leaves nothing behind)."""
    _clear_screen()
    console.print(_LOGO_TOP)
    for color, art in _LOGO_ROWS:
        pad = " " * max(0, _LOGO_WIDTH - len(art))
        console.print(
            f" [{_LOGO_BORDER}]│[/][bold {color}]{art}[/]{pad}[{_LOGO_BORDER}]│[/]"
        )
    console.print(_LOGO_BOTTOM)
    console.print(_LOGO_SUBTITLE)
    console.print()


def _print_boot_summary(cfg):
    """Fast boot: a single status line with GPU use, swap and health."""
    from localcoder.backends import diagnose_gpu_health, get_swap_usage_mb

    diag = diagnose_gpu_health(cfg.get("model_id"))
    swap_mb = get_swap_usage_mb()
    alloc, total = diag.get("gpu_alloc_mb", 0), diag.get("gpu_total_mb", 0)
    gpu_color = "green" if alloc < total * 0.8 else "yellow" if alloc < total else "red"
    swap_color = "red" if swap_mb > 4000 else "green"
    status_color = _STATUS_COLORS.get(diag["status"], "dim")
    dot = "[green]●[/]" if diag["on_gpu"] else "[red]●[/]"
    console.print(
        f" {dot} [{gpu_color}]GPU {alloc // 1024}/{total // 1024}GB[/{gpu_color}]"
        f" [{swap_color}]swap {swap_mb // 1024}GB[/{swap_color}]"
        f" [{status_color}]{diag['status']}[/{status_color}]"
    )


def _print_boot_dashboard(cfg, specs, srv, diag, swap_mb, gpu_total):
    """Render the GPU/swap/disk/model panel and the one-line verdict."""
    from localcoder.backends import MODELS, _detect_model_info
    from rich.panel import Panel
    from rich.table import Table

    # Estimate the model's GPU footprint: running server first, catalogue second
    detected = _detect_model_info(srv, cfg.get("model_id")) if srv["running"] else None
    model_mb = int((detected.get("size_gb") or 0) * 1024) if detected else 0
    if model_mb == 0:
        model_mb = int(MODELS.get(cfg.get("model_id", ""), {}).get("size_gb", 0) * 1024)
    used_mb = model_mb + diag.get("kv_cache_est_mb", 0)
    free_mb = max(0, gpu_total - used_mb)
    model_fits = used_mb < gpu_total

    if model_fits and swap_mb < 2000:
        border_color, verdict = "green", "READY"
    elif model_fits:
        border_color, verdict = "yellow", "READY"
    else:
        border_color, verdict = "red", "SLOW"

    table = Table(show_header=False, show_edge=False, box=None, padding=0, expand=False)
    table.add_column(width=9, style="bold dim")
    table.add_column(width=34)
    table.add_column(width=30)

    # GPU bar
    gpu_pct = min(1.0, used_mb / max(1, gpu_total))
    gpu_color = "green" if gpu_pct < 0.75 else "yellow" if gpu_pct < 0.9 else "red"
    table.add_row(
        "GPU",
        _bar(gpu_pct, gpu_color),
        f"[{gpu_color}]{used_mb // 1024}/{gpu_total // 1024}GB[/{gpu_color}]"
        f" {free_mb // 1024}GB free",
    )

    # Swap bar (scaled against 8 GB)
    swap_color = "green" if swap_mb < 1000 else "yellow" if swap_mb < 4000 else "red"
    table.add_row(
        "Swap",
        _bar(min(1.0, swap_mb / 8192), swap_color),
        f"[{swap_color}]{swap_mb // 1024}GB[/{swap_color}]"
        + (" [dim]close apps to fix[/]" if swap_mb > 2000 else ""),
    )

    # Disk bar — optional; skipped if disk info is unavailable
    try:
        from localcoder.backends import get_disk_info
        di = get_disk_info()
        disk_total = max(1, di["disk_total_gb"])
        disk_free = di["disk_free_gb"]
        disk_color = "green" if disk_free > 50 else "yellow" if disk_free > 20 else "red"
        cache_info = f" [dim]cache {di['hf_cache_gb']}GB[/]" if di["hf_cache_gb"] > 0 else ""
        disk_row = (
            "Disk",
            _bar(min(1.0, (disk_total - disk_free) / disk_total), disk_color),
            f"[{disk_color}]{disk_free}GB free[/{disk_color}]{cache_info}",
        )
    except Exception:
        disk_row = None
    if disk_row:
        table.add_row(*disk_row)
    table.add_row("", "", "")

    # Model
    if detected is not None:
        name = detected["name"] or "?"
        quant = f" {detected['quant']}" if detected.get("quant") else ""
        size = f" {detected['size_gb']}GB" if detected.get("size_gb") else ""
        where = "[green]● GPU[/]" if srv["ngl"] >= 90 else "[red]● CPU[/]"
        table.add_row(
            "Model", f"[cyan]{name}{quant}{size}[/]", f"{where} ctx {srv['n_ctx'] // 1024}K"
        )
    else:
        table.add_row("Model", "[dim]not running[/]", "[dim]will auto-start[/]")

    # Machine
    table.add_row(
        "Machine",
        f"[dim]{specs['chip']}[/]",
        f"[dim]{specs['ram_gb']}GB {specs.get('gpu_cores', '?')} GPU cores[/]",
    )

    console.print(Panel(
        table,
        title="[bold #e07a5f] localcoder [/]",
        subtitle=f"[bold {border_color}] {verdict} [/]",
        border_style=border_color,
        padding=(1, 2),
        width=80,
    ))

    # One verdict line
    if model_fits and swap_mb < 2000:
        console.print(" [green]All good. Full speed.[/]")
    elif model_fits:
        console.print(" [yellow]AI runs fine. Mac slow from other apps using RAM.[/]")
    else:
        console.print(
            " [red]Model too big — ~5 tok/s instead of ~49."
            " Try --simulate for alternatives.[/]"
        )


def _print_model_catalog(gpu_total):
    """List installed, trending and most-liked models.

    Returns the numbered, not-yet-installed entries as ``{"label", "repo"}``
    dicts; returns ``[]`` if the listing could not be produced (e.g. offline).
    """
    try:
        from localcoder.backends import fetch_unsloth_top_models, get_disk_info
        from rich.markup import escape

        installed = get_disk_info().get("models", [])
        installed_names = {
            m["name"].lower().replace(".gguf", "").replace("-", "").replace("_", "")
            for m in installed
        }
        downloadable = []

        def print_row(m, cap_tags):
            caps = m.get("caps", [])
            cap_str = "".join(tag for cap, tag in cap_tags if cap in caps)
            label = escape(m["label"])
            dl_str = _format_downloads(m["downloads"])

            base = m["label"].lower().replace("-", "").replace("_", "")
            if any(base in name for name in installed_names):
                console.print(
                    f" [green]✓[/] {label:<22}{cap_str} [dim]{dl_str} dl installed[/]"
                )
                return

            # Fit check from estimated smallest quant size
            est = m.get("est_smallest_gb")
            fit_tag = ""
            if est:
                if est * 1024 > gpu_total:
                    fit_tag = f" [red]~{est}GB min · won't fit[/]"
                else:
                    fit_tag = f" [green]~{est}GB min · fits[/]"
            num = len(downloadable) + 1
            console.print(
                f" [bold cyan]{num}[/] {label:<22}{cap_str} [dim]{dl_str} dl[/]{fit_tag}"
            )
            downloadable.append({"label": m["label"], "repo": m["repo_id"]})

        base_tags = (
            ("vision", " [magenta]img[/]"),
            ("code", " [cyan]code[/]"),
            ("MoE", " [green]MoE[/]"),
        )

        # Show installed models first
        if installed:
            total_gb = sum(m["size_gb"] for m in installed)
            console.print(
                f"\n [dim]── Installed ({len(installed)} models, {total_gb:.0f}GB) ──[/]"
            )
            for m in installed[:5]:
                name = escape(m["name"].replace(".gguf", ""))
                console.print(f" [green]✓[/] {name:<28} [dim]{m['size_gb']}GB[/]")

        # Trending
        console.print("\n [dim]── Trending (live from HuggingFace) ──[/]")
        trending = fetch_unsloth_top_models(limit=5)
        for m in trending:
            print_row(m, base_tags + (("audio", " [yellow]audio[/]"),))

        # Community favorites (most liked, deduplicated against trending)
        try:
            from localcoder.backends import fetch_hf_trending_models
            trending_repos = {m["repo_id"] for m in trending}
            liked = [
                m for m in fetch_hf_trending_models(limit=8, sort="likes")
                if m["repo_id"] not in trending_repos
            ][:4]
            if liked:
                console.print("\n [dim]── Most liked ──[/]")
                for m in liked:
                    print_row(m, base_tags)
        except Exception:
            pass  # optional section

        return downloadable
    except Exception:
        # Listing is a convenience (network, optional backend features) — never fatal.
        return []


def _save_skip_boot():
    """Persist ``skip_boot_health = true`` in the user's config file."""
    config_path = os.path.expanduser("~/.localcoder/config.json")
    try:
        with open(config_path) as f:
            saved = json.load(f)
        saved["skip_boot_health"] = True
        with open(config_path, "w") as f:
            json.dump(saved, f, indent=2)
    except (OSError, ValueError, TypeError) as err:
        console.print(f" [dim]Could not save setting: {type(err).__name__}[/]")
        return
    console.print(" [dim]Got it. Run localcoder --health anytime to see this again.[/]")
    time.sleep(1)


def _boot_menu(downloadable, procs):
    """Show the boot menu and act on the answer; return False if the user quits."""
    from rich.markup import escape

    console.print("\n [bold]enter[/] start coding")
    if downloadable:
        span = "1" if len(downloadable) == 1 else f"1-{len(downloadable)}"
        console.print(
            f" [bold]{span}[/] try a model [dim](shows quants, downloads if needed)[/]"
        )
    console.print(" [bold]c[/] cleanup GPU [bold]s[/] skip boot [bold]q[/] quit")

    try:
        ans = input(" > ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        return False

    if ans == "q":
        return False

    # isdecimal (not isdigit): every accepted character is parseable by int()
    if ans.isdecimal() and downloadable:
        idx = int(ans) - 1
        if 0 <= idx < len(downloadable):
            pick = downloadable[idx]
            console.print(f"\n [bold]Checking {escape(pick['label'])}...[/]")
            # Fetch real quants from HuggingFace
            try:
                from localcoder.backends import simulate_hf_model
                simulate_hf_model(pick["repo"])
            except Exception as e:
                console.print(f" [red]Error: {escape(str(e))}[/]")
            console.print("\n [dim]Press Enter to continue to coding...[/]")
        else:
            console.print(" [dim]Invalid number. Press Enter to start.[/]")
        _pause()
    elif ans == "c":
        from localcoder.backends import cleanup_gpu_memory

        console.print(
            "\n [bold]Freeing memory...[/]"
            " [dim](only unloading unused AI models — your apps are safe)[/]"
        )
        result = cleanup_gpu_memory(force=False)
        if result["ollama_unloaded"]:
            console.print(f" [green]✓[/] Unloaded: {', '.join(result['ollama_unloaded'])}")
        else:
            console.print(" [dim]No unused models to unload.[/]")

        # Show what user could close
        big_hogs = [p for p in procs if p["category"] in ("app", "bloat") and p["mb"] >= 500]
        if big_hogs:
            console.print(
                "\n [bold]Want more speed?[/] Close these apps when you don't need them:"
            )
            for p in big_hogs:
                console.print(f" {_display_name(p)} [dim]({_format_size(p['mb'])})[/]")

        console.print("\n [dim]Press Enter to start coding...[/]")
        _pause()
    elif ans == "s":
        _save_skip_boot()
    return True


def _boot_screen(cfg):
    """Show the boot health screen (or its one-line form); return False to quit."""
    if cfg.get("skip_boot_health", False):
        _print_boot_summary(cfg)
        return True

    from localcoder.backends import (
        diagnose_gpu_health, get_llama_server_config, get_machine_specs,
        get_metal_gpu_stats, get_swap_usage_mb, get_top_memory_processes,
    )

    # Gather everything first, render once
    with console.status("[bold] Starting localcoder...", spinner="dots"):
        specs = get_machine_specs()
        metal = get_metal_gpu_stats()
        srv = get_llama_server_config()
        swap_mb = get_swap_usage_mb()
        diag = diagnose_gpu_health(cfg.get("model_id"))
        procs = get_top_memory_processes(min_mb=500, limit=3)
    gpu_total = metal.get("total_mb") or specs["gpu_total_mb"]

    _play_logo_animation()
    _print_logo()
    _print_boot_dashboard(cfg, specs, srv, diag, swap_mb, gpu_total)
    downloadable = _print_model_catalog(gpu_total)

    if not _boot_menu(downloadable, procs):
        return False
    _clear_screen()  # clear boot screen, start fresh
    return True


def _run_agent(api_base, model, args):
    """Export the endpoint/model via environment and hand over to the agent."""
    os.environ["GEMMA_API_BASE"] = api_base
    os.environ["GEMMA_MODEL"] = model

    agent_script = os.path.join(os.path.dirname(__file__), "agent.py")
    if os.path.exists(agent_script):
        from localcoder.agent import run_agent
        run_agent(api_base, model, args)
        return

    # Fallback: a legacy standalone script at a fixed development path.
    original = os.path.expanduser("~/Projects/gemma4-research/gemma4coder")
    if os.path.exists(original):
        os.execv(sys.executable, [sys.executable, original] + sys.argv[1:])
    else:
        console.print(" [red]Agent not found. Ensure localcoder agent.py is installed.[/]")


def _build_parser():
    """Create the argument parser for the ``localcoder`` command."""
    epilog = "\n".join([
        "examples:",
        "localcoder interactive mode (auto-setup on first run)",
        "localcoder --setup run setup wizard",
        'localcoder -p "build a react app" one-shot mode',
        "localcoder -c continue last session",
        "localcoder --yolo auto-approve everything",
        "localcoder -m gemma4:e4b use specific model",
        "localcoder --models list/switch models",
        "",
    ])
    parser = argparse.ArgumentParser(
        description="localcoder — local AI coding agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    add = parser.add_argument
    add("-p", "--prompt", type=str, help="Run a single task and exit")
    add("-c", "--continue", dest="cont", action="store_true", help="Continue last session")
    add("-m", "--model", type=str, default=None, help="Model name")
    add("--yolo", action="store_true", help="Auto-approve everything")
    add("--bypass", action="store_true", help="Same as --yolo")
    add("--ask", action="store_true", help="Ask before every tool")
    add("--api", type=str, default=None, help="API base URL")
    add("--setup", action="store_true", help="Run setup wizard")
    add("--models", action="store_true", help="List and select models")
    add("--status", action="store_true", help="Show backend status")
    add("--specs", action="store_true", help="Show machine specs and GPU memory")
    add("--cleanup", action="store_true",
        help="Free GPU memory (unload models, kill stale servers)")
    add("--health", action="store_true",
        help="Diagnose GPU health: offload, KV cache, swap, context")
    add("--debloat", action="store_true",
        help="Disable macOS services that steal GPU/memory from your model")
    add("--simulate", nargs="?", const=_INTERACTIVE, metavar="MODEL",
        help="Will this model fit? Interactive picker or --simulate '70b q4'")
    add("--bench", action="store_true", help="Benchmark all installed models (local LM Arena)")
    add("--arena", action="store_true", help="Show model leaderboard")
    add("--force", action="store_true", help="Re-run benchmarks even if cached")
    add("--fetch", type=str, metavar="URL_OR_NAME",
        help="Fetch model from HuggingFace/Ollama URL and check fit")
    return parser


def main():
    """Entry point of the ``localcoder`` CLI.

    Parses the command line, runs the first matching utility sub-command
    (setup, specs, cleanup, bench, arena, fetch, simulate, debloat, health,
    status, models) and returns; otherwise ensures setup and a running
    backend, shows the boot screen and launches the coding agent.
    """
    args = _build_parser().parse_args()

    if args.setup:
        from localcoder.setup import wizard
        wizard()
        return
    if args.specs:
        from localcoder.backends import print_machine_specs
        print_machine_specs()
        return
    if args.cleanup:
        _run_cleanup()
        return
    if args.bench:
        from localcoder.bench import run_full_bench
        run_full_bench(force=args.force)
        return
    if args.arena:
        from localcoder.bench import show_leaderboard
        show_leaderboard()
        return
    if args.fetch:
        from localcoder.backends import simulate_hf_model
        simulate_hf_model(args.fetch)
        return
    if args.simulate:
        _run_simulate(args.simulate)
        return
    if args.debloat:
        from localcoder.backends import debloat_wizard
        debloat_wizard()
        return
    if args.health:
        _run_health()
        return
    if args.status:
        _run_status()
        return
    if args.models:
        _run_models()
        return

    # ── Ensure setup ──
    from localcoder.setup import ensure_setup
    cfg = ensure_setup()
    if not cfg:
        console.print(" [dim]Setup cancelled.[/]")
        return

    # ── Resolve config, start backend if needed ──
    backend_id, api_base, model = _resolve_backend(args, cfg)
    started = _ensure_backend_running(cfg, backend_id, api_base, model)
    if started is None:
        return
    api_base, model = started

    # ── Boot sequence (like an OS POST screen) ──
    if not _boot_screen(cfg):
        return

    # ── Set env and run the agent ──
    _run_agent(api_base, model, args)


if __name__ == "__main__":
    main()