omegon 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/.gitattributes +3 -0
  2. package/AGENTS.md +16 -0
  3. package/LICENSE +15 -0
  4. package/README.md +289 -0
  5. package/bin/pi.mjs +30 -0
  6. package/extensions/00-secrets/index.ts +1126 -0
  7. package/extensions/01-auth/auth.ts +401 -0
  8. package/extensions/01-auth/index.ts +289 -0
  9. package/extensions/auto-compact.ts +42 -0
  10. package/extensions/bootstrap/deps.ts +291 -0
  11. package/extensions/bootstrap/index.ts +811 -0
  12. package/extensions/chronos/chronos.sh +487 -0
  13. package/extensions/chronos/index.ts +148 -0
  14. package/extensions/cleave/assessment.ts +754 -0
  15. package/extensions/cleave/bridge.ts +31 -0
  16. package/extensions/cleave/conflicts.ts +250 -0
  17. package/extensions/cleave/dispatcher.ts +808 -0
  18. package/extensions/cleave/guardrails.ts +426 -0
  19. package/extensions/cleave/index.ts +3121 -0
  20. package/extensions/cleave/lifecycle-emitter.ts +20 -0
  21. package/extensions/cleave/openspec.ts +811 -0
  22. package/extensions/cleave/planner.ts +260 -0
  23. package/extensions/cleave/review.ts +579 -0
  24. package/extensions/cleave/skills.ts +355 -0
  25. package/extensions/cleave/types.ts +261 -0
  26. package/extensions/cleave/workspace.ts +861 -0
  27. package/extensions/cleave/worktree.ts +243 -0
  28. package/extensions/core-renderers.ts +253 -0
  29. package/extensions/dashboard/context-gauge.ts +58 -0
  30. package/extensions/dashboard/file-watch.ts +14 -0
  31. package/extensions/dashboard/footer.ts +1145 -0
  32. package/extensions/dashboard/git.ts +185 -0
  33. package/extensions/dashboard/index.ts +478 -0
  34. package/extensions/dashboard/memory-audit.ts +34 -0
  35. package/extensions/dashboard/overlay-data.ts +705 -0
  36. package/extensions/dashboard/overlay.ts +365 -0
  37. package/extensions/dashboard/render-utils.ts +54 -0
  38. package/extensions/dashboard/types.ts +191 -0
  39. package/extensions/dashboard/uri-helper.ts +45 -0
  40. package/extensions/debug.ts +69 -0
  41. package/extensions/defaults.ts +282 -0
  42. package/extensions/design-tree/dashboard-state.ts +161 -0
  43. package/extensions/design-tree/design-card.ts +362 -0
  44. package/extensions/design-tree/index.ts +2130 -0
  45. package/extensions/design-tree/lifecycle-emitter.ts +41 -0
  46. package/extensions/design-tree/tree.ts +1607 -0
  47. package/extensions/design-tree/types.ts +163 -0
  48. package/extensions/distill.ts +127 -0
  49. package/extensions/effort/index.ts +395 -0
  50. package/extensions/effort/tiers.ts +146 -0
  51. package/extensions/effort/types.ts +105 -0
  52. package/extensions/lib/git-state.ts +227 -0
  53. package/extensions/lib/local-models.ts +157 -0
  54. package/extensions/lib/model-preferences.ts +51 -0
  55. package/extensions/lib/model-routing.ts +720 -0
  56. package/extensions/lib/operator-fallback.ts +205 -0
  57. package/extensions/lib/operator-profile.ts +360 -0
  58. package/extensions/lib/slash-command-bridge.ts +253 -0
  59. package/extensions/lib/typebox-helpers.ts +16 -0
  60. package/extensions/local-inference/index.ts +727 -0
  61. package/extensions/mcp-bridge/README.md +220 -0
  62. package/extensions/mcp-bridge/index.ts +951 -0
  63. package/extensions/mcp-bridge/lib.ts +365 -0
  64. package/extensions/mcp-bridge/mcp.json +3 -0
  65. package/extensions/mcp-bridge/package.json +11 -0
  66. package/extensions/model-budget.ts +752 -0
  67. package/extensions/offline-driver.ts +403 -0
  68. package/extensions/openspec/archive-gate.ts +164 -0
  69. package/extensions/openspec/branch-cleanup.ts +64 -0
  70. package/extensions/openspec/dashboard-state.ts +50 -0
  71. package/extensions/openspec/index.ts +1917 -0
  72. package/extensions/openspec/lifecycle-emitter.ts +65 -0
  73. package/extensions/openspec/lifecycle-files.ts +70 -0
  74. package/extensions/openspec/lifecycle.ts +50 -0
  75. package/extensions/openspec/reconcile.ts +187 -0
  76. package/extensions/openspec/spec.ts +1385 -0
  77. package/extensions/openspec/types.ts +98 -0
  78. package/extensions/project-memory/DESIGN-global-mind.md +198 -0
  79. package/extensions/project-memory/README.md +202 -0
  80. package/extensions/project-memory/api-types.ts +382 -0
  81. package/extensions/project-memory/compaction-policy.ts +29 -0
  82. package/extensions/project-memory/core.ts +164 -0
  83. package/extensions/project-memory/embeddings.ts +230 -0
  84. package/extensions/project-memory/extraction-v2.ts +861 -0
  85. package/extensions/project-memory/factstore.ts +2177 -0
  86. package/extensions/project-memory/index.ts +3459 -0
  87. package/extensions/project-memory/injection-metrics.ts +91 -0
  88. package/extensions/project-memory/jsonl-io.ts +12 -0
  89. package/extensions/project-memory/lifecycle.ts +331 -0
  90. package/extensions/project-memory/migration.ts +293 -0
  91. package/extensions/project-memory/package.json +9 -0
  92. package/extensions/project-memory/sci-renderers.ts +7 -0
  93. package/extensions/project-memory/template.ts +103 -0
  94. package/extensions/project-memory/triggers.ts +52 -0
  95. package/extensions/project-memory/types.ts +102 -0
  96. package/extensions/render/composition/fonts/Inter-Bold.ttf +0 -0
  97. package/extensions/render/composition/fonts/Inter-Regular.ttf +0 -0
  98. package/extensions/render/composition/fonts/Tomorrow-Bold.ttf +0 -0
  99. package/extensions/render/composition/fonts/Tomorrow-Regular.ttf +0 -0
  100. package/extensions/render/composition/package-lock.json +534 -0
  101. package/extensions/render/composition/package.json +22 -0
  102. package/extensions/render/composition/render.mjs +246 -0
  103. package/extensions/render/composition/test-comp.tsx +87 -0
  104. package/extensions/render/composition/types.ts +24 -0
  105. package/extensions/render/excalidraw/UPSTREAM.md +81 -0
  106. package/extensions/render/excalidraw/elements.ts +764 -0
  107. package/extensions/render/excalidraw/index.ts +66 -0
  108. package/extensions/render/excalidraw/types.ts +223 -0
  109. package/extensions/render/excalidraw-renderer/pyproject.toml +8 -0
  110. package/extensions/render/excalidraw-renderer/render_excalidraw.py +182 -0
  111. package/extensions/render/excalidraw-renderer/render_template.html +59 -0
  112. package/extensions/render/index.ts +830 -0
  113. package/extensions/render/native-diagrams/index.ts +57 -0
  114. package/extensions/render/native-diagrams/motifs.ts +542 -0
  115. package/extensions/render/native-diagrams/raster.ts +8 -0
  116. package/extensions/render/native-diagrams/scene.ts +75 -0
  117. package/extensions/render/native-diagrams/spec.ts +204 -0
  118. package/extensions/render/native-diagrams/svg.ts +116 -0
  119. package/extensions/sci-ui.ts +304 -0
  120. package/extensions/session-log.ts +174 -0
  121. package/extensions/shared-state.ts +146 -0
  122. package/extensions/spinner-verbs.ts +91 -0
  123. package/extensions/style.ts +281 -0
  124. package/extensions/terminal-title.ts +191 -0
  125. package/extensions/tool-profile/index.ts +291 -0
  126. package/extensions/tool-profile/profiles.ts +290 -0
  127. package/extensions/types.d.ts +9 -0
  128. package/extensions/vault/index.ts +185 -0
  129. package/extensions/version-check.ts +90 -0
  130. package/extensions/view/index.ts +859 -0
  131. package/extensions/view/uri-resolver.ts +148 -0
  132. package/extensions/web-search/index.ts +182 -0
  133. package/extensions/web-search/providers.ts +121 -0
  134. package/extensions/web-ui/index.ts +110 -0
  135. package/extensions/web-ui/server.ts +265 -0
  136. package/extensions/web-ui/state.ts +462 -0
  137. package/extensions/web-ui/static/index.html +145 -0
  138. package/extensions/web-ui/types.ts +284 -0
  139. package/package.json +76 -0
  140. package/prompts/init.md +75 -0
  141. package/prompts/new-repo.md +54 -0
  142. package/prompts/oci-login.md +56 -0
  143. package/prompts/status.md +50 -0
  144. package/settings.json +4 -0
  145. package/skills/cleave/SKILL.md +218 -0
  146. package/skills/git/SKILL.md +209 -0
  147. package/skills/git/_reference/ci-validation.md +204 -0
  148. package/skills/oci/SKILL.md +338 -0
  149. package/skills/openspec/SKILL.md +346 -0
  150. package/skills/pi-extensions/SKILL.md +191 -0
  151. package/skills/pi-tui/SKILL.md +517 -0
  152. package/skills/python/SKILL.md +189 -0
  153. package/skills/rust/SKILL.md +268 -0
  154. package/skills/security/SKILL.md +206 -0
  155. package/skills/style/SKILL.md +264 -0
  156. package/skills/typescript/SKILL.md +225 -0
  157. package/skills/vault/SKILL.md +102 -0
  158. package/themes/alpharius-legacy.json +85 -0
  159. package/themes/alpharius.conf +59 -0
  160. package/themes/alpharius.json +88 -0
@@ -0,0 +1,727 @@
1
+ // @config LOCAL_INFERENCE_URL "Ollama / OpenAI-compatible inference server URL" [default: http://localhost:11434]
2
+
3
+ /**
4
+ * local-inference — Delegate sub-tasks to locally running LLM inference servers
5
+ *
6
+ * Registers an `ask_local_model` tool that the driving agent (Claude) can call to
7
+ * delegate specific sub-tasks to local models running via Ollama or any
8
+ * OpenAI-compatible local server. Zero API cost for delegated work.
9
+ *
10
+ * Use cases:
11
+ * - Boilerplate/template generation
12
+ * - File summarization
13
+ * - Code transforms (formatting, conversion)
14
+ * - Draft generation for review by the driving agent
15
+ * - Embeddings generation
16
+ *
17
+ * Architecture:
18
+ * This is Option C (tool-callable sub-agent): the driving agent stays Claude
19
+ * with reliable tool use and reasoning, but can offload cheap work to local models.
20
+ * The abstraction layer supports any OpenAI-compatible backend. Default: Ollama.
21
+ */
22
+
23
+ import { execSync, spawn, type ChildProcess } from "node:child_process";
24
+ import type { ExtensionAPI } from "@cwilson613/pi-coding-agent";
25
+ import { Type } from "@sinclair/typebox";
26
+ import { StringEnum } from "../lib/typebox-helpers";
27
+
28
+ const DEFAULT_URL = "http://localhost:11434";
29
+
30
+ interface LocalModel {
31
+ id: string;
32
+ object: string;
33
+ owned_by: string;
34
+ }
35
+
36
+ interface ChatMessage {
37
+ role: "system" | "user" | "assistant";
38
+ content: string;
39
+ }
40
+
41
+ interface ChatResponse {
42
+ id: string;
43
+ choices: Array<{
44
+ message: {
45
+ role: string;
46
+ content: string;
47
+ reasoning?: string;
48
+ };
49
+ finish_reason: string;
50
+ }>;
51
+ usage: {
52
+ prompt_tokens: number;
53
+ completion_tokens: number;
54
+ total_tokens: number;
55
+ };
56
+ }
57
+
58
+ function getBaseUrl(): string {
59
+ return process.env.LOCAL_INFERENCE_URL || DEFAULT_URL;
60
+ }
61
+
62
+ async function discoverModels(baseUrl: string): Promise<LocalModel[]> {
63
+ try {
64
+ const resp = await fetch(`${baseUrl}/v1/models`, {
65
+ signal: AbortSignal.timeout(3000),
66
+ });
67
+ if (!resp.ok) return [];
68
+ const data = await resp.json();
69
+ return (data.data || []).filter(
70
+ (m: LocalModel) => !m.id.includes("embed") // exclude embedding models from chat
71
+ );
72
+ } catch {
73
+ return [];
74
+ }
75
+ }
76
+
77
+ async function listAllModels(baseUrl: string): Promise<LocalModel[]> {
78
+ try {
79
+ const resp = await fetch(`${baseUrl}/v1/models`, {
80
+ signal: AbortSignal.timeout(3000),
81
+ });
82
+ if (!resp.ok) return [];
83
+ const data = await resp.json();
84
+ return data.data || [];
85
+ } catch {
86
+ return [];
87
+ }
88
+ }
89
+
90
+ function stripThinkTokens(text: string): string {
91
+ // Clean up leaked thinking tokens from various model families
92
+ return text
93
+ .replace(/<think>[\s\S]*?<\/think>\s*/g, "") // <think>...</think>
94
+ .replace(/<\|begin_of_box\|>/g, "") // GLM box tokens
95
+ .replace(/<\|end_of_box\|>/g, "")
96
+ .trim();
97
+ }
98
+
99
+ async function chatCompletionStreaming(
100
+ baseUrl: string,
101
+ model: string,
102
+ messages: ChatMessage[],
103
+ opts: {
104
+ maxTokens?: number;
105
+ temperature?: number;
106
+ signal?: AbortSignal;
107
+ onToken?: (accumulated: string) => void;
108
+ }
109
+ ): Promise<{ content: string; reasoning?: string; usage: ChatResponse["usage"] }> {
110
+ const resp = await fetch(`${baseUrl}/v1/chat/completions`, {
111
+ method: "POST",
112
+ headers: { "Content-Type": "application/json" },
113
+ body: JSON.stringify({
114
+ model,
115
+ messages,
116
+ max_tokens: opts.maxTokens || 2048,
117
+ temperature: opts.temperature ?? 0.3,
118
+ stream: true,
119
+ }),
120
+ signal: opts.signal,
121
+ });
122
+
123
+ if (!resp.ok) {
124
+ const body = await resp.text().catch(() => "");
125
+ throw new Error(`Local inference failed (${resp.status}): ${body}`);
126
+ }
127
+
128
+ if (!resp.body) throw new Error("No response body from local model");
129
+
130
+ let accumulated = "";
131
+ let reasoning = "";
132
+ let usage: ChatResponse["usage"] = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
133
+
134
+ const reader = resp.body.getReader();
135
+ const decoder = new TextDecoder();
136
+ let buffer = "";
137
+
138
+ while (true) {
139
+ const { done, value } = await reader.read();
140
+ if (done) break;
141
+
142
+ buffer += decoder.decode(value, { stream: true });
143
+ const lines = buffer.split("\n");
144
+ buffer = lines.pop() || "";
145
+
146
+ for (const line of lines) {
147
+ const trimmed = line.trim();
148
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
149
+ const payload = trimmed.slice(6);
150
+ if (payload === "[DONE]") continue;
151
+
152
+ try {
153
+ const chunk = JSON.parse(payload);
154
+ const delta = chunk.choices?.[0]?.delta;
155
+ if (delta?.content) {
156
+ accumulated += delta.content;
157
+ opts.onToken?.(accumulated);
158
+ }
159
+ if (delta?.reasoning) {
160
+ reasoning += delta.reasoning;
161
+ }
162
+ // Ollama sends usage in the final chunk
163
+ if (chunk.usage) {
164
+ usage = chunk.usage;
165
+ }
166
+ } catch {
167
+ // skip malformed chunks
168
+ }
169
+ }
170
+ }
171
+
172
+ return {
173
+ content: stripThinkTokens(accumulated),
174
+ reasoning: reasoning || undefined,
175
+ usage,
176
+ };
177
+ }
178
+
179
+ export default function (pi: ExtensionAPI) {
180
+ // Track available models (refreshed on session start and via command)
181
+ let cachedModels: LocalModel[] = [];
182
+ let serverOnline = false;
183
+
184
+ async function refreshModels() {
185
+ const baseUrl = getBaseUrl();
186
+ cachedModels = await discoverModels(baseUrl);
187
+ serverOnline = cachedModels.length > 0 || (await listAllModels(baseUrl)).length > 0;
188
+ return cachedModels;
189
+ }
190
+
191
+ // --- Ollama lifecycle management ---
192
+
193
+ let ollamaChild: ChildProcess | null = null;
194
+ /** True when THIS session started Ollama via `brew services start`. */
195
+ let brewServicesManaged = false;
196
+ let ollamaBinaryAvailable: boolean | null = null; // cached after first check
197
+
198
+ function hasOllama(): boolean {
199
+ if (ollamaBinaryAvailable !== null) return ollamaBinaryAvailable;
200
+ try {
201
+ execSync("which ollama", { stdio: "ignore" });
202
+ ollamaBinaryAvailable = true;
203
+ } catch {
204
+ ollamaBinaryAvailable = false;
205
+ }
206
+ return ollamaBinaryAvailable;
207
+ }
208
+
209
+ async function isOllamaReachable(): Promise<boolean> {
210
+ try {
211
+ const resp = await fetch(`${getBaseUrl()}/api/tags`, {
212
+ signal: AbortSignal.timeout(2000),
213
+ });
214
+ return resp.ok;
215
+ } catch {
216
+ return false;
217
+ }
218
+ }
219
+
220
+ /** Try brew services first (persists across reboots), fall back to ollama serve */
221
+ function startOllamaProcess(): { method: string } {
222
+ if (process.platform === "darwin") {
223
+ try {
224
+ execSync("brew services start ollama", { stdio: "ignore", timeout: 10_000 });
225
+ brewServicesManaged = true;
226
+ return { method: "brew services" };
227
+ } catch {
228
+ // fall through to manual serve
229
+ }
230
+ }
231
+
232
+ const child = spawn("ollama", ["serve"], {
233
+ stdio: "ignore",
234
+ detached: true,
235
+ });
236
+ child.unref();
237
+ ollamaChild = child;
238
+
239
+ child.on("exit", () => {
240
+ if (ollamaChild === child) ollamaChild = null;
241
+ });
242
+
243
+ return { method: "ollama serve (background)" };
244
+ }
245
+
246
+ function stopOllama(): string {
247
+ // Only attempt brew services stop if WE started via brew services (W2: flag-gated, not platform-gated).
248
+ if (brewServicesManaged) {
249
+ try {
250
+ execSync("brew services stop ollama", { stdio: "ignore", timeout: 10_000 });
251
+ brewServicesManaged = false;
252
+ serverOnline = false;
253
+ cachedModels = [];
254
+ return "Stopped Ollama (brew services).";
255
+ } catch { /* fall through */ }
256
+ }
257
+
258
+ if (ollamaChild) {
259
+ ollamaChild.kill("SIGTERM");
260
+ ollamaChild = null;
261
+ serverOnline = false;
262
+ cachedModels = [];
263
+ return "Stopped Ollama background process.";
264
+ }
265
+
266
+ // No managed child — do NOT use broad pkill to avoid terminating unrelated processes.
267
+ return "No managed Ollama server is running. If you started Ollama externally, stop it manually.";
268
+ }
269
+
270
+ async function waitForOllama(maxSeconds: number): Promise<boolean> {
271
+ for (let i = 0; i < maxSeconds; i++) {
272
+ await new Promise((r) => setTimeout(r, 1000));
273
+ if (await isOllamaReachable()) return true;
274
+ }
275
+ return false;
276
+ }
277
+
278
+ function pullModel(modelName: string, signal?: AbortSignal): Promise<{ success: boolean; output: string }> {
279
+ return new Promise((resolve) => {
280
+ const child = spawn("ollama", ["pull", modelName], { stdio: "pipe" });
281
+ let output = "";
282
+ child.stdout?.on("data", (d: Buffer) => { output += d.toString(); });
283
+ child.stderr?.on("data", (d: Buffer) => { output += d.toString(); });
284
+ child.on("exit", (code) => {
285
+ resolve({ success: code === 0, output: output.slice(-200) });
286
+ });
287
+ child.on("error", (err) => {
288
+ resolve({ success: false, output: err.message });
289
+ });
290
+ signal?.addEventListener("abort", () => { child.kill("SIGTERM"); });
291
+ });
292
+ }
293
+
294
+ // Check server + auto-start ollama on session start
295
+ pi.on("session_start", async (_event, ctx) => {
296
+ await refreshModels();
297
+
298
+ if (serverOnline) return;
299
+
300
+ // Auto-start if binary exists, server is down, and no custom URL configured
301
+ if (!hasOllama() || process.env.LOCAL_INFERENCE_URL) return;
302
+
303
+ if (ctx.hasUI) {
304
+ ctx.ui.notify("Ollama installed but not running — starting...", "info");
305
+ }
306
+
307
+ startOllamaProcess();
308
+
309
+ if (await waitForOllama(10)) {
310
+ await refreshModels();
311
+ if (ctx.hasUI) {
312
+ ctx.ui.notify(`Ollama started — ${cachedModels.length} chat models available`, "info");
313
+ }
314
+ }
315
+ });
316
+
317
+ // Clean up spawned ollama child on session end
318
+ pi.on("session_shutdown", () => {
319
+ // Only kill if WE spawned it (not brew services)
320
+ if (ollamaChild) {
321
+ ollamaChild.kill("SIGTERM");
322
+ ollamaChild = null;
323
+ }
324
+ });
325
+
326
+ // Main delegation tool
327
+ pi.registerTool({
328
+ name: "ask_local_model",
329
+ label: "Ask Local Model",
330
+ description:
331
+ "Delegate a sub-task to a locally running LLM (zero API cost). " +
332
+ "The local model runs on-device via Ollama. Use for:\n" +
333
+ "- Boilerplate/template generation\n" +
334
+ "- File summarization or content transforms\n" +
335
+ "- Code formatting, conversion, or simple generation\n" +
336
+ "- Drafting text for your review\n" +
337
+ "- Any task where perfect accuracy isn't critical\n\n" +
338
+ "You receive the local model's response and can review, edit, or use it. " +
339
+ "The local model has NO access to tools, files, or conversation context — " +
340
+ "you must include all necessary context in the prompt.",
341
+ promptSnippet: "Delegate sub-tasks to local LLM via Ollama (zero API cost, on-device)",
342
+ promptGuidelines: [
343
+ "Include ALL necessary context in the prompt — the local model cannot see conversation history or access tools",
344
+ "Use for boilerplate generation, file summarization, code transforms, and drafting text for review",
345
+ ],
346
+ parameters: Type.Object({
347
+ prompt: Type.String({
348
+ description: "Complete prompt for the local model. Include ALL necessary context — the local model cannot see our conversation or access any tools.",
349
+ }),
350
+ system: Type.Optional(
351
+ Type.String({
352
+ description: "Optional system prompt to set the local model's behavior (e.g., 'You are a Python expert. Output only code, no explanations.')",
353
+ })
354
+ ),
355
+ model: Type.Optional(
356
+ Type.String({
357
+ description: "Specific model ID to use. Omit to auto-select the best available model.",
358
+ })
359
+ ),
360
+ max_tokens: Type.Optional(
361
+ Type.Number({
362
+ description: "Maximum response tokens (default: 2048)",
363
+ })
364
+ ),
365
+ temperature: Type.Optional(
366
+ Type.Number({
367
+ description: "Sampling temperature 0.0-1.0 (default: 0.3, lower = more deterministic)",
368
+ })
369
+ ),
370
+ }),
371
+ execute: async (
372
+ _toolCallId,
373
+ params,
374
+ signal,
375
+ onUpdate,
376
+ ctx
377
+ ) => {
378
+ const baseUrl = getBaseUrl();
379
+
380
+ // Refresh models if cache is empty
381
+ if (cachedModels.length === 0) await refreshModels();
382
+
383
+ if (!serverOnline) {
384
+ return {
385
+ content: [
386
+ {
387
+ type: "text" as const,
388
+ text: `Local inference server not available at ${baseUrl}. Is Ollama running? Start with: ollama serve`,
389
+ },
390
+ ],
391
+ details: undefined,
392
+ };
393
+ }
394
+
395
+ // Model selection: explicit > auto (prefer largest/most capable)
396
+ let modelId = params.model;
397
+ if (!modelId) {
398
+ // Prefer models roughly by capability heuristic (larger/newer = higher score)
399
+ const ranked = [...cachedModels].sort((a, b) => {
400
+ const score = (id: string) => {
401
+ if (id.includes("nemotron")) return 110;
402
+ if (id.includes("qwen3")) return 100;
403
+ if (id.includes("devstral")) return 95;
404
+ if (id.includes("qwen2.5")) return 80;
405
+ if (id.includes("qwen")) return 75;
406
+ if (id.includes("llama")) return 60;
407
+ if (id.includes("mistral")) return 50;
408
+ if (id.includes("gemma")) return 45;
409
+ return 30;
410
+ };
411
+ return score(b.id) - score(a.id);
412
+ });
413
+ modelId = ranked[0]?.id;
414
+ }
415
+
416
+ if (!modelId) {
417
+ return {
418
+ content: [
419
+ {
420
+ type: "text" as const,
421
+ text: "No chat models available in Ollama. Pull a model with: ollama pull nemotron-3-nano:30b",
422
+ },
423
+ ],
424
+ details: undefined,
425
+ };
426
+ }
427
+
428
+ const messages: ChatMessage[] = [];
429
+ if (params.system) {
430
+ messages.push({ role: "system", content: params.system });
431
+ }
432
+ messages.push({ role: "user", content: params.prompt });
433
+
434
+ try {
435
+ const result = await chatCompletionStreaming(baseUrl, modelId, messages, {
436
+ maxTokens: params.max_tokens,
437
+ temperature: params.temperature,
438
+ signal: signal,
439
+ onToken: (accumulated) => {
440
+ onUpdate?.({
441
+ content: [
442
+ {
443
+ type: "text" as const,
444
+ text: `**Local model:** ${modelId} *(streaming...)*\n\n---\n\n${stripThinkTokens(accumulated)}`,
445
+ },
446
+ ],
447
+ details: undefined,
448
+ });
449
+ },
450
+ });
451
+
452
+ const parts: Array<{ type: "text"; text: string }> = [];
453
+ parts.push({
454
+ type: "text" as const,
455
+ text: `**Local model:** ${modelId}\n**Tokens:** ${result.usage.prompt_tokens} in → ${result.usage.completion_tokens} out\n\n---\n\n${result.content}`,
456
+ });
457
+
458
+ if (result.reasoning) {
459
+ parts.push({
460
+ type: "text" as const,
461
+ text: `\n\n---\n**Model reasoning:** ${result.reasoning}`,
462
+ });
463
+ }
464
+
465
+ return { content: parts, details: undefined };
466
+ } catch (err: any) {
467
+ return {
468
+ content: [
469
+ {
470
+ type: "text" as const,
471
+ text: `Local inference error (${modelId}): ${err.message}`,
472
+ },
473
+ ],
474
+ details: undefined,
475
+ };
476
+ }
477
+ },
478
+ });
479
+
480
+ // List available local models
481
+ pi.registerTool({
482
+ name: "list_local_models",
483
+ label: "List Local Models",
484
+ description:
485
+ "List all models currently available in the local inference server (Ollama). " +
486
+ "Use to check what's loaded before delegating work.",
487
+ promptSnippet: "List available Ollama models before delegating work",
488
+ parameters: Type.Object({}),
489
+ execute: async (_toolCallId, _params, _signal, _onUpdate, ctx) => {
490
+ const baseUrl = getBaseUrl();
491
+ const all = await listAllModels(baseUrl);
492
+
493
+ if (all.length === 0) {
494
+ return {
495
+ content: [
496
+ {
497
+ type: "text" as const,
498
+ text: `No models available at ${baseUrl}. Is Ollama running? Start with: ollama serve`,
499
+ },
500
+ ],
501
+ details: undefined,
502
+ };
503
+ }
504
+
505
+ const lines = all.map((m) => {
506
+ const isEmbed = m.id.includes("embed");
507
+ return `- \`${m.id}\` ${isEmbed ? "(embeddings)" : "(chat)"}`;
508
+ });
509
+
510
+ return {
511
+ content: [
512
+ {
513
+ type: "text" as const,
514
+ text: `**Local models at ${baseUrl}:**\n${lines.join("\n")}`,
515
+ },
516
+ ],
517
+ details: undefined,
518
+ };
519
+ },
520
+ });
521
+
522
+ // --- Ollama management tool (agent-callable) ---
523
+
524
+ function toolResult(msg: string) {
525
+ return { content: [{ type: "text" as const, text: msg }], details: undefined };
526
+ }
527
+
528
+ function modelCount(models: LocalModel[]): string {
529
+ return `${models.length} chat model${models.length !== 1 ? "s" : ""}`;
530
+ }
531
+
532
+ async function ollamaStatus(): Promise<string> {
533
+ const reachable = await isOllamaReachable();
534
+ if (!reachable) return `Ollama is not running at ${getBaseUrl()}.`;
535
+
536
+ const all = await listAllModels(getBaseUrl());
537
+ const chat = all.filter((m) => !m.id.includes("embed"));
538
+ const embed = all.filter((m) => m.id.includes("embed"));
539
+ let msg = `Ollama running at ${getBaseUrl()}\n`;
540
+ if (chat.length > 0) msg += `Chat models: ${chat.map((m) => m.id).join(", ")}\n`;
541
+ if (embed.length > 0) msg += `Embedding models: ${embed.map((m) => m.id).join(", ")}\n`;
542
+ if (all.length === 0) msg += "No models installed.";
543
+ return msg;
544
+ }
545
+
546
+ async function ollamaStart(): Promise<string> {
547
+ if (await isOllamaReachable()) {
548
+ const models = await refreshModels();
549
+ return `Ollama is already running at ${getBaseUrl()} — ${modelCount(models)} available.`;
550
+ }
551
+
552
+ // W4: if brew services already launched it this session, don't spawn a second process.
553
+ // (Ollama may still be binding; a second start would create competing processes.)
554
+ if (brewServicesManaged) {
555
+ return "Ollama was started via brew services and may still be initializing. Check status in a moment.";
556
+ }
557
+
558
+ startOllamaProcess();
559
+
560
+ if (await waitForOllama(15)) {
561
+ const models = await refreshModels();
562
+ const suffix = models.length === 0
563
+ ? " No models installed yet — pull one with `ollama pull qwen3:30b`."
564
+ : "";
565
+ return `Ollama started successfully — ${modelCount(models)} available.${suffix}`;
566
+ }
567
+ return "Ollama process started but server not responding after 15s. It may still be loading.";
568
+ }
569
+
570
+ pi.registerTool({
571
+ name: "manage_ollama",
572
+ label: "Manage Ollama",
573
+ description:
574
+ "Manage the Ollama local inference server: start, stop, check status, or pull models. " +
575
+ "Use 'start' when local models are needed but Ollama isn't running. " +
576
+ "Use 'pull' to download a model before delegating work to it. " +
577
+ "Use 'status' to check what's available. Use 'stop' to free GPU memory.",
578
+ promptSnippet: "Start/stop Ollama, pull models, check status",
579
+ promptGuidelines: [
580
+ "Call with action 'start' if ask_local_model or list_local_models reports Ollama is not running",
581
+ "Call with action 'pull' and a model name to download models (e.g., 'qwen3:30b', 'devstral-small:24b')",
582
+ "Call with action 'status' to check if Ollama is running and what models are available",
583
+ "Call with action 'stop' when done with local inference to free GPU/memory",
584
+ ],
585
+ parameters: Type.Object({
586
+ action: StringEnum(["start", "stop", "status", "pull"], {
587
+ description: "Action to perform",
588
+ }),
589
+ model: Type.Optional(
590
+ Type.String({
591
+ description: "Model name for 'pull' action (e.g., 'qwen3:30b', 'devstral-small:24b', 'qwen3-embedding')",
592
+ })
593
+ ),
594
+ }),
595
+ execute: async (_toolCallId, params, signal, _onUpdate, _ctx) => {
596
+ if (!hasOllama()) {
597
+ return toolResult("Ollama is not installed. The user should run `/bootstrap` to set up Omegon dependencies.");
598
+ }
599
+
600
+ switch (params.action) {
601
+ case "start":
602
+ return toolResult(await ollamaStart());
603
+
604
+ case "stop":
605
+ return toolResult(stopOllama());
606
+
607
+ case "status":
608
+ return toolResult(await ollamaStatus());
609
+
610
+ case "pull": {
611
+ if (!params.model) {
612
+ return toolResult("Model name required for pull. Examples: qwen3:30b, devstral-small:24b, qwen3-embedding");
613
+ }
614
+ if (!(await isOllamaReachable())) {
615
+ return toolResult("Ollama is not running. Start it first (action: 'start').");
616
+ }
617
+
618
+ const result = await pullModel(params.model, signal);
619
+ if (result.success) {
620
+ await refreshModels();
621
+ return toolResult(`Successfully pulled ${params.model}. Model is now available for use.`);
622
+ }
623
+ return toolResult(`Failed to pull ${params.model}. ${result.output}`);
624
+ }
625
+
626
+ default:
627
+ return toolResult("Unknown action. Use: start, stop, status, pull");
628
+ }
629
+ },
630
+ });
631
+
632
+ // Manual commands
633
+ pi.registerCommand("local-models", {
634
+ description: "List available local inference models",
635
+ handler: async (_args, ctx) => {
636
+ const all = await listAllModels(getBaseUrl());
637
+ if (all.length === 0) {
638
+ ctx.ui.notify("No local models available — is Ollama running?", "warning");
639
+ } else {
640
+ const names = all.map((m) => m.id).join("\n ");
641
+ ctx.ui.notify(`Local models:\n ${names}`, "info");
642
+ }
643
+ },
644
+ });
645
+
646
+ pi.registerCommand("local-status", {
647
+ description: "Check local inference server status",
648
+ handler: async (_args, ctx) => {
649
+ await refreshModels();
650
+ if (serverOnline) {
651
+ ctx.ui.notify(
652
+ `🏠 Local inference online — ${cachedModels.length} chat models available`,
653
+ "info"
654
+ );
655
+ } else {
656
+ ctx.ui.notify(`Local inference offline at ${getBaseUrl()}`, "warning");
657
+ }
658
+ },
659
+ });
660
+
661
+ pi.registerCommand("ollama", {
662
+ description: "Manage Ollama — start, stop, status, pull models",
663
+ handler: async (args, ctx) => {
664
+ const parts = args.trim().split(/\s+/);
665
+ const sub = parts[0]?.toLowerCase() || "";
666
+
667
+ if (!hasOllama()) {
668
+ ctx.ui.notify("`ollama` is not installed. Run `/bootstrap` to set up Omegon dependencies.", "warning");
669
+ return;
670
+ }
671
+
672
+ switch (sub) {
673
+ case "start": {
674
+ ctx.ui.notify("Starting Ollama...", "info");
675
+ const msg = await ollamaStart();
676
+ ctx.ui.notify(msg, "info");
677
+ return;
678
+ }
679
+
680
+ case "stop": {
681
+ ctx.ui.notify(stopOllama(), "info");
682
+ return;
683
+ }
684
+
685
+ case "pull": {
686
+ const modelName = parts[1];
687
+ if (!modelName) {
688
+ ctx.ui.notify(
689
+ "Usage: /ollama pull <model>\n\nPopular models:\n" +
690
+ "- qwen3:30b — general purpose, 256K context\n" +
691
+ "- devstral-small:24b — code-focused, 128K context\n" +
692
+ "- qwen3-embedding — embeddings\n" +
693
+ "- nemotron-3-nano:30b — NVIDIA, 1M context",
694
+ "info"
695
+ );
696
+ return;
697
+ }
698
+
699
+ if (!(await isOllamaReachable())) {
700
+ ctx.ui.notify("Ollama is not running. Start it first with /ollama start.", "warning");
701
+ return;
702
+ }
703
+
704
+ ctx.ui.notify(`Pulling ${modelName}...`, "info");
705
+ const result = await pullModel(modelName);
706
+ if (result.success) {
707
+ await refreshModels();
708
+ ctx.ui.notify(`✅ ${modelName} pulled successfully.`, "info");
709
+ } else {
710
+ ctx.ui.notify(`❌ Failed to pull ${modelName}. ${result.output}`, "warning");
711
+ }
712
+ return;
713
+ }
714
+
715
+ case "status":
716
+ case "": {
717
+ ctx.ui.notify(await ollamaStatus(), "info");
718
+ return;
719
+ }
720
+
721
+ default:
722
+ ctx.ui.notify("Usage: /ollama [start|stop|status|pull <model>]", "info");
723
+ return;
724
+ }
725
+ },
726
+ });
727
+ }