amalfa 1.0.26 → 1.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "amalfa",
3
- "version": "1.0.26",
3
+ "version": "1.0.27",
4
4
  "description": "Local-first knowledge graph engine for AI agents. Transforms markdown into searchable memory with MCP protocol.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/pjsvis/amalfa#readme",
@@ -115,10 +115,28 @@ export interface SonarConfig {
115
115
  schedule: string;
116
116
  };
117
117
  };
118
+ /** Cloud inference configuration (dev-cloud/prod-local strategy) */
119
+ cloud?: {
120
+ /** Enable cloud inference (overrides local Ollama) */
121
+ enabled: boolean;
122
+ /** Provider type: 'ollama' for self-hosted, 'openrouter' for OpenRouter.ai */
123
+ provider: "ollama" | "openrouter";
124
+ /** API endpoint (e.g., your-gpu-server:11434 or openrouter.ai/api/v1) */
125
+ host: string;
126
+ /** Model to use on cloud (can be larger than local) */
127
+ model: string;
128
+ /** API key for authenticated endpoints (required for OpenRouter) */
129
+ apiKey?: string;
130
+ };
118
131
  }
119
132
 
120
133
  export const DEFAULT_CONFIG: AmalfaConfig = {
121
- sources: ["./docs"],
134
+ sources: [
135
+ "./docs",
136
+ "./*.md", // Root documentation (README.md, _CURRENT_TASK.md, etc.)
137
+ "./src/**/*.md", // Documentation co-located with code
138
+ "./scripts/**/*.md", // Documentation in scripts
139
+ ],
122
140
  database: ".amalfa/resonance.db",
123
141
  embeddings: {
124
142
  model: "BAAI/bge-small-en-v1.5",
@@ -150,11 +168,11 @@ export const DEFAULT_CONFIG: AmalfaConfig = {
150
168
  autoDiscovery: true,
151
169
  discoveryMethod: "cli",
152
170
  inferenceMethod: "http",
153
- model: "phi3:latest",
171
+ model: "qwen2.5:1.5b",
154
172
  modelPriority: [
173
+ "qwen2.5:1.5b", // Best-in-class reasoning for size
155
174
  "tinydolphin:latest",
156
175
  "tinyllama:latest",
157
- "phi3:latest",
158
176
  "mistral:7b-instruct-v0.3-q4_K_M",
159
177
  "llama3.1:8b",
160
178
  ],
@@ -21,6 +21,7 @@ const log = getLogger("SonarAgent");
21
21
 
22
22
  // Database initialization
23
23
  import { ResonanceDB } from "@src/resonance/db";
24
+ import { VectorEngine } from "@src/core/VectorEngine";
24
25
  let DB_PATH: string;
25
26
 
26
27
  // Service lifecycle management
@@ -62,6 +63,7 @@ interface RequestOptions {
62
63
  num_predict?: number;
63
64
  stream?: boolean;
64
65
  format?: "json"; // Enable GBNF-constrained JSON output
66
+ model?: string; // Override model for this specific call (tiered strategy)
65
67
  }
66
68
 
67
69
  /**
@@ -73,36 +75,116 @@ async function callOllama(
73
75
  options: RequestOptions = {},
74
76
  ): Promise<{ message: Message }> {
75
77
  const config = await loadConfig();
76
- // @ts-ignore
78
+ // @ts-ignore - backward compatibility with phi3 config
77
79
  const hostArgs = config.sonar || config.phi3 || {};
78
- const host = hostArgs.host || "localhost:11434";
79
- // Use discovered model if available, otherwise config or default
80
- const model = ollamaModel || hostArgs.model || "phi3:latest";
81
80
 
82
- // Extract format from options to put at root level of request
83
- const { format, ...modelOptions } = options;
81
+ // Cloud toggle: dev-cloud/prod-local strategy
82
+ const cloudConfig = hostArgs.cloud;
83
+ const useCloud = cloudConfig?.enabled === true;
84
+ const provider = useCloud ? cloudConfig.provider || "ollama" : "ollama";
85
+
86
+ // Tiered model strategy: options.model > cloud.model > discovered > config > default
87
+ const { format, model: overrideModel, ...modelOptions } = options;
88
+ const model =
89
+ overrideModel ||
90
+ (useCloud ? cloudConfig.model : null) ||
91
+ ollamaModel ||
92
+ hostArgs.model ||
93
+ "qwen2.5:1.5b";
94
+
95
+ // Build headers
96
+ const headers: Record<string, string> = {
97
+ "Content-Type": "application/json",
98
+ };
99
+ // API key: prefer env var (OPENROUTER_API_KEY) over config
100
+ const apiKey = process.env.OPENROUTER_API_KEY || cloudConfig?.apiKey;
101
+ if (useCloud && apiKey) {
102
+ headers["Authorization"] = `Bearer ${apiKey}`;
103
+ log.info(
104
+ { provider, hasKey: !!apiKey, keyLength: apiKey?.length },
105
+ "Cloud request with API key",
106
+ );
107
+ } else if (useCloud) {
108
+ log.warn("Cloud enabled but no API key found in env or config!");
109
+ }
110
+ // OpenRouter requires site headers for tracking
111
+ if (provider === "openrouter") {
112
+ headers["HTTP-Referer"] = "https://github.com/pjsvis/amalfa";
113
+ headers["X-Title"] = "AMALFA Knowledge Graph";
114
+ }
115
+
116
+ // Determine endpoint and request format based on provider
117
+ let endpoint: string;
118
+ let body: string;
84
119
 
85
- const response = await fetch(`http://${host}/api/chat`, {
86
- method: "POST",
87
- headers: { "Content-Type": "application/json" },
88
- body: JSON.stringify({
120
+ if (provider === "openrouter") {
121
+ // OpenRouter uses OpenAI-compatible format at openrouter.ai/api/v1
122
+ endpoint = "https://openrouter.ai/api/v1/chat/completions";
123
+ body = JSON.stringify({
89
124
  model,
90
125
  messages,
91
126
  stream: false,
92
- format, // Pass format (e.g. "json") to enable GBNF grammar
127
+ temperature: modelOptions.temperature ?? 0.1,
128
+ max_tokens: modelOptions.num_predict ?? 500,
129
+ });
130
+ } else {
131
+ // Ollama format (local or cloud Ollama server)
132
+ const host = useCloud
133
+ ? cloudConfig.host
134
+ : hostArgs.host || "localhost:11434";
135
+ endpoint = `http://${host}/api/chat`;
136
+ body = JSON.stringify({
137
+ model,
138
+ messages,
139
+ stream: false,
140
+ format, // Pass format (e.g. "json") for GBNF grammar
93
141
  options: {
94
142
  temperature: 0.1,
95
143
  num_predict: 200,
96
144
  ...modelOptions,
97
145
  },
98
- }),
146
+ });
147
+ }
148
+
149
+ const response = await fetch(endpoint, {
150
+ method: "POST",
151
+ headers,
152
+ body,
99
153
  });
100
154
 
101
155
  if (!response.ok) {
102
- throw new Error(`Ollama API error: ${response.statusText}`);
156
+ // Try to get error details from response body
157
+ let errorBody = "";
158
+ try {
159
+ errorBody = await response.text();
160
+ } catch {}
161
+ log.error(
162
+ {
163
+ status: response.status,
164
+ statusText: response.statusText,
165
+ body: errorBody,
166
+ },
167
+ "API request failed",
168
+ );
169
+ throw new Error(`${provider} API error: ${response.statusText}`);
103
170
  }
104
171
 
105
- return (await response.json()) as { message: Message };
172
+ const result = await response.json();
173
+
174
+ // Normalize response format (OpenRouter uses OpenAI format)
175
+ if (provider === "openrouter") {
176
+ // OpenAI format: { choices: [{ message: { role, content } }] }
177
+ const openaiResult = result as { choices: { message: Message }[] };
178
+ return {
179
+ message: openaiResult.choices[0]?.message || {
180
+ role: "assistant",
181
+ content: "",
182
+ },
183
+ };
184
+ }
185
+
186
+ // Ollama format: { message: { role, content } }
187
+ return result as { message: Message };
106
188
  }
107
189
 
108
190
  /**
@@ -382,6 +464,7 @@ Return JSON array with relevance scores (0.0 to 1.0):
382
464
  async function handleChat(
383
465
  sessionId: string,
384
466
  userMessage: string,
467
+ modelOverride?: string, // Optional: Use specific model (e.g., mistral-nemo for research)
385
468
  ): Promise<{ message: Message; sessionId: string }> {
386
469
  if (!ollamaAvailable) {
387
470
  throw new Error("Sonar is not available");
@@ -412,7 +495,37 @@ User can ask you about:
412
495
  }
413
496
 
414
497
  // Add user message
415
- session.messages.push({ role: "user", content: userMessage });
498
+
499
+ // RAG: Perform vector search to augment context
500
+ const db = new ResonanceDB(DB_PATH);
501
+ const vectors = new VectorEngine(db.getRawDb());
502
+ try {
503
+ const results = await vectors.search(userMessage, 3);
504
+
505
+ let augmentContext = "";
506
+ if (results.length > 0) {
507
+ augmentContext = `\n\nRELEVANT CONTEXT FROM KNOWLEDGE BASE:\n`;
508
+ results.forEach((r: { id: string; score: number }, i: number) => {
509
+ // Read full node content if possible, or just use what we have
510
+ const node = db.getNode(r.id);
511
+ // Truncate content to avoid blowing up context window
512
+ const content = node?.content ?? "";
513
+ const snippet = content.slice(0, 1000);
514
+ augmentContext += `[Document ${i + 1}: ${r.id}] (Score: ${r.score.toFixed(2)})\n${snippet}\n\n`;
515
+ });
516
+ augmentContext += `INSTRUCTIONS: Use the above context to answer the user's question. Cite sources if possible.\n`;
517
+ }
518
+
519
+ // Append context to user message
520
+ session.messages.push({
521
+ role: "user",
522
+ content: userMessage + augmentContext,
523
+ });
524
+ } catch (e) {
525
+ // Fallback to ignoring RAG on error
526
+ log.warn({ err: e }, "RAG search failed, proceeding without context");
527
+ session.messages.push({ role: "user", content: userMessage });
528
+ }
416
529
 
417
530
  // Maintain context window (keep system msg + last 10 messages)
418
531
  const contextMessages = [
@@ -422,9 +535,11 @@ User can ask you about:
422
535
 
423
536
  try {
424
537
  // NOTE: No format: "json" for chat! We want natural language.
538
+ // Use modelOverride if provided (e.g., mistral-nemo for research)
425
539
  const response = await callOllama(contextMessages, {
426
540
  temperature: 0.7,
427
541
  num_predict: 500,
542
+ model: modelOverride,
428
543
  });
429
544
 
430
545
  // Add assistant response to history
@@ -861,6 +976,24 @@ async function executeTask(task: any): Promise<string> {
861
976
  output += `- Failed: ${result.failed}\n\n`;
862
977
 
863
978
  output += `Check daemon logs for detailed errors per document.\n`;
979
+ } else if (task.type === "research") {
980
+ output += `## Objective\nResearch Query: "${task.query}"\n\n`;
981
+
982
+ try {
983
+ const sessionId = `task-${Date.now()}`;
984
+ // For research: use task.model if specified, otherwise let the cloud/local config decide
985
+ // Don't hardcode mistral-nemo since it's not valid on OpenRouter
986
+ const researchModel = task.model || undefined;
987
+ const response = await handleChat(sessionId, task.query, researchModel);
988
+
989
+ output += `## Analysis\n${response.message.content}\n\n`;
990
+ output += `(Model: ${researchModel || "default"})\n`;
991
+
992
+ // Note: chat doesn't return structured sources yet
993
+ output += `(Source citation not available in simple research task)\n`;
994
+ } catch (e) {
995
+ output += `## Error\nResearch failed: ${e instanceof Error ? e.message : String(e)}\n`;
996
+ }
864
997
  } else {
865
998
  output += `Error: Unknown task type '${task.type}'\n`;
866
999
  }
@@ -4,202 +4,203 @@ import { join } from "path";
4
4
  import { AMALFA_DIRS, initAmalfaDirs } from "@src/config/defaults";
5
5
 
6
6
  export interface ServiceConfig {
7
- name: string; // e.g. "Daemon"
8
- pidFile: string; // e.g. ".daemon.pid"
9
- logFile: string; // e.g. ".daemon.log"
10
- entryPoint: string; // e.g. "src/resonance/daemon.ts"
7
+ name: string; // e.g. "Daemon"
8
+ pidFile: string; // e.g. ".daemon.pid"
9
+ logFile: string; // e.g. ".daemon.log"
10
+ entryPoint: string; // e.g. "src/resonance/daemon.ts"
11
11
  }
12
12
 
13
13
  export class ServiceLifecycle {
14
- constructor(private config: ServiceConfig) {}
15
-
16
- private async isRunning(pid: number): Promise<boolean> {
17
- try {
18
- process.kill(pid, 0);
19
- return true;
20
- } catch (_e) {
21
- return false;
22
- }
23
- }
24
-
25
- /**
26
- * Start the service in the background (detached).
27
- */
28
- async start() {
29
- // Ensure .amalfa directories exist
30
- initAmalfaDirs();
31
-
32
- // Check if already running based on PID file
33
- if (await Bun.file(this.config.pidFile).exists()) {
34
- const pid = parseInt(await Bun.file(this.config.pidFile).text(), 10);
35
- if (await this.isRunning(pid)) {
36
- console.log(`⚠️ ${this.config.name} is already running (PID: ${pid})`);
37
- return;
38
- }
39
- console.log(
40
- `⚠️ Found stale PID file for ${this.config.name}. Clearing...`,
41
- );
42
- await unlink(this.config.pidFile);
43
- }
44
-
45
- const logFile = Bun.file(this.config.logFile);
46
- await Bun.write(logFile, ""); // Truncate logs
47
-
48
- // Spawn subprocess
49
- const subprocess = Bun.spawn(
50
- ["bun", "run", this.config.entryPoint, "serve"],
51
- {
52
- cwd: process.cwd(),
53
- detached: true,
54
- stdout: logFile,
55
- stderr: logFile,
56
- },
57
- );
58
-
59
- await Bun.write(this.config.pidFile, subprocess.pid.toString());
60
- subprocess.unref();
61
-
62
- console.log(
63
- `βœ… ${this.config.name} started in background (PID: ${subprocess.pid})`,
64
- );
65
- console.log(`πŸ“ Logs: ${this.config.logFile}`);
66
- }
67
-
68
- /**
69
- * Stop the service using the PID file.
70
- */
71
- async stop() {
72
- if (!(await Bun.file(this.config.pidFile).exists())) {
73
- console.log(`ℹ️ ${this.config.name} is not running.`);
74
- return;
75
- }
76
-
77
- const pid = parseInt(await Bun.file(this.config.pidFile).text(), 10);
78
-
79
- if (await this.isRunning(pid)) {
80
- console.log(`πŸ›‘ Stopping ${this.config.name} (PID: ${pid})...`);
81
- process.kill(pid, "SIGTERM");
82
-
83
- let attempts = 0;
84
- // Wait up to 1 second
85
- while ((await this.isRunning(pid)) && attempts < 10) {
86
- await new Promise((r) => setTimeout(r, 100));
87
- attempts++;
88
- }
89
-
90
- if (await this.isRunning(pid)) {
91
- console.log("⚠️ Process did not exit gracefully. Force killing...");
92
- process.kill(pid, "SIGKILL");
93
- }
94
- console.log(`βœ… ${this.config.name} stopped.`);
95
- } else {
96
- console.log("⚠️ Stale PID file found. Cleaning up.");
97
- }
98
-
99
- try {
100
- await unlink(this.config.pidFile);
101
- } catch (e: unknown) {
102
- const err = e as { code?: string; message: string };
103
- if (err.code !== "ENOENT") {
104
- console.warn(`⚠️ Failed to remove PID file: ${err.message}`);
105
- }
106
- }
107
- }
108
-
109
- /**
110
- * Check status of the service.
111
- */
112
- async status() {
113
- if (await Bun.file(this.config.pidFile).exists()) {
114
- const pid = parseInt(await Bun.file(this.config.pidFile).text(), 10);
115
- if (await this.isRunning(pid)) {
116
- console.log(`🟒 ${this.config.name} is RUNNING (PID: ${pid})`);
117
- return;
118
- }
119
- console.log(`πŸ”΄ ${this.config.name} is NOT RUNNING (Stale PID: ${pid})`);
120
- } else {
121
- console.log(`βšͺ️ ${this.config.name} is STOPPED`);
122
- }
123
- }
124
-
125
- /**
126
- * Wrapper for the foreground 'serve' command logic.
127
- * Use this to wrap your actual server startup code.
128
- */
129
- async serve(serverLogic: () => Promise<void>) {
130
- // Ensure .amalfa directories exist
131
- initAmalfaDirs();
132
-
133
- // Write PID file for this serving process
134
- await Bun.write(this.config.pidFile, process.pid.toString());
135
-
136
- // Register cleanup handlers to remove PID file on exit/crash/kill
137
- let cleanupCalled = false;
138
- const cleanup = async (signal?: string) => {
139
- if (cleanupCalled) return; // Prevent double cleanup
140
- cleanupCalled = true;
141
-
142
- try {
143
- if (await Bun.file(this.config.pidFile).exists()) {
144
- await unlink(this.config.pidFile);
145
- if (signal) {
146
- console.error(
147
- `\n🧹 ${this.config.name}: PID file cleaned up on ${signal}`,
148
- );
149
- }
150
- }
151
- } catch (_e) {
152
- // Ignore cleanup errors (file might already be deleted)
153
- }
154
- };
155
-
156
- // Register signal handlers
157
- process.on("SIGINT", () => cleanup("SIGINT").then(() => process.exit(0)));
158
- process.on("SIGTERM", () => cleanup("SIGTERM").then(() => process.exit(0)));
159
- process.on("exit", () => {
160
- // Note: exit event is synchronous, so we do sync cleanup
161
- if (!cleanupCalled && existsSync(this.config.pidFile)) {
162
- cleanupCalled = true;
163
- try {
164
- Bun.write(this.config.pidFile, ""); // Truncate to mark as stale
165
- } catch {}
166
- }
167
- });
168
-
169
- await serverLogic();
170
- }
171
-
172
- /**
173
- * Main CLI dispatch logic.
174
- */
175
- async run(command: string, serverLogic: () => Promise<void>) {
176
- switch (command) {
177
- case "start":
178
- await this.start();
179
- process.exit(0);
180
- break;
181
- case "stop":
182
- await this.stop();
183
- process.exit(0);
184
- break;
185
- case "status":
186
- await this.status();
187
- process.exit(0);
188
- break;
189
- case "restart":
190
- await this.stop();
191
- await new Promise((r) => setTimeout(r, 500));
192
- await this.start();
193
- process.exit(0);
194
- break;
195
- case "serve":
196
- await this.serve(serverLogic);
197
- break;
198
- default:
199
- console.log(
200
- `Unknown command '${command}'. Use: start, stop, status, restart, or serve`,
201
- );
202
- process.exit(1);
203
- }
204
- }
14
+ constructor(private config: ServiceConfig) {}
15
+
16
+ private async isRunning(pid: number): Promise<boolean> {
17
+ try {
18
+ process.kill(pid, 0);
19
+ return true;
20
+ } catch (_e) {
21
+ return false;
22
+ }
23
+ }
24
+
25
+ /**
26
+ * Start the service in the background (detached).
27
+ */
28
+ async start() {
29
+ // Ensure .amalfa directories exist
30
+ initAmalfaDirs();
31
+
32
+ // Check if already running based on PID file
33
+ if (await Bun.file(this.config.pidFile).exists()) {
34
+ const pid = parseInt(await Bun.file(this.config.pidFile).text(), 10);
35
+ if (await this.isRunning(pid)) {
36
+ console.log(`⚠️ ${this.config.name} is already running (PID: ${pid})`);
37
+ return;
38
+ }
39
+ console.log(
40
+ `⚠️ Found stale PID file for ${this.config.name}. Clearing...`,
41
+ );
42
+ await unlink(this.config.pidFile);
43
+ }
44
+
45
+ const logFile = Bun.file(this.config.logFile);
46
+ await Bun.write(logFile, ""); // Truncate logs
47
+
48
+ // Spawn subprocess with explicit env inheritance for .env vars
49
+ const subprocess = Bun.spawn(
50
+ ["bun", "run", this.config.entryPoint, "serve"],
51
+ {
52
+ cwd: process.cwd(),
53
+ detached: true,
54
+ stdout: logFile,
55
+ stderr: logFile,
56
+ env: process.env, // Explicitly inherit env (including .env loaded by parent)
57
+ },
58
+ );
59
+
60
+ await Bun.write(this.config.pidFile, subprocess.pid.toString());
61
+ subprocess.unref();
62
+
63
+ console.log(
64
+ `βœ… ${this.config.name} started in background (PID: ${subprocess.pid})`,
65
+ );
66
+ console.log(`πŸ“ Logs: ${this.config.logFile}`);
67
+ }
68
+
69
+ /**
70
+ * Stop the service using the PID file.
71
+ */
72
+ async stop() {
73
+ if (!(await Bun.file(this.config.pidFile).exists())) {
74
+ console.log(`ℹ️ ${this.config.name} is not running.`);
75
+ return;
76
+ }
77
+
78
+ const pid = parseInt(await Bun.file(this.config.pidFile).text(), 10);
79
+
80
+ if (await this.isRunning(pid)) {
81
+ console.log(`πŸ›‘ Stopping ${this.config.name} (PID: ${pid})...`);
82
+ process.kill(pid, "SIGTERM");
83
+
84
+ let attempts = 0;
85
+ // Wait up to 1 second
86
+ while ((await this.isRunning(pid)) && attempts < 10) {
87
+ await new Promise((r) => setTimeout(r, 100));
88
+ attempts++;
89
+ }
90
+
91
+ if (await this.isRunning(pid)) {
92
+ console.log("⚠️ Process did not exit gracefully. Force killing...");
93
+ process.kill(pid, "SIGKILL");
94
+ }
95
+ console.log(`βœ… ${this.config.name} stopped.`);
96
+ } else {
97
+ console.log("⚠️ Stale PID file found. Cleaning up.");
98
+ }
99
+
100
+ try {
101
+ await unlink(this.config.pidFile);
102
+ } catch (e: unknown) {
103
+ const err = e as { code?: string; message: string };
104
+ if (err.code !== "ENOENT") {
105
+ console.warn(`⚠️ Failed to remove PID file: ${err.message}`);
106
+ }
107
+ }
108
+ }
109
+
110
+ /**
111
+ * Check status of the service.
112
+ */
113
+ async status() {
114
+ if (await Bun.file(this.config.pidFile).exists()) {
115
+ const pid = parseInt(await Bun.file(this.config.pidFile).text(), 10);
116
+ if (await this.isRunning(pid)) {
117
+ console.log(`🟒 ${this.config.name} is RUNNING (PID: ${pid})`);
118
+ return;
119
+ }
120
+ console.log(`πŸ”΄ ${this.config.name} is NOT RUNNING (Stale PID: ${pid})`);
121
+ } else {
122
+ console.log(`βšͺ️ ${this.config.name} is STOPPED`);
123
+ }
124
+ }
125
+
126
+ /**
127
+ * Wrapper for the foreground 'serve' command logic.
128
+ * Use this to wrap your actual server startup code.
129
+ */
130
+ async serve(serverLogic: () => Promise<void>) {
131
+ // Ensure .amalfa directories exist
132
+ initAmalfaDirs();
133
+
134
+ // Write PID file for this serving process
135
+ await Bun.write(this.config.pidFile, process.pid.toString());
136
+
137
+ // Register cleanup handlers to remove PID file on exit/crash/kill
138
+ let cleanupCalled = false;
139
+ const cleanup = async (signal?: string) => {
140
+ if (cleanupCalled) return; // Prevent double cleanup
141
+ cleanupCalled = true;
142
+
143
+ try {
144
+ if (await Bun.file(this.config.pidFile).exists()) {
145
+ await unlink(this.config.pidFile);
146
+ if (signal) {
147
+ console.error(
148
+ `\n🧹 ${this.config.name}: PID file cleaned up on ${signal}`,
149
+ );
150
+ }
151
+ }
152
+ } catch (_e) {
153
+ // Ignore cleanup errors (file might already be deleted)
154
+ }
155
+ };
156
+
157
+ // Register signal handlers
158
+ process.on("SIGINT", () => cleanup("SIGINT").then(() => process.exit(0)));
159
+ process.on("SIGTERM", () => cleanup("SIGTERM").then(() => process.exit(0)));
160
+ process.on("exit", () => {
161
+ // Note: exit event is synchronous, so we do sync cleanup
162
+ if (!cleanupCalled && existsSync(this.config.pidFile)) {
163
+ cleanupCalled = true;
164
+ try {
165
+ Bun.write(this.config.pidFile, ""); // Truncate to mark as stale
166
+ } catch {}
167
+ }
168
+ });
169
+
170
+ await serverLogic();
171
+ }
172
+
173
+ /**
174
+ * Main CLI dispatch logic.
175
+ */
176
+ async run(command: string, serverLogic: () => Promise<void>) {
177
+ switch (command) {
178
+ case "start":
179
+ await this.start();
180
+ process.exit(0);
181
+ break;
182
+ case "stop":
183
+ await this.stop();
184
+ process.exit(0);
185
+ break;
186
+ case "status":
187
+ await this.status();
188
+ process.exit(0);
189
+ break;
190
+ case "restart":
191
+ await this.stop();
192
+ await new Promise((r) => setTimeout(r, 500));
193
+ await this.start();
194
+ process.exit(0);
195
+ break;
196
+ case "serve":
197
+ await this.serve(serverLogic);
198
+ break;
199
+ default:
200
+ console.log(
201
+ `Unknown command '${command}'. Use: start, stop, status, restart, or serve`,
202
+ );
203
+ process.exit(1);
204
+ }
205
+ }
205
206
  }
@@ -102,11 +102,13 @@ export async function discoverOllamaCapabilities(): Promise<OllamaCapabilities>
102
102
  `πŸ“¦ Found ${models.length} model(s): ${models.map((m) => m.name).join(", ")}`,
103
103
  );
104
104
 
105
+ // Model priority order for search tasks (from brief)
105
106
  // Model priority order for search tasks (from brief)
106
107
  const modelPriority = [
108
+ "qwen2.5:1.5b", // Best-in-class reasoning for size
109
+ "phi3:mini", // 3.8B but optimized
107
110
  "tinydolphin:latest",
108
111
  "tinyllama:latest",
109
- "phi3:latest",
110
112
  "mistral:7b-instruct-v0.3-q4_K_M",
111
113
  "llama3.1:8b",
112
114
  ];