alvin-bot 4.5.1 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/CHANGELOG.md +278 -0
  2. package/README.md +25 -2
  3. package/bin/cli.js +325 -26
  4. package/dist/handlers/commands.js +505 -63
  5. package/dist/handlers/message.js +209 -14
  6. package/dist/i18n.js +470 -13
  7. package/dist/index.js +45 -5
  8. package/dist/providers/claude-sdk-provider.js +106 -14
  9. package/dist/providers/ollama-provider.js +32 -0
  10. package/dist/providers/openai-compatible.js +10 -1
  11. package/dist/providers/registry.js +112 -17
  12. package/dist/providers/types.js +25 -3
  13. package/dist/services/compaction.js +2 -0
  14. package/dist/services/cron.js +53 -42
  15. package/dist/services/heartbeat.js +41 -7
  16. package/dist/services/language-detect.js +12 -2
  17. package/dist/services/ollama-manager.js +339 -0
  18. package/dist/services/personality.js +20 -14
  19. package/dist/services/session.js +21 -3
  20. package/dist/services/subagent-delivery.js +266 -0
  21. package/dist/services/subagent-stats.js +123 -0
  22. package/dist/services/subagents.js +509 -42
  23. package/dist/services/telegram.js +28 -1
  24. package/dist/services/updater.js +158 -0
  25. package/dist/services/usage-tracker.js +11 -4
  26. package/dist/services/users.js +2 -1
  27. package/docs/HANDBOOK.md +856 -0
  28. package/package.json +7 -2
  29. package/test/claude-sdk-provider.test.ts +69 -0
  30. package/test/i18n.test.ts +108 -0
  31. package/test/registry.test.ts +201 -0
  32. package/test/subagent-delivery.test.ts +273 -0
  33. package/test/subagent-stats.test.ts +119 -0
  34. package/test/subagents-commands.test.ts +64 -0
  35. package/test/subagents-config.test.ts +114 -0
  36. package/test/subagents-depth.test.ts +58 -0
  37. package/test/subagents-inheritance.test.ts +67 -0
  38. package/test/subagents-name-resolver.test.ts +122 -0
  39. package/test/subagents-priority-reject.test.ts +88 -0
  40. package/test/subagents-queue.test.ts +127 -0
  41. package/test/subagents-shutdown.test.ts +126 -0
  42. package/test/subagents-toolset.test.ts +51 -0
  43. package/vitest.config.ts +17 -0
@@ -12,7 +12,6 @@
12
12
  import fs from "fs";
13
13
  import { execSync } from "child_process";
14
14
  import { dirname } from "path";
15
- import { getRegistry } from "../engine.js";
16
15
  import { CRON_FILE, BOT_ROOT } from "../paths.js";
17
16
  // ── Storage ─────────────────────────────────────────────
18
17
  function loadJobs() {
@@ -151,52 +150,61 @@ async function executeJob(job) {
151
150
  return { output };
152
151
  }
153
152
  case "ai-query": {
154
- // AI queries run through the actual AI engine (Claude SDK)
153
+ // AI queries run as isolated sub-agents rather than directly against
154
+ // the registry. This gives cron jobs timeout/cancel/state-tracking
155
+ // "for free" via the existing subagents infrastructure, and — most
156
+ // importantly — keeps them completely independent of any user's
157
+ // active main session. A cron job can run in the background while
158
+ // the user chats with Alvin in the foreground; neither interferes
159
+ // with the other.
155
160
  const prompt = job.payload.prompt || "";
161
+ // Dynamic import to avoid circular dep chain (cron → engine → registry
162
+ // and subagents → engine). Type-only import at file top is erased,
163
+ // so no runtime cycle is created.
164
+ const { spawnSubAgent } = await import("./subagents.js");
156
165
  try {
157
- const registry = getRegistry();
158
- const queryOpts = {
159
- prompt,
160
- systemPrompt: `You are Alvin Bot, an autonomous AI assistant. You are currently executing a scheduled cron job ("${job.name}"). Reply concisely. Use Telegram-compatible Markdown. You have access to tools (Bash, files, etc.) — use them if needed.`,
161
- effort: "high",
162
- workingDir: BOT_ROOT,
163
- };
164
- let fullResponse = "";
165
- for await (const chunk of registry.queryWithFallback(queryOpts)) {
166
- if (chunk.type === "text" && chunk.text) {
167
- fullResponse = chunk.text;
168
- }
169
- if (chunk.type === "error") {
170
- throw new Error(chunk.error || "AI query failed");
171
- }
172
- if (chunk.type === "done") {
173
- break;
174
- }
175
- }
176
- // Send AI response to target
177
- if (notifyCallback && fullResponse.trim()) {
178
- // Split long responses into chunks (Telegram limit ~4096 chars)
179
- const maxLen = 3900;
180
- if (fullResponse.length <= maxLen) {
181
- await notifyCallback(job.target, fullResponse);
182
- }
183
- else {
184
- const parts = [];
185
- for (let i = 0; i < fullResponse.length; i += maxLen) {
186
- parts.push(fullResponse.slice(i, i + maxLen));
187
- }
188
- for (const part of parts) {
189
- await notifyCallback(job.target, part);
190
- }
191
- }
166
+ // Turn the fire-and-forget spawnSubAgent into an awaitable via
167
+ // the onComplete callback. Rejection of the spawn promise itself
168
+ // means the max-parallel limit was hit.
169
+ // Parse the target chat id for I3 delivery routing. Only telegram
170
+ // targets get a numeric parentChatId — other platforms/web get
171
+ // undefined and fall through the delivery router's warning path.
172
+ const parentChatId = job.target.platform === "telegram" && job.target.chatId
173
+ ? Number(job.target.chatId)
174
+ : undefined;
175
+ const result = await new Promise((resolve, reject) => {
176
+ spawnSubAgent({
177
+ name: job.name,
178
+ prompt,
179
+ workingDir: BOT_ROOT,
180
+ source: "cron",
181
+ parentChatId,
182
+ onComplete: (r) => resolve(r),
183
+ }).catch(reject);
184
+ });
185
+ // Non-success: don't notify here. The I3 delivery router has
186
+ // already posted the appropriate banner (cancelled / timeout /
187
+ // error) to parentChatId, so a legacy notifyCallback would
188
+ // produce a duplicate message.
189
+ if (result.status !== "completed") {
190
+ return {
191
+ output: "",
192
+ error: `Sub-agent ${result.status}: ${result.error || result.status}`,
193
+ };
192
194
  }
195
+ const fullResponse = result.output;
196
+ // NOTE: No notifyCallback for ai-query jobs. The I3 delivery
197
+ // router (src/services/subagent-delivery.ts) fires from
198
+ // spawnSubAgent().finally() and sends a proper banner+final to
199
+ // parentChatId. Legacy notifyCallback stays in use for the
200
+ // other job types (reminder, shell, http, message) which do
201
+ // not route through spawnSubAgent.
193
202
  return { output: fullResponse.slice(0, 500) };
194
203
  }
195
204
  catch (err) {
196
- const error = err instanceof Error ? err.message : String(err);
197
- if (notifyCallback) {
198
- await notifyCallback(job.target, `❌ AI-Query Error (${job.name}): ${error}`);
199
- }
205
+ // Re-throw without notifying — the outer catch will record
206
+ // lastError on the job, and the I3 delivery router has already
207
+ // posted a banner if the failure came from inside spawnSubAgent.
200
208
  throw err;
201
209
  }
202
210
  }
@@ -206,7 +214,10 @@ async function executeJob(job) {
206
214
  }
207
215
  catch (err) {
208
216
  const error = err instanceof Error ? err.message : String(err);
209
- if (notifyCallback) {
217
+ // Skip notification for ai-query jobs — the I3 delivery router has
218
+ // already posted the banner. Other job types still get the legacy
219
+ // notify path because they don't route through spawnSubAgent.
220
+ if (notifyCallback && job.type !== "ai-query") {
210
221
  await notifyCallback(job.target, `❌ Cron Error (${job.name}): ${error}`);
211
222
  }
212
223
  return { output: "", error };
@@ -101,6 +101,18 @@ async function runHeartbeat() {
101
101
  const provider = registry.get(key);
102
102
  if (!provider)
103
103
  continue;
104
+ // Providers with an on-demand lifecycle (local runners: Ollama, LM
105
+ // Studio, llama.cpp, …) are not pinged periodically — they're off
106
+ // until we actively boot them during failover. Mark as always-healthy
107
+ // so they remain a valid failover target.
108
+ if (provider.lifecycle) {
109
+ health.healthy = true;
110
+ health.lastCheck = Date.now();
111
+ health.lastLatencyMs = 0;
112
+ health.failCount = 0;
113
+ health.lastError = undefined;
114
+ continue;
115
+ }
104
116
  const start = Date.now();
105
117
  try {
106
118
  // Quick availability check first
@@ -142,7 +154,7 @@ async function runHeartbeat() {
142
154
  }
143
155
  }
144
156
  // Auto-failover logic
145
- handleFailover(registry);
157
+ await handleFailover(registry);
146
158
  }
147
159
  async function pingProvider(provider, key) {
148
160
  // For CLI-based providers, just check availability (no full query needed)
@@ -166,7 +178,7 @@ async function pingProvider(provider, key) {
166
178
  }
167
179
  return text || "ok";
168
180
  }
169
- function handleFailover(registry) {
181
+ async function handleFailover(registry) {
170
182
  const primaryHealth = state.providers.get(state.originalPrimary);
171
183
  const currentKey = registry.getActiveKey();
172
184
  // Case 1: Primary is down → switch to first healthy fallback
@@ -174,19 +186,41 @@ function handleFailover(registry) {
174
186
  const fallbackOrder = config.fallbackProviders;
175
187
  for (const fbKey of fallbackOrder) {
176
188
  const fbHealth = state.providers.get(fbKey);
177
- if (fbHealth?.healthy) {
189
+ if (!fbHealth?.healthy)
190
+ continue;
191
+ const fbProvider = registry.get(fbKey);
192
+ if (!fbProvider)
193
+ continue;
194
+ // Providers with a lifecycle (local runners) must be booted before
195
+ // the switch. If boot fails, skip and try the next fallback.
196
+ if (fbProvider.lifecycle) {
197
+ console.log(`💓 🔄 Auto-failover: ${state.originalPrimary} → ${fbKey} — booting ${fbKey}…`);
198
+ const ok = await fbProvider.lifecycle.ensureRunning();
199
+ if (!ok) {
200
+ console.log(`💓 ⚠️ ${fbKey} boot failed — skipping`);
201
+ continue;
202
+ }
203
+ }
204
+ else {
178
205
  console.log(`💓 🔄 Auto-failover: ${state.originalPrimary} → ${fbKey}`);
179
- registry.switchTo(fbKey);
180
- state.wasFailedOver = true;
181
- return;
182
206
  }
207
+ registry.switchTo(fbKey);
208
+ state.wasFailedOver = true;
209
+ return;
183
210
  }
184
211
  console.log("💓 ⚠️ All providers unhealthy — staying on primary");
212
+ return;
185
213
  }
186
- // Case 2: Primary recovered → switch back
214
+ // Case 2: Primary recovered → switch back, tearing down any lifecycle-
215
+ // managed fallback we booted during the outage.
187
216
  if (primaryHealth?.healthy && state.wasFailedOver && currentKey !== state.originalPrimary) {
217
+ const currentProvider = registry.get(currentKey);
188
218
  console.log(`💓 ✅ Primary recovered — switching back to ${state.originalPrimary}`);
189
219
  registry.switchTo(state.originalPrimary);
190
220
  state.wasFailedOver = false;
221
+ if (currentProvider?.lifecycle) {
222
+ console.log(`💓 🧹 Tearing down ${currentKey} daemon + unloading model`);
223
+ await currentProvider.lifecycle.ensureStopped();
224
+ }
191
225
  }
192
226
  }
@@ -83,12 +83,18 @@ export function detectLanguage(text) {
83
83
  /**
84
84
  * Update language statistics for a user and auto-adapt if pattern is clear.
85
85
  * Returns the recommended language for this session.
86
+ *
87
+ * Note: auto-detection is intentionally limited to de/en (the two languages
88
+ * our heuristic covers). For es/fr users, the /language command is the only
89
+ * way to set their UI locale — their explicit choice is persisted via
90
+ * profile.langExplicit and trackAndAdapt returns it untouched.
86
91
  */
87
92
  export function trackAndAdapt(userId, text, currentSessionLang) {
88
93
  const profile = loadProfile(userId);
89
94
  if (!profile)
90
95
  return currentSessionLang;
91
- // If user explicitly set language, don't auto-switch
96
+ // If user explicitly set language (via /language), honour it and never
97
+ // auto-switch. This is the only way es/fr get persisted.
92
98
  if (profile.langExplicit)
93
99
  return profile.language;
94
100
  const detected = detectLanguage(text);
@@ -115,7 +121,10 @@ export function trackAndAdapt(userId, text, currentSessionLang) {
115
121
  }
116
122
  }
117
123
  else {
118
- // Early phase: follow immediate language for responsiveness
124
+ // Early phase: follow immediate language for responsiveness.
125
+ // Only overrides es/fr if the user wrote in de/en without having set
126
+ // langExplicit — which can only happen if they changed language via
127
+ // something other than /language (shouldn't happen in practice).
119
128
  profile.language = detected;
120
129
  }
121
130
  saveProfile(profile);
@@ -123,6 +132,7 @@ export function trackAndAdapt(userId, text, currentSessionLang) {
123
132
  }
124
133
  /**
125
134
  * Mark language as explicitly set by user (disables auto-detection).
135
+ * Accepts all supported locales including es/fr.
126
136
  */
127
137
  export function setExplicitLanguage(userId, lang) {
128
138
  const profile = loadProfile(userId);
@@ -0,0 +1,339 @@
1
+ /**
2
+ * Ollama Manager — on-demand daemon lifecycle for fallback use.
3
+ *
4
+ * The bot uses Ollama as a local fallback when the primary provider is down.
5
+ * Historically the user had to run `ollama serve` themselves — if they forgot,
6
+ * the fallback silently failed. This service spawns the daemon on demand,
7
+ * preloads the target model into VRAM, and tears it all down once the primary
8
+ * provider is healthy again.
9
+ *
10
+ * Key invariants:
11
+ * • Only kills instances the bot started itself (tracked via PID file).
12
+ * An externally-managed ollama is left alone.
13
+ * • Preload uses Ollama's native /api/generate endpoint with an empty
14
+ * prompt and keep_alive=30m, so the first real query is not cold.
15
+ * • Unload sets keep_alive=0 to flush the model from VRAM immediately.
16
+ * • All spawns are detached with stdio=ignore, so the child survives the
17
+ * bot crashing but still gets cleaned up on graceful shutdown.
18
+ */
19
+ import { spawn, execFile } from "child_process";
20
+ import { promisify } from "util";
21
+ import fs from "fs";
22
+ import { resolve, dirname } from "path";
23
+ import os from "os";
24
+ const execFileAsync = promisify(execFile);
25
+ const DATA_DIR = process.env.ALVIN_DATA_DIR || resolve(os.homedir(), ".alvin-bot");
26
+ const PID_FILE = resolve(DATA_DIR, "ollama.pid");
27
+ const MODEL_FILE = resolve(DATA_DIR, "ollama.model");
28
+ const OLLAMA_API_BASE = "http://localhost:11434";
29
+ const DAEMON_READY_TIMEOUT_MS = 15_000;
30
+ const PRELOAD_TIMEOUT_MS = 60_000;
31
+ const KEEP_ALIVE = "30m";
32
+ let managedProcess = null;
33
+ let managedModel = null;
34
+ // ── PID / Process verification ─────────────────────────────────────────────
35
+ /**
36
+ * Verify that `pid` is actually an ollama process by inspecting its command
37
+ * via `ps`. This prevents the classic PID-reuse bug where we'd kill a
38
+ * random process after a bot crash left a stale pid file pointing at
39
+ * something the OS has since re-assigned.
40
+ */
41
+ async function verifyPidIsOllama(pid) {
42
+ try {
43
+ const { stdout } = await execFileAsync("ps", ["-p", String(pid), "-o", "command="], {
44
+ timeout: 3_000,
45
+ });
46
+ return stdout.toLowerCase().includes("ollama");
47
+ }
48
+ catch {
49
+ // ps exits non-zero if pid doesn't exist — treat as "not ollama"
50
+ return false;
51
+ }
52
+ }
53
+ function loadManagedModelFromDisk() {
54
+ try {
55
+ if (fs.existsSync(MODEL_FILE)) {
56
+ return fs.readFileSync(MODEL_FILE, "utf-8").trim() || null;
57
+ }
58
+ }
59
+ catch { /* ignore */ }
60
+ return null;
61
+ }
62
+ function persistManagedModel(model) {
63
+ try {
64
+ fs.mkdirSync(dirname(MODEL_FILE), { recursive: true });
65
+ if (model) {
66
+ fs.writeFileSync(MODEL_FILE, model, "utf-8");
67
+ }
68
+ else if (fs.existsSync(MODEL_FILE)) {
69
+ fs.unlinkSync(MODEL_FILE);
70
+ }
71
+ }
72
+ catch (err) {
73
+ console.warn(`[ollama] failed to persist model file: ${err}`);
74
+ }
75
+ }
76
+ /**
77
+ * Reconcile stale state left behind from a previous bot run.
78
+ * If the PID file points at a process that is no longer ollama (crashed,
79
+ * PID reused, never existed), remove the file so we don't try to kill
80
+ * the wrong process later. Called lazily from ensureRunning / ensureStopped.
81
+ */
82
+ async function reconcileStalePidFile() {
83
+ if (!fs.existsSync(PID_FILE))
84
+ return;
85
+ try {
86
+ const raw = fs.readFileSync(PID_FILE, "utf-8").trim();
87
+ const pid = parseInt(raw, 10);
88
+ if (isNaN(pid) || pid <= 0) {
89
+ fs.unlinkSync(PID_FILE);
90
+ return;
91
+ }
92
+ const isOllama = await verifyPidIsOllama(pid);
93
+ if (!isOllama) {
94
+ console.log(`[ollama] stale pid file (pid=${pid} is no longer ollama) — removing`);
95
+ fs.unlinkSync(PID_FILE);
96
+ persistManagedModel(null);
97
+ }
98
+ }
99
+ catch {
100
+ // If we can't read/parse it, drop it
101
+ try {
102
+ fs.unlinkSync(PID_FILE);
103
+ }
104
+ catch { /* ignore */ }
105
+ }
106
+ }
107
+ export async function isDaemonRunning() {
108
+ try {
109
+ const res = await fetch(`${OLLAMA_API_BASE}/api/tags`, {
110
+ signal: AbortSignal.timeout(2_000),
111
+ });
112
+ return res.ok;
113
+ }
114
+ catch {
115
+ return false;
116
+ }
117
+ }
118
+ async function findOllamaBinary() {
119
+ // Common install paths — macOS Homebrew, Linux, /usr/local
120
+ const candidates = [
121
+ "/opt/homebrew/bin/ollama",
122
+ "/usr/local/bin/ollama",
123
+ "/usr/bin/ollama",
124
+ ];
125
+ for (const p of candidates) {
126
+ if (fs.existsSync(p))
127
+ return p;
128
+ }
129
+ // Fallback: `which ollama` (async, no event-loop block)
130
+ try {
131
+ const { stdout } = await execFileAsync("which", ["ollama"], { timeout: 3_000 });
132
+ return stdout.trim() || null;
133
+ }
134
+ catch {
135
+ return null;
136
+ }
137
+ }
138
+ async function waitForDaemon(timeoutMs = DAEMON_READY_TIMEOUT_MS) {
139
+ const start = Date.now();
140
+ while (Date.now() - start < timeoutMs) {
141
+ if (await isDaemonRunning())
142
+ return true;
143
+ await new Promise(r => setTimeout(r, 500));
144
+ }
145
+ return false;
146
+ }
147
+ async function preloadModel(model) {
148
+ try {
149
+ await fetch(`${OLLAMA_API_BASE}/api/generate`, {
150
+ method: "POST",
151
+ headers: { "Content-Type": "application/json" },
152
+ body: JSON.stringify({
153
+ model,
154
+ prompt: "",
155
+ keep_alive: KEEP_ALIVE,
156
+ }),
157
+ signal: AbortSignal.timeout(PRELOAD_TIMEOUT_MS),
158
+ });
159
+ }
160
+ catch (err) {
161
+ const msg = err instanceof Error ? err.message : String(err);
162
+ console.warn(`[ollama] preload warning (model=${model}): ${msg}`);
163
+ }
164
+ }
165
+ async function unloadModel(model) {
166
+ try {
167
+ await fetch(`${OLLAMA_API_BASE}/api/generate`, {
168
+ method: "POST",
169
+ headers: { "Content-Type": "application/json" },
170
+ body: JSON.stringify({
171
+ model,
172
+ keep_alive: 0, // immediate VRAM unload
173
+ }),
174
+ signal: AbortSignal.timeout(5_000),
175
+ });
176
+ }
177
+ catch {
178
+ // ignore — daemon may already be stopping
179
+ }
180
+ }
181
+ /**
182
+ * Ensure the Ollama daemon is running and the specified model is loaded.
183
+ * Idempotent. If an externally-managed daemon is already running, we use
184
+ * it and just preload the model, but leave it for ensureStopped() to decide
185
+ * whether to kill it (it won't — only bot-spawned daemons get killed).
186
+ */
187
+ export async function ensureRunning(model) {
188
+ // Drop any stale pid file from a previous run before deciding anything.
189
+ await reconcileStalePidFile();
190
+ if (await isDaemonRunning()) {
191
+ // Daemon is already up — either we started it in a previous bot run
192
+ // (pid file still valid) or user started it externally (no pid file).
193
+ // In both cases we preload the target model so the first query is warm.
194
+ await preloadModel(model);
195
+ managedModel = model;
196
+ // If a valid pid file exists, we inherit ownership of that daemon
197
+ // (it was bot-managed before a crash/restart). Update the model file.
198
+ if (fs.existsSync(PID_FILE)) {
199
+ persistManagedModel(model);
200
+ }
201
+ return true;
202
+ }
203
+ const binary = await findOllamaBinary();
204
+ if (!binary) {
205
+ console.error("[ollama] binary not found — install ollama first (brew install ollama)");
206
+ return false;
207
+ }
208
+ console.log(`[ollama] starting daemon: ${binary} serve`);
209
+ const proc = spawn(binary, ["serve"], {
210
+ detached: true,
211
+ stdio: "ignore",
212
+ env: process.env,
213
+ });
214
+ proc.unref();
215
+ if (!proc.pid) {
216
+ console.error("[ollama] spawn failed — no pid");
217
+ return false;
218
+ }
219
+ // Persist the PID + model so we can kill/unload correctly on cleanup,
220
+ // even after a bot restart loses the in-memory references.
221
+ try {
222
+ fs.mkdirSync(dirname(PID_FILE), { recursive: true });
223
+ fs.writeFileSync(PID_FILE, String(proc.pid), "utf-8");
224
+ persistManagedModel(model);
225
+ }
226
+ catch (err) {
227
+ console.warn(`[ollama] failed to write state files: ${err}`);
228
+ }
229
+ managedProcess = proc;
230
+ managedModel = model;
231
+ const ready = await waitForDaemon();
232
+ if (!ready) {
233
+ console.error("[ollama] daemon did not become ready within 15s");
234
+ // Clean up: we spawned something that didn't come up. Best effort kill.
235
+ try {
236
+ process.kill(proc.pid, "SIGTERM");
237
+ }
238
+ catch { /* ignore */ }
239
+ try {
240
+ fs.unlinkSync(PID_FILE);
241
+ }
242
+ catch { /* ignore */ }
243
+ persistManagedModel(null);
244
+ return false;
245
+ }
246
+ console.log(`[ollama] daemon ready — preloading model: ${model}`);
247
+ await preloadModel(model);
248
+ return true;
249
+ }
250
+ /**
251
+ * Stop the daemon if we started it, unload the model from VRAM.
252
+ * Does nothing if the daemon was started externally (no PID file).
253
+ */
254
+ export async function ensureStopped() {
255
+ if (!fs.existsSync(PID_FILE)) {
256
+ // No PID file = externally managed daemon. Don't touch it.
257
+ return;
258
+ }
259
+ let pid = null;
260
+ try {
261
+ const raw = fs.readFileSync(PID_FILE, "utf-8").trim();
262
+ const parsed = parseInt(raw, 10);
263
+ if (!isNaN(parsed) && parsed > 0)
264
+ pid = parsed;
265
+ }
266
+ catch {
267
+ // ignore
268
+ }
269
+ // Verify the PID actually points at an ollama process before SIGTERM.
270
+ // Prevents the classic PID-reuse bug where we'd kill a random process
271
+ // after a bot crash/restart left a stale pid file.
272
+ const pidIsOllama = pid ? await verifyPidIsOllama(pid) : false;
273
+ if (!pidIsOllama) {
274
+ console.log(`[ollama] pid file points to pid=${pid} which is no longer ollama — cleaning up`);
275
+ try {
276
+ fs.unlinkSync(PID_FILE);
277
+ }
278
+ catch { /* ignore */ }
279
+ persistManagedModel(null);
280
+ managedProcess = null;
281
+ managedModel = null;
282
+ return;
283
+ }
284
+ // Unload the model first so VRAM is freed even if the kill races.
285
+ // Model name might be in memory (current run) or on disk (survived a restart).
286
+ const modelToUnload = managedModel || loadManagedModelFromDisk();
287
+ if (modelToUnload) {
288
+ await unloadModel(modelToUnload);
289
+ }
290
+ try {
291
+ process.kill(pid, "SIGTERM");
292
+ console.log(`[ollama] stopped daemon pid=${pid}`);
293
+ }
294
+ catch (err) {
295
+ const msg = err instanceof Error ? err.message : String(err);
296
+ console.warn(`[ollama] failed to kill pid=${pid}: ${msg}`);
297
+ }
298
+ // Clean up state
299
+ try {
300
+ fs.unlinkSync(PID_FILE);
301
+ }
302
+ catch { /* ignore */ }
303
+ persistManagedModel(null);
304
+ managedProcess = null;
305
+ managedModel = null;
306
+ }
307
+ /** Whether the current daemon was spawned by the bot (via PID file). */
308
+ export function isBotManaged() {
309
+ return fs.existsSync(PID_FILE);
310
+ }
311
+ /** Currently loaded model name, if any. */
312
+ export function getManagedModel() {
313
+ return managedModel || loadManagedModelFromDisk();
314
+ }
315
+ // ── Module-load side effects ──────────────────────────────────────────────
316
+ //
317
+ // On first import (bot startup), reconcile any stale pid file from a previous
318
+ // crashed run AND restore the in-memory managedModel if the daemon is still
319
+ // alive. Best-effort — failures are logged but not fatal.
320
+ //
321
+ // NOTE: SIGTERM/SIGINT handling lives in src/index.ts (the bot's shutdown()
322
+ // function). That function calls ensureStopped() directly — we deliberately
323
+ // do NOT install our own signal handler here, to avoid racing with the
324
+ // bot's own cleanup path.
325
+ void (async () => {
326
+ try {
327
+ await reconcileStalePidFile();
328
+ if (fs.existsSync(PID_FILE)) {
329
+ const diskModel = loadManagedModelFromDisk();
330
+ if (diskModel) {
331
+ managedModel = diskModel;
332
+ console.log(`[ollama] restored managed state from previous run (model=${diskModel})`);
333
+ }
334
+ }
335
+ }
336
+ catch (err) {
337
+ console.warn(`[ollama] startup reconciliation failed: ${err}`);
338
+ }
339
+ })();
@@ -134,21 +134,27 @@ Always ask yourself first: "Can I solve this with my own intelligence?" If yes
134
134
  * @param isSDK Whether the active provider is the Claude SDK (has tool use)
135
135
  * @param language Preferred language ('de' or 'en')
136
136
  */
137
- export function buildSystemPrompt(isSDK, language = "de", chatId) {
138
- const langInstruction = language === "en"
139
- ? "Respond in English. If the user writes in another language, mirror their language naturally."
140
- : "Reply in the language the user writes in. Match their language naturally.";
141
- // Current date/time context
137
+ export function buildSystemPrompt(isSDK, language = "en", chatId) {
138
+ // The deep base prompt has only de/en variants (writing four full
139
+ // personality templates is out of scope). For es/fr we fall back to
140
+ // the English base — the LLM mirrors the user's conversational language
141
+ // anyway via langInstruction below, so the base-prompt language is
142
+ // really just the "hint" for the system-prompt wrapper.
143
+ const deepLang = language === "de" ? "de" : "en";
144
+ const langInstruction = "Reply in the language the user writes in. Match their language naturally.";
145
+ // Current date/time context — locale formatting uses the user's picked
146
+ // locale for familiarity (German date formatting for de, etc.).
142
147
  const now = new Date();
143
- const locale = language === "de" ? "de-DE" : "en-US";
144
- const dateStr = now.toLocaleDateString(locale, { weekday: "long", year: "numeric", month: "long", day: "numeric" });
145
- const timeStr = now.toLocaleTimeString(locale, { hour: "2-digit", minute: "2-digit" });
146
- const timeContext = language === "de"
147
- ? `Current date: ${dateStr}, ${timeStr} (Europe/Berlin).`
148
- : `Current date: ${dateStr}, ${timeStr} (Europe/Berlin).`;
149
- const parts = [getBasePrompt(language), langInstruction, timeContext];
148
+ const tzLocale = language === "de" ? "de-DE" :
149
+ language === "es" ? "es-ES" :
150
+ language === "fr" ? "fr-FR" :
151
+ "en-US";
152
+ const dateStr = now.toLocaleDateString(tzLocale, { weekday: "long", year: "numeric", month: "long", day: "numeric" });
153
+ const timeStr = now.toLocaleTimeString(tzLocale, { hour: "2-digit", minute: "2-digit" });
154
+ const timeContext = `Current date: ${dateStr}, ${timeStr} (Europe/Berlin).`;
155
+ const parts = [getBasePrompt(deepLang), langInstruction, timeContext];
150
156
  // Core self-awareness — always injected, adapted to active provider and language
151
- parts.push(buildSelfAwareness(isSDK, getActiveProviderLabel(), language));
157
+ parts.push(buildSelfAwareness(isSDK, getActiveProviderLabel(), deepLang));
152
158
  if (soulContent) {
153
159
  parts.push(soulContent);
154
160
  }
@@ -186,7 +192,7 @@ export function buildSystemPrompt(isSDK, language = "de", chatId) {
186
192
  * Build a system prompt enhanced with semantically relevant memories.
187
193
  * Searches the vector index for context related to the user's message.
188
194
  */
189
- export async function buildSmartSystemPrompt(isSDK, language = "de", userMessage, chatId) {
195
+ export async function buildSmartSystemPrompt(isSDK, language = "en", userMessage, chatId) {
190
196
  const base = buildSystemPrompt(isSDK, language, chatId);
191
197
  // SDK providers read memory directly via tools — skip
192
198
  if (isSDK || !userMessage)