alvin-bot 4.5.1 → 4.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +278 -0
- package/README.md +25 -2
- package/bin/cli.js +325 -26
- package/dist/handlers/commands.js +505 -63
- package/dist/handlers/message.js +209 -14
- package/dist/i18n.js +470 -13
- package/dist/index.js +45 -5
- package/dist/providers/claude-sdk-provider.js +106 -14
- package/dist/providers/ollama-provider.js +32 -0
- package/dist/providers/openai-compatible.js +10 -1
- package/dist/providers/registry.js +112 -17
- package/dist/providers/types.js +25 -3
- package/dist/services/compaction.js +2 -0
- package/dist/services/cron.js +53 -42
- package/dist/services/heartbeat.js +41 -7
- package/dist/services/language-detect.js +12 -2
- package/dist/services/ollama-manager.js +339 -0
- package/dist/services/personality.js +20 -14
- package/dist/services/session.js +21 -3
- package/dist/services/subagent-delivery.js +266 -0
- package/dist/services/subagent-stats.js +123 -0
- package/dist/services/subagents.js +509 -42
- package/dist/services/telegram.js +28 -1
- package/dist/services/updater.js +158 -0
- package/dist/services/usage-tracker.js +11 -4
- package/dist/services/users.js +2 -1
- package/docs/HANDBOOK.md +856 -0
- package/package.json +7 -2
- package/test/claude-sdk-provider.test.ts +69 -0
- package/test/i18n.test.ts +108 -0
- package/test/registry.test.ts +201 -0
- package/test/subagent-delivery.test.ts +273 -0
- package/test/subagent-stats.test.ts +119 -0
- package/test/subagents-commands.test.ts +64 -0
- package/test/subagents-config.test.ts +114 -0
- package/test/subagents-depth.test.ts +58 -0
- package/test/subagents-inheritance.test.ts +67 -0
- package/test/subagents-name-resolver.test.ts +122 -0
- package/test/subagents-priority-reject.test.ts +88 -0
- package/test/subagents-queue.test.ts +127 -0
- package/test/subagents-shutdown.test.ts +126 -0
- package/test/subagents-toolset.test.ts +51 -0
- package/vitest.config.ts +17 -0
package/dist/services/cron.js
CHANGED
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
import fs from "fs";
|
|
13
13
|
import { execSync } from "child_process";
|
|
14
14
|
import { dirname } from "path";
|
|
15
|
-
import { getRegistry } from "../engine.js";
|
|
16
15
|
import { CRON_FILE, BOT_ROOT } from "../paths.js";
|
|
17
16
|
// ── Storage ─────────────────────────────────────────────
|
|
18
17
|
function loadJobs() {
|
|
@@ -151,52 +150,61 @@ async function executeJob(job) {
|
|
|
151
150
|
return { output };
|
|
152
151
|
}
|
|
153
152
|
case "ai-query": {
|
|
154
|
-
// AI queries run
|
|
153
|
+
// AI queries run as isolated sub-agents rather than directly against
|
|
154
|
+
// the registry. This gives cron jobs timeout/cancel/state-tracking
|
|
155
|
+
// "for free" via the existing subagents infrastructure, and — most
|
|
156
|
+
// importantly — keeps them completely independent of any user's
|
|
157
|
+
// active main session. A cron job can run in the background while
|
|
158
|
+
// the user chats with Alvin in the foreground; neither interferes
|
|
159
|
+
// with the other.
|
|
155
160
|
const prompt = job.payload.prompt || "";
|
|
161
|
+
// Dynamic import to avoid circular dep chain (cron → engine → registry
|
|
162
|
+
// and subagents → engine). Type-only import at file top is erased,
|
|
163
|
+
// so no runtime cycle is created.
|
|
164
|
+
const { spawnSubAgent } = await import("./subagents.js");
|
|
156
165
|
try {
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
//
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
for (let i = 0; i < fullResponse.length; i += maxLen) {
|
|
186
|
-
parts.push(fullResponse.slice(i, i + maxLen));
|
|
187
|
-
}
|
|
188
|
-
for (const part of parts) {
|
|
189
|
-
await notifyCallback(job.target, part);
|
|
190
|
-
}
|
|
191
|
-
}
|
|
166
|
+
// Turn the fire-and-forget spawnSubAgent into an awaitable via
|
|
167
|
+
// the onComplete callback. Rejection of the spawn promise itself
|
|
168
|
+
// means the max-parallel limit was hit.
|
|
169
|
+
// Parse the target chat id for I3 delivery routing. Only telegram
|
|
170
|
+
// targets get a numeric parentChatId — other platforms/web get
|
|
171
|
+
// undefined and fall through the delivery router's warning path.
|
|
172
|
+
const parentChatId = job.target.platform === "telegram" && job.target.chatId
|
|
173
|
+
? Number(job.target.chatId)
|
|
174
|
+
: undefined;
|
|
175
|
+
const result = await new Promise((resolve, reject) => {
|
|
176
|
+
spawnSubAgent({
|
|
177
|
+
name: job.name,
|
|
178
|
+
prompt,
|
|
179
|
+
workingDir: BOT_ROOT,
|
|
180
|
+
source: "cron",
|
|
181
|
+
parentChatId,
|
|
182
|
+
onComplete: (r) => resolve(r),
|
|
183
|
+
}).catch(reject);
|
|
184
|
+
});
|
|
185
|
+
// Non-success: don't notify here. The I3 delivery router has
|
|
186
|
+
// already posted the appropriate banner (cancelled / timeout /
|
|
187
|
+
// error) to parentChatId, so a legacy notifyCallback would
|
|
188
|
+
// produce a duplicate message.
|
|
189
|
+
if (result.status !== "completed") {
|
|
190
|
+
return {
|
|
191
|
+
output: "",
|
|
192
|
+
error: `Sub-agent ${result.status}: ${result.error || result.status}`,
|
|
193
|
+
};
|
|
192
194
|
}
|
|
195
|
+
const fullResponse = result.output;
|
|
196
|
+
// NOTE: No notifyCallback for ai-query jobs. The I3 delivery
|
|
197
|
+
// router (src/services/subagent-delivery.ts) fires from
|
|
198
|
+
// spawnSubAgent().finally() and sends a proper banner+final to
|
|
199
|
+
// parentChatId. Legacy notifyCallback stays in use for the
|
|
200
|
+
// other job types (reminder, shell, http, message) which do
|
|
201
|
+
// not route through spawnSubAgent.
|
|
193
202
|
return { output: fullResponse.slice(0, 500) };
|
|
194
203
|
}
|
|
195
204
|
catch (err) {
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
}
|
|
205
|
+
// Re-throw without notifying — the outer catch will record
|
|
206
|
+
// lastError on the job, and the I3 delivery router has already
|
|
207
|
+
// posted a banner if the failure came from inside spawnSubAgent.
|
|
200
208
|
throw err;
|
|
201
209
|
}
|
|
202
210
|
}
|
|
@@ -206,7 +214,10 @@ async function executeJob(job) {
|
|
|
206
214
|
}
|
|
207
215
|
catch (err) {
|
|
208
216
|
const error = err instanceof Error ? err.message : String(err);
|
|
209
|
-
|
|
217
|
+
// Skip notification for ai-query jobs — the I3 delivery router has
|
|
218
|
+
// already posted the banner. Other job types still get the legacy
|
|
219
|
+
// notify path because they don't route through spawnSubAgent.
|
|
220
|
+
if (notifyCallback && job.type !== "ai-query") {
|
|
210
221
|
await notifyCallback(job.target, `❌ Cron Error (${job.name}): ${error}`);
|
|
211
222
|
}
|
|
212
223
|
return { output: "", error };
|
|
@@ -101,6 +101,18 @@ async function runHeartbeat() {
|
|
|
101
101
|
const provider = registry.get(key);
|
|
102
102
|
if (!provider)
|
|
103
103
|
continue;
|
|
104
|
+
// Providers with an on-demand lifecycle (local runners: Ollama, LM
|
|
105
|
+
// Studio, llama.cpp, …) are not pinged periodically — they're off
|
|
106
|
+
// until we actively boot them during failover. Mark as always-healthy
|
|
107
|
+
// so they remain a valid failover target.
|
|
108
|
+
if (provider.lifecycle) {
|
|
109
|
+
health.healthy = true;
|
|
110
|
+
health.lastCheck = Date.now();
|
|
111
|
+
health.lastLatencyMs = 0;
|
|
112
|
+
health.failCount = 0;
|
|
113
|
+
health.lastError = undefined;
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
104
116
|
const start = Date.now();
|
|
105
117
|
try {
|
|
106
118
|
// Quick availability check first
|
|
@@ -142,7 +154,7 @@ async function runHeartbeat() {
|
|
|
142
154
|
}
|
|
143
155
|
}
|
|
144
156
|
// Auto-failover logic
|
|
145
|
-
handleFailover(registry);
|
|
157
|
+
await handleFailover(registry);
|
|
146
158
|
}
|
|
147
159
|
async function pingProvider(provider, key) {
|
|
148
160
|
// For CLI-based providers, just check availability (no full query needed)
|
|
@@ -166,7 +178,7 @@ async function pingProvider(provider, key) {
|
|
|
166
178
|
}
|
|
167
179
|
return text || "ok";
|
|
168
180
|
}
|
|
169
|
-
function handleFailover(registry) {
|
|
181
|
+
async function handleFailover(registry) {
|
|
170
182
|
const primaryHealth = state.providers.get(state.originalPrimary);
|
|
171
183
|
const currentKey = registry.getActiveKey();
|
|
172
184
|
// Case 1: Primary is down → switch to first healthy fallback
|
|
@@ -174,19 +186,41 @@ function handleFailover(registry) {
|
|
|
174
186
|
const fallbackOrder = config.fallbackProviders;
|
|
175
187
|
for (const fbKey of fallbackOrder) {
|
|
176
188
|
const fbHealth = state.providers.get(fbKey);
|
|
177
|
-
if (fbHealth?.healthy)
|
|
189
|
+
if (!fbHealth?.healthy)
|
|
190
|
+
continue;
|
|
191
|
+
const fbProvider = registry.get(fbKey);
|
|
192
|
+
if (!fbProvider)
|
|
193
|
+
continue;
|
|
194
|
+
// Providers with a lifecycle (local runners) must be booted before
|
|
195
|
+
// the switch. If boot fails, skip and try the next fallback.
|
|
196
|
+
if (fbProvider.lifecycle) {
|
|
197
|
+
console.log(`💓 🔄 Auto-failover: ${state.originalPrimary} → ${fbKey} — booting ${fbKey}…`);
|
|
198
|
+
const ok = await fbProvider.lifecycle.ensureRunning();
|
|
199
|
+
if (!ok) {
|
|
200
|
+
console.log(`💓 ⚠️ ${fbKey} boot failed — skipping`);
|
|
201
|
+
continue;
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
else {
|
|
178
205
|
console.log(`💓 🔄 Auto-failover: ${state.originalPrimary} → ${fbKey}`);
|
|
179
|
-
registry.switchTo(fbKey);
|
|
180
|
-
state.wasFailedOver = true;
|
|
181
|
-
return;
|
|
182
206
|
}
|
|
207
|
+
registry.switchTo(fbKey);
|
|
208
|
+
state.wasFailedOver = true;
|
|
209
|
+
return;
|
|
183
210
|
}
|
|
184
211
|
console.log("💓 ⚠️ All providers unhealthy — staying on primary");
|
|
212
|
+
return;
|
|
185
213
|
}
|
|
186
|
-
// Case 2: Primary recovered → switch back
|
|
214
|
+
// Case 2: Primary recovered → switch back, tearing down any lifecycle-
|
|
215
|
+
// managed fallback we booted during the outage.
|
|
187
216
|
if (primaryHealth?.healthy && state.wasFailedOver && currentKey !== state.originalPrimary) {
|
|
217
|
+
const currentProvider = registry.get(currentKey);
|
|
188
218
|
console.log(`💓 ✅ Primary recovered — switching back to ${state.originalPrimary}`);
|
|
189
219
|
registry.switchTo(state.originalPrimary);
|
|
190
220
|
state.wasFailedOver = false;
|
|
221
|
+
if (currentProvider?.lifecycle) {
|
|
222
|
+
console.log(`💓 🧹 Tearing down ${currentKey} daemon + unloading model`);
|
|
223
|
+
await currentProvider.lifecycle.ensureStopped();
|
|
224
|
+
}
|
|
191
225
|
}
|
|
192
226
|
}
|
|
@@ -83,12 +83,18 @@ export function detectLanguage(text) {
|
|
|
83
83
|
/**
|
|
84
84
|
* Update language statistics for a user and auto-adapt if pattern is clear.
|
|
85
85
|
* Returns the recommended language for this session.
|
|
86
|
+
*
|
|
87
|
+
* Note: auto-detection is intentionally limited to de/en (the two languages
|
|
88
|
+
* our heuristic covers). For es/fr users, the /language command is the only
|
|
89
|
+
* way to set their UI locale — their explicit choice is persisted via
|
|
90
|
+
* profile.langExplicit and trackAndAdapt returns it untouched.
|
|
86
91
|
*/
|
|
87
92
|
export function trackAndAdapt(userId, text, currentSessionLang) {
|
|
88
93
|
const profile = loadProfile(userId);
|
|
89
94
|
if (!profile)
|
|
90
95
|
return currentSessionLang;
|
|
91
|
-
// If user explicitly set language,
|
|
96
|
+
// If user explicitly set language (via /language), honour it and never
|
|
97
|
+
// auto-switch. This is the only way es/fr get persisted.
|
|
92
98
|
if (profile.langExplicit)
|
|
93
99
|
return profile.language;
|
|
94
100
|
const detected = detectLanguage(text);
|
|
@@ -115,7 +121,10 @@ export function trackAndAdapt(userId, text, currentSessionLang) {
|
|
|
115
121
|
}
|
|
116
122
|
}
|
|
117
123
|
else {
|
|
118
|
-
// Early phase: follow immediate language for responsiveness
|
|
124
|
+
// Early phase: follow immediate language for responsiveness.
|
|
125
|
+
// Only overrides es/fr if the user wrote in de/en without having set
|
|
126
|
+
// langExplicit — which can only happen if they changed language via
|
|
127
|
+
// something other than /language (shouldn't happen in practice).
|
|
119
128
|
profile.language = detected;
|
|
120
129
|
}
|
|
121
130
|
saveProfile(profile);
|
|
@@ -123,6 +132,7 @@ export function trackAndAdapt(userId, text, currentSessionLang) {
|
|
|
123
132
|
}
|
|
124
133
|
/**
|
|
125
134
|
* Mark language as explicitly set by user (disables auto-detection).
|
|
135
|
+
* Accepts all supported locales including es/fr.
|
|
126
136
|
*/
|
|
127
137
|
export function setExplicitLanguage(userId, lang) {
|
|
128
138
|
const profile = loadProfile(userId);
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ollama Manager — on-demand daemon lifecycle for fallback use.
|
|
3
|
+
*
|
|
4
|
+
* The bot uses Ollama as a local fallback when the primary provider is down.
|
|
5
|
+
* Historically the user had to run `ollama serve` themselves — if they forgot,
|
|
6
|
+
* the fallback silently failed. This service spawns the daemon on demand,
|
|
7
|
+
* preloads the target model into VRAM, and tears it all down once the primary
|
|
8
|
+
* provider is healthy again.
|
|
9
|
+
*
|
|
10
|
+
* Key invariants:
|
|
11
|
+
* • Only kills instances the bot started itself (tracked via PID file).
|
|
12
|
+
* An externally-managed ollama is left alone.
|
|
13
|
+
* • Preload uses Ollama's native /api/generate endpoint with an empty
|
|
14
|
+
* prompt and keep_alive=30m, so the first real query is not cold.
|
|
15
|
+
* • Unload sets keep_alive=0 to flush the model from VRAM immediately.
|
|
16
|
+
* • All spawns are detached with stdio=ignore, so the child survives the
|
|
17
|
+
* bot crashing but still gets cleaned up on graceful shutdown.
|
|
18
|
+
*/
|
|
19
|
+
import { spawn, execFile } from "child_process";
|
|
20
|
+
import { promisify } from "util";
|
|
21
|
+
import fs from "fs";
|
|
22
|
+
import { resolve, dirname } from "path";
|
|
23
|
+
import os from "os";
|
|
24
|
+
const execFileAsync = promisify(execFile);
|
|
25
|
+
const DATA_DIR = process.env.ALVIN_DATA_DIR || resolve(os.homedir(), ".alvin-bot");
|
|
26
|
+
const PID_FILE = resolve(DATA_DIR, "ollama.pid");
|
|
27
|
+
const MODEL_FILE = resolve(DATA_DIR, "ollama.model");
|
|
28
|
+
const OLLAMA_API_BASE = "http://localhost:11434";
|
|
29
|
+
const DAEMON_READY_TIMEOUT_MS = 15_000;
|
|
30
|
+
const PRELOAD_TIMEOUT_MS = 60_000;
|
|
31
|
+
const KEEP_ALIVE = "30m";
|
|
32
|
+
let managedProcess = null;
|
|
33
|
+
let managedModel = null;
|
|
34
|
+
// ── PID / Process verification ─────────────────────────────────────────────
|
|
35
|
+
/**
|
|
36
|
+
* Verify that `pid` is actually an ollama process by inspecting its command
|
|
37
|
+
* via `ps`. This prevents the classic PID-reuse bug where we'd kill a
|
|
38
|
+
* random process after a bot crash left a stale pid file pointing at
|
|
39
|
+
* something the OS has since re-assigned.
|
|
40
|
+
*/
|
|
41
|
+
async function verifyPidIsOllama(pid) {
|
|
42
|
+
try {
|
|
43
|
+
const { stdout } = await execFileAsync("ps", ["-p", String(pid), "-o", "command="], {
|
|
44
|
+
timeout: 3_000,
|
|
45
|
+
});
|
|
46
|
+
return stdout.toLowerCase().includes("ollama");
|
|
47
|
+
}
|
|
48
|
+
catch {
|
|
49
|
+
// ps exits non-zero if pid doesn't exist — treat as "not ollama"
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
function loadManagedModelFromDisk() {
|
|
54
|
+
try {
|
|
55
|
+
if (fs.existsSync(MODEL_FILE)) {
|
|
56
|
+
return fs.readFileSync(MODEL_FILE, "utf-8").trim() || null;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
catch { /* ignore */ }
|
|
60
|
+
return null;
|
|
61
|
+
}
|
|
62
|
+
function persistManagedModel(model) {
|
|
63
|
+
try {
|
|
64
|
+
fs.mkdirSync(dirname(MODEL_FILE), { recursive: true });
|
|
65
|
+
if (model) {
|
|
66
|
+
fs.writeFileSync(MODEL_FILE, model, "utf-8");
|
|
67
|
+
}
|
|
68
|
+
else if (fs.existsSync(MODEL_FILE)) {
|
|
69
|
+
fs.unlinkSync(MODEL_FILE);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
catch (err) {
|
|
73
|
+
console.warn(`[ollama] failed to persist model file: ${err}`);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Reconcile stale state left behind from a previous bot run.
|
|
78
|
+
* If the PID file points at a process that is no longer ollama (crashed,
|
|
79
|
+
* PID reused, never existed), remove the file so we don't try to kill
|
|
80
|
+
* the wrong process later. Called lazily from ensureRunning / ensureStopped.
|
|
81
|
+
*/
|
|
82
|
+
async function reconcileStalePidFile() {
|
|
83
|
+
if (!fs.existsSync(PID_FILE))
|
|
84
|
+
return;
|
|
85
|
+
try {
|
|
86
|
+
const raw = fs.readFileSync(PID_FILE, "utf-8").trim();
|
|
87
|
+
const pid = parseInt(raw, 10);
|
|
88
|
+
if (isNaN(pid) || pid <= 0) {
|
|
89
|
+
fs.unlinkSync(PID_FILE);
|
|
90
|
+
return;
|
|
91
|
+
}
|
|
92
|
+
const isOllama = await verifyPidIsOllama(pid);
|
|
93
|
+
if (!isOllama) {
|
|
94
|
+
console.log(`[ollama] stale pid file (pid=${pid} is no longer ollama) — removing`);
|
|
95
|
+
fs.unlinkSync(PID_FILE);
|
|
96
|
+
persistManagedModel(null);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
catch {
|
|
100
|
+
// If we can't read/parse it, drop it
|
|
101
|
+
try {
|
|
102
|
+
fs.unlinkSync(PID_FILE);
|
|
103
|
+
}
|
|
104
|
+
catch { /* ignore */ }
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
export async function isDaemonRunning() {
|
|
108
|
+
try {
|
|
109
|
+
const res = await fetch(`${OLLAMA_API_BASE}/api/tags`, {
|
|
110
|
+
signal: AbortSignal.timeout(2_000),
|
|
111
|
+
});
|
|
112
|
+
return res.ok;
|
|
113
|
+
}
|
|
114
|
+
catch {
|
|
115
|
+
return false;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
async function findOllamaBinary() {
|
|
119
|
+
// Common install paths — macOS Homebrew, Linux, /usr/local
|
|
120
|
+
const candidates = [
|
|
121
|
+
"/opt/homebrew/bin/ollama",
|
|
122
|
+
"/usr/local/bin/ollama",
|
|
123
|
+
"/usr/bin/ollama",
|
|
124
|
+
];
|
|
125
|
+
for (const p of candidates) {
|
|
126
|
+
if (fs.existsSync(p))
|
|
127
|
+
return p;
|
|
128
|
+
}
|
|
129
|
+
// Fallback: `which ollama` (async, no event-loop block)
|
|
130
|
+
try {
|
|
131
|
+
const { stdout } = await execFileAsync("which", ["ollama"], { timeout: 3_000 });
|
|
132
|
+
return stdout.trim() || null;
|
|
133
|
+
}
|
|
134
|
+
catch {
|
|
135
|
+
return null;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
async function waitForDaemon(timeoutMs = DAEMON_READY_TIMEOUT_MS) {
|
|
139
|
+
const start = Date.now();
|
|
140
|
+
while (Date.now() - start < timeoutMs) {
|
|
141
|
+
if (await isDaemonRunning())
|
|
142
|
+
return true;
|
|
143
|
+
await new Promise(r => setTimeout(r, 500));
|
|
144
|
+
}
|
|
145
|
+
return false;
|
|
146
|
+
}
|
|
147
|
+
async function preloadModel(model) {
|
|
148
|
+
try {
|
|
149
|
+
await fetch(`${OLLAMA_API_BASE}/api/generate`, {
|
|
150
|
+
method: "POST",
|
|
151
|
+
headers: { "Content-Type": "application/json" },
|
|
152
|
+
body: JSON.stringify({
|
|
153
|
+
model,
|
|
154
|
+
prompt: "",
|
|
155
|
+
keep_alive: KEEP_ALIVE,
|
|
156
|
+
}),
|
|
157
|
+
signal: AbortSignal.timeout(PRELOAD_TIMEOUT_MS),
|
|
158
|
+
});
|
|
159
|
+
}
|
|
160
|
+
catch (err) {
|
|
161
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
162
|
+
console.warn(`[ollama] preload warning (model=${model}): ${msg}`);
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
async function unloadModel(model) {
|
|
166
|
+
try {
|
|
167
|
+
await fetch(`${OLLAMA_API_BASE}/api/generate`, {
|
|
168
|
+
method: "POST",
|
|
169
|
+
headers: { "Content-Type": "application/json" },
|
|
170
|
+
body: JSON.stringify({
|
|
171
|
+
model,
|
|
172
|
+
keep_alive: 0, // immediate VRAM unload
|
|
173
|
+
}),
|
|
174
|
+
signal: AbortSignal.timeout(5_000),
|
|
175
|
+
});
|
|
176
|
+
}
|
|
177
|
+
catch {
|
|
178
|
+
// ignore — daemon may already be stopping
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
/**
|
|
182
|
+
* Ensure the Ollama daemon is running and the specified model is loaded.
|
|
183
|
+
* Idempotent. If an externally-managed daemon is already running, we use
|
|
184
|
+
* it and just preload the model, but leave it for ensureStopped() to decide
|
|
185
|
+
* whether to kill it (it won't — only bot-spawned daemons get killed).
|
|
186
|
+
*/
|
|
187
|
+
export async function ensureRunning(model) {
|
|
188
|
+
// Drop any stale pid file from a previous run before deciding anything.
|
|
189
|
+
await reconcileStalePidFile();
|
|
190
|
+
if (await isDaemonRunning()) {
|
|
191
|
+
// Daemon is already up — either we started it in a previous bot run
|
|
192
|
+
// (pid file still valid) or user started it externally (no pid file).
|
|
193
|
+
// In both cases we preload the target model so the first query is warm.
|
|
194
|
+
await preloadModel(model);
|
|
195
|
+
managedModel = model;
|
|
196
|
+
// If a valid pid file exists, we inherit ownership of that daemon
|
|
197
|
+
// (it was bot-managed before a crash/restart). Update the model file.
|
|
198
|
+
if (fs.existsSync(PID_FILE)) {
|
|
199
|
+
persistManagedModel(model);
|
|
200
|
+
}
|
|
201
|
+
return true;
|
|
202
|
+
}
|
|
203
|
+
const binary = await findOllamaBinary();
|
|
204
|
+
if (!binary) {
|
|
205
|
+
console.error("[ollama] binary not found — install ollama first (brew install ollama)");
|
|
206
|
+
return false;
|
|
207
|
+
}
|
|
208
|
+
console.log(`[ollama] starting daemon: ${binary} serve`);
|
|
209
|
+
const proc = spawn(binary, ["serve"], {
|
|
210
|
+
detached: true,
|
|
211
|
+
stdio: "ignore",
|
|
212
|
+
env: process.env,
|
|
213
|
+
});
|
|
214
|
+
proc.unref();
|
|
215
|
+
if (!proc.pid) {
|
|
216
|
+
console.error("[ollama] spawn failed — no pid");
|
|
217
|
+
return false;
|
|
218
|
+
}
|
|
219
|
+
// Persist the PID + model so we can kill/unload correctly on cleanup,
|
|
220
|
+
// even after a bot restart loses the in-memory references.
|
|
221
|
+
try {
|
|
222
|
+
fs.mkdirSync(dirname(PID_FILE), { recursive: true });
|
|
223
|
+
fs.writeFileSync(PID_FILE, String(proc.pid), "utf-8");
|
|
224
|
+
persistManagedModel(model);
|
|
225
|
+
}
|
|
226
|
+
catch (err) {
|
|
227
|
+
console.warn(`[ollama] failed to write state files: ${err}`);
|
|
228
|
+
}
|
|
229
|
+
managedProcess = proc;
|
|
230
|
+
managedModel = model;
|
|
231
|
+
const ready = await waitForDaemon();
|
|
232
|
+
if (!ready) {
|
|
233
|
+
console.error("[ollama] daemon did not become ready within 15s");
|
|
234
|
+
// Clean up: we spawned something that didn't come up. Best effort kill.
|
|
235
|
+
try {
|
|
236
|
+
process.kill(proc.pid, "SIGTERM");
|
|
237
|
+
}
|
|
238
|
+
catch { /* ignore */ }
|
|
239
|
+
try {
|
|
240
|
+
fs.unlinkSync(PID_FILE);
|
|
241
|
+
}
|
|
242
|
+
catch { /* ignore */ }
|
|
243
|
+
persistManagedModel(null);
|
|
244
|
+
return false;
|
|
245
|
+
}
|
|
246
|
+
console.log(`[ollama] daemon ready — preloading model: ${model}`);
|
|
247
|
+
await preloadModel(model);
|
|
248
|
+
return true;
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Stop the daemon if we started it, unload the model from VRAM.
|
|
252
|
+
* Does nothing if the daemon was started externally (no PID file).
|
|
253
|
+
*/
|
|
254
|
+
export async function ensureStopped() {
|
|
255
|
+
if (!fs.existsSync(PID_FILE)) {
|
|
256
|
+
// No PID file = externally managed daemon. Don't touch it.
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
let pid = null;
|
|
260
|
+
try {
|
|
261
|
+
const raw = fs.readFileSync(PID_FILE, "utf-8").trim();
|
|
262
|
+
const parsed = parseInt(raw, 10);
|
|
263
|
+
if (!isNaN(parsed) && parsed > 0)
|
|
264
|
+
pid = parsed;
|
|
265
|
+
}
|
|
266
|
+
catch {
|
|
267
|
+
// ignore
|
|
268
|
+
}
|
|
269
|
+
// Verify the PID actually points at an ollama process before SIGTERM.
|
|
270
|
+
// Prevents the classic PID-reuse bug where we'd kill a random process
|
|
271
|
+
// after a bot crash/restart left a stale pid file.
|
|
272
|
+
const pidIsOllama = pid ? await verifyPidIsOllama(pid) : false;
|
|
273
|
+
if (!pidIsOllama) {
|
|
274
|
+
console.log(`[ollama] pid file points to pid=${pid} which is no longer ollama — cleaning up`);
|
|
275
|
+
try {
|
|
276
|
+
fs.unlinkSync(PID_FILE);
|
|
277
|
+
}
|
|
278
|
+
catch { /* ignore */ }
|
|
279
|
+
persistManagedModel(null);
|
|
280
|
+
managedProcess = null;
|
|
281
|
+
managedModel = null;
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
// Unload the model first so VRAM is freed even if the kill races.
|
|
285
|
+
// Model name might be in memory (current run) or on disk (survived a restart).
|
|
286
|
+
const modelToUnload = managedModel || loadManagedModelFromDisk();
|
|
287
|
+
if (modelToUnload) {
|
|
288
|
+
await unloadModel(modelToUnload);
|
|
289
|
+
}
|
|
290
|
+
try {
|
|
291
|
+
process.kill(pid, "SIGTERM");
|
|
292
|
+
console.log(`[ollama] stopped daemon pid=${pid}`);
|
|
293
|
+
}
|
|
294
|
+
catch (err) {
|
|
295
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
296
|
+
console.warn(`[ollama] failed to kill pid=${pid}: ${msg}`);
|
|
297
|
+
}
|
|
298
|
+
// Clean up state
|
|
299
|
+
try {
|
|
300
|
+
fs.unlinkSync(PID_FILE);
|
|
301
|
+
}
|
|
302
|
+
catch { /* ignore */ }
|
|
303
|
+
persistManagedModel(null);
|
|
304
|
+
managedProcess = null;
|
|
305
|
+
managedModel = null;
|
|
306
|
+
}
|
|
307
|
+
/** Whether a bot-written PID file exists, i.e. the daemon is presumed bot-spawned (the PID itself is not re-verified here). */
|
|
308
|
+
export function isBotManaged() {
|
|
309
|
+
return fs.existsSync(PID_FILE);
|
|
310
|
+
}
|
|
311
|
+
/** Name of the model recorded as bot-managed (in memory, else from the on-disk model file), or null if none. */
|
|
312
|
+
export function getManagedModel() {
|
|
313
|
+
return managedModel || loadManagedModelFromDisk();
|
|
314
|
+
}
|
|
315
|
+
// ── Module-load side effects ──────────────────────────────────────────────
|
|
316
|
+
//
|
|
317
|
+
// On first import (bot startup), reconcile any stale pid file from a previous
|
|
318
|
+
// crashed run AND restore the in-memory managedModel if the daemon is still
|
|
319
|
+
// alive. Best-effort — failures are logged but not fatal.
|
|
320
|
+
//
|
|
321
|
+
// NOTE: SIGTERM/SIGINT handling lives in src/index.ts (the bot's shutdown()
|
|
322
|
+
// function). That function calls ensureStopped() directly — we deliberately
|
|
323
|
+
// do NOT install our own signal handler here, to avoid racing with the
|
|
324
|
+
// bot's own cleanup path.
|
|
325
|
+
void (async () => {
|
|
326
|
+
try {
|
|
327
|
+
await reconcileStalePidFile();
|
|
328
|
+
if (fs.existsSync(PID_FILE)) {
|
|
329
|
+
const diskModel = loadManagedModelFromDisk();
|
|
330
|
+
if (diskModel) {
|
|
331
|
+
managedModel = diskModel;
|
|
332
|
+
console.log(`[ollama] restored managed state from previous run (model=${diskModel})`);
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
catch (err) {
|
|
337
|
+
console.warn(`[ollama] startup reconciliation failed: ${err}`);
|
|
338
|
+
}
|
|
339
|
+
})();
|
|
@@ -134,21 +134,27 @@ Always ask yourself first: "Can I solve this with my own intelligence?" If yes
|
|
|
134
134
|
* @param isSDK Whether the active provider is the Claude SDK (has tool use)
|
|
135
135
|
* @param language Preferred language ('de', 'en', 'es' or 'fr'); es/fr reuse the English base prompt
|
|
136
136
|
*/
|
|
137
|
-
export function buildSystemPrompt(isSDK, language = "
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
//
|
|
137
|
+
export function buildSystemPrompt(isSDK, language = "en", chatId) {
|
|
138
|
+
// The deep base prompt has only de/en variants (writing four full
|
|
139
|
+
// personality templates is out of scope). For es/fr we fall back to
|
|
140
|
+
// the English base — the LLM mirrors the user's conversational language
|
|
141
|
+
// anyway via langInstruction below, so the base-prompt language is
|
|
142
|
+
// really just the "hint" for the system-prompt wrapper.
|
|
143
|
+
const deepLang = language === "de" ? "de" : "en";
|
|
144
|
+
const langInstruction = "Reply in the language the user writes in. Match their language naturally.";
|
|
145
|
+
// Current date/time context — locale formatting uses the user's picked
|
|
146
|
+
// locale for familiarity (German date formatting for de, etc.).
|
|
142
147
|
const now = new Date();
|
|
143
|
-
const
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
const
|
|
148
|
+
const tzLocale = language === "de" ? "de-DE" :
|
|
149
|
+
language === "es" ? "es-ES" :
|
|
150
|
+
language === "fr" ? "fr-FR" :
|
|
151
|
+
"en-US";
|
|
152
|
+
const dateStr = now.toLocaleDateString(tzLocale, { weekday: "long", year: "numeric", month: "long", day: "numeric" });
|
|
153
|
+
const timeStr = now.toLocaleTimeString(tzLocale, { hour: "2-digit", minute: "2-digit" });
|
|
154
|
+
const timeContext = `Current date: ${dateStr}, ${timeStr} (Europe/Berlin).`;
|
|
155
|
+
const parts = [getBasePrompt(deepLang), langInstruction, timeContext];
|
|
150
156
|
// Core self-awareness — always injected, adapted to active provider and language
|
|
151
|
-
parts.push(buildSelfAwareness(isSDK, getActiveProviderLabel(),
|
|
157
|
+
parts.push(buildSelfAwareness(isSDK, getActiveProviderLabel(), deepLang));
|
|
152
158
|
if (soulContent) {
|
|
153
159
|
parts.push(soulContent);
|
|
154
160
|
}
|
|
@@ -186,7 +192,7 @@ export function buildSystemPrompt(isSDK, language = "de", chatId) {
|
|
|
186
192
|
* Build a system prompt enhanced with semantically relevant memories.
|
|
187
193
|
* Searches the vector index for context related to the user's message.
|
|
188
194
|
*/
|
|
189
|
-
export async function buildSmartSystemPrompt(isSDK, language = "
|
|
195
|
+
export async function buildSmartSystemPrompt(isSDK, language = "en", userMessage, chatId) {
|
|
190
196
|
const base = buildSystemPrompt(isSDK, language, chatId);
|
|
191
197
|
// SDK providers read memory directly via tools — skip
|
|
192
198
|
if (isSDK || !userMessage)
|