wolverine-ai 3.6.0 → 3.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/lib/gpu-fleet.js +313 -0
- package/server/routes/fleet.js +167 -0
- package/server/routes/inference.js +7 -2
- package/src/agent/agent-engine.js +58 -4
- package/src/brain/brain.js +1 -1
- package/src/core/ai-client.js +4 -3
- package/src/core/error-parser.js +2 -2
- package/src/core/runner.js +29 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "3.6.
|
|
3
|
+
"version": "3.6.1",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
const https = require("https"); // Vast.ai control-plane calls (TLS)
const http = require("http");   // direct GPU llama.cpp endpoints (plain HTTP)

/**
 * GPU Fleet Manager — controls Vast.ai GPU instances for inference.
 *
 * Features:
 * - Start/stop individual GPUs via Vast API
 * - Health monitoring and auto-discovery
 * - Round-robin routing across active GPUs
 * - Auto-scale: start burst GPUs when queue grows, stop when idle
 * - Cold start tracking (~5s per GPU)
 *
 * Each GPU instance runs llama.cpp with --api-key for security.
 * Only the EC2 backend has the internal keys.
 */

// NOTE(review): VAST_API is not referenced below (_vastApi hardcodes the
// hostname and "/api/v0" prefix) — confirm whether it can be removed.
const VAST_API = "https://cloud.vast.ai/api/v0";
const VAST_KEY = process.env.VAST_API_KEY || "";
const POLL_INTERVAL_MS = 30000; // health check every 30s
// parseInt(...) || default: an unset, non-numeric, or "0" env value all fall
// back to the default — 0 is not a usable override for either knob.
const IDLE_STOP_MS = parseInt(process.env.GPU_IDLE_STOP_MS, 10) || 300000; // 5 min idle → stop
const SCALE_UP_QUEUE = parseInt(process.env.GPU_SCALE_UP_QUEUE, 10) || 3; // start burst GPU when 3+ queued
|
|
23
|
+
|
|
24
|
+
/**
 * GPU Fleet Manager — controls Vast.ai GPU instances for inference.
 *
 * Responsibilities:
 * - start/stop individual GPU instances through the Vast.ai REST API
 * - periodic health monitoring of every registered instance
 * - round-robin selection of a healthy GPU for inference traffic
 * - auto-scale: boot a stopped burst GPU when the queue grows, stop idle ones
 * - cold-start latency tracking (recorded per GPU in coldStartMs)
 *
 * Each GPU runs llama.cpp with --api-key; only the backend holds the keys.
 */
class GpuFleet {
  /**
   * @param {object} [config] - reserved for future options; currently unused,
   *   kept for interface compatibility with existing callers.
   */
  constructor(config = {}) {
    // Registry keyed by String(instanceId):
    // { instanceId, host, port, key, model, role, autoStop,
    //   status, lastUsed, lastHealth, coldStartMs }
    this.gpus = new Map();
    this._roundRobinIndex = 0; // cursor for getAvailable()
    this._pollTimer = null;    // setInterval handle for health polling
    this._scaleTimer = null;   // reserved for a future auto-scale timer
    this._requestQueue = [];   // reserved: queued inference requests
    this._activeRequests = 0;  // reserved: in-flight request counter
  }

  /**
   * Register (or replace) a GPU instance in the fleet.
   * Keys are normalized with String() so numeric and string ids collide.
   * @returns {GpuFleet} this, for chaining.
   */
  register(instanceId, { host, port, key, model = "wolverine-test-1", role = "general", autoStop = true }) {
    const id = String(instanceId);
    this.gpus.set(id, {
      instanceId: id,
      host,
      // Guard against undefined/garbage ports (original stored NaN);
      // llama.cpp's conventional default is 8080.
      port: Number.parseInt(port, 10) || 8080,
      key,
      model,
      role,
      autoStop,
      status: "unknown", // unknown | starting | healthy | unhealthy | stopped
      lastUsed: 0,
      lastHealth: null,
      coldStartMs: null,
    });
    return this;
  }

  /**
   * Seed the fleet with the primary always-on GPU from environment variables.
   * Requires both WOLVERINE_INFERENCE_URL and WOLVERINE_GPU_KEY.
   */
  loadFromEnv() {
    const url = process.env.WOLVERINE_INFERENCE_URL;
    const key = process.env.WOLVERINE_GPU_KEY;
    if (url && key) {
      try {
        const parsed = new URL(url);
        const instanceId = process.env.WOLVERINE_GPU_INSTANCE_ID || "primary";
        this.register(instanceId, {
          host: parsed.hostname,
          // URL.port is "" for default ports; fall back to 80 for plain http.
          port: Number.parseInt(parsed.port, 10) || 80,
          key,
          role: "primary",
          autoStop: false, // the primary GPU is never auto-stopped
        });
      } catch {
        // Malformed URL — leave the fleet empty rather than crash startup.
      }
    }
    return this;
  }

  /**
   * Load fleet state from Postgres, creating the gpu_fleet table on first
   * run. Failures are logged and swallowed — a DB outage must not block
   * server startup.
   * @param {object} pool - pg-style pool exposing query(sql, params).
   */
  async loadFromDb(pool) {
    try {
      const exists = await pool.query(
        "SELECT 1 FROM information_schema.tables WHERE table_name = 'gpu_fleet' LIMIT 1"
      );
      if (exists.rows.length === 0) {
        await pool.query(`
          CREATE TABLE gpu_fleet (
            instance_id TEXT PRIMARY KEY,
            vast_id TEXT,
            host TEXT NOT NULL,
            port INTEGER NOT NULL DEFAULT 8080,
            internal_key TEXT NOT NULL,
            model TEXT DEFAULT 'wolverine-test-1',
            role TEXT DEFAULT 'general',
            auto_stop BOOLEAN DEFAULT true,
            status TEXT DEFAULT 'stopped',
            gpu_name TEXT,
            created_at TIMESTAMPTZ DEFAULT NOW()
          )
        `);
      }
      const { rows } = await pool.query("SELECT * FROM gpu_fleet");
      for (const r of rows) {
        this.register(r.instance_id, {
          host: r.host, port: r.port, key: r.internal_key,
          model: r.model, role: r.role, autoStop: r.auto_stop,
        });
        // register() keys by String(instanceId) — look up the same way.
        const gpu = this.gpus.get(String(r.instance_id));
        if (gpu) {
          gpu.status = r.status;
          gpu.vastId = r.vast_id;
          gpu.gpuName = r.gpu_name;
        }
      }
    } catch (err) {
      console.log("[GPU Fleet] DB load failed:", err.message);
    }
    return this;
  }

  /** Begin periodic health checks (idempotent); runs one check immediately. */
  startPolling() {
    if (this._pollTimer) return;
    this._pollTimer = setInterval(() => this._healthCheck(), POLL_INTERVAL_MS);
    this._healthCheck(); // immediate first check
    return this;
  }

  /** Stop periodic health checks. */
  stopPolling() {
    if (this._pollTimer) { clearInterval(this._pollTimer); this._pollTimer = null; }
  }

  /**
   * Pick a healthy GPU for inference using round-robin, and mark it used
   * (feeds the idle-stop accounting in autoScale).
   * @returns {{host, port, key, instanceId, model}|null} null when no GPU is healthy.
   */
  getAvailable() {
    const healthy = Array.from(this.gpus.values()).filter((g) => g.status === "healthy");
    if (healthy.length === 0) return null;
    // Pick first, then advance. (The original advanced before picking, which
    // skipped index 0 on the first call and double-stepped whenever the
    // healthy set changed size.)
    const gpu = healthy[this._roundRobinIndex % healthy.length];
    this._roundRobinIndex = (this._roundRobinIndex + 1) % healthy.length;
    gpu.lastUsed = Date.now();
    return { host: gpu.host, port: gpu.port, key: gpu.key, instanceId: gpu.instanceId, model: gpu.model };
  }

  /**
   * Start a stopped GPU via the Vast API, then poll its llama.cpp endpoint
   * (every 500ms, up to 60s) until /v1/models answers. Records cold-start
   * latency in coldStartMs.
   * @throws when the GPU is unregistered or fails to come up within 60s.
   */
  async startGpu(instanceId) {
    const gpu = this.gpus.get(String(instanceId));
    if (!gpu) throw new Error(`GPU ${instanceId} not registered`);
    if (gpu.status === "healthy" || gpu.status === "starting") return gpu;

    gpu.status = "starting";
    gpu.coldStartMs = null;
    const startTime = Date.now();

    const vastId = gpu.vastId || instanceId;
    try {
      await this._vastApi("PUT", `/instances/${vastId}/`, { state: "running" });

      // Poll until healthy: 120 iterations × 500ms = 60s max.
      for (let i = 0; i < 120; i++) {
        await new Promise((r) => setTimeout(r, 500));
        try {
          const res = await this._httpGet(gpu.host, gpu.port, "/v1/models", gpu.key);
          // Use the same readiness test as the periodic health check. The
          // original matched only "gemma" here while _healthCheck also
          // accepted "wolverine", so wolverine-named models never reported
          // as started.
          if (this._looksReady(res)) {
            gpu.status = "healthy";
            gpu.coldStartMs = Date.now() - startTime;
            gpu.lastHealth = Date.now();
            console.log(`[GPU Fleet] ${instanceId} started in ${gpu.coldStartMs}ms`);
            return gpu;
          }
        } catch {
          // Endpoint not up yet — keep polling.
        }
      }
      gpu.status = "unhealthy";
      throw new Error(`GPU ${instanceId} failed to start within 60s`);
    } catch (err) {
      gpu.status = "unhealthy";
      throw err;
    }
  }

  /**
   * Stop a GPU instance via the Vast API. Vast errors are logged, not
   * thrown, so an idle-stop sweep never takes down the caller.
   * @throws only when the GPU id was never registered.
   */
  async stopGpu(instanceId) {
    const gpu = this.gpus.get(String(instanceId));
    if (!gpu) throw new Error(`GPU ${instanceId} not registered`);

    const vastId = gpu.vastId || instanceId;
    try {
      await this._vastApi("PUT", `/instances/${vastId}/`, { state: "stopped" });
      gpu.status = "stopped";
      console.log(`[GPU Fleet] ${instanceId} stopped`);
    } catch (err) {
      console.log(`[GPU Fleet] Stop failed for ${instanceId}:`, err.message);
    }
    return gpu;
  }

  /**
   * One auto-scale pass: boot one stopped burst GPU when the queue reaches
   * SCALE_UP_QUEUE, and stop burst GPUs idle longer than IDLE_STOP_MS.
   * @param {number} queueLength - current inference queue depth.
   */
  async autoScale(queueLength) {
    // Scale up: start a stopped GPU if queue is long.
    if (queueLength >= SCALE_UP_QUEUE) {
      const stopped = Array.from(this.gpus.values()).find((g) => g.status === "stopped" && g.autoStop);
      if (stopped) {
        console.log(`[GPU Fleet] Queue at ${queueLength}, starting burst GPU ${stopped.instanceId}`);
        try { await this.startGpu(stopped.instanceId); } catch (e) { console.log("[GPU Fleet] Scale-up failed:", e.message); }
      }
    }

    // Scale down: only burst GPUs (autoStop) that have actually served
    // traffic (lastUsed > 0) qualify for idle shutdown.
    const now = Date.now();
    for (const gpu of this.gpus.values()) {
      if (gpu.autoStop && gpu.status === "healthy" && gpu.lastUsed > 0 && (now - gpu.lastUsed) > IDLE_STOP_MS) {
        console.log(`[GPU Fleet] ${gpu.instanceId} idle for ${Math.round((now - gpu.lastUsed) / 1000)}s, stopping`);
        try { await this.stopGpu(gpu.instanceId); } catch {}
      }
    }
  }

  /**
   * JSON-serializable fleet snapshot for the dashboard/API, with per-status
   * counts and per-GPU detail (timestamps rendered as ISO strings).
   */
  getStatus() {
    const gpus = Array.from(this.gpus.values()).map((g) => ({
      instanceId: g.instanceId,
      vastId: g.vastId,
      gpuName: g.gpuName,
      host: g.host,
      port: g.port,
      model: g.model,
      role: g.role,
      status: g.status,
      autoStop: g.autoStop,
      lastUsed: g.lastUsed ? new Date(g.lastUsed).toISOString() : null,
      lastHealth: g.lastHealth ? new Date(g.lastHealth).toISOString() : null,
      coldStartMs: g.coldStartMs,
    }));
    return {
      total: gpus.length,
      healthy: gpus.filter((g) => g.status === "healthy").length,
      stopped: gpus.filter((g) => g.status === "stopped").length,
      starting: gpus.filter((g) => g.status === "starting").length,
      gpus,
    };
  }

  // ── Private ──

  /**
   * Readiness heuristic shared by startGpu and _healthCheck: the /v1/models
   * body must mention a model we serve.
   * NOTE(review): substring match on the raw JSON body — assumes deployed
   * model ids contain "gemma" or "wolverine"; confirm against the fleet.
   */
  _looksReady(res) {
    return Boolean(res) && (res.includes("gemma") || res.includes("wolverine"));
  }

  /** Probe every non-stopped, non-starting GPU and update its status. */
  async _healthCheck() {
    for (const gpu of this.gpus.values()) {
      if (gpu.status === "stopped" || gpu.status === "starting") continue;
      try {
        const res = await this._httpGet(gpu.host, gpu.port, "/v1/models", gpu.key);
        if (this._looksReady(res)) {
          gpu.status = "healthy";
          gpu.lastHealth = Date.now();
        } else {
          gpu.status = "unhealthy";
        }
      } catch {
        gpu.status = "unhealthy";
      }
    }
  }

  /**
   * Minimal Vast.ai REST call. Resolves with parsed JSON, or { raw } when
   * the response is not JSON. Rejects on network error or 15s timeout.
   */
  _vastApi(method, path, body) {
    return new Promise((resolve, reject) => {
      const bodyStr = body ? JSON.stringify(body) : null;
      const req = https.request({
        hostname: "cloud.vast.ai",
        path: `/api/v0${path}`,
        method,
        timeout: 15000,
        headers: {
          "Authorization": `Bearer ${VAST_KEY}`,
          "Content-Type": "application/json",
          ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}),
        },
      }, (res) => {
        let data = "";
        res.on("data", (c) => { data += c; });
        res.on("end", () => {
          try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); }
        });
      });
      req.on("error", reject);
      // The 'timeout' event does not abort the request by itself — destroy.
      req.on("timeout", () => { req.destroy(); reject(new Error("Vast API timeout")); });
      if (bodyStr) req.write(bodyStr);
      req.end();
    });
  }

  /** Plain-HTTP GET against a GPU, resolving with the raw response body. */
  _httpGet(host, port, path, key) {
    return new Promise((resolve, reject) => {
      const req = http.request({
        hostname: host, port, path, method: "GET", timeout: 5000,
        headers: key ? { "Authorization": `Bearer ${key}` } : {},
      }, (res) => {
        let data = "";
        res.on("data", (c) => { data += c; });
        res.on("end", () => resolve(data));
      });
      req.on("error", reject);
      req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
      req.end();
    });
  }
}
|
|
312
|
+
|
|
313
|
+
module.exports = { GpuFleet };
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GPU Fleet Management API — admin routes for controlling inference GPUs.
|
|
3
|
+
*
|
|
4
|
+
* Endpoints:
|
|
5
|
+
* GET /status — fleet overview (all GPUs, health, queue)
|
|
6
|
+
* POST /start/:id — start a stopped GPU
|
|
7
|
+
* POST /stop/:id — stop a running GPU
|
|
8
|
+
* POST /register — add a new GPU to the fleet
|
|
9
|
+
* POST /remove/:id — remove a GPU from the fleet
|
|
10
|
+
* POST /scale — trigger auto-scale check
|
|
11
|
+
* GET /benchmark/:id — run inference benchmark on a GPU
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
 * GPU Fleet Management API — admin routes for controlling inference GPUs.
 *
 * Endpoints:
 *   GET  /status        — fleet overview (all GPUs, health, queue)
 *   POST /start/:id     — start a stopped GPU
 *   POST /stop/:id      — stop a running GPU
 *   POST /register      — add a new GPU to the fleet
 *   POST /remove/:id    — remove a GPU from the fleet
 *   POST /scale         — trigger auto-scale check
 *   GET  /benchmark/:id — run inference benchmark on a GPU
 *
 * @param {object} fastify - fastify instance with gpuFleet attached by index.js.
 */
async function routes(fastify) {
  const { pool } = require("../lib/db");

  // Fleet instance is attached to fastify by index.js.
  function getFleet() {
    return fastify.gpuFleet;
  }

  /**
   * Admin auth preHandler. Fails CLOSED: when no admin key is configured or
   * the request carries no token, access is denied. (The original compared
   * token !== apiKey directly, so "undefined !== undefined" was false and
   * unauthenticated requests were granted admin access on unconfigured
   * installs.)
   */
  async function requireAdmin(request, reply) {
    const settings = require("../config/settings.json");
    const expected = settings.platform?.apiKey;
    const token = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
    if (!expected || !token || token !== expected) {
      return reply.code(401).send({ error: "Admin access required" });
    }
  }

  // GET /status — fleet overview
  fastify.get("/status", { preHandler: requireAdmin }, async (request, reply) => {
    return getFleet().getStatus();
  });

  // POST /start/:id — start a GPU
  fastify.post("/start/:id", { preHandler: requireAdmin }, async (request, reply) => {
    const fleet = getFleet();
    const { id } = request.params;
    try {
      const gpu = await fleet.startGpu(id);
      // Persist the new state; DB errors are deliberately non-fatal.
      await pool.query("UPDATE gpu_fleet SET status = 'healthy' WHERE instance_id = $1", [id]).catch(() => {});
      return { status: "started", instanceId: id, coldStartMs: gpu.coldStartMs };
    } catch (err) {
      return reply.code(500).send({ error: err.message });
    }
  });

  // POST /stop/:id — stop a GPU
  fastify.post("/stop/:id", { preHandler: requireAdmin }, async (request, reply) => {
    const fleet = getFleet();
    const { id } = request.params;
    try {
      await fleet.stopGpu(id);
      await pool.query("UPDATE gpu_fleet SET status = 'stopped' WHERE instance_id = $1", [id]).catch(() => {});
      return { status: "stopped", instanceId: id };
    } catch (err) {
      return reply.code(500).send({ error: err.message });
    }
  });

  // POST /register — add a GPU to the fleet
  fastify.post("/register", { preHandler: requireAdmin }, async (request, reply) => {
    const fleet = getFleet();
    const { instanceId, vastId, host, port, key, model, role, gpuName, autoStop } = request.body || {};
    if (!instanceId || !host || !key) {
      return reply.code(400).send({ error: "instanceId, host, and key required" });
    }

    fleet.register(instanceId, { host, port: port || 8080, key, model, role, autoStop: autoStop !== false });
    // The fleet keys its registry by String(instanceId); a numeric id in the
    // JSON body missed this lookup in the original and threw a TypeError on
    // the property assignments below.
    const gpu = fleet.gpus.get(String(instanceId));
    if (gpu && vastId) gpu.vastId = vastId;
    if (gpu && gpuName) gpu.gpuName = gpuName;

    // Upsert so the GPU survives backend restarts.
    await pool.query(
      `INSERT INTO gpu_fleet (instance_id, vast_id, host, port, internal_key, model, role, auto_stop, gpu_name)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
       ON CONFLICT (instance_id) DO UPDATE SET
         host = $3, port = $4, internal_key = $5, model = $6, role = $7, auto_stop = $8, gpu_name = $9, vast_id = $2`,
      [instanceId, vastId || null, host, port || 8080, key, model || "wolverine-test-1", role || "general", autoStop !== false, gpuName || null]
    );

    return { registered: instanceId, fleet: fleet.getStatus() };
  });

  // POST /remove/:id — remove a GPU from the fleet
  fastify.post("/remove/:id", { preHandler: requireAdmin }, async (request, reply) => {
    const fleet = getFleet();
    const { id } = request.params;
    fleet.gpus.delete(id);
    await pool.query("DELETE FROM gpu_fleet WHERE instance_id = $1", [id]).catch(() => {});
    return { removed: id };
  });

  // POST /scale — trigger auto-scale
  fastify.post("/scale", { preHandler: requireAdmin }, async (request, reply) => {
    const fleet = getFleet();
    const queueLength = request.body?.queueLength || 0;
    await fleet.autoScale(queueLength);
    return fleet.getStatus();
  });

  // GET /benchmark/:id — quick latency/throughput probe against one GPU
  fastify.get("/benchmark/:id", { preHandler: requireAdmin }, async (request, reply) => {
    const fleet = getFleet();
    const gpu = fleet.gpus.get(request.params.id);
    if (!gpu || gpu.status !== "healthy") {
      return reply.code(400).send({ error: "GPU not available" });
    }

    const http = require("http");
    const results = [];

    for (const prompt of ["2+2?", "Write isPrime in JS.", "Explain TCP in 1 sentence."]) {
      const start = Date.now();
      try {
        const body = JSON.stringify({
          model: gpu.model,
          messages: [{ role: "user", content: prompt }],
          max_tokens: 50, temperature: 0,
        });
        const res = await new Promise((resolve, reject) => {
          const req = http.request({
            hostname: gpu.host, port: gpu.port, path: "/v1/chat/completions",
            method: "POST", timeout: 30000,
            headers: { "Content-Type": "application/json", "Authorization": `Bearer ${gpu.key}`, "Content-Length": Buffer.byteLength(body) },
          }, (res) => {
            let data = "";
            res.on("data", (c) => { data += c; });
            res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve(null); } });
          });
          req.on("error", reject);
          // The 'timeout' event does not abort the request on its own; the
          // original set timeout: 30000 but never handled the event, so a
          // stalled GPU hung this route indefinitely.
          req.on("timeout", () => { req.destroy(); reject(new Error("benchmark timeout")); });
          req.write(body);
          req.end();
        });

        const elapsed = Date.now() - start;
        const usage = res?.usage || {};
        const tokOut = usage.completion_tokens || 0;
        results.push({
          prompt: prompt.slice(0, 30),
          latencyMs: elapsed,
          tokensOut: tokOut,
          tokPerSec: tokOut > 0 ? Math.round(tokOut / (elapsed / 1000)) : 0,
          response: res?.choices?.[0]?.message?.content?.slice(0, 60),
        });
      } catch (err) {
        results.push({ prompt: prompt.slice(0, 30), error: err.message });
      }
    }

    // Average throughput over the prompts that produced tokens.
    const scored = results.filter((r) => r.tokPerSec);
    const avgTokPerSec = scored.reduce((s, r) => s + r.tokPerSec, 0) / Math.max(scored.length, 1);

    return {
      instanceId: request.params.id,
      gpu: gpu.gpuName,
      model: gpu.model,
      results,
      avgTokPerSec: Math.round(avgTokPerSec),
    };
  });
}
|
|
166
|
+
|
|
167
|
+
module.exports = routes;
|
|
@@ -14,7 +14,8 @@ const crypto = require("crypto");
|
|
|
14
14
|
* Queue: when GPU is at capacity, requests queue with timeout.
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "
|
|
17
|
+
const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "http://ssh8.vast.ai:24233";
|
|
18
|
+
const GPU_KEY = process.env.WOLVERINE_GPU_KEY || "";
|
|
18
19
|
|
|
19
20
|
// Pricing in CREDITS per million tokens ($1 = 100 credits)
|
|
20
21
|
const MODEL_PRICING = {
|
|
@@ -308,7 +309,11 @@ function proxyToInference(path, body, method = "POST") {
|
|
|
308
309
|
path: url.pathname,
|
|
309
310
|
method,
|
|
310
311
|
timeout: 120000,
|
|
311
|
-
headers: {
|
|
312
|
+
headers: {
|
|
313
|
+
"Content-Type": "application/json",
|
|
314
|
+
...(GPU_KEY ? { "Authorization": `Bearer ${GPU_KEY}` } : {}),
|
|
315
|
+
...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}),
|
|
316
|
+
},
|
|
312
317
|
}, (res) => {
|
|
313
318
|
let data = "";
|
|
314
319
|
res.on("data", (c) => { data += c; });
|
|
@@ -250,7 +250,7 @@ const TOOL_DEFINITIONS = [
|
|
|
250
250
|
type: "function",
|
|
251
251
|
function: {
|
|
252
252
|
name: "run_db_fix",
|
|
253
|
-
description: "Run a write query on a SQLite database to fix data issues:
|
|
253
|
+
description: "Run a write query on a SQLite database to fix data issues. IMPORTANT: Always use inspect_db FIRST to see the current state before writing. This tool auto-snapshots affected rows before and after the write. Creates a backup. Returns before/after state so you can verify the fix is correct.",
|
|
254
254
|
parameters: {
|
|
255
255
|
type: "object",
|
|
256
256
|
properties: {
|
|
@@ -953,15 +953,60 @@ class AgentEngine {
|
|
|
953
953
|
if (upper.startsWith("DROP DATABASE") || upper.includes("DROP TABLE sqlite_")) {
|
|
954
954
|
return { content: "BLOCKED: Cannot drop system tables" };
|
|
955
955
|
}
|
|
956
|
+
|
|
956
957
|
// Backup the DB file first
|
|
957
958
|
const backupPath = dbPath + ".wolverine-backup";
|
|
958
959
|
fs.copyFileSync(dbPath, backupPath);
|
|
960
|
+
|
|
959
961
|
const db = new Database(dbPath);
|
|
962
|
+
|
|
963
|
+
// SAFETY: Snapshot affected rows BEFORE the write
|
|
964
|
+
// Extract table name and WHERE clause to SELECT the rows that will change
|
|
965
|
+
let beforeSnapshot = "";
|
|
966
|
+
try {
|
|
967
|
+
const tableMatch = upper.match(/(?:UPDATE|DELETE\s+FROM|INSERT\s+INTO)\s+(\w+)/i);
|
|
968
|
+
const whereMatch = args.sql.match(/WHERE\s+(.+?)(?:;|$)/i);
|
|
969
|
+
if (tableMatch) {
|
|
970
|
+
const table = tableMatch[1];
|
|
971
|
+
const whereClause = whereMatch ? `WHERE ${whereMatch[1]}` : "";
|
|
972
|
+
const selectSql = `SELECT * FROM ${table} ${whereClause} LIMIT 20`;
|
|
973
|
+
try {
|
|
974
|
+
const before = db.prepare(selectSql).all();
|
|
975
|
+
if (before.length > 0) {
|
|
976
|
+
beforeSnapshot = `\n\nBEFORE STATE (${before.length} rows affected):\n${JSON.stringify(before, null, 2).slice(0, 2000)}`;
|
|
977
|
+
console.log(chalk.gray(` 🗃️ Snapshot: ${before.length} rows from ${table} ${whereClause ? whereClause.slice(0, 40) : "(all)"}`));
|
|
978
|
+
}
|
|
979
|
+
} catch { /* SELECT failed, might be INSERT into new table — that's fine */ }
|
|
980
|
+
}
|
|
981
|
+
} catch { /* snapshot failed, proceed with caution */ }
|
|
982
|
+
|
|
983
|
+
// Execute the fix
|
|
960
984
|
const result = db.prepare(args.sql).run();
|
|
985
|
+
|
|
986
|
+
// SAFETY: Snapshot AFTER to show what changed
|
|
987
|
+
let afterSnapshot = "";
|
|
988
|
+
try {
|
|
989
|
+
const tableMatch = upper.match(/(?:UPDATE|DELETE\s+FROM|INSERT\s+INTO)\s+(\w+)/i);
|
|
990
|
+
const whereMatch = args.sql.match(/WHERE\s+(.+?)(?:;|$)/i);
|
|
991
|
+
if (tableMatch) {
|
|
992
|
+
const table = tableMatch[1];
|
|
993
|
+
const whereClause = whereMatch ? `WHERE ${whereMatch[1]}` : "";
|
|
994
|
+
const selectSql = `SELECT * FROM ${table} ${whereClause} LIMIT 20`;
|
|
995
|
+
try {
|
|
996
|
+
const after = db.prepare(selectSql).all();
|
|
997
|
+
afterSnapshot = `\n\nAFTER STATE (${after.length} rows):\n${JSON.stringify(after, null, 2).slice(0, 2000)}`;
|
|
998
|
+
} catch {}
|
|
999
|
+
}
|
|
1000
|
+
} catch {}
|
|
1001
|
+
|
|
961
1002
|
db.close();
|
|
962
1003
|
this.filesModified.push(args.db_path);
|
|
1004
|
+
|
|
1005
|
+
const summary = `SQL executed. Changes: ${result.changes}. Backup at: ${backupPath}${beforeSnapshot}${afterSnapshot}`;
|
|
963
1006
|
console.log(chalk.green(` 🗃️ DB fix applied: ${args.sql.slice(0, 60)} (changes: ${result.changes})`));
|
|
964
|
-
|
|
1007
|
+
if (beforeSnapshot) console.log(chalk.gray(` 🗃️ Before/after snapshot captured for audit`));
|
|
1008
|
+
|
|
1009
|
+
return { content: summary };
|
|
965
1010
|
} catch (e) { return { content: `DB error: ${e.message}` }; }
|
|
966
1011
|
}
|
|
967
1012
|
|
|
@@ -1094,14 +1139,23 @@ FAST FIXES (act immediately, don't investigate):
|
|
|
1094
1139
|
- Missing env var → check_env → report it → done
|
|
1095
1140
|
|
|
1096
1141
|
INVESTIGATION (only when cause is unclear):
|
|
1097
|
-
- Database error → inspect_db
|
|
1142
|
+
- Database error → inspect_db FIRST to see current state → understand what went wrong → run_db_fix with targeted fix
|
|
1098
1143
|
- Unknown errors → grep_code, list_dir to find root cause
|
|
1099
1144
|
|
|
1145
|
+
DATABASE SAFETY:
|
|
1146
|
+
- ALWAYS inspect_db before run_db_fix — never write blind
|
|
1147
|
+
- run_db_fix auto-snapshots affected rows before/after — check the response to verify your fix
|
|
1148
|
+
- For bad data: understand WHY the data is wrong before changing it
|
|
1149
|
+
- For NaN/null errors: check if the data was corrupted or if the code should handle it
|
|
1150
|
+
- Prefer fixing code to handle edge cases over modifying production data
|
|
1151
|
+
- A database backup is created automatically before every write
|
|
1152
|
+
|
|
1100
1153
|
RULES:
|
|
1101
1154
|
1. Fix on turn 1-2 when possible. Investigation is a last resort.
|
|
1102
1155
|
2. For ENOENT config files: read the code that requires the file, then create it with the expected structure.
|
|
1103
1156
|
3. bash_exec for operational fixes, edit_file for code, write_file for missing files, run_db_fix for data
|
|
1104
|
-
4.
|
|
1157
|
+
4. For database errors: inspect first, fix data only when code can't reasonably handle the edge case
|
|
1158
|
+
5. Always call done with summary when finished — never end without calling done.
|
|
1105
1159
|
${primaryFile ? `\nFile: ${primaryFile}` : ""}
|
|
1106
1160
|
Project: ${cwd}`;
|
|
1107
1161
|
}
|
package/src/brain/brain.js
CHANGED
|
@@ -218,7 +218,7 @@ const SEED_DOCS = [
|
|
|
218
218
|
metadata: { topic: "error-monitor" },
|
|
219
219
|
},
|
|
220
220
|
{
|
|
221
|
-
text: "Agent tool details: read_file supports offset/limit for large files. edit_file does surgical find-and-replace (preferred for small fixes). glob_files discovers files by pattern (**/*.js). grep_code does regex search with context lines. list_dir shows directory contents with file sizes. move_file relocates/renames files. bash_exec runs shell commands (30s default timeout, 60s hard cap, dangerous commands blocked: rm -rf /, git push --force, npm publish). inspect_db reads SQLite: action=tables (list), action=schema (CREATE statements), action=query (SELECT/PRAGMA only). run_db_fix writes SQLite: UPDATE/DELETE
|
|
221
|
+
text: "Agent tool details: read_file supports offset/limit for large files. edit_file does surgical find-and-replace (preferred for small fixes). glob_files discovers files by pattern (**/*.js). grep_code does regex search with context lines. list_dir shows directory contents with file sizes. move_file relocates/renames files. bash_exec runs shell commands (30s default timeout, 60s hard cap, dangerous commands blocked: rm -rf /, git push --force, npm publish). inspect_db reads SQLite: action=tables (list), action=schema (CREATE statements), action=query (SELECT/PRAGMA only). run_db_fix writes SQLite with SAFETY: auto-snapshots affected rows BEFORE write (SELECT WHERE matching the UPDATE/DELETE), executes the fix, snapshots AFTER, returns before/after comparison so agent can verify. Always backs up the DB file. Agent MUST inspect_db before run_db_fix — never write blind. For NaN/null data errors: prefer fixing code to handle edge cases over modifying production data. check_port finds what process is using a port (netstat/lsof). check_env lists environment variables with values redacted. audit_deps runs full npm health check. check_migration returns known upgrade paths. web_fetch retrieves URL content.",
|
|
222
222
|
metadata: { topic: "agent-tools-detail" },
|
|
223
223
|
},
|
|
224
224
|
{
|
package/src/core/ai-client.js
CHANGED
|
@@ -42,12 +42,13 @@ function getClient(provider) {
|
|
|
42
42
|
|
|
43
43
|
function _getWolverineClient() {
|
|
44
44
|
if (!_wolverineClient) {
|
|
45
|
-
// Wolverine inference: direct to GPU
|
|
46
|
-
//
|
|
45
|
+
// Wolverine inference: direct to GPU or via proxy
|
|
46
|
+
// WOLVERINE_GPU_KEY = internal key for direct GPU access (llama.cpp --api-key)
|
|
47
|
+
// WOLVERINE_API_KEY = user key for billed proxy access (api.wolverinenode.xyz)
|
|
47
48
|
const baseURL = process.env.WOLVERINE_INFERENCE_URL
|
|
48
49
|
? process.env.WOLVERINE_INFERENCE_URL + "/v1"
|
|
49
50
|
: "https://api.wolverinenode.xyz/v1";
|
|
50
|
-
const apiKey = process.env.WOLVERINE_API_KEY || "none";
|
|
51
|
+
const apiKey = process.env.WOLVERINE_GPU_KEY || process.env.WOLVERINE_API_KEY || "none";
|
|
51
52
|
_wolverineClient = new OpenAI({ apiKey, baseURL });
|
|
52
53
|
}
|
|
53
54
|
return _wolverineClient;
|
package/src/core/error-parser.js
CHANGED
|
@@ -97,11 +97,11 @@ function classifyError(errorMessage, fullStderr) {
|
|
|
97
97
|
const full = (fullStderr || "").toLowerCase();
|
|
98
98
|
|
|
99
99
|
// Missing npm package: Cannot find module 'cors' (not a relative path)
|
|
100
|
-
if (/cannot find module '(?![./\\])/.test(msg) || /module_not_found/.test(full)) {
|
|
100
|
+
if (/cannot find module ['"](?![./\\])/.test(msg) || /module_not_found/.test(full)) {
|
|
101
101
|
return "missing_module";
|
|
102
102
|
}
|
|
103
103
|
// Missing local file: Cannot find module './routes/api'
|
|
104
|
-
if (/cannot find module '[./\\]/.test(msg) || /enoent/.test(msg)) {
|
|
104
|
+
if (/cannot find module ['"][./\\]/.test(msg) || /enoent/.test(msg)) {
|
|
105
105
|
return "missing_file";
|
|
106
106
|
}
|
|
107
107
|
// Permission denied
|
package/src/core/runner.js
CHANGED
|
@@ -590,11 +590,37 @@ class WolverineRunner {
|
|
|
590
590
|
this._healStatus = { active: true, route: routePath, error: errorDetails?.message?.slice(0, 200), phase: "diagnosing", startedAt: Date.now() };
|
|
591
591
|
this.logger.info("heal.error_monitor", `Healing caught 500 on ${routePath}`, { route: routePath });
|
|
592
592
|
|
|
593
|
-
// Build
|
|
593
|
+
// Build synthetic stderr that matches the error parser's expected format
|
|
594
|
+
// If IPC didn't include a file, try to resolve from the route path or stack
|
|
595
|
+
let file = errorDetails.file;
|
|
596
|
+
let line = errorDetails.line || 1;
|
|
597
|
+
if (!file && errorDetails.stack) {
|
|
598
|
+
// Try to find user-land file in stack (not node_modules, not node:)
|
|
599
|
+
const frames = (errorDetails.stack || "").split("\n");
|
|
600
|
+
for (const frame of frames) {
|
|
601
|
+
const m = frame.match(/\(([^)]+):(\d+):(\d+)\)/) || frame.match(/at\s+([^\s(]+):(\d+):(\d+)/);
|
|
602
|
+
if (m && !m[1].includes("node_modules") && !m[1].includes("node:")) {
|
|
603
|
+
file = m[1]; line = parseInt(m[2], 10); break;
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
if (!file && routePath) {
|
|
608
|
+
// Last resort: map route path to likely file (e.g., /breakable → server/routes/breakable.js)
|
|
609
|
+
const routeName = routePath.split("/").filter(Boolean).pop();
|
|
610
|
+
if (routeName) {
|
|
611
|
+
const path = require("path");
|
|
612
|
+
const guess = path.join(this.cwd, "server", "routes", routeName + ".js");
|
|
613
|
+
if (require("fs").existsSync(guess)) { file = guess; line = 1; }
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
const msg = errorDetails.message || "Unknown error";
|
|
618
|
+
const hasErrorPrefix = /^\w*Error:/.test(msg);
|
|
594
619
|
const stderr = [
|
|
595
|
-
|
|
620
|
+
file ? `${file}:${line}` : "",
|
|
621
|
+
hasErrorPrefix ? msg : `Error: ${msg}`,
|
|
596
622
|
errorDetails.stack || "",
|
|
597
|
-
|
|
623
|
+
file ? ` at ${file}:${line}:1` : "",
|
|
598
624
|
].filter(Boolean).join("\n");
|
|
599
625
|
|
|
600
626
|
try {
|