wolverine-ai 3.6.1 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/wolverine.js CHANGED
@@ -127,6 +127,10 @@ if (args.includes("--backups")) {
127
127
 
128
128
  const scriptPath = args.find(a => !a.startsWith("--")) || "server/index.js";
129
129
 
130
+ // Initialize server/ from template if it doesn't exist (first run)
131
+ const { initServer } = require("../src/core/init-server");
132
+ initServer(process.cwd(), scriptPath);
133
+
130
134
  // System detection (for analytics + dashboard, NOT for forking)
131
135
  // Wolverine runs as a single process manager. If users want clustering,
132
136
  // they handle it inside their server (e.g. @fastify/cluster, pm2 cluster mode).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "3.6.1",
3
+ "version": "3.7.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -47,7 +47,6 @@
47
47
  "files": [
48
48
  "bin/",
49
49
  "src/",
50
- "server/",
51
50
  "examples/",
52
51
  ".env.example"
53
52
  ],
@@ -242,7 +242,7 @@ const SEED_DOCS = [
242
242
  metadata: { topic: "backup-skill" },
243
243
  },
244
244
  {
245
- text: "CRITICAL: Never run raw 'npm install wolverine-ai' or 'git pull' to update — these OVERWRITE server/, .wolverine/ (brain, backups, events), and .env.local. Always use the safe update skill: wolverine --update (CLI), safeUpdate(cwd) (programmatic), or let auto-update handle it. ALL backups (heal snapshots + update snapshots) stored in ~/.wolverine-safe-backups/ (OUTSIDE project, survives git clean, rm -rf, project deletion). Structure: ~/.wolverine-safe-backups/snapshots/ (heal backups), ~/.wolverine-safe-backups/updates/ (pre-update snapshots), ~/.wolverine-safe-backups/manifest.json (backup registry). Old .wolverine/backups/ auto-migrated on first run. Restore with: wolverine --restore <name>. List: wolverine --backups.",
245
+ text: "CRITICAL: Never run raw 'npm install wolverine-ai' or 'git pull' to update — these OVERWRITE server/, .wolverine/ (brain, backups, events), and .env.local. Always use the safe update skill: wolverine --update (CLI), safeUpdate(cwd) (programmatic), or let auto-update handle it. Startup backup: wolverine creates a safety snapshot of server/ before first spawn on every start. If the server crashes immediately after a bad update and healing fails/is blocked, wolverine auto-rollbacks to the startup snapshot after max retries prevents permanent breakage from corrupted server/ files. ALL backups (heal snapshots + update snapshots + startup snapshots) stored in ~/.wolverine-safe-backups/ (OUTSIDE project, survives git clean, rm -rf, project deletion). Restore with: wolverine --restore <name>. List: wolverine --backups.",
246
246
  metadata: { topic: "safe-update-warning" },
247
247
  },
248
248
  {
@@ -304,6 +304,13 @@ class Brain {
304
304
  console.log(chalk.gray(" 🧠 Framework updated — merging new seed docs..."));
305
305
  await this._mergeSeedDocs();
306
306
  try { fs.unlinkSync(seedRefreshPath); } catch {}
307
+ } else {
308
+ // Auto-detect new seeds: if SEED_DOCS count > docs namespace count, merge
309
+ const docsCount = (this.store.getNamespace("docs") || []).length;
310
+ if (SEED_DOCS.length > docsCount) {
311
+ console.log(chalk.gray(` 🧠 New seed docs detected (${SEED_DOCS.length} vs ${docsCount}) — merging...`));
312
+ await this._mergeSeedDocs();
313
+ }
307
314
  }
308
315
 
309
316
  // 2. Scan project for live function map
@@ -0,0 +1,58 @@
1
+ const fs = require("fs");
2
+ const path = require("path");
3
+ const chalk = require("chalk");
4
+
5
+ /**
6
+ * Initialize the server/ directory from the built-in template.
7
+ *
8
+ * Called on first run if server/ doesn't exist. NEVER overwrites existing files.
9
+ * This is why wolverine ships without a server/ directory in the npm package —
10
+ * so `npm install` and `git pull` can never destroy user code.
11
+ *
12
+ * The template lives in src/templates/server/ and contains a minimal Fastify
13
+ * server with health, api, and time routes + default settings.json.
14
+ */
15
+ function initServer(cwd, scriptPath) {
16
+ const serverDir = path.join(cwd, "server");
17
+ const scriptFile = path.resolve(cwd, scriptPath);
18
+
19
+ // If the script file already exists, nothing to do
20
+ if (fs.existsSync(scriptFile)) return false;
21
+
22
+ // If server/ exists but the specific script doesn't, don't create — user has their own structure
23
+ if (fs.existsSync(serverDir) && fs.readdirSync(serverDir).length > 0) {
24
+ console.log(chalk.yellow(` ⚠️ ${scriptPath} not found but server/ exists — skipping template init`));
25
+ return false;
26
+ }
27
+
28
+ // Create server/ from template
29
+ const templateDir = path.join(__dirname, "..", "templates", "server");
30
+ if (!fs.existsSync(templateDir)) {
31
+ console.log(chalk.yellow(" ⚠️ No server template found — create server/index.js manually"));
32
+ return false;
33
+ }
34
+
35
+ console.log(chalk.blue(" 📦 Creating default server/ from template..."));
36
+ _copyDir(templateDir, serverDir);
37
+ console.log(chalk.green(" ✅ Server initialized. Edit server/ to build your app."));
38
+ return true;
39
+ }
40
+
41
+ function _copyDir(src, dest) {
42
+ fs.mkdirSync(dest, { recursive: true });
43
+ for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
44
+ const srcPath = path.join(src, entry.name);
45
+ const destPath = path.join(dest, entry.name);
46
+ if (entry.isDirectory()) {
47
+ _copyDir(srcPath, destPath);
48
+ } else {
49
+ // NEVER overwrite existing files
50
+ if (!fs.existsSync(destPath)) {
51
+ fs.copyFileSync(srcPath, destPath);
52
+ console.log(chalk.gray(` + ${path.relative(dest, destPath)}`));
53
+ }
54
+ }
55
+ }
56
+ }
57
+
58
+ module.exports = { initServer };
@@ -246,6 +246,15 @@ class WolverineRunner {
246
246
  console.log(chalk.gray(" 🔄 Auto-update: disabled"));
247
247
  }
248
248
 
249
+ // Create startup backup — safety net for corrupted server/ from bad updates
250
+ // If the child crashes immediately after this, we can rollback to this known state
251
+ try {
252
+ this._startupBackupId = this.backupManager.createBackup("pre-start (safety snapshot)");
253
+ console.log(chalk.gray(` 📸 Startup backup: ${this._startupBackupId}`));
254
+ } catch (err) {
255
+ console.log(chalk.yellow(` ⚠️ Startup backup failed (non-fatal): ${err.message}`));
256
+ }
257
+
249
258
  this._spawn();
250
259
  }
251
260
 
@@ -566,6 +575,20 @@ class WolverineRunner {
566
575
  console.log(chalk.yellow(" Retrying...\n"));
567
576
  this._spawn();
568
577
  } else {
578
+ // Max retries — try rolling back to startup backup as last resort
579
+ if (this._startupBackupId) {
580
+ console.log(chalk.yellow(`\n 🔄 Max retries reached — rolling back to startup backup ${this._startupBackupId}...`));
581
+ try {
582
+ this.backupManager.rollbackTo(this._startupBackupId);
583
+ console.log(chalk.green(" ✅ Rolled back to startup state. Restarting..."));
584
+ this.retryCount = 0;
585
+ this._startupBackupId = null; // don't rollback again if this also fails
586
+ this._spawn();
587
+ return;
588
+ } catch (rbErr) {
589
+ console.log(chalk.red(` ❌ Rollback failed: ${rbErr.message}`));
590
+ }
591
+ }
569
592
  console.log(chalk.red(" Max retries reached."));
570
593
  this._logRollbackHint();
571
594
  this.running = false;
@@ -1,313 +0,0 @@
1
- const https = require("https");
2
- const http = require("http");
3
-
4
- /**
5
- * GPU Fleet Manager — controls Vast.ai GPU instances for inference.
6
- *
7
- * Features:
8
- * - Start/stop individual GPUs via Vast API
9
- * - Health monitoring and auto-discovery
10
- * - Round-robin routing across active GPUs
11
- * - Auto-scale: start burst GPUs when queue grows, stop when idle
12
- * - Cold start tracking (~5s per GPU)
13
- *
14
- * Each GPU instance runs llama.cpp with --api-key for security.
15
- * Only the EC2 backend has the internal keys.
16
- */
17
-
18
- const VAST_API = "https://cloud.vast.ai/api/v0";
19
- const VAST_KEY = process.env.VAST_API_KEY || "";
20
- const POLL_INTERVAL_MS = 30000; // health check every 30s
21
- const IDLE_STOP_MS = parseInt(process.env.GPU_IDLE_STOP_MS, 10) || 300000; // 5 min idle → stop
22
- const SCALE_UP_QUEUE = parseInt(process.env.GPU_SCALE_UP_QUEUE, 10) || 3; // start burst GPU when 3+ queued
23
-
24
- class GpuFleet {
25
- constructor(config = {}) {
26
- // GPU registry: { instanceId → { host, port, key, status, lastUsed, lastHealth, model } }
27
- this.gpus = new Map();
28
- this._roundRobinIndex = 0;
29
- this._pollTimer = null;
30
- this._scaleTimer = null;
31
- this._requestQueue = [];
32
- this._activeRequests = 0;
33
- }
34
-
35
- /**
36
- * Register a GPU instance in the fleet.
37
- */
38
- register(instanceId, { host, port, key, model = "wolverine-test-1", role = "general", autoStop = true }) {
39
- this.gpus.set(String(instanceId), {
40
- instanceId: String(instanceId),
41
- host, port: parseInt(port, 10), key,
42
- model, role, autoStop,
43
- status: "unknown", // unknown, starting, healthy, unhealthy, stopped
44
- lastUsed: 0,
45
- lastHealth: null,
46
- coldStartMs: null,
47
- });
48
- return this;
49
- }
50
-
51
- /**
52
- * Load GPU config from environment or database.
53
- */
54
- loadFromEnv() {
55
- // Primary GPU from env
56
- const url = process.env.WOLVERINE_INFERENCE_URL;
57
- const key = process.env.WOLVERINE_GPU_KEY;
58
- if (url && key) {
59
- try {
60
- const parsed = new URL(url);
61
- const instanceId = process.env.WOLVERINE_GPU_INSTANCE_ID || "primary";
62
- this.register(instanceId, {
63
- host: parsed.hostname,
64
- port: parseInt(parsed.port, 10) || 80,
65
- key,
66
- role: "primary",
67
- autoStop: false, // primary stays on
68
- });
69
- } catch {}
70
- }
71
- return this;
72
- }
73
-
74
- /**
75
- * Load GPU config from database.
76
- */
77
- async loadFromDb(pool) {
78
- try {
79
- // Check if gpu_fleet table exists
80
- const exists = await pool.query(
81
- "SELECT 1 FROM information_schema.tables WHERE table_name = 'gpu_fleet' LIMIT 1"
82
- );
83
- if (exists.rows.length === 0) {
84
- await pool.query(`
85
- CREATE TABLE gpu_fleet (
86
- instance_id TEXT PRIMARY KEY,
87
- vast_id TEXT,
88
- host TEXT NOT NULL,
89
- port INTEGER NOT NULL DEFAULT 8080,
90
- internal_key TEXT NOT NULL,
91
- model TEXT DEFAULT 'wolverine-test-1',
92
- role TEXT DEFAULT 'general',
93
- auto_stop BOOLEAN DEFAULT true,
94
- status TEXT DEFAULT 'stopped',
95
- gpu_name TEXT,
96
- created_at TIMESTAMPTZ DEFAULT NOW()
97
- )
98
- `);
99
- }
100
- const { rows } = await pool.query("SELECT * FROM gpu_fleet");
101
- for (const r of rows) {
102
- this.register(r.instance_id, {
103
- host: r.host, port: r.port, key: r.internal_key,
104
- model: r.model, role: r.role, autoStop: r.auto_stop,
105
- });
106
- const gpu = this.gpus.get(r.instance_id);
107
- if (gpu) gpu.status = r.status;
108
- if (gpu) gpu.vastId = r.vast_id;
109
- if (gpu) gpu.gpuName = r.gpu_name;
110
- }
111
- } catch (err) {
112
- console.log("[GPU Fleet] DB load failed:", err.message);
113
- }
114
- return this;
115
- }
116
-
117
- /**
118
- * Start health polling.
119
- */
120
- startPolling() {
121
- if (this._pollTimer) return;
122
- this._pollTimer = setInterval(() => this._healthCheck(), POLL_INTERVAL_MS);
123
- this._healthCheck(); // immediate first check
124
- return this;
125
- }
126
-
127
- stopPolling() {
128
- if (this._pollTimer) { clearInterval(this._pollTimer); this._pollTimer = null; }
129
- }
130
-
131
- /**
132
- * Get a healthy GPU for inference (round-robin).
133
- * Returns { host, port, key, instanceId } or null if none available.
134
- */
135
- getAvailable() {
136
- const healthy = Array.from(this.gpus.values()).filter(g => g.status === "healthy");
137
- if (healthy.length === 0) return null;
138
- this._roundRobinIndex = (this._roundRobinIndex + 1) % healthy.length;
139
- const gpu = healthy[this._roundRobinIndex];
140
- gpu.lastUsed = Date.now();
141
- return { host: gpu.host, port: gpu.port, key: gpu.key, instanceId: gpu.instanceId, model: gpu.model };
142
- }
143
-
144
- /**
145
- * Start a stopped GPU instance via Vast API.
146
- */
147
- async startGpu(instanceId) {
148
- const gpu = this.gpus.get(String(instanceId));
149
- if (!gpu) throw new Error(`GPU ${instanceId} not registered`);
150
- if (gpu.status === "healthy" || gpu.status === "starting") return gpu;
151
-
152
- gpu.status = "starting";
153
- gpu.coldStartMs = null;
154
- const startTime = Date.now();
155
-
156
- const vastId = gpu.vastId || instanceId;
157
- try {
158
- await this._vastApi("PUT", `/instances/${vastId}/`, { state: "running" });
159
-
160
- // Poll until healthy (max 60s)
161
- for (let i = 0; i < 120; i++) {
162
- await new Promise(r => setTimeout(r, 500));
163
- try {
164
- const res = await this._httpGet(gpu.host, gpu.port, "/v1/models", gpu.key);
165
- if (res && res.includes("gemma")) {
166
- gpu.status = "healthy";
167
- gpu.coldStartMs = Date.now() - startTime;
168
- gpu.lastHealth = Date.now();
169
- console.log(`[GPU Fleet] ${instanceId} started in ${gpu.coldStartMs}ms`);
170
- return gpu;
171
- }
172
- } catch {}
173
- }
174
- gpu.status = "unhealthy";
175
- throw new Error(`GPU ${instanceId} failed to start within 60s`);
176
- } catch (err) {
177
- gpu.status = "unhealthy";
178
- throw err;
179
- }
180
- }
181
-
182
- /**
183
- * Stop a GPU instance via Vast API.
184
- */
185
- async stopGpu(instanceId) {
186
- const gpu = this.gpus.get(String(instanceId));
187
- if (!gpu) throw new Error(`GPU ${instanceId} not registered`);
188
-
189
- const vastId = gpu.vastId || instanceId;
190
- try {
191
- await this._vastApi("PUT", `/instances/${vastId}/`, { state: "stopped" });
192
- gpu.status = "stopped";
193
- console.log(`[GPU Fleet] ${instanceId} stopped`);
194
- } catch (err) {
195
- console.log(`[GPU Fleet] Stop failed for ${instanceId}:`, err.message);
196
- }
197
- return gpu;
198
- }
199
-
200
- /**
201
- * Auto-scale: start burst GPUs when needed, stop idle ones.
202
- */
203
- async autoScale(queueLength) {
204
- // Scale up: start a stopped GPU if queue is long
205
- if (queueLength >= SCALE_UP_QUEUE) {
206
- const stopped = Array.from(this.gpus.values()).find(g => g.status === "stopped" && g.autoStop);
207
- if (stopped) {
208
- console.log(`[GPU Fleet] Queue at ${queueLength}, starting burst GPU ${stopped.instanceId}`);
209
- try { await this.startGpu(stopped.instanceId); } catch (e) { console.log("[GPU Fleet] Scale-up failed:", e.message); }
210
- }
211
- }
212
-
213
- // Scale down: stop idle burst GPUs
214
- const now = Date.now();
215
- for (const gpu of this.gpus.values()) {
216
- if (gpu.autoStop && gpu.status === "healthy" && gpu.lastUsed > 0 && (now - gpu.lastUsed) > IDLE_STOP_MS) {
217
- console.log(`[GPU Fleet] ${gpu.instanceId} idle for ${Math.round((now - gpu.lastUsed) / 1000)}s, stopping`);
218
- try { await this.stopGpu(gpu.instanceId); } catch {}
219
- }
220
- }
221
- }
222
-
223
- /**
224
- * Get fleet status for dashboard/API.
225
- */
226
- getStatus() {
227
- const gpus = Array.from(this.gpus.values()).map(g => ({
228
- instanceId: g.instanceId,
229
- vastId: g.vastId,
230
- gpuName: g.gpuName,
231
- host: g.host,
232
- port: g.port,
233
- model: g.model,
234
- role: g.role,
235
- status: g.status,
236
- autoStop: g.autoStop,
237
- lastUsed: g.lastUsed ? new Date(g.lastUsed).toISOString() : null,
238
- lastHealth: g.lastHealth ? new Date(g.lastHealth).toISOString() : null,
239
- coldStartMs: g.coldStartMs,
240
- }));
241
- return {
242
- total: gpus.length,
243
- healthy: gpus.filter(g => g.status === "healthy").length,
244
- stopped: gpus.filter(g => g.status === "stopped").length,
245
- starting: gpus.filter(g => g.status === "starting").length,
246
- gpus,
247
- };
248
- }
249
-
250
- // ── Private ──
251
-
252
- async _healthCheck() {
253
- for (const gpu of this.gpus.values()) {
254
- if (gpu.status === "stopped" || gpu.status === "starting") continue;
255
- try {
256
- const res = await this._httpGet(gpu.host, gpu.port, "/v1/models", gpu.key);
257
- if (res && (res.includes("gemma") || res.includes("wolverine"))) {
258
- gpu.status = "healthy";
259
- gpu.lastHealth = Date.now();
260
- } else {
261
- gpu.status = "unhealthy";
262
- }
263
- } catch {
264
- gpu.status = "unhealthy";
265
- }
266
- }
267
- }
268
-
269
- _vastApi(method, path, body) {
270
- return new Promise((resolve, reject) => {
271
- const bodyStr = body ? JSON.stringify(body) : null;
272
- const req = https.request({
273
- hostname: "cloud.vast.ai",
274
- path: `/api/v0${path}`,
275
- method,
276
- timeout: 15000,
277
- headers: {
278
- "Authorization": `Bearer ${VAST_KEY}`,
279
- "Content-Type": "application/json",
280
- ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}),
281
- },
282
- }, (res) => {
283
- let data = "";
284
- res.on("data", c => { data += c; });
285
- res.on("end", () => {
286
- try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); }
287
- });
288
- });
289
- req.on("error", reject);
290
- req.on("timeout", () => { req.destroy(); reject(new Error("Vast API timeout")); });
291
- if (bodyStr) req.write(bodyStr);
292
- req.end();
293
- });
294
- }
295
-
296
- _httpGet(host, port, path, key) {
297
- return new Promise((resolve, reject) => {
298
- const req = http.request({
299
- hostname: host, port, path, method: "GET", timeout: 5000,
300
- headers: key ? { "Authorization": `Bearer ${key}` } : {},
301
- }, (res) => {
302
- let data = "";
303
- res.on("data", c => { data += c; });
304
- res.on("end", () => resolve(data));
305
- });
306
- req.on("error", reject);
307
- req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
308
- req.end();
309
- });
310
- }
311
- }
312
-
313
- module.exports = { GpuFleet };
@@ -1,167 +0,0 @@
1
- /**
2
- * GPU Fleet Management API — admin routes for controlling inference GPUs.
3
- *
4
- * Endpoints:
5
- * GET /status — fleet overview (all GPUs, health, queue)
6
- * POST /start/:id — start a stopped GPU
7
- * POST /stop/:id — stop a running GPU
8
- * POST /register — add a new GPU to the fleet
9
- * POST /remove/:id — remove a GPU from the fleet
10
- * POST /scale — trigger auto-scale check
11
- * GET /benchmark/:id — run inference benchmark on a GPU
12
- */
13
-
14
- async function routes(fastify) {
15
- const { pool } = require("../lib/db");
16
-
17
- // Fleet instance is attached to fastify by index.js
18
- function getFleet() {
19
- return fastify.gpuFleet;
20
- }
21
-
22
- // Admin auth
23
- async function requireAdmin(request, reply) {
24
- const settings = require("../config/settings.json");
25
- const token = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
26
- if (token !== settings.platform?.apiKey) {
27
- return reply.code(401).send({ error: "Admin access required" });
28
- }
29
- }
30
-
31
- // GET /status — fleet overview
32
- fastify.get("/status", { preHandler: requireAdmin }, async (request, reply) => {
33
- const fleet = getFleet();
34
- return fleet.getStatus();
35
- });
36
-
37
- // POST /start/:id — start a GPU
38
- fastify.post("/start/:id", { preHandler: requireAdmin }, async (request, reply) => {
39
- const fleet = getFleet();
40
- const { id } = request.params;
41
- try {
42
- const gpu = await fleet.startGpu(id);
43
- // Update DB
44
- await pool.query("UPDATE gpu_fleet SET status = 'healthy' WHERE instance_id = $1", [id]).catch(() => {});
45
- return { status: "started", instanceId: id, coldStartMs: gpu.coldStartMs };
46
- } catch (err) {
47
- return reply.code(500).send({ error: err.message });
48
- }
49
- });
50
-
51
- // POST /stop/:id — stop a GPU
52
- fastify.post("/stop/:id", { preHandler: requireAdmin }, async (request, reply) => {
53
- const fleet = getFleet();
54
- const { id } = request.params;
55
- try {
56
- await fleet.stopGpu(id);
57
- await pool.query("UPDATE gpu_fleet SET status = 'stopped' WHERE instance_id = $1", [id]).catch(() => {});
58
- return { status: "stopped", instanceId: id };
59
- } catch (err) {
60
- return reply.code(500).send({ error: err.message });
61
- }
62
- });
63
-
64
- // POST /register — add a GPU to the fleet
65
- fastify.post("/register", { preHandler: requireAdmin }, async (request, reply) => {
66
- const fleet = getFleet();
67
- const { instanceId, vastId, host, port, key, model, role, gpuName, autoStop } = request.body || {};
68
- if (!instanceId || !host || !key) {
69
- return reply.code(400).send({ error: "instanceId, host, and key required" });
70
- }
71
-
72
- fleet.register(instanceId, { host, port: port || 8080, key, model, role, autoStop: autoStop !== false });
73
- const gpu = fleet.gpus.get(instanceId);
74
- if (vastId) gpu.vastId = vastId;
75
- if (gpuName) gpu.gpuName = gpuName;
76
-
77
- // Save to DB
78
- await pool.query(
79
- `INSERT INTO gpu_fleet (instance_id, vast_id, host, port, internal_key, model, role, auto_stop, gpu_name)
80
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
81
- ON CONFLICT (instance_id) DO UPDATE SET
82
- host = $3, port = $4, internal_key = $5, model = $6, role = $7, auto_stop = $8, gpu_name = $9, vast_id = $2`,
83
- [instanceId, vastId || null, host, port || 8080, key, model || "wolverine-test-1", role || "general", autoStop !== false, gpuName || null]
84
- );
85
-
86
- return { registered: instanceId, fleet: fleet.getStatus() };
87
- });
88
-
89
- // POST /remove/:id — remove a GPU from the fleet
90
- fastify.post("/remove/:id", { preHandler: requireAdmin }, async (request, reply) => {
91
- const fleet = getFleet();
92
- const { id } = request.params;
93
- fleet.gpus.delete(id);
94
- await pool.query("DELETE FROM gpu_fleet WHERE instance_id = $1", [id]).catch(() => {});
95
- return { removed: id };
96
- });
97
-
98
- // POST /scale — trigger auto-scale
99
- fastify.post("/scale", { preHandler: requireAdmin }, async (request, reply) => {
100
- const fleet = getFleet();
101
- const queueLength = request.body?.queueLength || 0;
102
- await fleet.autoScale(queueLength);
103
- return fleet.getStatus();
104
- });
105
-
106
- // GET /benchmark/:id — quick benchmark
107
- fastify.get("/benchmark/:id", { preHandler: requireAdmin }, async (request, reply) => {
108
- const fleet = getFleet();
109
- const gpu = fleet.gpus.get(request.params.id);
110
- if (!gpu || gpu.status !== "healthy") {
111
- return reply.code(400).send({ error: "GPU not available" });
112
- }
113
-
114
- const http = require("http");
115
- const results = [];
116
-
117
- for (const prompt of ["2+2?", "Write isPrime in JS.", "Explain TCP in 1 sentence."]) {
118
- const start = Date.now();
119
- try {
120
- const body = JSON.stringify({
121
- model: gpu.model,
122
- messages: [{ role: "user", content: prompt }],
123
- max_tokens: 50, temperature: 0,
124
- });
125
- const res = await new Promise((resolve, reject) => {
126
- const req = http.request({
127
- hostname: gpu.host, port: gpu.port, path: "/v1/chat/completions",
128
- method: "POST", timeout: 30000,
129
- headers: { "Content-Type": "application/json", "Authorization": `Bearer ${gpu.key}`, "Content-Length": Buffer.byteLength(body) },
130
- }, (res) => {
131
- let data = "";
132
- res.on("data", c => { data += c; });
133
- res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve(null); } });
134
- });
135
- req.on("error", reject);
136
- req.write(body);
137
- req.end();
138
- });
139
-
140
- const elapsed = Date.now() - start;
141
- const usage = res?.usage || {};
142
- const tokOut = usage.completion_tokens || 0;
143
- results.push({
144
- prompt: prompt.slice(0, 30),
145
- latencyMs: elapsed,
146
- tokensOut: tokOut,
147
- tokPerSec: tokOut > 0 ? Math.round(tokOut / (elapsed / 1000)) : 0,
148
- response: res?.choices?.[0]?.message?.content?.slice(0, 60),
149
- });
150
- } catch (err) {
151
- results.push({ prompt: prompt.slice(0, 30), error: err.message });
152
- }
153
- }
154
-
155
- const avgTokPerSec = results.filter(r => r.tokPerSec).reduce((s, r) => s + r.tokPerSec, 0) / Math.max(results.filter(r => r.tokPerSec).length, 1);
156
-
157
- return {
158
- instanceId: request.params.id,
159
- gpu: gpu.gpuName,
160
- model: gpu.model,
161
- results,
162
- avgTokPerSec: Math.round(avgTokPerSec),
163
- };
164
- });
165
- }
166
-
167
- module.exports = routes;
@@ -1,329 +0,0 @@
1
- const https = require("https");
2
- const http = require("http");
3
- const crypto = require("crypto");
4
-
5
- /**
6
- * Wolverine Inference API
7
- *
8
- * Credit system: $1 = 100 credits. 1 credit = $0.01 of compute.
9
- * Token pricing (in credits per million tokens):
10
- * wolverine-test-1: 1 credit input / 4 credits output per 1M tokens
11
- * (= $0.01/$0.04 per 1M — 15x cheaper than gpt-4o-mini, 80x cheaper than haiku)
12
- *
13
- * Rate limiting: per API key, configurable per tier.
14
- * Queue: when GPU is at capacity, requests queue with timeout.
15
- */
16
-
17
- const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "http://ssh8.vast.ai:24233";
18
- const GPU_KEY = process.env.WOLVERINE_GPU_KEY || "";
19
-
20
- // Pricing in CREDITS per million tokens ($1 = 100 credits)
21
- const MODEL_PRICING = {
22
- "wolverine-test-1": { input: 1.0, output: 4.0 }, // $0.01/$0.04 per 1M
23
- "wolverine-coding": { input: 1.0, output: 4.0 },
24
- "wolverine-reasoning": { input: 2.5, output: 10.0 }, // heavier model when available
25
- };
26
-
27
- const MODEL_MAP = {
28
- "wolverine-test-1": "wolverine-test-1",
29
- "wolverine-coding": "wolverine-test-1",
30
- "wolverine-reasoning": "wolverine-test-1",
31
- };
32
-
33
- const TIER_LIMITS = {
34
- free: { rpm: 10, maxTokens: 1024 },
35
- starter: { rpm: 60, maxTokens: 4096 },
36
- pro: { rpm: 300, maxTokens: 4096 },
37
- admin: { rpm: 9999, maxTokens: 4096 },
38
- };
39
-
40
- function tokenCost(model, inputTokens, outputTokens) {
41
- const p = MODEL_PRICING[model] || MODEL_PRICING["wolverine-test-1"];
42
- return ((inputTokens / 1_000_000) * p.input) + ((outputTokens / 1_000_000) * p.output);
43
- }
44
-
45
- // ── Request Queue (handles GPU saturation) ──
46
- const queue = [];
47
- let activeRequests = 0;
48
- const MAX_CONCURRENT = 8; // vLLM max-num-seqs
49
- const QUEUE_TIMEOUT_MS = 30000;
50
-
51
- function enqueue() {
52
- return new Promise((resolve, reject) => {
53
- if (activeRequests < MAX_CONCURRENT) {
54
- activeRequests++;
55
- resolve();
56
- return;
57
- }
58
- const timer = setTimeout(() => {
59
- const idx = queue.indexOf(entry);
60
- if (idx >= 0) queue.splice(idx, 1);
61
- reject(new Error("Queue timeout — GPU at capacity. Try again in a few seconds."));
62
- }, QUEUE_TIMEOUT_MS);
63
- const entry = { resolve: () => { clearTimeout(timer); activeRequests++; resolve(); }, reject };
64
- queue.push(entry);
65
- });
66
- }
67
-
68
- function dequeue() {
69
- activeRequests = Math.max(0, activeRequests - 1);
70
- if (queue.length > 0) {
71
- const next = queue.shift();
72
- next.resolve();
73
- }
74
- }
75
-
76
- async function routes(fastify) {
77
- const { pool } = require("../lib/db");
78
-
79
- // Rate limit state (in-memory)
80
- const rateWindows = new Map();
81
-
82
- async function authenticate(request, reply) {
83
- const apiKey = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
84
- if (!apiKey) return reply.code(401).send({ error: { message: "API key required. Pass via Authorization: Bearer <key>", type: "auth_error" } });
85
-
86
- // Platform key bypass
87
- let settings = {};
88
- try { settings = require("../config/settings.json"); } catch {}
89
- if (apiKey === settings.platform?.apiKey) {
90
- request.account = { api_key: apiKey, owner: "platform", tier: "admin", credits_remaining: 999999, rate_limit_rpm: 9999 };
91
- return;
92
- }
93
-
94
- const result = await pool.query("SELECT * FROM api_credits WHERE api_key = $1", [apiKey]);
95
- if (result.rows.length === 0) return reply.code(401).send({ error: { message: "Invalid API key", type: "auth_error" } });
96
-
97
- const account = result.rows[0];
98
-
99
- // Credit check
100
- if (parseFloat(account.credits_remaining) <= 0) {
101
- return reply.code(402).send({ error: { message: "Insufficient credits. Add credits at wolverinenode.xyz", type: "billing_error", credits_remaining: 0 } });
102
- }
103
-
104
- // Rate limit
105
- const now = Date.now();
106
- const window = rateWindows.get(apiKey) || { count: 0, resetAt: now + 60000 };
107
- if (now > window.resetAt) { window.count = 0; window.resetAt = now + 60000; }
108
- const limit = account.rate_limit_rpm || TIER_LIMITS[account.tier]?.rpm || 10;
109
- if (window.count >= limit) {
110
- const retryAfter = Math.ceil((window.resetAt - now) / 1000);
111
- return reply.code(429).send({ error: { message: `Rate limit: ${limit} requests/min. Retry in ${retryAfter}s`, type: "rate_limit", retry_after: retryAfter } });
112
- }
113
- window.count++;
114
- rateWindows.set(apiKey, window);
115
-
116
- request.account = account;
117
- }
118
-
119
- // ── POST /chat/completions ──
120
- fastify.post("/chat/completions", { preHandler: authenticate }, async (request, reply) => {
121
- const body = request.body || {};
122
- const requestedModel = body.model || "wolverine-test-1";
123
- const account = request.account;
124
- const tier = TIER_LIMITS[account.tier] || TIER_LIMITS.free;
125
- const startMs = Date.now();
126
-
127
- // Enforce max tokens per tier
128
- if (body.max_tokens && body.max_tokens > tier.maxTokens) {
129
- body.max_tokens = tier.maxTokens;
130
- }
131
-
132
- // Map model name for backend
133
- const backendBody = { ...body, model: MODEL_MAP[requestedModel] || requestedModel };
134
-
135
- // Queue if GPU saturated
136
- try {
137
- await enqueue();
138
- } catch (err) {
139
- return reply.code(503).send({ error: { message: err.message, type: "capacity_error", queue_length: queue.length } });
140
- }
141
-
142
- try {
143
- const result = await proxyToInference("/v1/chat/completions", backendBody);
144
- const latencyMs = Date.now() - startMs;
145
-
146
- const usage = result.usage || {};
147
- const inputTokens = usage.prompt_tokens || 0;
148
- const outputTokens = usage.completion_tokens || 0;
149
- const cost = tokenCost(requestedModel, inputTokens, outputTokens);
150
-
151
- // Bill credits (skip for platform)
152
- if (account.owner !== "platform") {
153
- await pool.query(
154
- "UPDATE api_credits SET credits_remaining = credits_remaining - $1, credits_used = credits_used + $1, last_used = NOW() WHERE api_key = $2",
155
- [cost, account.api_key]
156
- );
157
- await pool.query(
158
- "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, $3, $4, $5, $6, $7, true, $8)",
159
- [account.api_key, requestedModel, inputTokens, outputTokens, inputTokens + outputTokens, cost, latencyMs, "/v1/chat/completions"]
160
- );
161
- }
162
-
163
- // Rewrite response
164
- if (result.model) result.model = requestedModel;
165
- result.x_wolverine = {
166
- credits_used: Math.round(cost * 1000000) / 1000000,
167
- credits_remaining: Math.max(0, parseFloat(account.credits_remaining) - cost),
168
- latency_ms: latencyMs,
169
- queued: activeRequests > MAX_CONCURRENT,
170
- };
171
-
172
- return result;
173
- } catch (err) {
174
- if (account.owner !== "platform") {
175
- await pool.query(
176
- "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, 0, 0, 0, 0, $3, false, $4)",
177
- [account.api_key, requestedModel, Date.now() - startMs, "/v1/chat/completions"]
178
- ).catch(() => {});
179
- }
180
- return reply.code(502).send({ error: { message: `Inference error: ${err.message}`, type: "inference_error" } });
181
- } finally {
182
- dequeue();
183
- }
184
- });
185
-
// ── GET /models — public model catalog with per-model credit pricing ──
fastify.get("/models", async () => {
  const created = Math.floor(Date.now() / 1000);
  const data = [];
  for (const [id, pricing] of Object.entries(MODEL_PRICING)) {
    data.push({
      id,
      object: "model",
      owned_by: "wolverine",
      created,
      pricing: {
        input_credits_per_million: pricing.input,
        output_credits_per_million: pricing.output,
        usd_per_credit: 0.01,
      },
    });
  }
  return { object: "list", data };
});
195
-
// ── POST /keys/create — generate a new API key (admin only) ──
// Body: { owner (required), email?, credits?, tier?, rpm? }.
// Defaults: tier "free", 10 starter credits on the free tier (0 otherwise),
// rpm from TIER_LIMITS for the tier, falling back to 10.
fastify.post("/keys/create", { preHandler: authenticate }, async (request, reply) => {
  const account = request.account;
  if (account.tier !== "admin") {
    return reply.code(403).send({ error: { message: "Only admins can create API keys", type: "auth_error" } });
  }

  const { owner, email, credits, tier, rpm } = request.body || {};
  if (!owner) return reply.code(400).send({ error: { message: "owner required", type: "validation_error" } });

  // 48 hex chars of entropy behind a recognizable "wlv_" prefix.
  const newKey = "wlv_" + crypto.randomBytes(24).toString("hex");
  const keyTier = tier || "free";
  // ?? (not ||) so an explicitly requested credits/rpm of 0 is honored
  // instead of being silently replaced by the default.
  const keyCredits = credits ?? (keyTier === "free" ? 10 : 0);
  const keyRpm = rpm ?? TIER_LIMITS[keyTier]?.rpm ?? 10;

  await pool.query(
    "INSERT INTO api_credits (api_key, owner, email, credits_remaining, tier, plan_name, rate_limit_rpm) VALUES ($1, $2, $3, $4, $5, $6, $7)",
    [newKey, owner, email || null, keyCredits, keyTier, keyTier, keyRpm]
  );

  return { api_key: newKey, owner, tier: keyTier, credits: keyCredits, rate_limit_rpm: keyRpm };
});
216
-
// ── POST /keys/add-credits — top up an existing key (admin only) ──
// Body: { api_key, credits }. Returns the post-update balance.
fastify.post("/keys/add-credits", { preHandler: authenticate }, async (request, reply) => {
  const account = request.account;
  if (account.tier !== "admin") {
    return reply.code(403).send({ error: { message: "Only admins can add credits", type: "auth_error" } });
  }

  const { api_key, credits } = request.body || {};
  if (!api_key || !credits) return reply.code(400).send({ error: { message: "api_key and credits required" } });

  // Single round trip via RETURNING. rowCount 0 means the key does not
  // exist — previously that case "succeeded" silently and reported a
  // balance of 0, hiding admin typos.
  const updated = await pool.query(
    "UPDATE api_credits SET credits_remaining = credits_remaining + $1 WHERE api_key = $2 RETURNING credits_remaining",
    [credits, api_key]
  );
  if (updated.rowCount === 0) {
    return reply.code(404).send({ error: { message: "api_key not found", type: "not_found" } });
  }
  return { api_key, credits_added: credits, credits_remaining: parseFloat(updated.rows[0].credits_remaining) };
});
229
-
// ── GET /keys — list every API key with balances and limits (admin only) ──
fastify.get("/keys", { preHandler: authenticate }, async (request, reply) => {
  if (request.account.tier !== "admin") {
    return reply.code(403).send({ error: { message: "Admin only" } });
  }
  const result = await pool.query(
    "SELECT api_key, owner, email, tier, credits_remaining, credits_used, rate_limit_rpm, created_at, last_used FROM api_credits ORDER BY created_at DESC"
  );
  return { keys: result.rows };
});
236
-
// ── GET /credits — balance, usage, and limits for the calling key ──
fastify.get("/credits", { preHandler: authenticate }, async (request, reply) => {
  const account = request.account;
  const remaining = parseFloat(account.credits_remaining);
  const used = parseFloat(account.credits_used || 0);
  return {
    credits_remaining: remaining,
    credits_used: used,
    usd_remaining: remaining * 0.01, // 1 credit == $0.01
    usd_used: used * 0.01,
    tier: account.tier,
    rate_limit_rpm: account.rate_limit_rpm,
    owner: account.owner,
  };
});
248
-
// ── GET /usage — per-model and hourly usage breakdown for the calling key ──
// Query: ?period=1h|1d|7d|30d (default 7d; unknown values fall back to 7 days).
fastify.get("/usage", { preHandler: authenticate }, async (request, reply) => {
  const apiKey = request.account.api_key;
  const period = request.query.period || "7d";
  const interval = { "1h": "1 hour", "1d": "1 day", "7d": "7 days", "30d": "30 days" }[period] || "7 days";

  // The two aggregations are independent read-only queries — run in parallel.
  const [summary, timeline] = await Promise.all([
    pool.query(
      `SELECT model, COUNT(*) AS calls, SUM(input_tokens) AS input, SUM(output_tokens) AS output,
              SUM(total_tokens) AS tokens, SUM(cost) AS credits_spent, AVG(latency_ms) AS avg_latency,
              COUNT(*) FILTER (WHERE success) AS successes
       FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
       GROUP BY model ORDER BY credits_spent DESC`, [apiKey, interval]
    ),
    pool.query(
      `SELECT date_trunc('hour', timestamp) AS hour, SUM(cost) AS credits, SUM(total_tokens) AS tokens, COUNT(*) AS calls
       FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
       GROUP BY hour ORDER BY hour`, [apiKey, interval]
    ),
  ]);

  const totalCredits = summary.rows.reduce((sum, r) => sum + parseFloat(r.credits_spent || 0), 0);

  return {
    period,
    // Round to 6 decimal places to keep float noise out of the response.
    total_credits_spent: Math.round(totalCredits * 1000000) / 1000000,
    total_usd_spent: Math.round(totalCredits * 0.01 * 1000000) / 1000000,
    byModel: summary.rows.map(r => ({
      model: r.model,
      calls: parseInt(r.calls, 10),
      input: parseInt(r.input || 0, 10),
      output: parseInt(r.output || 0, 10),
      tokens: parseInt(r.tokens || 0, 10),
      credits_spent: parseFloat(r.credits_spent || 0),
      usd_spent: parseFloat(r.credits_spent || 0) * 0.01,
      avgLatencyMs: Math.round(parseFloat(r.avg_latency || 0)),
      successRate: parseInt(r.calls, 10) > 0
        ? parseFloat(((parseInt(r.successes, 10) / parseInt(r.calls, 10)) * 100).toFixed(2))
        : 0,
    })),
    timeline: timeline.rows.map(r => ({
      hour: r.hour,
      credits: parseFloat(r.credits || 0), // || 0 guards a NULL SUM (all-NULL cost rows)
      tokens: parseInt(r.tokens || 0, 10),
      calls: parseInt(r.calls, 10),
    })),
    queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT },
  };
});
288
-
// ── GET /health — liveness probe; reports inference backend + queue state ──
fastify.get("/health", async () => {
  // Closure so the queue counters are read at return time, after the
  // backend round trip, matching the original evaluation order.
  const queueSnapshot = () => ({ active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT });
  try {
    const inference = await proxyToInference("/health", null, "GET");
    return { status: "ok", inference, queue: queueSnapshot() };
  } catch (err) {
    return { status: "down", error: err.message, queue: queueSnapshot() };
  }
});
298
- }
299
-
// Forward a JSON request to the inference backend and parse the reply.
//
// path   – path (and optional query string) appended to INFERENCE_URL.
// body   – object serialized as the JSON request body, or null for none.
// method – HTTP method (default "POST").
//
// Resolves with the parsed JSON response, or { raw: <text> } when the
// backend returns non-JSON. Rejects on socket errors or after a 120s
// timeout. GPU_KEY, when set, is sent as a Bearer token.
function proxyToInference(path, body, method = "POST") {
  return new Promise((resolve, reject) => {
    const url = new URL(INFERENCE_URL + path); // global URL (Node >= 10); no require("url") needed
    const client = url.protocol === "https:" ? https : http;
    const bodyStr = body ? JSON.stringify(body) : null;

    const req = client.request({
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      // pathname alone would silently drop any query string in `path`.
      path: url.pathname + url.search,
      method,
      timeout: 120000, // generous — large-model completions can be slow
      headers: {
        "Content-Type": "application/json",
        ...(GPU_KEY ? { "Authorization": `Bearer ${GPU_KEY}` } : {}),
        ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}),
      },
    }, (res) => {
      let data = "";
      res.on("data", (chunk) => { data += chunk; });
      // Tolerate non-JSON bodies (e.g. plain-text errors from a proxy).
      res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); } });
    });
    req.on("error", reject);
    req.on("timeout", () => { req.destroy(); reject(new Error("Inference timeout")); });
    if (bodyStr) req.write(bodyStr);
    req.end();
  });
}
328
-
329
- module.exports = routes;
File without changes
File without changes
File without changes