wolverine-ai 3.6.1 → 3.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/wolverine.js +4 -0
- package/package.json +1 -2
- package/src/brain/brain.js +8 -1
- package/src/core/init-server.js +58 -0
- package/src/core/runner.js +26 -0
- package/src/monitor/error-monitor.js +1 -1
- package/server/lib/gpu-fleet.js +0 -313
- package/server/routes/fleet.js +0 -167
- package/server/routes/inference.js +0 -329
- /package/{server → src/templates/server}/config/settings.json +0 -0
- /package/{server → src/templates/server}/index.js +0 -0
- /package/{server → src/templates/server}/routes/api.js +0 -0
- /package/{server → src/templates/server}/routes/health.js +0 -0
- /package/{server → src/templates/server}/routes/time.js +0 -0
package/bin/wolverine.js
CHANGED
|
@@ -127,6 +127,10 @@ if (args.includes("--backups")) {
|
|
|
127
127
|
|
|
128
128
|
const scriptPath = args.find(a => !a.startsWith("--")) || "server/index.js";
|
|
129
129
|
|
|
130
|
+
// Initialize server/ from template if it doesn't exist (first run)
|
|
131
|
+
const { initServer } = require("../src/core/init-server");
|
|
132
|
+
initServer(process.cwd(), scriptPath);
|
|
133
|
+
|
|
130
134
|
// System detection (for analytics + dashboard, NOT for forking)
|
|
131
135
|
// Wolverine runs as a single process manager. If users want clustering,
|
|
132
136
|
// they handle it inside their server (e.g. @fastify/cluster, pm2 cluster mode).
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.7.1",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -47,7 +47,6 @@
|
|
|
47
47
|
"files": [
|
|
48
48
|
"bin/",
|
|
49
49
|
"src/",
|
|
50
|
-
"server/",
|
|
51
50
|
"examples/",
|
|
52
51
|
".env.example"
|
|
53
52
|
],
|
package/src/brain/brain.js
CHANGED
|
@@ -242,7 +242,7 @@ const SEED_DOCS = [
|
|
|
242
242
|
metadata: { topic: "backup-skill" },
|
|
243
243
|
},
|
|
244
244
|
{
|
|
245
|
-
text: "CRITICAL: Never run raw 'npm install wolverine-ai' or 'git pull' to update — these OVERWRITE server/, .wolverine/ (brain, backups, events), and .env.local. Always use the safe update skill: wolverine --update (CLI), safeUpdate(cwd) (programmatic), or let auto-update handle it.
|
|
245
|
+
text: "CRITICAL: Never run raw 'npm install wolverine-ai' or 'git pull' to update — these OVERWRITE server/, .wolverine/ (brain, backups, events), and .env.local. Always use the safe update skill: wolverine --update (CLI), safeUpdate(cwd) (programmatic), or let auto-update handle it. Startup backup: wolverine creates a safety snapshot of server/ before first spawn on every start. If the server crashes immediately after a bad update and healing fails/is blocked, wolverine auto-rollbacks to the startup snapshot after max retries — prevents permanent breakage from corrupted server/ files. ALL backups (heal snapshots + update snapshots + startup snapshots) stored in ~/.wolverine-safe-backups/ (OUTSIDE project, survives git clean, rm -rf, project deletion). Restore with: wolverine --restore <name>. List: wolverine --backups.",
|
|
246
246
|
metadata: { topic: "safe-update-warning" },
|
|
247
247
|
},
|
|
248
248
|
{
|
|
@@ -304,6 +304,13 @@ class Brain {
|
|
|
304
304
|
console.log(chalk.gray(" 🧠 Framework updated — merging new seed docs..."));
|
|
305
305
|
await this._mergeSeedDocs();
|
|
306
306
|
try { fs.unlinkSync(seedRefreshPath); } catch {}
|
|
307
|
+
} else {
|
|
308
|
+
// Auto-detect new seeds: if SEED_DOCS count > docs namespace count, merge
|
|
309
|
+
const docsCount = (this.store.getNamespace("docs") || []).length;
|
|
310
|
+
if (SEED_DOCS.length > docsCount) {
|
|
311
|
+
console.log(chalk.gray(` 🧠 New seed docs detected (${SEED_DOCS.length} vs ${docsCount}) — merging...`));
|
|
312
|
+
await this._mergeSeedDocs();
|
|
313
|
+
}
|
|
307
314
|
}
|
|
308
315
|
|
|
309
316
|
// 2. Scan project for live function map
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
const fs = require("fs");
|
|
2
|
+
const path = require("path");
|
|
3
|
+
const chalk = require("chalk");
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Initialize the server/ directory from the built-in template.
|
|
7
|
+
*
|
|
8
|
+
* Called on first run if server/ doesn't exist. NEVER overwrites existing files.
|
|
9
|
+
* This is why wolverine ships without a server/ directory in the npm package —
|
|
10
|
+
* so `npm install` and `git pull` can never destroy user code.
|
|
11
|
+
*
|
|
12
|
+
* The template lives in src/templates/server/ and contains a minimal Fastify
|
|
13
|
+
* server with health, api, and time routes + default settings.json.
|
|
14
|
+
*/
|
|
15
|
+
function initServer(cwd, scriptPath) {
|
|
16
|
+
const serverDir = path.join(cwd, "server");
|
|
17
|
+
const scriptFile = path.resolve(cwd, scriptPath);
|
|
18
|
+
|
|
19
|
+
// If the script file already exists, nothing to do
|
|
20
|
+
if (fs.existsSync(scriptFile)) return false;
|
|
21
|
+
|
|
22
|
+
// If server/ exists but the specific script doesn't, don't create — user has their own structure
|
|
23
|
+
if (fs.existsSync(serverDir) && fs.readdirSync(serverDir).length > 0) {
|
|
24
|
+
console.log(chalk.yellow(` ⚠️ ${scriptPath} not found but server/ exists — skipping template init`));
|
|
25
|
+
return false;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Create server/ from template
|
|
29
|
+
const templateDir = path.join(__dirname, "..", "templates", "server");
|
|
30
|
+
if (!fs.existsSync(templateDir)) {
|
|
31
|
+
console.log(chalk.yellow(" ⚠️ No server template found — create server/index.js manually"));
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
console.log(chalk.blue(" 📦 Creating default server/ from template..."));
|
|
36
|
+
_copyDir(templateDir, serverDir);
|
|
37
|
+
console.log(chalk.green(" ✅ Server initialized. Edit server/ to build your app."));
|
|
38
|
+
return true;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function _copyDir(src, dest) {
|
|
42
|
+
fs.mkdirSync(dest, { recursive: true });
|
|
43
|
+
for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
|
|
44
|
+
const srcPath = path.join(src, entry.name);
|
|
45
|
+
const destPath = path.join(dest, entry.name);
|
|
46
|
+
if (entry.isDirectory()) {
|
|
47
|
+
_copyDir(srcPath, destPath);
|
|
48
|
+
} else {
|
|
49
|
+
// NEVER overwrite existing files
|
|
50
|
+
if (!fs.existsSync(destPath)) {
|
|
51
|
+
fs.copyFileSync(srcPath, destPath);
|
|
52
|
+
console.log(chalk.gray(` + ${path.relative(dest, destPath)}`));
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
module.exports = { initServer };
|
package/src/core/runner.js
CHANGED
|
@@ -246,11 +246,23 @@ class WolverineRunner {
|
|
|
246
246
|
console.log(chalk.gray(" 🔄 Auto-update: disabled"));
|
|
247
247
|
}
|
|
248
248
|
|
|
249
|
+
// Create startup backup — safety net for corrupted server/ from bad updates
|
|
250
|
+
// If the child crashes immediately after this, we can rollback to this known state
|
|
251
|
+
try {
|
|
252
|
+
this._startupBackupId = this.backupManager.createBackup("pre-start (safety snapshot)");
|
|
253
|
+
console.log(chalk.gray(` 📸 Startup backup: ${this._startupBackupId}`));
|
|
254
|
+
} catch (err) {
|
|
255
|
+
console.log(chalk.yellow(` ⚠️ Startup backup failed (non-fatal): ${err.message}`));
|
|
256
|
+
}
|
|
257
|
+
|
|
249
258
|
this._spawn();
|
|
250
259
|
}
|
|
251
260
|
|
|
252
261
|
restart() {
|
|
253
262
|
console.log(chalk.blue("\n 🔄 Restarting server..."));
|
|
263
|
+
// Reset config cache so restart picks up any settings.json changes
|
|
264
|
+
const { resetConfig } = require("./config");
|
|
265
|
+
resetConfig();
|
|
254
266
|
this.healthMonitor.stop();
|
|
255
267
|
this._clearStabilityTimer();
|
|
256
268
|
|
|
@@ -566,6 +578,20 @@ class WolverineRunner {
|
|
|
566
578
|
console.log(chalk.yellow(" Retrying...\n"));
|
|
567
579
|
this._spawn();
|
|
568
580
|
} else {
|
|
581
|
+
// Max retries — try rolling back to startup backup as last resort
|
|
582
|
+
if (this._startupBackupId) {
|
|
583
|
+
console.log(chalk.yellow(`\n 🔄 Max retries reached — rolling back to startup backup ${this._startupBackupId}...`));
|
|
584
|
+
try {
|
|
585
|
+
this.backupManager.rollbackTo(this._startupBackupId);
|
|
586
|
+
console.log(chalk.green(" ✅ Rolled back to startup state. Restarting..."));
|
|
587
|
+
this.retryCount = 0;
|
|
588
|
+
this._startupBackupId = null; // don't rollback again if this also fails
|
|
589
|
+
this._spawn();
|
|
590
|
+
return;
|
|
591
|
+
} catch (rbErr) {
|
|
592
|
+
console.log(chalk.red(` ❌ Rollback failed: ${rbErr.message}`));
|
|
593
|
+
}
|
|
594
|
+
}
|
|
569
595
|
console.log(chalk.red(" Max retries reached."));
|
|
570
596
|
this._logRollbackHint();
|
|
571
597
|
this.running = false;
|
package/server/lib/gpu-fleet.js
DELETED
|
@@ -1,313 +0,0 @@
|
|
|
1
|
-
const https = require("https");
|
|
2
|
-
const http = require("http");
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* GPU Fleet Manager — controls Vast.ai GPU instances for inference.
|
|
6
|
-
*
|
|
7
|
-
* Features:
|
|
8
|
-
* - Start/stop individual GPUs via Vast API
|
|
9
|
-
* - Health monitoring and auto-discovery
|
|
10
|
-
* - Round-robin routing across active GPUs
|
|
11
|
-
* - Auto-scale: start burst GPUs when queue grows, stop when idle
|
|
12
|
-
* - Cold start tracking (~5s per GPU)
|
|
13
|
-
*
|
|
14
|
-
* Each GPU instance runs llama.cpp with --api-key for security.
|
|
15
|
-
* Only the EC2 backend has the internal keys.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
|
-
const VAST_API = "https://cloud.vast.ai/api/v0";
|
|
19
|
-
const VAST_KEY = process.env.VAST_API_KEY || "";
|
|
20
|
-
const POLL_INTERVAL_MS = 30000; // health check every 30s
|
|
21
|
-
const IDLE_STOP_MS = parseInt(process.env.GPU_IDLE_STOP_MS, 10) || 300000; // 5 min idle → stop
|
|
22
|
-
const SCALE_UP_QUEUE = parseInt(process.env.GPU_SCALE_UP_QUEUE, 10) || 3; // start burst GPU when 3+ queued
|
|
23
|
-
|
|
24
|
-
class GpuFleet {
|
|
25
|
-
constructor(config = {}) {
|
|
26
|
-
// GPU registry: { instanceId → { host, port, key, status, lastUsed, lastHealth, model } }
|
|
27
|
-
this.gpus = new Map();
|
|
28
|
-
this._roundRobinIndex = 0;
|
|
29
|
-
this._pollTimer = null;
|
|
30
|
-
this._scaleTimer = null;
|
|
31
|
-
this._requestQueue = [];
|
|
32
|
-
this._activeRequests = 0;
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
/**
|
|
36
|
-
* Register a GPU instance in the fleet.
|
|
37
|
-
*/
|
|
38
|
-
register(instanceId, { host, port, key, model = "wolverine-test-1", role = "general", autoStop = true }) {
|
|
39
|
-
this.gpus.set(String(instanceId), {
|
|
40
|
-
instanceId: String(instanceId),
|
|
41
|
-
host, port: parseInt(port, 10), key,
|
|
42
|
-
model, role, autoStop,
|
|
43
|
-
status: "unknown", // unknown, starting, healthy, unhealthy, stopped
|
|
44
|
-
lastUsed: 0,
|
|
45
|
-
lastHealth: null,
|
|
46
|
-
coldStartMs: null,
|
|
47
|
-
});
|
|
48
|
-
return this;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
/**
|
|
52
|
-
* Load GPU config from environment or database.
|
|
53
|
-
*/
|
|
54
|
-
loadFromEnv() {
|
|
55
|
-
// Primary GPU from env
|
|
56
|
-
const url = process.env.WOLVERINE_INFERENCE_URL;
|
|
57
|
-
const key = process.env.WOLVERINE_GPU_KEY;
|
|
58
|
-
if (url && key) {
|
|
59
|
-
try {
|
|
60
|
-
const parsed = new URL(url);
|
|
61
|
-
const instanceId = process.env.WOLVERINE_GPU_INSTANCE_ID || "primary";
|
|
62
|
-
this.register(instanceId, {
|
|
63
|
-
host: parsed.hostname,
|
|
64
|
-
port: parseInt(parsed.port, 10) || 80,
|
|
65
|
-
key,
|
|
66
|
-
role: "primary",
|
|
67
|
-
autoStop: false, // primary stays on
|
|
68
|
-
});
|
|
69
|
-
} catch {}
|
|
70
|
-
}
|
|
71
|
-
return this;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
/**
|
|
75
|
-
* Load GPU config from database.
|
|
76
|
-
*/
|
|
77
|
-
async loadFromDb(pool) {
|
|
78
|
-
try {
|
|
79
|
-
// Check if gpu_fleet table exists
|
|
80
|
-
const exists = await pool.query(
|
|
81
|
-
"SELECT 1 FROM information_schema.tables WHERE table_name = 'gpu_fleet' LIMIT 1"
|
|
82
|
-
);
|
|
83
|
-
if (exists.rows.length === 0) {
|
|
84
|
-
await pool.query(`
|
|
85
|
-
CREATE TABLE gpu_fleet (
|
|
86
|
-
instance_id TEXT PRIMARY KEY,
|
|
87
|
-
vast_id TEXT,
|
|
88
|
-
host TEXT NOT NULL,
|
|
89
|
-
port INTEGER NOT NULL DEFAULT 8080,
|
|
90
|
-
internal_key TEXT NOT NULL,
|
|
91
|
-
model TEXT DEFAULT 'wolverine-test-1',
|
|
92
|
-
role TEXT DEFAULT 'general',
|
|
93
|
-
auto_stop BOOLEAN DEFAULT true,
|
|
94
|
-
status TEXT DEFAULT 'stopped',
|
|
95
|
-
gpu_name TEXT,
|
|
96
|
-
created_at TIMESTAMPTZ DEFAULT NOW()
|
|
97
|
-
)
|
|
98
|
-
`);
|
|
99
|
-
}
|
|
100
|
-
const { rows } = await pool.query("SELECT * FROM gpu_fleet");
|
|
101
|
-
for (const r of rows) {
|
|
102
|
-
this.register(r.instance_id, {
|
|
103
|
-
host: r.host, port: r.port, key: r.internal_key,
|
|
104
|
-
model: r.model, role: r.role, autoStop: r.auto_stop,
|
|
105
|
-
});
|
|
106
|
-
const gpu = this.gpus.get(r.instance_id);
|
|
107
|
-
if (gpu) gpu.status = r.status;
|
|
108
|
-
if (gpu) gpu.vastId = r.vast_id;
|
|
109
|
-
if (gpu) gpu.gpuName = r.gpu_name;
|
|
110
|
-
}
|
|
111
|
-
} catch (err) {
|
|
112
|
-
console.log("[GPU Fleet] DB load failed:", err.message);
|
|
113
|
-
}
|
|
114
|
-
return this;
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
/**
|
|
118
|
-
* Start health polling.
|
|
119
|
-
*/
|
|
120
|
-
startPolling() {
|
|
121
|
-
if (this._pollTimer) return;
|
|
122
|
-
this._pollTimer = setInterval(() => this._healthCheck(), POLL_INTERVAL_MS);
|
|
123
|
-
this._healthCheck(); // immediate first check
|
|
124
|
-
return this;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
stopPolling() {
|
|
128
|
-
if (this._pollTimer) { clearInterval(this._pollTimer); this._pollTimer = null; }
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
/**
|
|
132
|
-
* Get a healthy GPU for inference (round-robin).
|
|
133
|
-
* Returns { host, port, key, instanceId } or null if none available.
|
|
134
|
-
*/
|
|
135
|
-
getAvailable() {
|
|
136
|
-
const healthy = Array.from(this.gpus.values()).filter(g => g.status === "healthy");
|
|
137
|
-
if (healthy.length === 0) return null;
|
|
138
|
-
this._roundRobinIndex = (this._roundRobinIndex + 1) % healthy.length;
|
|
139
|
-
const gpu = healthy[this._roundRobinIndex];
|
|
140
|
-
gpu.lastUsed = Date.now();
|
|
141
|
-
return { host: gpu.host, port: gpu.port, key: gpu.key, instanceId: gpu.instanceId, model: gpu.model };
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
/**
|
|
145
|
-
* Start a stopped GPU instance via Vast API.
|
|
146
|
-
*/
|
|
147
|
-
async startGpu(instanceId) {
|
|
148
|
-
const gpu = this.gpus.get(String(instanceId));
|
|
149
|
-
if (!gpu) throw new Error(`GPU ${instanceId} not registered`);
|
|
150
|
-
if (gpu.status === "healthy" || gpu.status === "starting") return gpu;
|
|
151
|
-
|
|
152
|
-
gpu.status = "starting";
|
|
153
|
-
gpu.coldStartMs = null;
|
|
154
|
-
const startTime = Date.now();
|
|
155
|
-
|
|
156
|
-
const vastId = gpu.vastId || instanceId;
|
|
157
|
-
try {
|
|
158
|
-
await this._vastApi("PUT", `/instances/${vastId}/`, { state: "running" });
|
|
159
|
-
|
|
160
|
-
// Poll until healthy (max 60s)
|
|
161
|
-
for (let i = 0; i < 120; i++) {
|
|
162
|
-
await new Promise(r => setTimeout(r, 500));
|
|
163
|
-
try {
|
|
164
|
-
const res = await this._httpGet(gpu.host, gpu.port, "/v1/models", gpu.key);
|
|
165
|
-
if (res && res.includes("gemma")) {
|
|
166
|
-
gpu.status = "healthy";
|
|
167
|
-
gpu.coldStartMs = Date.now() - startTime;
|
|
168
|
-
gpu.lastHealth = Date.now();
|
|
169
|
-
console.log(`[GPU Fleet] ${instanceId} started in ${gpu.coldStartMs}ms`);
|
|
170
|
-
return gpu;
|
|
171
|
-
}
|
|
172
|
-
} catch {}
|
|
173
|
-
}
|
|
174
|
-
gpu.status = "unhealthy";
|
|
175
|
-
throw new Error(`GPU ${instanceId} failed to start within 60s`);
|
|
176
|
-
} catch (err) {
|
|
177
|
-
gpu.status = "unhealthy";
|
|
178
|
-
throw err;
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
/**
|
|
183
|
-
* Stop a GPU instance via Vast API.
|
|
184
|
-
*/
|
|
185
|
-
async stopGpu(instanceId) {
|
|
186
|
-
const gpu = this.gpus.get(String(instanceId));
|
|
187
|
-
if (!gpu) throw new Error(`GPU ${instanceId} not registered`);
|
|
188
|
-
|
|
189
|
-
const vastId = gpu.vastId || instanceId;
|
|
190
|
-
try {
|
|
191
|
-
await this._vastApi("PUT", `/instances/${vastId}/`, { state: "stopped" });
|
|
192
|
-
gpu.status = "stopped";
|
|
193
|
-
console.log(`[GPU Fleet] ${instanceId} stopped`);
|
|
194
|
-
} catch (err) {
|
|
195
|
-
console.log(`[GPU Fleet] Stop failed for ${instanceId}:`, err.message);
|
|
196
|
-
}
|
|
197
|
-
return gpu;
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
/**
|
|
201
|
-
* Auto-scale: start burst GPUs when needed, stop idle ones.
|
|
202
|
-
*/
|
|
203
|
-
async autoScale(queueLength) {
|
|
204
|
-
// Scale up: start a stopped GPU if queue is long
|
|
205
|
-
if (queueLength >= SCALE_UP_QUEUE) {
|
|
206
|
-
const stopped = Array.from(this.gpus.values()).find(g => g.status === "stopped" && g.autoStop);
|
|
207
|
-
if (stopped) {
|
|
208
|
-
console.log(`[GPU Fleet] Queue at ${queueLength}, starting burst GPU ${stopped.instanceId}`);
|
|
209
|
-
try { await this.startGpu(stopped.instanceId); } catch (e) { console.log("[GPU Fleet] Scale-up failed:", e.message); }
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
// Scale down: stop idle burst GPUs
|
|
214
|
-
const now = Date.now();
|
|
215
|
-
for (const gpu of this.gpus.values()) {
|
|
216
|
-
if (gpu.autoStop && gpu.status === "healthy" && gpu.lastUsed > 0 && (now - gpu.lastUsed) > IDLE_STOP_MS) {
|
|
217
|
-
console.log(`[GPU Fleet] ${gpu.instanceId} idle for ${Math.round((now - gpu.lastUsed) / 1000)}s, stopping`);
|
|
218
|
-
try { await this.stopGpu(gpu.instanceId); } catch {}
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
/**
|
|
224
|
-
* Get fleet status for dashboard/API.
|
|
225
|
-
*/
|
|
226
|
-
getStatus() {
|
|
227
|
-
const gpus = Array.from(this.gpus.values()).map(g => ({
|
|
228
|
-
instanceId: g.instanceId,
|
|
229
|
-
vastId: g.vastId,
|
|
230
|
-
gpuName: g.gpuName,
|
|
231
|
-
host: g.host,
|
|
232
|
-
port: g.port,
|
|
233
|
-
model: g.model,
|
|
234
|
-
role: g.role,
|
|
235
|
-
status: g.status,
|
|
236
|
-
autoStop: g.autoStop,
|
|
237
|
-
lastUsed: g.lastUsed ? new Date(g.lastUsed).toISOString() : null,
|
|
238
|
-
lastHealth: g.lastHealth ? new Date(g.lastHealth).toISOString() : null,
|
|
239
|
-
coldStartMs: g.coldStartMs,
|
|
240
|
-
}));
|
|
241
|
-
return {
|
|
242
|
-
total: gpus.length,
|
|
243
|
-
healthy: gpus.filter(g => g.status === "healthy").length,
|
|
244
|
-
stopped: gpus.filter(g => g.status === "stopped").length,
|
|
245
|
-
starting: gpus.filter(g => g.status === "starting").length,
|
|
246
|
-
gpus,
|
|
247
|
-
};
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
// ── Private ──
|
|
251
|
-
|
|
252
|
-
async _healthCheck() {
|
|
253
|
-
for (const gpu of this.gpus.values()) {
|
|
254
|
-
if (gpu.status === "stopped" || gpu.status === "starting") continue;
|
|
255
|
-
try {
|
|
256
|
-
const res = await this._httpGet(gpu.host, gpu.port, "/v1/models", gpu.key);
|
|
257
|
-
if (res && (res.includes("gemma") || res.includes("wolverine"))) {
|
|
258
|
-
gpu.status = "healthy";
|
|
259
|
-
gpu.lastHealth = Date.now();
|
|
260
|
-
} else {
|
|
261
|
-
gpu.status = "unhealthy";
|
|
262
|
-
}
|
|
263
|
-
} catch {
|
|
264
|
-
gpu.status = "unhealthy";
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
_vastApi(method, path, body) {
|
|
270
|
-
return new Promise((resolve, reject) => {
|
|
271
|
-
const bodyStr = body ? JSON.stringify(body) : null;
|
|
272
|
-
const req = https.request({
|
|
273
|
-
hostname: "cloud.vast.ai",
|
|
274
|
-
path: `/api/v0${path}`,
|
|
275
|
-
method,
|
|
276
|
-
timeout: 15000,
|
|
277
|
-
headers: {
|
|
278
|
-
"Authorization": `Bearer ${VAST_KEY}`,
|
|
279
|
-
"Content-Type": "application/json",
|
|
280
|
-
...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}),
|
|
281
|
-
},
|
|
282
|
-
}, (res) => {
|
|
283
|
-
let data = "";
|
|
284
|
-
res.on("data", c => { data += c; });
|
|
285
|
-
res.on("end", () => {
|
|
286
|
-
try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); }
|
|
287
|
-
});
|
|
288
|
-
});
|
|
289
|
-
req.on("error", reject);
|
|
290
|
-
req.on("timeout", () => { req.destroy(); reject(new Error("Vast API timeout")); });
|
|
291
|
-
if (bodyStr) req.write(bodyStr);
|
|
292
|
-
req.end();
|
|
293
|
-
});
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
_httpGet(host, port, path, key) {
|
|
297
|
-
return new Promise((resolve, reject) => {
|
|
298
|
-
const req = http.request({
|
|
299
|
-
hostname: host, port, path, method: "GET", timeout: 5000,
|
|
300
|
-
headers: key ? { "Authorization": `Bearer ${key}` } : {},
|
|
301
|
-
}, (res) => {
|
|
302
|
-
let data = "";
|
|
303
|
-
res.on("data", c => { data += c; });
|
|
304
|
-
res.on("end", () => resolve(data));
|
|
305
|
-
});
|
|
306
|
-
req.on("error", reject);
|
|
307
|
-
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
|
|
308
|
-
req.end();
|
|
309
|
-
});
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
module.exports = { GpuFleet };
|
package/server/routes/fleet.js
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* GPU Fleet Management API — admin routes for controlling inference GPUs.
|
|
3
|
-
*
|
|
4
|
-
* Endpoints:
|
|
5
|
-
* GET /status — fleet overview (all GPUs, health, queue)
|
|
6
|
-
* POST /start/:id — start a stopped GPU
|
|
7
|
-
* POST /stop/:id — stop a running GPU
|
|
8
|
-
* POST /register — add a new GPU to the fleet
|
|
9
|
-
* POST /remove/:id — remove a GPU from the fleet
|
|
10
|
-
* POST /scale — trigger auto-scale check
|
|
11
|
-
* GET /benchmark/:id — run inference benchmark on a GPU
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
async function routes(fastify) {
|
|
15
|
-
const { pool } = require("../lib/db");
|
|
16
|
-
|
|
17
|
-
// Fleet instance is attached to fastify by index.js
|
|
18
|
-
function getFleet() {
|
|
19
|
-
return fastify.gpuFleet;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
// Admin auth
|
|
23
|
-
async function requireAdmin(request, reply) {
|
|
24
|
-
const settings = require("../config/settings.json");
|
|
25
|
-
const token = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
|
|
26
|
-
if (token !== settings.platform?.apiKey) {
|
|
27
|
-
return reply.code(401).send({ error: "Admin access required" });
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
// GET /status — fleet overview
|
|
32
|
-
fastify.get("/status", { preHandler: requireAdmin }, async (request, reply) => {
|
|
33
|
-
const fleet = getFleet();
|
|
34
|
-
return fleet.getStatus();
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
// POST /start/:id — start a GPU
|
|
38
|
-
fastify.post("/start/:id", { preHandler: requireAdmin }, async (request, reply) => {
|
|
39
|
-
const fleet = getFleet();
|
|
40
|
-
const { id } = request.params;
|
|
41
|
-
try {
|
|
42
|
-
const gpu = await fleet.startGpu(id);
|
|
43
|
-
// Update DB
|
|
44
|
-
await pool.query("UPDATE gpu_fleet SET status = 'healthy' WHERE instance_id = $1", [id]).catch(() => {});
|
|
45
|
-
return { status: "started", instanceId: id, coldStartMs: gpu.coldStartMs };
|
|
46
|
-
} catch (err) {
|
|
47
|
-
return reply.code(500).send({ error: err.message });
|
|
48
|
-
}
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
// POST /stop/:id — stop a GPU
|
|
52
|
-
fastify.post("/stop/:id", { preHandler: requireAdmin }, async (request, reply) => {
|
|
53
|
-
const fleet = getFleet();
|
|
54
|
-
const { id } = request.params;
|
|
55
|
-
try {
|
|
56
|
-
await fleet.stopGpu(id);
|
|
57
|
-
await pool.query("UPDATE gpu_fleet SET status = 'stopped' WHERE instance_id = $1", [id]).catch(() => {});
|
|
58
|
-
return { status: "stopped", instanceId: id };
|
|
59
|
-
} catch (err) {
|
|
60
|
-
return reply.code(500).send({ error: err.message });
|
|
61
|
-
}
|
|
62
|
-
});
|
|
63
|
-
|
|
64
|
-
// POST /register — add a GPU to the fleet
|
|
65
|
-
fastify.post("/register", { preHandler: requireAdmin }, async (request, reply) => {
|
|
66
|
-
const fleet = getFleet();
|
|
67
|
-
const { instanceId, vastId, host, port, key, model, role, gpuName, autoStop } = request.body || {};
|
|
68
|
-
if (!instanceId || !host || !key) {
|
|
69
|
-
return reply.code(400).send({ error: "instanceId, host, and key required" });
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
fleet.register(instanceId, { host, port: port || 8080, key, model, role, autoStop: autoStop !== false });
|
|
73
|
-
const gpu = fleet.gpus.get(instanceId);
|
|
74
|
-
if (vastId) gpu.vastId = vastId;
|
|
75
|
-
if (gpuName) gpu.gpuName = gpuName;
|
|
76
|
-
|
|
77
|
-
// Save to DB
|
|
78
|
-
await pool.query(
|
|
79
|
-
`INSERT INTO gpu_fleet (instance_id, vast_id, host, port, internal_key, model, role, auto_stop, gpu_name)
|
|
80
|
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
|
81
|
-
ON CONFLICT (instance_id) DO UPDATE SET
|
|
82
|
-
host = $3, port = $4, internal_key = $5, model = $6, role = $7, auto_stop = $8, gpu_name = $9, vast_id = $2`,
|
|
83
|
-
[instanceId, vastId || null, host, port || 8080, key, model || "wolverine-test-1", role || "general", autoStop !== false, gpuName || null]
|
|
84
|
-
);
|
|
85
|
-
|
|
86
|
-
return { registered: instanceId, fleet: fleet.getStatus() };
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
// POST /remove/:id — remove a GPU from the fleet
|
|
90
|
-
fastify.post("/remove/:id", { preHandler: requireAdmin }, async (request, reply) => {
|
|
91
|
-
const fleet = getFleet();
|
|
92
|
-
const { id } = request.params;
|
|
93
|
-
fleet.gpus.delete(id);
|
|
94
|
-
await pool.query("DELETE FROM gpu_fleet WHERE instance_id = $1", [id]).catch(() => {});
|
|
95
|
-
return { removed: id };
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
// POST /scale — trigger auto-scale
|
|
99
|
-
fastify.post("/scale", { preHandler: requireAdmin }, async (request, reply) => {
|
|
100
|
-
const fleet = getFleet();
|
|
101
|
-
const queueLength = request.body?.queueLength || 0;
|
|
102
|
-
await fleet.autoScale(queueLength);
|
|
103
|
-
return fleet.getStatus();
|
|
104
|
-
});
|
|
105
|
-
|
|
106
|
-
// GET /benchmark/:id — quick benchmark
|
|
107
|
-
fastify.get("/benchmark/:id", { preHandler: requireAdmin }, async (request, reply) => {
|
|
108
|
-
const fleet = getFleet();
|
|
109
|
-
const gpu = fleet.gpus.get(request.params.id);
|
|
110
|
-
if (!gpu || gpu.status !== "healthy") {
|
|
111
|
-
return reply.code(400).send({ error: "GPU not available" });
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
const http = require("http");
|
|
115
|
-
const results = [];
|
|
116
|
-
|
|
117
|
-
for (const prompt of ["2+2?", "Write isPrime in JS.", "Explain TCP in 1 sentence."]) {
|
|
118
|
-
const start = Date.now();
|
|
119
|
-
try {
|
|
120
|
-
const body = JSON.stringify({
|
|
121
|
-
model: gpu.model,
|
|
122
|
-
messages: [{ role: "user", content: prompt }],
|
|
123
|
-
max_tokens: 50, temperature: 0,
|
|
124
|
-
});
|
|
125
|
-
const res = await new Promise((resolve, reject) => {
|
|
126
|
-
const req = http.request({
|
|
127
|
-
hostname: gpu.host, port: gpu.port, path: "/v1/chat/completions",
|
|
128
|
-
method: "POST", timeout: 30000,
|
|
129
|
-
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${gpu.key}`, "Content-Length": Buffer.byteLength(body) },
|
|
130
|
-
}, (res) => {
|
|
131
|
-
let data = "";
|
|
132
|
-
res.on("data", c => { data += c; });
|
|
133
|
-
res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve(null); } });
|
|
134
|
-
});
|
|
135
|
-
req.on("error", reject);
|
|
136
|
-
req.write(body);
|
|
137
|
-
req.end();
|
|
138
|
-
});
|
|
139
|
-
|
|
140
|
-
const elapsed = Date.now() - start;
|
|
141
|
-
const usage = res?.usage || {};
|
|
142
|
-
const tokOut = usage.completion_tokens || 0;
|
|
143
|
-
results.push({
|
|
144
|
-
prompt: prompt.slice(0, 30),
|
|
145
|
-
latencyMs: elapsed,
|
|
146
|
-
tokensOut: tokOut,
|
|
147
|
-
tokPerSec: tokOut > 0 ? Math.round(tokOut / (elapsed / 1000)) : 0,
|
|
148
|
-
response: res?.choices?.[0]?.message?.content?.slice(0, 60),
|
|
149
|
-
});
|
|
150
|
-
} catch (err) {
|
|
151
|
-
results.push({ prompt: prompt.slice(0, 30), error: err.message });
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
const avgTokPerSec = results.filter(r => r.tokPerSec).reduce((s, r) => s + r.tokPerSec, 0) / Math.max(results.filter(r => r.tokPerSec).length, 1);
|
|
156
|
-
|
|
157
|
-
return {
|
|
158
|
-
instanceId: request.params.id,
|
|
159
|
-
gpu: gpu.gpuName,
|
|
160
|
-
model: gpu.model,
|
|
161
|
-
results,
|
|
162
|
-
avgTokPerSec: Math.round(avgTokPerSec),
|
|
163
|
-
};
|
|
164
|
-
});
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
module.exports = routes;
|
|
@@ -1,329 +0,0 @@
|
|
|
1
|
-
const https = require("https");
|
|
2
|
-
const http = require("http");
|
|
3
|
-
const crypto = require("crypto");
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Wolverine Inference API
|
|
7
|
-
*
|
|
8
|
-
* Credit system: $1 = 100 credits. 1 credit = $0.01 of compute.
|
|
9
|
-
* Token pricing (in credits per million tokens):
|
|
10
|
-
* wolverine-test-1: 1 credit input / 4 credits output per 1M tokens
|
|
11
|
-
* (= $0.01/$0.04 per 1M — 15x cheaper than gpt-4o-mini, 80x cheaper than haiku)
|
|
12
|
-
*
|
|
13
|
-
* Rate limiting: per API key, configurable per tier.
|
|
14
|
-
* Queue: when GPU is at capacity, requests queue with timeout.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "http://ssh8.vast.ai:24233";
|
|
18
|
-
const GPU_KEY = process.env.WOLVERINE_GPU_KEY || "";
|
|
19
|
-
|
|
20
|
-
// Pricing in CREDITS per million tokens ($1 = 100 credits)
|
|
21
|
-
const MODEL_PRICING = {
|
|
22
|
-
"wolverine-test-1": { input: 1.0, output: 4.0 }, // $0.01/$0.04 per 1M
|
|
23
|
-
"wolverine-coding": { input: 1.0, output: 4.0 },
|
|
24
|
-
"wolverine-reasoning": { input: 2.5, output: 10.0 }, // heavier model when available
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
const MODEL_MAP = {
|
|
28
|
-
"wolverine-test-1": "wolverine-test-1",
|
|
29
|
-
"wolverine-coding": "wolverine-test-1",
|
|
30
|
-
"wolverine-reasoning": "wolverine-test-1",
|
|
31
|
-
};
|
|
32
|
-
|
|
33
|
-
const TIER_LIMITS = {
|
|
34
|
-
free: { rpm: 10, maxTokens: 1024 },
|
|
35
|
-
starter: { rpm: 60, maxTokens: 4096 },
|
|
36
|
-
pro: { rpm: 300, maxTokens: 4096 },
|
|
37
|
-
admin: { rpm: 9999, maxTokens: 4096 },
|
|
38
|
-
};
|
|
39
|
-
|
|
40
|
-
function tokenCost(model, inputTokens, outputTokens) {
|
|
41
|
-
const p = MODEL_PRICING[model] || MODEL_PRICING["wolverine-test-1"];
|
|
42
|
-
return ((inputTokens / 1_000_000) * p.input) + ((outputTokens / 1_000_000) * p.output);
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
// ── Request Queue (handles GPU saturation) ──
|
|
46
|
-
const queue = [];
|
|
47
|
-
let activeRequests = 0;
|
|
48
|
-
const MAX_CONCURRENT = 8; // vLLM max-num-seqs
|
|
49
|
-
const QUEUE_TIMEOUT_MS = 30000;
|
|
50
|
-
|
|
51
|
-
function enqueue() {
|
|
52
|
-
return new Promise((resolve, reject) => {
|
|
53
|
-
if (activeRequests < MAX_CONCURRENT) {
|
|
54
|
-
activeRequests++;
|
|
55
|
-
resolve();
|
|
56
|
-
return;
|
|
57
|
-
}
|
|
58
|
-
const timer = setTimeout(() => {
|
|
59
|
-
const idx = queue.indexOf(entry);
|
|
60
|
-
if (idx >= 0) queue.splice(idx, 1);
|
|
61
|
-
reject(new Error("Queue timeout — GPU at capacity. Try again in a few seconds."));
|
|
62
|
-
}, QUEUE_TIMEOUT_MS);
|
|
63
|
-
const entry = { resolve: () => { clearTimeout(timer); activeRequests++; resolve(); }, reject };
|
|
64
|
-
queue.push(entry);
|
|
65
|
-
});
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
/**
 * Release a backend concurrency slot and, if anyone is waiting, hand the
 * freed slot to the oldest queued request (FIFO). The active counter is
 * clamped at zero so unbalanced calls cannot drive it negative.
 */
function dequeue() {
  activeRequests = Math.max(0, activeRequests - 1);
  const next = queue.shift();
  if (next) next.resolve();
}
|
|
75
|
-
|
|
76
|
-
/**
 * Inference API routes: authentication, credit billing and rate limiting in
 * front of the GPU backend, plus admin key management and usage reporting.
 *
 * Registered as a Fastify plugin. All rate-limit state is per-process and
 * in-memory; billing state lives in Postgres (api_credits / api_usage_log).
 *
 * @param {import("fastify").FastifyInstance} fastify
 */
async function routes(fastify) {
  const { pool } = require("../lib/db");

  // Rate limit state (in-memory): apiKey -> { count, resetAt } for the
  // current fixed 60s window.
  // NOTE(review): entries are never evicted, so this map grows with the
  // number of distinct keys ever seen — fine for a modest key count, worth a
  // periodic sweep if keys become unbounded.
  const rateWindows = new Map();

  /**
   * preHandler: resolve the API key to an account, then enforce credit
   * balance and per-minute rate limits. On success sets `request.account`;
   * on failure replies 401/402/429 and returns.
   */
  async function authenticate(request, reply) {
    const apiKey = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
    if (!apiKey) return reply.code(401).send({ error: { message: "API key required. Pass via Authorization: Bearer <key>", type: "auth_error" } });

    // Platform key bypass — skips DB lookup, billing and rate limiting.
    // NOTE(review): `===` is not a constant-time comparison; consider
    // crypto.timingSafeEqual if this key is treated as a high-value secret.
    let settings = {};
    try { settings = require("../config/settings.json"); } catch {}
    if (apiKey === settings.platform?.apiKey) {
      request.account = { api_key: apiKey, owner: "platform", tier: "admin", credits_remaining: 999999, rate_limit_rpm: 9999 };
      return;
    }

    const result = await pool.query("SELECT * FROM api_credits WHERE api_key = $1", [apiKey]);
    if (result.rows.length === 0) return reply.code(401).send({ error: { message: "Invalid API key", type: "auth_error" } });

    const account = result.rows[0];

    // Credit check — block requests once the balance is exhausted.
    if (parseFloat(account.credits_remaining) <= 0) {
      return reply.code(402).send({ error: { message: "Insufficient credits. Add credits at wolverinenode.xyz", type: "billing_error", credits_remaining: 0 } });
    }

    // Rate limit: fixed 60-second window per key.
    const now = Date.now();
    const window = rateWindows.get(apiKey) || { count: 0, resetAt: now + 60000 };
    if (now > window.resetAt) { window.count = 0; window.resetAt = now + 60000; }
    const limit = account.rate_limit_rpm || TIER_LIMITS[account.tier]?.rpm || 10;
    if (window.count >= limit) {
      const retryAfter = Math.ceil((window.resetAt - now) / 1000);
      return reply.code(429).send({ error: { message: `Rate limit: ${limit} requests/min. Retry in ${retryAfter}s`, type: "rate_limit", retry_after: retryAfter } });
    }
    window.count++;
    rateWindows.set(apiKey, window);

    request.account = account;
  }

  // ── POST /chat/completions ──
  // OpenAI-compatible completion endpoint: maps the public model name to the
  // backend model, queues under saturation, bills credits, logs usage.
  fastify.post("/chat/completions", { preHandler: authenticate }, async (request, reply) => {
    const body = request.body || {};
    const requestedModel = body.model || "wolverine-test-1";
    const account = request.account;
    const tier = TIER_LIMITS[account.tier] || TIER_LIMITS.free;
    const startMs = Date.now();

    // Map model name for the backend and enforce the tier's max_tokens cap.
    // FIX: clamp on the copy — previously `request.body` was mutated in place.
    const backendBody = { ...body, model: MODEL_MAP[requestedModel] || requestedModel };
    if (backendBody.max_tokens && backendBody.max_tokens > tier.maxTokens) {
      backendBody.max_tokens = tier.maxTokens;
    }

    // FIX: record saturation BEFORE acquiring a slot. The old
    // `activeRequests > MAX_CONCURRENT` check in the response could never be
    // true, because enqueue() caps activeRequests at MAX_CONCURRENT.
    const wasQueued = activeRequests >= MAX_CONCURRENT;

    // Queue if GPU saturated; 503 when the wait times out.
    try {
      await enqueue();
    } catch (err) {
      return reply.code(503).send({ error: { message: err.message, type: "capacity_error", queue_length: queue.length } });
    }

    try {
      const result = await proxyToInference("/v1/chat/completions", backendBody);
      const latencyMs = Date.now() - startMs;

      const usage = result.usage || {};
      const inputTokens = usage.prompt_tokens || 0;
      const outputTokens = usage.completion_tokens || 0;
      const cost = tokenCost(requestedModel, inputTokens, outputTokens);

      // Bill credits and log usage (skip for the platform key).
      if (account.owner !== "platform") {
        await pool.query(
          "UPDATE api_credits SET credits_remaining = credits_remaining - $1, credits_used = credits_used + $1, last_used = NOW() WHERE api_key = $2",
          [cost, account.api_key]
        );
        await pool.query(
          "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, $3, $4, $5, $6, $7, true, $8)",
          [account.api_key, requestedModel, inputTokens, outputTokens, inputTokens + outputTokens, cost, latencyMs, "/v1/chat/completions"]
        );
      }

      // Rewrite the response so the client sees the model name it asked for,
      // plus billing metadata. credits_remaining is computed from the balance
      // read at auth time, so it is approximate under concurrent requests.
      if (result.model) result.model = requestedModel;
      result.x_wolverine = {
        credits_used: Math.round(cost * 1000000) / 1000000,
        credits_remaining: Math.max(0, parseFloat(account.credits_remaining) - cost),
        latency_ms: latencyMs,
        queued: wasQueued,
      };

      return result;
    } catch (err) {
      // Best-effort failure log; never let logging mask the 502.
      if (account.owner !== "platform") {
        await pool.query(
          "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, 0, 0, 0, 0, $3, false, $4)",
          [account.api_key, requestedModel, Date.now() - startMs, "/v1/chat/completions"]
        ).catch(() => {});
      }
      return reply.code(502).send({ error: { message: `Inference error: ${err.message}`, type: "inference_error" } });
    } finally {
      dequeue(); // always release the concurrency slot
    }
  });

  // ── GET /models ── OpenAI-style model list with credit pricing attached.
  fastify.get("/models", async () => ({
    object: "list",
    data: Object.entries(MODEL_PRICING).map(([id, p]) => ({
      id, object: "model", owned_by: "wolverine",
      created: Math.floor(Date.now() / 1000),
      pricing: { input_credits_per_million: p.input, output_credits_per_million: p.output, usd_per_credit: 0.01 },
    })),
  }));

  // ── POST /keys/create — generate new API key (admin only) ──
  fastify.post("/keys/create", { preHandler: authenticate }, async (request, reply) => {
    const account = request.account;
    if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can create API keys", type: "auth_error" } });

    const { owner, email, credits, tier, rpm } = request.body || {};
    if (!owner) return reply.code(400).send({ error: { message: "owner required", type: "validation_error" } });

    const newKey = "wlv_" + crypto.randomBytes(24).toString("hex");
    const keyTier = tier || "free";
    // FIX: use ?? so an explicit `credits: 0` (zero-balance key) or `rpm: 0`
    // is honored instead of silently falling back to the tier default.
    const keyCredits = credits ?? (keyTier === "free" ? 10 : 0);
    const keyRpm = rpm ?? TIER_LIMITS[keyTier]?.rpm ?? 10;

    await pool.query(
      "INSERT INTO api_credits (api_key, owner, email, credits_remaining, tier, plan_name, rate_limit_rpm) VALUES ($1, $2, $3, $4, $5, $6, $7)",
      [newKey, owner, email || null, keyCredits, keyTier, keyTier, keyRpm]
    );

    return { api_key: newKey, owner, tier: keyTier, credits: keyCredits, rate_limit_rpm: keyRpm };
  });

  // ── POST /keys/add-credits — add credits to a key (admin only) ──
  fastify.post("/keys/add-credits", { preHandler: authenticate }, async (request, reply) => {
    const account = request.account;
    if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can add credits", type: "auth_error" } });

    const { api_key, credits } = request.body || {};
    if (!api_key || !credits) return reply.code(400).send({ error: { message: "api_key and credits required" } });

    await pool.query("UPDATE api_credits SET credits_remaining = credits_remaining + $1 WHERE api_key = $2", [credits, api_key]);
    const updated = await pool.query("SELECT credits_remaining FROM api_credits WHERE api_key = $1", [api_key]);
    return { api_key, credits_added: credits, credits_remaining: parseFloat(updated.rows[0]?.credits_remaining || 0) };
  });

  // ── GET /keys — list all keys (admin only) ──
  fastify.get("/keys", { preHandler: authenticate }, async (request, reply) => {
    if (request.account.tier !== "admin") return reply.code(403).send({ error: { message: "Admin only" } });
    const { rows } = await pool.query("SELECT api_key, owner, email, tier, credits_remaining, credits_used, rate_limit_rpm, created_at, last_used FROM api_credits ORDER BY created_at DESC");
    return { keys: rows };
  });

  // ── GET /credits — balance snapshot for the calling key ──
  fastify.get("/credits", { preHandler: authenticate }, async (request, reply) => {
    const a = request.account;
    return {
      credits_remaining: parseFloat(a.credits_remaining),
      credits_used: parseFloat(a.credits_used || 0),
      usd_remaining: parseFloat(a.credits_remaining) * 0.01, // 1 credit = $0.01
      usd_used: parseFloat(a.credits_used || 0) * 0.01,
      tier: a.tier, rate_limit_rpm: a.rate_limit_rpm, owner: a.owner,
    };
  });

  // ── GET /usage — per-model summary + hourly timeline for the calling key ──
  fastify.get("/usage", { preHandler: authenticate }, async (request, reply) => {
    const apiKey = request.account.api_key;
    const period = request.query.period || "7d";
    // Whitelist mapping keeps the interval parameterized and un-injectable.
    const interval = { "1h": "1 hour", "1d": "1 day", "7d": "7 days", "30d": "30 days" }[period] || "7 days";

    const summary = await pool.query(
      `SELECT model, COUNT(*) AS calls, SUM(input_tokens) AS input, SUM(output_tokens) AS output,
              SUM(total_tokens) AS tokens, SUM(cost) AS credits_spent, AVG(latency_ms) AS avg_latency,
              COUNT(*) FILTER (WHERE success) AS successes
       FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
       GROUP BY model ORDER BY credits_spent DESC`, [apiKey, interval]
    );

    const timeline = await pool.query(
      `SELECT date_trunc('hour', timestamp) AS hour, SUM(cost) AS credits, SUM(total_tokens) AS tokens, COUNT(*) AS calls
       FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
       GROUP BY hour ORDER BY hour`, [apiKey, interval]
    );

    const totalCredits = summary.rows.reduce((s, r) => s + parseFloat(r.credits_spent || 0), 0);

    return {
      period,
      total_credits_spent: Math.round(totalCredits * 1000000) / 1000000,
      total_usd_spent: Math.round(totalCredits * 0.01 * 1000000) / 1000000,
      byModel: summary.rows.map(r => ({
        model: r.model, calls: parseInt(r.calls), input: parseInt(r.input || 0), output: parseInt(r.output || 0),
        tokens: parseInt(r.tokens || 0), credits_spent: parseFloat(r.credits_spent || 0),
        usd_spent: parseFloat(r.credits_spent || 0) * 0.01,
        avgLatencyMs: Math.round(parseFloat(r.avg_latency || 0)),
        successRate: parseInt(r.calls) > 0 ? parseFloat(((parseInt(r.successes) / parseInt(r.calls)) * 100).toFixed(2)) : 0,
      })),
      timeline: timeline.rows.map(r => ({
        hour: r.hour, credits: parseFloat(r.credits), tokens: parseInt(r.tokens), calls: parseInt(r.calls),
      })),
      queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT },
    };
  });

  // ── GET /health — gateway + backend health plus queue depth ──
  fastify.get("/health", async () => {
    try {
      const result = await proxyToInference("/health", null, "GET");
      return { status: "ok", inference: result, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
    } catch (err) {
      return { status: "down", error: err.message, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
    }
  });
}
|
|
299
|
-
|
|
300
|
-
/**
 * Minimal JSON-over-HTTP proxy to the inference backend at INFERENCE_URL.
 *
 * @param {string} path - Backend path (e.g. "/v1/chat/completions"), appended
 *   to INFERENCE_URL. FIX: any query string in `path` is now forwarded —
 *   previously only `url.pathname` was sent, silently dropping `?…` params.
 * @param {object|null} body - JSON-serializable request body, or null to send
 *   a body-less request.
 * @param {string} [method="POST"] - HTTP method.
 * @returns {Promise<object>} Parsed JSON response; non-JSON responses resolve
 *   as { raw: <text> }. Rejects on network error or after a 120s timeout.
 */
function proxyToInference(path, body, method = "POST") {
  return new Promise((resolve, reject) => {
    // Global WHATWG URL (available since Node 10) — no need for require("url").
    const url = new URL(INFERENCE_URL + path);
    const client = url.protocol === "https:" ? https : http;
    const bodyStr = body ? JSON.stringify(body) : null;

    const req = client.request({
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname + url.search, // FIX: keep the query string
      method,
      timeout: 120000, // long generations can be slow
      headers: {
        "Content-Type": "application/json",
        ...(GPU_KEY ? { "Authorization": `Bearer ${GPU_KEY}` } : {}),
        ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}),
      },
    }, (res) => {
      let data = "";
      res.on("data", (c) => { data += c; });
      res.on("end", () => { try { resolve(JSON.parse(data)); } catch { resolve({ raw: data }); } });
    });
    req.on("error", reject);
    req.on("timeout", () => { req.destroy(); reject(new Error("Inference timeout")); });
    if (bodyStr) req.write(bodyStr);
    req.end();
  });
}
|
|
328
|
-
|
|
329
|
-
// Export the route plugin for Fastify registration by the server entry point.
module.exports = routes;
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|