wolverine-ai 3.4.1 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +5 -0
- package/README.md +10 -6
- package/bin/wolverine.js +11 -1
- package/package.json +1 -3
- package/server/config/settings.json +18 -1
- package/server/routes/inference.js +324 -0
- package/src/agent/agent-engine.js +86 -18
- package/src/agent/goal-loop.js +11 -6
- package/src/brain/brain.js +8 -4
- package/src/brain/embedder.js +1 -1
- package/src/brain/function-map.js +15 -1
- package/src/core/ai-client.js +21 -1
- package/src/core/error-hook.js +17 -1
- package/src/core/models.js +8 -1
- package/src/core/wolverine.js +69 -5
- package/src/dashboard/server.js +2 -2
- package/src/logger/pricing.js +8 -0
- package/src/logger/token-tracker.js +47 -5
- package/src/monitor/perf-monitor.js +1 -1
- package/src/notifications/notifier.js +1 -1
- package/src/platform/telemetry.js +2 -1
- package/src/security/injection-detector.js +1 -1
- package/src/skills/loop-guard.js +9 -2
- package/CLAUDE.md +0 -146
package/.env.example
CHANGED
|
@@ -6,6 +6,11 @@
|
|
|
6
6
|
# Your OpenAI API key (required)
|
|
7
7
|
OPENAI_API_KEY=
|
|
8
8
|
ANTHROPIC_API_KEY=
|
|
9
|
+
|
|
10
|
+
# ── Wolverine Inference (self-hosted models) ─────────────────────
|
|
11
|
+
# Get your API key at wolverinenode.xyz — $1 = 100 credits
|
|
12
|
+
# Set provider to "wolverine" in server/config/settings.json
|
|
13
|
+
WOLVERINE_API_KEY=
|
|
9
14
|
# ── Dashboard Admin Key (make your own) ──────────────────────────────────────────
|
|
10
15
|
# Required for the agent command interface on the dashboard.
|
|
11
16
|
# Generate: node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"
|
package/README.md
CHANGED
|
@@ -70,7 +70,7 @@ wolverine/
|
|
|
70
70
|
│ ├── core/ ← Wolverine engine
|
|
71
71
|
│ │ ├── wolverine.js ← Heal pipeline + goal loop
|
|
72
72
|
│ │ ├── runner.js ← Process manager (PM2-like)
|
|
73
|
-
│ │ ├── ai-client.js ←
|
|
73
|
+
│ │ ├── ai-client.js ← Dual provider client (OpenAI + Anthropic)
|
|
74
74
|
│ │ ├── models.js ← 10-model configuration system
|
|
75
75
|
│ │ ├── verifier.js ← Fix verification (syntax + boot probe)
|
|
76
76
|
│ │ ├── error-parser.js ← Stack trace parsing + error classification
|
|
@@ -81,7 +81,7 @@ wolverine/
|
|
|
81
81
|
│ │ ├── system-info.js ← Machine detection (cores, RAM, cloud, containers)
|
|
82
82
|
│ │ └── cluster-manager.js← Auto-scaling worker management
|
|
83
83
|
│ ├── agent/ ← AI agent system
|
|
84
|
-
│ │ ├── agent-engine.js ← Multi-turn agent with
|
|
84
|
+
│ │ ├── agent-engine.js ← Multi-turn agent with 18 tools + 45s per-call timeout
|
|
85
85
|
│ │ ├── goal-loop.js ← Goal-driven repair loop
|
|
86
86
|
│ │ ├── research-agent.js← Deep research + learning from failures
|
|
87
87
|
│ │ └── sub-agents.js ← 7 specialized sub-agents (explore/plan/fix/verify/...)
|
|
@@ -151,8 +151,9 @@ Server crashes
|
|
|
151
151
|
|
|
152
152
|
Operational Fix (zero AI tokens):
|
|
153
153
|
→ "Cannot find module 'cors'" → npm install cors (instant, free)
|
|
154
|
-
→ ENOENT on config file →
|
|
154
|
+
→ ENOENT on config file → read source code, infer expected fields, create with correct structure
|
|
155
155
|
→ EACCES/EPERM → chmod 755
|
|
156
|
+
→ EADDRINUSE → find and kill stale process on port
|
|
156
157
|
→ If operational fix works → done. No AI needed.
|
|
157
158
|
|
|
158
159
|
Goal Loop (iterate until fixed or exhausted):
|
|
@@ -215,7 +216,7 @@ The AI agent has 18 built-in tools (inspired by [claw-code](https://github.com/u
|
|
|
215
216
|
| `grep_code` | File | Regex search across codebase with context lines |
|
|
216
217
|
| `list_dir` | File | List directory contents with sizes (find misplaced files) |
|
|
217
218
|
| `move_file` | File | Move or rename files (fix structure problems) |
|
|
218
|
-
| `bash_exec` | Shell | Sandboxed shell execution (npm install, chmod, kill, etc.) |
|
|
219
|
+
| `bash_exec` | Shell | Sandboxed shell execution (npm install, chmod, kill, etc.) 30s default, 60s cap |
|
|
219
220
|
| `git_log` | Shell | View recent commit history |
|
|
220
221
|
| `git_diff` | Shell | View uncommitted changes |
|
|
221
222
|
| `inspect_db` | Database | List tables, show schema, run SELECT on SQLite databases |
|
|
@@ -439,8 +440,11 @@ Three layers prevent token waste:
|
|
|
439
440
|
| **Empty stderr guard** | Signal kills, clean shutdowns with no error | $0.00 |
|
|
440
441
|
| **Loop guard** | Same error failing 3+ times in 10min → files bug report, stops healing | $0.00 after detection |
|
|
441
442
|
| **Global rate limit** | Max 5 heals per 5 minutes regardless of error | Caps total spend |
|
|
443
|
+
| **Per-API-call timeout** | 45s timeout on each AI call — prevents indefinite agent hangs | Saves time + tokens |
|
|
444
|
+
| **Heal timeout** | 5-minute overall heal timeout via Promise.race | Prevents stuck heals |
|
|
445
|
+
| **SIGTERM grace period** | 3s startup grace ignores SIGTERM — prevents restart scripts killing new process | Prevents shutdown loops |
|
|
442
446
|
|
|
443
|
-
**Process dedup:** PID file ensures only one wolverine instance runs. Kills old process on startup.
|
|
447
|
+
**Process dedup:** PID file ensures only one wolverine instance runs. Kills old process on startup. Exit handler only deletes PID file if it still belongs to current process (prevents race condition on restart).
|
|
444
448
|
|
|
445
449
|
**Bug reports:** When loop guard triggers, generates a security-scanned report (no secrets/injection patterns) and sends to the platform backend for human review.
|
|
446
450
|
|
|
@@ -450,7 +454,7 @@ Three layers prevent token waste:
|
|
|
450
454
|
|
|
451
455
|
| Technique | What it does | Cost |
|
|
452
456
|
|-----------|-------------|------|
|
|
453
|
-
| **Dynamic system prompt** | Simple errors get 400-token prompt with 7 tools. Complex get 1200 with 18 + strategy | 50% on 70% of heals |
|
|
457
|
+
| **Dynamic system prompt** | Simple errors get 400-token prompt with 7 tools. Complex get 1200 with 18 + fast-fix strategy table | 50% on 70% of heals |
|
|
454
458
|
| **Brain namespace isolation** | Seed docs (20K tokens) excluded from error heals — only searched for wolverine queries | 50% context reduction |
|
|
455
459
|
| **Prompt caching** | Anthropic system prompt cached server-side — 90% cheaper on repeat calls | 12-16K tokens saved per heal |
|
|
456
460
|
| **Tool result truncation** | Tool output capped at 4K chars — prevents context blowup from large reads | Up to 30K saved per turn |
|
package/bin/wolverine.js
CHANGED
|
@@ -152,13 +152,23 @@ console.log("");
|
|
|
152
152
|
|
|
153
153
|
const runner = new WolverineRunner(scriptPath, { cwd: process.cwd() });
|
|
154
154
|
|
|
155
|
+
// Grace period: ignore SIGTERM for 3s after startup.
|
|
156
|
+
// Prevents restart scripts using `pkill -f wolverine.js` from killing
|
|
157
|
+
// both the old AND newly spawned process.
|
|
158
|
+
let startupGrace = true;
|
|
159
|
+
setTimeout(() => { startupGrace = false; }, 3000);
|
|
160
|
+
|
|
155
161
|
process.on("SIGINT", () => {
|
|
156
|
-
console.log(chalk.yellow(`\n\n👋 Shutting down Wolverine
|
|
162
|
+
console.log(chalk.yellow(`\n\n👋 Shutting down Wolverine...`));
|
|
157
163
|
runner.stop();
|
|
158
164
|
process.exit(0);
|
|
159
165
|
});
|
|
160
166
|
|
|
161
167
|
process.on("SIGTERM", () => {
|
|
168
|
+
if (startupGrace) {
|
|
169
|
+
console.log(chalk.yellow(" ⚡ Ignoring SIGTERM during startup grace period (3s)"));
|
|
170
|
+
return;
|
|
171
|
+
}
|
|
162
172
|
runner.stop();
|
|
163
173
|
process.exit(0);
|
|
164
174
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wolverine-ai",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.6.0",
|
|
4
4
|
"description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -49,8 +49,6 @@
|
|
|
49
49
|
"src/",
|
|
50
50
|
"server/",
|
|
51
51
|
"examples/",
|
|
52
|
-
"README.md",
|
|
53
|
-
"CLAUDE.md",
|
|
54
52
|
".env.example"
|
|
55
53
|
],
|
|
56
54
|
"dependencies": {
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"env": "development"
|
|
6
6
|
},
|
|
7
7
|
|
|
8
|
-
"provider": "
|
|
8
|
+
"provider": "wolverine",
|
|
9
9
|
|
|
10
10
|
"openai_settings": {
|
|
11
11
|
"reasoning": "gpt-5.4-mini",
|
|
@@ -43,6 +43,18 @@
|
|
|
43
43
|
"embedding": "text-embedding-3-small"
|
|
44
44
|
},
|
|
45
45
|
|
|
46
|
+
"wolverine_settings": {
|
|
47
|
+
"reasoning": "wolverine-test-1",
|
|
48
|
+
"coding": "wolverine-test-1",
|
|
49
|
+
"chat": "wolverine-test-1",
|
|
50
|
+
"tool": "wolverine-test-1",
|
|
51
|
+
"classifier": "wolverine-test-1",
|
|
52
|
+
"audit": "wolverine-test-1",
|
|
53
|
+
"compacting": "wolverine-test-1",
|
|
54
|
+
"research": "wolverine-test-1",
|
|
55
|
+
"embedding": "text-embedding-3-small"
|
|
56
|
+
},
|
|
57
|
+
|
|
46
58
|
"server": {
|
|
47
59
|
"port": 3000,
|
|
48
60
|
"maxRetries": 3,
|
|
@@ -84,6 +96,11 @@
|
|
|
84
96
|
"intervalMs": 300000
|
|
85
97
|
},
|
|
86
98
|
|
|
99
|
+
"platform": {
|
|
100
|
+
"apiKey": "",
|
|
101
|
+
"cors": ["http://localhost:3000"]
|
|
102
|
+
},
|
|
103
|
+
|
|
87
104
|
"dashboard": {},
|
|
88
105
|
|
|
89
106
|
"cors": {
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
const https = require("https");
|
|
2
|
+
const http = require("http");
|
|
3
|
+
const crypto = require("crypto");
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Wolverine Inference API
|
|
7
|
+
*
|
|
8
|
+
* Credit system: $1 = 100 credits. 1 credit = $0.01 of compute.
|
|
9
|
+
* Token pricing (in credits per million tokens):
|
|
10
|
+
* wolverine-test-1: 1 credit input / 4 credits output per 1M tokens
|
|
11
|
+
* (= $0.01/$0.04 per 1M — 15x cheaper than gpt-4o-mini, 80x cheaper than haiku)
|
|
12
|
+
*
|
|
13
|
+
* Rate limiting: per API key, configurable per tier.
|
|
14
|
+
* Queue: when GPU is at capacity, requests queue with timeout.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const INFERENCE_URL = process.env.WOLVERINE_INFERENCE_URL || "https://clips-third-players-binding.trycloudflare.com";
|
|
18
|
+
|
|
19
|
+
// Pricing in CREDITS per million tokens ($1 = 100 credits)
|
|
20
|
+
const MODEL_PRICING = {
|
|
21
|
+
"wolverine-test-1": { input: 1.0, output: 4.0 }, // $0.01/$0.04 per 1M
|
|
22
|
+
"wolverine-coding": { input: 1.0, output: 4.0 },
|
|
23
|
+
"wolverine-reasoning": { input: 2.5, output: 10.0 }, // heavier model when available
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
const MODEL_MAP = {
|
|
27
|
+
"wolverine-test-1": "wolverine-test-1",
|
|
28
|
+
"wolverine-coding": "wolverine-test-1",
|
|
29
|
+
"wolverine-reasoning": "wolverine-test-1",
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
const TIER_LIMITS = {
|
|
33
|
+
free: { rpm: 10, maxTokens: 1024 },
|
|
34
|
+
starter: { rpm: 60, maxTokens: 4096 },
|
|
35
|
+
pro: { rpm: 300, maxTokens: 4096 },
|
|
36
|
+
admin: { rpm: 9999, maxTokens: 4096 },
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
/**
 * Compute the cost, in credits, of a single request.
 *
 * Rates come from MODEL_PRICING (credits per one million tokens). A model name
 * that has no pricing entry is billed at the "wolverine-test-1" rate.
 *
 * @param {string} model - Model name as requested by the client.
 * @param {number} inputTokens - Prompt token count.
 * @param {number} outputTokens - Completion token count.
 * @returns {number} Cost in credits.
 */
function tokenCost(model, inputTokens, outputTokens) {
  // Own-property lookup only: the model name originates from the request body,
  // and a name such as "constructor" or "__proto__" would otherwise resolve to
  // an inherited Object.prototype member, producing undefined rates and a NaN
  // cost that ends up in the billing UPDATE.
  const isPriced = Object.prototype.hasOwnProperty.call(MODEL_PRICING, model);
  const p = isPriced ? MODEL_PRICING[model] : MODEL_PRICING["wolverine-test-1"];
  return ((inputTokens / 1_000_000) * p.input) + ((outputTokens / 1_000_000) * p.output);
}
|
|
43
|
+
|
|
44
|
+
// ── Request Queue (handles GPU saturation) ──
|
|
45
|
+
const queue = [];
|
|
46
|
+
let activeRequests = 0;
|
|
47
|
+
const MAX_CONCURRENT = 8; // vLLM max-num-seqs
|
|
48
|
+
const QUEUE_TIMEOUT_MS = 30000;
|
|
49
|
+
|
|
50
|
+
/**
 * Acquire one inference slot.
 *
 * Resolves immediately when fewer than MAX_CONCURRENT requests are active.
 * Otherwise the caller is parked in `queue` until dequeue() hands it a slot,
 * and the promise rejects if that has not happened within QUEUE_TIMEOUT_MS.
 *
 * @returns {Promise<void>} Settles once a slot is held (or the wait timed out).
 */
function enqueue() {
  return new Promise((grant, fail) => {
    const hasFreeSlot = activeRequests < MAX_CONCURRENT;
    if (hasFreeSlot) {
      activeRequests += 1;
      grant();
      return;
    }

    let expiry;
    const waiter = {
      // Invoked by dequeue(): cancel the timeout and take the freed slot.
      resolve() {
        clearTimeout(expiry);
        activeRequests += 1;
        grant();
      },
      reject: fail,
    };

    expiry = setTimeout(function onExpire() {
      // Remove ourselves from the wait list so dequeue() never wakes a dead waiter.
      const position = queue.indexOf(waiter);
      if (position !== -1) {
        queue.splice(position, 1);
      }
      fail(new Error("Queue timeout — GPU at capacity. Try again in a few seconds."));
    }, QUEUE_TIMEOUT_MS);

    queue.push(waiter);
  });
}
|
|
66
|
+
|
|
67
|
+
/**
 * Release one inference slot and, if anyone is waiting, wake the oldest waiter.
 * The active counter is clamped so it never drops below zero.
 */
function dequeue() {
  const remaining = activeRequests - 1;
  activeRequests = remaining < 0 ? 0 : remaining;

  if (queue.length === 0) {
    return;
  }

  // FIFO hand-off: the waiter's resolve() re-increments the counter itself.
  const waiter = queue.shift();
  waiter.resolve();
}
|
|
74
|
+
|
|
75
|
+
/**
 * Fastify plugin that registers the Wolverine inference API:
 *   POST /chat/completions, GET /models, POST /keys/create,
 *   POST /keys/add-credits, GET /keys, GET /credits, GET /usage, GET /health.
 *
 * @param {object} fastify - Fastify instance the routes are attached to.
 * @returns {Promise<void>}
 */
async function routes(fastify) {
  // NOTE(review): `pool` is used as a node-postgres style pool (`pool.query(sql, params)`
  // returning `{ rows }`, `$n` placeholders) — confirm against ../lib/db.
  const { pool } = require("../lib/db");

  // Rate limit state (in-memory): apiKey → { count, resetAt }
  const rateWindows = new Map();

  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  // Constant-time comparison against the configured platform key. An empty or
  // non-string configured key never matches.
  function matchesPlatformKey(candidate, configured) {
    if (typeof configured !== "string" || configured.length === 0) return false;
    const a = Buffer.from(String(candidate));
    const b = Buffer.from(configured);
    return a.length === b.length && crypto.timingSafeEqual(a, b);
  }

  /**
   * preHandler: resolves the caller's account from the API key, then enforces
   * the credit balance and the per-key requests-per-minute window.
   * On success the account row is stored on `request.account`.
   */
  async function authenticate(request, reply) {
    const apiKey = request.headers.authorization?.replace("Bearer ", "") || request.headers["x-api-key"];
    if (!apiKey) return reply.code(401).send({ error: { message: "API key required. Pass via Authorization: Bearer <key>", type: "auth_error" } });

    // Platform key bypass (no credit check, no rate limit). The settings file is optional.
    let settings = {};
    try { settings = require("../config/settings.json"); } catch {}
    if (matchesPlatformKey(apiKey, settings.platform?.apiKey)) {
      request.account = { api_key: apiKey, owner: "platform", tier: "admin", credits_remaining: 999999, rate_limit_rpm: 9999 };
      return;
    }

    const result = await pool.query("SELECT * FROM api_credits WHERE api_key = $1", [apiKey]);
    if (result.rows.length === 0) return reply.code(401).send({ error: { message: "Invalid API key", type: "auth_error" } });

    const account = result.rows[0];

    // Credit check
    if (parseFloat(account.credits_remaining) <= 0) {
      return reply.code(402).send({ error: { message: "Insufficient credits. Add credits at wolverinenode.xyz", type: "billing_error", credits_remaining: 0 } });
    }

    // Rate limit: fixed 60s window per key
    const now = Date.now();
    const window = rateWindows.get(apiKey) || { count: 0, resetAt: now + 60000 };
    if (now > window.resetAt) { window.count = 0; window.resetAt = now + 60000; }
    const limit = account.rate_limit_rpm || TIER_LIMITS[account.tier]?.rpm || 10;
    if (window.count >= limit) {
      const retryAfter = Math.ceil((window.resetAt - now) / 1000);
      return reply.code(429).send({ error: { message: `Rate limit: ${limit} requests/min. Retry in ${retryAfter}s`, type: "rate_limit", retry_after: retryAfter } });
    }
    window.count++;
    rateWindows.set(apiKey, window);

    request.account = account;
  }

  // ── POST /chat/completions ──
  fastify.post("/chat/completions", { preHandler: authenticate }, async (request, reply) => {
    const body = request.body || {};
    const requestedModel = body.model || "wolverine-test-1";
    if (typeof requestedModel !== "string") {
      return reply.code(400).send({ error: { message: "model must be a string", type: "validation_error" } });
    }
    const account = request.account;
    const tier = TIER_LIMITS[account.tier] || TIER_LIMITS.free;
    const startMs = Date.now();

    // Enforce max tokens per tier. The cap is applied even when the client
    // omits max_tokens; otherwise leaving the field out bypasses the limit.
    const requestedMax = Number(body.max_tokens);
    const maxTokens = Number.isFinite(requestedMax) && requestedMax > 0
      ? Math.min(requestedMax, tier.maxTokens)
      : tier.maxTokens;

    // Map model name for backend (own keys only; unknown names pass through).
    // Built as a copy so the caller's request body is not mutated.
    // stream is forced off: the proxy buffers the whole response and bills from
    // its JSON `usage` field, which a streamed (non-JSON) body does not provide.
    const backendBody = {
      ...body,
      model: hasOwn(MODEL_MAP, requestedModel) ? MODEL_MAP[requestedModel] : requestedModel,
      max_tokens: maxTokens,
      stream: false,
    };

    // Queue if GPU saturated. Record whether this request has to wait before
    // asking for a slot; the counter itself never exceeds MAX_CONCURRENT.
    const wasQueued = activeRequests >= MAX_CONCURRENT;
    try {
      await enqueue();
    } catch (err) {
      return reply.code(503).send({ error: { message: err.message, type: "capacity_error", queue_length: queue.length } });
    }

    try {
      const result = await proxyToInference("/v1/chat/completions", backendBody);
      const latencyMs = Date.now() - startMs;

      const usage = result.usage || {};
      const inputTokens = usage.prompt_tokens || 0;
      const outputTokens = usage.completion_tokens || 0;
      // Bill unknown / prototype-named models at the default rate instead of NaN.
      const billingModel = hasOwn(MODEL_PRICING, requestedModel) ? requestedModel : "wolverine-test-1";
      const cost = tokenCost(billingModel, inputTokens, outputTokens);

      // Bill credits (skip for platform)
      if (account.owner !== "platform") {
        await pool.query(
          "UPDATE api_credits SET credits_remaining = credits_remaining - $1, credits_used = credits_used + $1, last_used = NOW() WHERE api_key = $2",
          [cost, account.api_key]
        );
        await pool.query(
          "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, $3, $4, $5, $6, $7, true, $8)",
          [account.api_key, requestedModel, inputTokens, outputTokens, inputTokens + outputTokens, cost, latencyMs, "/v1/chat/completions"]
        );
      }

      // Rewrite response: report the public model name and attach billing metadata
      if (result.model) result.model = requestedModel;
      result.x_wolverine = {
        credits_used: Math.round(cost * 1000000) / 1000000,
        credits_remaining: Math.max(0, parseFloat(account.credits_remaining) - cost),
        latency_ms: latencyMs,
        queued: wasQueued,
      };

      return result;
    } catch (err) {
      if (account.owner !== "platform") {
        // Best-effort failure log; a logging error must not mask the original one.
        await pool.query(
          "INSERT INTO api_usage_log (api_key, model, input_tokens, output_tokens, total_tokens, cost, latency_ms, success, endpoint) VALUES ($1, $2, 0, 0, 0, 0, $3, false, $4)",
          [account.api_key, requestedModel, Date.now() - startMs, "/v1/chat/completions"]
        ).catch(() => {});
      }
      return reply.code(502).send({ error: { message: `Inference error: ${err.message}`, type: "inference_error" } });
    } finally {
      // Always release the slot taken by enqueue().
      dequeue();
    }
  });

  // ── GET /models ──
  fastify.get("/models", async () => ({
    object: "list",
    data: Object.entries(MODEL_PRICING).map(([id, p]) => ({
      id, object: "model", owned_by: "wolverine",
      created: Math.floor(Date.now() / 1000),
      pricing: { input_credits_per_million: p.input, output_credits_per_million: p.output, usd_per_credit: 0.01 },
    })),
  }));

  // ── POST /keys/create — generate new API key ──
  fastify.post("/keys/create", { preHandler: authenticate }, async (request, reply) => {
    const account = request.account;
    if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can create API keys", type: "auth_error" } });

    const { owner, email, credits, tier, rpm } = request.body || {};
    if (!owner) return reply.code(400).send({ error: { message: "owner required", type: "validation_error" } });

    const newKey = "wlv_" + crypto.randomBytes(24).toString("hex");
    const keyTier = tier || "free";
    // `??` so an explicit credits: 0 is honoured instead of becoming the free-tier default.
    const keyCredits = Number(credits ?? (keyTier === "free" ? 10 : 0));
    if (!Number.isFinite(keyCredits) || keyCredits < 0) {
      return reply.code(400).send({ error: { message: "credits must be a non-negative number", type: "validation_error" } });
    }
    const keyRpm = rpm || TIER_LIMITS[keyTier]?.rpm || 10;

    await pool.query(
      "INSERT INTO api_credits (api_key, owner, email, credits_remaining, tier, plan_name, rate_limit_rpm) VALUES ($1, $2, $3, $4, $5, $6, $7)",
      [newKey, owner, email || null, keyCredits, keyTier, keyTier, keyRpm]
    );

    return { api_key: newKey, owner, tier: keyTier, credits: keyCredits, rate_limit_rpm: keyRpm };
  });

  // ── POST /keys/add-credits — add credits to a key ──
  fastify.post("/keys/add-credits", { preHandler: authenticate }, async (request, reply) => {
    const account = request.account;
    if (account.tier !== "admin") return reply.code(403).send({ error: { message: "Only admins can add credits", type: "auth_error" } });

    const { api_key, credits } = request.body || {};
    if (!api_key || !credits) return reply.code(400).send({ error: { message: "api_key and credits required" } });
    const amount = Number(credits);
    if (!Number.isFinite(amount)) {
      return reply.code(400).send({ error: { message: "credits must be a finite number", type: "validation_error" } });
    }

    // Single statement: update and read back the new balance; no row means the key does not exist.
    const updated = await pool.query(
      "UPDATE api_credits SET credits_remaining = credits_remaining + $1 WHERE api_key = $2 RETURNING credits_remaining",
      [amount, api_key]
    );
    if (updated.rows.length === 0) {
      return reply.code(404).send({ error: { message: "API key not found", type: "not_found" } });
    }
    return { api_key, credits_added: amount, credits_remaining: parseFloat(updated.rows[0].credits_remaining) };
  });

  // ── GET /keys — list all keys (admin only) ──
  fastify.get("/keys", { preHandler: authenticate }, async (request, reply) => {
    if (request.account.tier !== "admin") return reply.code(403).send({ error: { message: "Admin only" } });
    const { rows } = await pool.query("SELECT api_key, owner, email, tier, credits_remaining, credits_used, rate_limit_rpm, created_at, last_used FROM api_credits ORDER BY created_at DESC");
    return { keys: rows };
  });

  // ── GET /credits ──
  fastify.get("/credits", { preHandler: authenticate }, async (request, reply) => {
    const a = request.account;
    return {
      credits_remaining: parseFloat(a.credits_remaining),
      credits_used: parseFloat(a.credits_used || 0),
      usd_remaining: parseFloat(a.credits_remaining) * 0.01,
      usd_used: parseFloat(a.credits_used || 0) * 0.01,
      tier: a.tier, rate_limit_rpm: a.rate_limit_rpm, owner: a.owner,
    };
  });

  // ── GET /usage ──
  fastify.get("/usage", { preHandler: authenticate }, async (request, reply) => {
    const apiKey = request.account.api_key;
    const period = request.query.period || "7d";
    // Whitelisted intervals only; anything else falls back to 7 days.
    const interval = { "1h": "1 hour", "1d": "1 day", "7d": "7 days", "30d": "30 days" }[period] || "7 days";

    // The two aggregations are independent, so run them concurrently.
    const [summary, timeline] = await Promise.all([
      pool.query(
        `SELECT model, COUNT(*) AS calls, SUM(input_tokens) AS input, SUM(output_tokens) AS output,
                SUM(total_tokens) AS tokens, SUM(cost) AS credits_spent, AVG(latency_ms) AS avg_latency,
                COUNT(*) FILTER (WHERE success) AS successes
         FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
         GROUP BY model ORDER BY credits_spent DESC`, [apiKey, interval]
      ),
      pool.query(
        `SELECT date_trunc('hour', timestamp) AS hour, SUM(cost) AS credits, SUM(total_tokens) AS tokens, COUNT(*) AS calls
         FROM api_usage_log WHERE api_key = $1 AND timestamp > NOW() - $2::interval
         GROUP BY hour ORDER BY hour`, [apiKey, interval]
      ),
    ]);

    const totalCredits = summary.rows.reduce((s, r) => s + parseFloat(r.credits_spent || 0), 0);

    return {
      period,
      total_credits_spent: Math.round(totalCredits * 1000000) / 1000000,
      total_usd_spent: Math.round(totalCredits * 0.01 * 1000000) / 1000000,
      byModel: summary.rows.map(r => ({
        model: r.model, calls: parseInt(r.calls, 10), input: parseInt(r.input || 0, 10), output: parseInt(r.output || 0, 10),
        tokens: parseInt(r.tokens || 0, 10), credits_spent: parseFloat(r.credits_spent || 0),
        usd_spent: parseFloat(r.credits_spent || 0) * 0.01,
        avgLatencyMs: Math.round(parseFloat(r.avg_latency || 0)),
        successRate: parseInt(r.calls, 10) > 0 ? parseFloat(((parseInt(r.successes, 10) / parseInt(r.calls, 10)) * 100).toFixed(2)) : 0,
      })),
      timeline: timeline.rows.map(r => ({
        hour: r.hour, credits: parseFloat(r.credits), tokens: parseInt(r.tokens, 10), calls: parseInt(r.calls, 10),
      })),
      queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT },
    };
  });

  // ── GET /health ──
  fastify.get("/health", async () => {
    try {
      const result = await proxyToInference("/health", null, "GET");
      return { status: "ok", inference: result, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
    } catch (err) {
      return { status: "down", error: err.message, queue: { active: activeRequests, waiting: queue.length, max: MAX_CONCURRENT } };
    }
  });
}
|
|
298
|
+
|
|
299
|
+
/**
 * Send one HTTP(S) request to the inference backend at INFERENCE_URL and
 * return its parsed response.
 *
 * @param {string} path - Path (optionally with query string) appended to INFERENCE_URL.
 * @param {object|null} body - JSON-serialisable payload, or null for no body.
 * @param {string} [method="POST"] - HTTP method.
 * @returns {Promise<object>} Parsed JSON body, or `{ raw: <text> }` when the body is not JSON.
 *   Rejects on socket errors, on the 120s timeout, and on HTTP status >= 400.
 */
function proxyToInference(path, body, method = "POST") {
  return new Promise((resolve, reject) => {
    const url = new URL(INFERENCE_URL + path);
    const client = url.protocol === "https:" ? https : http;
    const bodyStr = body ? JSON.stringify(body) : null;

    const req = client.request({
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      // Keep the query string; pathname alone silently drops it.
      path: url.pathname + url.search,
      method,
      timeout: 120000,
      headers: { "Content-Type": "application/json", ...(bodyStr ? { "Content-Length": Buffer.byteLength(bodyStr) } : {}) },
    }, (res) => {
      // Decode as one UTF-8 stream so a multi-byte character split across two
      // chunks is not corrupted by per-chunk Buffer→string conversion.
      res.setEncoding("utf8");
      let data = "";
      res.on("data", (c) => { data += c; });
      // A broken response stream must settle the promise, otherwise the caller
      // (and the queue slot it holds) waits forever.
      res.on("error", reject);
      res.on("end", () => {
        let parsed;
        try { parsed = JSON.parse(data); } catch { parsed = { raw: data }; }

        // Surface upstream failures instead of returning them as successes.
        if (res.statusCode >= 400) {
          const detail = parsed?.error?.message || parsed?.message || String(data).slice(0, 200);
          reject(new Error(`Upstream returned HTTP ${res.statusCode}${detail ? `: ${detail}` : ""}`));
          return;
        }
        resolve(parsed);
      });
    });
    req.on("error", reject);
    req.on("timeout", () => { req.destroy(); reject(new Error("Inference timeout")); });
    if (bodyStr) req.write(bodyStr);
    req.end();
  });
}
|
|
323
|
+
|
|
324
|
+
module.exports = routes;
|
|
@@ -414,15 +414,23 @@ class AgentEngine {
|
|
|
414
414
|
}
|
|
415
415
|
|
|
416
416
|
let response;
|
|
417
|
+
const AI_CALL_TIMEOUT_MS = 45000; // 45s per API call — prevents indefinite hangs
|
|
417
418
|
try {
|
|
418
|
-
response = await
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
419
|
+
response = await Promise.race([
|
|
420
|
+
aiCallWithHistory({
|
|
421
|
+
model,
|
|
422
|
+
messages: this.messages,
|
|
423
|
+
tools: allTools,
|
|
424
|
+
maxTokens: 4096,
|
|
425
|
+
}),
|
|
426
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error("AI call timed out after 45s")), AI_CALL_TIMEOUT_MS)),
|
|
427
|
+
]);
|
|
424
428
|
} catch (err) {
|
|
425
429
|
console.log(chalk.red(` Agent API error: ${err.message}`));
|
|
430
|
+
// On timeout, return what we have so far rather than failing completely
|
|
431
|
+
if (err.message.includes("timed out") && this.filesModified.length > 0) {
|
|
432
|
+
return { success: true, summary: `Partial fix applied (API timeout on turn ${this.turnCount})`, filesModified: this.filesModified, turnCount: this.turnCount, totalTokens: this.totalTokens };
|
|
433
|
+
}
|
|
426
434
|
return { success: false, summary: err.message, filesModified: [], turnCount: this.turnCount, totalTokens: this.totalTokens };
|
|
427
435
|
}
|
|
428
436
|
|
|
@@ -440,6 +448,15 @@ class AgentEngine {
|
|
|
440
448
|
const assistantMessage = choice.message || choice;
|
|
441
449
|
this.messages.push(assistantMessage);
|
|
442
450
|
|
|
451
|
+
// Parse Gemma-style text tool calls: "call:tool_name{json_args}" → structured tool_calls
|
|
452
|
+
if ((!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) && assistantMessage.content) {
|
|
453
|
+
const parsed = _parseTextToolCalls(assistantMessage.content);
|
|
454
|
+
if (parsed.length > 0) {
|
|
455
|
+
assistantMessage.tool_calls = parsed;
|
|
456
|
+
console.log(chalk.gray(` 🔧 Parsed ${parsed.length} tool call(s) from text output`));
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
|
|
443
460
|
if (!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) {
|
|
444
461
|
if (assistantMessage.content) {
|
|
445
462
|
console.log(chalk.gray(` 💬 ${(assistantMessage.content || "").slice(0, 200)}`));
|
|
@@ -1060,26 +1077,31 @@ Project: ${cwd}`;
|
|
|
1060
1077
|
function _fullPrompt(cwd, primaryFile) {
|
|
1061
1078
|
return `You are Wolverine, an autonomous Node.js server repair agent. Diagnose and fix the error.
|
|
1062
1079
|
|
|
1063
|
-
You are a full server doctor. Errors can be code bugs, missing deps, database problems, config issues, port conflicts, permissions, or corrupted state.
|
|
1080
|
+
You are a full server doctor. Errors can be code bugs, missing deps, database problems, config issues, port conflicts, permissions, or corrupted state.
|
|
1081
|
+
|
|
1082
|
+
CRITICAL: Act fast. You have limited turns. Fix immediately when the solution is obvious from the error. Only investigate when the cause is unclear.
|
|
1064
1083
|
|
|
1065
1084
|
For maximum efficiency, invoke multiple independent tools simultaneously rather than sequentially.
|
|
1066
1085
|
|
|
1067
1086
|
TOOLS: read_file, write_file, edit_file, glob_files, grep_code, list_dir, move_file, bash_exec, git_log, git_diff, inspect_db, run_db_fix, check_port, check_env, audit_deps, check_migration, web_fetch, done
|
|
1068
1087
|
|
|
1069
|
-
|
|
1070
|
-
- Cannot find module 'X' → bash_exec: npm install X
|
|
1071
|
-
- Cannot find module './X' →
|
|
1072
|
-
- ENOENT → write_file
|
|
1073
|
-
- EADDRINUSE → check_port
|
|
1074
|
-
- TypeError/ReferenceError → read_file
|
|
1088
|
+
FAST FIXES (act immediately, don't investigate):
|
|
1089
|
+
- Cannot find module 'X' → bash_exec: npm install X → done
|
|
1090
|
+
- Cannot find module './X' → grep for correct path → edit_file → done
|
|
1091
|
+
- ENOENT missing config/json file → read the code that loads it to see what fields it expects → write_file with required fields → done
|
|
1092
|
+
- EADDRINUSE → check_port → bash_exec: kill PID → done
|
|
1093
|
+
- TypeError/ReferenceError → read_file → edit_file → done
|
|
1094
|
+
- Missing env var → check_env → report it → done
|
|
1095
|
+
|
|
1096
|
+
INVESTIGATION (only when cause is unclear):
|
|
1075
1097
|
- Database error → inspect_db then run_db_fix
|
|
1076
|
-
-
|
|
1098
|
+
- Unknown errors → grep_code, list_dir to find root cause
|
|
1077
1099
|
|
|
1078
1100
|
RULES:
|
|
1079
|
-
1.
|
|
1080
|
-
2.
|
|
1081
|
-
3. bash_exec for operational fixes, edit_file for code, run_db_fix for data
|
|
1082
|
-
4.
|
|
1101
|
+
1. Fix on turn 1-2 when possible. Investigation is a last resort.
|
|
1102
|
+
2. For ENOENT config files: read the code that requires the file, then create it with the expected structure.
|
|
1103
|
+
3. bash_exec for operational fixes, edit_file for code, write_file for missing files, run_db_fix for data
|
|
1104
|
+
4. Always call done with summary when finished — never end without calling done.
|
|
1083
1105
|
${primaryFile ? `\nFile: ${primaryFile}` : ""}
|
|
1084
1106
|
Project: ${cwd}`;
|
|
1085
1107
|
}
|
|
@@ -1210,4 +1232,50 @@ function _runPostHook(toolName, toolInput, toolOutput, isError, cwd) {
|
|
|
1210
1232
|
} catch {}
|
|
1211
1233
|
}
|
|
1212
1234
|
|
|
1235
|
+
/**
 * Parse Gemma-style text tool calls into OpenAI tool_calls format.
 * Gemma outputs: "call:tool_name{json_args}" or "<|tool_call>call:tool_name{args}<tool_call|>"
 * We convert to: [{ id, type: "function", function: { name, arguments } }]
 *
 * Argument text is interpreted in this order:
 *   1. as the inside of a JSON object (after mapping the `<|"|>` token to `"`),
 *   2. the same with bare keys quoted (`path:"x"` → `"path":"x"`),
 *   3. as a complete JSON object on its own (paren form: `call:name({...})`),
 *   4. last resort: the raw text becomes the tool's most common single parameter.
 *
 * @param {string} content - Assistant message text.
 * @returns {Array<{id: string, type: string, function: {name: string, arguments: string}}>}
 *   Parsed calls; empty when the text contains none. `arguments` is always a
 *   JSON-encoded object.
 */
function _parseTextToolCalls(content) {
  if (!content) return [];

  const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);

  // Quote bare keys only where a key can appear (start of text, after "{" or ",").
  // String literals are matched first and returned untouched, so a value such
  // as "https://example.com" is never mistaken for a `https:` key.
  const quoteBareKeys = (text) => text.replace(
    /("(?:[^"\\]|\\.)*")|((?:^|[{,])\s*)(\w+)\s*:/g,
    (whole, quoted, lead, key) => (quoted !== undefined ? quoted : `${lead}"${key}":`)
  );

  const parseArgs = (name, argsStr) => {
    const normalized = argsStr.replace(/<\|"\|>/g, '"');
    const keyed = quoteBareKeys(normalized);
    const candidates = ["{" + normalized + "}", "{" + keyed + "}", normalized, keyed];
    for (const candidate of candidates) {
      try {
        const parsed = JSON.parse(candidate);
        // Tool arguments must be an object; a bare JSON string/number/array is
        // handled by the single-parameter fallback below instead.
        if (isPlainObject(parsed)) return parsed;
      } catch {
        // Not valid JSON in this shape — try the next candidate.
      }
    }

    // Last resort: treat as single string argument for the most common param
    const paramGuess = argsStr.replace(/['"<|>]/g, "").trim();
    if (name === "read_file" || name === "glob_files") return { path: paramGuess };
    if (name === "grep_code") return { pattern: paramGuess };
    if (name === "bash_exec") return { command: paramGuess };
    if (name === "write_file") return { path: paramGuess, content: "" };
    return { input: paramGuess };
  };

  const calls = [];
  const patterns = [
    /call:(\w+)\{((?:[^{}]|\{[^{}]*\})*)\}/g,       // call:name{args}, balanced with one level of nested braces
    /call:(\w+)\{([^}]*(?:\{[^}]*\}[^}]*)*)\}/g,    // lenient fallback for unbalanced braces
    /call:(\w+)\(([^)]*)\)/g,                       // call:name(args)
  ];
  for (const regex of patterns) {
    let match;
    while ((match = regex.exec(content)) !== null) {
      const name = match[1];
      const args = parseArgs(name, match[2]);
      calls.push({
        id: "call_" + Date.now().toString(36) + "_" + calls.length,
        type: "function",
        function: { name, arguments: JSON.stringify(args) },
      });
    }
    if (calls.length > 0) break; // use first matching pattern
  }
  return calls;
}
|
|
1280
|
+
|
|
1213
1281
|
module.exports = { AgentEngine, TOOL_DEFINITIONS, BLOCKED_COMMANDS };
|
package/src/agent/goal-loop.js
CHANGED
|
@@ -107,13 +107,18 @@ class GoalLoop {
|
|
|
107
107
|
explanation: attempt.explanation,
|
|
108
108
|
}).catch(() => {});
|
|
109
109
|
|
|
110
|
-
// Deep research after
|
|
111
|
-
if (iteration >=
|
|
110
|
+
// Deep research only after 3rd failure — avoid adding latency on early iterations
|
|
111
|
+
if (iteration >= 3) {
|
|
112
112
|
console.log(chalk.magenta(` 🔬 Triggering deep research after ${iteration} failures...`));
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
113
|
+
try {
|
|
114
|
+
const research = await Promise.race([
|
|
115
|
+
this.researcher.research(errorMessage, context),
|
|
116
|
+
new Promise((resolve) => setTimeout(() => resolve(null), 30000)), // 30s cap
|
|
117
|
+
]);
|
|
118
|
+
if (research) {
|
|
119
|
+
console.log(chalk.gray(` 🔬 Research insight: ${research.slice(0, 100)}`));
|
|
120
|
+
}
|
|
121
|
+
} catch {}
|
|
117
122
|
}
|
|
118
123
|
}
|
|
119
124
|
|