wolverine-ai 2.2.2 → 2.3.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "2.2.2",
3
+ "version": "2.3.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -463,6 +463,35 @@ Project root: ${this.cwd}${primaryFile ? `\nPrimary crash file: ${primaryFile}`
463
463
 
464
464
  console.log(chalk.gray(` šŸ¤– Agent turn ${this.turnCount}/${this.maxTurns} (${this.totalTokens} tokens used)`));
465
465
 
466
+ // Compact context every 3 turns to prevent token blowup
467
+ // Turn 6 without compacting: ~95K tokens. With compacting: ~20K tokens.
468
+ if (this.turnCount > 1 && this.turnCount % 3 === 0 && this.messages.length > 4) {
469
+ try {
470
+ const { aiCall } = require("./ai-client") || require("../core/ai-client");
471
+ const { getModel: _gm } = require("./models") || require("../core/models");
472
+ const historyToCompact = this.messages.slice(1, -2); // keep system + last exchange
473
+ if (historyToCompact.length > 2) {
474
+ const historyText = historyToCompact.map(m => `${m.role}: ${(m.content || "").slice(0, 500)}`).join("\n");
475
+ const compactResult = await aiCall({
476
+ model: _gm("compacting"),
477
+ systemPrompt: "Summarize this agent conversation history into a concise status report. Keep: files read, changes made, errors found, what was tried. Remove: full file contents, redundant tool results.",
478
+ userPrompt: historyText.slice(0, 8000),
479
+ maxTokens: 512,
480
+ category: "brain",
481
+ });
482
+ if (compactResult.content) {
483
+ this.messages = [
484
+ this.messages[0], // system prompt
485
+ { role: "assistant", content: `[Prior work summary]\n${compactResult.content}` },
486
+ { role: "user", content: "Continue from where you left off." },
487
+ ...this.messages.slice(-2), // last exchange
488
+ ];
489
+ console.log(chalk.gray(` šŸ“¦ Compacted ${historyToCompact.length} messages → summary (${compactResult.content.length} chars)`));
490
+ }
491
+ }
492
+ } catch { /* compacting failed — continue with full context */ }
493
+ }
494
+
466
495
  let response;
467
496
  try {
468
497
  response = await aiCallWithHistory({
@@ -57,16 +57,17 @@ class GoalLoop {
57
57
  }
58
58
  }
59
59
 
60
- // Attempt the fix
60
+ // Attempt the fix — pass prior attempts so the handler can include concise summary
61
61
  let attempt;
62
62
  try {
63
- attempt = await this.onAttempt(iteration, context);
63
+ attempt = await this.onAttempt(iteration, context, this._attempts);
64
64
  } catch (err) {
65
65
  attempt = { healed: false, explanation: `Error: ${err.message}` };
66
66
  }
67
67
 
68
68
  this._attempts.push({
69
69
  iteration,
70
+ mode: attempt.mode || "unknown",
70
71
  success: attempt.healed,
71
72
  explanation: attempt.explanation || "No explanation",
72
73
  });
@@ -33,14 +33,17 @@ const AGENT_TOOL_SETS = {
33
33
  };
34
34
 
35
35
  // Default model + budget per agent type
36
+ // Cost optimization: triage agents use cheap models (classifier slot = Haiku),
37
+ // only the fixer needs the expensive coding model (Sonnet/Opus).
38
+ // This cuts sub-agent cost by ~90% (6 Haiku calls vs 6 Sonnet calls).
36
39
  const AGENT_CONFIGS = {
37
- explore: { model: "reasoning", maxTurns: 5, maxTokens: 10000 },
38
- plan: { model: "reasoning", maxTurns: 3, maxTokens: 8000 },
39
- fix: { model: "coding", maxTurns: 5, maxTokens: 15000 },
40
- verify: { model: "reasoning", maxTurns: 3, maxTokens: 5000 },
41
- research: { model: "research", maxTurns: 3, maxTokens: 10000 },
42
- security: { model: "audit", maxTurns: 3, maxTokens: 8000 },
43
- database: { model: "coding", maxTurns: 5, maxTokens: 15000 },
40
+ explore: { model: "classifier", maxTurns: 5, maxTokens: 15000 }, // Haiku — just reading
41
+ plan: { model: "classifier", maxTurns: 3, maxTokens: 10000 }, // Haiku — simple planning
42
+ fix: { model: "coding", maxTurns: 5, maxTokens: 50000 }, // Sonnet/Opus — needs reasoning
43
+ verify: { model: "classifier", maxTurns: 3, maxTokens: 8000 }, // Haiku — just checking
44
+ research: { model: "classifier", maxTurns: 3, maxTokens: 10000 }, // Haiku — summarization
45
+ security: { model: "audit", maxTurns: 3, maxTokens: 8000 }, // Haiku — pattern matching
46
+ database: { model: "coding", maxTurns: 5, maxTokens: 50000 }, // Sonnet/Opus — needs reasoning
44
47
  };
45
48
 
46
49
  // System prompts per agent type
@@ -57,12 +57,71 @@ function isReasoningModel(model) {
57
57
 
58
58
  function isAnthropicModel(model) { return detectProvider(model) === "anthropic"; }
59
59
 
60
+ /**
61
+ * Per-model max output token limits (with 10% overestimation buffer).
62
+ * These are the actual API limits — requesting more than this fails.
63
+ */
64
+ const MODEL_OUTPUT_LIMITS = {
65
+ // OpenAI — generous output limits
66
+ "gpt-4o": 17600, // 16384 + 10%
67
+ "gpt-4o-mini": 17600,
68
+ "gpt-5": 17600,
69
+ "gpt-5.4": 17600,
70
+ "gpt-5.4-mini": 17600,
71
+ "gpt-5.4-nano": 17600,
72
+ "gpt-5-nano": 17600,
73
+ "o1": 110000, // 100k + 10% (reasoning model, huge output)
74
+ "o1-mini": 72600, // 66k + 10%
75
+ "o3": 110000,
76
+ "o3-mini": 72600,
77
+ "o4-mini": 72600,
78
+ "gpt-5.1-codex": 17600,
79
+ "gpt-5.3-codex": 17600,
80
+ "codex-mini": 17600,
81
+ // Anthropic — each tier has different output limits
82
+ "claude-opus-4": 32000, // 32k max output (no buffer needed, already generous)
83
+ "claude-sonnet-4": 17600, // 16k + 10%
84
+ "claude-haiku-4": 8800, // 8k + 10%
85
+ "claude-3-5-sonnet": 8800,
86
+ "claude-3-5-haiku": 8800,
87
+ "claude-3-opus": 4400, // 4k + 10%
88
+ "claude-3-sonnet": 4400,
89
+ "claude-3-haiku": 4400,
90
+ };
91
+
92
+ /**
93
+ * Get the max output tokens for a model (with 10% buffer).
94
+ * Falls back to sensible defaults if model not in table.
95
+ */
96
+ function _getOutputLimit(model) {
97
+ // Exact match
98
+ if (MODEL_OUTPUT_LIMITS[model]) return MODEL_OUTPUT_LIMITS[model];
99
+ // Prefix match (handles dated versions like claude-sonnet-4-6, claude-haiku-4-5-20250414)
100
+ for (const [prefix, limit] of Object.entries(MODEL_OUTPUT_LIMITS)) {
101
+ if (model.startsWith(prefix)) return limit;
102
+ }
103
+ // Defaults with 10% buffer
104
+ if (isAnthropicModel(model)) return 8800; // 8k + 10% (safe Anthropic default)
105
+ return 17600; // 16k + 10% (safe OpenAI default)
106
+ }
107
+
108
+ /**
109
+ * Build token limit params for the API call.
110
+ * Respects per-model output limits and adds reasoning headroom.
111
+ */
60
112
  function tokenParam(model, limit) {
61
- const effectiveLimit = isReasoningModel(model) ? Math.max(limit * 4, 4096) : limit;
113
+ const maxOutput = _getOutputLimit(model);
114
+
115
+ // Reasoning models get 4x to accommodate chain-of-thought, but capped at model max
116
+ let effectiveLimit = isReasoningModel(model) ? Math.max(limit * 4, 4096) : limit;
117
+ effectiveLimit = Math.min(effectiveLimit, maxOutput);
118
+
119
+ // Anthropic uses max_tokens directly (handled in _anthropicCall)
120
+ if (isAnthropicModel(model)) return { max_tokens: effectiveLimit };
62
121
  if (isResponsesModel(model)) return { max_output_tokens: effectiveLimit };
63
122
  const usesNewParam = /^(o[1-9]|gpt-5|gpt-4o)/.test(model) || model.includes("nano");
64
123
  if (usesNewParam) return { max_completion_tokens: effectiveLimit };
65
- return { max_tokens: limit };
124
+ return { max_tokens: effectiveLimit };
66
125
  }
67
126
 
68
127
  // ── Unified AI Call ──
@@ -121,10 +180,11 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
121
180
 
122
181
  async function _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice }) {
123
182
  const client = _getAnthropicClient();
183
+ const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
124
184
 
125
185
  const params = {
126
186
  model,
127
- max_tokens: maxTokens,
187
+ max_tokens: outputLimit,
128
188
  messages: [{ role: "user", content: userPrompt }],
129
189
  };
130
190
 
@@ -203,9 +263,10 @@ async function _anthropicCallWithHistory({ model, messages, tools, maxTokens })
203
263
  }
204
264
  }
205
265
 
266
+ const outputLimit = Math.min(maxTokens, _getOutputLimit(model));
206
267
  const params = {
207
268
  model,
208
- max_tokens: maxTokens,
269
+ max_tokens: outputLimit,
209
270
  messages: merged,
210
271
  };
211
272
 
@@ -131,7 +131,14 @@ function bootProbe(scriptPath, cwd, originalErrorSignature) {
131
131
  * @param {object} routeContext — optional { path, method } for route-level testing
132
132
  */
133
133
  async function verifyFix(scriptPath, cwd, originalErrorSignature, routeContext) {
134
- const steps = routeContext?.path ? 3 : 2;
134
+ // Simple errors (TypeError, ReferenceError, SyntaxError) — trust syntax+boot, skip route probe.
135
+ // If the fix is wrong, ErrorMonitor will catch the 500 and re-trigger heal. This avoids
136
+ // the expensive cascade where a working fix gets rolled back because the route probe
137
+ // can't boot the full server in isolation.
138
+ const isSimpleError = /TypeError|ReferenceError|SyntaxError|Cannot find module/.test(originalErrorSignature || "");
139
+ const skipRouteProbe = isSimpleError;
140
+ const steps = (!skipRouteProbe && routeContext?.path) ? 3 : 2;
141
+
135
142
  console.log(chalk.yellow("\nšŸ”¬ Verifying fix...\n"));
136
143
 
137
144
  // Step 1: Syntax check
@@ -157,8 +164,9 @@ async function verifyFix(scriptPath, cwd, originalErrorSignature, routeContext)
157
164
  }
158
165
  console.log(chalk.green(" āœ… Process booted successfully"));
159
166
 
160
- // Step 3: Route probe (if we know which route was failing)
161
- if (routeContext?.path) {
167
+ // Step 3: Route probe — only for complex errors (not simple TypeError/ReferenceError)
168
+ // Simple errors: trust syntax+boot. ErrorMonitor is the safety net.
169
+ if (!skipRouteProbe && routeContext?.path) {
162
170
  console.log(chalk.gray(` [3/${steps}] Route probe: ${routeContext.method || "GET"} ${routeContext.path}...`));
163
171
  const routeResult = await routeProbe(scriptPath, cwd, routeContext);
164
172
  if (routeResult.status === "failed") {
@@ -170,6 +178,8 @@ async function verifyFix(scriptPath, cwd, originalErrorSignature, routeContext)
170
178
  } else {
171
179
  console.log(chalk.gray(` āš ļø Route probe skipped: ${routeResult.reason || "unknown"}`));
172
180
  }
181
+ } else if (skipRouteProbe && routeContext?.path) {
182
+ console.log(chalk.gray(` ⚔ Skipping route probe (simple error — ErrorMonitor is safety net)`));
173
183
  }
174
184
 
175
185
  return { verified: true, status: "fixed" };
@@ -203,7 +203,22 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
203
203
  } catch { /* non-fatal */ }
204
204
  }
205
205
 
206
- // 6. Research — check past attempts to avoid loops
206
+ // 6. Check brain for cached fix — if we fixed this exact error before, replay it (zero tokens)
207
+ if (brain && brain._initialized && hasFile && repairHistory) {
208
+ try {
209
+ const pastRepairs = repairHistory.getAll().filter(r =>
210
+ r.success && r.file === parsed.filePath && r.error &&
211
+ parsed.errorMessage.includes(r.error.split(":").pop()?.trim()?.slice(0, 30))
212
+ );
213
+ if (pastRepairs.length > 0) {
214
+ const cached = pastRepairs[pastRepairs.length - 1];
215
+ console.log(chalk.gray(` 🧠 Found cached fix for similar error (${cached.mode}, ${cached.id})`));
216
+ if (logger) logger.info("heal.cached", `Cached fix found: ${cached.resolution?.slice(0, 80)}`, { cachedId: cached.id });
217
+ }
218
+ } catch {}
219
+ }
220
+
221
+ // 7. Research — check past attempts to avoid loops
207
222
  const researcher = new ResearchAgent({ brain, logger });
208
223
  let researchContext = "";
209
224
  try {
@@ -211,24 +226,38 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
211
226
  if (researchContext) console.log(chalk.gray(` šŸ” Research: found past context for this error`));
212
227
  } catch {}
213
228
 
214
- // 7. Goal Loop — set goal, iterate until fixed or exhausted
215
- // Iteration 1: fast path (CODING_MODEL)
216
- // Iteration 2: agent path (REASONING_MODEL)
217
- // Iteration 3: deep research (RESEARCH_MODEL) + agent retry
229
+ // 7b. Token budget by error complexity — simple bugs get tight caps
230
+ const isSimpleError = /TypeError|ReferenceError|SyntaxError|Cannot find module/.test(parsed.errorMessage);
231
+ const isModerateError = /ECONNREFUSED|timeout|ENOENT|EACCES|EADDRINUSE/.test(parsed.errorMessage);
232
+ const tokenBudget = isSimpleError
233
+ ? { fast: 5000, agent: 20000, subAgent: 15000 }
234
+ : isModerateError
235
+ ? { fast: 10000, agent: 50000, subAgent: 30000 }
236
+ : { fast: 15000, agent: 100000, subAgent: 50000 };
237
+ console.log(chalk.gray(` šŸ’° Token budget: ${isSimpleError ? "simple" : isModerateError ? "moderate" : "complex"} (agent: ${tokenBudget.agent})`));
238
+
239
+ // 8. Goal Loop — set goal, iterate until fixed or exhausted
218
240
  const loop = new GoalLoop({
219
241
  maxIterations: parseInt(process.env.WOLVERINE_MAX_RETRIES, 10) || 3,
220
242
  researcher,
221
243
  logger,
222
244
  goal: `Fix: ${parsed.errorMessage.slice(0, 80)}`,
223
245
 
224
- onAttempt: async (iteration, researchCtx) => {
246
+ onAttempt: async (iteration, researchCtx, priorAttempts) => {
225
247
  // Create backup for this attempt
226
- // Full server/ backup — includes all files, configs, databases
227
248
  const bid = backupManager.createBackup(`heal attempt ${iteration}: ${parsed.errorMessage.slice(0, 60)}`);
228
249
  backupManager.setErrorSignature(bid, errorSignature);
229
250
  if (logger) logger.info(EVENT_TYPES.BACKUP_CREATED, `Backup ${bid} (iteration ${iteration})`, { backupId: bid });
230
251
 
231
- const fullContext = [brainContext, researchContext, researchCtx, envContext].filter(Boolean).join("\n");
252
+ // Build concise prior attempt summary instead of full context bleed
253
+ let priorSummary = "";
254
+ if (priorAttempts && priorAttempts.length > 0) {
255
+ priorSummary = "\nPRIOR ATTEMPTS (do NOT repeat):\n" + priorAttempts.map(a =>
256
+ `- Attempt ${a.iteration} (${a.mode}): ${a.explanation?.slice(0, 100)}`
257
+ ).join("\n") + "\n";
258
+ }
259
+
260
+ const fullContext = [brainContext, researchContext, researchCtx, envContext, priorSummary].filter(Boolean).join("\n");
232
261
 
233
262
  let result;
234
263
  if (iteration === 1 && hasFile) {
@@ -291,8 +320,8 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
291
320
  console.log(chalk.magenta(` šŸ¤– Agent path (${getModel("reasoning")})...`));
292
321
  const agent = new AgentEngine({
293
322
  sandbox, logger, cwd, mcp,
294
- maxTurns: 8,
295
- maxTokens: 25000,
323
+ maxTurns: isSimpleError ? 4 : 8,
324
+ maxTokens: tokenBudget.agent,
296
325
  });
297
326
 
298
327
  const agentResult = await agent.run({