wolverine-ai 4.0.4 → 4.1.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "4.0.4",
3
+ "version": "4.1.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -133,6 +133,10 @@ const SEED_DOCS = [
133
133
  text: "Configuration: hybrid-always architecture — no provider selection. Users pick the best model for each of 8 task roles directly in settings.json 'models' section. Mix and match: wolverine for audit, claude for reasoning, gpt for coding. Provider auto-detected from model name. Embedding is separate ('embedding' key) — always wolverine-embedding-1 billed through credits (proxies to text-embedding-3-small at 2x markup). Secrets in .env.local. Config priority: env vars > settings.json > defaults.",
134
134
  metadata: { topic: "configuration" },
135
135
  },
136
+ {
137
+ text: "AI client prompt caching: all 3 providers cache automatically. Anthropic: system prompt marked cache_control:ephemeral, 90% cheaper on repeat calls within 5 min TTL. OpenAI: automatic prefix caching for >=1024 token prefixes, 50% cheaper on cached input, tracked via usage.prompt_tokens_details.cached_tokens. Wolverine/llama.cpp: cache_prompt:true in request body reuses KV cache for identical prefixes between requests, near-zero TTFT on second+ call in a heal pipeline. Cache savings tracked in analytics: cacheCreation (tokens written to cache) and cacheRead (tokens served from cache).",
138
+ metadata: { topic: "prompt-caching" },
139
+ },
136
140
  {
137
141
  text: "Platform telemetry: lightweight background process, zero-config. Default platform: api.wolverinenode.xyz. Auto-registers on first run (retries every 60s until platform responds), saves key to .wolverine/platform-key. Heartbeat payload matches PLATFORM.md spec: instanceId, server (name/port/uptime/status/pid), process (memoryMB/cpuPercent), routes, repairs, usage (tokens/cost/calls/byCategory), brain, backups. Offline-resilient: queues up to 1440 heartbeats locally, drains on reconnect. No chalk dependency, cached version/key in memory, minimal IO. Opt out: WOLVERINE_TELEMETRY=false. Override URL: WOLVERINE_PLATFORM_URL.",
138
142
  metadata: { topic: "platform-telemetry" },
@@ -21,8 +21,9 @@ function _extractTokens(usage) {
21
21
  output: usage.completion_tokens || usage.output_tokens || 0,
22
22
  // Anthropic cache fields
23
23
  cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
24
- // OpenAI uses cache_read_tokens, Anthropic uses cache_read_input_tokens
25
- cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens || 0,
24
+ // OpenAI prompt_tokens_details.cached_tokens + Anthropic cache_read_input_tokens
25
+ cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens
26
+ || usage.prompt_tokens_details?.cached_tokens || 0,
26
27
  };
27
28
  }
28
29
 
@@ -520,19 +521,20 @@ async function _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens, t
520
521
  if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
521
522
  messages.push({ role: "user", content: userPrompt });
522
523
 
523
- // No temperature for o-series and gpt-5+ (forbidden, causes error)
524
524
  const noTemp = /^(o[1-9]|gpt-5)/.test(model);
525
+ const isWolverine = detectProvider(model) === "wolverine";
525
526
  const params = {
526
527
  model, messages,
527
528
  ...(!noTemp ? { temperature: 0 } : {}),
528
529
  ...tokenParam(model, maxTokens),
529
530
  ..._reasoningParams(model),
531
+ // Prompt caching: llama.cpp reuses KV cache for identical prefixes
532
+ ...(isWolverine ? { cache_prompt: true } : {}),
530
533
  };
531
534
 
532
535
  if (tools && tools.length > 0) {
533
536
  params.tools = tools;
534
537
  params.tool_choice = toolChoice || "auto";
535
- // Disable parallel calls for reliability — sequential is more predictable for healing
536
538
  params.parallel_tool_calls = false;
537
539
  }
538
540
 
@@ -589,11 +591,14 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
589
591
 
590
592
  async function _chatCallWithHistory(openai, { model, messages, tools, maxTokens }) {
591
593
  const noTemp = /^(o[1-9]|gpt-5)/.test(model);
594
+ const isWolverine = detectProvider(model) === "wolverine";
592
595
  const params = {
593
596
  model, messages,
594
597
  ...(!noTemp ? { temperature: 0 } : {}),
595
598
  ...tokenParam(model, maxTokens),
596
599
  ..._reasoningParams(model),
600
+ // Prompt caching: llama.cpp KV cache reuse for multi-turn agent conversations
601
+ ...(isWolverine ? { cache_prompt: true } : {}),
597
602
  };
598
603
  if (tools && tools.length > 0) {
599
604
  params.tools = tools;
@@ -459,6 +459,13 @@ class WolverineRunner {
459
459
  return;
460
460
  }
461
461
 
462
+ // #28: SIGKILL = likely OOM — synthesize useful stderr for the heal pipeline
463
+ if (signal === "SIGKILL" && (!this._stderrBuffer.trim() || this._stderrBuffer.trim().length < 10)) {
464
+ this._stderrBuffer = `Process killed by SIGKILL (possible OOM). Memory limit may have been exceeded. Check memory usage patterns and reduce memory consumption.\nExit code: ${code}, Signal: ${signal}`;
465
+ console.log(chalk.red(`\n💀 Process killed by SIGKILL (possible OOM)`));
466
+ this.logger.error(EVENT_TYPES.PROCESS_CRASH, "SIGKILL — possible OOM", { exitCode: code, signal });
467
+ }
468
+
462
469
  // Killed by signal with no stderr — just restart, don't waste tokens healing
463
470
  if (!this._stderrBuffer.trim() || this._stderrBuffer.trim().length < 10) {
464
471
  console.log(chalk.yellow(`\n⚠️ Process killed (code: ${code}, signal: ${signal}) — no error to heal, restarting`));
@@ -483,13 +490,28 @@ class WolverineRunner {
483
490
  }
484
491
 
485
492
  this.retryCount++;
486
- await this._healAndRestart();
493
+ // #3: Guard against unhandled rejections — don't let heal errors crash the parent
494
+ try {
495
+ await this._healAndRestart();
496
+ } catch (healErr) {
497
+ console.log(chalk.red(` ⚠️ Heal error (recovering): ${healErr.message}`));
498
+ this._healInProgress = false;
499
+ this._healStatus = null;
500
+ if (this.running) this._spawn(); // restart without healing
501
+ }
487
502
  });
488
503
 
489
504
  this.child.on("error", (err) => {
490
505
  console.log(chalk.red(`Failed to start process: ${err.message}`));
491
506
  this.logger.error(EVENT_TYPES.PROCESS_CRASH, `Failed to start: ${err.message}`);
492
- this.running = false;
507
+ // #10: Retry spawn after delay instead of permanently dying
508
+ if (this.running && this.retryCount < this.maxRetries) {
509
+ this.retryCount++;
510
+ console.log(chalk.yellow(` Retrying spawn in 5s (attempt ${this.retryCount}/${this.maxRetries})...`));
511
+ setTimeout(() => { if (this.running) this._spawn(); }, 5000);
512
+ } else {
513
+ this.running = false;
514
+ }
493
515
  });
494
516
 
495
517
  // IPC channel: child reports caught 500 errors (Fastify/Express)
@@ -626,9 +648,14 @@ class WolverineRunner {
626
648
  }
627
649
  }
628
650
  } catch (err) {
629
- console.log(chalk.red(`\n🐺 Wolverine encountered an error: ${err.message}`));
651
+ // #4: Don't permanently die on transient errors — restart without healing
652
+ console.log(chalk.red(`\n🐺 Wolverine heal error (recovering): ${err.message}`));
630
653
  this._healInProgress = false;
631
- this.running = false;
654
+ this._healStatus = null;
655
+ if (this.running) {
656
+ console.log(chalk.yellow(" Restarting without healing..."));
657
+ this._spawn();
658
+ }
632
659
  }
633
660
  }
634
661
 
@@ -35,6 +35,17 @@ async function heal(opts) {
35
35
  if (err.message === "timeout") {
36
36
  console.log(chalk.red(`\n🐺 Heal timed out after ${HEAL_TIMEOUT_MS / 1000}s`));
37
37
  if (opts.logger) opts.logger.error(EVENT_TYPES.HEAL_FAILED, `Heal timed out after ${HEAL_TIMEOUT_MS / 1000}s`);
38
+ // #11: Rollback on timeout — the background _healImpl may have partially applied patches
39
+ if (opts.backupManager) {
40
+ try {
41
+ const all = opts.backupManager.getAll();
42
+ const latest = all.find(b => b.status === "unstable");
43
+ if (latest) {
44
+ opts.backupManager.rollbackTo(latest.id);
45
+ console.log(chalk.yellow(` ↩️ Rolled back to ${latest.id} (timeout cleanup)`));
46
+ }
47
+ } catch {}
48
+ }
38
49
  return { healed: false, explanation: `Heal timed out after ${HEAL_TIMEOUT_MS / 1000}s` };
39
50
  }
40
51
  throw err;
@@ -312,7 +323,7 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
312
323
  errorMessage: parsed.errorMessage, stackTrace: parsed.stackTrace,
313
324
  extraContext: envContext,
314
325
  });
315
- rateLimiter.record(errorSignature);
326
+ // #15: Don't record rate limit until AFTER verification — failed attempts shouldn't exhaust the limit
316
327
 
317
328
  // Execute shell commands first (npm install, mkdir, etc.)
318
329
  if (repair.commands && Array.isArray(repair.commands)) {
@@ -351,6 +362,7 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
351
362
  const verification = await verifyFix(parsed.filePath, cwd, errorSignature, routeContext);
352
363
  if (verification.verified) {
353
364
  backupManager.markVerified(bid);
365
+ rateLimiter.record(errorSignature);
354
366
  rateLimiter.clearSignature(errorSignature);
355
367
  // Track tool operations: file read + patch + verify + any commands
356
368
  // These are the same operations an agent would do with read_file/write_file/bash_exec
@@ -360,10 +372,11 @@ async function _healImpl({ stderr, cwd, sandbox, notifier, rateLimiter, backupMa
360
372
  return { healed: true, explanation: repair.explanation, backupId: bid, mode: "fast" };
361
373
  }
362
374
 
363
- backupManager.rollbackTo(bid);
375
+ // #13: Safe rollback — wrap in try/catch to prevent rollback-of-rollback loop
376
+ try { backupManager.rollbackTo(bid); } catch (rbErr) { console.log(chalk.red(` ⚠️ Rollback failed: ${rbErr.message}`)); }
364
377
  return { healed: false, explanation: `Fast path: ${verification.status}` };
365
378
  } catch (err) {
366
- backupManager.rollbackTo(bid);
379
+ try { backupManager.rollbackTo(bid); } catch (rbErr) { console.log(chalk.red(` ⚠️ Rollback failed: ${rbErr.message}`)); }
367
380
  return { healed: false, explanation: `Fast path error: ${err.message}` };
368
381
  }
369
382
  } else if (iteration <= 2) {
@@ -6,14 +6,14 @@
6
6
  },
7
7
 
8
8
  "models": {
9
- "reasoning": "gpt-4o",
10
- "coding": "gpt-4o",
11
- "chat": "gpt-4o-mini",
9
+ "reasoning": "claude-sonnet-4-6",
10
+ "coding": "claude-sonnet-4-6",
11
+ "chat": "gpt-5.4-mini",
12
12
  "tool": "gpt-4o-mini",
13
13
  "classifier": "gpt-4o-mini",
14
- "audit": "gpt-4o-mini",
15
- "compacting": "gpt-4o-mini",
16
- "research": "gpt-4o"
14
+ "audit": "wolverine-test-1",
15
+ "compacting": "wolverine-test-1",
16
+ "research": "claude-sonnet-4-6"
17
17
  },
18
18
 
19
19
  "embedding": "wolverine-embedding-1",