wolverine-ai 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -290,16 +290,26 @@ Secured with `WOLVERINE_ADMIN_KEY` + IP allowlist (localhost + `WOLVERINE_ADMIN_
290
290
 
291
291
  ## 10-Model Configuration (OpenAI + Anthropic)
292
292
 
293
- Every AI task has its own model slot. **Mix and match providers** — set any slot to a `claude-*` model for Anthropic or `gpt-*` for OpenAI. Provider is auto-detected from the model name.
293
+ Every AI task has its own model slot. Three provider presets in `server/config/settings.json`:
294
294
 
295
- ```bash
296
- # .env.local — use Anthropic for reasoning, OpenAI for coding
297
- REASONING_MODEL=claude-sonnet-4-20250514
298
- CODING_MODEL=gpt-5.3-codex
299
- CHAT_MODEL=claude-haiku-4-20250414
300
- AUDIT_MODEL=claude-haiku-4-20250414
295
+ ```jsonc
296
+ {
297
+ "provider": "hybrid", // "openai" | "anthropic" | "hybrid"
298
+ "openai_settings": { ... }, // all OpenAI models
299
+ "anthropic_settings": { ... }, // all Anthropic models
300
+ "hybrid_settings": { // best of both
301
+ "reasoning": "claude-sonnet-4-6",
302
+ "coding": "claude-opus-4-6",
303
+ "tool": "claude-opus-4-6",
304
+ "chat": "claude-haiku-4-5",
305
+ "audit": "gpt-4o-mini", // cheap OpenAI for bulk scans
306
+ "embedding": "text-embedding-3-small" // always OpenAI
307
+ }
308
+ }
301
309
  ```
302
310
 
311
+ Change one line to switch all models (e.g. `"provider": "anthropic"`), or override individual roles with env vars.
312
+
303
313
  | Env Variable | Role | Needs Tools? | Example Models |
304
314
  |---|---|---|---|
305
315
  | `REASONING_MODEL` | Multi-file agent | Yes | `claude-sonnet-4`, `gpt-5.4` |
@@ -315,7 +325,8 @@ AUDIT_MODEL=claude-haiku-4-20250414
315
325
  **Notes:**
316
326
  - Embeddings always use OpenAI (Anthropic doesn't have an embedding API)
317
327
  - Tools (all 18) work identically on both providers — normalized at the client level
318
- - Telemetry tracks usage by model AND by provider (`openai` / `anthropic`)
328
+ - Telemetry tracks per-model KPIs: latency, success rate, tokens/sec, cost/call
329
+ - Usage aggregated by model, category, tool, AND provider (`openai` / `anthropic`)
319
330
  - Any future model from either provider works automatically — just set the model name
320
331
 
321
332
  ---
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wolverine-ai",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -36,9 +36,9 @@
36
36
  "coding": "claude-opus-4-6",
37
37
  "chat": "claude-haiku-4-5",
38
38
  "tool": "claude-opus-4-6",
39
- "classifier": "claude-haiku-4-5",
39
+ "classifier": "gpt-4o-mini",
40
40
  "audit": "gpt-4o-mini",
41
- "compacting": "gpt-4o-mini",
41
+ "compacting": "claude-sonnet-4-6",
42
42
  "research": "claude-sonnet-4-6",
43
43
  "embedding": "text-embedding-3-small"
44
44
  },
@@ -64,11 +64,11 @@ const SEED_DOCS = [
64
64
  metadata: { topic: "verification" },
65
65
  },
66
66
  {
67
- text: "Wolverine multi-file agent: 15-turn agent loop with tools. Can read any file, write any file type (js, json, sql, yaml, env, dockerfile), list directories, and search across the codebase. Used when the fast path single-file fix fails. Tracks token budget (50k max) to control costs.",
67
+ text: "Wolverine multi-file agent: 15-turn agent loop with 18 tools across 7 categories. FILE: read_file (offset/limit), write_file (creates dirs), edit_file (find-and-replace), glob_files (pattern search), grep_code (regex with context), list_dir (directory listing with sizes), move_file (rename/relocate). SHELL: bash_exec (30s default, 60s cap), git_log, git_diff. DATABASE: inspect_db (tables/schema/SELECT on SQLite), run_db_fix (UPDATE/DELETE/ALTER with auto-backup). DIAGNOSTICS: check_port (find what uses a port), check_env (env vars, values redacted). DEPS: audit_deps (full npm health check), check_migration (known upgrade paths). RESEARCH: web_fetch. CONTROL: done. Used when fast path fails. Token budget 50k max.",
68
68
  metadata: { topic: "agent" },
69
69
  },
70
70
  {
71
- text: "Wolverine supports the Responses API for codex models and Chat Completions API for standard models. Auto-detects based on model name. Codex models use openai.responses.create() with input/instructions/tools. Standard models use openai.chat.completions.create() with messages/tools.",
71
+ text: "Wolverine supports dual providers: OpenAI (Chat Completions + Responses API) and Anthropic (Messages API). Provider auto-detected from model name: claude-* → Anthropic, gpt-*/o1-*/codex → OpenAI. All responses normalized to same {content, toolCalls, usage} shape — downstream code doesn't know which provider was used. Tool definitions auto-converted between formats. Every call tracked with latencyMs, success/failure, input/output tokens. Three provider modes in settings.json: openai_settings, anthropic_settings, hybrid_settings (Anthropic for heavy tasks, OpenAI for cheap tasks + embeddings).",
72
72
  metadata: { topic: "api-support" },
73
73
  },
74
74
  {
@@ -108,7 +108,7 @@ const SEED_DOCS = [
108
108
  metadata: { topic: "sub-agent-workflow" },
109
109
  },
110
110
  {
111
- text: "Sub-agent tool restrictions (claw-code pattern): explore gets read_file/glob/grep/git. plan gets read_file/glob/grep/brain. fix gets read_file/write_file/edit_file/glob/grep/bash_exec (bash_exec for npm install, chmod, config creation — not all errors are code bugs). verify gets read_file/glob/grep/bash. research gets read_file/grep/web_fetch/brain. security gets read_file/glob/grep. database gets read_file/write_file/edit_file/glob/grep/bash. No agent gets tools it doesn't need.",
111
+ text: "Sub-agent tool restrictions: explore gets read_file/glob/grep/git_log/git_diff/list_dir/check_env/check_port/inspect_db/audit_deps. plan gets read_file/glob/grep/list_dir/inspect_db/check_env/audit_deps/check_migration/search_brain. fix gets read_file/write_file/edit_file/glob/grep/bash_exec/move_file/run_db_fix/audit_deps. verify gets read_file/glob/grep/bash_exec/inspect_db/check_port. research gets read_file/grep/web_fetch/search_brain. security gets read_file/glob/grep/inspect_db. database gets read_file/write_file/edit_file/glob/grep/bash_exec/inspect_db/run_db_fix. Each type gets only the tools it needs.",
112
112
  metadata: { topic: "sub-agent-tools" },
113
113
  },
114
114
  {
@@ -160,7 +160,7 @@ const SEED_DOCS = [
160
160
  metadata: { topic: "smart-edit" },
161
161
  },
162
162
  {
163
- text: "Token tracking: every AI call tracked with input/output tokens + USD cost. Categories: heal, develop, chat, security, classify, research, brain. Tracked by model, by category, by tool. Persisted to .wolverine/usage.json (aggregates) and .wolverine/usage-history.jsonl (full timeline). Auto-saves on every call. Dashboard shows charts + cost breakdowns. Pricing from src/logger/pricing.js, customizable via .wolverine/pricing.json.",
163
+ text: "Token tracking: every AI call tracked with input/output tokens, USD cost, latencyMs, and success/failure. Per-model KPIs: avgLatencyMs, minLatencyMs, maxLatencyMs, tokensPerSecond, successRate, costPerCall, successes, failures. Aggregated by model, by category, by tool, by provider (openai/anthropic). Persisted to .wolverine/usage.json + .wolverine/usage-history.jsonl. Pricing includes both OpenAI (gpt-5.x, o4, codex) and Anthropic (opus-4, sonnet-4, haiku-4) families. Customizable via .wolverine/pricing.json. Telemetry sends all per-model performance data to platform backend for fleet-wide cost:speed:reliability analysis.",
164
164
  metadata: { topic: "token-tracking" },
165
165
  },
166
166
  {
@@ -184,7 +184,7 @@ const SEED_DOCS = [
184
184
  metadata: { topic: "demos" },
185
185
  },
186
186
  {
187
- text: "10 configurable models: REASONING_MODEL (multi-file agent), CODING_MODEL (code repair, Responses API for codex), CHAT_MODEL (simple text), TOOL_MODEL (function calling), CLASSIFIER_MODEL (routing), AUDIT_MODEL (injection detection), COMPACTING_MODEL (brain text compression), RESEARCH_MODEL (deep research), TEXT_EMBEDDING_MODEL (vectors). All in server/config/settings.json. Reasoning models auto-get 4x token limits for chain-of-thought.",
187
+ text: "10 model slots configurable per provider. settings.json has 3 presets: openai_settings, anthropic_settings, hybrid_settings. Set 'provider' to switch all at once. Slots: REASONING_MODEL (agent), CODING_MODEL (repair), CHAT_MODEL (text), TOOL_MODEL (function calling), CLASSIFIER_MODEL (routing), AUDIT_MODEL (injection), COMPACTING_MODEL (brain), RESEARCH_MODEL (deep research), TEXT_EMBEDDING_MODEL (vectors, always OpenAI). Hybrid mode uses Anthropic for heavy tasks (reasoning/coding/tool/compacting/research) and OpenAI for cheap tasks (classifier/audit/embedding). Every call tracked per-model with latencyMs, successRate, tokensPerSecond, costPerCall for performance comparison.",
188
188
  metadata: { topic: "model-slots" },
189
189
  },
190
190
  {
@@ -216,8 +216,8 @@ const SEED_DOCS = [
216
216
  metadata: { topic: "error-monitor" },
217
217
  },
218
218
  {
219
- text: "Agent tool harness v2: 16 built-in tools. FILE: read_file, write_file, edit_file, glob_files, grep_code, list_dir, move_file. SHELL: bash_exec, git_log, git_diff. DATABASE: inspect_db (list tables, show schema, run SELECT), run_db_fix (UPDATE/DELETE/INSERT/ALTER with auto-backup). DIAGNOSTICS: check_port (find what's using a port), check_env (list/check env vars, values redacted). RESEARCH: web_fetch. COMPLETION: done. Sub-agents get restricted sets: explorer gets diagnostics (list_dir, check_env, check_port, inspect_db), fixer gets action tools (bash_exec, move_file, run_db_fix), verifier gets inspection tools.",
220
- metadata: { topic: "agent-tools-v2" },
219
+ text: "Agent tool details: read_file supports offset/limit for large files. edit_file does surgical find-and-replace (preferred for small fixes). glob_files discovers files by pattern (**/*.js). grep_code does regex search with context lines. list_dir shows directory contents with file sizes. move_file relocates/renames files. bash_exec runs shell commands (30s default timeout, 60s hard cap, dangerous commands blocked: rm -rf /, git push --force, npm publish). inspect_db reads SQLite: action=tables (list), action=schema (CREATE statements), action=query (SELECT/PRAGMA only). run_db_fix writes SQLite: UPDATE/DELETE/INSERT/ALTER, auto-backs up db file first. check_port finds what process is using a port (netstat/lsof). check_env lists environment variables with values redacted. audit_deps runs full npm health check (vulnerabilities, outdated, peer deps, unused, lock file). check_migration returns known upgrade paths with before/after code patterns. web_fetch retrieves URL content.",
220
+ metadata: { topic: "agent-tools-detail" },
221
221
  },
222
222
  {
223
223
  text: "Server problem categories the agent can fix: CODE BUGS (SyntaxError, TypeError, ReferenceError → edit_file), DEPENDENCIES (Cannot find module → npm install, corrupted node_modules → rm + reinstall), DATABASE (invalid entries → run_db_fix UPDATE, missing table → CREATE TABLE, schema mismatch → ALTER TABLE, constraint violation → fix data or schema), CONFIG (invalid JSON → edit_file, missing env vars → write .env, wrong port → edit config), FILESYSTEM (misplaced files → move_file, missing directories → bash_exec mkdir, wrong permissions → chmod), NETWORK (port conflict → check_port + kill, service down → restart, connection refused → check config), STATE (corrupted cache → delete + restart, stale locks → remove lock file, git conflicts → resolve markers), IDEMPOTENCY (double-fire → add idempotencyGuard middleware, missing idempotency key → add X-Idempotency-Key header support, duplicate DB entries → add UNIQUE constraint or use db.idempotent()). The agent investigates before fixing — reads files, checks directories, inspects databases, never guesses.",
@@ -16,10 +16,10 @@ function _extractTokens(usage) {
16
16
  };
17
17
  }
18
18
 
19
- function _track(model, category, usage, tool) {
19
+ function _track(model, category, usage, tool, latencyMs, success) {
20
20
  if (!_tracker) return;
21
21
  const { input, output } = _extractTokens(usage);
22
- _tracker.record(model, category, input, output, tool);
22
+ _tracker.record(model, category, input, output, tool, latencyMs, success);
23
23
  }
24
24
 
25
25
  // ── Client Management ──
@@ -70,34 +70,50 @@ function tokenParam(model, limit) {
70
70
 
71
71
  async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools, toolChoice, category = "chat", tool }) {
72
72
  const provider = detectProvider(model);
73
+ const startMs = Date.now();
73
74
  let result;
74
75
 
75
- if (provider === "anthropic") {
76
- result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
77
- } else if (isResponsesModel(model)) {
78
- result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
79
- } else {
80
- result = await _chatCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
81
- }
76
+ try {
77
+ if (provider === "anthropic") {
78
+ result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
79
+ } else if (isResponsesModel(model)) {
80
+ result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
81
+ } else {
82
+ result = await _chatCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
83
+ }
82
84
 
83
- _track(model, category, result.usage, tool);
84
- return result;
85
+ const latencyMs = Date.now() - startMs;
86
+ _track(model, category, result.usage, tool, latencyMs, true);
87
+ return result;
88
+ } catch (err) {
89
+ const latencyMs = Date.now() - startMs;
90
+ _track(model, category, {}, tool, latencyMs, false);
91
+ throw err;
92
+ }
85
93
  }
86
94
 
87
95
  async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, category = "chat", tool }) {
88
96
  const provider = detectProvider(model);
97
+ const startMs = Date.now();
89
98
  let result;
90
99
 
91
- if (provider === "anthropic") {
92
- result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
93
- } else if (isResponsesModel(model)) {
94
- result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
95
- } else {
96
- result = await _chatCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
97
- }
100
+ try {
101
+ if (provider === "anthropic") {
102
+ result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
103
+ } else if (isResponsesModel(model)) {
104
+ result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
105
+ } else {
106
+ result = await _chatCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
107
+ }
98
108
 
99
- _track(model, category, result.usage, tool);
100
- return result;
109
+ const latencyMs = Date.now() - startMs;
110
+ _track(model, category, result.usage, tool, latencyMs, true);
111
+ return result;
112
+ } catch (err) {
113
+ const latencyMs = Date.now() - startMs;
114
+ _track(model, category, {}, tool, latencyMs, false);
115
+ throw err;
116
+ }
101
117
  }
102
118
 
103
119
  // ── Anthropic Implementation ──
@@ -64,9 +64,8 @@ class TokenTracker {
64
64
  * @param {number} outputTokens - Completion/output tokens
65
65
  * @param {string} tool - Optional tool name (e.g. "call_endpoint /time")
66
66
  */
67
- record(model, category, inputTokens, outputTokens, tool) {
67
+ record(model, category, inputTokens, outputTokens, tool, latencyMs, success) {
68
68
  const total = (inputTokens || 0) + (outputTokens || 0);
69
- if (total === 0) return;
70
69
 
71
70
  // Calculate USD cost
72
71
  const cost = calculateCost(model, inputTokens || 0, outputTokens || 0);
@@ -78,17 +77,26 @@ class TokenTracker {
78
77
  input: inputTokens || 0,
79
78
  output: outputTokens || 0,
80
79
  total,
81
- cost: Math.round(cost.total * 1000000) / 1000000, // 6 decimal places
80
+ cost: Math.round(cost.total * 1000000) / 1000000,
82
81
  tool: tool || null,
82
+ latencyMs: latencyMs || 0,
83
+ success: success !== false,
83
84
  };
84
85
 
85
86
  // Accumulate by model
86
- if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0 };
87
- this._byModel[model].input += entry.input;
88
- this._byModel[model].output += entry.output;
89
- this._byModel[model].total += total;
90
- this._byModel[model].calls++;
91
- this._byModel[model].cost += cost.total;
87
+ if (!this._byModel[model]) this._byModel[model] = { input: 0, output: 0, total: 0, calls: 0, cost: 0, successes: 0, failures: 0, totalLatencyMs: 0, minLatencyMs: Infinity, maxLatencyMs: 0 };
88
+ const m = this._byModel[model];
89
+ m.input += entry.input;
90
+ m.output += entry.output;
91
+ m.total += total;
92
+ m.calls++;
93
+ m.cost += cost.total;
94
+ if (entry.success) m.successes++; else m.failures++;
95
+ if (latencyMs > 0) {
96
+ m.totalLatencyMs += latencyMs;
97
+ if (latencyMs < m.minLatencyMs) m.minLatencyMs = latencyMs;
98
+ if (latencyMs > m.maxLatencyMs) m.maxLatencyMs = latencyMs;
99
+ }
92
100
 
93
101
  // Accumulate by category
94
102
  if (!this._byCategory[category]) this._byCategory[category] = { input: 0, output: 0, total: 0, calls: 0, cost: 0 };
@@ -142,7 +150,7 @@ class TokenTracker {
142
150
  duration: sessionDuration,
143
151
  tokensPerMinute,
144
152
  },
145
- byModel: this._byModel,
153
+ byModel: this._formatModelStats(),
146
154
  byCategory: this._byCategory,
147
155
  byTool: this._byTool,
148
156
  // Recent in-memory timeline
@@ -153,10 +161,37 @@ class TokenTracker {
153
161
  output: e.output,
154
162
  cat: e.category,
155
163
  model: e.model,
164
+ latencyMs: e.latencyMs || 0,
165
+ success: e.success !== false,
156
166
  })),
157
167
  };
158
168
  }
159
169
 
170
+ /**
171
+ * Format model stats with computed performance metrics.
172
+ */
173
+ _formatModelStats() {
174
+ const result = {};
175
+ for (const [model, m] of Object.entries(this._byModel)) {
176
+ result[model] = {
177
+ input: m.input,
178
+ output: m.output,
179
+ total: m.total,
180
+ calls: m.calls,
181
+ cost: m.cost,
182
+ successes: m.successes || m.calls, // backwards compat
183
+ failures: m.failures || 0,
184
+ successRate: m.calls > 0 ? Math.round(((m.successes || m.calls) / m.calls) * 100) : 0,
185
+ avgLatencyMs: m.calls > 0 && m.totalLatencyMs ? Math.round(m.totalLatencyMs / m.calls) : 0,
186
+ minLatencyMs: m.minLatencyMs === Infinity ? 0 : (m.minLatencyMs || 0),
187
+ maxLatencyMs: m.maxLatencyMs || 0,
188
+ tokensPerSecond: m.totalLatencyMs > 0 ? Math.round((m.total / (m.totalLatencyMs / 1000)) * 10) / 10 : 0,
189
+ costPerCall: m.calls > 0 ? Math.round((m.cost / m.calls) * 1000000) / 1000000 : 0,
190
+ };
191
+ }
192
+ return result;
193
+ }
194
+
160
195
  /**
161
196
  * Load full history from JSONL file. For dashboard charts across sessions.
162
197
  * @param {number} limit — max entries to return (default: 500)