wolverine-ai 4.0.3 → 4.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/brain/brain.js +4 -0
- package/src/brain/embedder.js +2 -2
- package/src/core/ai-client.js +11 -6
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "4.0.3",
+  "version": "4.0.5",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {
package/src/brain/brain.js
CHANGED
@@ -133,6 +133,10 @@ const SEED_DOCS = [
     text: "Configuration: hybrid-always architecture — no provider selection. Users pick the best model for each of 8 task roles directly in settings.json 'models' section. Mix and match: wolverine for audit, claude for reasoning, gpt for coding. Provider auto-detected from model name. Embedding is separate ('embedding' key) — always wolverine-embedding-1 billed through credits (proxies to text-embedding-3-small at 2x markup). Secrets in .env.local. Config priority: env vars > settings.json > defaults.",
     metadata: { topic: "configuration" },
   },
+  {
+    text: "AI client prompt caching: all 3 providers cache automatically. Anthropic: system prompt marked cache_control:ephemeral, 90% cheaper on repeat calls within 5 min TTL. OpenAI: automatic prefix caching for >=1024 token prefixes, 50% cheaper on cached input, tracked via usage.prompt_tokens_details.cached_tokens. Wolverine/llama.cpp: cache_prompt:true in request body reuses KV cache for identical prefixes between requests, near-zero TTFT on second+ call in a heal pipeline. Cache savings tracked in analytics: cacheCreation (tokens written to cache) and cacheRead (tokens served from cache).",
+    metadata: { topic: "prompt-caching" },
+  },
   {
     text: "Platform telemetry: lightweight background process, zero-config. Default platform: api.wolverinenode.xyz. Auto-registers on first run (retries every 60s until platform responds), saves key to .wolverine/platform-key. Heartbeat payload matches PLATFORM.md spec: instanceId, server (name/port/uptime/status/pid), process (memoryMB/cpuPercent), routes, repairs, usage (tokens/cost/calls/byCategory), brain, backups. Offline-resilient: queues up to 1440 heartbeats locally, drains on reconnect. No chalk dependency, cached version/key in memory, minimal IO. Opt out: WOLVERINE_TELEMETRY=false. Override URL: WOLVERINE_PLATFORM_URL.",
     metadata: { topic: "platform-telemetry" },
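
Note: the new prompt-caching seed doc describes three distinct mechanisms. As a rough standalone sketch (not the package's code; the prompt values and model ids below are placeholders), the per-provider request shapes look like this:

// Anthropic: mark the long, stable system prompt as cacheable (ephemeral, ~5 min TTL).
const systemPrompt = "You are the healing agent ...";   // long, repeated prefix
const userPrompt = "Diagnose this crash: ...";

const anthropicBody = {
  model: "claude-sonnet-4-5",                            // placeholder model id
  max_tokens: 1024,
  system: [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }],
  messages: [{ role: "user", content: userPrompt }],
};

// OpenAI: nothing to set; prefixes of >=1024 tokens are cached automatically and
// hits are reported in usage.prompt_tokens_details.cached_tokens.
const openaiBody = {
  model: "gpt-4o-mini",                                  // placeholder model id
  messages: [
    { role: "system", content: systemPrompt },
    { role: "user", content: userPrompt },
  ],
};

// Wolverine / llama.cpp: opt in per request so the server reuses its KV cache
// for identical prefixes (near-zero time-to-first-token on repeat calls).
const wolverineBody = { ...openaiBody, cache_prompt: true };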
package/src/brain/embedder.js
CHANGED
@@ -51,7 +51,7 @@ async function embed(text) {
     response = await client.embeddings.create({ model, input: text });
   } catch (err) {
     // If wolverine proxy is down (startup, crash loop), fall back to OpenAI direct
-    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "")) {
+    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed|Connection error/i.test(err.message || "")) {
       const directClient = getClient("openai");
       response = await directClient.embeddings.create({ model: "text-embedding-3-small", input: text });
     } else {
@@ -98,7 +98,7 @@ async function embedBatch(texts) {
   try {
     response = await client.embeddings.create({ model, input: uncached });
   } catch (err) {
-    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "")) {
+    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed|Connection error/i.test(err.message || "")) {
       const directClient = getClient("openai");
       response = await directClient.embeddings.create({ model: "text-embedding-3-small", input: uncached });
     } else {
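
Note: the only change in embedder.js is the widened connection-error regex. "Connection error" appears to be the message the OpenAI-compatible SDK raises (APIConnectionError) when the wolverine proxy is unreachable at all, which the raw socket patterns (ECONNREFUSED etc.) did not match, so the fallback to direct OpenAI embeddings never fired. A standalone sketch of the same check, with a hypothetical helper name not taken from the package:

// Hypothetical helper mirroring the widened check: SDK-level "Connection error."
// is treated like a raw socket failure, so the OpenAI-direct fallback still fires.
const CONN_ERR = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed|Connection error/i;

function isProxyDown(err) {
  return CONN_ERR.test(err?.message || "");
}

// usage: if (provider === "wolverine" && isProxyDown(err)) { /* fall back */ }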
package/src/core/ai-client.js
CHANGED
@@ -21,8 +21,9 @@ function _extractTokens(usage) {
     output: usage.completion_tokens || usage.output_tokens || 0,
     // Anthropic cache fields
     cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
-    // OpenAI
-    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens
+    // OpenAI prompt_tokens_details.cached_tokens + Anthropic cache_read_input_tokens
+    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens
+      || usage.prompt_tokens_details?.cached_tokens || 0,
   };
 }
 
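Note: cacheRead previously only covered the Anthropic/wolverine field names, so OpenAI cache hits (reported under usage.prompt_tokens_details.cached_tokens) showed up as zero in analytics. A standalone sketch of the widened normalization (the input mapping on the first line is assumed; only the other fields appear in this diff):

function extractTokens(usage = {}) {
  return {
    input: usage.prompt_tokens || usage.input_tokens || 0,   // assumed, not shown in the diff
    output: usage.completion_tokens || usage.output_tokens || 0,
    cacheCreation: usage.cache_creation_input_tokens || usage.cache_write_tokens || 0,
    cacheRead: usage.cache_read_input_tokens || usage.cache_read_tokens
      || usage.prompt_tokens_details?.cached_tokens || 0,
  };
}

// e.g. an OpenAI response with a cached prefix:
// extractTokens({ prompt_tokens: 1800, completion_tokens: 220,
//                 prompt_tokens_details: { cached_tokens: 1536 } })
//   -> { input: 1800, output: 220, cacheCreation: 0, cacheRead: 1536 }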
@@ -232,7 +233,7 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
       result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
     } catch (proxyErr) {
       // If billing proxy is down (server crashing), fall back to direct GPU
-      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
+      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed|Connection error/i.test(proxyErr.message || "");
       const directClient = _getWolverineDirectClient();
       if (isConnErr && directClient) {
         console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
@@ -269,7 +270,7 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
     try {
       result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
     } catch (proxyErr) {
-      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
+      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed|Connection error/i.test(proxyErr.message || "");
       const directClient = _getWolverineDirectClient();
       if (isConnErr && directClient) {
         console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
@@ -520,19 +521,20 @@ async function _chatCall(openai, { model, systemPrompt, userPrompt, maxTokens, t
   if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
   messages.push({ role: "user", content: userPrompt });
 
-  // No temperature for o-series and gpt-5+ (forbidden, causes error)
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
+  const isWolverine = detectProvider(model) === "wolverine";
   const params = {
     model, messages,
     ...(!noTemp ? { temperature: 0 } : {}),
     ...tokenParam(model, maxTokens),
     ..._reasoningParams(model),
+    // Prompt caching: llama.cpp reuses KV cache for identical prefixes
+    ...(isWolverine ? { cache_prompt: true } : {}),
   };
 
   if (tools && tools.length > 0) {
     params.tools = tools;
     params.tool_choice = toolChoice || "auto";
-    // Disable parallel calls for reliability — sequential is more predictable for healing
     params.parallel_tool_calls = false;
   }
 
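Note: cache_prompt only means something to the llama.cpp-backed wolverine server; OpenAI- and Anthropic-bound requests never include it. A sketch of how the spreads resolve (detectProvider, tokenParam and _reasoningParams are stubbed here with assumed shapes, and the model ids are placeholders):

// Stubs with assumed shapes, for illustration only.
const detectProvider = (m) => (m.startsWith("wolverine") ? "wolverine" : "openai");
const tokenParam = (m, max) => ({ max_tokens: max });
const _reasoningParams = () => ({});

function buildParams(model, messages, maxTokens) {
  const noTemp = /^(o[1-9]|gpt-5)/.test(model);
  const isWolverine = detectProvider(model) === "wolverine";
  return {
    model, messages,
    ...(!noTemp ? { temperature: 0 } : {}),
    ...tokenParam(model, maxTokens),
    ..._reasoningParams(model),
    // Only wolverine requests carry the llama.cpp KV-cache flag.
    ...(isWolverine ? { cache_prompt: true } : {}),
  };
}

// buildParams("wolverine-coder", [], 2048)
//   -> { model: "wolverine-coder", messages: [], temperature: 0, max_tokens: 2048, cache_prompt: true }
// buildParams("o3-mini", [], 2048)
//   -> { model: "o3-mini", messages: [], max_tokens: 2048 }   // no temperature, no cache_prompt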
@@ -589,11 +591,14 @@ async function _responsesCallWithHistory(openai, { model, messages, tools, maxTo
 
 async function _chatCallWithHistory(openai, { model, messages, tools, maxTokens }) {
   const noTemp = /^(o[1-9]|gpt-5)/.test(model);
+  const isWolverine = detectProvider(model) === "wolverine";
   const params = {
     model, messages,
     ...(!noTemp ? { temperature: 0 } : {}),
     ...tokenParam(model, maxTokens),
     ..._reasoningParams(model),
+    // Prompt caching: llama.cpp KV cache reuse for multi-turn agent conversations
+    ...(isWolverine ? { cache_prompt: true } : {}),
   };
   if (tools && tools.length > 0) {
     params.tools = tools;