wolverine-ai 4.0.1 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "4.0.1",
+  "version": "4.0.3",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {
@@ -43,14 +43,21 @@ async function embed(text) {
 
   const model = getEmbeddingModel();
   const provider = detectProvider(model);
-  // wolverine-embedding-1 routes through billing proxy, others go direct
   const client = provider === "wolverine" ? getClient("wolverine") : getClient("openai");
 
   const startMs = Date.now();
-  const response = await client.embeddings.create({
-    model,
-    input: text,
-  });
+  let response;
+  try {
+    response = await client.embeddings.create({ model, input: text });
+  } catch (err) {
+    // If wolverine proxy is down (startup, crash loop), fall back to OpenAI direct
+    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "")) {
+      const directClient = getClient("openai");
+      response = await directClient.embeddings.create({ model: "text-embedding-3-small", input: text });
+    } else {
+      throw err;
+    }
+  }
 
   const embedding = response.data[0].embedding;
   _trackEmbedding(model, response.usage, Date.now() - startMs, true);
@@ -87,10 +94,17 @@ async function embedBatch(texts) {
   const client = provider === "wolverine" ? getClient("wolverine") : getClient("openai");
 
   const startMs = Date.now();
-  const response = await client.embeddings.create({
-    model,
-    input: uncached,
-  });
+  let response;
+  try {
+    response = await client.embeddings.create({ model, input: uncached });
+  } catch (err) {
+    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "")) {
+      const directClient = getClient("openai");
+      response = await directClient.embeddings.create({ model: "text-embedding-3-small", input: uncached });
+    } else {
+      throw err;
+    }
+  }
   _trackEmbedding(model, response.usage, Date.now() - startMs, true);
 
   // Sort by index to maintain order
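
Note on the two hunks above: embed() and embedBatch() now duplicate the same connection-error test and OpenAI fallback. A minimal sketch of how that check could be factored out, assuming only the error patterns already matched above; the helper name isConnectionError is hypothetical and not part of the package:

  // Hypothetical helper (not in the package): the transient-network test
  // shared by both catch blocks above.
  function isConnectionError(err) {
    return /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "");
  }

  // Each catch block would then reduce to:
  //   if (provider === "wolverine" && isConnectionError(err)) { /* fall back to OpenAI */ }
  //   else { throw err; }

Also worth flagging: when the fallback fires, usage is still recorded against the original model, since _trackEmbedding(model, ...) receives the wolverine model id even though the response came from "text-embedding-3-small".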
@@ -51,12 +51,10 @@ function getClient(provider) {
   return _getOpenAIClient();
 }
 
+let _wolverineDirectClient = null;
+
 function _getWolverineClient() {
   if (!_wolverineClient) {
-    // Wolverine inference: always route through billing proxy when API key is set.
-    // WOLVERINE_API_KEY = billed API key (credits deducted per call)
-    // WOLVERINE_GPU_KEY = direct GPU access (no billing, admin/internal only)
-    // Priority: API_KEY (billed) > GPU_KEY (direct) — billing is the default path
     const apiKey = process.env.WOLVERINE_API_KEY || process.env.WOLVERINE_GPU_KEY || "none";
     const baseURL = process.env.WOLVERINE_INFERENCE_URL
       ? process.env.WOLVERINE_INFERENCE_URL + "/v1"
@@ -66,6 +64,17 @@ function _getWolverineClient() {
   return _wolverineClient;
 }
 
+// Direct GPU client — bypasses billing proxy. Used as fallback when proxy is down.
+function _getWolverineDirectClient() {
+  if (!_wolverineDirectClient && process.env.WOLVERINE_GPU_URL && process.env.WOLVERINE_GPU_KEY) {
+    _wolverineDirectClient = new OpenAI({
+      apiKey: process.env.WOLVERINE_GPU_KEY,
+      baseURL: process.env.WOLVERINE_GPU_URL + "/v1",
+    });
+  }
+  return _wolverineDirectClient;
+}
+
 function _getOpenAIClient() {
   if (!_openaiClient) {
     const apiKey = process.env.OPENAI_API_KEY;
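
The new direct client is created lazily and only when both WOLVERINE_GPU_URL and WOLVERINE_GPU_KEY are set; otherwise the getter keeps returning null and the fallback branches further down rethrow instead of retrying. A short illustration of that gating, using made-up example values rather than anything the package ships:

  // Illustration only; the URL and key values are invented for the example.
  delete process.env.WOLVERINE_GPU_URL;
  delete process.env.WOLVERINE_GPU_KEY;
  console.log(_getWolverineDirectClient()); // null, so callers rethrow

  process.env.WOLVERINE_GPU_URL = "http://gpu.example.internal:8080";
  process.env.WOLVERINE_GPU_KEY = "example-gpu-key";
  console.log(_getWolverineDirectClient()); // OpenAI client bound to http://gpu.example.internal:8080/v1

Because the instance is memoized in _wolverineDirectClient, changes to either variable after the first successful creation have no effect.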
@@ -219,7 +228,19 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
   if (provider === "anthropic") {
     result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
   } else if (provider === "wolverine") {
-    result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+    try {
+      result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+    } catch (proxyErr) {
+      // If billing proxy is down (server crashing), fall back to direct GPU
+      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
+      const directClient = _getWolverineDirectClient();
+      if (isConnErr && directClient) {
+        console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
+        result = await _chatCall(directClient, { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+      } else {
+        throw proxyErr;
+      }
+    }
   } else if (isResponsesModel(model)) {
     result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
   } else {
@@ -245,7 +266,18 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
   if (provider === "anthropic") {
     result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
   } else if (provider === "wolverine") {
-    result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
+    try {
+      result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
+    } catch (proxyErr) {
+      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
+      const directClient = _getWolverineDirectClient();
+      if (isConnErr && directClient) {
+        console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
+        result = await _chatCallWithHistory(directClient, { model, messages, tools, maxTokens });
+      } else {
+        throw proxyErr;
+      }
+    }
   } else if (isResponsesModel(model)) {
     result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
   } else {
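
The wolverine branches of aiCall() and aiCallWithHistory() now carry identical proxy-fallback boilerplate. One way to consolidate it, sketched with only the names that appear in the hunks above; withProxyFallback itself is hypothetical and not part of the package:

  // Hypothetical wrapper (not in the package): call through the billing
  // proxy first, retry once on the direct GPU client when the failure
  // looks like a connection error and a direct client is configured.
  async function withProxyFallback(fn) {
    try {
      return await fn(_getWolverineClient());
    } catch (proxyErr) {
      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
      const directClient = _getWolverineDirectClient();
      if (!isConnErr || !directClient) throw proxyErr;
      console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
      return await fn(directClient);
    }
  }

  // The wolverine branch of aiCall() would then read:
  //   result = await withProxyFallback((client) =>
  //     _chatCall(client, { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice }));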