wolverine-ai 4.0.1 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wolverine-ai",
-  "version": "4.0.1",
+  "version": "4.0.3",
   "description": "Self-healing Node.js server framework powered by AI. Catches crashes, diagnoses errors, generates fixes, verifies, and restarts — automatically.",
   "main": "src/index.js",
   "bin": {
@@ -43,14 +43,21 @@ async function embed(text) {
 
   const model = getEmbeddingModel();
   const provider = detectProvider(model);
-  // wolverine-embedding-1 routes through billing proxy, others go direct
   const client = provider === "wolverine" ? getClient("wolverine") : getClient("openai");
 
   const startMs = Date.now();
-  const response = await client.embeddings.create({
-    model,
-    input: text,
-  });
+  let response;
+  try {
+    response = await client.embeddings.create({ model, input: text });
+  } catch (err) {
+    // If wolverine proxy is down (startup, crash loop), fall back to OpenAI direct
+    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "")) {
+      const directClient = getClient("openai");
+      response = await directClient.embeddings.create({ model: "text-embedding-3-small", input: text });
+    } else {
+      throw err;
+    }
+  }
 
   const embedding = response.data[0].embedding;
   _trackEmbedding(model, response.usage, Date.now() - startMs, true);
@@ -87,10 +94,17 @@ async function embedBatch(texts) {
   const client = provider === "wolverine" ? getClient("wolverine") : getClient("openai");
 
   const startMs = Date.now();
-  const response = await client.embeddings.create({
-    model,
-    input: uncached,
-  });
+  let response;
+  try {
+    response = await client.embeddings.create({ model, input: uncached });
+  } catch (err) {
+    if (provider === "wolverine" && /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "")) {
+      const directClient = getClient("openai");
+      response = await directClient.embeddings.create({ model: "text-embedding-3-small", input: uncached });
+    } else {
+      throw err;
+    }
+  }
   _trackEmbedding(model, response.usage, Date.now() - startMs, true);
 
   // Sort by index to maintain order
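
Note on the two hunks above: embed() and embedBatch() now duplicate the same connection-error test and OpenAI fallback. A minimal sketch of how that check could be factored out, assuming only the error patterns already matched above; the helper name isConnectionError is hypothetical and not part of the package:

  // Hypothetical helper (not in the package): the transient-network test
  // shared by both catch blocks above.
  function isConnectionError(err) {
    return /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(err.message || "");
  }

  // Each catch block would then reduce to:
  //   if (provider === "wolverine" && isConnectionError(err)) { /* fall back to OpenAI */ }
  //   else { throw err; }

Also worth flagging: when the fallback fires, usage is still recorded against the original model, since _trackEmbedding(model, ...) receives the wolverine model id even though the response came from "text-embedding-3-small".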
@@ -51,12 +51,10 @@ function getClient(provider) {
   return _getOpenAIClient();
 }
 
+let _wolverineDirectClient = null;
+
 function _getWolverineClient() {
   if (!_wolverineClient) {
-    // Wolverine inference: always route through billing proxy when API key is set.
-    // WOLVERINE_API_KEY = billed API key (credits deducted per call)
-    // WOLVERINE_GPU_KEY = direct GPU access (no billing, admin/internal only)
-    // Priority: API_KEY (billed) > GPU_KEY (direct) — billing is the default path
     const apiKey = process.env.WOLVERINE_API_KEY || process.env.WOLVERINE_GPU_KEY || "none";
     const baseURL = process.env.WOLVERINE_INFERENCE_URL
       ? process.env.WOLVERINE_INFERENCE_URL + "/v1"
@@ -66,6 +64,17 @@ function _getWolverineClient() {
   return _wolverineClient;
 }
 
+// Direct GPU client — bypasses billing proxy. Used as fallback when proxy is down.
+function _getWolverineDirectClient() {
+  if (!_wolverineDirectClient && process.env.WOLVERINE_GPU_URL && process.env.WOLVERINE_GPU_KEY) {
+    _wolverineDirectClient = new OpenAI({
+      apiKey: process.env.WOLVERINE_GPU_KEY,
+      baseURL: process.env.WOLVERINE_GPU_URL + "/v1",
+    });
+  }
+  return _wolverineDirectClient;
+}
+
 function _getOpenAIClient() {
   if (!_openaiClient) {
     const apiKey = process.env.OPENAI_API_KEY;
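
The new direct client is created lazily and only when both WOLVERINE_GPU_URL and WOLVERINE_GPU_KEY are set; otherwise the getter keeps returning null and the fallback branches further down rethrow instead of retrying. A short illustration of that gating, using made-up example values rather than anything the package ships:

  // Illustration only; the URL and key values are invented for the example.
  delete process.env.WOLVERINE_GPU_URL;
  delete process.env.WOLVERINE_GPU_KEY;
  console.log(_getWolverineDirectClient()); // null, so callers rethrow

  process.env.WOLVERINE_GPU_URL = "http://gpu.example.internal:8080";
  process.env.WOLVERINE_GPU_KEY = "example-gpu-key";
  console.log(_getWolverineDirectClient()); // OpenAI client bound to http://gpu.example.internal:8080/v1

Because the instance is memoized in _wolverineDirectClient, changes to either variable after the first successful creation have no effect.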
@@ -219,7 +228,19 @@ async function aiCall({ model, systemPrompt, userPrompt, maxTokens = 2048, tools
   if (provider === "anthropic") {
     result = await _anthropicCall({ model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
   } else if (provider === "wolverine") {
-    result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+    try {
+      result = await _chatCall(_getWolverineClient(), { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+    } catch (proxyErr) {
+      // If billing proxy is down (server crashing), fall back to direct GPU
+      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
+      const directClient = _getWolverineDirectClient();
+      if (isConnErr && directClient) {
+        console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
+        result = await _chatCall(directClient, { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice });
+      } else {
+        throw proxyErr;
+      }
+    }
   } else if (isResponsesModel(model)) {
     result = await _responsesCall(_getOpenAIClient(), { model, systemPrompt, userPrompt, maxTokens, tools });
   } else {
@@ -245,7 +266,18 @@ async function aiCallWithHistory({ model, messages, tools, maxTokens = 4096, cat
   if (provider === "anthropic") {
     result = await _anthropicCallWithHistory({ model, messages, tools, maxTokens });
   } else if (provider === "wolverine") {
-    result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
+    try {
+      result = await _chatCallWithHistory(_getWolverineClient(), { model, messages, tools, maxTokens });
+    } catch (proxyErr) {
+      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
+      const directClient = _getWolverineDirectClient();
+      if (isConnErr && directClient) {
+        console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
+        result = await _chatCallWithHistory(directClient, { model, messages, tools, maxTokens });
+      } else {
+        throw proxyErr;
+      }
+    }
   } else if (isResponsesModel(model)) {
     result = await _responsesCallWithHistory(_getOpenAIClient(), { model, messages, tools, maxTokens });
   } else {
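
The wolverine branches of aiCall() and aiCallWithHistory() now carry identical proxy-fallback boilerplate. One way to consolidate it, sketched with only the names that appear in the hunks above; withProxyFallback itself is hypothetical and not part of the package:

  // Hypothetical wrapper (not in the package): call through the billing
  // proxy first, retry once on the direct GPU client when the failure
  // looks like a connection error and a direct client is configured.
  async function withProxyFallback(fn) {
    try {
      return await fn(_getWolverineClient());
    } catch (proxyErr) {
      const isConnErr = /ECONNREFUSED|ECONNRESET|ETIMEDOUT|fetch failed/i.test(proxyErr.message || "");
      const directClient = _getWolverineDirectClient();
      if (!isConnErr || !directClient) throw proxyErr;
      console.log(chalk.yellow(" ⚠️ Billing proxy down — using direct GPU (unbilled)"));
      return await fn(directClient);
    }
  }

  // The wolverine branch of aiCall() would then read:
  //   result = await withProxyFallback((client) =>
  //     _chatCall(client, { model, systemPrompt, userPrompt, maxTokens, tools, toolChoice }));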