npm - @kenkaiiii/gg-ai - Versions diffs - 4.3.2 → 4.3.4 - Mend

@kenkaiiii/gg-ai 4.3.2 → 4.3.4

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js CHANGED Viewed

@@ -426,22 +426,37 @@ function normalizeOpenAIStopReason(reason) {
 }
 // src/providers/anthropic.ts
+var clientCache = /* @__PURE__ */ new Map();
+function getOrCreateClient(options) {
+  const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
+  const key = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${isOAuth}`;
+  let client = clientCache.get(key);
+  if (!client) {
+    client = new Anthropic({
+      ...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
+      ...options.baseUrl ? { baseURL: options.baseUrl } : {},
+      ...options.fetch ? { fetch: options.fetch } : {},
+      // Disable SDK-level retries — the agent loop handles retries itself with
+      // stall detection and context compaction.  SDK retries on abort just cycle
+      // through the already-aborted signal, wasting time.
+      maxRetries: 0,
+      ...isOAuth ? {
+        defaultHeaders: {
+          "user-agent": "claude-cli/2.1.75",
+          "x-app": "cli"
+        }
+      } : {}
+    });
+    clientCache.set(key, client);
+  }
+  return client;
+}
 function streamAnthropic(options) {
   return new StreamResult(runStream(options));
 }
 async function* runStream(options) {
+  const client = getOrCreateClient(options);
   const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
-  const client = new Anthropic({
-    ...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
-    ...options.baseUrl ? { baseURL: options.baseUrl } : {},
-    ...options.fetch ? { fetch: options.fetch } : {},
-    ...isOAuth ? {
-      defaultHeaders: {
-        "user-agent": "claude-cli/2.1.75",
-        "x-app": "cli"
-      }
-    } : {}
-  });
   const cacheControl = toAnthropicCacheControl(options.cacheRetention, options.baseUrl);
   const { system: rawSystem, messages } = toAnthropicMessages(options.messages, cacheControl);
   const system = isOAuth ? [
@@ -705,16 +720,26 @@ function toError(err) {
 // src/providers/openai.ts
 import OpenAI from "openai";
+var clientCache2 = /* @__PURE__ */ new Map();
+function getOrCreateClient2(options) {
+  const key = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}`;
+  let client = clientCache2.get(key);
+  if (!client) {
+    client = new OpenAI({
+      apiKey: options.apiKey,
+      ...options.baseUrl ? { baseURL: options.baseUrl } : {},
+      ...options.fetch ? { fetch: options.fetch } : {}
+    });
+    clientCache2.set(key, client);
+  }
+  return client;
+}
 function streamOpenAI(options) {
   return new StreamResult(runStream2(options));
 }
 async function* runStream2(options) {
   const providerName = options.provider ?? "openai";
-  const client = new OpenAI({
-    apiKey: options.apiKey,
-    ...options.baseUrl ? { baseURL: options.baseUrl } : {},
-    ...options.fetch ? { fetch: options.fetch } : {}
-  });
+  const client = getOrCreateClient2(options);
   const usesThinkingParam = options.provider === "glm" || options.provider === "moonshot" || options.provider === "xiaomi";
   const messages = toOpenAIMessages(options.messages, { provider: options.provider });
   const defaultTemp = options.provider === "glm" ? 0.6 : void 0;
@@ -723,7 +748,7 @@ async function* runStream2(options) {
     model: options.model,
     messages,
     stream: true,
-    ...options.maxTokens ? options.provider === "xiaomi" ? { max_completion_tokens: options.maxTokens } : { max_tokens: options.maxTokens } : {},
+    ...options.maxTokens ? { max_tokens: options.maxTokens } : {},
     ...effectiveTemp != null && !options.thinking ? { temperature: effectiveTemp } : {},
     ...options.topP != null ? { top_p: options.topP } : {},
     ...options.stop ? { stop: options.stop } : {},
@@ -743,7 +768,7 @@ async function* runStream2(options) {
   if (usesThinkingParam) {
     if (options.thinking) {
       params.thinking = { type: "enabled" };
-    } else if (options.provider !== "xiaomi") {
+    } else {
       params.thinking = { type: "disabled" };
     }
   }
@@ -792,7 +817,9 @@ async function* runStream2(options) {
     const reasoningContent = delta.reasoning_content;
     if (typeof reasoningContent === "string" && reasoningContent) {
       thinkingAccum += reasoningContent;
-      yield { type: "thinking_delta", text: reasoningContent };
+      if (options.thinking) {
+        yield { type: "thinking_delta", text: reasoningContent };
+      }
     }
     if (delta.content) {
       textAccum += delta.content;