npm - @kenkaiiii/gg-ai - Versions diffs - 4.3.2 → 4.3.4 - Mend

@kenkaiiii/gg-ai 4.3.2 → 4.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.cjs CHANGED

@@ -472,22 +472,37 @@ function normalizeOpenAIStopReason(reason) {
 }
 // src/providers/anthropic.ts
+var clientCache = /* @__PURE__ */ new Map();
+function getOrCreateClient(options) {
+  const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
+  const key = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${isOAuth}`;
+  let client = clientCache.get(key);
+  if (!client) {
+    client = new import_sdk.default({
+      ...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
+      ...options.baseUrl ? { baseURL: options.baseUrl } : {},
+      ...options.fetch ? { fetch: options.fetch } : {},
+      // Disable SDK-level retries — the agent loop handles retries itself with
+      // stall detection and context compaction.  SDK retries on abort just cycle
+      // through the already-aborted signal, wasting time.
+      maxRetries: 0,
+      ...isOAuth ? {
+        defaultHeaders: {
+          "user-agent": "claude-cli/2.1.75",
+          "x-app": "cli"
+        }
+      } : {}
+    });
+    clientCache.set(key, client);
+  }
+  return client;
+}
 function streamAnthropic(options) {
   return new StreamResult(runStream(options));
 }
 async function* runStream(options) {
+  const client = getOrCreateClient(options);
   const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
-  const client = new import_sdk.default({
-    ...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
-    ...options.baseUrl ? { baseURL: options.baseUrl } : {},
-    ...options.fetch ? { fetch: options.fetch } : {},
-    ...isOAuth ? {
-      defaultHeaders: {
-        "user-agent": "claude-cli/2.1.75",
-        "x-app": "cli"
-      }
-    } : {}
-  });
   const cacheControl = toAnthropicCacheControl(options.cacheRetention, options.baseUrl);
   const { system: rawSystem, messages } = toAnthropicMessages(options.messages, cacheControl);
   const system = isOAuth ? [
@@ -751,16 +766,26 @@ function toError(err) {
 // src/providers/openai.ts
 var import_openai = __toESM(require("openai"), 1);
+var clientCache2 = /* @__PURE__ */ new Map();
+function getOrCreateClient2(options) {
+  const key = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}`;
+  let client = clientCache2.get(key);
+  if (!client) {
+    client = new import_openai.default({
+      apiKey: options.apiKey,
+      ...options.baseUrl ? { baseURL: options.baseUrl } : {},
+      ...options.fetch ? { fetch: options.fetch } : {}
+    });
+    clientCache2.set(key, client);
+  }
+  return client;
+}
 function streamOpenAI(options) {
   return new StreamResult(runStream2(options));
 }
 async function* runStream2(options) {
   const providerName = options.provider ?? "openai";
-  const client = new import_openai.default({
-    apiKey: options.apiKey,
-    ...options.baseUrl ? { baseURL: options.baseUrl } : {},
-    ...options.fetch ? { fetch: options.fetch } : {}
-  });
+  const client = getOrCreateClient2(options);
   const usesThinkingParam = options.provider === "glm" || options.provider === "moonshot" || options.provider === "xiaomi";
   const messages = toOpenAIMessages(options.messages, { provider: options.provider });
   const defaultTemp = options.provider === "glm" ? 0.6 : void 0;
@@ -769,7 +794,7 @@ async function* runStream2(options) {
     model: options.model,
     messages,
     stream: true,
-    ...options.maxTokens ? options.provider === "xiaomi" ? { max_completion_tokens: options.maxTokens } : { max_tokens: options.maxTokens } : {},
+    ...options.maxTokens ? { max_tokens: options.maxTokens } : {},
     ...effectiveTemp != null && !options.thinking ? { temperature: effectiveTemp } : {},
     ...options.topP != null ? { top_p: options.topP } : {},
     ...options.stop ? { stop: options.stop } : {},
@@ -789,7 +814,7 @@ async function* runStream2(options) {
   if (usesThinkingParam) {
     if (options.thinking) {
       params.thinking = { type: "enabled" };
-    } else if (options.provider !== "xiaomi") {
+    } else {
       params.thinking = { type: "disabled" };
     }
   }
@@ -838,7 +863,9 @@ async function* runStream2(options) {
     const reasoningContent = delta.reasoning_content;
     if (typeof reasoningContent === "string" && reasoningContent) {
       thinkingAccum += reasoningContent;
-      yield { type: "thinking_delta", text: reasoningContent };
+      if (options.thinking) {
+        yield { type: "thinking_delta", text: reasoningContent };
+      }
     }
     if (delta.content) {
       textAccum += delta.content;