@kenkaiiii/gg-ai 4.11.2 → 4.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -43,6 +43,7 @@ __export(index_exports, {
43
43
  palsuText: () => palsuText,
44
44
  palsuThinking: () => palsuThinking,
45
45
  palsuToolCall: () => palsuToolCall,
46
+ prewarmAnthropicCache: () => prewarmAnthropicCache,
46
47
  providerRegistry: () => providerRegistry,
47
48
  registerPalsuProvider: () => registerPalsuProvider,
48
49
  setProviderDiagnostic: () => setProviderDiagnostic,
@@ -335,7 +336,7 @@ var EventStream = class {
335
336
  }
336
337
  }
337
338
  };
338
- var StreamResult = class {
339
+ var StreamResult = class _StreamResult {
339
340
  response;
340
341
  buffer = [];
341
342
  done = false;
@@ -343,6 +344,18 @@ var StreamResult = class {
343
344
  resolveResponse;
344
345
  rejectResponse;
345
346
  resolveWait = null;
347
+ /**
348
+ * High-water mark: when the buffer exceeds this many unconsumed events,
349
+ * the pump pauses until the consumer drains below the low-water mark.
350
+ * Prevents unbounded memory growth when a consumer is slow.
351
+ * Only active when someone IS iterating — if nobody iterates (the `then()`
352
+ * path), backpressure is skipped so the pump can complete and resolve.
353
+ */
354
+ static HIGH_WATER = 5e3;
355
+ static LOW_WATER = 1e3;
356
+ iterating = false;
357
+ paused = false;
358
+ resolveDrain = null;
346
359
  constructor(generator, signal) {
347
360
  this.response = new Promise((resolve, reject) => {
348
361
  this.resolveResponse = resolve;
@@ -357,6 +370,13 @@ var StreamResult = class {
357
370
  this.buffer.push(next.value);
358
371
  this.resolveWait?.();
359
372
  this.resolveWait = null;
373
+ if (this.iterating && this.buffer.length > _StreamResult.HIGH_WATER) {
374
+ this.paused = true;
375
+ await new Promise((r) => {
376
+ this.resolveDrain = r;
377
+ });
378
+ this.paused = false;
379
+ }
360
380
  next = await this._nextWithAbort(generator, signal);
361
381
  }
362
382
  this.done = true;
@@ -395,11 +415,20 @@ var StreamResult = class {
395
415
  }
396
416
  }
397
417
  async *[Symbol.asyncIterator]() {
418
+ this.iterating = true;
398
419
  let index = 0;
399
420
  while (true) {
400
421
  while (index < this.buffer.length) {
401
422
  yield this.buffer[index++];
402
423
  }
424
+ if (this.paused && index > _StreamResult.LOW_WATER) {
425
+ this.resolveDrain?.();
426
+ this.resolveDrain = null;
427
+ }
428
+ if (index > 0 && !this.paused) {
429
+ this.buffer.splice(0, index);
430
+ index = 0;
431
+ }
403
432
  if (this.error) throw this.error;
404
433
  if (this.done) return;
405
434
  await new Promise((r) => {
@@ -412,16 +441,26 @@ var StreamResult = class {
412
441
  }
413
442
  }
414
443
  then(onfulfilled, onrejected) {
444
+ if (this.paused) {
445
+ this.paused = false;
446
+ this.resolveDrain?.();
447
+ this.resolveDrain = null;
448
+ }
415
449
  return this.response.then(onfulfilled, onrejected);
416
450
  }
417
451
  };
418
452
 
419
453
  // src/utils/zod-to-json-schema.ts
420
454
  var import_zod = require("zod");
455
+ var schemaCache = /* @__PURE__ */ new WeakMap();
421
456
  function zodToJsonSchema(schema) {
457
+ const cached = schemaCache.get(schema);
458
+ if (cached) return cached;
422
459
  const jsonSchema = import_zod.z.toJSONSchema(schema);
423
460
  const { $schema: _schema, ...rest } = jsonSchema;
424
- return normalizeRootForAnthropic(rest);
461
+ const normalized = normalizeRootForAnthropic(rest);
462
+ schemaCache.set(schema, normalized);
463
+ return normalized;
425
464
  }
426
465
  function resolveToolSchema(tool) {
427
466
  return tool.rawInputSchema ?? zodToJsonSchema(tool.parameters);
@@ -813,16 +852,17 @@ function toAnthropicThinking(level, maxTokens, model) {
813
852
  outputConfig: { effort }
814
853
  };
815
854
  }
855
+ const VISIBLE_FLOOR = 1024;
816
856
  const effectiveLevel = level === "xhigh" || level === "max" ? "high" : level;
817
857
  const budgetMap = {
818
- low: Math.max(1024, Math.floor(maxTokens * 0.25)),
819
- medium: Math.max(2048, Math.floor(maxTokens * 0.5)),
820
- high: Math.max(4096, maxTokens)
858
+ low: Math.max(1024, Math.floor(maxTokens * 0.2)),
859
+ medium: Math.max(2048, Math.floor(maxTokens * 0.45)),
860
+ high: Math.max(4096, Math.floor(maxTokens * 0.8))
821
861
  };
822
- const budget = budgetMap[effectiveLevel];
862
+ const budget = Math.max(0, Math.min(budgetMap[effectiveLevel], maxTokens - VISIBLE_FLOOR));
823
863
  return {
824
864
  thinking: { type: "enabled", budget_tokens: budget },
825
- maxTokens: maxTokens + budget
865
+ maxTokens
826
866
  };
827
867
  }
828
868
  function remapToolCallId(id, idMap) {
@@ -1028,26 +1068,83 @@ function parseToolArguments(argsJson) {
1028
1068
  }
1029
1069
 
1030
1070
  // src/providers/anthropic.ts
1071
+ var anthropicClientCache = /* @__PURE__ */ new Map();
1031
1072
  function createClient(options) {
1032
1073
  const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
1033
- return new import_sdk.default({
1074
+ const userAgent = isOAuth ? options.userAgent ?? "claude-cli/2.1.75 (external, cli)" : "";
1075
+ const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${userAgent}`;
1076
+ if (!options.fetch) {
1077
+ const cached = anthropicClientCache.get(cacheKey);
1078
+ if (cached) return cached;
1079
+ }
1080
+ const client = new import_sdk.default({
1034
1081
  ...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
1035
1082
  ...options.baseUrl ? { baseURL: options.baseUrl } : {},
1036
1083
  ...options.fetch ? { fetch: options.fetch } : {},
1037
- // Disable SDK retries — the agent loop has its own stall/overload retry
1038
- // logic that surfaces errors properly. SDK retries on 429s can cause
1039
- // multi-minute hangs when the provider stops responding mid-retry.
1040
1084
  maxRetries: 0,
1041
1085
  ...isOAuth ? {
1042
1086
  defaultHeaders: {
1043
- // Anthropic's OAuth edge validates the claude-cli version. Callers
1044
- // (ggcoder) resolve the live version at runtime; the literal here
1045
- // is the offline fallback for direct gg-ai consumers.
1046
- "user-agent": options.userAgent ?? "claude-cli/2.1.75 (external, cli)",
1087
+ "user-agent": userAgent,
1047
1088
  "x-app": "cli"
1048
1089
  }
1049
1090
  } : {}
1050
1091
  });
1092
+ if (!options.fetch) {
1093
+ if (anthropicClientCache.size >= 8) {
1094
+ const oldest = anthropicClientCache.keys().next().value;
1095
+ if (oldest) anthropicClientCache.delete(oldest);
1096
+ }
1097
+ anthropicClientCache.set(cacheKey, client);
1098
+ }
1099
+ return client;
1100
+ }
1101
+ async function prewarmAnthropicCache(options) {
1102
+ try {
1103
+ const client = createClient({
1104
+ apiKey: options.apiKey,
1105
+ baseUrl: options.baseUrl,
1106
+ userAgent: options.userAgent
1107
+ });
1108
+ const cacheControl = toAnthropicCacheControl(options.cacheRetention ?? "long", options.baseUrl);
1109
+ const { system, messages } = toAnthropicMessages(
1110
+ [
1111
+ { role: "system", content: options.system },
1112
+ { role: "user", content: "." }
1113
+ ],
1114
+ cacheControl
1115
+ );
1116
+ const isOAuth = options.apiKey.startsWith("sk-ant-oat");
1117
+ const fullSystem = isOAuth ? [
1118
+ {
1119
+ type: "text",
1120
+ text: "You are Claude Code, Anthropic's official CLI for Claude."
1121
+ },
1122
+ ...system ?? []
1123
+ ] : system;
1124
+ const tools = options.tools?.length ? toAnthropicTools(options.tools, {
1125
+ cacheControl,
1126
+ enableFineGrainedToolStreaming: true
1127
+ }) : void 0;
1128
+ await client.messages.create(
1129
+ {
1130
+ model: options.model,
1131
+ max_tokens: 1,
1132
+ messages,
1133
+ ...fullSystem ? { system: fullSystem } : {},
1134
+ ...tools ? {
1135
+ tools: [
1136
+ ...tools,
1137
+ ...options.serverTools ?? []
1138
+ ]
1139
+ } : {}
1140
+ },
1141
+ {
1142
+ signal: options.signal ?? void 0,
1143
+ ...isOAuth ? { headers: { "anthropic-beta": "claude-code-20250219,oauth-2025-04-20" } } : {}
1144
+ }
1145
+ );
1146
+ } catch {
1147
+ }
1051
1148
  }
1052
1149
  function streamAnthropic(options) {
1053
1150
  return new StreamResult(runStream(options), options.signal);
@@ -1627,13 +1724,27 @@ function extractOpenAIUsage(usage) {
1627
1724
  cacheRead
1628
1725
  };
1629
1726
  }
1727
+ var openaiClientCache = /* @__PURE__ */ new Map();
1630
1728
  function createClient2(options) {
1631
- return new import_openai.default({
1729
+ const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${JSON.stringify(options.defaultHeaders ?? {})}`;
1730
+ if (!options.fetch) {
1731
+ const cached = openaiClientCache.get(cacheKey);
1732
+ if (cached) return cached;
1733
+ }
1734
+ const client = new import_openai.default({
1632
1735
  apiKey: options.apiKey,
1633
1736
  ...options.baseUrl ? { baseURL: options.baseUrl } : {},
1634
1737
  ...options.fetch ? { fetch: options.fetch } : {},
1635
1738
  ...options.defaultHeaders ? { defaultHeaders: options.defaultHeaders } : {}
1636
1739
  });
1740
+ if (!options.fetch) {
1741
+ if (openaiClientCache.size >= 8) {
1742
+ const oldest = openaiClientCache.keys().next().value;
1743
+ if (oldest) openaiClientCache.delete(oldest);
1744
+ }
1745
+ openaiClientCache.set(cacheKey, client);
1746
+ }
1747
+ return client;
1637
1748
  }
1638
1749
  function streamOpenAI(options) {
1639
1750
  return new StreamResult(runStream2(options), options.signal);
@@ -2048,9 +2159,6 @@ async function* runStream3(options) {
2048
2159
  body.tools = toCodexTools(options.tools);
2049
2160
  }
2050
2161
  body.prompt_cache_key = normalizePromptCacheKey(options.promptCacheKey ?? "ggcoder");
2051
- if (options.cacheRetention === "long") {
2052
- body.prompt_cache_retention = "24h";
2053
- }
2054
2162
  if (options.temperature != null && !options.thinking) {
2055
2163
  body.temperature = options.temperature;
2056
2164
  }
@@ -3364,6 +3472,7 @@ function registerPalsuProvider(config) {
3364
3472
  palsuText,
3365
3473
  palsuThinking,
3366
3474
  palsuToolCall,
3475
+ prewarmAnthropicCache,
3367
3476
  providerRegistry,
3368
3477
  registerPalsuProvider,
3369
3478
  setProviderDiagnostic,