@kenkaiiii/gg-ai 4.11.3 → 4.12.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -43,6 +43,7 @@ __export(index_exports, {
43
43
  palsuText: () => palsuText,
44
44
  palsuThinking: () => palsuThinking,
45
45
  palsuToolCall: () => palsuToolCall,
46
+ prewarmAnthropicCache: () => prewarmAnthropicCache,
46
47
  providerRegistry: () => providerRegistry,
47
48
  registerPalsuProvider: () => registerPalsuProvider,
48
49
  setProviderDiagnostic: () => setProviderDiagnostic,
@@ -335,7 +336,7 @@ var EventStream = class {
335
336
  }
336
337
  }
337
338
  };
338
- var StreamResult = class {
339
+ var StreamResult = class _StreamResult {
339
340
  response;
340
341
  buffer = [];
341
342
  done = false;
@@ -343,6 +344,18 @@ var StreamResult = class {
343
344
  resolveResponse;
344
345
  rejectResponse;
345
346
  resolveWait = null;
347
+ /**
348
+ * High-water mark: when the buffer exceeds this many unconsumed events,
349
+ * the pump pauses until the consumer drains below the low-water mark.
350
+ * Prevents unbounded memory growth when a consumer is slow.
351
+ * Only active when someone IS iterating — if nobody iterates (the `then()`
352
+ * path), backpressure is skipped so the pump can complete and resolve.
353
+ */
354
+ static HIGH_WATER = 5e3;
355
+ static LOW_WATER = 1e3;
356
+ iterating = false;
357
+ paused = false;
358
+ resolveDrain = null;
346
359
  constructor(generator, signal) {
347
360
  this.response = new Promise((resolve, reject) => {
348
361
  this.resolveResponse = resolve;
@@ -357,6 +370,13 @@ var StreamResult = class {
357
370
  this.buffer.push(next.value);
358
371
  this.resolveWait?.();
359
372
  this.resolveWait = null;
373
+ if (this.iterating && this.buffer.length > _StreamResult.HIGH_WATER) {
374
+ this.paused = true;
375
+ await new Promise((r) => {
376
+ this.resolveDrain = r;
377
+ });
378
+ this.paused = false;
379
+ }
360
380
  next = await this._nextWithAbort(generator, signal);
361
381
  }
362
382
  this.done = true;
@@ -395,11 +415,20 @@ var StreamResult = class {
395
415
  }
396
416
  }
397
417
  async *[Symbol.asyncIterator]() {
418
+ this.iterating = true;
398
419
  let index = 0;
399
420
  while (true) {
400
421
  while (index < this.buffer.length) {
401
422
  yield this.buffer[index++];
402
423
  }
424
+ if (this.paused && index > _StreamResult.LOW_WATER) {
425
+ this.resolveDrain?.();
426
+ this.resolveDrain = null;
427
+ }
428
+ if (index > 0 && !this.paused) {
429
+ this.buffer.splice(0, index);
430
+ index = 0;
431
+ }
403
432
  if (this.error) throw this.error;
404
433
  if (this.done) return;
405
434
  await new Promise((r) => {
@@ -412,16 +441,26 @@ var StreamResult = class {
412
441
  }
413
442
  }
414
443
  then(onfulfilled, onrejected) {
444
+ if (this.paused) {
445
+ this.paused = false;
446
+ this.resolveDrain?.();
447
+ this.resolveDrain = null;
448
+ }
415
449
  return this.response.then(onfulfilled, onrejected);
416
450
  }
417
451
  };
418
452
 
419
453
  // src/utils/zod-to-json-schema.ts
420
454
  var import_zod = require("zod");
455
+ var schemaCache = /* @__PURE__ */ new WeakMap();
421
456
  function zodToJsonSchema(schema) {
457
+ const cached = schemaCache.get(schema);
458
+ if (cached) return cached;
422
459
  const jsonSchema = import_zod.z.toJSONSchema(schema);
423
460
  const { $schema: _schema, ...rest } = jsonSchema;
424
- return normalizeRootForAnthropic(rest);
461
+ const normalized = normalizeRootForAnthropic(rest);
462
+ schemaCache.set(schema, normalized);
463
+ return normalized;
425
464
  }
426
465
  function resolveToolSchema(tool) {
427
466
  return tool.rawInputSchema ?? zodToJsonSchema(tool.parameters);
@@ -1029,26 +1068,83 @@ function parseToolArguments(argsJson) {
1029
1068
  }
1030
1069
 
1031
1070
  // src/providers/anthropic.ts
1071
+ var anthropicClientCache = /* @__PURE__ */ new Map();
1032
1072
  function createClient(options) {
1033
1073
  const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
1034
- return new import_sdk.default({
1074
+ const userAgent = isOAuth ? options.userAgent ?? "claude-cli/2.1.75 (external, cli)" : "";
1075
+ const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${userAgent}`;
1076
+ if (!options.fetch) {
1077
+ const cached = anthropicClientCache.get(cacheKey);
1078
+ if (cached) return cached;
1079
+ }
1080
+ const client = new import_sdk.default({
1035
1081
  ...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
1036
1082
  ...options.baseUrl ? { baseURL: options.baseUrl } : {},
1037
1083
  ...options.fetch ? { fetch: options.fetch } : {},
1038
- // Disable SDK retries — the agent loop has its own stall/overload retry
1039
- // logic that surfaces errors properly. SDK retries on 429s can cause
1040
- // multi-minute hangs when the provider stops responding mid-retry.
1041
1084
  maxRetries: 0,
1042
1085
  ...isOAuth ? {
1043
1086
  defaultHeaders: {
1044
- // Anthropic's OAuth edge validates the claude-cli version. Callers
1045
- // (ggcoder) resolve the live version at runtime; the literal here
1046
- // is the offline fallback for direct gg-ai consumers.
1047
- "user-agent": options.userAgent ?? "claude-cli/2.1.75 (external, cli)",
1087
+ "user-agent": userAgent,
1048
1088
  "x-app": "cli"
1049
1089
  }
1050
1090
  } : {}
1051
1091
  });
1092
+ if (!options.fetch) {
1093
+ if (anthropicClientCache.size >= 8) {
1094
+ const oldest = anthropicClientCache.keys().next().value;
1095
+ if (oldest) anthropicClientCache.delete(oldest);
1096
+ }
1097
+ anthropicClientCache.set(cacheKey, client);
1098
+ }
1099
+ return client;
1100
+ }
1101
+ async function prewarmAnthropicCache(options) {
1102
+ try {
1103
+ const client = createClient({
1104
+ apiKey: options.apiKey,
1105
+ baseUrl: options.baseUrl,
1106
+ userAgent: options.userAgent
1107
+ });
1108
+ const cacheControl = toAnthropicCacheControl(options.cacheRetention ?? "long", options.baseUrl);
1109
+ const { system, messages } = toAnthropicMessages(
1110
+ [
1111
+ { role: "system", content: options.system },
1112
+ { role: "user", content: "." }
1113
+ ],
1114
+ cacheControl
1115
+ );
1116
+ const isOAuth = options.apiKey.startsWith("sk-ant-oat");
1117
+ const fullSystem = isOAuth ? [
1118
+ {
1119
+ type: "text",
1120
+ text: "You are Claude Code, Anthropic's official CLI for Claude."
1121
+ },
1122
+ ...system ?? []
1123
+ ] : system;
1124
+ const tools = options.tools?.length ? toAnthropicTools(options.tools, {
1125
+ cacheControl,
1126
+ enableFineGrainedToolStreaming: true
1127
+ }) : void 0;
1128
+ await client.messages.create(
1129
+ {
1130
+ model: options.model,
1131
+ max_tokens: 1,
1132
+ messages,
1133
+ ...fullSystem ? { system: fullSystem } : {},
1134
+ ...tools ? {
1135
+ tools: [
1136
+ ...tools,
1137
+ ...options.serverTools ?? []
1138
+ ]
1139
+ } : {}
1140
+ },
1141
+ {
1142
+ signal: options.signal ?? void 0,
1143
+ ...isOAuth ? { headers: { "anthropic-beta": "claude-code-20250219,oauth-2025-04-20" } } : {}
1144
+ }
1145
+ );
1146
+ } catch {
1147
+ }
1052
1148
  }
1053
1149
  function streamAnthropic(options) {
1054
1150
  return new StreamResult(runStream(options), options.signal);
@@ -1628,13 +1724,27 @@ function extractOpenAIUsage(usage) {
1628
1724
  cacheRead
1629
1725
  };
1630
1726
  }
1727
+ var openaiClientCache = /* @__PURE__ */ new Map();
1631
1728
  function createClient2(options) {
1632
- return new import_openai.default({
1729
+ const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${JSON.stringify(options.defaultHeaders ?? {})}`;
1730
+ if (!options.fetch) {
1731
+ const cached = openaiClientCache.get(cacheKey);
1732
+ if (cached) return cached;
1733
+ }
1734
+ const client = new import_openai.default({
1633
1735
  apiKey: options.apiKey,
1634
1736
  ...options.baseUrl ? { baseURL: options.baseUrl } : {},
1635
1737
  ...options.fetch ? { fetch: options.fetch } : {},
1636
1738
  ...options.defaultHeaders ? { defaultHeaders: options.defaultHeaders } : {}
1637
1739
  });
1740
+ if (!options.fetch) {
1741
+ if (openaiClientCache.size >= 8) {
1742
+ const oldest = openaiClientCache.keys().next().value;
1743
+ if (oldest) openaiClientCache.delete(oldest);
1744
+ }
1745
+ openaiClientCache.set(cacheKey, client);
1746
+ }
1747
+ return client;
1638
1748
  }
1639
1749
  function streamOpenAI(options) {
1640
1750
  return new StreamResult(runStream2(options), options.signal);
@@ -2049,9 +2159,6 @@ async function* runStream3(options) {
2049
2159
  body.tools = toCodexTools(options.tools);
2050
2160
  }
2051
2161
  body.prompt_cache_key = normalizePromptCacheKey(options.promptCacheKey ?? "ggcoder");
2052
- if (options.cacheRetention === "long") {
2053
- body.prompt_cache_retention = "24h";
2054
- }
2055
2162
  if (options.temperature != null && !options.thinking) {
2056
2163
  body.temperature = options.temperature;
2057
2164
  }
@@ -3365,6 +3472,7 @@ function registerPalsuProvider(config) {
3365
3472
  palsuText,
3366
3473
  palsuThinking,
3367
3474
  palsuToolCall,
3475
+ prewarmAnthropicCache,
3368
3476
  providerRegistry,
3369
3477
  registerPalsuProvider,
3370
3478
  setProviderDiagnostic,