@kenkaiiii/gg-ai 4.11.3 → 4.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -255,6 +255,18 @@ declare class StreamResult implements AsyncIterable<StreamEvent> {
255
255
  private resolveResponse;
256
256
  private rejectResponse;
257
257
  private resolveWait;
258
+ /**
259
+ * High-water mark: when the buffer exceeds this many unconsumed events,
260
+ * the pump pauses until the consumer drains below the low-water mark.
261
+ * Prevents unbounded memory growth when a consumer is slow.
262
+ * Only active when someone IS iterating — if nobody iterates (the `then()`
263
+ * path), backpressure is skipped so the pump can complete and resolve.
264
+ */
265
+ private static readonly HIGH_WATER;
266
+ private static readonly LOW_WATER;
267
+ private iterating;
268
+ private paused;
269
+ private resolveDrain;
258
270
  constructor(generator: AsyncGenerator<StreamEvent, StreamResponse>, signal?: AbortSignal);
259
271
  private pump;
260
272
  private _nextWithAbort;
@@ -451,6 +463,28 @@ declare function toOpenAIMessages(messages: Message[], options?: {
451
463
  supportsImages?: boolean;
452
464
  }): OpenAI.ChatCompletionMessageParam[];
453
465
 
466
+ /**
467
+ * Fire a minimal `max_tokens: 1` request that populates the Anthropic prompt
468
+ * cache with the system prompt + tools prefix, so the first real user turn is
469
+ * a cache read instead of a cold cache write. Best-effort: any error is
470
+ * swallowed so a failed pre-warm never blocks the session.
471
+ *
472
+ * Called by AgentSession when speedProfile is "optimized", before the first
473
+ * real agent-loop turn. The cache TTL follows the `cacheRetention` option —
474
+ * pass "long" (1 h) so the pre-warm survives until the user's first message.
475
+ */
476
+ declare function prewarmAnthropicCache(options: {
477
+ apiKey: string;
478
+ model: string;
479
+ system: string;
480
+ tools?: StreamOptions["tools"];
481
+ serverTools?: StreamOptions["serverTools"];
482
+ baseUrl?: string;
483
+ userAgent?: string;
484
+ cacheRetention?: StreamOptions["cacheRetention"];
485
+ signal?: AbortSignal;
486
+ }): Promise<void>;
487
+
454
488
  interface PalsuProviderState {
455
489
  callCount: number;
456
490
  }
@@ -520,4 +554,4 @@ interface PalsuProviderConfig {
520
554
  */
521
555
  declare function registerPalsuProvider(config?: PalsuProviderConfig): PalsuProviderHandle;
522
556
 
523
- export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
557
+ export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, prewarmAnthropicCache, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
package/dist/index.d.ts CHANGED
@@ -255,6 +255,18 @@ declare class StreamResult implements AsyncIterable<StreamEvent> {
255
255
  private resolveResponse;
256
256
  private rejectResponse;
257
257
  private resolveWait;
258
+ /**
259
+ * High-water mark: when the buffer exceeds this many unconsumed events,
260
+ * the pump pauses until the consumer drains below the low-water mark.
261
+ * Prevents unbounded memory growth when a consumer is slow.
262
+ * Only active when someone IS iterating — if nobody iterates (the `then()`
263
+ * path), backpressure is skipped so the pump can complete and resolve.
264
+ */
265
+ private static readonly HIGH_WATER;
266
+ private static readonly LOW_WATER;
267
+ private iterating;
268
+ private paused;
269
+ private resolveDrain;
258
270
  constructor(generator: AsyncGenerator<StreamEvent, StreamResponse>, signal?: AbortSignal);
259
271
  private pump;
260
272
  private _nextWithAbort;
@@ -451,6 +463,28 @@ declare function toOpenAIMessages(messages: Message[], options?: {
451
463
  supportsImages?: boolean;
452
464
  }): OpenAI.ChatCompletionMessageParam[];
453
465
 
466
+ /**
467
+ * Fire a minimal `max_tokens: 1` request that populates the Anthropic prompt
468
+ * cache with the system prompt + tools prefix, so the first real user turn is
469
+ * a cache read instead of a cold cache write. Best-effort: any error is
470
+ * swallowed so a failed pre-warm never blocks the session.
471
+ *
472
+ * Called by AgentSession when speedProfile is "optimized", before the first
473
+ * real agent-loop turn. The cache TTL follows the `cacheRetention` option —
474
+ * pass "long" (1 h) so the pre-warm survives until the user's first message.
475
+ */
476
+ declare function prewarmAnthropicCache(options: {
477
+ apiKey: string;
478
+ model: string;
479
+ system: string;
480
+ tools?: StreamOptions["tools"];
481
+ serverTools?: StreamOptions["serverTools"];
482
+ baseUrl?: string;
483
+ userAgent?: string;
484
+ cacheRetention?: StreamOptions["cacheRetention"];
485
+ signal?: AbortSignal;
486
+ }): Promise<void>;
487
+
454
488
  interface PalsuProviderState {
455
489
  callCount: number;
456
490
  }
@@ -520,4 +554,4 @@ interface PalsuProviderConfig {
520
554
  */
521
555
  declare function registerPalsuProvider(config?: PalsuProviderConfig): PalsuProviderHandle;
522
556
 
523
- export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
557
+ export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, prewarmAnthropicCache, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
package/dist/index.js CHANGED
@@ -281,7 +281,7 @@ var EventStream = class {
281
281
  }
282
282
  }
283
283
  };
284
- var StreamResult = class {
284
+ var StreamResult = class _StreamResult {
285
285
  response;
286
286
  buffer = [];
287
287
  done = false;
@@ -289,6 +289,18 @@ var StreamResult = class {
289
289
  resolveResponse;
290
290
  rejectResponse;
291
291
  resolveWait = null;
292
+ /**
293
+ * High-water mark: when the buffer exceeds this many unconsumed events,
294
+ * the pump pauses until the consumer drains below the low-water mark.
295
+ * Prevents unbounded memory growth when a consumer is slow.
296
+ * Only active when someone IS iterating — if nobody iterates (the `then()`
297
+ * path), backpressure is skipped so the pump can complete and resolve.
298
+ */
299
+ static HIGH_WATER = 5e3;
300
+ static LOW_WATER = 1e3;
301
+ iterating = false;
302
+ paused = false;
303
+ resolveDrain = null;
292
304
  constructor(generator, signal) {
293
305
  this.response = new Promise((resolve, reject) => {
294
306
  this.resolveResponse = resolve;
@@ -303,6 +315,13 @@ var StreamResult = class {
303
315
  this.buffer.push(next.value);
304
316
  this.resolveWait?.();
305
317
  this.resolveWait = null;
318
+ if (this.iterating && this.buffer.length > _StreamResult.HIGH_WATER) {
319
+ this.paused = true;
320
+ await new Promise((r) => {
321
+ this.resolveDrain = r;
322
+ });
323
+ this.paused = false;
324
+ }
306
325
  next = await this._nextWithAbort(generator, signal);
307
326
  }
308
327
  this.done = true;
@@ -341,11 +360,20 @@ var StreamResult = class {
341
360
  }
342
361
  }
343
362
  async *[Symbol.asyncIterator]() {
363
+ this.iterating = true;
344
364
  let index = 0;
345
365
  while (true) {
346
366
  while (index < this.buffer.length) {
347
367
  yield this.buffer[index++];
348
368
  }
369
+ if (this.paused && index > _StreamResult.LOW_WATER) {
370
+ this.resolveDrain?.();
371
+ this.resolveDrain = null;
372
+ }
373
+ if (index > 0 && !this.paused) {
374
+ this.buffer.splice(0, index);
375
+ index = 0;
376
+ }
349
377
  if (this.error) throw this.error;
350
378
  if (this.done) return;
351
379
  await new Promise((r) => {
@@ -358,16 +386,26 @@ var StreamResult = class {
358
386
  }
359
387
  }
360
388
  then(onfulfilled, onrejected) {
389
+ if (this.paused) {
390
+ this.paused = false;
391
+ this.resolveDrain?.();
392
+ this.resolveDrain = null;
393
+ }
361
394
  return this.response.then(onfulfilled, onrejected);
362
395
  }
363
396
  };
364
397
 
365
398
  // src/utils/zod-to-json-schema.ts
366
399
  import { z } from "zod";
400
+ var schemaCache = /* @__PURE__ */ new WeakMap();
367
401
  function zodToJsonSchema(schema) {
402
+ const cached = schemaCache.get(schema);
403
+ if (cached) return cached;
368
404
  const jsonSchema = z.toJSONSchema(schema);
369
405
  const { $schema: _schema, ...rest } = jsonSchema;
370
- return normalizeRootForAnthropic(rest);
406
+ const normalized = normalizeRootForAnthropic(rest);
407
+ schemaCache.set(schema, normalized);
408
+ return normalized;
371
409
  }
372
410
  function resolveToolSchema(tool) {
373
411
  return tool.rawInputSchema ?? zodToJsonSchema(tool.parameters);
@@ -975,26 +1013,83 @@ function parseToolArguments(argsJson) {
975
1013
  }
976
1014
 
977
1015
  // src/providers/anthropic.ts
1016
+ var anthropicClientCache = /* @__PURE__ */ new Map();
978
1017
  function createClient(options) {
979
1018
  const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
980
- return new Anthropic({
1019
+ const userAgent = isOAuth ? options.userAgent ?? "claude-cli/2.1.75 (external, cli)" : "";
1020
+ const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${userAgent}`;
1021
+ if (!options.fetch) {
1022
+ const cached = anthropicClientCache.get(cacheKey);
1023
+ if (cached) return cached;
1024
+ }
1025
+ const client = new Anthropic({
981
1026
  ...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
982
1027
  ...options.baseUrl ? { baseURL: options.baseUrl } : {},
983
1028
  ...options.fetch ? { fetch: options.fetch } : {},
984
- // Disable SDK retries — the agent loop has its own stall/overload retry
985
- // logic that surfaces errors properly. SDK retries on 429s can cause
986
- // multi-minute hangs when the provider stops responding mid-retry.
987
1029
  maxRetries: 0,
988
1030
  ...isOAuth ? {
989
1031
  defaultHeaders: {
990
- // Anthropic's OAuth edge validates the claude-cli version. Callers
991
- // (ggcoder) resolve the live version at runtime; the literal here
992
- // is the offline fallback for direct gg-ai consumers.
993
- "user-agent": options.userAgent ?? "claude-cli/2.1.75 (external, cli)",
1032
+ "user-agent": userAgent,
994
1033
  "x-app": "cli"
995
1034
  }
996
1035
  } : {}
997
1036
  });
1037
+ if (!options.fetch) {
1038
+ if (anthropicClientCache.size >= 8) {
1039
+ const oldest = anthropicClientCache.keys().next().value;
1040
+ if (oldest) anthropicClientCache.delete(oldest);
1041
+ }
1042
+ anthropicClientCache.set(cacheKey, client);
1043
+ }
1044
+ return client;
1045
+ }
1046
+ async function prewarmAnthropicCache(options) {
1047
+ try {
1048
+ const client = createClient({
1049
+ apiKey: options.apiKey,
1050
+ baseUrl: options.baseUrl,
1051
+ userAgent: options.userAgent
1052
+ });
1053
+ const cacheControl = toAnthropicCacheControl(options.cacheRetention ?? "long", options.baseUrl);
1054
+ const { system, messages } = toAnthropicMessages(
1055
+ [
1056
+ { role: "system", content: options.system },
1057
+ { role: "user", content: "." }
1058
+ ],
1059
+ cacheControl
1060
+ );
1061
+ const isOAuth = options.apiKey.startsWith("sk-ant-oat");
1062
+ const fullSystem = isOAuth ? [
1063
+ {
1064
+ type: "text",
1065
+ text: "You are Claude Code, Anthropic's official CLI for Claude."
1066
+ },
1067
+ ...system ?? []
1068
+ ] : system;
1069
+ const tools = options.tools?.length ? toAnthropicTools(options.tools, {
1070
+ cacheControl,
1071
+ enableFineGrainedToolStreaming: true
1072
+ }) : void 0;
1073
+ await client.messages.create(
1074
+ {
1075
+ model: options.model,
1076
+ max_tokens: 1,
1077
+ messages,
1078
+ ...fullSystem ? { system: fullSystem } : {},
1079
+ ...tools ? {
1080
+ tools: [
1081
+ ...tools,
1082
+ ...options.serverTools ?? []
1083
+ ]
1084
+ } : {}
1085
+ },
1086
+ {
1087
+ signal: options.signal ?? void 0,
1088
+ ...isOAuth ? { headers: { "anthropic-beta": "claude-code-20250219,oauth-2025-04-20" } } : {}
1089
+ }
1090
+ );
1091
+ } catch {
1092
+ }
998
1093
  }
999
1094
  function streamAnthropic(options) {
1000
1095
  return new StreamResult(runStream(options), options.signal);
@@ -1574,13 +1669,27 @@ function extractOpenAIUsage(usage) {
1574
1669
  cacheRead
1575
1670
  };
1576
1671
  }
1672
+ var openaiClientCache = /* @__PURE__ */ new Map();
1577
1673
  function createClient2(options) {
1578
- return new OpenAI({
1674
+ const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${JSON.stringify(options.defaultHeaders ?? {})}`;
1675
+ if (!options.fetch) {
1676
+ const cached = openaiClientCache.get(cacheKey);
1677
+ if (cached) return cached;
1678
+ }
1679
+ const client = new OpenAI({
1579
1680
  apiKey: options.apiKey,
1580
1681
  ...options.baseUrl ? { baseURL: options.baseUrl } : {},
1581
1682
  ...options.fetch ? { fetch: options.fetch } : {},
1582
1683
  ...options.defaultHeaders ? { defaultHeaders: options.defaultHeaders } : {}
1583
1684
  });
1685
+ if (!options.fetch) {
1686
+ if (openaiClientCache.size >= 8) {
1687
+ const oldest = openaiClientCache.keys().next().value;
1688
+ if (oldest) openaiClientCache.delete(oldest);
1689
+ }
1690
+ openaiClientCache.set(cacheKey, client);
1691
+ }
1692
+ return client;
1584
1693
  }
1585
1694
  function streamOpenAI(options) {
1586
1695
  return new StreamResult(runStream2(options), options.signal);
@@ -1995,9 +2104,6 @@ async function* runStream3(options) {
1995
2104
  body.tools = toCodexTools(options.tools);
1996
2105
  }
1997
2106
  body.prompt_cache_key = normalizePromptCacheKey(options.promptCacheKey ?? "ggcoder");
1998
- if (options.cacheRetention === "long") {
1999
- body.prompt_cache_retention = "24h";
2000
- }
2001
2107
  if (options.temperature != null && !options.thinking) {
2002
2108
  body.temperature = options.temperature;
2003
2109
  }
@@ -3310,6 +3416,7 @@ export {
3310
3416
  palsuText,
3311
3417
  palsuThinking,
3312
3418
  palsuToolCall,
3419
+ prewarmAnthropicCache,
3313
3420
  providerRegistry,
3314
3421
  registerPalsuProvider,
3315
3422
  setProviderDiagnostic,