@kenkaiiii/gg-ai 4.11.2 → 4.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +128 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +35 -1
- package/dist/index.d.ts +35 -1
- package/dist/index.js +127 -19
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -255,6 +255,18 @@ declare class StreamResult implements AsyncIterable<StreamEvent> {
|
|
|
255
255
|
private resolveResponse;
|
|
256
256
|
private rejectResponse;
|
|
257
257
|
private resolveWait;
|
|
258
|
+
/**
|
|
259
|
+
* High-water mark: when the buffer exceeds this many unconsumed events,
|
|
260
|
+
* the pump pauses until the consumer drains below the low-water mark.
|
|
261
|
+
* Prevents unbounded memory growth when a consumer is slow.
|
|
262
|
+
* Only active when someone IS iterating — if nobody iterates (the `then()`
|
|
263
|
+
* path), backpressure is skipped so the pump can complete and resolve.
|
|
264
|
+
*/
|
|
265
|
+
private static readonly HIGH_WATER;
|
|
266
|
+
private static readonly LOW_WATER;
|
|
267
|
+
private iterating;
|
|
268
|
+
private paused;
|
|
269
|
+
private resolveDrain;
|
|
258
270
|
constructor(generator: AsyncGenerator<StreamEvent, StreamResponse>, signal?: AbortSignal);
|
|
259
271
|
private pump;
|
|
260
272
|
private _nextWithAbort;
|
|
@@ -451,6 +463,28 @@ declare function toOpenAIMessages(messages: Message[], options?: {
|
|
|
451
463
|
supportsImages?: boolean;
|
|
452
464
|
}): OpenAI.ChatCompletionMessageParam[];
|
|
453
465
|
|
|
466
|
+
/**
|
|
467
|
+
* Fire a minimal `max_tokens: 1` request that populates the Anthropic prompt
|
|
468
|
+
* cache with the system prompt + tools prefix, so the first real user turn is
|
|
469
|
+
* a cache read instead of a cold cache write. Best-effort: any error is
|
|
470
|
+
* swallowed so a failed pre-warm never blocks the session.
|
|
471
|
+
*
|
|
472
|
+
* Called by AgentSession when speedProfile is "optimized", before the first
|
|
473
|
+
* real agent-loop turn. The cache TTL follows the `cacheRetention` option —
|
|
474
|
+
* pass "long" (1 h) so the pre-warm survives until the user's first message.
|
|
475
|
+
*/
|
|
476
|
+
declare function prewarmAnthropicCache(options: {
|
|
477
|
+
apiKey: string;
|
|
478
|
+
model: string;
|
|
479
|
+
system: string;
|
|
480
|
+
tools?: StreamOptions["tools"];
|
|
481
|
+
serverTools?: StreamOptions["serverTools"];
|
|
482
|
+
baseUrl?: string;
|
|
483
|
+
userAgent?: string;
|
|
484
|
+
cacheRetention?: StreamOptions["cacheRetention"];
|
|
485
|
+
signal?: AbortSignal;
|
|
486
|
+
}): Promise<void>;
|
|
487
|
+
|
|
454
488
|
interface PalsuProviderState {
|
|
455
489
|
callCount: number;
|
|
456
490
|
}
|
|
@@ -520,4 +554,4 @@ interface PalsuProviderConfig {
|
|
|
520
554
|
*/
|
|
521
555
|
declare function registerPalsuProvider(config?: PalsuProviderConfig): PalsuProviderHandle;
|
|
522
556
|
|
|
523
|
-
export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
|
|
557
|
+
export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, prewarmAnthropicCache, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
|
package/dist/index.d.ts
CHANGED
|
@@ -255,6 +255,18 @@ declare class StreamResult implements AsyncIterable<StreamEvent> {
|
|
|
255
255
|
private resolveResponse;
|
|
256
256
|
private rejectResponse;
|
|
257
257
|
private resolveWait;
|
|
258
|
+
/**
|
|
259
|
+
* High-water mark: when the buffer exceeds this many unconsumed events,
|
|
260
|
+
* the pump pauses until the consumer drains below the low-water mark.
|
|
261
|
+
* Prevents unbounded memory growth when a consumer is slow.
|
|
262
|
+
* Only active when someone IS iterating — if nobody iterates (the `then()`
|
|
263
|
+
* path), backpressure is skipped so the pump can complete and resolve.
|
|
264
|
+
*/
|
|
265
|
+
private static readonly HIGH_WATER;
|
|
266
|
+
private static readonly LOW_WATER;
|
|
267
|
+
private iterating;
|
|
268
|
+
private paused;
|
|
269
|
+
private resolveDrain;
|
|
258
270
|
constructor(generator: AsyncGenerator<StreamEvent, StreamResponse>, signal?: AbortSignal);
|
|
259
271
|
private pump;
|
|
260
272
|
private _nextWithAbort;
|
|
@@ -451,6 +463,28 @@ declare function toOpenAIMessages(messages: Message[], options?: {
|
|
|
451
463
|
supportsImages?: boolean;
|
|
452
464
|
}): OpenAI.ChatCompletionMessageParam[];
|
|
453
465
|
|
|
466
|
+
/**
|
|
467
|
+
* Fire a minimal `max_tokens: 1` request that populates the Anthropic prompt
|
|
468
|
+
* cache with the system prompt + tools prefix, so the first real user turn is
|
|
469
|
+
* a cache read instead of a cold cache write. Best-effort: any error is
|
|
470
|
+
* swallowed so a failed pre-warm never blocks the session.
|
|
471
|
+
*
|
|
472
|
+
* Called by AgentSession when speedProfile is "optimized", before the first
|
|
473
|
+
* real agent-loop turn. The cache TTL follows the `cacheRetention` option —
|
|
474
|
+
* pass "long" (1 h) so the pre-warm survives until the user's first message.
|
|
475
|
+
*/
|
|
476
|
+
declare function prewarmAnthropicCache(options: {
|
|
477
|
+
apiKey: string;
|
|
478
|
+
model: string;
|
|
479
|
+
system: string;
|
|
480
|
+
tools?: StreamOptions["tools"];
|
|
481
|
+
serverTools?: StreamOptions["serverTools"];
|
|
482
|
+
baseUrl?: string;
|
|
483
|
+
userAgent?: string;
|
|
484
|
+
cacheRetention?: StreamOptions["cacheRetention"];
|
|
485
|
+
signal?: AbortSignal;
|
|
486
|
+
}): Promise<void>;
|
|
487
|
+
|
|
454
488
|
interface PalsuProviderState {
|
|
455
489
|
callCount: number;
|
|
456
490
|
}
|
|
@@ -520,4 +554,4 @@ interface PalsuProviderConfig {
|
|
|
520
554
|
*/
|
|
521
555
|
declare function registerPalsuProvider(config?: PalsuProviderConfig): PalsuProviderHandle;
|
|
522
556
|
|
|
523
|
-
export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
|
|
557
|
+
export { type AssistantMessage, type CacheRetention, type ContentPart, type DoneEvent, type ErrorEvent, type ErrorSource, EventStream, type FormattedError, GGAIError, type ImageContent, type Message, type PalsuModelConfig, type PalsuModelHandle, type PalsuProviderConfig, type PalsuProviderHandle, type PalsuProviderState, type PalsuResponse, type PalsuResponseFactory, type Provider, type ProviderDiagnosticFn, type ProviderEntry, ProviderError, type ProviderStreamFn, type RawContent, type ServerToolCall, type ServerToolCallEvent, type ServerToolDefinition, type ServerToolResult, type ServerToolResultEvent, type StopReason, type StreamEvent, type StreamOptions, type StreamResponse, StreamResult, type SystemMessage, type TextContent, type TextDeltaEvent, type ThinkingContent, type ThinkingDeltaEvent, type ThinkingLevel, type Tool, type ToolCall, type ToolCallDeltaEvent, type ToolCallDoneEvent, type ToolChoice, type ToolResult, type ToolResultContent, type ToolResultMessage, type Usage, type UserMessage, type VideoContent, classifyProviderError, formatError, formatErrorForDisplay, isHardBillingMessage, isUsageLimitError, palsuAssistantMessage, palsuText, palsuThinking, palsuToolCall, prewarmAnthropicCache, providerRegistry, registerPalsuProvider, setProviderDiagnostic, stream, toAnthropicMessages, toOpenAIMessages };
|
package/dist/index.js
CHANGED
|
@@ -281,7 +281,7 @@ var EventStream = class {
|
|
|
281
281
|
}
|
|
282
282
|
}
|
|
283
283
|
};
|
|
284
|
-
var StreamResult = class {
|
|
284
|
+
var StreamResult = class _StreamResult {
|
|
285
285
|
response;
|
|
286
286
|
buffer = [];
|
|
287
287
|
done = false;
|
|
@@ -289,6 +289,18 @@ var StreamResult = class {
|
|
|
289
289
|
resolveResponse;
|
|
290
290
|
rejectResponse;
|
|
291
291
|
resolveWait = null;
|
|
292
|
+
/**
|
|
293
|
+
* High-water mark: when the buffer exceeds this many unconsumed events,
|
|
294
|
+
* the pump pauses until the consumer drains below the low-water mark.
|
|
295
|
+
* Prevents unbounded memory growth when a consumer is slow.
|
|
296
|
+
* Only active when someone IS iterating — if nobody iterates (the `then()`
|
|
297
|
+
* path), backpressure is skipped so the pump can complete and resolve.
|
|
298
|
+
*/
|
|
299
|
+
static HIGH_WATER = 5e3;
|
|
300
|
+
static LOW_WATER = 1e3;
|
|
301
|
+
iterating = false;
|
|
302
|
+
paused = false;
|
|
303
|
+
resolveDrain = null;
|
|
292
304
|
constructor(generator, signal) {
|
|
293
305
|
this.response = new Promise((resolve, reject) => {
|
|
294
306
|
this.resolveResponse = resolve;
|
|
@@ -303,6 +315,13 @@ var StreamResult = class {
|
|
|
303
315
|
this.buffer.push(next.value);
|
|
304
316
|
this.resolveWait?.();
|
|
305
317
|
this.resolveWait = null;
|
|
318
|
+
if (this.iterating && this.buffer.length > _StreamResult.HIGH_WATER) {
|
|
319
|
+
this.paused = true;
|
|
320
|
+
await new Promise((r) => {
|
|
321
|
+
this.resolveDrain = r;
|
|
322
|
+
});
|
|
323
|
+
this.paused = false;
|
|
324
|
+
}
|
|
306
325
|
next = await this._nextWithAbort(generator, signal);
|
|
307
326
|
}
|
|
308
327
|
this.done = true;
|
|
@@ -341,11 +360,20 @@ var StreamResult = class {
|
|
|
341
360
|
}
|
|
342
361
|
}
|
|
343
362
|
async *[Symbol.asyncIterator]() {
|
|
363
|
+
this.iterating = true;
|
|
344
364
|
let index = 0;
|
|
345
365
|
while (true) {
|
|
346
366
|
while (index < this.buffer.length) {
|
|
347
367
|
yield this.buffer[index++];
|
|
348
368
|
}
|
|
369
|
+
if (this.paused && index > _StreamResult.LOW_WATER) {
|
|
370
|
+
this.resolveDrain?.();
|
|
371
|
+
this.resolveDrain = null;
|
|
372
|
+
}
|
|
373
|
+
if (index > 0 && !this.paused) {
|
|
374
|
+
this.buffer.splice(0, index);
|
|
375
|
+
index = 0;
|
|
376
|
+
}
|
|
349
377
|
if (this.error) throw this.error;
|
|
350
378
|
if (this.done) return;
|
|
351
379
|
await new Promise((r) => {
|
|
@@ -358,16 +386,26 @@ var StreamResult = class {
|
|
|
358
386
|
}
|
|
359
387
|
}
|
|
360
388
|
then(onfulfilled, onrejected) {
|
|
389
|
+
if (this.paused) {
|
|
390
|
+
this.paused = false;
|
|
391
|
+
this.resolveDrain?.();
|
|
392
|
+
this.resolveDrain = null;
|
|
393
|
+
}
|
|
361
394
|
return this.response.then(onfulfilled, onrejected);
|
|
362
395
|
}
|
|
363
396
|
};
|
|
364
397
|
|
|
365
398
|
// src/utils/zod-to-json-schema.ts
|
|
366
399
|
import { z } from "zod";
|
|
400
|
+
var schemaCache = /* @__PURE__ */ new WeakMap();
|
|
367
401
|
function zodToJsonSchema(schema) {
|
|
402
|
+
const cached = schemaCache.get(schema);
|
|
403
|
+
if (cached) return cached;
|
|
368
404
|
const jsonSchema = z.toJSONSchema(schema);
|
|
369
405
|
const { $schema: _schema, ...rest } = jsonSchema;
|
|
370
|
-
|
|
406
|
+
const normalized = normalizeRootForAnthropic(rest);
|
|
407
|
+
schemaCache.set(schema, normalized);
|
|
408
|
+
return normalized;
|
|
371
409
|
}
|
|
372
410
|
function resolveToolSchema(tool) {
|
|
373
411
|
return tool.rawInputSchema ?? zodToJsonSchema(tool.parameters);
|
|
@@ -759,16 +797,17 @@ function toAnthropicThinking(level, maxTokens, model) {
|
|
|
759
797
|
outputConfig: { effort }
|
|
760
798
|
};
|
|
761
799
|
}
|
|
800
|
+
const VISIBLE_FLOOR = 1024;
|
|
762
801
|
const effectiveLevel = level === "xhigh" || level === "max" ? "high" : level;
|
|
763
802
|
const budgetMap = {
|
|
764
|
-
low: Math.max(1024, Math.floor(maxTokens * 0.
|
|
765
|
-
medium: Math.max(2048, Math.floor(maxTokens * 0.
|
|
766
|
-
high: Math.max(4096, maxTokens)
|
|
803
|
+
low: Math.max(1024, Math.floor(maxTokens * 0.2)),
|
|
804
|
+
medium: Math.max(2048, Math.floor(maxTokens * 0.45)),
|
|
805
|
+
high: Math.max(4096, Math.floor(maxTokens * 0.8))
|
|
767
806
|
};
|
|
768
|
-
const budget = budgetMap[effectiveLevel];
|
|
807
|
+
const budget = Math.max(0, Math.min(budgetMap[effectiveLevel], maxTokens - VISIBLE_FLOOR));
|
|
769
808
|
return {
|
|
770
809
|
thinking: { type: "enabled", budget_tokens: budget },
|
|
771
|
-
maxTokens
|
|
810
|
+
maxTokens
|
|
772
811
|
};
|
|
773
812
|
}
|
|
774
813
|
function remapToolCallId(id, idMap) {
|
|
@@ -974,26 +1013,83 @@ function parseToolArguments(argsJson) {
|
|
|
974
1013
|
}
|
|
975
1014
|
|
|
976
1015
|
// src/providers/anthropic.ts
|
|
1016
|
+
var anthropicClientCache = /* @__PURE__ */ new Map();
|
|
977
1017
|
function createClient(options) {
|
|
978
1018
|
const isOAuth = options.apiKey?.startsWith("sk-ant-oat");
|
|
979
|
-
|
|
1019
|
+
const userAgent = isOAuth ? options.userAgent ?? "claude-cli/2.1.75 (external, cli)" : "";
|
|
1020
|
+
const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${userAgent}`;
|
|
1021
|
+
if (!options.fetch) {
|
|
1022
|
+
const cached = anthropicClientCache.get(cacheKey);
|
|
1023
|
+
if (cached) return cached;
|
|
1024
|
+
}
|
|
1025
|
+
const client = new Anthropic({
|
|
980
1026
|
...isOAuth ? { apiKey: null, authToken: options.apiKey } : { apiKey: options.apiKey },
|
|
981
1027
|
...options.baseUrl ? { baseURL: options.baseUrl } : {},
|
|
982
1028
|
...options.fetch ? { fetch: options.fetch } : {},
|
|
983
|
-
// Disable SDK retries — the agent loop has its own stall/overload retry
|
|
984
|
-
// logic that surfaces errors properly. SDK retries on 429s can cause
|
|
985
|
-
// multi-minute hangs when the provider stops responding mid-retry.
|
|
986
1029
|
maxRetries: 0,
|
|
987
1030
|
...isOAuth ? {
|
|
988
1031
|
defaultHeaders: {
|
|
989
|
-
|
|
990
|
-
// (ggcoder) resolve the live version at runtime; the literal here
|
|
991
|
-
// is the offline fallback for direct gg-ai consumers.
|
|
992
|
-
"user-agent": options.userAgent ?? "claude-cli/2.1.75 (external, cli)",
|
|
1032
|
+
"user-agent": userAgent,
|
|
993
1033
|
"x-app": "cli"
|
|
994
1034
|
}
|
|
995
1035
|
} : {}
|
|
996
1036
|
});
|
|
1037
|
+
if (!options.fetch) {
|
|
1038
|
+
if (anthropicClientCache.size >= 8) {
|
|
1039
|
+
const oldest = anthropicClientCache.keys().next().value;
|
|
1040
|
+
if (oldest) anthropicClientCache.delete(oldest);
|
|
1041
|
+
}
|
|
1042
|
+
anthropicClientCache.set(cacheKey, client);
|
|
1043
|
+
}
|
|
1044
|
+
return client;
|
|
1045
|
+
}
|
|
1046
|
+
async function prewarmAnthropicCache(options) {
|
|
1047
|
+
try {
|
|
1048
|
+
const client = createClient({
|
|
1049
|
+
apiKey: options.apiKey,
|
|
1050
|
+
baseUrl: options.baseUrl,
|
|
1051
|
+
userAgent: options.userAgent
|
|
1052
|
+
});
|
|
1053
|
+
const cacheControl = toAnthropicCacheControl(options.cacheRetention ?? "long", options.baseUrl);
|
|
1054
|
+
const { system, messages } = toAnthropicMessages(
|
|
1055
|
+
[
|
|
1056
|
+
{ role: "system", content: options.system },
|
|
1057
|
+
{ role: "user", content: "." }
|
|
1058
|
+
],
|
|
1059
|
+
cacheControl
|
|
1060
|
+
);
|
|
1061
|
+
const isOAuth = options.apiKey.startsWith("sk-ant-oat");
|
|
1062
|
+
const fullSystem = isOAuth ? [
|
|
1063
|
+
{
|
|
1064
|
+
type: "text",
|
|
1065
|
+
text: "You are Claude Code, Anthropic's official CLI for Claude."
|
|
1066
|
+
},
|
|
1067
|
+
...system ?? []
|
|
1068
|
+
] : system;
|
|
1069
|
+
const tools = options.tools?.length ? toAnthropicTools(options.tools, {
|
|
1070
|
+
cacheControl,
|
|
1071
|
+
enableFineGrainedToolStreaming: true
|
|
1072
|
+
}) : void 0;
|
|
1073
|
+
await client.messages.create(
|
|
1074
|
+
{
|
|
1075
|
+
model: options.model,
|
|
1076
|
+
max_tokens: 1,
|
|
1077
|
+
messages,
|
|
1078
|
+
...fullSystem ? { system: fullSystem } : {},
|
|
1079
|
+
...tools ? {
|
|
1080
|
+
tools: [
|
|
1081
|
+
...tools,
|
|
1082
|
+
...options.serverTools ?? []
|
|
1083
|
+
]
|
|
1084
|
+
} : {}
|
|
1085
|
+
},
|
|
1086
|
+
{
|
|
1087
|
+
signal: options.signal ?? void 0,
|
|
1088
|
+
...isOAuth ? { headers: { "anthropic-beta": "claude-code-20250219,oauth-2025-04-20" } } : {}
|
|
1089
|
+
}
|
|
1090
|
+
);
|
|
1091
|
+
} catch {
|
|
1092
|
+
}
|
|
997
1093
|
}
|
|
998
1094
|
function streamAnthropic(options) {
|
|
999
1095
|
return new StreamResult(runStream(options), options.signal);
|
|
@@ -1573,13 +1669,27 @@ function extractOpenAIUsage(usage) {
|
|
|
1573
1669
|
cacheRead
|
|
1574
1670
|
};
|
|
1575
1671
|
}
|
|
1672
|
+
var openaiClientCache = /* @__PURE__ */ new Map();
|
|
1576
1673
|
function createClient2(options) {
|
|
1577
|
-
|
|
1674
|
+
const cacheKey = `${options.apiKey ?? ""}|${options.baseUrl ?? ""}|${JSON.stringify(options.defaultHeaders ?? {})}`;
|
|
1675
|
+
if (!options.fetch) {
|
|
1676
|
+
const cached = openaiClientCache.get(cacheKey);
|
|
1677
|
+
if (cached) return cached;
|
|
1678
|
+
}
|
|
1679
|
+
const client = new OpenAI({
|
|
1578
1680
|
apiKey: options.apiKey,
|
|
1579
1681
|
...options.baseUrl ? { baseURL: options.baseUrl } : {},
|
|
1580
1682
|
...options.fetch ? { fetch: options.fetch } : {},
|
|
1581
1683
|
...options.defaultHeaders ? { defaultHeaders: options.defaultHeaders } : {}
|
|
1582
1684
|
});
|
|
1685
|
+
if (!options.fetch) {
|
|
1686
|
+
if (openaiClientCache.size >= 8) {
|
|
1687
|
+
const oldest = openaiClientCache.keys().next().value;
|
|
1688
|
+
if (oldest) openaiClientCache.delete(oldest);
|
|
1689
|
+
}
|
|
1690
|
+
openaiClientCache.set(cacheKey, client);
|
|
1691
|
+
}
|
|
1692
|
+
return client;
|
|
1583
1693
|
}
|
|
1584
1694
|
function streamOpenAI(options) {
|
|
1585
1695
|
return new StreamResult(runStream2(options), options.signal);
|
|
@@ -1994,9 +2104,6 @@ async function* runStream3(options) {
|
|
|
1994
2104
|
body.tools = toCodexTools(options.tools);
|
|
1995
2105
|
}
|
|
1996
2106
|
body.prompt_cache_key = normalizePromptCacheKey(options.promptCacheKey ?? "ggcoder");
|
|
1997
|
-
if (options.cacheRetention === "long") {
|
|
1998
|
-
body.prompt_cache_retention = "24h";
|
|
1999
|
-
}
|
|
2000
2107
|
if (options.temperature != null && !options.thinking) {
|
|
2001
2108
|
body.temperature = options.temperature;
|
|
2002
2109
|
}
|
|
@@ -3309,6 +3416,7 @@ export {
|
|
|
3309
3416
|
palsuText,
|
|
3310
3417
|
palsuThinking,
|
|
3311
3418
|
palsuToolCall,
|
|
3419
|
+
prewarmAnthropicCache,
|
|
3312
3420
|
providerRegistry,
|
|
3313
3421
|
registerPalsuProvider,
|
|
3314
3422
|
setProviderDiagnostic,
|