@compilr-dev/agents 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,18 @@ export interface ClaudeProviderConfig {
      * @default 4096
      */
     maxTokens?: number;
+    /**
+     * Enable prompt caching for system prompt and tools.
+     *
+     * When enabled, the system prompt and tool definitions are cached
+     * server-side, reducing token costs by up to 90% on subsequent requests.
+     *
+     * - Cache write: 1.25x base input cost (first request)
+     * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
+     *
+     * @default true
+     */
+    enablePromptCaching?: boolean;
 }
 /**
  * ClaudeProvider implements LLMProvider for Anthropic's Claude API
@@ -42,6 +54,7 @@ export declare class ClaudeProvider implements LLMProvider {
     private readonly client;
     private readonly defaultModel;
     private readonly defaultMaxTokens;
+    private readonly enablePromptCaching;
     constructor(config: ClaudeProviderConfig);
     /**
      * Send messages and stream the response
@@ -71,6 +84,20 @@ export declare class ClaudeProvider implements LLMProvider {
      * Convert thinking config to Anthropic API format
      */
     private convertThinking;
+    /**
+     * Wrap system prompt in array format with cache_control for prompt caching.
+     *
+     * When enabled, the system prompt is cached server-side for 5 minutes,
+     * reducing token costs by up to 90% on subsequent requests.
+     */
+    private wrapSystemPromptWithCache;
+    /**
+     * Add cache_control to the last tool definition.
+     *
+     * This caches ALL tool definitions as a single prefix (tools are
+     * cached cumulatively up to the cache_control marker).
+     */
+    private addCacheControlToLastTool;
     /**
      * Process a stream event into StreamChunks
      */
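
Taken together, the declaration changes add an opt-out switch on the provider config. A minimal usage sketch, assuming ClaudeProvider and its config are exported from the package root (the export itself is not shown in this diff):

import { ClaudeProvider } from '@compilr-dev/agents'; // assumption: root export

// Prompt caching defaults to true; pass false to opt out at the provider level.
const provider = new ClaudeProvider({
    apiKey: process.env.ANTHROPIC_API_KEY ?? '',
    maxTokens: 4096,             // optional; 4096 is already the documented default
    enablePromptCaching: false,  // skip cache_control injection entirely
});
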
package/dist/providers/claude.js CHANGED

@@ -28,6 +28,7 @@ export class ClaudeProvider {
     client;
     defaultModel;
     defaultMaxTokens;
+    enablePromptCaching;
     constructor(config) {
         this.client = new Anthropic({
             apiKey: config.apiKey,
@@ -35,6 +36,7 @@ export class ClaudeProvider {
         });
         this.defaultModel = config.model ?? DEFAULT_MODEL;
         this.defaultMaxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+        this.enablePromptCaching = config.enablePromptCaching ?? true;
     }
     /**
      * Send messages and stream the response
@@ -50,13 +52,19 @@ export class ClaudeProvider {
             toolsChars: JSON.stringify(tools).length,
         };
         try {
+            // Determine if prompt caching is enabled
+            const shouldCache = options?.enablePromptCaching ?? this.enablePromptCaching;
             // Build request parameters
             const params = {
                 model: options?.model ?? this.defaultModel,
                 max_tokens: options?.maxTokens ?? this.defaultMaxTokens,
-                system: systemPrompt
+                system: shouldCache && systemPrompt
+                    ? this.wrapSystemPromptWithCache(systemPrompt)
+                    : systemPrompt,
                 messages: anthropicMessages,
-                tools: tools.length > 0
+                tools: tools.length > 0
+                    ? (shouldCache ? this.addCacheControlToLastTool(tools) : tools)
+                    : undefined,
                 temperature: options?.temperature,
                 stop_sequences: options?.stopSequences,
             };
@@ -236,6 +244,40 @@ export class ClaudeProvider {
             budget_tokens: thinking.budgetTokens,
         };
     }
+    /**
+     * Wrap system prompt in array format with cache_control for prompt caching.
+     *
+     * When enabled, the system prompt is cached server-side for 5 minutes,
+     * reducing token costs by up to 90% on subsequent requests.
+     */
+    wrapSystemPromptWithCache(systemPrompt) {
+        return [
+            {
+                type: 'text',
+                text: systemPrompt,
+                cache_control: { type: 'ephemeral' },
+            },
+        ];
+    }
+    /**
+     * Add cache_control to the last tool definition.
+     *
+     * This caches ALL tool definitions as a single prefix (tools are
+     * cached cumulatively up to the cache_control marker).
+     */
+    addCacheControlToLastTool(tools) {
+        if (tools.length === 0)
+            return tools;
+        return tools.map((tool, index) => {
+            if (index === tools.length - 1) {
+                return {
+                    ...tool,
+                    cache_control: { type: 'ephemeral' },
+                };
+            }
+            return tool;
+        });
+    }
     /**
      * Process a stream event into StreamChunks
      */
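
For orientation, with caching left on, the request assembled above ends up shaped roughly like this. This is an illustrative sketch of the resulting Anthropic Messages API payload, not code from the package; the model id and tool entries are made up, and only the cache_control placement reflects the diff:

const params = {
    model: 'claude-sonnet-4-5',   // illustrative model id
    max_tokens: 4096,
    // wrapSystemPromptWithCache: the system prompt becomes a one-element content array
    system: [
        { type: 'text', text: 'You are a CLI assistant.', cache_control: { type: 'ephemeral' } },
    ],
    messages: [{ role: 'user', content: 'List the TODOs in src/' }],
    // addCacheControlToLastTool: only the final tool carries the marker, so every
    // tool before it is cached as part of the same prefix
    tools: [
        { name: 'read_file', description: 'Read a file', input_schema: { type: 'object' } },
        { name: 'grep', description: 'Search files', input_schema: { type: 'object' }, cache_control: { type: 'ephemeral' } },
    ],
};
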
@@ -323,7 +365,8 @@ export class ClaudeProvider {
      */
     mapError(error) {
         if (error instanceof Anthropic.APIError) {
-
+            const status = typeof error.status === 'number' ? error.status : undefined;
+            return new ProviderError(error.message, 'claude', status, error);
         }
         if (error instanceof Anthropic.APIConnectionError) {
             return new ProviderError(`Connection error: ${error.message}`, 'claude', undefined, error);
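
The mapError change only affects what callers receive: the HTTP status is now forwarded only when the Anthropic SDK reports a numeric one. A hypothetical consumer-side sketch, assuming ProviderError is exported by the package (this diff only shows it being constructed):

import { ProviderError } from '@compilr-dev/agents'; // assumption: root export

try {
    // ...call into the Claude provider here
} catch (err) {
    if (err instanceof ProviderError) {
        // mapError preserves the Anthropic message and the underlying error,
        // and now forwards a numeric HTTP status only when the SDK supplies one
        console.error('Claude provider failed:', err.message);
    }
    throw err;
}
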
@@ -83,6 +83,11 @@ export interface OpenAIStreamChunk {
         prompt_tokens: number;
         completion_tokens: number;
         total_tokens: number;
+        /** OpenAI prompt caching: details about cached tokens */
+        prompt_tokens_details?: {
+            cached_tokens?: number;
+            audio_tokens?: number;
+        };
     };
 }
 /**
@@ -141,6 +141,10 @@ export class OpenAICompatibleProvider {
                 usage = {
                     inputTokens: chunk.usage.prompt_tokens,
                     outputTokens: chunk.usage.completion_tokens,
+                    // OpenAI automatic prompt caching: capture cached tokens
+                    ...(chunk.usage.prompt_tokens_details?.cached_tokens
+                        ? { cacheReadTokens: chunk.usage.prompt_tokens_details.cached_tokens }
+                        : {}),
                 };
             }
         }
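
On the OpenAI side nothing is configurable; caching is automatic, and this change only surfaces the cached-token count in the usage object. A sketch of reading it downstream; the provider's streaming method name and full chunk shape are not part of this diff, so the signature below is hypothetical and only the usage fields are taken from the change above:

interface UsageLike {
    inputTokens: number;
    outputTokens: number;
    cacheReadTokens?: number; // only present when OpenAI reports cached_tokens > 0
}

async function logCacheHits(chunks: AsyncIterable<{ usage?: UsageLike }>): Promise<void> {
    for await (const chunk of chunks) {
        if (chunk.usage?.cacheReadTokens) {
            console.log(`${chunk.usage.cacheReadTokens} of ${chunk.usage.inputTokens} prompt tokens were cache reads`);
        }
    }
}
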
@@ -147,6 +147,18 @@ export interface ChatOptions {
      * ```
      */
     thinking?: ThinkingConfig;
+    /**
+     * Enable prompt caching for system prompt and tools (Claude-specific)
+     *
+     * When enabled, the system prompt and tool definitions are cached
+     * server-side, reducing token costs by up to 90% on subsequent requests.
+     *
+     * - Cache write: 1.25x base input cost (first request)
+     * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
+     *
+     * @default Provider-level setting (typically true)
+     */
+    enablePromptCaching?: boolean;
 }
 /**
  * Tool definition for the LLM
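
Because ClaudeProvider resolves options?.enablePromptCaching ?? this.enablePromptCaching, the new ChatOptions field overrides the provider default per request. A small sketch, assuming ChatOptions is exported from the package root (not shown in this diff):

import type { ChatOptions } from '@compilr-dev/agents'; // assumption: root export

// Turn caching off for a single call even though the provider default is on.
const options: ChatOptions = {
    temperature: 0.2,
    enablePromptCaching: false,
};
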
package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@compilr-dev/agents",
-  "version": "0.3.4",
+  "version": "0.3.6",
   "description": "Lightweight multi-LLM agent library for building CLI AI assistants",
   "type": "module",
   "main": "dist/index.js",
@@ -52,7 +52,7 @@
     "node": ">=18.0.0"
   },
   "peerDependencies": {
-    "@anthropic-ai/sdk": "^0.
+    "@anthropic-ai/sdk": "^0.72.1",
     "@modelcontextprotocol/sdk": "^1.23.0"
   },
   "peerDependenciesMeta": {
@@ -64,7 +64,7 @@
     }
   },
   "devDependencies": {
-    "@anthropic-ai/sdk": "^0.
+    "@anthropic-ai/sdk": "^0.72.1",
     "@eslint/js": "^9.39.1",
     "@modelcontextprotocol/sdk": "^1.23.0",
     "@types/node": "^24.10.1",