@compilr-dev/agents 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,18 @@ export interface ClaudeProviderConfig {
   * @default 4096
   */
  maxTokens?: number;
+ /**
+ * Enable prompt caching for system prompt and tools.
+ *
+ * When enabled, the system prompt and tool definitions are cached
+ * server-side, reducing token costs by up to 90% on subsequent requests.
+ *
+ * - Cache write: 1.25x base input cost (first request)
+ * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
+ *
+ * @default true
+ */
+ enablePromptCaching?: boolean;
  }
  /**
  * ClaudeProvider implements LLMProvider for Anthropic's Claude API
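For context, a minimal usage sketch of the new config field. Only the `ClaudeProviderConfig` fields shown above come from this release; the import path and named export are assumptions based on the package name.

```typescript
// Sketch only: the import path is assumed from the package name, not shown in this diff.
import { ClaudeProvider } from '@compilr-dev/agents';

const provider = new ClaudeProvider({
  apiKey: process.env.ANTHROPIC_API_KEY ?? '',
  maxTokens: 4096,
  // New in 0.3.6: defaults to true, so existing callers get caching automatically.
  // Set to false to opt out at the provider level.
  enablePromptCaching: false,
});
```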
@@ -42,6 +54,7 @@ export declare class ClaudeProvider implements LLMProvider {
  private readonly client;
  private readonly defaultModel;
  private readonly defaultMaxTokens;
+ private readonly enablePromptCaching;
  constructor(config: ClaudeProviderConfig);
  /**
  * Send messages and stream the response
@@ -71,6 +84,20 @@ export declare class ClaudeProvider implements LLMProvider {
  * Convert thinking config to Anthropic API format
  */
  private convertThinking;
+ /**
+ * Wrap system prompt in array format with cache_control for prompt caching.
+ *
+ * When enabled, the system prompt is cached server-side for 5 minutes,
+ * reducing token costs by up to 90% on subsequent requests.
+ */
+ private wrapSystemPromptWithCache;
+ /**
+ * Add cache_control to the last tool definition.
+ *
+ * This caches ALL tool definitions as a single prefix (tools are
+ * cached cumulatively up to the cache_control marker).
+ */
+ private addCacheControlToLastTool;
  /**
  * Process a stream event into StreamChunks
  */
@@ -28,6 +28,7 @@ export class ClaudeProvider {
  client;
  defaultModel;
  defaultMaxTokens;
+ enablePromptCaching;
  constructor(config) {
  this.client = new Anthropic({
  apiKey: config.apiKey,
@@ -35,6 +36,7 @@ export class ClaudeProvider {
  });
  this.defaultModel = config.model ?? DEFAULT_MODEL;
  this.defaultMaxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+ this.enablePromptCaching = config.enablePromptCaching ?? true;
  }
  /**
  * Send messages and stream the response
@@ -50,13 +52,19 @@ export class ClaudeProvider {
  toolsChars: JSON.stringify(tools).length,
  };
  try {
+ // Determine if prompt caching is enabled
+ const shouldCache = options?.enablePromptCaching ?? this.enablePromptCaching;
  // Build request parameters
  const params = {
  model: options?.model ?? this.defaultModel,
  max_tokens: options?.maxTokens ?? this.defaultMaxTokens,
- system: systemPrompt,
+ system: shouldCache && systemPrompt
+ ? this.wrapSystemPromptWithCache(systemPrompt)
+ : systemPrompt,
  messages: anthropicMessages,
- tools: tools.length > 0 ? tools : undefined,
+ tools: tools.length > 0
+ ? (shouldCache ? this.addCacheControlToLastTool(tools) : tools)
+ : undefined,
  temperature: options?.temperature,
  stop_sequences: options?.stopSequences,
  };
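To make the branch above concrete, here is roughly what the request parameters look like once `shouldCache` is true, based on the two helpers added later in this diff. All values are placeholders, not output captured from the library.

```typescript
// Illustrative shape of `params` with caching enabled; placeholder values only.
const params = {
  model: '<default model>',
  max_tokens: 4096,
  // system prompt wrapped in array form with a cache_control marker
  system: [
    {
      type: 'text',
      text: 'You are a helpful CLI assistant.',
      cache_control: { type: 'ephemeral' },
    },
  ],
  messages: [{ role: 'user', content: 'Hello' }],
  // only the last tool carries cache_control; the API caches the whole
  // tool-definition prefix up to that marker
  tools: [
    { name: 'read_file', input_schema: { type: 'object' } },
    { name: 'write_file', input_schema: { type: 'object' }, cache_control: { type: 'ephemeral' } },
  ],
};
```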
@@ -236,6 +244,40 @@ export class ClaudeProvider {
  budget_tokens: thinking.budgetTokens,
  };
  }
+ /**
+ * Wrap system prompt in array format with cache_control for prompt caching.
+ *
+ * When enabled, the system prompt is cached server-side for 5 minutes,
+ * reducing token costs by up to 90% on subsequent requests.
+ */
+ wrapSystemPromptWithCache(systemPrompt) {
+ return [
+ {
+ type: 'text',
+ text: systemPrompt,
+ cache_control: { type: 'ephemeral' },
+ },
+ ];
+ }
+ /**
+ * Add cache_control to the last tool definition.
+ *
+ * This caches ALL tool definitions as a single prefix (tools are
+ * cached cumulatively up to the cache_control marker).
+ */
+ addCacheControlToLastTool(tools) {
+ if (tools.length === 0)
+ return tools;
+ return tools.map((tool, index) => {
+ if (index === tools.length - 1) {
+ return {
+ ...tool,
+ cache_control: { type: 'ephemeral' },
+ };
+ }
+ return tool;
+ });
+ }
  /**
  * Process a stream event into StreamChunks
  */
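A rough cost sketch using only the multipliers quoted in the doc comments above (1.25x cache write, 0.1x cache read); the function and its inputs are illustrative, not part of the package API.

```typescript
// Back-of-the-envelope: cost of a cached system-prompt + tools prefix over
// several requests, relative to the base input-token price. Illustrative only.
function cachedPrefixCost(prefixTokens: number, requests: number, basePrice: number): number {
  if (requests <= 0) return 0;
  const write = prefixTokens * basePrice * 1.25;                 // first request writes the cache
  const reads = prefixTokens * basePrice * 0.1 * (requests - 1); // later requests read it (within 5 min)
  return write + reads;
}

// Example: a 2,000-token prefix over 10 requests at basePrice = 1
//   uncached: 2000 * 10                      = 20,000
//   cached:   2000 * 1.25 + 2000 * 0.1 * 9   = 2,500 + 1,800 = 4,300  (~78% less)
```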
@@ -323,7 +365,8 @@ export class ClaudeProvider {
  */
  mapError(error) {
  if (error instanceof Anthropic.APIError) {
- return new ProviderError(error.message, 'claude', error.status, error);
+ const status = typeof error.status === 'number' ? error.status : undefined;
+ return new ProviderError(error.message, 'claude', status, error);
  }
  if (error instanceof Anthropic.APIConnectionError) {
  return new ProviderError(`Connection error: ${error.message}`, 'claude', undefined, error);
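The `status` guard above presumably exists because `APIError.status` may be undefined or non-numeric in newer SDK versions. A minimal stand-alone illustration of the normalization, using a local helper rather than the real SDK types:

```typescript
// Mirrors the guard in the diff: pass numeric HTTP statuses through, drop anything else.
function normalizeStatus(status: unknown): number | undefined {
  return typeof status === 'number' ? status : undefined;
}

normalizeStatus(429);       // => 429
normalizeStatus(undefined); // => undefined
```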
@@ -83,6 +83,11 @@ export interface OpenAIStreamChunk {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
+ /** OpenAI prompt caching: details about cached tokens */
+ prompt_tokens_details?: {
+ cached_tokens?: number;
+ audio_tokens?: number;
+ };
  };
  }
  /**
@@ -141,6 +141,10 @@ export class OpenAICompatibleProvider {
  usage = {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
+ // OpenAI automatic prompt caching: capture cached tokens
+ ...(chunk.usage.prompt_tokens_details?.cached_tokens
+ ? { cacheReadTokens: chunk.usage.prompt_tokens_details.cached_tokens }
+ : {}),
  };
  }
  }
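Downstream code can use the newly surfaced field to see how much of the prompt was served from OpenAI's automatic cache. A small sketch, with a local stand-in for whatever usage type the library actually exports:

```typescript
// Local stand-in type; the real usage object is built in the provider above.
interface UsageLike {
  inputTokens: number;
  outputTokens: number;
  cacheReadTokens?: number; // populated from prompt_tokens_details.cached_tokens
}

// Fraction of input tokens that were served from the prompt cache.
function cacheHitRate(usage: UsageLike): number {
  return usage.inputTokens > 0 ? (usage.cacheReadTokens ?? 0) / usage.inputTokens : 0;
}
```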
@@ -147,6 +147,18 @@ export interface ChatOptions {
  * ```
  */
  thinking?: ThinkingConfig;
+ /**
+ * Enable prompt caching for system prompt and tools (Claude-specific)
+ *
+ * When enabled, the system prompt and tool definitions are cached
+ * server-side, reducing token costs by up to 90% on subsequent requests.
+ *
+ * - Cache write: 1.25x base input cost (first request)
+ * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
+ *
+ * @default Provider-level setting (typically true)
+ */
+ enablePromptCaching?: boolean;
  }
  /**
  * Tool definition for the LLM
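The new `ChatOptions` field lets callers override the provider-level default per request. A sketch of such an options object; the method that consumes `ChatOptions` is not shown in this diff, so only the literal is given:

```typescript
// Per-request override sketch; field names come from the ChatOptions diff above.
const options = {
  maxTokens: 1024,
  temperature: 0.2,
  enablePromptCaching: false, // skip caching for this one call (e.g. a one-off prompt)
};
```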
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@compilr-dev/agents",
- "version": "0.3.4",
+ "version": "0.3.6",
  "description": "Lightweight multi-LLM agent library for building CLI AI assistants",
  "type": "module",
  "main": "dist/index.js",
@@ -52,7 +52,7 @@
  "node": ">=18.0.0"
  },
  "peerDependencies": {
- "@anthropic-ai/sdk": "^0.30.0",
+ "@anthropic-ai/sdk": "^0.72.1",
  "@modelcontextprotocol/sdk": "^1.23.0"
  },
  "peerDependenciesMeta": {
@@ -64,7 +64,7 @@
  }
  },
  "devDependencies": {
- "@anthropic-ai/sdk": "^0.30.1",
+ "@anthropic-ai/sdk": "^0.72.1",
  "@eslint/js": "^9.39.1",
  "@modelcontextprotocol/sdk": "^1.23.0",
  "@types/node": "^24.10.1",