@compilr-dev/agents 0.3.27 → 0.3.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/providers/claude.d.ts +12 -0
- package/dist/providers/claude.js +64 -1
- package/package.json +1 -1
|
@@ -107,6 +107,18 @@ export declare class ClaudeProvider implements LLMProvider {
|
|
|
107
107
|
* reducing token costs by up to 90% on subsequent requests.
|
|
108
108
|
*/
|
|
109
109
|
private wrapSystemPromptWithCache;
|
|
110
|
+
/**
|
|
111
|
+
* Add cache_control breakpoint to conversation messages.
|
|
112
|
+
*
|
|
113
|
+
* Caches the conversation history prefix (all messages except the most recent turn).
|
|
114
|
+
* This avoids re-processing the entire history on each API call.
|
|
115
|
+
* Only applies when there are enough messages to benefit (>= 4 messages = 2+ turns).
|
|
116
|
+
*
|
|
117
|
+
* Strategy: place cache_control on the last content block of the second-to-last
|
|
118
|
+
* user message. This caches system + tools + all messages up to that point.
|
|
119
|
+
* Everything after that breakpoint (the assistant reply that follows it and the most recent user message) is uncached and processed at full cost.
|
|
120
|
+
*/
|
|
121
|
+
private addCacheControlToMessages;
|
|
110
122
|
/**
|
|
111
123
|
* Add cache_control to the last tool definition.
|
|
112
124
|
*
|
package/dist/providers/claude.js
CHANGED
|
@@ -66,7 +66,9 @@ export class ClaudeProvider {
|
|
|
66
66
|
model: options?.model ?? this.defaultModel,
|
|
67
67
|
max_tokens: options?.maxTokens ?? this.defaultMaxTokens,
|
|
68
68
|
system: shouldCache && systemPrompt ? this.wrapSystemPromptWithCache(systemPrompt) : systemPrompt,
|
|
69
|
-
messages:
|
|
69
|
+
messages: shouldCache
|
|
70
|
+
? this.addCacheControlToMessages(anthropicMessages)
|
|
71
|
+
: anthropicMessages,
|
|
70
72
|
tools: tools.length > 0
|
|
71
73
|
? shouldCache
|
|
72
74
|
? this.addCacheControlToLastTool(tools)
|
|
@@ -255,6 +257,67 @@ export class ClaudeProvider {
|
|
|
255
257
|
},
|
|
256
258
|
];
|
|
257
259
|
}
|
|
260
|
+
/**
|
|
261
|
+
* Add cache_control breakpoint to conversation messages.
|
|
262
|
+
*
|
|
263
|
+
* Caches the conversation history prefix (all messages except the most recent turn).
|
|
264
|
+
* This avoids re-processing the entire history on each API call.
|
|
265
|
+
* Only applies when there are enough messages to benefit (>= 4 messages = 2+ turns).
|
|
266
|
+
*
|
|
267
|
+
* Strategy: place cache_control on the last content block of the second-to-last
|
|
268
|
+
* user message. This caches system + tools + all messages up to that point.
|
|
269
|
+
* Only the most recent user message is uncached (and processed at full cost).
|
|
270
|
+
*/
|
|
271
|
+
addCacheControlToMessages(messages) {
|
|
272
|
+
// Need at least 4 messages (2 turns) to benefit from caching
|
|
273
|
+
if (messages.length < 4)
|
|
274
|
+
return messages;
|
|
275
|
+
// Find the second-to-last user message
|
|
276
|
+
let targetIndex = -1;
|
|
277
|
+
let userCount = 0;
|
|
278
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
279
|
+
if (messages[i].role === 'user') {
|
|
280
|
+
userCount++;
|
|
281
|
+
if (userCount === 2) {
|
|
282
|
+
targetIndex = i;
|
|
283
|
+
break;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
if (targetIndex < 0)
|
|
288
|
+
return messages;
|
|
289
|
+
// Clone messages to avoid mutating originals
|
|
290
|
+
const result = messages.map((msg, idx) => {
|
|
291
|
+
if (idx !== targetIndex)
|
|
292
|
+
return msg;
|
|
293
|
+
// Add cache_control to the last content block of this message
|
|
294
|
+
const content = msg.content;
|
|
295
|
+
if (typeof content === 'string') {
|
|
296
|
+
return {
|
|
297
|
+
...msg,
|
|
298
|
+
content: [
|
|
299
|
+
{
|
|
300
|
+
type: 'text',
|
|
301
|
+
text: content,
|
|
302
|
+
cache_control: { type: 'ephemeral' },
|
|
303
|
+
},
|
|
304
|
+
],
|
|
305
|
+
};
|
|
306
|
+
}
|
|
307
|
+
if (Array.isArray(content) && content.length > 0) {
|
|
308
|
+
const lastBlock = content[content.length - 1];
|
|
309
|
+
return {
|
|
310
|
+
...msg,
|
|
311
|
+
content: [
|
|
312
|
+
...content.slice(0, -1),
|
|
313
|
+
{ ...lastBlock, cache_control: { type: 'ephemeral' } },
|
|
314
|
+
],
|
|
315
|
+
};
|
|
316
|
+
}
|
|
317
|
+
return msg;
|
|
318
|
+
});
|
|
319
|
+
return result;
|
|
320
|
+
}
|
|
258
321
|
/**
|
|
259
322
|
* Add cache_control to the last tool definition.
|
|
260
323
|
*
|