@compilr-dev/agents 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,18 @@ export interface ClaudeProviderConfig {
   * @default 4096
   */
  maxTokens?: number;
+ /**
+ * Enable prompt caching for system prompt and tools.
+ *
+ * When enabled, the system prompt and tool definitions are cached
+ * server-side, reducing token costs by up to 90% on subsequent requests.
+ *
+ * - Cache write: 1.25x base input cost (first request)
+ * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
+ *
+ * @default true
+ */
+ enablePromptCaching?: boolean;
  }
  /**
  * ClaudeProvider implements LLMProvider for Anthropic's Claude API
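For context, a minimal usage sketch of the new config field. Only the `ClaudeProviderConfig` fields shown above come from this release; the import path and named export are assumptions based on the package name.

```typescript
// Sketch only: the import path is assumed from the package name, not shown in this diff.
import { ClaudeProvider } from '@compilr-dev/agents';

const provider = new ClaudeProvider({
  apiKey: process.env.ANTHROPIC_API_KEY ?? '',
  maxTokens: 4096,
  // New in 0.3.6: defaults to true, so existing callers get caching automatically.
  // Set to false to opt out at the provider level.
  enablePromptCaching: false,
});
```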
@@ -42,6 +54,7 @@ export declare class ClaudeProvider implements LLMProvider {
  private readonly client;
  private readonly defaultModel;
  private readonly defaultMaxTokens;
+ private readonly enablePromptCaching;
  constructor(config: ClaudeProviderConfig);
  /**
  * Send messages and stream the response
@@ -71,6 +84,20 @@ export declare class ClaudeProvider implements LLMProvider {
  * Convert thinking config to Anthropic API format
  */
  private convertThinking;
+ /**
+ * Wrap system prompt in array format with cache_control for prompt caching.
+ *
+ * When enabled, the system prompt is cached server-side for 5 minutes,
+ * reducing token costs by up to 90% on subsequent requests.
+ */
+ private wrapSystemPromptWithCache;
+ /**
+ * Add cache_control to the last tool definition.
+ *
+ * This caches ALL tool definitions as a single prefix (tools are
+ * cached cumulatively up to the cache_control marker).
+ */
+ private addCacheControlToLastTool;
  /**
  * Process a stream event into StreamChunks
  */
@@ -28,6 +28,7 @@ export class ClaudeProvider {
  client;
  defaultModel;
  defaultMaxTokens;
+ enablePromptCaching;
  constructor(config) {
  this.client = new Anthropic({
  apiKey: config.apiKey,
@@ -35,6 +36,7 @@ export class ClaudeProvider {
  });
  this.defaultModel = config.model ?? DEFAULT_MODEL;
  this.defaultMaxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+ this.enablePromptCaching = config.enablePromptCaching ?? true;
  }
  /**
  * Send messages and stream the response
@@ -50,13 +52,19 @@ export class ClaudeProvider {
  toolsChars: JSON.stringify(tools).length,
  };
  try {
+ // Determine if prompt caching is enabled
+ const shouldCache = options?.enablePromptCaching ?? this.enablePromptCaching;
  // Build request parameters
  const params = {
  model: options?.model ?? this.defaultModel,
  max_tokens: options?.maxTokens ?? this.defaultMaxTokens,
- system: systemPrompt,
+ system: shouldCache && systemPrompt
+ ? this.wrapSystemPromptWithCache(systemPrompt)
+ : systemPrompt,
  messages: anthropicMessages,
- tools: tools.length > 0 ? tools : undefined,
+ tools: tools.length > 0
+ ? (shouldCache ? this.addCacheControlToLastTool(tools) : tools)
+ : undefined,
  temperature: options?.temperature,
  stop_sequences: options?.stopSequences,
  };
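To make the branch above concrete, here is roughly what the request parameters look like once `shouldCache` is true, based on the two helpers added later in this diff. All values are placeholders, not output captured from the library.

```typescript
// Illustrative shape of `params` with caching enabled; placeholder values only.
const params = {
  model: '<default model>',
  max_tokens: 4096,
  // system prompt wrapped in array form with a cache_control marker
  system: [
    {
      type: 'text',
      text: 'You are a helpful CLI assistant.',
      cache_control: { type: 'ephemeral' },
    },
  ],
  messages: [{ role: 'user', content: 'Hello' }],
  // only the last tool carries cache_control; the API caches the whole
  // tool-definition prefix up to that marker
  tools: [
    { name: 'read_file', input_schema: { type: 'object' } },
    { name: 'write_file', input_schema: { type: 'object' }, cache_control: { type: 'ephemeral' } },
  ],
};
```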
@@ -236,6 +244,40 @@ export class ClaudeProvider {
  budget_tokens: thinking.budgetTokens,
  };
  }
+ /**
+ * Wrap system prompt in array format with cache_control for prompt caching.
+ *
+ * When enabled, the system prompt is cached server-side for 5 minutes,
+ * reducing token costs by up to 90% on subsequent requests.
+ */
+ wrapSystemPromptWithCache(systemPrompt) {
+ return [
+ {
+ type: 'text',
+ text: systemPrompt,
+ cache_control: { type: 'ephemeral' },
+ },
+ ];
+ }
+ /**
+ * Add cache_control to the last tool definition.
+ *
+ * This caches ALL tool definitions as a single prefix (tools are
+ * cached cumulatively up to the cache_control marker).
+ */
+ addCacheControlToLastTool(tools) {
+ if (tools.length === 0)
+ return tools;
+ return tools.map((tool, index) => {
+ if (index === tools.length - 1) {
+ return {
+ ...tool,
+ cache_control: { type: 'ephemeral' },
+ };
+ }
+ return tool;
+ });
+ }
  /**
  * Process a stream event into StreamChunks
  */
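A rough cost sketch using only the multipliers quoted in the doc comments above (1.25x cache write, 0.1x cache read); the function and its inputs are illustrative, not part of the package API.

```typescript
// Back-of-the-envelope: cost of a cached system-prompt + tools prefix over
// several requests, relative to the base input-token price. Illustrative only.
function cachedPrefixCost(prefixTokens: number, requests: number, basePrice: number): number {
  if (requests <= 0) return 0;
  const write = prefixTokens * basePrice * 1.25;                 // first request writes the cache
  const reads = prefixTokens * basePrice * 0.1 * (requests - 1); // later requests read it (within 5 min)
  return write + reads;
}

// Example: a 2,000-token prefix over 10 requests at basePrice = 1
//   uncached: 2000 * 10                      = 20,000
//   cached:   2000 * 1.25 + 2000 * 0.1 * 9   = 2,500 + 1,800 = 4,300  (~78% less)
```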
@@ -323,7 +365,8 @@ export class ClaudeProvider {
  */
  mapError(error) {
  if (error instanceof Anthropic.APIError) {
- return new ProviderError(error.message, 'claude', error.status, error);
+ const status = typeof error.status === 'number' ? error.status : undefined;
+ return new ProviderError(error.message, 'claude', status, error);
  }
  if (error instanceof Anthropic.APIConnectionError) {
  return new ProviderError(`Connection error: ${error.message}`, 'claude', undefined, error);
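The `status` guard above presumably exists because `APIError.status` may be undefined or non-numeric in newer SDK versions. A minimal stand-alone illustration of the normalization, using a local helper rather than the real SDK types:

```typescript
// Mirrors the guard in the diff: pass numeric HTTP statuses through, drop anything else.
function normalizeStatus(status: unknown): number | undefined {
  return typeof status === 'number' ? status : undefined;
}

normalizeStatus(429);       // => 429
normalizeStatus(undefined); // => undefined
```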
@@ -83,6 +83,11 @@ export interface OpenAIStreamChunk {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
+ /** OpenAI prompt caching: details about cached tokens */
+ prompt_tokens_details?: {
+ cached_tokens?: number;
+ audio_tokens?: number;
+ };
  };
  }
  /**
@@ -141,6 +141,10 @@ export class OpenAICompatibleProvider {
  usage = {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
+ // OpenAI automatic prompt caching: capture cached tokens
+ ...(chunk.usage.prompt_tokens_details?.cached_tokens
+ ? { cacheReadTokens: chunk.usage.prompt_tokens_details.cached_tokens }
+ : {}),
  };
  }
  }
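Downstream code can use the newly surfaced field to see how much of the prompt was served from OpenAI's automatic cache. A small sketch, with a local stand-in for whatever usage type the library actually exports:

```typescript
// Local stand-in type; the real usage object is built in the provider above.
interface UsageLike {
  inputTokens: number;
  outputTokens: number;
  cacheReadTokens?: number; // populated from prompt_tokens_details.cached_tokens
}

// Fraction of input tokens that were served from the prompt cache.
function cacheHitRate(usage: UsageLike): number {
  return usage.inputTokens > 0 ? (usage.cacheReadTokens ?? 0) / usage.inputTokens : 0;
}
```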
@@ -147,6 +147,18 @@ export interface ChatOptions {
  * ```
  */
  thinking?: ThinkingConfig;
+ /**
+ * Enable prompt caching for system prompt and tools (Claude-specific)
+ *
+ * When enabled, the system prompt and tool definitions are cached
+ * server-side, reducing token costs by up to 90% on subsequent requests.
+ *
+ * - Cache write: 1.25x base input cost (first request)
+ * - Cache read: 0.1x base input cost (subsequent requests within 5 min)
+ *
+ * @default Provider-level setting (typically true)
+ */
+ enablePromptCaching?: boolean;
  }
  /**
  * Tool definition for the LLM
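The new `ChatOptions` field lets callers override the provider-level default per request. A sketch of such an options object; the method that consumes `ChatOptions` is not shown in this diff, so only the literal is given:

```typescript
// Per-request override sketch; field names come from the ChatOptions diff above.
const options = {
  maxTokens: 1024,
  temperature: 0.2,
  enablePromptCaching: false, // skip caching for this one call (e.g. a one-off prompt)
};
```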
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@compilr-dev/agents",
- "version": "0.3.4",
+ "version": "0.3.6",
  "description": "Lightweight multi-LLM agent library for building CLI AI assistants",
  "type": "module",
  "main": "dist/index.js",
@@ -52,7 +52,7 @@
  "node": ">=18.0.0"
  },
  "peerDependencies": {
- "@anthropic-ai/sdk": "^0.30.0",
+ "@anthropic-ai/sdk": "^0.72.1",
  "@modelcontextprotocol/sdk": "^1.23.0"
  },
  "peerDependenciesMeta": {
@@ -64,7 +64,7 @@
  }
  },
  "devDependencies": {
- "@anthropic-ai/sdk": "^0.30.1",
+ "@anthropic-ai/sdk": "^0.72.1",
  "@eslint/js": "^9.39.1",
  "@modelcontextprotocol/sdk": "^1.23.0",
  "@types/node": "^24.10.1",