npm - @vybestack/llxprt-code-core - Versions diffs - 0.5.0-nightly.251121.027a6733 → 0.5.0-nightly.251121.bd93fe760 - Mend

@vybestack/llxprt-code-core 0.5.0-nightly.251121.027a6733 → 0.5.0-nightly.251121.bd93fe760

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150) hide show

package/dist/index.d.ts +1 -0
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/src/auth/types.d.ts +2 -2
package/dist/src/code_assist/oauth2.js +36 -9
package/dist/src/code_assist/oauth2.js.map +1 -1
package/dist/src/config/config.d.ts +9 -0
package/dist/src/config/config.js +16 -0
package/dist/src/config/config.js.map +1 -1
package/dist/src/confirmation-bus/index.d.ts +2 -0
package/dist/src/confirmation-bus/index.js +3 -0
package/dist/src/confirmation-bus/index.js.map +1 -0
package/dist/src/confirmation-bus/message-bus.d.ts +60 -0
package/dist/src/confirmation-bus/message-bus.js +141 -0
package/dist/src/confirmation-bus/message-bus.js.map +1 -0
package/dist/src/confirmation-bus/types.d.ts +59 -0
package/dist/src/confirmation-bus/types.js +10 -0
package/dist/src/confirmation-bus/types.js.map +1 -0
package/dist/src/core/client.d.ts +1 -1
package/dist/src/core/client.js +24 -11
package/dist/src/core/client.js.map +1 -1
package/dist/src/core/coreToolScheduler.d.ts +18 -1
package/dist/src/core/coreToolScheduler.js +133 -13
package/dist/src/core/coreToolScheduler.js.map +1 -1
package/dist/src/core/geminiChat.js +2 -0
package/dist/src/core/geminiChat.js.map +1 -1
package/dist/src/core/subagent.d.ts +4 -1
package/dist/src/core/subagent.js +31 -3
package/dist/src/core/subagent.js.map +1 -1
package/dist/src/core/subagentOrchestrator.d.ts +2 -1
package/dist/src/core/subagentOrchestrator.js +31 -6
package/dist/src/core/subagentOrchestrator.js.map +1 -1
package/dist/src/ide/detect-ide.d.ts +44 -14
package/dist/src/ide/detect-ide.js +35 -75
package/dist/src/ide/detect-ide.js.map +1 -1
package/dist/src/ide/ide-client.d.ts +4 -4
package/dist/src/ide/ide-client.js +25 -24
package/dist/src/ide/ide-client.js.map +1 -1
package/dist/src/ide/ide-installer.d.ts +2 -2
package/dist/src/ide/ide-installer.js +7 -9
package/dist/src/ide/ide-installer.js.map +1 -1
package/dist/src/index.d.ts +7 -1
package/dist/src/index.js +9 -1
package/dist/src/index.js.map +1 -1
package/dist/src/mcp/oauth-provider.d.ts +4 -1
package/dist/src/mcp/oauth-provider.js +30 -27
package/dist/src/mcp/oauth-provider.js.map +1 -1
package/dist/src/policy/config.d.ts +51 -0
package/dist/src/policy/config.js +102 -0
package/dist/src/policy/config.js.map +1 -0
package/dist/src/policy/index.d.ts +5 -0
package/dist/src/policy/index.js +6 -0
package/dist/src/policy/index.js.map +1 -0
package/dist/src/policy/policies/discovered.toml +9 -0
package/dist/src/policy/policies/read-only.toml +68 -0
package/dist/src/policy/policies/write.toml +69 -0
package/dist/src/policy/policies/yolo.toml +8 -0
package/dist/src/policy/policy-engine.d.ts +55 -0
package/dist/src/policy/policy-engine.js +126 -0
package/dist/src/policy/policy-engine.js.map +1 -0
package/dist/src/policy/stable-stringify.d.ts +29 -0
package/dist/src/policy/stable-stringify.js +111 -0
package/dist/src/policy/stable-stringify.js.map +1 -0
package/dist/src/policy/toml-loader.d.ts +37 -0
package/dist/src/policy/toml-loader.js +183 -0
package/dist/src/policy/toml-loader.js.map +1 -0
package/dist/src/policy/types.d.ts +16 -0
package/dist/src/policy/types.js +7 -0
package/dist/src/policy/types.js.map +1 -0
package/dist/src/providers/LoggingProviderWrapper.d.ts +2 -0
package/dist/src/providers/LoggingProviderWrapper.js +27 -6
package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
package/dist/src/providers/ProviderManager.d.ts +18 -0
package/dist/src/providers/ProviderManager.js +54 -3
package/dist/src/providers/ProviderManager.js.map +1 -1
package/dist/src/providers/anthropic/AnthropicProvider.d.ts +49 -0
package/dist/src/providers/anthropic/AnthropicProvider.js +444 -30
package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
package/dist/src/runtime/AgentRuntimeLoader.d.ts +1 -0
package/dist/src/runtime/AgentRuntimeLoader.js +6 -1
package/dist/src/runtime/AgentRuntimeLoader.js.map +1 -1
package/dist/src/services/history/HistoryService.d.ts +4 -0
package/dist/src/services/history/HistoryService.js +18 -0
package/dist/src/services/history/HistoryService.js.map +1 -1
package/dist/src/services/history/IContent.d.ts +6 -0
package/dist/src/services/history/IContent.js.map +1 -1
package/dist/src/settings/types.d.ts +7 -0
package/dist/src/telemetry/uiTelemetry.d.ts +1 -1
package/dist/src/telemetry/uiTelemetry.js +2 -3
package/dist/src/telemetry/uiTelemetry.js.map +1 -1
package/dist/src/tools/edit.d.ts +3 -2
package/dist/src/tools/edit.js +23 -10
package/dist/src/tools/edit.js.map +1 -1
package/dist/src/tools/glob.d.ts +3 -2
package/dist/src/tools/glob.js +2 -2
package/dist/src/tools/glob.js.map +1 -1
package/dist/src/tools/grep.d.ts +3 -2
package/dist/src/tools/grep.js +2 -2
package/dist/src/tools/grep.js.map +1 -1
package/dist/src/tools/ls.d.ts +3 -2
package/dist/src/tools/ls.js +2 -2
package/dist/src/tools/ls.js.map +1 -1
package/dist/src/tools/mcp-tool.js +7 -1
package/dist/src/tools/mcp-tool.js.map +1 -1
package/dist/src/tools/memoryTool.d.ts +5 -2
package/dist/src/tools/memoryTool.js +12 -4
package/dist/src/tools/memoryTool.js.map +1 -1
package/dist/src/tools/read-file.d.ts +3 -2
package/dist/src/tools/read-file.js +2 -2
package/dist/src/tools/read-file.js.map +1 -1
package/dist/src/tools/read-many-files.d.ts +3 -2
package/dist/src/tools/read-many-files.js +2 -2
package/dist/src/tools/read-many-files.js.map +1 -1
package/dist/src/tools/ripGrep.d.ts +3 -2
package/dist/src/tools/ripGrep.js +2 -2
package/dist/src/tools/ripGrep.js.map +1 -1
package/dist/src/tools/shell.d.ts +3 -2
package/dist/src/tools/shell.js +10 -6
package/dist/src/tools/shell.js.map +1 -1
package/dist/src/tools/smart-edit.d.ts +3 -2
package/dist/src/tools/smart-edit.js +13 -9
package/dist/src/tools/smart-edit.js.map +1 -1
package/dist/src/tools/task.d.ts +1 -0
package/dist/src/tools/task.js +33 -16
package/dist/src/tools/task.js.map +1 -1
package/dist/src/tools/tool-confirmation-types.d.ts +20 -0
package/dist/src/tools/tool-confirmation-types.js +15 -0
package/dist/src/tools/tool-confirmation-types.js.map +1 -0
package/dist/src/tools/tool-error.d.ts +1 -0
package/dist/src/tools/tool-error.js +1 -0
package/dist/src/tools/tool-error.js.map +1 -1
package/dist/src/tools/tool-registry.d.ts +8 -1
package/dist/src/tools/tool-registry.js +18 -4
package/dist/src/tools/tool-registry.js.map +1 -1
package/dist/src/tools/tools.d.ts +52 -14
package/dist/src/tools/tools.js +71 -15
package/dist/src/tools/tools.js.map +1 -1
package/dist/src/tools/web-fetch.d.ts +3 -2
package/dist/src/tools/web-fetch.js +11 -6
package/dist/src/tools/web-fetch.js.map +1 -1
package/dist/src/tools/web-search-invocation.d.ts +3 -1
package/dist/src/tools/web-search-invocation.js +5 -2
package/dist/src/tools/web-search-invocation.js.map +1 -1
package/dist/src/tools/web-search.d.ts +3 -2
package/dist/src/tools/web-search.js +6 -4
package/dist/src/tools/web-search.js.map +1 -1
package/dist/src/tools/write-file.d.ts +3 -2
package/dist/src/tools/write-file.js +11 -6
package/dist/src/tools/write-file.js.map +1 -1
package/package.json +4 -2

package/dist/src/providers/anthropic/AnthropicProvider.js CHANGED Viewed

@@ -29,6 +29,8 @@ export class AnthropicProvider extends BaseProvider {
         { pattern: /claude-.*3.*opus/i, tokens: 4096 },
         { pattern: /claude-.*3.*haiku/i, tokens: 4096 },
     ];
+    // Rate limit state tracking - updated on each API response
+    lastRateLimitInfo;
     constructor(apiKey, baseURL, config, oauthManager) {
         // Initialize base provider with auth configuration
         const baseConfig = {
@@ -71,6 +73,12 @@ export class AnthropicProvider extends BaseProvider {
     getErrorsLogger() {
         return new DebugLogger('llxprt:anthropic:errors');
     }
+    getCacheLogger() {
+        return new DebugLogger('llxprt:anthropic:cache');
+    }
+    getRateLimitLogger() {
+        return new DebugLogger('llxprt:anthropic:ratelimit');
+    }
     instantiateClient(authToken, baseURL) {
         const isOAuthToken = authToken.startsWith('sk-ant-oat');
         const clientConfig = {
@@ -527,6 +535,32 @@ export class AnthropicProvider extends BaseProvider {
         // Unknown format - assume it's a raw UUID
         return 'hist_tool_' + id;
     }
+    /**
+     * Sort object keys alphabetically for stable JSON serialization
+     * This prevents cache invalidation due to key order changes
+     */
+    sortObjectKeys(obj) {
+        const sorted = Object.keys(obj)
+            .sort()
+            .reduce((acc, key) => {
+            acc[key] = obj[key];
+            return acc;
+        }, {});
+        return sorted;
+    }
+    /**
+     * Merge beta headers, ensuring no duplicates
+     */
+    mergeBetaHeaders(existing, addition) {
+        if (!existing)
+            return addition;
+        const parts = new Set(existing
+            .split(',')
+            .map((s) => s.trim())
+            .filter(Boolean));
+        parts.add(addition);
+        return Array.from(parts).join(', ');
+    }
     /**
      * @plan PLAN-20251023-STATELESS-HARDENING.P08
      * @requirement REQ-SP4-002, REQ-SP4-003
@@ -748,7 +782,25 @@ export class AnthropicProvider extends BaseProvider {
         const detectedFormat = this.detectToolFormat();
         const needsQwenParameterProcessing = detectedFormat === 'qwen';
         // Convert Gemini format tools to anthropic format (always for Anthropic API)
-        const anthropicTools = callFormatter.convertGeminiToFormat(tools, 'anthropic');
+        let anthropicTools = callFormatter.convertGeminiToFormat(tools, 'anthropic');
+        // Stabilize tool ordering and JSON schema keys to prevent cache invalidation
+        if (anthropicTools && anthropicTools.length > 0) {
+            anthropicTools = [...anthropicTools]
+                .sort((a, b) => a.name.localeCompare(b.name))
+                .map((tool) => {
+                const schema = tool.input_schema;
+                if (schema.properties) {
+                    return {
+                        ...tool,
+                        input_schema: {
+                            ...schema,
+                            properties: this.sortObjectKeys(schema.properties),
+                        },
+                    };
+                }
+                return tool;
+            });
+        }
         const toolNamesForPrompt = tools === undefined
             ? undefined
             : Array.from(new Set(tools.flatMap((group) => group.functionDeclarations
@@ -765,32 +817,78 @@ export class AnthropicProvider extends BaseProvider {
         // Derive model parameters on demand from ephemeral settings
         const configEphemeralSettings = options.invocation?.ephemerals ?? {};
         const requestOverrides = configEphemeralSettings['anthropic'] || {};
+        // Get caching setting from ephemeral settings (session override) or provider settings
+        const providerSettings = this.resolveSettingsService().getProviderSettings(this.name) ?? {};
+        const cachingSetting = configEphemeralSettings['prompt-caching'] ??
+            providerSettings['prompt-caching'] ??
+            '1h';
+        const wantCaching = cachingSetting !== 'off';
+        const ttl = cachingSetting === '1h' ? '1h' : '5m';
+        const cacheLogger = this.getCacheLogger();
+        if (wantCaching) {
+            cacheLogger.debug(() => `Prompt caching enabled with TTL: ${ttl}`);
+        }
         // For OAuth mode, inject core system prompt as the first human message
         if (isOAuth) {
             const corePrompt = await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt);
             if (corePrompt) {
-                anthropicMessages.unshift({
-                    role: 'user',
-                    content: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
-                });
+                if (wantCaching) {
+                    anthropicMessages.unshift({
+                        role: 'user',
+                        content: [
+                            {
+                                type: 'text',
+                                text: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
+                                cache_control: { type: 'ephemeral', ttl },
+                            },
+                        ],
+                    });
+                    cacheLogger.debug(() => 'Added cache_control to OAuth system message');
+                }
+                else {
+                    anthropicMessages.unshift({
+                        role: 'user',
+                        content: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
+                    });
+                }
             }
         }
+        // Build system field with caching support
         const systemPrompt = !isOAuth
             ? await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt)
             : undefined;
+        let systemField = {};
+        if (isOAuth) {
+            systemField = {
+                system: "You are Claude Code, Anthropic's official CLI for Claude.",
+            };
+        }
+        else if (systemPrompt) {
+            if (wantCaching) {
+                // Use array format with cache_control breakpoint
+                systemField = {
+                    system: [
+                        {
+                            type: 'text',
+                            text: systemPrompt,
+                            cache_control: { type: 'ephemeral', ttl },
+                        },
+                    ],
+                };
+                cacheLogger.debug(() => `Added cache_control to system prompt (${ttl})`);
+            }
+            else {
+                // Use string format (no caching)
+                systemField = { system: systemPrompt };
+            }
+        }
         const requestBody = {
             model: currentModel,
             messages: anthropicMessages,
             max_tokens: this.getMaxTokensForModel(currentModel),
             stream: streamingEnabled,
             ...requestOverrides, // Use derived ephemeral overrides instead of memoized instance state
-            ...(isOAuth
-                ? {
-                    system: "You are Claude Code, Anthropic's official CLI for Claude.",
-                }
-                : systemPrompt
-                    ? { system: systemPrompt }
-                    : {}),
+            ...systemField,
             ...(anthropicTools && anthropicTools.length > 0
                 ? { tools: anthropicTools }
                 : {}),
@@ -805,24 +903,131 @@ export class AnthropicProvider extends BaseProvider {
             });
         }
         // Make the API call with retry logic
-        const customHeaders = this.getCustomHeaders();
-        const apiCall = () => customHeaders
+        let customHeaders = this.getCustomHeaders() || {};
+        // For OAuth, always include the oauth beta header in customHeaders
+        // to ensure it's not overridden by cache headers
+        if (isOAuth) {
+            const existingBeta = customHeaders['anthropic-beta'];
+            customHeaders = {
+                ...customHeaders,
+                'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'oauth-2025-04-20'),
+            };
+        }
+        // Add extended-cache-ttl beta header for 1h caching
+        if (wantCaching && ttl === '1h') {
+            const existingBeta = customHeaders['anthropic-beta'];
+            customHeaders = {
+                ...customHeaders,
+                'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'extended-cache-ttl-2025-04-11'),
+            };
+            cacheLogger.debug(() => 'Added extended-cache-ttl-2025-04-11 beta header for 1h caching');
+        }
+        const apiCall = () => Object.keys(customHeaders).length > 0
             ? client.messages.create(requestBody, { headers: customHeaders })
             : client.messages.create(requestBody);
         const { maxAttempts, initialDelayMs } = this.getRetryConfig();
-        const response = await retryWithBackoff(apiCall, {
-            maxAttempts,
-            initialDelayMs,
-            shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
-            trackThrottleWaitTime: this.throttleTracker,
-        });
+        // Proactively throttle if approaching rate limits
+        await this.waitForRateLimitIfNeeded(configEphemeralSettings);
+        // For non-streaming, use withResponse() to access headers
+        // For streaming, we can't access headers easily, so we skip rate limit extraction
+        const rateLimitLogger = this.getRateLimitLogger();
+        let responseHeaders;
+        let response;
+        if (streamingEnabled) {
+            // Streaming mode - can't easily access headers
+            response = await retryWithBackoff(apiCall, {
+                maxAttempts,
+                initialDelayMs,
+                shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
+                trackThrottleWaitTime: this.throttleTracker,
+            });
+            rateLimitLogger.debug(() => 'Streaming mode - rate limit headers not extracted');
+        }
+        else {
+            // Non-streaming mode - use withResponse() to get headers
+            const apiCallWithResponse = async () => {
+                const promise = apiCall();
+                // The promise has a withResponse() method we can call
+                if (promise &&
+                    typeof promise === 'object' &&
+                    'withResponse' in promise) {
+                    return promise.withResponse();
+                }
+                // Fallback if withResponse is not available
+                return { data: await promise, response: undefined };
+            };
+            const result = await retryWithBackoff(apiCallWithResponse, {
+                maxAttempts,
+                initialDelayMs,
+                shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
+                trackThrottleWaitTime: this.throttleTracker,
+            });
+            response = result.data;
+            if (result.response) {
+                responseHeaders = result.response.headers;
+                // Extract and process rate limit headers
+                const rateLimitInfo = this.extractRateLimitHeaders(responseHeaders);
+                this.lastRateLimitInfo = rateLimitInfo;
+                rateLimitLogger.debug(() => {
+                    const parts = [];
+                    if (rateLimitInfo.requestsRemaining !== undefined &&
+                        rateLimitInfo.requestsLimit !== undefined) {
+                        parts.push(`requests=${rateLimitInfo.requestsRemaining}/${rateLimitInfo.requestsLimit}`);
+                    }
+                    if (rateLimitInfo.tokensRemaining !== undefined &&
+                        rateLimitInfo.tokensLimit !== undefined) {
+                        parts.push(`tokens=${rateLimitInfo.tokensRemaining}/${rateLimitInfo.tokensLimit}`);
+                    }
+                    if (rateLimitInfo.inputTokensRemaining !== undefined &&
+                        rateLimitInfo.inputTokensLimit !== undefined) {
+                        parts.push(`input_tokens=${rateLimitInfo.inputTokensRemaining}/${rateLimitInfo.inputTokensLimit}`);
+                    }
+                    return parts.length > 0
+                        ? `Rate limits: ${parts.join(', ')}`
+                        : 'Rate limits: no data';
+                });
+                // Check and warn if approaching limits
+                this.checkRateLimits(rateLimitInfo);
+            }
+        }
         if (streamingEnabled) {
             // Handle streaming response - response is already a Stream when streaming is enabled
             const stream = response;
             let currentToolCall;
             this.getStreamingLogger().debug(() => 'Processing streaming response');
             for await (const chunk of stream) {
-                if (chunk.type === 'content_block_start') {
+                if (chunk.type === 'message_start') {
+                    // Extract cache metrics from message_start event
+                    const usage = chunk.message?.usage;
+                    if (usage) {
+                        const cacheRead = usage.cache_read_input_tokens ?? 0;
+                        const cacheCreation = usage.cache_creation_input_tokens ?? 0;
+                        cacheLogger.debug(() => `[AnthropicProvider streaming] Emitting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
+                        if (cacheRead > 0 || cacheCreation > 0) {
+                            cacheLogger.debug(() => {
+                                const hitRate = cacheRead + (usage.input_tokens ?? 0) > 0
+                                    ? (cacheRead / (cacheRead + (usage.input_tokens ?? 0))) *
+                                        100
+                                    : 0;
+                                return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
+                            });
+                        }
+                        yield {
+                            speaker: 'ai',
+                            blocks: [],
+                            metadata: {
+                                usage: {
+                                    promptTokens: usage.input_tokens ?? 0,
+                                    completionTokens: usage.output_tokens ?? 0,
+                                    totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
+                                    cache_read_input_tokens: cacheRead,
+                                    cache_creation_input_tokens: cacheCreation,
+                                },
+                            },
+                        };
+                    }
+                }
+                else if (chunk.type === 'content_block_start') {
                     if (chunk.content_block.type === 'tool_use') {
                         const toolBlock = chunk.content_block;
                         this.getStreamingLogger().debug(() => `Starting tool use: ${toolBlock.name}`);
@@ -872,17 +1077,21 @@ export class AnthropicProvider extends BaseProvider {
                     }
                 }
                 else if (chunk.type === 'message_delta' && chunk.usage) {
-                    // Emit usage metadata
-                    this.getStreamingLogger().debug(() => `Received usage metadata`);
+                    // Emit usage metadata including cache fields
+                    const usage = chunk.usage;
+                    const cacheRead = usage.cache_read_input_tokens ?? 0;
+                    const cacheCreation = usage.cache_creation_input_tokens ?? 0;
+                    this.getStreamingLogger().debug(() => `Received usage metadata from message_delta: promptTokens=${usage.input_tokens || 0}, completionTokens=${usage.output_tokens || 0}, cacheRead=${cacheRead}, cacheCreation=${cacheCreation}`);
                     yield {
                         speaker: 'ai',
                         blocks: [],
                         metadata: {
                             usage: {
-                                promptTokens: chunk.usage.input_tokens || 0,
-                                completionTokens: chunk.usage.output_tokens || 0,
-                                totalTokens: (chunk.usage.input_tokens || 0) +
-                                    (chunk.usage.output_tokens || 0),
+                                promptTokens: usage.input_tokens || 0,
+                                completionTokens: usage.output_tokens || 0,
+                                totalTokens: (usage.input_tokens || 0) + (usage.output_tokens || 0),
+                                cache_read_input_tokens: cacheRead,
+                                cache_creation_input_tokens: cacheCreation,
                             },
                         },
                     };
@@ -916,11 +1125,25 @@ export class AnthropicProvider extends BaseProvider {
             };
             // Add usage metadata if present
             if (message.usage) {
+                const usage = message.usage;
+                const cacheRead = usage.cache_read_input_tokens ?? 0;
+                const cacheCreation = usage.cache_creation_input_tokens ?? 0;
+                cacheLogger.debug(() => `[AnthropicProvider non-streaming] Setting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
+                if (cacheRead > 0 || cacheCreation > 0) {
+                    cacheLogger.debug(() => {
+                        const hitRate = cacheRead + usage.input_tokens > 0
+                            ? (cacheRead / (cacheRead + usage.input_tokens)) * 100
+                            : 0;
+                        return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
+                    });
+                }
                 result.metadata = {
                     usage: {
-                        promptTokens: message.usage.input_tokens,
-                        completionTokens: message.usage.output_tokens,
-                        totalTokens: message.usage.input_tokens + message.usage.output_tokens,
+                        promptTokens: usage.input_tokens,
+                        completionTokens: usage.output_tokens,
+                        totalTokens: usage.input_tokens + usage.output_tokens,
+                        cache_read_input_tokens: cacheRead,
+                        cache_creation_input_tokens: cacheCreation,
                     },
                 };
             }
@@ -934,6 +1157,15 @@ export class AnthropicProvider extends BaseProvider {
         return { maxAttempts, initialDelayMs };
     }
     shouldRetryAnthropicResponse(error) {
+        // Check for Anthropic-specific error types (overloaded_error)
+        if (error && typeof error === 'object') {
+            const errorObj = error;
+            const errorType = errorObj.error?.type || errorObj.type;
+            if (errorType === 'overloaded_error') {
+                this.getLogger().debug(() => 'Will retry Anthropic request due to overloaded_error');
+                return true;
+            }
+        }
         const status = getErrorStatus(error);
         if (status === 429 || (status && status >= 500 && status < 600)) {
             this.getLogger().debug(() => `Will retry Anthropic request due to status ${status}`);
@@ -945,5 +1177,187 @@ export class AnthropicProvider extends BaseProvider {
         }
         return false;
     }
+    /**
+     * Extract rate limit information from response headers
+     */
+    extractRateLimitHeaders(headers) {
+        const rateLimitLogger = this.getRateLimitLogger();
+        const info = {};
+        // Extract requests rate limit info
+        const requestsLimit = headers.get('anthropic-ratelimit-requests-limit');
+        const requestsRemaining = headers.get('anthropic-ratelimit-requests-remaining');
+        const requestsReset = headers.get('anthropic-ratelimit-requests-reset');
+        if (requestsLimit) {
+            info.requestsLimit = parseInt(requestsLimit, 10);
+        }
+        if (requestsRemaining) {
+            info.requestsRemaining = parseInt(requestsRemaining, 10);
+        }
+        if (requestsReset) {
+            try {
+                const date = new Date(requestsReset);
+                // Only set if the date is valid
+                if (!isNaN(date.getTime())) {
+                    info.requestsReset = date;
+                }
+            }
+            catch (_error) {
+                rateLimitLogger.debug(() => `Failed to parse requests reset date: ${requestsReset}`);
+            }
+        }
+        // Extract tokens rate limit info
+        const tokensLimit = headers.get('anthropic-ratelimit-tokens-limit');
+        const tokensRemaining = headers.get('anthropic-ratelimit-tokens-remaining');
+        const tokensReset = headers.get('anthropic-ratelimit-tokens-reset');
+        if (tokensLimit) {
+            info.tokensLimit = parseInt(tokensLimit, 10);
+        }
+        if (tokensRemaining) {
+            info.tokensRemaining = parseInt(tokensRemaining, 10);
+        }
+        if (tokensReset) {
+            try {
+                const date = new Date(tokensReset);
+                // Only set if the date is valid
+                if (!isNaN(date.getTime())) {
+                    info.tokensReset = date;
+                }
+            }
+            catch (_error) {
+                rateLimitLogger.debug(() => `Failed to parse tokens reset date: ${tokensReset}`);
+            }
+        }
+        // Extract input tokens rate limit info
+        const inputTokensLimit = headers.get('anthropic-ratelimit-input-tokens-limit');
+        const inputTokensRemaining = headers.get('anthropic-ratelimit-input-tokens-remaining');
+        if (inputTokensLimit) {
+            info.inputTokensLimit = parseInt(inputTokensLimit, 10);
+        }
+        if (inputTokensRemaining) {
+            info.inputTokensRemaining = parseInt(inputTokensRemaining, 10);
+        }
+        return info;
+    }
+    /**
+     * Check rate limits and log warnings if approaching limits
+     */
+    checkRateLimits(info) {
+        const rateLimitLogger = this.getRateLimitLogger();
+        // Check requests rate limit (warn at 10% remaining)
+        if (info.requestsLimit !== undefined &&
+            info.requestsRemaining !== undefined) {
+            const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
+            if (percentage < 10) {
+                const resetTime = info.requestsReset
+                    ? ` (resets at ${info.requestsReset.toISOString()})`
+                    : '';
+                rateLimitLogger.debug(() => `WARNING: Approaching requests rate limit - ${info.requestsRemaining}/${info.requestsLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
+            }
+        }
+        // Check tokens rate limit (warn at 10% remaining)
+        if (info.tokensLimit !== undefined && info.tokensRemaining !== undefined) {
+            const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
+            if (percentage < 10) {
+                const resetTime = info.tokensReset
+                    ? ` (resets at ${info.tokensReset.toISOString()})`
+                    : '';
+                rateLimitLogger.debug(() => `WARNING: Approaching tokens rate limit - ${info.tokensRemaining}/${info.tokensLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
+            }
+        }
+        // Check input tokens rate limit (warn at 10% remaining)
+        if (info.inputTokensLimit !== undefined &&
+            info.inputTokensRemaining !== undefined) {
+            const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
+            if (percentage < 10) {
+                rateLimitLogger.debug(() => `WARNING: Approaching input tokens rate limit - ${info.inputTokensRemaining}/${info.inputTokensLimit} remaining (${percentage.toFixed(1)}%)`);
+            }
+        }
+    }
+    /**
+     * Get current rate limit information
+     * Returns the last known rate limit state from the most recent API call
+     */
+    getRateLimitInfo() {
+        return this.lastRateLimitInfo;
+    }
+    /**
+     * Wait for rate limit reset if needed based on current rate limit state
+     * This proactively throttles requests before they're made to prevent hitting rate limits
+     * @private
+     */
+    async waitForRateLimitIfNeeded(ephemeralSettings) {
+        const rateLimitLogger = this.getRateLimitLogger();
+        const info = this.lastRateLimitInfo;
+        // No rate limit data yet - skip throttling
+        if (!info) {
+            return;
+        }
+        // Check if throttling is enabled (default: on)
+        const throttleEnabled = ephemeralSettings['rate-limit-throttle'] ?? 'on';
+        if (throttleEnabled === 'off') {
+            return;
+        }
+        // Get threshold percentage (default: 5%)
+        const thresholdPercentage = ephemeralSettings['rate-limit-throttle-threshold'] ?? 5;
+        // Get max wait time (default: 60 seconds)
+        const maxWaitMs = ephemeralSettings['rate-limit-max-wait'] ?? 60000;
+        const now = Date.now();
+        // Check requests remaining
+        if (info.requestsRemaining !== undefined &&
+            info.requestsLimit !== undefined &&
+            info.requestsReset) {
+            const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
+            if (percentage < thresholdPercentage) {
+                const resetTime = info.requestsReset.getTime();
+                const waitMs = resetTime - now;
+                // Only wait if reset time is in the future
+                if (waitMs > 0) {
+                    const actualWaitMs = Math.min(waitMs, maxWaitMs);
+                    rateLimitLogger.debug(() => `Rate limit throttle: requests at ${percentage.toFixed(1)}% (${info.requestsRemaining}/${info.requestsLimit}), waiting ${actualWaitMs}ms until reset`);
+                    if (waitMs > maxWaitMs) {
+                        rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
+                    }
+                    await this.sleep(actualWaitMs);
+                    return;
+                }
+            }
+        }
+        // Check tokens remaining
+        if (info.tokensRemaining !== undefined &&
+            info.tokensLimit !== undefined &&
+            info.tokensReset) {
+            const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
+            if (percentage < thresholdPercentage) {
+                const resetTime = info.tokensReset.getTime();
+                const waitMs = resetTime - now;
+                // Only wait if reset time is in the future
+                if (waitMs > 0) {
+                    const actualWaitMs = Math.min(waitMs, maxWaitMs);
+                    rateLimitLogger.debug(() => `Rate limit throttle: tokens at ${percentage.toFixed(1)}% (${info.tokensRemaining}/${info.tokensLimit}), waiting ${actualWaitMs}ms until reset`);
+                    if (waitMs > maxWaitMs) {
+                        rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
+                    }
+                    await this.sleep(actualWaitMs);
+                    return;
+                }
+            }
+        }
+        // Check input tokens remaining
+        if (info.inputTokensRemaining !== undefined &&
+            info.inputTokensLimit !== undefined) {
+            const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
+            if (percentage < thresholdPercentage) {
+                // For input tokens, we don't have a reset time, so we can only log a warning
+                rateLimitLogger.debug(() => `Rate limit warning: input tokens at ${percentage.toFixed(1)}% (${info.inputTokensRemaining}/${info.inputTokensLimit}), no reset time available`);
+            }
+        }
+    }
+    /**
+     * Sleep for the specified number of milliseconds
+     * @private
+     */
+    sleep(ms) {
+        return new Promise((resolve) => setTimeout(resolve, ms));
+    }
 }
 //# sourceMappingURL=AnthropicProvider.js.map