@vybestack/llxprt-code-core 0.5.0-nightly.251121.027a6733 → 0.5.0-nightly.251121.bd93fe760
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/src/auth/types.d.ts +2 -2
- package/dist/src/code_assist/oauth2.js +36 -9
- package/dist/src/code_assist/oauth2.js.map +1 -1
- package/dist/src/config/config.d.ts +9 -0
- package/dist/src/config/config.js +16 -0
- package/dist/src/config/config.js.map +1 -1
- package/dist/src/confirmation-bus/index.d.ts +2 -0
- package/dist/src/confirmation-bus/index.js +3 -0
- package/dist/src/confirmation-bus/index.js.map +1 -0
- package/dist/src/confirmation-bus/message-bus.d.ts +60 -0
- package/dist/src/confirmation-bus/message-bus.js +141 -0
- package/dist/src/confirmation-bus/message-bus.js.map +1 -0
- package/dist/src/confirmation-bus/types.d.ts +59 -0
- package/dist/src/confirmation-bus/types.js +10 -0
- package/dist/src/confirmation-bus/types.js.map +1 -0
- package/dist/src/core/client.d.ts +1 -1
- package/dist/src/core/client.js +24 -11
- package/dist/src/core/client.js.map +1 -1
- package/dist/src/core/coreToolScheduler.d.ts +18 -1
- package/dist/src/core/coreToolScheduler.js +133 -13
- package/dist/src/core/coreToolScheduler.js.map +1 -1
- package/dist/src/core/geminiChat.js +2 -0
- package/dist/src/core/geminiChat.js.map +1 -1
- package/dist/src/core/subagent.d.ts +4 -1
- package/dist/src/core/subagent.js +31 -3
- package/dist/src/core/subagent.js.map +1 -1
- package/dist/src/core/subagentOrchestrator.d.ts +2 -1
- package/dist/src/core/subagentOrchestrator.js +31 -6
- package/dist/src/core/subagentOrchestrator.js.map +1 -1
- package/dist/src/ide/detect-ide.d.ts +44 -14
- package/dist/src/ide/detect-ide.js +35 -75
- package/dist/src/ide/detect-ide.js.map +1 -1
- package/dist/src/ide/ide-client.d.ts +4 -4
- package/dist/src/ide/ide-client.js +25 -24
- package/dist/src/ide/ide-client.js.map +1 -1
- package/dist/src/ide/ide-installer.d.ts +2 -2
- package/dist/src/ide/ide-installer.js +7 -9
- package/dist/src/ide/ide-installer.js.map +1 -1
- package/dist/src/index.d.ts +7 -1
- package/dist/src/index.js +9 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/mcp/oauth-provider.d.ts +4 -1
- package/dist/src/mcp/oauth-provider.js +30 -27
- package/dist/src/mcp/oauth-provider.js.map +1 -1
- package/dist/src/policy/config.d.ts +51 -0
- package/dist/src/policy/config.js +102 -0
- package/dist/src/policy/config.js.map +1 -0
- package/dist/src/policy/index.d.ts +5 -0
- package/dist/src/policy/index.js +6 -0
- package/dist/src/policy/index.js.map +1 -0
- package/dist/src/policy/policies/discovered.toml +9 -0
- package/dist/src/policy/policies/read-only.toml +68 -0
- package/dist/src/policy/policies/write.toml +69 -0
- package/dist/src/policy/policies/yolo.toml +8 -0
- package/dist/src/policy/policy-engine.d.ts +55 -0
- package/dist/src/policy/policy-engine.js +126 -0
- package/dist/src/policy/policy-engine.js.map +1 -0
- package/dist/src/policy/stable-stringify.d.ts +29 -0
- package/dist/src/policy/stable-stringify.js +111 -0
- package/dist/src/policy/stable-stringify.js.map +1 -0
- package/dist/src/policy/toml-loader.d.ts +37 -0
- package/dist/src/policy/toml-loader.js +183 -0
- package/dist/src/policy/toml-loader.js.map +1 -0
- package/dist/src/policy/types.d.ts +16 -0
- package/dist/src/policy/types.js +7 -0
- package/dist/src/policy/types.js.map +1 -0
- package/dist/src/providers/LoggingProviderWrapper.d.ts +2 -0
- package/dist/src/providers/LoggingProviderWrapper.js +27 -6
- package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
- package/dist/src/providers/ProviderManager.d.ts +18 -0
- package/dist/src/providers/ProviderManager.js +54 -3
- package/dist/src/providers/ProviderManager.js.map +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.d.ts +49 -0
- package/dist/src/providers/anthropic/AnthropicProvider.js +444 -30
- package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
- package/dist/src/runtime/AgentRuntimeLoader.d.ts +1 -0
- package/dist/src/runtime/AgentRuntimeLoader.js +6 -1
- package/dist/src/runtime/AgentRuntimeLoader.js.map +1 -1
- package/dist/src/services/history/HistoryService.d.ts +4 -0
- package/dist/src/services/history/HistoryService.js +18 -0
- package/dist/src/services/history/HistoryService.js.map +1 -1
- package/dist/src/services/history/IContent.d.ts +6 -0
- package/dist/src/services/history/IContent.js.map +1 -1
- package/dist/src/settings/types.d.ts +7 -0
- package/dist/src/telemetry/uiTelemetry.d.ts +1 -1
- package/dist/src/telemetry/uiTelemetry.js +2 -3
- package/dist/src/telemetry/uiTelemetry.js.map +1 -1
- package/dist/src/tools/edit.d.ts +3 -2
- package/dist/src/tools/edit.js +23 -10
- package/dist/src/tools/edit.js.map +1 -1
- package/dist/src/tools/glob.d.ts +3 -2
- package/dist/src/tools/glob.js +2 -2
- package/dist/src/tools/glob.js.map +1 -1
- package/dist/src/tools/grep.d.ts +3 -2
- package/dist/src/tools/grep.js +2 -2
- package/dist/src/tools/grep.js.map +1 -1
- package/dist/src/tools/ls.d.ts +3 -2
- package/dist/src/tools/ls.js +2 -2
- package/dist/src/tools/ls.js.map +1 -1
- package/dist/src/tools/mcp-tool.js +7 -1
- package/dist/src/tools/mcp-tool.js.map +1 -1
- package/dist/src/tools/memoryTool.d.ts +5 -2
- package/dist/src/tools/memoryTool.js +12 -4
- package/dist/src/tools/memoryTool.js.map +1 -1
- package/dist/src/tools/read-file.d.ts +3 -2
- package/dist/src/tools/read-file.js +2 -2
- package/dist/src/tools/read-file.js.map +1 -1
- package/dist/src/tools/read-many-files.d.ts +3 -2
- package/dist/src/tools/read-many-files.js +2 -2
- package/dist/src/tools/read-many-files.js.map +1 -1
- package/dist/src/tools/ripGrep.d.ts +3 -2
- package/dist/src/tools/ripGrep.js +2 -2
- package/dist/src/tools/ripGrep.js.map +1 -1
- package/dist/src/tools/shell.d.ts +3 -2
- package/dist/src/tools/shell.js +10 -6
- package/dist/src/tools/shell.js.map +1 -1
- package/dist/src/tools/smart-edit.d.ts +3 -2
- package/dist/src/tools/smart-edit.js +13 -9
- package/dist/src/tools/smart-edit.js.map +1 -1
- package/dist/src/tools/task.d.ts +1 -0
- package/dist/src/tools/task.js +33 -16
- package/dist/src/tools/task.js.map +1 -1
- package/dist/src/tools/tool-confirmation-types.d.ts +20 -0
- package/dist/src/tools/tool-confirmation-types.js +15 -0
- package/dist/src/tools/tool-confirmation-types.js.map +1 -0
- package/dist/src/tools/tool-error.d.ts +1 -0
- package/dist/src/tools/tool-error.js +1 -0
- package/dist/src/tools/tool-error.js.map +1 -1
- package/dist/src/tools/tool-registry.d.ts +8 -1
- package/dist/src/tools/tool-registry.js +18 -4
- package/dist/src/tools/tool-registry.js.map +1 -1
- package/dist/src/tools/tools.d.ts +52 -14
- package/dist/src/tools/tools.js +71 -15
- package/dist/src/tools/tools.js.map +1 -1
- package/dist/src/tools/web-fetch.d.ts +3 -2
- package/dist/src/tools/web-fetch.js +11 -6
- package/dist/src/tools/web-fetch.js.map +1 -1
- package/dist/src/tools/web-search-invocation.d.ts +3 -1
- package/dist/src/tools/web-search-invocation.js +5 -2
- package/dist/src/tools/web-search-invocation.js.map +1 -1
- package/dist/src/tools/web-search.d.ts +3 -2
- package/dist/src/tools/web-search.js +6 -4
- package/dist/src/tools/web-search.js.map +1 -1
- package/dist/src/tools/write-file.d.ts +3 -2
- package/dist/src/tools/write-file.js +11 -6
- package/dist/src/tools/write-file.js.map +1 -1
- package/package.json +4 -2
|
@@ -29,6 +29,8 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
29
29
|
{ pattern: /claude-.*3.*opus/i, tokens: 4096 },
|
|
30
30
|
{ pattern: /claude-.*3.*haiku/i, tokens: 4096 },
|
|
31
31
|
];
|
|
32
|
+
// Rate limit state tracking - updated on each API response
|
|
33
|
+
lastRateLimitInfo;
|
|
32
34
|
constructor(apiKey, baseURL, config, oauthManager) {
|
|
33
35
|
// Initialize base provider with auth configuration
|
|
34
36
|
const baseConfig = {
|
|
@@ -71,6 +73,12 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
71
73
|
getErrorsLogger() {
|
|
72
74
|
return new DebugLogger('llxprt:anthropic:errors');
|
|
73
75
|
}
|
|
76
|
+
getCacheLogger() {
|
|
77
|
+
return new DebugLogger('llxprt:anthropic:cache');
|
|
78
|
+
}
|
|
79
|
+
getRateLimitLogger() {
|
|
80
|
+
return new DebugLogger('llxprt:anthropic:ratelimit');
|
|
81
|
+
}
|
|
74
82
|
instantiateClient(authToken, baseURL) {
|
|
75
83
|
const isOAuthToken = authToken.startsWith('sk-ant-oat');
|
|
76
84
|
const clientConfig = {
|
|
@@ -527,6 +535,32 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
527
535
|
// Unknown format - assume it's a raw UUID
|
|
528
536
|
return 'hist_tool_' + id;
|
|
529
537
|
}
|
|
538
|
+
/**
|
|
539
|
+
* Sort object keys alphabetically for stable JSON serialization
|
|
540
|
+
* This prevents cache invalidation due to key order changes
|
|
541
|
+
*/
|
|
542
|
+
sortObjectKeys(obj) {
|
|
543
|
+
const sorted = Object.keys(obj)
|
|
544
|
+
.sort()
|
|
545
|
+
.reduce((acc, key) => {
|
|
546
|
+
acc[key] = obj[key];
|
|
547
|
+
return acc;
|
|
548
|
+
}, {});
|
|
549
|
+
return sorted;
|
|
550
|
+
}
|
|
551
|
+
/**
|
|
552
|
+
* Merge beta headers, ensuring no duplicates
|
|
553
|
+
*/
|
|
554
|
+
mergeBetaHeaders(existing, addition) {
|
|
555
|
+
if (!existing)
|
|
556
|
+
return addition;
|
|
557
|
+
const parts = new Set(existing
|
|
558
|
+
.split(',')
|
|
559
|
+
.map((s) => s.trim())
|
|
560
|
+
.filter(Boolean));
|
|
561
|
+
parts.add(addition);
|
|
562
|
+
return Array.from(parts).join(', ');
|
|
563
|
+
}
|
|
530
564
|
/**
|
|
531
565
|
* @plan PLAN-20251023-STATELESS-HARDENING.P08
|
|
532
566
|
* @requirement REQ-SP4-002, REQ-SP4-003
|
|
@@ -748,7 +782,25 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
748
782
|
const detectedFormat = this.detectToolFormat();
|
|
749
783
|
const needsQwenParameterProcessing = detectedFormat === 'qwen';
|
|
750
784
|
// Convert Gemini format tools to anthropic format (always for Anthropic API)
|
|
751
|
-
|
|
785
|
+
let anthropicTools = callFormatter.convertGeminiToFormat(tools, 'anthropic');
|
|
786
|
+
// Stabilize tool ordering and JSON schema keys to prevent cache invalidation
|
|
787
|
+
if (anthropicTools && anthropicTools.length > 0) {
|
|
788
|
+
anthropicTools = [...anthropicTools]
|
|
789
|
+
.sort((a, b) => a.name.localeCompare(b.name))
|
|
790
|
+
.map((tool) => {
|
|
791
|
+
const schema = tool.input_schema;
|
|
792
|
+
if (schema.properties) {
|
|
793
|
+
return {
|
|
794
|
+
...tool,
|
|
795
|
+
input_schema: {
|
|
796
|
+
...schema,
|
|
797
|
+
properties: this.sortObjectKeys(schema.properties),
|
|
798
|
+
},
|
|
799
|
+
};
|
|
800
|
+
}
|
|
801
|
+
return tool;
|
|
802
|
+
});
|
|
803
|
+
}
|
|
752
804
|
const toolNamesForPrompt = tools === undefined
|
|
753
805
|
? undefined
|
|
754
806
|
: Array.from(new Set(tools.flatMap((group) => group.functionDeclarations
|
|
@@ -765,32 +817,78 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
765
817
|
// Derive model parameters on demand from ephemeral settings
|
|
766
818
|
const configEphemeralSettings = options.invocation?.ephemerals ?? {};
|
|
767
819
|
const requestOverrides = configEphemeralSettings['anthropic'] || {};
|
|
820
|
+
// Get caching setting from ephemeral settings (session override) or provider settings
|
|
821
|
+
const providerSettings = this.resolveSettingsService().getProviderSettings(this.name) ?? {};
|
|
822
|
+
const cachingSetting = configEphemeralSettings['prompt-caching'] ??
|
|
823
|
+
providerSettings['prompt-caching'] ??
|
|
824
|
+
'1h';
|
|
825
|
+
const wantCaching = cachingSetting !== 'off';
|
|
826
|
+
const ttl = cachingSetting === '1h' ? '1h' : '5m';
|
|
827
|
+
const cacheLogger = this.getCacheLogger();
|
|
828
|
+
if (wantCaching) {
|
|
829
|
+
cacheLogger.debug(() => `Prompt caching enabled with TTL: ${ttl}`);
|
|
830
|
+
}
|
|
768
831
|
// For OAuth mode, inject core system prompt as the first human message
|
|
769
832
|
if (isOAuth) {
|
|
770
833
|
const corePrompt = await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt);
|
|
771
834
|
if (corePrompt) {
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
835
|
+
if (wantCaching) {
|
|
836
|
+
anthropicMessages.unshift({
|
|
837
|
+
role: 'user',
|
|
838
|
+
content: [
|
|
839
|
+
{
|
|
840
|
+
type: 'text',
|
|
841
|
+
text: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
|
|
842
|
+
cache_control: { type: 'ephemeral', ttl },
|
|
843
|
+
},
|
|
844
|
+
],
|
|
845
|
+
});
|
|
846
|
+
cacheLogger.debug(() => 'Added cache_control to OAuth system message');
|
|
847
|
+
}
|
|
848
|
+
else {
|
|
849
|
+
anthropicMessages.unshift({
|
|
850
|
+
role: 'user',
|
|
851
|
+
content: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
|
|
852
|
+
});
|
|
853
|
+
}
|
|
776
854
|
}
|
|
777
855
|
}
|
|
856
|
+
// Build system field with caching support
|
|
778
857
|
const systemPrompt = !isOAuth
|
|
779
858
|
? await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt)
|
|
780
859
|
: undefined;
|
|
860
|
+
let systemField = {};
|
|
861
|
+
if (isOAuth) {
|
|
862
|
+
systemField = {
|
|
863
|
+
system: "You are Claude Code, Anthropic's official CLI for Claude.",
|
|
864
|
+
};
|
|
865
|
+
}
|
|
866
|
+
else if (systemPrompt) {
|
|
867
|
+
if (wantCaching) {
|
|
868
|
+
// Use array format with cache_control breakpoint
|
|
869
|
+
systemField = {
|
|
870
|
+
system: [
|
|
871
|
+
{
|
|
872
|
+
type: 'text',
|
|
873
|
+
text: systemPrompt,
|
|
874
|
+
cache_control: { type: 'ephemeral', ttl },
|
|
875
|
+
},
|
|
876
|
+
],
|
|
877
|
+
};
|
|
878
|
+
cacheLogger.debug(() => `Added cache_control to system prompt (${ttl})`);
|
|
879
|
+
}
|
|
880
|
+
else {
|
|
881
|
+
// Use string format (no caching)
|
|
882
|
+
systemField = { system: systemPrompt };
|
|
883
|
+
}
|
|
884
|
+
}
|
|
781
885
|
const requestBody = {
|
|
782
886
|
model: currentModel,
|
|
783
887
|
messages: anthropicMessages,
|
|
784
888
|
max_tokens: this.getMaxTokensForModel(currentModel),
|
|
785
889
|
stream: streamingEnabled,
|
|
786
890
|
...requestOverrides, // Use derived ephemeral overrides instead of memoized instance state
|
|
787
|
-
...
|
|
788
|
-
? {
|
|
789
|
-
system: "You are Claude Code, Anthropic's official CLI for Claude.",
|
|
790
|
-
}
|
|
791
|
-
: systemPrompt
|
|
792
|
-
? { system: systemPrompt }
|
|
793
|
-
: {}),
|
|
891
|
+
...systemField,
|
|
794
892
|
...(anthropicTools && anthropicTools.length > 0
|
|
795
893
|
? { tools: anthropicTools }
|
|
796
894
|
: {}),
|
|
@@ -805,24 +903,131 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
805
903
|
});
|
|
806
904
|
}
|
|
807
905
|
// Make the API call with retry logic
|
|
808
|
-
|
|
809
|
-
|
|
906
|
+
let customHeaders = this.getCustomHeaders() || {};
|
|
907
|
+
// For OAuth, always include the oauth beta header in customHeaders
|
|
908
|
+
// to ensure it's not overridden by cache headers
|
|
909
|
+
if (isOAuth) {
|
|
910
|
+
const existingBeta = customHeaders['anthropic-beta'];
|
|
911
|
+
customHeaders = {
|
|
912
|
+
...customHeaders,
|
|
913
|
+
'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'oauth-2025-04-20'),
|
|
914
|
+
};
|
|
915
|
+
}
|
|
916
|
+
// Add extended-cache-ttl beta header for 1h caching
|
|
917
|
+
if (wantCaching && ttl === '1h') {
|
|
918
|
+
const existingBeta = customHeaders['anthropic-beta'];
|
|
919
|
+
customHeaders = {
|
|
920
|
+
...customHeaders,
|
|
921
|
+
'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'extended-cache-ttl-2025-04-11'),
|
|
922
|
+
};
|
|
923
|
+
cacheLogger.debug(() => 'Added extended-cache-ttl-2025-04-11 beta header for 1h caching');
|
|
924
|
+
}
|
|
925
|
+
const apiCall = () => Object.keys(customHeaders).length > 0
|
|
810
926
|
? client.messages.create(requestBody, { headers: customHeaders })
|
|
811
927
|
: client.messages.create(requestBody);
|
|
812
928
|
const { maxAttempts, initialDelayMs } = this.getRetryConfig();
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
929
|
+
// Proactively throttle if approaching rate limits
|
|
930
|
+
await this.waitForRateLimitIfNeeded(configEphemeralSettings);
|
|
931
|
+
// For non-streaming, use withResponse() to access headers
|
|
932
|
+
// For streaming, we can't access headers easily, so we skip rate limit extraction
|
|
933
|
+
const rateLimitLogger = this.getRateLimitLogger();
|
|
934
|
+
let responseHeaders;
|
|
935
|
+
let response;
|
|
936
|
+
if (streamingEnabled) {
|
|
937
|
+
// Streaming mode - can't easily access headers
|
|
938
|
+
response = await retryWithBackoff(apiCall, {
|
|
939
|
+
maxAttempts,
|
|
940
|
+
initialDelayMs,
|
|
941
|
+
shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
|
|
942
|
+
trackThrottleWaitTime: this.throttleTracker,
|
|
943
|
+
});
|
|
944
|
+
rateLimitLogger.debug(() => 'Streaming mode - rate limit headers not extracted');
|
|
945
|
+
}
|
|
946
|
+
else {
|
|
947
|
+
// Non-streaming mode - use withResponse() to get headers
|
|
948
|
+
const apiCallWithResponse = async () => {
|
|
949
|
+
const promise = apiCall();
|
|
950
|
+
// The promise has a withResponse() method we can call
|
|
951
|
+
if (promise &&
|
|
952
|
+
typeof promise === 'object' &&
|
|
953
|
+
'withResponse' in promise) {
|
|
954
|
+
return promise.withResponse();
|
|
955
|
+
}
|
|
956
|
+
// Fallback if withResponse is not available
|
|
957
|
+
return { data: await promise, response: undefined };
|
|
958
|
+
};
|
|
959
|
+
const result = await retryWithBackoff(apiCallWithResponse, {
|
|
960
|
+
maxAttempts,
|
|
961
|
+
initialDelayMs,
|
|
962
|
+
shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
|
|
963
|
+
trackThrottleWaitTime: this.throttleTracker,
|
|
964
|
+
});
|
|
965
|
+
response = result.data;
|
|
966
|
+
if (result.response) {
|
|
967
|
+
responseHeaders = result.response.headers;
|
|
968
|
+
// Extract and process rate limit headers
|
|
969
|
+
const rateLimitInfo = this.extractRateLimitHeaders(responseHeaders);
|
|
970
|
+
this.lastRateLimitInfo = rateLimitInfo;
|
|
971
|
+
rateLimitLogger.debug(() => {
|
|
972
|
+
const parts = [];
|
|
973
|
+
if (rateLimitInfo.requestsRemaining !== undefined &&
|
|
974
|
+
rateLimitInfo.requestsLimit !== undefined) {
|
|
975
|
+
parts.push(`requests=${rateLimitInfo.requestsRemaining}/${rateLimitInfo.requestsLimit}`);
|
|
976
|
+
}
|
|
977
|
+
if (rateLimitInfo.tokensRemaining !== undefined &&
|
|
978
|
+
rateLimitInfo.tokensLimit !== undefined) {
|
|
979
|
+
parts.push(`tokens=${rateLimitInfo.tokensRemaining}/${rateLimitInfo.tokensLimit}`);
|
|
980
|
+
}
|
|
981
|
+
if (rateLimitInfo.inputTokensRemaining !== undefined &&
|
|
982
|
+
rateLimitInfo.inputTokensLimit !== undefined) {
|
|
983
|
+
parts.push(`input_tokens=${rateLimitInfo.inputTokensRemaining}/${rateLimitInfo.inputTokensLimit}`);
|
|
984
|
+
}
|
|
985
|
+
return parts.length > 0
|
|
986
|
+
? `Rate limits: ${parts.join(', ')}`
|
|
987
|
+
: 'Rate limits: no data';
|
|
988
|
+
});
|
|
989
|
+
// Check and warn if approaching limits
|
|
990
|
+
this.checkRateLimits(rateLimitInfo);
|
|
991
|
+
}
|
|
992
|
+
}
|
|
819
993
|
if (streamingEnabled) {
|
|
820
994
|
// Handle streaming response - response is already a Stream when streaming is enabled
|
|
821
995
|
const stream = response;
|
|
822
996
|
let currentToolCall;
|
|
823
997
|
this.getStreamingLogger().debug(() => 'Processing streaming response');
|
|
824
998
|
for await (const chunk of stream) {
|
|
825
|
-
if (chunk.type === '
|
|
999
|
+
if (chunk.type === 'message_start') {
|
|
1000
|
+
// Extract cache metrics from message_start event
|
|
1001
|
+
const usage = chunk.message?.usage;
|
|
1002
|
+
if (usage) {
|
|
1003
|
+
const cacheRead = usage.cache_read_input_tokens ?? 0;
|
|
1004
|
+
const cacheCreation = usage.cache_creation_input_tokens ?? 0;
|
|
1005
|
+
cacheLogger.debug(() => `[AnthropicProvider streaming] Emitting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
|
|
1006
|
+
if (cacheRead > 0 || cacheCreation > 0) {
|
|
1007
|
+
cacheLogger.debug(() => {
|
|
1008
|
+
const hitRate = cacheRead + (usage.input_tokens ?? 0) > 0
|
|
1009
|
+
? (cacheRead / (cacheRead + (usage.input_tokens ?? 0))) *
|
|
1010
|
+
100
|
|
1011
|
+
: 0;
|
|
1012
|
+
return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
|
|
1013
|
+
});
|
|
1014
|
+
}
|
|
1015
|
+
yield {
|
|
1016
|
+
speaker: 'ai',
|
|
1017
|
+
blocks: [],
|
|
1018
|
+
metadata: {
|
|
1019
|
+
usage: {
|
|
1020
|
+
promptTokens: usage.input_tokens ?? 0,
|
|
1021
|
+
completionTokens: usage.output_tokens ?? 0,
|
|
1022
|
+
totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
|
|
1023
|
+
cache_read_input_tokens: cacheRead,
|
|
1024
|
+
cache_creation_input_tokens: cacheCreation,
|
|
1025
|
+
},
|
|
1026
|
+
},
|
|
1027
|
+
};
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
1030
|
+
else if (chunk.type === 'content_block_start') {
|
|
826
1031
|
if (chunk.content_block.type === 'tool_use') {
|
|
827
1032
|
const toolBlock = chunk.content_block;
|
|
828
1033
|
this.getStreamingLogger().debug(() => `Starting tool use: ${toolBlock.name}`);
|
|
@@ -872,17 +1077,21 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
872
1077
|
}
|
|
873
1078
|
}
|
|
874
1079
|
else if (chunk.type === 'message_delta' && chunk.usage) {
|
|
875
|
-
// Emit usage metadata
|
|
876
|
-
|
|
1080
|
+
// Emit usage metadata including cache fields
|
|
1081
|
+
const usage = chunk.usage;
|
|
1082
|
+
const cacheRead = usage.cache_read_input_tokens ?? 0;
|
|
1083
|
+
const cacheCreation = usage.cache_creation_input_tokens ?? 0;
|
|
1084
|
+
this.getStreamingLogger().debug(() => `Received usage metadata from message_delta: promptTokens=${usage.input_tokens || 0}, completionTokens=${usage.output_tokens || 0}, cacheRead=${cacheRead}, cacheCreation=${cacheCreation}`);
|
|
877
1085
|
yield {
|
|
878
1086
|
speaker: 'ai',
|
|
879
1087
|
blocks: [],
|
|
880
1088
|
metadata: {
|
|
881
1089
|
usage: {
|
|
882
|
-
promptTokens:
|
|
883
|
-
completionTokens:
|
|
884
|
-
totalTokens: (
|
|
885
|
-
|
|
1090
|
+
promptTokens: usage.input_tokens || 0,
|
|
1091
|
+
completionTokens: usage.output_tokens || 0,
|
|
1092
|
+
totalTokens: (usage.input_tokens || 0) + (usage.output_tokens || 0),
|
|
1093
|
+
cache_read_input_tokens: cacheRead,
|
|
1094
|
+
cache_creation_input_tokens: cacheCreation,
|
|
886
1095
|
},
|
|
887
1096
|
},
|
|
888
1097
|
};
|
|
@@ -916,11 +1125,25 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
916
1125
|
};
|
|
917
1126
|
// Add usage metadata if present
|
|
918
1127
|
if (message.usage) {
|
|
1128
|
+
const usage = message.usage;
|
|
1129
|
+
const cacheRead = usage.cache_read_input_tokens ?? 0;
|
|
1130
|
+
const cacheCreation = usage.cache_creation_input_tokens ?? 0;
|
|
1131
|
+
cacheLogger.debug(() => `[AnthropicProvider non-streaming] Setting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
|
|
1132
|
+
if (cacheRead > 0 || cacheCreation > 0) {
|
|
1133
|
+
cacheLogger.debug(() => {
|
|
1134
|
+
const hitRate = cacheRead + usage.input_tokens > 0
|
|
1135
|
+
? (cacheRead / (cacheRead + usage.input_tokens)) * 100
|
|
1136
|
+
: 0;
|
|
1137
|
+
return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
|
|
1138
|
+
});
|
|
1139
|
+
}
|
|
919
1140
|
result.metadata = {
|
|
920
1141
|
usage: {
|
|
921
|
-
promptTokens:
|
|
922
|
-
completionTokens:
|
|
923
|
-
totalTokens:
|
|
1142
|
+
promptTokens: usage.input_tokens,
|
|
1143
|
+
completionTokens: usage.output_tokens,
|
|
1144
|
+
totalTokens: usage.input_tokens + usage.output_tokens,
|
|
1145
|
+
cache_read_input_tokens: cacheRead,
|
|
1146
|
+
cache_creation_input_tokens: cacheCreation,
|
|
924
1147
|
},
|
|
925
1148
|
};
|
|
926
1149
|
}
|
|
@@ -934,6 +1157,15 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
934
1157
|
return { maxAttempts, initialDelayMs };
|
|
935
1158
|
}
|
|
936
1159
|
shouldRetryAnthropicResponse(error) {
|
|
1160
|
+
// Check for Anthropic-specific error types (overloaded_error)
|
|
1161
|
+
if (error && typeof error === 'object') {
|
|
1162
|
+
const errorObj = error;
|
|
1163
|
+
const errorType = errorObj.error?.type || errorObj.type;
|
|
1164
|
+
if (errorType === 'overloaded_error') {
|
|
1165
|
+
this.getLogger().debug(() => 'Will retry Anthropic request due to overloaded_error');
|
|
1166
|
+
return true;
|
|
1167
|
+
}
|
|
1168
|
+
}
|
|
937
1169
|
const status = getErrorStatus(error);
|
|
938
1170
|
if (status === 429 || (status && status >= 500 && status < 600)) {
|
|
939
1171
|
this.getLogger().debug(() => `Will retry Anthropic request due to status ${status}`);
|
|
@@ -945,5 +1177,187 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
945
1177
|
}
|
|
946
1178
|
return false;
|
|
947
1179
|
}
|
|
1180
|
+
/**
|
|
1181
|
+
* Extract rate limit information from response headers
|
|
1182
|
+
*/
|
|
1183
|
+
extractRateLimitHeaders(headers) {
|
|
1184
|
+
const rateLimitLogger = this.getRateLimitLogger();
|
|
1185
|
+
const info = {};
|
|
1186
|
+
// Extract requests rate limit info
|
|
1187
|
+
const requestsLimit = headers.get('anthropic-ratelimit-requests-limit');
|
|
1188
|
+
const requestsRemaining = headers.get('anthropic-ratelimit-requests-remaining');
|
|
1189
|
+
const requestsReset = headers.get('anthropic-ratelimit-requests-reset');
|
|
1190
|
+
if (requestsLimit) {
|
|
1191
|
+
info.requestsLimit = parseInt(requestsLimit, 10);
|
|
1192
|
+
}
|
|
1193
|
+
if (requestsRemaining) {
|
|
1194
|
+
info.requestsRemaining = parseInt(requestsRemaining, 10);
|
|
1195
|
+
}
|
|
1196
|
+
if (requestsReset) {
|
|
1197
|
+
try {
|
|
1198
|
+
const date = new Date(requestsReset);
|
|
1199
|
+
// Only set if the date is valid
|
|
1200
|
+
if (!isNaN(date.getTime())) {
|
|
1201
|
+
info.requestsReset = date;
|
|
1202
|
+
}
|
|
1203
|
+
}
|
|
1204
|
+
catch (_error) {
|
|
1205
|
+
rateLimitLogger.debug(() => `Failed to parse requests reset date: ${requestsReset}`);
|
|
1206
|
+
}
|
|
1207
|
+
}
|
|
1208
|
+
// Extract tokens rate limit info
|
|
1209
|
+
const tokensLimit = headers.get('anthropic-ratelimit-tokens-limit');
|
|
1210
|
+
const tokensRemaining = headers.get('anthropic-ratelimit-tokens-remaining');
|
|
1211
|
+
const tokensReset = headers.get('anthropic-ratelimit-tokens-reset');
|
|
1212
|
+
if (tokensLimit) {
|
|
1213
|
+
info.tokensLimit = parseInt(tokensLimit, 10);
|
|
1214
|
+
}
|
|
1215
|
+
if (tokensRemaining) {
|
|
1216
|
+
info.tokensRemaining = parseInt(tokensRemaining, 10);
|
|
1217
|
+
}
|
|
1218
|
+
if (tokensReset) {
|
|
1219
|
+
try {
|
|
1220
|
+
const date = new Date(tokensReset);
|
|
1221
|
+
// Only set if the date is valid
|
|
1222
|
+
if (!isNaN(date.getTime())) {
|
|
1223
|
+
info.tokensReset = date;
|
|
1224
|
+
}
|
|
1225
|
+
}
|
|
1226
|
+
catch (_error) {
|
|
1227
|
+
rateLimitLogger.debug(() => `Failed to parse tokens reset date: ${tokensReset}`);
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
// Extract input tokens rate limit info
|
|
1231
|
+
const inputTokensLimit = headers.get('anthropic-ratelimit-input-tokens-limit');
|
|
1232
|
+
const inputTokensRemaining = headers.get('anthropic-ratelimit-input-tokens-remaining');
|
|
1233
|
+
if (inputTokensLimit) {
|
|
1234
|
+
info.inputTokensLimit = parseInt(inputTokensLimit, 10);
|
|
1235
|
+
}
|
|
1236
|
+
if (inputTokensRemaining) {
|
|
1237
|
+
info.inputTokensRemaining = parseInt(inputTokensRemaining, 10);
|
|
1238
|
+
}
|
|
1239
|
+
return info;
|
|
1240
|
+
}
|
|
1241
|
+
/**
|
|
1242
|
+
* Check rate limits and log warnings if approaching limits
|
|
1243
|
+
*/
|
|
1244
|
+
checkRateLimits(info) {
|
|
1245
|
+
const rateLimitLogger = this.getRateLimitLogger();
|
|
1246
|
+
// Check requests rate limit (warn at 10% remaining)
|
|
1247
|
+
if (info.requestsLimit !== undefined &&
|
|
1248
|
+
info.requestsRemaining !== undefined) {
|
|
1249
|
+
const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
|
|
1250
|
+
if (percentage < 10) {
|
|
1251
|
+
const resetTime = info.requestsReset
|
|
1252
|
+
? ` (resets at ${info.requestsReset.toISOString()})`
|
|
1253
|
+
: '';
|
|
1254
|
+
rateLimitLogger.debug(() => `WARNING: Approaching requests rate limit - ${info.requestsRemaining}/${info.requestsLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
// Check tokens rate limit (warn at 10% remaining)
|
|
1258
|
+
if (info.tokensLimit !== undefined && info.tokensRemaining !== undefined) {
|
|
1259
|
+
const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
|
|
1260
|
+
if (percentage < 10) {
|
|
1261
|
+
const resetTime = info.tokensReset
|
|
1262
|
+
? ` (resets at ${info.tokensReset.toISOString()})`
|
|
1263
|
+
: '';
|
|
1264
|
+
rateLimitLogger.debug(() => `WARNING: Approaching tokens rate limit - ${info.tokensRemaining}/${info.tokensLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
|
|
1265
|
+
}
|
|
1266
|
+
}
|
|
1267
|
+
// Check input tokens rate limit (warn at 10% remaining)
|
|
1268
|
+
if (info.inputTokensLimit !== undefined &&
|
|
1269
|
+
info.inputTokensRemaining !== undefined) {
|
|
1270
|
+
const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
|
|
1271
|
+
if (percentage < 10) {
|
|
1272
|
+
rateLimitLogger.debug(() => `WARNING: Approaching input tokens rate limit - ${info.inputTokensRemaining}/${info.inputTokensLimit} remaining (${percentage.toFixed(1)}%)`);
|
|
1273
|
+
}
|
|
1274
|
+
}
|
|
1275
|
+
}
|
|
1276
|
+
/**
|
|
1277
|
+
* Get current rate limit information
|
|
1278
|
+
* Returns the last known rate limit state from the most recent API call
|
|
1279
|
+
*/
|
|
1280
|
+
getRateLimitInfo() {
|
|
1281
|
+
return this.lastRateLimitInfo;
|
|
1282
|
+
}
|
|
1283
|
+
/**
|
|
1284
|
+
* Wait for rate limit reset if needed based on current rate limit state
|
|
1285
|
+
* This proactively throttles requests before they're made to prevent hitting rate limits
|
|
1286
|
+
* @private
|
|
1287
|
+
*/
|
|
1288
|
+
async waitForRateLimitIfNeeded(ephemeralSettings) {
|
|
1289
|
+
const rateLimitLogger = this.getRateLimitLogger();
|
|
1290
|
+
const info = this.lastRateLimitInfo;
|
|
1291
|
+
// No rate limit data yet - skip throttling
|
|
1292
|
+
if (!info) {
|
|
1293
|
+
return;
|
|
1294
|
+
}
|
|
1295
|
+
// Check if throttling is enabled (default: on)
|
|
1296
|
+
const throttleEnabled = ephemeralSettings['rate-limit-throttle'] ?? 'on';
|
|
1297
|
+
if (throttleEnabled === 'off') {
|
|
1298
|
+
return;
|
|
1299
|
+
}
|
|
1300
|
+
// Get threshold percentage (default: 5%)
|
|
1301
|
+
const thresholdPercentage = ephemeralSettings['rate-limit-throttle-threshold'] ?? 5;
|
|
1302
|
+
// Get max wait time (default: 60 seconds)
|
|
1303
|
+
const maxWaitMs = ephemeralSettings['rate-limit-max-wait'] ?? 60000;
|
|
1304
|
+
const now = Date.now();
|
|
1305
|
+
// Check requests remaining
|
|
1306
|
+
if (info.requestsRemaining !== undefined &&
|
|
1307
|
+
info.requestsLimit !== undefined &&
|
|
1308
|
+
info.requestsReset) {
|
|
1309
|
+
const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
|
|
1310
|
+
if (percentage < thresholdPercentage) {
|
|
1311
|
+
const resetTime = info.requestsReset.getTime();
|
|
1312
|
+
const waitMs = resetTime - now;
|
|
1313
|
+
// Only wait if reset time is in the future
|
|
1314
|
+
if (waitMs > 0) {
|
|
1315
|
+
const actualWaitMs = Math.min(waitMs, maxWaitMs);
|
|
1316
|
+
rateLimitLogger.debug(() => `Rate limit throttle: requests at ${percentage.toFixed(1)}% (${info.requestsRemaining}/${info.requestsLimit}), waiting ${actualWaitMs}ms until reset`);
|
|
1317
|
+
if (waitMs > maxWaitMs) {
|
|
1318
|
+
rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
|
|
1319
|
+
}
|
|
1320
|
+
await this.sleep(actualWaitMs);
|
|
1321
|
+
return;
|
|
1322
|
+
}
|
|
1323
|
+
}
|
|
1324
|
+
}
|
|
1325
|
+
// Check tokens remaining
|
|
1326
|
+
if (info.tokensRemaining !== undefined &&
|
|
1327
|
+
info.tokensLimit !== undefined &&
|
|
1328
|
+
info.tokensReset) {
|
|
1329
|
+
const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
|
|
1330
|
+
if (percentage < thresholdPercentage) {
|
|
1331
|
+
const resetTime = info.tokensReset.getTime();
|
|
1332
|
+
const waitMs = resetTime - now;
|
|
1333
|
+
// Only wait if reset time is in the future
|
|
1334
|
+
if (waitMs > 0) {
|
|
1335
|
+
const actualWaitMs = Math.min(waitMs, maxWaitMs);
|
|
1336
|
+
rateLimitLogger.debug(() => `Rate limit throttle: tokens at ${percentage.toFixed(1)}% (${info.tokensRemaining}/${info.tokensLimit}), waiting ${actualWaitMs}ms until reset`);
|
|
1337
|
+
if (waitMs > maxWaitMs) {
|
|
1338
|
+
rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
|
|
1339
|
+
}
|
|
1340
|
+
await this.sleep(actualWaitMs);
|
|
1341
|
+
return;
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1345
|
+
// Check input tokens remaining
|
|
1346
|
+
if (info.inputTokensRemaining !== undefined &&
|
|
1347
|
+
info.inputTokensLimit !== undefined) {
|
|
1348
|
+
const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
|
|
1349
|
+
if (percentage < thresholdPercentage) {
|
|
1350
|
+
// For input tokens, we don't have a reset time, so we can only log a warning
|
|
1351
|
+
rateLimitLogger.debug(() => `Rate limit warning: input tokens at ${percentage.toFixed(1)}% (${info.inputTokensRemaining}/${info.inputTokensLimit}), no reset time available`);
|
|
1352
|
+
}
|
|
1353
|
+
}
|
|
1354
|
+
}
|
|
1355
|
+
/**
|
|
1356
|
+
* Sleep for the specified number of milliseconds
|
|
1357
|
+
* @private
|
|
1358
|
+
*/
|
|
1359
|
+
sleep(ms) {
|
|
1360
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1361
|
+
}
|
|
948
1362
|
}
|
|
949
1363
|
//# sourceMappingURL=AnthropicProvider.js.map
|