@vybestack/llxprt-code-core 0.5.0-nightly.251120.027a6733 → 0.5.0-nightly.251121.9dcb43714

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31):
  1. package/dist/src/auth/types.d.ts +2 -2
  2. package/dist/src/code_assist/oauth2.js +36 -9
  3. package/dist/src/code_assist/oauth2.js.map +1 -1
  4. package/dist/src/core/subagent.d.ts +4 -1
  5. package/dist/src/core/subagent.js +31 -3
  6. package/dist/src/core/subagent.js.map +1 -1
  7. package/dist/src/core/subagentOrchestrator.d.ts +2 -1
  8. package/dist/src/core/subagentOrchestrator.js +31 -6
  9. package/dist/src/core/subagentOrchestrator.js.map +1 -1
  10. package/dist/src/providers/LoggingProviderWrapper.d.ts +2 -0
  11. package/dist/src/providers/LoggingProviderWrapper.js +27 -6
  12. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  13. package/dist/src/providers/ProviderManager.d.ts +18 -0
  14. package/dist/src/providers/ProviderManager.js +54 -3
  15. package/dist/src/providers/ProviderManager.js.map +1 -1
  16. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +49 -0
  17. package/dist/src/providers/anthropic/AnthropicProvider.js +444 -30
  18. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  19. package/dist/src/runtime/AgentRuntimeLoader.d.ts +1 -0
  20. package/dist/src/runtime/AgentRuntimeLoader.js +6 -1
  21. package/dist/src/runtime/AgentRuntimeLoader.js.map +1 -1
  22. package/dist/src/services/history/HistoryService.d.ts +4 -0
  23. package/dist/src/services/history/HistoryService.js +18 -0
  24. package/dist/src/services/history/HistoryService.js.map +1 -1
  25. package/dist/src/services/history/IContent.d.ts +6 -0
  26. package/dist/src/services/history/IContent.js.map +1 -1
  27. package/dist/src/settings/types.d.ts +7 -0
  28. package/dist/src/tools/task.d.ts +1 -0
  29. package/dist/src/tools/task.js +33 -16
  30. package/dist/src/tools/task.js.map +1 -1
  31. package/package.json +1 -1
@@ -29,6 +29,8 @@ export class AnthropicProvider extends BaseProvider {
29
29
  { pattern: /claude-.*3.*opus/i, tokens: 4096 },
30
30
  { pattern: /claude-.*3.*haiku/i, tokens: 4096 },
31
31
  ];
32
+ // Rate limit state tracking - updated on each API response
33
+ lastRateLimitInfo;
32
34
  constructor(apiKey, baseURL, config, oauthManager) {
33
35
  // Initialize base provider with auth configuration
34
36
  const baseConfig = {
@@ -71,6 +73,12 @@ export class AnthropicProvider extends BaseProvider {
71
73
  getErrorsLogger() {
72
74
  return new DebugLogger('llxprt:anthropic:errors');
73
75
  }
76
+ getCacheLogger() {
77
+ return new DebugLogger('llxprt:anthropic:cache');
78
+ }
79
+ getRateLimitLogger() {
80
+ return new DebugLogger('llxprt:anthropic:ratelimit');
81
+ }
74
82
  instantiateClient(authToken, baseURL) {
75
83
  const isOAuthToken = authToken.startsWith('sk-ant-oat');
76
84
  const clientConfig = {
@@ -527,6 +535,32 @@ export class AnthropicProvider extends BaseProvider {
527
535
  // Unknown format - assume it's a raw UUID
528
536
  return 'hist_tool_' + id;
529
537
  }
538
+ /**
539
+ * Sort object keys alphabetically for stable JSON serialization
540
+ * This prevents cache invalidation due to key order changes
541
+ */
542
+ sortObjectKeys(obj) {
543
+ const sorted = Object.keys(obj)
544
+ .sort()
545
+ .reduce((acc, key) => {
546
+ acc[key] = obj[key];
547
+ return acc;
548
+ }, {});
549
+ return sorted;
550
+ }
551
+ /**
552
+ * Merge beta headers, ensuring no duplicates
553
+ */
554
+ mergeBetaHeaders(existing, addition) {
555
+ if (!existing)
556
+ return addition;
557
+ const parts = new Set(existing
558
+ .split(',')
559
+ .map((s) => s.trim())
560
+ .filter(Boolean));
561
+ parts.add(addition);
562
+ return Array.from(parts).join(', ');
563
+ }
530
564
  /**
531
565
  * @plan PLAN-20251023-STATELESS-HARDENING.P08
532
566
  * @requirement REQ-SP4-002, REQ-SP4-003
@@ -748,7 +782,25 @@ export class AnthropicProvider extends BaseProvider {
748
782
  const detectedFormat = this.detectToolFormat();
749
783
  const needsQwenParameterProcessing = detectedFormat === 'qwen';
750
784
  // Convert Gemini format tools to anthropic format (always for Anthropic API)
751
- const anthropicTools = callFormatter.convertGeminiToFormat(tools, 'anthropic');
785
+ let anthropicTools = callFormatter.convertGeminiToFormat(tools, 'anthropic');
786
+ // Stabilize tool ordering and JSON schema keys to prevent cache invalidation
787
+ if (anthropicTools && anthropicTools.length > 0) {
788
+ anthropicTools = [...anthropicTools]
789
+ .sort((a, b) => a.name.localeCompare(b.name))
790
+ .map((tool) => {
791
+ const schema = tool.input_schema;
792
+ if (schema.properties) {
793
+ return {
794
+ ...tool,
795
+ input_schema: {
796
+ ...schema,
797
+ properties: this.sortObjectKeys(schema.properties),
798
+ },
799
+ };
800
+ }
801
+ return tool;
802
+ });
803
+ }
752
804
  const toolNamesForPrompt = tools === undefined
753
805
  ? undefined
754
806
  : Array.from(new Set(tools.flatMap((group) => group.functionDeclarations
@@ -765,32 +817,78 @@ export class AnthropicProvider extends BaseProvider {
765
817
  // Derive model parameters on demand from ephemeral settings
766
818
  const configEphemeralSettings = options.invocation?.ephemerals ?? {};
767
819
  const requestOverrides = configEphemeralSettings['anthropic'] || {};
820
+ // Get caching setting from ephemeral settings (session override) or provider settings
821
+ const providerSettings = this.resolveSettingsService().getProviderSettings(this.name) ?? {};
822
+ const cachingSetting = configEphemeralSettings['prompt-caching'] ??
823
+ providerSettings['prompt-caching'] ??
824
+ '1h';
825
+ const wantCaching = cachingSetting !== 'off';
826
+ const ttl = cachingSetting === '1h' ? '1h' : '5m';
827
+ const cacheLogger = this.getCacheLogger();
828
+ if (wantCaching) {
829
+ cacheLogger.debug(() => `Prompt caching enabled with TTL: ${ttl}`);
830
+ }
768
831
  // For OAuth mode, inject core system prompt as the first human message
769
832
  if (isOAuth) {
770
833
  const corePrompt = await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt);
771
834
  if (corePrompt) {
772
- anthropicMessages.unshift({
773
- role: 'user',
774
- content: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
775
- });
835
+ if (wantCaching) {
836
+ anthropicMessages.unshift({
837
+ role: 'user',
838
+ content: [
839
+ {
840
+ type: 'text',
841
+ text: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
842
+ cache_control: { type: 'ephemeral', ttl },
843
+ },
844
+ ],
845
+ });
846
+ cacheLogger.debug(() => 'Added cache_control to OAuth system message');
847
+ }
848
+ else {
849
+ anthropicMessages.unshift({
850
+ role: 'user',
851
+ content: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
852
+ });
853
+ }
776
854
  }
777
855
  }
856
+ // Build system field with caching support
778
857
  const systemPrompt = !isOAuth
779
858
  ? await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt)
780
859
  : undefined;
860
+ let systemField = {};
861
+ if (isOAuth) {
862
+ systemField = {
863
+ system: "You are Claude Code, Anthropic's official CLI for Claude.",
864
+ };
865
+ }
866
+ else if (systemPrompt) {
867
+ if (wantCaching) {
868
+ // Use array format with cache_control breakpoint
869
+ systemField = {
870
+ system: [
871
+ {
872
+ type: 'text',
873
+ text: systemPrompt,
874
+ cache_control: { type: 'ephemeral', ttl },
875
+ },
876
+ ],
877
+ };
878
+ cacheLogger.debug(() => `Added cache_control to system prompt (${ttl})`);
879
+ }
880
+ else {
881
+ // Use string format (no caching)
882
+ systemField = { system: systemPrompt };
883
+ }
884
+ }
781
885
  const requestBody = {
782
886
  model: currentModel,
783
887
  messages: anthropicMessages,
784
888
  max_tokens: this.getMaxTokensForModel(currentModel),
785
889
  stream: streamingEnabled,
786
890
  ...requestOverrides, // Use derived ephemeral overrides instead of memoized instance state
787
- ...(isOAuth
788
- ? {
789
- system: "You are Claude Code, Anthropic's official CLI for Claude.",
790
- }
791
- : systemPrompt
792
- ? { system: systemPrompt }
793
- : {}),
891
+ ...systemField,
794
892
  ...(anthropicTools && anthropicTools.length > 0
795
893
  ? { tools: anthropicTools }
796
894
  : {}),
@@ -805,24 +903,131 @@ export class AnthropicProvider extends BaseProvider {
805
903
  });
806
904
  }
807
905
  // Make the API call with retry logic
808
- const customHeaders = this.getCustomHeaders();
809
- const apiCall = () => customHeaders
906
+ let customHeaders = this.getCustomHeaders() || {};
907
+ // For OAuth, always include the oauth beta header in customHeaders
908
+ // to ensure it's not overridden by cache headers
909
+ if (isOAuth) {
910
+ const existingBeta = customHeaders['anthropic-beta'];
911
+ customHeaders = {
912
+ ...customHeaders,
913
+ 'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'oauth-2025-04-20'),
914
+ };
915
+ }
916
+ // Add extended-cache-ttl beta header for 1h caching
917
+ if (wantCaching && ttl === '1h') {
918
+ const existingBeta = customHeaders['anthropic-beta'];
919
+ customHeaders = {
920
+ ...customHeaders,
921
+ 'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'extended-cache-ttl-2025-04-11'),
922
+ };
923
+ cacheLogger.debug(() => 'Added extended-cache-ttl-2025-04-11 beta header for 1h caching');
924
+ }
925
+ const apiCall = () => Object.keys(customHeaders).length > 0
810
926
  ? client.messages.create(requestBody, { headers: customHeaders })
811
927
  : client.messages.create(requestBody);
812
928
  const { maxAttempts, initialDelayMs } = this.getRetryConfig();
813
- const response = await retryWithBackoff(apiCall, {
814
- maxAttempts,
815
- initialDelayMs,
816
- shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
817
- trackThrottleWaitTime: this.throttleTracker,
818
- });
929
+ // Proactively throttle if approaching rate limits
930
+ await this.waitForRateLimitIfNeeded(configEphemeralSettings);
931
+ // For non-streaming, use withResponse() to access headers
932
+ // For streaming, we can't access headers easily, so we skip rate limit extraction
933
+ const rateLimitLogger = this.getRateLimitLogger();
934
+ let responseHeaders;
935
+ let response;
936
+ if (streamingEnabled) {
937
+ // Streaming mode - can't easily access headers
938
+ response = await retryWithBackoff(apiCall, {
939
+ maxAttempts,
940
+ initialDelayMs,
941
+ shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
942
+ trackThrottleWaitTime: this.throttleTracker,
943
+ });
944
+ rateLimitLogger.debug(() => 'Streaming mode - rate limit headers not extracted');
945
+ }
946
+ else {
947
+ // Non-streaming mode - use withResponse() to get headers
948
+ const apiCallWithResponse = async () => {
949
+ const promise = apiCall();
950
+ // The promise has a withResponse() method we can call
951
+ if (promise &&
952
+ typeof promise === 'object' &&
953
+ 'withResponse' in promise) {
954
+ return promise.withResponse();
955
+ }
956
+ // Fallback if withResponse is not available
957
+ return { data: await promise, response: undefined };
958
+ };
959
+ const result = await retryWithBackoff(apiCallWithResponse, {
960
+ maxAttempts,
961
+ initialDelayMs,
962
+ shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
963
+ trackThrottleWaitTime: this.throttleTracker,
964
+ });
965
+ response = result.data;
966
+ if (result.response) {
967
+ responseHeaders = result.response.headers;
968
+ // Extract and process rate limit headers
969
+ const rateLimitInfo = this.extractRateLimitHeaders(responseHeaders);
970
+ this.lastRateLimitInfo = rateLimitInfo;
971
+ rateLimitLogger.debug(() => {
972
+ const parts = [];
973
+ if (rateLimitInfo.requestsRemaining !== undefined &&
974
+ rateLimitInfo.requestsLimit !== undefined) {
975
+ parts.push(`requests=${rateLimitInfo.requestsRemaining}/${rateLimitInfo.requestsLimit}`);
976
+ }
977
+ if (rateLimitInfo.tokensRemaining !== undefined &&
978
+ rateLimitInfo.tokensLimit !== undefined) {
979
+ parts.push(`tokens=${rateLimitInfo.tokensRemaining}/${rateLimitInfo.tokensLimit}`);
980
+ }
981
+ if (rateLimitInfo.inputTokensRemaining !== undefined &&
982
+ rateLimitInfo.inputTokensLimit !== undefined) {
983
+ parts.push(`input_tokens=${rateLimitInfo.inputTokensRemaining}/${rateLimitInfo.inputTokensLimit}`);
984
+ }
985
+ return parts.length > 0
986
+ ? `Rate limits: ${parts.join(', ')}`
987
+ : 'Rate limits: no data';
988
+ });
989
+ // Check and warn if approaching limits
990
+ this.checkRateLimits(rateLimitInfo);
991
+ }
992
+ }
819
993
  if (streamingEnabled) {
820
994
  // Handle streaming response - response is already a Stream when streaming is enabled
821
995
  const stream = response;
822
996
  let currentToolCall;
823
997
  this.getStreamingLogger().debug(() => 'Processing streaming response');
824
998
  for await (const chunk of stream) {
825
- if (chunk.type === 'content_block_start') {
999
+ if (chunk.type === 'message_start') {
1000
+ // Extract cache metrics from message_start event
1001
+ const usage = chunk.message?.usage;
1002
+ if (usage) {
1003
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
1004
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
1005
+ cacheLogger.debug(() => `[AnthropicProvider streaming] Emitting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
1006
+ if (cacheRead > 0 || cacheCreation > 0) {
1007
+ cacheLogger.debug(() => {
1008
+ const hitRate = cacheRead + (usage.input_tokens ?? 0) > 0
1009
+ ? (cacheRead / (cacheRead + (usage.input_tokens ?? 0))) *
1010
+ 100
1011
+ : 0;
1012
+ return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
1013
+ });
1014
+ }
1015
+ yield {
1016
+ speaker: 'ai',
1017
+ blocks: [],
1018
+ metadata: {
1019
+ usage: {
1020
+ promptTokens: usage.input_tokens ?? 0,
1021
+ completionTokens: usage.output_tokens ?? 0,
1022
+ totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
1023
+ cache_read_input_tokens: cacheRead,
1024
+ cache_creation_input_tokens: cacheCreation,
1025
+ },
1026
+ },
1027
+ };
1028
+ }
1029
+ }
1030
+ else if (chunk.type === 'content_block_start') {
826
1031
  if (chunk.content_block.type === 'tool_use') {
827
1032
  const toolBlock = chunk.content_block;
828
1033
  this.getStreamingLogger().debug(() => `Starting tool use: ${toolBlock.name}`);
@@ -872,17 +1077,21 @@ export class AnthropicProvider extends BaseProvider {
872
1077
  }
873
1078
  }
874
1079
  else if (chunk.type === 'message_delta' && chunk.usage) {
875
- // Emit usage metadata
876
- this.getStreamingLogger().debug(() => `Received usage metadata`);
1080
+ // Emit usage metadata including cache fields
1081
+ const usage = chunk.usage;
1082
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
1083
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
1084
+ this.getStreamingLogger().debug(() => `Received usage metadata from message_delta: promptTokens=${usage.input_tokens || 0}, completionTokens=${usage.output_tokens || 0}, cacheRead=${cacheRead}, cacheCreation=${cacheCreation}`);
877
1085
  yield {
878
1086
  speaker: 'ai',
879
1087
  blocks: [],
880
1088
  metadata: {
881
1089
  usage: {
882
- promptTokens: chunk.usage.input_tokens || 0,
883
- completionTokens: chunk.usage.output_tokens || 0,
884
- totalTokens: (chunk.usage.input_tokens || 0) +
885
- (chunk.usage.output_tokens || 0),
1090
+ promptTokens: usage.input_tokens || 0,
1091
+ completionTokens: usage.output_tokens || 0,
1092
+ totalTokens: (usage.input_tokens || 0) + (usage.output_tokens || 0),
1093
+ cache_read_input_tokens: cacheRead,
1094
+ cache_creation_input_tokens: cacheCreation,
886
1095
  },
887
1096
  },
888
1097
  };
@@ -916,11 +1125,25 @@ export class AnthropicProvider extends BaseProvider {
916
1125
  };
917
1126
  // Add usage metadata if present
918
1127
  if (message.usage) {
1128
+ const usage = message.usage;
1129
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
1130
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
1131
+ cacheLogger.debug(() => `[AnthropicProvider non-streaming] Setting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
1132
+ if (cacheRead > 0 || cacheCreation > 0) {
1133
+ cacheLogger.debug(() => {
1134
+ const hitRate = cacheRead + usage.input_tokens > 0
1135
+ ? (cacheRead / (cacheRead + usage.input_tokens)) * 100
1136
+ : 0;
1137
+ return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
1138
+ });
1139
+ }
919
1140
  result.metadata = {
920
1141
  usage: {
921
- promptTokens: message.usage.input_tokens,
922
- completionTokens: message.usage.output_tokens,
923
- totalTokens: message.usage.input_tokens + message.usage.output_tokens,
1142
+ promptTokens: usage.input_tokens,
1143
+ completionTokens: usage.output_tokens,
1144
+ totalTokens: usage.input_tokens + usage.output_tokens,
1145
+ cache_read_input_tokens: cacheRead,
1146
+ cache_creation_input_tokens: cacheCreation,
924
1147
  },
925
1148
  };
926
1149
  }
@@ -934,6 +1157,15 @@ export class AnthropicProvider extends BaseProvider {
934
1157
  return { maxAttempts, initialDelayMs };
935
1158
  }
936
1159
  shouldRetryAnthropicResponse(error) {
1160
+ // Check for Anthropic-specific error types (overloaded_error)
1161
+ if (error && typeof error === 'object') {
1162
+ const errorObj = error;
1163
+ const errorType = errorObj.error?.type || errorObj.type;
1164
+ if (errorType === 'overloaded_error') {
1165
+ this.getLogger().debug(() => 'Will retry Anthropic request due to overloaded_error');
1166
+ return true;
1167
+ }
1168
+ }
937
1169
  const status = getErrorStatus(error);
938
1170
  if (status === 429 || (status && status >= 500 && status < 600)) {
939
1171
  this.getLogger().debug(() => `Will retry Anthropic request due to status ${status}`);
@@ -945,5 +1177,187 @@ export class AnthropicProvider extends BaseProvider {
945
1177
  }
946
1178
  return false;
947
1179
  }
1180
+ /**
1181
+ * Extract rate limit information from response headers
1182
+ */
1183
+ extractRateLimitHeaders(headers) {
1184
+ const rateLimitLogger = this.getRateLimitLogger();
1185
+ const info = {};
1186
+ // Extract requests rate limit info
1187
+ const requestsLimit = headers.get('anthropic-ratelimit-requests-limit');
1188
+ const requestsRemaining = headers.get('anthropic-ratelimit-requests-remaining');
1189
+ const requestsReset = headers.get('anthropic-ratelimit-requests-reset');
1190
+ if (requestsLimit) {
1191
+ info.requestsLimit = parseInt(requestsLimit, 10);
1192
+ }
1193
+ if (requestsRemaining) {
1194
+ info.requestsRemaining = parseInt(requestsRemaining, 10);
1195
+ }
1196
+ if (requestsReset) {
1197
+ try {
1198
+ const date = new Date(requestsReset);
1199
+ // Only set if the date is valid
1200
+ if (!isNaN(date.getTime())) {
1201
+ info.requestsReset = date;
1202
+ }
1203
+ }
1204
+ catch (_error) {
1205
+ rateLimitLogger.debug(() => `Failed to parse requests reset date: ${requestsReset}`);
1206
+ }
1207
+ }
1208
+ // Extract tokens rate limit info
1209
+ const tokensLimit = headers.get('anthropic-ratelimit-tokens-limit');
1210
+ const tokensRemaining = headers.get('anthropic-ratelimit-tokens-remaining');
1211
+ const tokensReset = headers.get('anthropic-ratelimit-tokens-reset');
1212
+ if (tokensLimit) {
1213
+ info.tokensLimit = parseInt(tokensLimit, 10);
1214
+ }
1215
+ if (tokensRemaining) {
1216
+ info.tokensRemaining = parseInt(tokensRemaining, 10);
1217
+ }
1218
+ if (tokensReset) {
1219
+ try {
1220
+ const date = new Date(tokensReset);
1221
+ // Only set if the date is valid
1222
+ if (!isNaN(date.getTime())) {
1223
+ info.tokensReset = date;
1224
+ }
1225
+ }
1226
+ catch (_error) {
1227
+ rateLimitLogger.debug(() => `Failed to parse tokens reset date: ${tokensReset}`);
1228
+ }
1229
+ }
1230
+ // Extract input tokens rate limit info
1231
+ const inputTokensLimit = headers.get('anthropic-ratelimit-input-tokens-limit');
1232
+ const inputTokensRemaining = headers.get('anthropic-ratelimit-input-tokens-remaining');
1233
+ if (inputTokensLimit) {
1234
+ info.inputTokensLimit = parseInt(inputTokensLimit, 10);
1235
+ }
1236
+ if (inputTokensRemaining) {
1237
+ info.inputTokensRemaining = parseInt(inputTokensRemaining, 10);
1238
+ }
1239
+ return info;
1240
+ }
1241
+ /**
1242
+ * Check rate limits and log warnings if approaching limits
1243
+ */
1244
+ checkRateLimits(info) {
1245
+ const rateLimitLogger = this.getRateLimitLogger();
1246
+ // Check requests rate limit (warn at 10% remaining)
1247
+ if (info.requestsLimit !== undefined &&
1248
+ info.requestsRemaining !== undefined) {
1249
+ const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
1250
+ if (percentage < 10) {
1251
+ const resetTime = info.requestsReset
1252
+ ? ` (resets at ${info.requestsReset.toISOString()})`
1253
+ : '';
1254
+ rateLimitLogger.debug(() => `WARNING: Approaching requests rate limit - ${info.requestsRemaining}/${info.requestsLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
1255
+ }
1256
+ }
1257
+ // Check tokens rate limit (warn at 10% remaining)
1258
+ if (info.tokensLimit !== undefined && info.tokensRemaining !== undefined) {
1259
+ const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
1260
+ if (percentage < 10) {
1261
+ const resetTime = info.tokensReset
1262
+ ? ` (resets at ${info.tokensReset.toISOString()})`
1263
+ : '';
1264
+ rateLimitLogger.debug(() => `WARNING: Approaching tokens rate limit - ${info.tokensRemaining}/${info.tokensLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
1265
+ }
1266
+ }
1267
+ // Check input tokens rate limit (warn at 10% remaining)
1268
+ if (info.inputTokensLimit !== undefined &&
1269
+ info.inputTokensRemaining !== undefined) {
1270
+ const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
1271
+ if (percentage < 10) {
1272
+ rateLimitLogger.debug(() => `WARNING: Approaching input tokens rate limit - ${info.inputTokensRemaining}/${info.inputTokensLimit} remaining (${percentage.toFixed(1)}%)`);
1273
+ }
1274
+ }
1275
+ }
1276
+ /**
1277
+ * Get current rate limit information
1278
+ * Returns the last known rate limit state from the most recent API call
1279
+ */
1280
+ getRateLimitInfo() {
1281
+ return this.lastRateLimitInfo;
1282
+ }
1283
+ /**
1284
+ * Wait for rate limit reset if needed based on current rate limit state
1285
+ * This proactively throttles requests before they're made to prevent hitting rate limits
1286
+ * @private
1287
+ */
1288
+ async waitForRateLimitIfNeeded(ephemeralSettings) {
1289
+ const rateLimitLogger = this.getRateLimitLogger();
1290
+ const info = this.lastRateLimitInfo;
1291
+ // No rate limit data yet - skip throttling
1292
+ if (!info) {
1293
+ return;
1294
+ }
1295
+ // Check if throttling is enabled (default: on)
1296
+ const throttleEnabled = ephemeralSettings['rate-limit-throttle'] ?? 'on';
1297
+ if (throttleEnabled === 'off') {
1298
+ return;
1299
+ }
1300
+ // Get threshold percentage (default: 5%)
1301
+ const thresholdPercentage = ephemeralSettings['rate-limit-throttle-threshold'] ?? 5;
1302
+ // Get max wait time (default: 60 seconds)
1303
+ const maxWaitMs = ephemeralSettings['rate-limit-max-wait'] ?? 60000;
1304
+ const now = Date.now();
1305
+ // Check requests remaining
1306
+ if (info.requestsRemaining !== undefined &&
1307
+ info.requestsLimit !== undefined &&
1308
+ info.requestsReset) {
1309
+ const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
1310
+ if (percentage < thresholdPercentage) {
1311
+ const resetTime = info.requestsReset.getTime();
1312
+ const waitMs = resetTime - now;
1313
+ // Only wait if reset time is in the future
1314
+ if (waitMs > 0) {
1315
+ const actualWaitMs = Math.min(waitMs, maxWaitMs);
1316
+ rateLimitLogger.debug(() => `Rate limit throttle: requests at ${percentage.toFixed(1)}% (${info.requestsRemaining}/${info.requestsLimit}), waiting ${actualWaitMs}ms until reset`);
1317
+ if (waitMs > maxWaitMs) {
1318
+ rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
1319
+ }
1320
+ await this.sleep(actualWaitMs);
1321
+ return;
1322
+ }
1323
+ }
1324
+ }
1325
+ // Check tokens remaining
1326
+ if (info.tokensRemaining !== undefined &&
1327
+ info.tokensLimit !== undefined &&
1328
+ info.tokensReset) {
1329
+ const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
1330
+ if (percentage < thresholdPercentage) {
1331
+ const resetTime = info.tokensReset.getTime();
1332
+ const waitMs = resetTime - now;
1333
+ // Only wait if reset time is in the future
1334
+ if (waitMs > 0) {
1335
+ const actualWaitMs = Math.min(waitMs, maxWaitMs);
1336
+ rateLimitLogger.debug(() => `Rate limit throttle: tokens at ${percentage.toFixed(1)}% (${info.tokensRemaining}/${info.tokensLimit}), waiting ${actualWaitMs}ms until reset`);
1337
+ if (waitMs > maxWaitMs) {
1338
+ rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
1339
+ }
1340
+ await this.sleep(actualWaitMs);
1341
+ return;
1342
+ }
1343
+ }
1344
+ }
1345
+ // Check input tokens remaining
1346
+ if (info.inputTokensRemaining !== undefined &&
1347
+ info.inputTokensLimit !== undefined) {
1348
+ const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
1349
+ if (percentage < thresholdPercentage) {
1350
+ // For input tokens, we don't have a reset time, so we can only log a warning
1351
+ rateLimitLogger.debug(() => `Rate limit warning: input tokens at ${percentage.toFixed(1)}% (${info.inputTokensRemaining}/${info.inputTokensLimit}), no reset time available`);
1352
+ }
1353
+ }
1354
+ }
1355
+ /**
1356
+ * Sleep for the specified number of milliseconds
1357
+ * @private
1358
+ */
1359
+ sleep(ms) {
1360
+ return new Promise((resolve) => setTimeout(resolve, ms));
1361
+ }
948
1362
  }
949
1363
  //# sourceMappingURL=AnthropicProvider.js.map