@vybestack/llxprt-code-core 0.5.0 → 0.6.0-nightly.251128.1049d5f2b

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. package/dist/index.d.ts +1 -1
  2. package/dist/index.js +1 -1
  3. package/dist/index.js.map +1 -1
  4. package/dist/src/agents/codebase-investigator.d.ts +46 -0
  5. package/dist/src/agents/codebase-investigator.js +136 -0
  6. package/dist/src/agents/codebase-investigator.js.map +1 -0
  7. package/dist/src/agents/executor.d.ts +92 -0
  8. package/dist/src/agents/executor.js +624 -0
  9. package/dist/src/agents/executor.js.map +1 -0
  10. package/dist/src/agents/invocation.d.ts +45 -0
  11. package/dist/src/agents/invocation.js +114 -0
  12. package/dist/src/agents/invocation.js.map +1 -0
  13. package/dist/src/agents/registry.d.ts +38 -0
  14. package/dist/src/agents/registry.js +64 -0
  15. package/dist/src/agents/registry.js.map +1 -0
  16. package/dist/src/agents/types.d.ts +145 -0
  17. package/dist/src/agents/types.js +17 -0
  18. package/dist/src/agents/types.js.map +1 -0
  19. package/dist/src/agents/utils.d.ts +15 -0
  20. package/dist/src/agents/utils.js +27 -0
  21. package/dist/src/agents/utils.js.map +1 -0
  22. package/dist/src/auth/types.d.ts +4 -4
  23. package/dist/src/code_assist/oauth-credential-storage.d.ts +27 -0
  24. package/dist/src/code_assist/oauth-credential-storage.js +115 -0
  25. package/dist/src/code_assist/oauth-credential-storage.js.map +1 -0
  26. package/dist/src/code_assist/oauth2.js +36 -9
  27. package/dist/src/code_assist/oauth2.js.map +1 -1
  28. package/dist/src/config/config.d.ts +72 -8
  29. package/dist/src/config/config.js +130 -23
  30. package/dist/src/config/config.js.map +1 -1
  31. package/dist/src/config/constants.d.ts +11 -0
  32. package/dist/src/config/constants.js +16 -0
  33. package/dist/src/config/constants.js.map +1 -0
  34. package/dist/src/config/storage.d.ts +1 -0
  35. package/dist/src/config/storage.js +2 -1
  36. package/dist/src/config/storage.js.map +1 -1
  37. package/dist/src/confirmation-bus/index.d.ts +2 -0
  38. package/dist/src/confirmation-bus/index.js +3 -0
  39. package/dist/src/confirmation-bus/index.js.map +1 -0
  40. package/dist/src/confirmation-bus/message-bus.d.ts +60 -0
  41. package/dist/src/confirmation-bus/message-bus.js +141 -0
  42. package/dist/src/confirmation-bus/message-bus.js.map +1 -0
  43. package/dist/src/confirmation-bus/types.d.ts +59 -0
  44. package/dist/src/confirmation-bus/types.js +10 -0
  45. package/dist/src/confirmation-bus/types.js.map +1 -0
  46. package/dist/src/core/baseLlmClient.d.ts +77 -0
  47. package/dist/src/core/baseLlmClient.js +175 -0
  48. package/dist/src/core/baseLlmClient.js.map +1 -0
  49. package/dist/src/core/client.d.ts +13 -1
  50. package/dist/src/core/client.js +98 -119
  51. package/dist/src/core/client.js.map +1 -1
  52. package/dist/src/core/coreToolScheduler.d.ts +20 -1
  53. package/dist/src/core/coreToolScheduler.js +160 -16
  54. package/dist/src/core/coreToolScheduler.js.map +1 -1
  55. package/dist/src/core/geminiChat.d.ts +8 -1
  56. package/dist/src/core/geminiChat.js +30 -21
  57. package/dist/src/core/geminiChat.js.map +1 -1
  58. package/dist/src/core/subagent.d.ts +16 -1
  59. package/dist/src/core/subagent.js +59 -3
  60. package/dist/src/core/subagent.js.map +1 -1
  61. package/dist/src/core/subagentOrchestrator.d.ts +2 -1
  62. package/dist/src/core/subagentOrchestrator.js +36 -6
  63. package/dist/src/core/subagentOrchestrator.js.map +1 -1
  64. package/dist/src/core/turn.d.ts +1 -4
  65. package/dist/src/core/turn.js +2 -12
  66. package/dist/src/core/turn.js.map +1 -1
  67. package/dist/src/ide/detect-ide.d.ts +44 -14
  68. package/dist/src/ide/detect-ide.js +35 -75
  69. package/dist/src/ide/detect-ide.js.map +1 -1
  70. package/dist/src/ide/ide-client.d.ts +5 -4
  71. package/dist/src/ide/ide-client.js +34 -25
  72. package/dist/src/ide/ide-client.js.map +1 -1
  73. package/dist/src/ide/ide-installer.d.ts +2 -2
  74. package/dist/src/ide/ide-installer.js +7 -9
  75. package/dist/src/ide/ide-installer.js.map +1 -1
  76. package/dist/src/index.d.ts +10 -2
  77. package/dist/src/index.js +12 -3
  78. package/dist/src/index.js.map +1 -1
  79. package/dist/src/mcp/oauth-provider.d.ts +5 -1
  80. package/dist/src/mcp/oauth-provider.js +56 -44
  81. package/dist/src/mcp/oauth-provider.js.map +1 -1
  82. package/dist/src/mcp/oauth-token-storage.d.ts +43 -40
  83. package/dist/src/mcp/oauth-token-storage.js +114 -44
  84. package/dist/src/mcp/oauth-token-storage.js.map +1 -1
  85. package/dist/src/mcp/oauth-utils.js +1 -0
  86. package/dist/src/mcp/oauth-utils.js.map +1 -1
  87. package/dist/src/mcp/sa-impersonation-provider.d.ts +33 -0
  88. package/dist/src/mcp/sa-impersonation-provider.js +130 -0
  89. package/dist/src/mcp/sa-impersonation-provider.js.map +1 -0
  90. package/dist/src/mcp/token-storage/hybrid-token-storage.js +1 -1
  91. package/dist/src/policy/config.d.ts +51 -0
  92. package/dist/src/policy/config.js +102 -0
  93. package/dist/src/policy/config.js.map +1 -0
  94. package/dist/src/policy/index.d.ts +5 -0
  95. package/dist/src/policy/index.js +6 -0
  96. package/dist/src/policy/index.js.map +1 -0
  97. package/dist/src/policy/policies/discovered.toml +9 -0
  98. package/dist/src/policy/policies/read-only.toml +68 -0
  99. package/dist/src/policy/policies/write.toml +69 -0
  100. package/dist/src/policy/policies/yolo.toml +8 -0
  101. package/dist/src/policy/policy-engine.d.ts +55 -0
  102. package/dist/src/policy/policy-engine.js +126 -0
  103. package/dist/src/policy/policy-engine.js.map +1 -0
  104. package/dist/src/policy/stable-stringify.d.ts +29 -0
  105. package/dist/src/policy/stable-stringify.js +111 -0
  106. package/dist/src/policy/stable-stringify.js.map +1 -0
  107. package/dist/src/policy/toml-loader.d.ts +37 -0
  108. package/dist/src/policy/toml-loader.js +183 -0
  109. package/dist/src/policy/toml-loader.js.map +1 -0
  110. package/dist/src/policy/types.d.ts +16 -0
  111. package/dist/src/policy/types.js +7 -0
  112. package/dist/src/policy/types.js.map +1 -0
  113. package/dist/src/providers/LoggingProviderWrapper.d.ts +2 -0
  114. package/dist/src/providers/LoggingProviderWrapper.js +27 -6
  115. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  116. package/dist/src/providers/ProviderManager.d.ts +18 -0
  117. package/dist/src/providers/ProviderManager.js +54 -3
  118. package/dist/src/providers/ProviderManager.js.map +1 -1
  119. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +49 -0
  120. package/dist/src/providers/anthropic/AnthropicProvider.js +468 -30
  121. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  122. package/dist/src/providers/openai/OpenAIProvider.d.ts +3 -0
  123. package/dist/src/providers/openai/OpenAIProvider.js +12 -6
  124. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  125. package/dist/src/providers/utils/localEndpoint.d.ts +39 -0
  126. package/dist/src/providers/utils/localEndpoint.js +117 -0
  127. package/dist/src/providers/utils/localEndpoint.js.map +1 -0
  128. package/dist/src/runtime/AgentRuntimeLoader.d.ts +1 -0
  129. package/dist/src/runtime/AgentRuntimeLoader.js +6 -1
  130. package/dist/src/runtime/AgentRuntimeLoader.js.map +1 -1
  131. package/dist/src/runtime/createAgentRuntimeContext.js +8 -7
  132. package/dist/src/runtime/createAgentRuntimeContext.js.map +1 -1
  133. package/dist/src/services/fileSystemService.d.ts +9 -0
  134. package/dist/src/services/fileSystemService.js +12 -1
  135. package/dist/src/services/fileSystemService.js.map +1 -1
  136. package/dist/src/services/history/HistoryService.d.ts +4 -0
  137. package/dist/src/services/history/HistoryService.js +18 -0
  138. package/dist/src/services/history/HistoryService.js.map +1 -1
  139. package/dist/src/services/history/IContent.d.ts +6 -0
  140. package/dist/src/services/history/IContent.js.map +1 -1
  141. package/dist/src/services/shellExecutionService.js +0 -6
  142. package/dist/src/services/shellExecutionService.js.map +1 -1
  143. package/dist/src/settings/types.d.ts +7 -0
  144. package/dist/src/storage/sessionTypes.d.ts +27 -0
  145. package/dist/src/storage/sessionTypes.js +10 -0
  146. package/dist/src/storage/sessionTypes.js.map +1 -0
  147. package/dist/src/telemetry/constants.d.ts +8 -0
  148. package/dist/src/telemetry/constants.js +8 -0
  149. package/dist/src/telemetry/constants.js.map +1 -1
  150. package/dist/src/telemetry/loggers.d.ts +9 -1
  151. package/dist/src/telemetry/loggers.js +154 -2
  152. package/dist/src/telemetry/loggers.js.map +1 -1
  153. package/dist/src/telemetry/metrics.d.ts +5 -0
  154. package/dist/src/telemetry/metrics.js +4 -0
  155. package/dist/src/telemetry/metrics.js.map +1 -1
  156. package/dist/src/telemetry/types.d.ts +62 -1
  157. package/dist/src/telemetry/types.js +92 -0
  158. package/dist/src/telemetry/types.js.map +1 -1
  159. package/dist/src/telemetry/uiTelemetry.d.ts +1 -1
  160. package/dist/src/telemetry/uiTelemetry.js +2 -3
  161. package/dist/src/telemetry/uiTelemetry.js.map +1 -1
  162. package/dist/src/test-utils/config.js +14 -0
  163. package/dist/src/test-utils/config.js.map +1 -1
  164. package/dist/src/test-utils/mock-tool.d.ts +8 -4
  165. package/dist/src/test-utils/mock-tool.js +35 -18
  166. package/dist/src/test-utils/mock-tool.js.map +1 -1
  167. package/dist/src/test-utils/tools.d.ts +1 -1
  168. package/dist/src/test-utils/tools.js +4 -4
  169. package/dist/src/test-utils/tools.js.map +1 -1
  170. package/dist/src/tools/edit.d.ts +3 -2
  171. package/dist/src/tools/edit.js +29 -10
  172. package/dist/src/tools/edit.js.map +1 -1
  173. package/dist/src/tools/glob.d.ts +6 -4
  174. package/dist/src/tools/glob.js +3 -3
  175. package/dist/src/tools/glob.js.map +1 -1
  176. package/dist/src/tools/grep.d.ts +3 -2
  177. package/dist/src/tools/grep.js +2 -2
  178. package/dist/src/tools/grep.js.map +1 -1
  179. package/dist/src/tools/ls.d.ts +4 -3
  180. package/dist/src/tools/ls.js +3 -3
  181. package/dist/src/tools/ls.js.map +1 -1
  182. package/dist/src/tools/mcp-client.d.ts +9 -18
  183. package/dist/src/tools/mcp-client.js +60 -102
  184. package/dist/src/tools/mcp-client.js.map +1 -1
  185. package/dist/src/tools/mcp-tool.js +7 -1
  186. package/dist/src/tools/mcp-tool.js.map +1 -1
  187. package/dist/src/tools/memoryTool.d.ts +6 -2
  188. package/dist/src/tools/memoryTool.js +14 -4
  189. package/dist/src/tools/memoryTool.js.map +1 -1
  190. package/dist/src/tools/modifiable-tool.d.ts +1 -1
  191. package/dist/src/tools/modifiable-tool.js +9 -1
  192. package/dist/src/tools/modifiable-tool.js.map +1 -1
  193. package/dist/src/tools/read-file.d.ts +3 -2
  194. package/dist/src/tools/read-file.js +2 -2
  195. package/dist/src/tools/read-file.js.map +1 -1
  196. package/dist/src/tools/read-many-files.d.ts +3 -2
  197. package/dist/src/tools/read-many-files.js +2 -2
  198. package/dist/src/tools/read-many-files.js.map +1 -1
  199. package/dist/src/tools/ripGrep.d.ts +3 -2
  200. package/dist/src/tools/ripGrep.js +2 -2
  201. package/dist/src/tools/ripGrep.js.map +1 -1
  202. package/dist/src/tools/shell.d.ts +3 -2
  203. package/dist/src/tools/shell.js +69 -9
  204. package/dist/src/tools/shell.js.map +1 -1
  205. package/dist/src/tools/smart-edit.d.ts +22 -2
  206. package/dist/src/tools/smart-edit.js +124 -12
  207. package/dist/src/tools/smart-edit.js.map +1 -1
  208. package/dist/src/tools/task.d.ts +1 -0
  209. package/dist/src/tools/task.js +33 -16
  210. package/dist/src/tools/task.js.map +1 -1
  211. package/dist/src/tools/tool-confirmation-types.d.ts +20 -0
  212. package/dist/src/tools/tool-confirmation-types.js +15 -0
  213. package/dist/src/tools/tool-confirmation-types.js.map +1 -0
  214. package/dist/src/tools/tool-error.d.ts +2 -0
  215. package/dist/src/tools/tool-error.js +2 -0
  216. package/dist/src/tools/tool-error.js.map +1 -1
  217. package/dist/src/tools/tool-registry.d.ts +8 -1
  218. package/dist/src/tools/tool-registry.js +18 -4
  219. package/dist/src/tools/tool-registry.js.map +1 -1
  220. package/dist/src/tools/tools.d.ts +52 -14
  221. package/dist/src/tools/tools.js +71 -15
  222. package/dist/src/tools/tools.js.map +1 -1
  223. package/dist/src/tools/web-fetch.d.ts +3 -2
  224. package/dist/src/tools/web-fetch.js +11 -6
  225. package/dist/src/tools/web-fetch.js.map +1 -1
  226. package/dist/src/tools/web-search-invocation.d.ts +3 -1
  227. package/dist/src/tools/web-search-invocation.js +5 -2
  228. package/dist/src/tools/web-search-invocation.js.map +1 -1
  229. package/dist/src/tools/web-search.d.ts +3 -2
  230. package/dist/src/tools/web-search.js +6 -4
  231. package/dist/src/tools/web-search.js.map +1 -1
  232. package/dist/src/tools/write-file.d.ts +3 -2
  233. package/dist/src/tools/write-file.js +11 -6
  234. package/dist/src/tools/write-file.js.map +1 -1
  235. package/dist/src/utils/bfsFileSearch.d.ts +2 -2
  236. package/dist/src/utils/editor.js +5 -3
  237. package/dist/src/utils/editor.js.map +1 -1
  238. package/dist/src/utils/getFolderStructure.d.ts +2 -2
  239. package/dist/src/utils/getFolderStructure.js +1 -1
  240. package/dist/src/utils/getFolderStructure.js.map +1 -1
  241. package/dist/src/utils/llm-edit-fixer.js +10 -1
  242. package/dist/src/utils/llm-edit-fixer.js.map +1 -1
  243. package/dist/src/utils/memoryDiscovery.d.ts +2 -1
  244. package/dist/src/utils/memoryDiscovery.js +3 -2
  245. package/dist/src/utils/memoryDiscovery.js.map +1 -1
  246. package/dist/src/utils/memoryImportProcessor.js +13 -20
  247. package/dist/src/utils/memoryImportProcessor.js.map +1 -1
  248. package/dist/src/utils/retry.d.ts +5 -1
  249. package/dist/src/utils/retry.js +31 -16
  250. package/dist/src/utils/retry.js.map +1 -1
  251. package/dist/src/utils/schemaValidator.js +11 -1
  252. package/dist/src/utils/schemaValidator.js.map +1 -1
  253. package/dist/src/utils/shell-utils.d.ts +1 -0
  254. package/dist/src/utils/shell-utils.js +6 -2
  255. package/dist/src/utils/shell-utils.js.map +1 -1
  256. package/dist/src/utils/thoughtUtils.d.ts +21 -0
  257. package/dist/src/utils/thoughtUtils.js +39 -0
  258. package/dist/src/utils/thoughtUtils.js.map +1 -0
  259. package/dist/src/utils/tool-utils.js +2 -2
  260. package/dist/src/utils/tool-utils.js.map +1 -1
  261. package/package.json +6 -4
@@ -29,6 +29,8 @@ export class AnthropicProvider extends BaseProvider {
29
29
  { pattern: /claude-.*3.*opus/i, tokens: 4096 },
30
30
  { pattern: /claude-.*3.*haiku/i, tokens: 4096 },
31
31
  ];
32
+ // Rate limit state tracking - updated on each API response
33
+ lastRateLimitInfo;
32
34
  constructor(apiKey, baseURL, config, oauthManager) {
33
35
  // Initialize base provider with auth configuration
34
36
  const baseConfig = {
@@ -71,6 +73,12 @@ export class AnthropicProvider extends BaseProvider {
71
73
  getErrorsLogger() {
72
74
  return new DebugLogger('llxprt:anthropic:errors');
73
75
  }
76
+ getCacheLogger() {
77
+ return new DebugLogger('llxprt:anthropic:cache');
78
+ }
79
+ getRateLimitLogger() {
80
+ return new DebugLogger('llxprt:anthropic:ratelimit');
81
+ }
74
82
  instantiateClient(authToken, baseURL) {
75
83
  const isOAuthToken = authToken.startsWith('sk-ant-oat');
76
84
  const clientConfig = {
@@ -165,6 +173,22 @@ export class AnthropicProvider extends BaseProvider {
165
173
  // For OAuth, return only the working models
166
174
  this.getAuthLogger().debug(() => 'Using hardcoded model list for OAuth authentication');
167
175
  return [
176
+ {
177
+ id: 'claude-opus-4-5-20251101',
178
+ name: 'Claude Opus 4.5',
179
+ provider: 'anthropic',
180
+ supportedToolFormats: ['anthropic'],
181
+ contextWindow: 500000,
182
+ maxOutputTokens: 32000,
183
+ },
184
+ {
185
+ id: 'claude-opus-4-5',
186
+ name: 'Claude Opus 4.5',
187
+ provider: 'anthropic',
188
+ supportedToolFormats: ['anthropic'],
189
+ contextWindow: 500000,
190
+ maxOutputTokens: 32000,
191
+ },
168
192
  {
169
193
  id: 'claude-opus-4-1-20250805',
170
194
  name: 'Claude Opus 4.1',
@@ -289,6 +313,14 @@ export class AnthropicProvider extends BaseProvider {
289
313
  */
290
314
  getDefaultModels() {
291
315
  return [
316
+ {
317
+ id: 'claude-opus-4-5-20251101',
318
+ name: 'Claude Opus 4.5',
319
+ provider: 'anthropic',
320
+ supportedToolFormats: ['anthropic'],
321
+ contextWindow: 500000,
322
+ maxOutputTokens: 32000,
323
+ },
292
324
  {
293
325
  id: 'claude-opus-4-1-20250805',
294
326
  name: 'Claude Opus 4.1',
@@ -527,6 +559,32 @@ export class AnthropicProvider extends BaseProvider {
527
559
  // Unknown format - assume it's a raw UUID
528
560
  return 'hist_tool_' + id;
529
561
  }
562
+ /**
563
+ * Sort object keys alphabetically for stable JSON serialization
564
+ * This prevents cache invalidation due to key order changes
565
+ */
566
+ sortObjectKeys(obj) {
567
+ const sorted = Object.keys(obj)
568
+ .sort()
569
+ .reduce((acc, key) => {
570
+ acc[key] = obj[key];
571
+ return acc;
572
+ }, {});
573
+ return sorted;
574
+ }
575
+ /**
576
+ * Merge beta headers, ensuring no duplicates
577
+ */
578
+ mergeBetaHeaders(existing, addition) {
579
+ if (!existing)
580
+ return addition;
581
+ const parts = new Set(existing
582
+ .split(',')
583
+ .map((s) => s.trim())
584
+ .filter(Boolean));
585
+ parts.add(addition);
586
+ return Array.from(parts).join(', ');
587
+ }
530
588
  /**
531
589
  * @plan PLAN-20251023-STATELESS-HARDENING.P08
532
590
  * @requirement REQ-SP4-002, REQ-SP4-003
@@ -748,7 +806,25 @@ export class AnthropicProvider extends BaseProvider {
748
806
  const detectedFormat = this.detectToolFormat();
749
807
  const needsQwenParameterProcessing = detectedFormat === 'qwen';
750
808
  // Convert Gemini format tools to anthropic format (always for Anthropic API)
751
- const anthropicTools = callFormatter.convertGeminiToFormat(tools, 'anthropic');
809
+ let anthropicTools = callFormatter.convertGeminiToFormat(tools, 'anthropic');
810
+ // Stabilize tool ordering and JSON schema keys to prevent cache invalidation
811
+ if (anthropicTools && anthropicTools.length > 0) {
812
+ anthropicTools = [...anthropicTools]
813
+ .sort((a, b) => a.name.localeCompare(b.name))
814
+ .map((tool) => {
815
+ const schema = tool.input_schema;
816
+ if (schema.properties) {
817
+ return {
818
+ ...tool,
819
+ input_schema: {
820
+ ...schema,
821
+ properties: this.sortObjectKeys(schema.properties),
822
+ },
823
+ };
824
+ }
825
+ return tool;
826
+ });
827
+ }
752
828
  const toolNamesForPrompt = tools === undefined
753
829
  ? undefined
754
830
  : Array.from(new Set(tools.flatMap((group) => group.functionDeclarations
@@ -765,32 +841,78 @@ export class AnthropicProvider extends BaseProvider {
765
841
  // Derive model parameters on demand from ephemeral settings
766
842
  const configEphemeralSettings = options.invocation?.ephemerals ?? {};
767
843
  const requestOverrides = configEphemeralSettings['anthropic'] || {};
844
+ // Get caching setting from ephemeral settings (session override) or provider settings
845
+ const providerSettings = this.resolveSettingsService().getProviderSettings(this.name) ?? {};
846
+ const cachingSetting = configEphemeralSettings['prompt-caching'] ??
847
+ providerSettings['prompt-caching'] ??
848
+ '1h';
849
+ const wantCaching = cachingSetting !== 'off';
850
+ const ttl = cachingSetting === '1h' ? '1h' : '5m';
851
+ const cacheLogger = this.getCacheLogger();
852
+ if (wantCaching) {
853
+ cacheLogger.debug(() => `Prompt caching enabled with TTL: ${ttl}`);
854
+ }
768
855
  // For OAuth mode, inject core system prompt as the first human message
769
856
  if (isOAuth) {
770
857
  const corePrompt = await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt);
771
858
  if (corePrompt) {
772
- anthropicMessages.unshift({
773
- role: 'user',
774
- content: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
775
- });
859
+ if (wantCaching) {
860
+ anthropicMessages.unshift({
861
+ role: 'user',
862
+ content: [
863
+ {
864
+ type: 'text',
865
+ text: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
866
+ cache_control: { type: 'ephemeral', ttl },
867
+ },
868
+ ],
869
+ });
870
+ cacheLogger.debug(() => 'Added cache_control to OAuth system message');
871
+ }
872
+ else {
873
+ anthropicMessages.unshift({
874
+ role: 'user',
875
+ content: `<system>\n${corePrompt}\n</system>\n\nUser provided conversation begins here:`,
876
+ });
877
+ }
776
878
  }
777
879
  }
880
+ // Build system field with caching support
778
881
  const systemPrompt = !isOAuth
779
882
  ? await getCoreSystemPromptAsync(userMemory, currentModel, toolNamesForPrompt)
780
883
  : undefined;
884
+ let systemField = {};
885
+ if (isOAuth) {
886
+ systemField = {
887
+ system: "You are Claude Code, Anthropic's official CLI for Claude.",
888
+ };
889
+ }
890
+ else if (systemPrompt) {
891
+ if (wantCaching) {
892
+ // Use array format with cache_control breakpoint
893
+ systemField = {
894
+ system: [
895
+ {
896
+ type: 'text',
897
+ text: systemPrompt,
898
+ cache_control: { type: 'ephemeral', ttl },
899
+ },
900
+ ],
901
+ };
902
+ cacheLogger.debug(() => `Added cache_control to system prompt (${ttl})`);
903
+ }
904
+ else {
905
+ // Use string format (no caching)
906
+ systemField = { system: systemPrompt };
907
+ }
908
+ }
781
909
  const requestBody = {
782
910
  model: currentModel,
783
911
  messages: anthropicMessages,
784
912
  max_tokens: this.getMaxTokensForModel(currentModel),
785
913
  stream: streamingEnabled,
786
914
  ...requestOverrides, // Use derived ephemeral overrides instead of memoized instance state
787
- ...(isOAuth
788
- ? {
789
- system: "You are Claude Code, Anthropic's official CLI for Claude.",
790
- }
791
- : systemPrompt
792
- ? { system: systemPrompt }
793
- : {}),
915
+ ...systemField,
794
916
  ...(anthropicTools && anthropicTools.length > 0
795
917
  ? { tools: anthropicTools }
796
918
  : {}),
@@ -805,24 +927,131 @@ export class AnthropicProvider extends BaseProvider {
805
927
  });
806
928
  }
807
929
  // Make the API call with retry logic
808
- const customHeaders = this.getCustomHeaders();
809
- const apiCall = () => customHeaders
930
+ let customHeaders = this.getCustomHeaders() || {};
931
+ // For OAuth, always include the oauth beta header in customHeaders
932
+ // to ensure it's not overridden by cache headers
933
+ if (isOAuth) {
934
+ const existingBeta = customHeaders['anthropic-beta'];
935
+ customHeaders = {
936
+ ...customHeaders,
937
+ 'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'oauth-2025-04-20'),
938
+ };
939
+ }
940
+ // Add extended-cache-ttl beta header for 1h caching
941
+ if (wantCaching && ttl === '1h') {
942
+ const existingBeta = customHeaders['anthropic-beta'];
943
+ customHeaders = {
944
+ ...customHeaders,
945
+ 'anthropic-beta': this.mergeBetaHeaders(existingBeta, 'extended-cache-ttl-2025-04-11'),
946
+ };
947
+ cacheLogger.debug(() => 'Added extended-cache-ttl-2025-04-11 beta header for 1h caching');
948
+ }
949
+ const apiCall = () => Object.keys(customHeaders).length > 0
810
950
  ? client.messages.create(requestBody, { headers: customHeaders })
811
951
  : client.messages.create(requestBody);
812
952
  const { maxAttempts, initialDelayMs } = this.getRetryConfig();
813
- const response = await retryWithBackoff(apiCall, {
814
- maxAttempts,
815
- initialDelayMs,
816
- shouldRetry: this.shouldRetryAnthropicResponse.bind(this),
817
- trackThrottleWaitTime: this.throttleTracker,
818
- });
953
+ // Proactively throttle if approaching rate limits
954
+ await this.waitForRateLimitIfNeeded(configEphemeralSettings);
955
+ // For non-streaming, use withResponse() to access headers
956
+ // For streaming, we can't access headers easily, so we skip rate limit extraction
957
+ const rateLimitLogger = this.getRateLimitLogger();
958
+ let responseHeaders;
959
+ let response;
960
+ if (streamingEnabled) {
961
+ // Streaming mode - can't easily access headers
962
+ response = await retryWithBackoff(apiCall, {
963
+ maxAttempts,
964
+ initialDelayMs,
965
+ shouldRetryOnError: this.shouldRetryAnthropicResponse.bind(this),
966
+ trackThrottleWaitTime: this.throttleTracker,
967
+ });
968
+ rateLimitLogger.debug(() => 'Streaming mode - rate limit headers not extracted');
969
+ }
970
+ else {
971
+ // Non-streaming mode - use withResponse() to get headers
972
+ const apiCallWithResponse = async () => {
973
+ const promise = apiCall();
974
+ // The promise has a withResponse() method we can call
975
+ if (promise &&
976
+ typeof promise === 'object' &&
977
+ 'withResponse' in promise) {
978
+ return promise.withResponse();
979
+ }
980
+ // Fallback if withResponse is not available
981
+ return { data: await promise, response: undefined };
982
+ };
983
+ const result = await retryWithBackoff(apiCallWithResponse, {
984
+ maxAttempts,
985
+ initialDelayMs,
986
+ shouldRetryOnError: this.shouldRetryAnthropicResponse.bind(this),
987
+ trackThrottleWaitTime: this.throttleTracker,
988
+ });
989
+ response = result.data;
990
+ if (result.response) {
991
+ responseHeaders = result.response.headers;
992
+ // Extract and process rate limit headers
993
+ const rateLimitInfo = this.extractRateLimitHeaders(responseHeaders);
994
+ this.lastRateLimitInfo = rateLimitInfo;
995
+ rateLimitLogger.debug(() => {
996
+ const parts = [];
997
+ if (rateLimitInfo.requestsRemaining !== undefined &&
998
+ rateLimitInfo.requestsLimit !== undefined) {
999
+ parts.push(`requests=${rateLimitInfo.requestsRemaining}/${rateLimitInfo.requestsLimit}`);
1000
+ }
1001
+ if (rateLimitInfo.tokensRemaining !== undefined &&
1002
+ rateLimitInfo.tokensLimit !== undefined) {
1003
+ parts.push(`tokens=${rateLimitInfo.tokensRemaining}/${rateLimitInfo.tokensLimit}`);
1004
+ }
1005
+ if (rateLimitInfo.inputTokensRemaining !== undefined &&
1006
+ rateLimitInfo.inputTokensLimit !== undefined) {
1007
+ parts.push(`input_tokens=${rateLimitInfo.inputTokensRemaining}/${rateLimitInfo.inputTokensLimit}`);
1008
+ }
1009
+ return parts.length > 0
1010
+ ? `Rate limits: ${parts.join(', ')}`
1011
+ : 'Rate limits: no data';
1012
+ });
1013
+ // Check and warn if approaching limits
1014
+ this.checkRateLimits(rateLimitInfo);
1015
+ }
1016
+ }
819
1017
  if (streamingEnabled) {
820
1018
  // Handle streaming response - response is already a Stream when streaming is enabled
821
1019
  const stream = response;
822
1020
  let currentToolCall;
823
1021
  this.getStreamingLogger().debug(() => 'Processing streaming response');
824
1022
  for await (const chunk of stream) {
825
- if (chunk.type === 'content_block_start') {
1023
+ if (chunk.type === 'message_start') {
1024
+ // Extract cache metrics from message_start event
1025
+ const usage = chunk.message?.usage;
1026
+ if (usage) {
1027
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
1028
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
1029
+ cacheLogger.debug(() => `[AnthropicProvider streaming] Emitting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
1030
+ if (cacheRead > 0 || cacheCreation > 0) {
1031
+ cacheLogger.debug(() => {
1032
+ const hitRate = cacheRead + (usage.input_tokens ?? 0) > 0
1033
+ ? (cacheRead / (cacheRead + (usage.input_tokens ?? 0))) *
1034
+ 100
1035
+ : 0;
1036
+ return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
1037
+ });
1038
+ }
1039
+ yield {
1040
+ speaker: 'ai',
1041
+ blocks: [],
1042
+ metadata: {
1043
+ usage: {
1044
+ promptTokens: usage.input_tokens ?? 0,
1045
+ completionTokens: usage.output_tokens ?? 0,
1046
+ totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
1047
+ cache_read_input_tokens: cacheRead,
1048
+ cache_creation_input_tokens: cacheCreation,
1049
+ },
1050
+ },
1051
+ };
1052
+ }
1053
+ }
1054
+ else if (chunk.type === 'content_block_start') {
826
1055
  if (chunk.content_block.type === 'tool_use') {
827
1056
  const toolBlock = chunk.content_block;
828
1057
  this.getStreamingLogger().debug(() => `Starting tool use: ${toolBlock.name}`);
@@ -872,17 +1101,21 @@ export class AnthropicProvider extends BaseProvider {
872
1101
  }
873
1102
  }
874
1103
  else if (chunk.type === 'message_delta' && chunk.usage) {
875
- // Emit usage metadata
876
- this.getStreamingLogger().debug(() => `Received usage metadata`);
1104
+ // Emit usage metadata including cache fields
1105
+ const usage = chunk.usage;
1106
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
1107
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
1108
+ this.getStreamingLogger().debug(() => `Received usage metadata from message_delta: promptTokens=${usage.input_tokens || 0}, completionTokens=${usage.output_tokens || 0}, cacheRead=${cacheRead}, cacheCreation=${cacheCreation}`);
877
1109
  yield {
878
1110
  speaker: 'ai',
879
1111
  blocks: [],
880
1112
  metadata: {
881
1113
  usage: {
882
- promptTokens: chunk.usage.input_tokens || 0,
883
- completionTokens: chunk.usage.output_tokens || 0,
884
- totalTokens: (chunk.usage.input_tokens || 0) +
885
- (chunk.usage.output_tokens || 0),
1114
+ promptTokens: usage.input_tokens || 0,
1115
+ completionTokens: usage.output_tokens || 0,
1116
+ totalTokens: (usage.input_tokens || 0) + (usage.output_tokens || 0),
1117
+ cache_read_input_tokens: cacheRead,
1118
+ cache_creation_input_tokens: cacheCreation,
886
1119
  },
887
1120
  },
888
1121
  };
@@ -916,11 +1149,25 @@ export class AnthropicProvider extends BaseProvider {
916
1149
  };
917
1150
  // Add usage metadata if present
918
1151
  if (message.usage) {
1152
+ const usage = message.usage;
1153
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
1154
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
1155
+ cacheLogger.debug(() => `[AnthropicProvider non-streaming] Setting usage metadata: cacheRead=${cacheRead}, cacheCreation=${cacheCreation}, raw values: cache_read_input_tokens=${usage.cache_read_input_tokens}, cache_creation_input_tokens=${usage.cache_creation_input_tokens}`);
1156
+ if (cacheRead > 0 || cacheCreation > 0) {
1157
+ cacheLogger.debug(() => {
1158
+ const hitRate = cacheRead + usage.input_tokens > 0
1159
+ ? (cacheRead / (cacheRead + usage.input_tokens)) * 100
1160
+ : 0;
1161
+ return `Cache metrics: read=${cacheRead}, creation=${cacheCreation}, hit_rate=${hitRate.toFixed(1)}%`;
1162
+ });
1163
+ }
919
1164
  result.metadata = {
920
1165
  usage: {
921
- promptTokens: message.usage.input_tokens,
922
- completionTokens: message.usage.output_tokens,
923
- totalTokens: message.usage.input_tokens + message.usage.output_tokens,
1166
+ promptTokens: usage.input_tokens,
1167
+ completionTokens: usage.output_tokens,
1168
+ totalTokens: usage.input_tokens + usage.output_tokens,
1169
+ cache_read_input_tokens: cacheRead,
1170
+ cache_creation_input_tokens: cacheCreation,
924
1171
  },
925
1172
  };
926
1173
  }
@@ -934,6 +1181,15 @@ export class AnthropicProvider extends BaseProvider {
934
1181
  return { maxAttempts, initialDelayMs };
935
1182
  }
936
1183
  shouldRetryAnthropicResponse(error) {
1184
+ // Check for Anthropic-specific error types (overloaded_error)
1185
+ if (error && typeof error === 'object') {
1186
+ const errorObj = error;
1187
+ const errorType = errorObj.error?.type || errorObj.type;
1188
+ if (errorType === 'overloaded_error') {
1189
+ this.getLogger().debug(() => 'Will retry Anthropic request due to overloaded_error');
1190
+ return true;
1191
+ }
1192
+ }
937
1193
  const status = getErrorStatus(error);
938
1194
  if (status === 429 || (status && status >= 500 && status < 600)) {
939
1195
  this.getLogger().debug(() => `Will retry Anthropic request due to status ${status}`);
@@ -945,5 +1201,187 @@ export class AnthropicProvider extends BaseProvider {
945
1201
  }
946
1202
  return false;
947
1203
  }
1204
+ /**
1205
+ * Extract rate limit information from response headers
1206
+ */
1207
+ extractRateLimitHeaders(headers) {
1208
+ const rateLimitLogger = this.getRateLimitLogger();
1209
+ const info = {};
1210
+ // Extract requests rate limit info
1211
+ const requestsLimit = headers.get('anthropic-ratelimit-requests-limit');
1212
+ const requestsRemaining = headers.get('anthropic-ratelimit-requests-remaining');
1213
+ const requestsReset = headers.get('anthropic-ratelimit-requests-reset');
1214
+ if (requestsLimit) {
1215
+ info.requestsLimit = parseInt(requestsLimit, 10);
1216
+ }
1217
+ if (requestsRemaining) {
1218
+ info.requestsRemaining = parseInt(requestsRemaining, 10);
1219
+ }
1220
+ if (requestsReset) {
1221
+ try {
1222
+ const date = new Date(requestsReset);
1223
+ // Only set if the date is valid
1224
+ if (!isNaN(date.getTime())) {
1225
+ info.requestsReset = date;
1226
+ }
1227
+ }
1228
+ catch (_error) {
1229
+ rateLimitLogger.debug(() => `Failed to parse requests reset date: ${requestsReset}`);
1230
+ }
1231
+ }
1232
+ // Extract tokens rate limit info
1233
+ const tokensLimit = headers.get('anthropic-ratelimit-tokens-limit');
1234
+ const tokensRemaining = headers.get('anthropic-ratelimit-tokens-remaining');
1235
+ const tokensReset = headers.get('anthropic-ratelimit-tokens-reset');
1236
+ if (tokensLimit) {
1237
+ info.tokensLimit = parseInt(tokensLimit, 10);
1238
+ }
1239
+ if (tokensRemaining) {
1240
+ info.tokensRemaining = parseInt(tokensRemaining, 10);
1241
+ }
1242
+ if (tokensReset) {
1243
+ try {
1244
+ const date = new Date(tokensReset);
1245
+ // Only set if the date is valid
1246
+ if (!isNaN(date.getTime())) {
1247
+ info.tokensReset = date;
1248
+ }
1249
+ }
1250
+ catch (_error) {
1251
+ rateLimitLogger.debug(() => `Failed to parse tokens reset date: ${tokensReset}`);
1252
+ }
1253
+ }
1254
+ // Extract input tokens rate limit info
1255
+ const inputTokensLimit = headers.get('anthropic-ratelimit-input-tokens-limit');
1256
+ const inputTokensRemaining = headers.get('anthropic-ratelimit-input-tokens-remaining');
1257
+ if (inputTokensLimit) {
1258
+ info.inputTokensLimit = parseInt(inputTokensLimit, 10);
1259
+ }
1260
+ if (inputTokensRemaining) {
1261
+ info.inputTokensRemaining = parseInt(inputTokensRemaining, 10);
1262
+ }
1263
+ return info;
1264
+ }
1265
+ /**
1266
+ * Check rate limits and log warnings if approaching limits
1267
+ */
1268
+ checkRateLimits(info) {
1269
+ const rateLimitLogger = this.getRateLimitLogger();
1270
+ // Check requests rate limit (warn at 10% remaining)
1271
+ if (info.requestsLimit !== undefined &&
1272
+ info.requestsRemaining !== undefined) {
1273
+ const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
1274
+ if (percentage < 10) {
1275
+ const resetTime = info.requestsReset
1276
+ ? ` (resets at ${info.requestsReset.toISOString()})`
1277
+ : '';
1278
+ rateLimitLogger.debug(() => `WARNING: Approaching requests rate limit - ${info.requestsRemaining}/${info.requestsLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
1279
+ }
1280
+ }
1281
+ // Check tokens rate limit (warn at 10% remaining)
1282
+ if (info.tokensLimit !== undefined && info.tokensRemaining !== undefined) {
1283
+ const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
1284
+ if (percentage < 10) {
1285
+ const resetTime = info.tokensReset
1286
+ ? ` (resets at ${info.tokensReset.toISOString()})`
1287
+ : '';
1288
+ rateLimitLogger.debug(() => `WARNING: Approaching tokens rate limit - ${info.tokensRemaining}/${info.tokensLimit} remaining (${percentage.toFixed(1)}%)${resetTime}`);
1289
+ }
1290
+ }
1291
+ // Check input tokens rate limit (warn at 10% remaining)
1292
+ if (info.inputTokensLimit !== undefined &&
1293
+ info.inputTokensRemaining !== undefined) {
1294
+ const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
1295
+ if (percentage < 10) {
1296
+ rateLimitLogger.debug(() => `WARNING: Approaching input tokens rate limit - ${info.inputTokensRemaining}/${info.inputTokensLimit} remaining (${percentage.toFixed(1)}%)`);
1297
+ }
1298
+ }
1299
+ }
1300
+ /**
1301
+ * Get current rate limit information
1302
+ * Returns the last known rate limit state from the most recent API call
1303
+ */
1304
+ getRateLimitInfo() {
1305
+ return this.lastRateLimitInfo;
1306
+ }
1307
+ /**
1308
+ * Wait for rate limit reset if needed based on current rate limit state
1309
+ * This proactively throttles requests before they're made to prevent hitting rate limits
1310
+ * @private
1311
+ */
1312
+ async waitForRateLimitIfNeeded(ephemeralSettings) {
1313
+ const rateLimitLogger = this.getRateLimitLogger();
1314
+ const info = this.lastRateLimitInfo;
1315
+ // No rate limit data yet - skip throttling
1316
+ if (!info) {
1317
+ return;
1318
+ }
1319
+ // Check if throttling is enabled (default: on)
1320
+ const throttleEnabled = ephemeralSettings['rate-limit-throttle'] ?? 'on';
1321
+ if (throttleEnabled === 'off') {
1322
+ return;
1323
+ }
1324
+ // Get threshold percentage (default: 5%)
1325
+ const thresholdPercentage = ephemeralSettings['rate-limit-throttle-threshold'] ?? 5;
1326
+ // Get max wait time (default: 60 seconds)
1327
+ const maxWaitMs = ephemeralSettings['rate-limit-max-wait'] ?? 60000;
1328
+ const now = Date.now();
1329
+ // Check requests remaining
1330
+ if (info.requestsRemaining !== undefined &&
1331
+ info.requestsLimit !== undefined &&
1332
+ info.requestsReset) {
1333
+ const percentage = (info.requestsRemaining / info.requestsLimit) * 100;
1334
+ if (percentage < thresholdPercentage) {
1335
+ const resetTime = info.requestsReset.getTime();
1336
+ const waitMs = resetTime - now;
1337
+ // Only wait if reset time is in the future
1338
+ if (waitMs > 0) {
1339
+ const actualWaitMs = Math.min(waitMs, maxWaitMs);
1340
+ rateLimitLogger.debug(() => `Rate limit throttle: requests at ${percentage.toFixed(1)}% (${info.requestsRemaining}/${info.requestsLimit}), waiting ${actualWaitMs}ms until reset`);
1341
+ if (waitMs > maxWaitMs) {
1342
+ rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
1343
+ }
1344
+ await this.sleep(actualWaitMs);
1345
+ return;
1346
+ }
1347
+ }
1348
+ }
1349
+ // Check tokens remaining
1350
+ if (info.tokensRemaining !== undefined &&
1351
+ info.tokensLimit !== undefined &&
1352
+ info.tokensReset) {
1353
+ const percentage = (info.tokensRemaining / info.tokensLimit) * 100;
1354
+ if (percentage < thresholdPercentage) {
1355
+ const resetTime = info.tokensReset.getTime();
1356
+ const waitMs = resetTime - now;
1357
+ // Only wait if reset time is in the future
1358
+ if (waitMs > 0) {
1359
+ const actualWaitMs = Math.min(waitMs, maxWaitMs);
1360
+ rateLimitLogger.debug(() => `Rate limit throttle: tokens at ${percentage.toFixed(1)}% (${info.tokensRemaining}/${info.tokensLimit}), waiting ${actualWaitMs}ms until reset`);
1361
+ if (waitMs > maxWaitMs) {
1362
+ rateLimitLogger.debug(() => `Rate limit reset in ${waitMs}ms exceeds max wait of ${maxWaitMs}ms, capping wait time`);
1363
+ }
1364
+ await this.sleep(actualWaitMs);
1365
+ return;
1366
+ }
1367
+ }
1368
+ }
1369
+ // Check input tokens remaining
1370
+ if (info.inputTokensRemaining !== undefined &&
1371
+ info.inputTokensLimit !== undefined) {
1372
+ const percentage = (info.inputTokensRemaining / info.inputTokensLimit) * 100;
1373
+ if (percentage < thresholdPercentage) {
1374
+ // For input tokens, we don't have a reset time, so we can only log a warning
1375
+ rateLimitLogger.debug(() => `Rate limit warning: input tokens at ${percentage.toFixed(1)}% (${info.inputTokensRemaining}/${info.inputTokensLimit}), no reset time available`);
1376
+ }
1377
+ }
1378
+ }
1379
+ /**
1380
+ * Sleep for the specified number of milliseconds
1381
+ * @private
1382
+ */
1383
+ sleep(ms) {
1384
+ return new Promise((resolve) => setTimeout(resolve, ms));
1385
+ }
948
1386
  }
949
1387
  //# sourceMappingURL=AnthropicProvider.js.map