klaude-code 1.2.6__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. klaude_code/auth/__init__.py +24 -0
  2. klaude_code/auth/codex/__init__.py +20 -0
  3. klaude_code/auth/codex/exceptions.py +17 -0
  4. klaude_code/auth/codex/jwt_utils.py +45 -0
  5. klaude_code/auth/codex/oauth.py +229 -0
  6. klaude_code/auth/codex/token_manager.py +84 -0
  7. klaude_code/cli/auth_cmd.py +73 -0
  8. klaude_code/cli/config_cmd.py +91 -0
  9. klaude_code/cli/cost_cmd.py +338 -0
  10. klaude_code/cli/debug.py +78 -0
  11. klaude_code/cli/list_model.py +307 -0
  12. klaude_code/cli/main.py +233 -134
  13. klaude_code/cli/runtime.py +309 -117
  14. klaude_code/{version.py → cli/self_update.py} +114 -5
  15. klaude_code/cli/session_cmd.py +37 -21
  16. klaude_code/command/__init__.py +88 -27
  17. klaude_code/command/clear_cmd.py +8 -7
  18. klaude_code/command/command_abc.py +31 -31
  19. klaude_code/command/debug_cmd.py +79 -0
  20. klaude_code/command/export_cmd.py +19 -53
  21. klaude_code/command/export_online_cmd.py +154 -0
  22. klaude_code/command/fork_session_cmd.py +267 -0
  23. klaude_code/command/help_cmd.py +7 -8
  24. klaude_code/command/model_cmd.py +60 -10
  25. klaude_code/command/model_select.py +84 -0
  26. klaude_code/command/prompt-jj-describe.md +32 -0
  27. klaude_code/command/prompt_command.py +19 -11
  28. klaude_code/command/refresh_cmd.py +8 -10
  29. klaude_code/command/registry.py +139 -40
  30. klaude_code/command/release_notes_cmd.py +84 -0
  31. klaude_code/command/resume_cmd.py +111 -0
  32. klaude_code/command/status_cmd.py +104 -60
  33. klaude_code/command/terminal_setup_cmd.py +7 -9
  34. klaude_code/command/thinking_cmd.py +98 -0
  35. klaude_code/config/__init__.py +14 -6
  36. klaude_code/config/assets/__init__.py +1 -0
  37. klaude_code/config/assets/builtin_config.yaml +303 -0
  38. klaude_code/config/builtin_config.py +38 -0
  39. klaude_code/config/config.py +378 -109
  40. klaude_code/config/select_model.py +117 -53
  41. klaude_code/config/thinking.py +269 -0
  42. klaude_code/{const/__init__.py → const.py} +50 -19
  43. klaude_code/core/agent.py +20 -28
  44. klaude_code/core/executor.py +327 -112
  45. klaude_code/core/manager/__init__.py +2 -4
  46. klaude_code/core/manager/llm_clients.py +1 -15
  47. klaude_code/core/manager/llm_clients_builder.py +10 -11
  48. klaude_code/core/manager/sub_agent_manager.py +37 -6
  49. klaude_code/core/prompt.py +63 -44
  50. klaude_code/core/prompts/prompt-claude-code.md +2 -13
  51. klaude_code/core/prompts/prompt-codex-gpt-5-1-codex-max.md +117 -0
  52. klaude_code/core/prompts/prompt-codex-gpt-5-2-codex.md +117 -0
  53. klaude_code/core/prompts/prompt-codex.md +9 -42
  54. klaude_code/core/prompts/prompt-minimal.md +12 -0
  55. klaude_code/core/prompts/{prompt-subagent-explore.md → prompt-sub-agent-explore.md} +16 -3
  56. klaude_code/core/prompts/{prompt-subagent-oracle.md → prompt-sub-agent-oracle.md} +1 -2
  57. klaude_code/core/prompts/prompt-sub-agent-web.md +51 -0
  58. klaude_code/core/reminders.py +283 -95
  59. klaude_code/core/task.py +113 -75
  60. klaude_code/core/tool/__init__.py +24 -31
  61. klaude_code/core/tool/file/_utils.py +36 -0
  62. klaude_code/core/tool/file/apply_patch.py +17 -25
  63. klaude_code/core/tool/file/apply_patch_tool.py +57 -77
  64. klaude_code/core/tool/file/diff_builder.py +151 -0
  65. klaude_code/core/tool/file/edit_tool.py +50 -63
  66. klaude_code/core/tool/file/move_tool.md +41 -0
  67. klaude_code/core/tool/file/move_tool.py +435 -0
  68. klaude_code/core/tool/file/read_tool.md +1 -1
  69. klaude_code/core/tool/file/read_tool.py +86 -86
  70. klaude_code/core/tool/file/write_tool.py +59 -69
  71. klaude_code/core/tool/report_back_tool.py +84 -0
  72. klaude_code/core/tool/shell/bash_tool.py +265 -22
  73. klaude_code/core/tool/shell/command_safety.py +3 -6
  74. klaude_code/core/tool/{memory → skill}/skill_tool.py +16 -26
  75. klaude_code/core/tool/sub_agent_tool.py +13 -2
  76. klaude_code/core/tool/todo/todo_write_tool.md +0 -157
  77. klaude_code/core/tool/todo/todo_write_tool.py +1 -1
  78. klaude_code/core/tool/todo/todo_write_tool_raw.md +182 -0
  79. klaude_code/core/tool/todo/update_plan_tool.py +1 -1
  80. klaude_code/core/tool/tool_abc.py +18 -0
  81. klaude_code/core/tool/tool_context.py +27 -12
  82. klaude_code/core/tool/tool_registry.py +7 -7
  83. klaude_code/core/tool/tool_runner.py +44 -36
  84. klaude_code/core/tool/truncation.py +29 -14
  85. klaude_code/core/tool/web/mermaid_tool.md +43 -0
  86. klaude_code/core/tool/web/mermaid_tool.py +2 -5
  87. klaude_code/core/tool/web/web_fetch_tool.md +1 -1
  88. klaude_code/core/tool/web/web_fetch_tool.py +112 -22
  89. klaude_code/core/tool/web/web_search_tool.md +23 -0
  90. klaude_code/core/tool/web/web_search_tool.py +130 -0
  91. klaude_code/core/turn.py +168 -66
  92. klaude_code/llm/__init__.py +2 -10
  93. klaude_code/llm/anthropic/client.py +190 -178
  94. klaude_code/llm/anthropic/input.py +39 -15
  95. klaude_code/llm/bedrock/__init__.py +3 -0
  96. klaude_code/llm/bedrock/client.py +60 -0
  97. klaude_code/llm/client.py +7 -21
  98. klaude_code/llm/codex/__init__.py +5 -0
  99. klaude_code/llm/codex/client.py +149 -0
  100. klaude_code/llm/google/__init__.py +3 -0
  101. klaude_code/llm/google/client.py +309 -0
  102. klaude_code/llm/google/input.py +215 -0
  103. klaude_code/llm/input_common.py +3 -9
  104. klaude_code/llm/openai_compatible/client.py +72 -164
  105. klaude_code/llm/openai_compatible/input.py +6 -4
  106. klaude_code/llm/openai_compatible/stream.py +273 -0
  107. klaude_code/llm/openai_compatible/tool_call_accumulator.py +17 -1
  108. klaude_code/llm/openrouter/client.py +89 -160
  109. klaude_code/llm/openrouter/input.py +18 -30
  110. klaude_code/llm/openrouter/reasoning.py +118 -0
  111. klaude_code/llm/registry.py +39 -7
  112. klaude_code/llm/responses/client.py +184 -171
  113. klaude_code/llm/responses/input.py +20 -1
  114. klaude_code/llm/usage.py +17 -12
  115. klaude_code/protocol/commands.py +17 -1
  116. klaude_code/protocol/events.py +31 -4
  117. klaude_code/protocol/llm_param.py +13 -10
  118. klaude_code/protocol/model.py +232 -29
  119. klaude_code/protocol/op.py +90 -1
  120. klaude_code/protocol/op_handler.py +35 -1
  121. klaude_code/protocol/sub_agent/__init__.py +117 -0
  122. klaude_code/protocol/sub_agent/explore.py +63 -0
  123. klaude_code/protocol/sub_agent/oracle.py +91 -0
  124. klaude_code/protocol/sub_agent/task.py +61 -0
  125. klaude_code/protocol/sub_agent/web.py +79 -0
  126. klaude_code/protocol/tools.py +4 -2
  127. klaude_code/session/__init__.py +2 -2
  128. klaude_code/session/codec.py +71 -0
  129. klaude_code/session/export.py +293 -86
  130. klaude_code/session/selector.py +89 -67
  131. klaude_code/session/session.py +320 -309
  132. klaude_code/session/store.py +220 -0
  133. klaude_code/session/templates/export_session.html +595 -83
  134. klaude_code/session/templates/mermaid_viewer.html +926 -0
  135. klaude_code/skill/__init__.py +27 -0
  136. klaude_code/skill/assets/deslop/SKILL.md +17 -0
  137. klaude_code/skill/assets/dev-docs/SKILL.md +108 -0
  138. klaude_code/skill/assets/handoff/SKILL.md +39 -0
  139. klaude_code/skill/assets/jj-workspace/SKILL.md +20 -0
  140. klaude_code/skill/assets/skill-creator/SKILL.md +139 -0
  141. klaude_code/{core/tool/memory/skill_loader.py → skill/loader.py} +55 -15
  142. klaude_code/skill/manager.py +70 -0
  143. klaude_code/skill/system_skills.py +192 -0
  144. klaude_code/trace/__init__.py +20 -2
  145. klaude_code/trace/log.py +150 -5
  146. klaude_code/ui/__init__.py +4 -9
  147. klaude_code/ui/core/input.py +1 -1
  148. klaude_code/ui/core/stage_manager.py +7 -7
  149. klaude_code/ui/modes/debug/display.py +2 -1
  150. klaude_code/ui/modes/repl/__init__.py +3 -48
  151. klaude_code/ui/modes/repl/clipboard.py +5 -5
  152. klaude_code/ui/modes/repl/completers.py +487 -123
  153. klaude_code/ui/modes/repl/display.py +5 -4
  154. klaude_code/ui/modes/repl/event_handler.py +370 -117
  155. klaude_code/ui/modes/repl/input_prompt_toolkit.py +552 -105
  156. klaude_code/ui/modes/repl/key_bindings.py +146 -23
  157. klaude_code/ui/modes/repl/renderer.py +189 -99
  158. klaude_code/ui/renderers/assistant.py +9 -2
  159. klaude_code/ui/renderers/bash_syntax.py +178 -0
  160. klaude_code/ui/renderers/common.py +78 -0
  161. klaude_code/ui/renderers/developer.py +104 -48
  162. klaude_code/ui/renderers/diffs.py +87 -6
  163. klaude_code/ui/renderers/errors.py +11 -6
  164. klaude_code/ui/renderers/mermaid_viewer.py +57 -0
  165. klaude_code/ui/renderers/metadata.py +112 -76
  166. klaude_code/ui/renderers/sub_agent.py +92 -7
  167. klaude_code/ui/renderers/thinking.py +40 -18
  168. klaude_code/ui/renderers/tools.py +405 -227
  169. klaude_code/ui/renderers/user_input.py +73 -13
  170. klaude_code/ui/rich/__init__.py +10 -1
  171. klaude_code/ui/rich/cjk_wrap.py +228 -0
  172. klaude_code/ui/rich/code_panel.py +131 -0
  173. klaude_code/ui/rich/live.py +17 -0
  174. klaude_code/ui/rich/markdown.py +305 -170
  175. klaude_code/ui/rich/searchable_text.py +10 -13
  176. klaude_code/ui/rich/status.py +190 -49
  177. klaude_code/ui/rich/theme.py +135 -39
  178. klaude_code/ui/terminal/__init__.py +55 -0
  179. klaude_code/ui/terminal/color.py +1 -1
  180. klaude_code/ui/terminal/control.py +13 -22
  181. klaude_code/ui/terminal/notifier.py +44 -4
  182. klaude_code/ui/terminal/selector.py +658 -0
  183. klaude_code/ui/utils/common.py +0 -18
  184. klaude_code-1.8.0.dist-info/METADATA +377 -0
  185. klaude_code-1.8.0.dist-info/RECORD +219 -0
  186. {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/entry_points.txt +1 -0
  187. klaude_code/command/diff_cmd.py +0 -138
  188. klaude_code/command/prompt-dev-docs-update.md +0 -56
  189. klaude_code/command/prompt-dev-docs.md +0 -46
  190. klaude_code/config/list_model.py +0 -162
  191. klaude_code/core/manager/agent_manager.py +0 -127
  192. klaude_code/core/prompts/prompt-subagent-webfetch.md +0 -46
  193. klaude_code/core/tool/file/multi_edit_tool.md +0 -42
  194. klaude_code/core/tool/file/multi_edit_tool.py +0 -199
  195. klaude_code/core/tool/memory/memory_tool.md +0 -16
  196. klaude_code/core/tool/memory/memory_tool.py +0 -462
  197. klaude_code/llm/openrouter/reasoning_handler.py +0 -209
  198. klaude_code/protocol/sub_agent.py +0 -348
  199. klaude_code/ui/utils/debouncer.py +0 -42
  200. klaude_code-1.2.6.dist-info/METADATA +0 -178
  201. klaude_code-1.2.6.dist-info/RECORD +0 -167
  202. /klaude_code/core/prompts/{prompt-subagent.md → prompt-sub-agent.md} +0 -0
  203. /klaude_code/core/tool/{memory → skill}/__init__.py +0 -0
  204. /klaude_code/core/tool/{memory → skill}/skill_tool.md +0 -0
  205. {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/WHEEL +0 -0
@@ -1,11 +1,11 @@
1
1
  import json
2
- import time
2
+ import os
3
3
  from collections.abc import AsyncGenerator
4
- from typing import override
4
+ from typing import Any, override
5
5
 
6
6
  import anthropic
7
7
  import httpx
8
- from anthropic import RateLimitError
8
+ from anthropic import APIError
9
9
  from anthropic.types.beta.beta_input_json_delta import BetaInputJSONDelta
10
10
  from anthropic.types.beta.beta_raw_content_block_delta_event import BetaRawContentBlockDeltaEvent
11
11
  from anthropic.types.beta.beta_raw_content_block_start_event import BetaRawContentBlockStartEvent
@@ -16,26 +16,190 @@ from anthropic.types.beta.beta_signature_delta import BetaSignatureDelta
16
16
  from anthropic.types.beta.beta_text_delta import BetaTextDelta
17
17
  from anthropic.types.beta.beta_thinking_delta import BetaThinkingDelta
18
18
  from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
19
+ from anthropic.types.beta.message_create_params import MessageCreateParamsStreaming
19
20
 
20
21
  from klaude_code import const
21
22
  from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
22
- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
23
+ from klaude_code.llm.client import LLMClientABC
23
24
  from klaude_code.llm.input_common import apply_config_defaults
24
25
  from klaude_code.llm.registry import register
25
- from klaude_code.llm.usage import calculate_cost
26
+ from klaude_code.llm.usage import MetadataTracker
26
27
  from klaude_code.protocol import llm_param, model
27
28
  from klaude_code.trace import DebugType, log_debug
28
29
 
29
30
 
31
+ def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStreaming:
32
+ """Build Anthropic API request parameters."""
33
+ messages = convert_history_to_input(param.input, param.model)
34
+ tools = convert_tool_schema(param.tools)
35
+ system = convert_system_to_input(param.system)
36
+
37
+ payload: MessageCreateParamsStreaming = {
38
+ "model": str(param.model),
39
+ "tool_choice": {
40
+ "type": "auto",
41
+ "disable_parallel_tool_use": False,
42
+ },
43
+ "stream": True,
44
+ "max_tokens": param.max_tokens or const.DEFAULT_MAX_TOKENS,
45
+ "temperature": param.temperature or const.DEFAULT_TEMPERATURE,
46
+ "messages": messages,
47
+ "system": system,
48
+ "tools": tools,
49
+ "betas": ["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
50
+ }
51
+
52
+ if param.thinking and param.thinking.type == "enabled":
53
+ payload["thinking"] = anthropic.types.ThinkingConfigEnabledParam(
54
+ type="enabled",
55
+ budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
56
+ )
57
+
58
+ return payload
59
+
60
+
61
+ async def parse_anthropic_stream(
62
+ stream: Any,
63
+ param: llm_param.LLMCallParameter,
64
+ metadata_tracker: MetadataTracker,
65
+ ) -> AsyncGenerator[model.ConversationItem]:
66
+ """Parse Anthropic beta messages stream and yield conversation items.
67
+
68
+ This function is shared between AnthropicClient and BedrockClient.
69
+ """
70
+ accumulated_thinking: list[str] = []
71
+ accumulated_content: list[str] = []
72
+ response_id: str | None = None
73
+
74
+ current_tool_name: str | None = None
75
+ current_tool_call_id: str | None = None
76
+ current_tool_inputs: list[str] | None = None
77
+
78
+ input_token = 0
79
+ cached_token = 0
80
+
81
+ async for event in await stream:
82
+ log_debug(
83
+ f"[{event.type}]",
84
+ event.model_dump_json(exclude_none=True),
85
+ style="blue",
86
+ debug_type=DebugType.LLM_STREAM,
87
+ )
88
+ match event:
89
+ case BetaRawMessageStartEvent() as event:
90
+ response_id = event.message.id
91
+ cached_token = event.message.usage.cache_read_input_tokens or 0
92
+ input_token = event.message.usage.input_tokens
93
+ yield model.StartItem(response_id=response_id)
94
+ case BetaRawContentBlockDeltaEvent() as event:
95
+ match event.delta:
96
+ case BetaThinkingDelta() as delta:
97
+ if delta.thinking:
98
+ metadata_tracker.record_token()
99
+ accumulated_thinking.append(delta.thinking)
100
+ yield model.ReasoningTextDelta(
101
+ content=delta.thinking,
102
+ response_id=response_id,
103
+ )
104
+ case BetaSignatureDelta() as delta:
105
+ yield model.ReasoningEncryptedItem(
106
+ encrypted_content=delta.signature,
107
+ response_id=response_id,
108
+ model=str(param.model),
109
+ )
110
+ case BetaTextDelta() as delta:
111
+ if delta.text:
112
+ metadata_tracker.record_token()
113
+ accumulated_content.append(delta.text)
114
+ yield model.AssistantMessageDelta(
115
+ content=delta.text,
116
+ response_id=response_id,
117
+ )
118
+ case BetaInputJSONDelta() as delta:
119
+ if current_tool_inputs is not None:
120
+ if delta.partial_json:
121
+ metadata_tracker.record_token()
122
+ current_tool_inputs.append(delta.partial_json)
123
+ case _:
124
+ pass
125
+ case BetaRawContentBlockStartEvent() as event:
126
+ match event.content_block:
127
+ case BetaToolUseBlock() as block:
128
+ metadata_tracker.record_token()
129
+ yield model.ToolCallStartItem(
130
+ response_id=response_id,
131
+ call_id=block.id,
132
+ name=block.name,
133
+ )
134
+ current_tool_name = block.name
135
+ current_tool_call_id = block.id
136
+ current_tool_inputs = []
137
+ case _:
138
+ pass
139
+ case BetaRawContentBlockStopEvent():
140
+ if len(accumulated_thinking) > 0:
141
+ metadata_tracker.record_token()
142
+ full_thinking = "".join(accumulated_thinking)
143
+ yield model.ReasoningTextItem(
144
+ content=full_thinking,
145
+ response_id=response_id,
146
+ model=str(param.model),
147
+ )
148
+ accumulated_thinking.clear()
149
+ if len(accumulated_content) > 0:
150
+ metadata_tracker.record_token()
151
+ yield model.AssistantMessageItem(
152
+ content="".join(accumulated_content),
153
+ response_id=response_id,
154
+ )
155
+ accumulated_content.clear()
156
+ if current_tool_name and current_tool_call_id:
157
+ metadata_tracker.record_token()
158
+ yield model.ToolCallItem(
159
+ name=current_tool_name,
160
+ call_id=current_tool_call_id,
161
+ arguments="".join(current_tool_inputs) if current_tool_inputs else "",
162
+ response_id=response_id,
163
+ )
164
+ current_tool_name = None
165
+ current_tool_call_id = None
166
+ current_tool_inputs = None
167
+ case BetaRawMessageDeltaEvent() as event:
168
+ metadata_tracker.set_usage(
169
+ model.Usage(
170
+ input_tokens=input_token + cached_token,
171
+ output_tokens=event.usage.output_tokens,
172
+ cached_tokens=cached_token,
173
+ context_size=input_token + cached_token + event.usage.output_tokens,
174
+ context_limit=param.context_limit,
175
+ max_tokens=param.max_tokens,
176
+ )
177
+ )
178
+ metadata_tracker.set_model_name(str(param.model))
179
+ metadata_tracker.set_response_id(response_id)
180
+ yield metadata_tracker.finalize()
181
+ case _:
182
+ pass
183
+
184
+
30
185
  @register(llm_param.LLMClientProtocol.ANTHROPIC)
31
186
  class AnthropicClient(LLMClientABC):
32
187
  def __init__(self, config: llm_param.LLMConfigParameter):
33
188
  super().__init__(config)
34
- client = anthropic.AsyncAnthropic(
35
- api_key=config.api_key,
36
- base_url=config.base_url,
37
- timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
38
- )
189
+ # Remove ANTHROPIC_AUTH_TOKEN env var to prevent anthropic SDK from adding
190
+ # Authorization: Bearer header that may conflict with third-party APIs
191
+ # (e.g., deepseek, moonshot) that use Authorization header for authentication.
192
+ # The API key will be sent via X-Api-Key header instead.
193
+ saved_auth_token = os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
194
+ try:
195
+ client = anthropic.AsyncAnthropic(
196
+ api_key=config.api_key,
197
+ base_url=config.base_url,
198
+ timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
199
+ )
200
+ finally:
201
+ if saved_auth_token is not None:
202
+ os.environ["ANTHROPIC_AUTH_TOKEN"] = saved_auth_token
39
203
  self.client: anthropic.AsyncAnthropic = client
40
204
 
41
205
  @classmethod
@@ -44,178 +208,26 @@ class AnthropicClient(LLMClientABC):
44
208
  return cls(config)
45
209
 
46
210
  @override
47
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
211
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
48
212
  param = apply_config_defaults(param, self.get_llm_config())
49
213
 
50
- request_start_time = time.time()
51
- first_token_time: float | None = None
52
- last_token_time: float | None = None
53
-
54
- messages = convert_history_to_input(param.input, param.model)
55
- tools = convert_tool_schema(param.tools)
56
- system = convert_system_to_input(param.system)
57
-
58
- stream = call_with_logged_payload(
59
- self.client.beta.messages.create,
60
- model=str(param.model),
61
- tool_choice={
62
- "type": "auto",
63
- "disable_parallel_tool_use": False,
64
- },
65
- stream=True,
66
- max_tokens=param.max_tokens or const.DEFAULT_MAX_TOKENS,
67
- temperature=param.temperature or const.DEFAULT_TEMPERATURE,
68
- messages=messages,
69
- system=system,
70
- tools=tools,
71
- betas=["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
72
- thinking=anthropic.types.ThinkingConfigEnabledParam(
73
- type=param.thinking.type,
74
- budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
75
- )
76
- if param.thinking and param.thinking.type == "enabled"
77
- else anthropic.types.ThinkingConfigDisabledParam(
78
- type="disabled",
79
- ),
80
- extra_headers={"extra": json.dumps({"session_id": param.session_id})},
81
- )
214
+ metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
82
215
 
83
- accumulated_thinking: list[str] = []
84
- accumulated_content: list[str] = []
85
- response_id: str | None = None
216
+ payload = build_payload(param)
86
217
 
87
- current_tool_name: str | None = None
88
- current_tool_call_id: str | None = None
89
- current_tool_inputs: list[str] | None = None
218
+ log_debug(
219
+ json.dumps(payload, ensure_ascii=False, default=str),
220
+ style="yellow",
221
+ debug_type=DebugType.LLM_PAYLOAD,
222
+ )
90
223
 
91
- input_tokens = 0
92
- cached_tokens = 0
93
- output_tokens = 0
224
+ stream = self.client.beta.messages.create(
225
+ **payload,
226
+ extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
227
+ )
94
228
 
95
229
  try:
96
- async for event in await stream:
97
- log_debug(
98
- f"[{event.type}]",
99
- event.model_dump_json(exclude_none=True),
100
- style="blue",
101
- debug_type=DebugType.LLM_STREAM,
102
- )
103
- match event:
104
- case BetaRawMessageStartEvent() as event:
105
- response_id = event.message.id
106
- cached_tokens = event.message.usage.cache_read_input_tokens or 0
107
- input_tokens = (event.message.usage.input_tokens or 0) + (
108
- event.message.usage.cache_creation_input_tokens or 0
109
- )
110
- output_tokens = event.message.usage.output_tokens or 0
111
- yield model.StartItem(response_id=response_id)
112
- case BetaRawContentBlockDeltaEvent() as event:
113
- match event.delta:
114
- case BetaThinkingDelta() as delta:
115
- if first_token_time is None:
116
- first_token_time = time.time()
117
- last_token_time = time.time()
118
- accumulated_thinking.append(delta.thinking)
119
- case BetaSignatureDelta() as delta:
120
- if first_token_time is None:
121
- first_token_time = time.time()
122
- last_token_time = time.time()
123
- yield model.ReasoningEncryptedItem(
124
- encrypted_content=delta.signature,
125
- response_id=response_id,
126
- model=str(param.model),
127
- )
128
- case BetaTextDelta() as delta:
129
- if first_token_time is None:
130
- first_token_time = time.time()
131
- last_token_time = time.time()
132
- accumulated_content.append(delta.text)
133
- yield model.AssistantMessageDelta(
134
- content=delta.text,
135
- response_id=response_id,
136
- )
137
- case BetaInputJSONDelta() as delta:
138
- if first_token_time is None:
139
- first_token_time = time.time()
140
- last_token_time = time.time()
141
- if current_tool_inputs is not None:
142
- current_tool_inputs.append(delta.partial_json)
143
- case _:
144
- pass
145
- case BetaRawContentBlockStartEvent() as event:
146
- match event.content_block:
147
- case BetaToolUseBlock() as block:
148
- yield model.ToolCallStartItem(
149
- response_id=response_id,
150
- call_id=block.id,
151
- name=block.name,
152
- )
153
- current_tool_name = block.name
154
- current_tool_call_id = block.id
155
- current_tool_inputs = []
156
- case _:
157
- pass
158
- case BetaRawContentBlockStopEvent() as event:
159
- if len(accumulated_thinking) > 0:
160
- full_thinking = "".join(accumulated_thinking)
161
- yield model.ReasoningTextItem(
162
- content=full_thinking,
163
- response_id=response_id,
164
- model=str(param.model),
165
- )
166
- accumulated_thinking.clear()
167
- if len(accumulated_content) > 0:
168
- yield model.AssistantMessageItem(
169
- content="".join(accumulated_content),
170
- response_id=response_id,
171
- )
172
- accumulated_content.clear()
173
- if current_tool_name and current_tool_call_id:
174
- yield model.ToolCallItem(
175
- name=current_tool_name,
176
- call_id=current_tool_call_id,
177
- arguments="".join(current_tool_inputs) if current_tool_inputs else "",
178
- response_id=response_id,
179
- )
180
- current_tool_name = None
181
- current_tool_call_id = None
182
- current_tool_inputs = None
183
- case BetaRawMessageDeltaEvent() as event:
184
- input_tokens += (event.usage.input_tokens or 0) + (event.usage.cache_creation_input_tokens or 0)
185
- output_tokens += event.usage.output_tokens or 0
186
- cached_tokens += event.usage.cache_read_input_tokens or 0
187
- total_tokens = input_tokens + cached_tokens + output_tokens
188
- context_usage_percent = (
189
- (total_tokens / param.context_limit) * 100 if param.context_limit else None
190
- )
191
-
192
- throughput_tps: float | None = None
193
- first_token_latency_ms: float | None = None
194
-
195
- if first_token_time is not None:
196
- first_token_latency_ms = (first_token_time - request_start_time) * 1000
197
-
198
- if first_token_time is not None and last_token_time is not None and output_tokens > 0:
199
- time_duration = last_token_time - first_token_time
200
- if time_duration >= 0.15:
201
- throughput_tps = output_tokens / time_duration
202
-
203
- usage = model.Usage(
204
- input_tokens=input_tokens,
205
- output_tokens=output_tokens,
206
- cached_tokens=cached_tokens,
207
- total_tokens=total_tokens,
208
- context_usage_percent=context_usage_percent,
209
- throughput_tps=throughput_tps,
210
- first_token_latency_ms=first_token_latency_ms,
211
- )
212
- calculate_cost(usage, self._config.cost)
213
- yield model.ResponseMetadataItem(
214
- usage=usage,
215
- response_id=response_id,
216
- model_name=str(param.model),
217
- )
218
- case _:
219
- pass
220
- except RateLimitError as e:
221
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
230
+ async for item in parse_anthropic_stream(stream, param, metadata_tracker):
231
+ yield item
232
+ except (APIError, httpx.HTTPError) as e:
233
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
@@ -73,7 +73,8 @@ def _user_group_to_message(group: UserGroup) -> BetaMessageParam:
73
73
  return {"role": "user", "content": blocks}
74
74
 
75
75
 
76
- def _tool_group_to_message(group: ToolGroup) -> BetaMessageParam:
76
+ def _tool_group_to_block(group: ToolGroup) -> dict[str, object]:
77
+ """Convert a single ToolGroup to a tool_result block."""
77
78
  tool_content: list[BetaTextBlockParam | BetaImageBlockParam] = []
78
79
  merged_text = merge_reminder_text(
79
80
  group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
@@ -84,34 +85,41 @@ def _tool_group_to_message(group: ToolGroup) -> BetaMessageParam:
84
85
  tool_content.append(_image_part_to_block(image))
85
86
  for image in group.reminder_images:
86
87
  tool_content.append(_image_part_to_block(image))
88
+ return {
89
+ "type": "tool_result",
90
+ "tool_use_id": group.tool_result.call_id,
91
+ "is_error": group.tool_result.status == "error",
92
+ "content": tool_content,
93
+ }
94
+
95
+
96
+ def _tool_groups_to_message(groups: list[ToolGroup]) -> BetaMessageParam:
97
+ """Convert one or more ToolGroups to a single user message with multiple tool_result blocks."""
87
98
  return {
88
99
  "role": "user",
89
- "content": [
90
- {
91
- "type": "tool_result",
92
- "tool_use_id": group.tool_result.call_id,
93
- "is_error": group.tool_result.status == "error",
94
- "content": tool_content,
95
- }
96
- ],
100
+ "content": [_tool_group_to_block(group) for group in groups],
97
101
  }
98
102
 
99
103
 
100
104
  def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -> BetaMessageParam:
101
105
  content: list[dict[str, object]] = []
102
106
  current_reasoning_content: str | None = None
107
+ degraded_thinking_texts: list[str] = []
103
108
 
104
109
  # Process reasoning items in original order so that text and
105
110
  # encrypted parts are paired correctly for the given model.
111
+ # For cross-model scenarios, degrade thinking to plain text.
106
112
  for item in group.reasoning_items:
107
113
  if isinstance(item, model.ReasoningTextItem):
108
114
  if model_name != item.model:
109
- continue
110
- current_reasoning_content = item.content
115
+ # Cross-model: collect thinking text for degradation
116
+ if item.content:
117
+ degraded_thinking_texts.append(item.content)
118
+ else:
119
+ current_reasoning_content = item.content
111
120
  else:
112
- if model_name != item.model:
113
- continue
114
- if item.encrypted_content and len(item.encrypted_content) > 0:
121
+ # Same model: preserve signature
122
+ if model_name == item.model and item.encrypted_content and len(item.encrypted_content) > 0:
115
123
  content.append(
116
124
  {
117
125
  "type": "thinking",
@@ -127,6 +135,11 @@ def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -
127
135
  if len(current_reasoning_content or "") > 0:
128
136
  content.insert(0, {"type": "thinking", "thinking": current_reasoning_content})
129
137
 
138
+ # Cross-model: degrade thinking to plain text with <thinking> tags
139
+ if degraded_thinking_texts:
140
+ degraded_text = "<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>"
141
+ content.insert(0, {"type": "text", "text": degraded_text})
142
+
130
143
  if group.text_content:
131
144
  content.append({"type": "text", "text": group.text_content})
132
145
 
@@ -165,15 +178,26 @@ def convert_history_to_input(
165
178
  model_name: Model name. Used to verify that signatures are valid for the same model
166
179
  """
167
180
  messages: list[BetaMessageParam] = []
181
+ pending_tool_groups: list[ToolGroup] = []
182
+
183
+ def flush_tool_groups() -> None:
184
+ nonlocal pending_tool_groups
185
+ if pending_tool_groups:
186
+ messages.append(_tool_groups_to_message(pending_tool_groups))
187
+ pending_tool_groups = []
188
+
168
189
  for group in parse_message_groups(history):
169
190
  match group:
170
191
  case UserGroup():
192
+ flush_tool_groups()
171
193
  messages.append(_user_group_to_message(group))
172
194
  case ToolGroup():
173
- messages.append(_tool_group_to_message(group))
195
+ pending_tool_groups.append(group)
174
196
  case AssistantGroup():
197
+ flush_tool_groups()
175
198
  messages.append(_assistant_group_to_message(group, model_name))
176
199
 
200
+ flush_tool_groups()
177
201
  _add_cache_control(messages)
178
202
  return messages
179
203
 
@@ -0,0 +1,3 @@
1
+ from klaude_code.llm.bedrock.client import BedrockClient
2
+
3
+ __all__ = ["BedrockClient"]
@@ -0,0 +1,60 @@
1
+ """AWS Bedrock LLM client using Anthropic SDK."""
2
+
3
+ import json
4
+ from collections.abc import AsyncGenerator
5
+ from typing import override
6
+
7
+ import anthropic
8
+ import httpx
9
+ from anthropic import APIError
10
+
11
+ from klaude_code.llm.anthropic.client import build_payload, parse_anthropic_stream
12
+ from klaude_code.llm.client import LLMClientABC
13
+ from klaude_code.llm.input_common import apply_config_defaults
14
+ from klaude_code.llm.registry import register
15
+ from klaude_code.llm.usage import MetadataTracker
16
+ from klaude_code.protocol import llm_param, model
17
+ from klaude_code.trace import DebugType, log_debug
18
+
19
+
20
+ @register(llm_param.LLMClientProtocol.BEDROCK)
21
+ class BedrockClient(LLMClientABC):
22
+ """LLM client for AWS Bedrock using Anthropic SDK."""
23
+
24
+ def __init__(self, config: llm_param.LLMConfigParameter):
25
+ super().__init__(config)
26
+ self.client = anthropic.AsyncAnthropicBedrock(
27
+ aws_access_key=config.aws_access_key,
28
+ aws_secret_key=config.aws_secret_key,
29
+ aws_region=config.aws_region,
30
+ aws_session_token=config.aws_session_token,
31
+ aws_profile=config.aws_profile,
32
+ timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
33
+ )
34
+
35
+ @classmethod
36
+ @override
37
+ def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
38
+ return cls(config)
39
+
40
+ @override
41
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
42
+ param = apply_config_defaults(param, self.get_llm_config())
43
+
44
+ metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
45
+
46
+ payload = build_payload(param)
47
+
48
+ log_debug(
49
+ json.dumps(payload, ensure_ascii=False, default=str),
50
+ style="yellow",
51
+ debug_type=DebugType.LLM_PAYLOAD,
52
+ )
53
+
54
+ stream = self.client.beta.messages.create(**payload)
55
+
56
+ try:
57
+ async for item in parse_anthropic_stream(stream, param, metadata_tracker):
58
+ yield item
59
+ except (APIError, httpx.HTTPError) as e:
60
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
klaude_code/llm/client.py CHANGED
@@ -1,10 +1,8 @@
1
- import json
2
1
  from abc import ABC, abstractmethod
3
2
  from collections.abc import AsyncGenerator
4
- from typing import Callable, ParamSpec, TypeVar, cast
3
+ from typing import ParamSpec, TypeVar, cast
5
4
 
6
5
  from klaude_code.protocol import llm_param, model
7
- from klaude_code.trace import DebugType, log_debug
8
6
 
9
7
 
10
8
  class LLMClientABC(ABC):
@@ -17,9 +15,9 @@ class LLMClientABC(ABC):
17
15
  pass
18
16
 
19
17
  @abstractmethod
20
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
18
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
21
19
  raise NotImplementedError
22
- yield cast(model.ConversationItem, None) # pyright: ignore[reportUnreachable]
20
+ yield cast(model.ConversationItem, None)
23
21
 
24
22
  def get_llm_config(self) -> llm_param.LLMConfigParameter:
25
23
  return self._config
@@ -28,22 +26,10 @@ class LLMClientABC(ABC):
28
26
  def model_name(self) -> str:
29
27
  return self._config.model or ""
30
28
 
29
+ @property
30
+ def protocol(self) -> llm_param.LLMClientProtocol:
31
+ return self._config.protocol
32
+
31
33
 
32
34
  P = ParamSpec("P")
33
35
  R = TypeVar("R")
34
-
35
-
36
- def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
37
- """Call an SDK function while logging the JSON payload.
38
-
39
- The function reuses the original callable's type signature via ParamSpec
40
- so static type checkers can validate arguments at the call site.
41
- """
42
-
43
- payload = {k: v for k, v in kwargs.items() if v is not None}
44
- log_debug(
45
- json.dumps(payload, ensure_ascii=False, default=str),
46
- style="yellow",
47
- debug_type=DebugType.LLM_PAYLOAD,
48
- )
49
- return func(*args, **kwargs)
@@ -0,0 +1,5 @@
1
+ """Codex LLM client using ChatGPT subscription."""
2
+
3
+ from klaude_code.llm.codex.client import CodexClient
4
+
5
+ __all__ = ["CodexClient"]