klaude-code 1.2.6__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. klaude_code/auth/__init__.py +24 -0
  2. klaude_code/auth/codex/__init__.py +20 -0
  3. klaude_code/auth/codex/exceptions.py +17 -0
  4. klaude_code/auth/codex/jwt_utils.py +45 -0
  5. klaude_code/auth/codex/oauth.py +229 -0
  6. klaude_code/auth/codex/token_manager.py +84 -0
  7. klaude_code/cli/auth_cmd.py +73 -0
  8. klaude_code/cli/config_cmd.py +91 -0
  9. klaude_code/cli/cost_cmd.py +338 -0
  10. klaude_code/cli/debug.py +78 -0
  11. klaude_code/cli/list_model.py +307 -0
  12. klaude_code/cli/main.py +233 -134
  13. klaude_code/cli/runtime.py +309 -117
  14. klaude_code/{version.py → cli/self_update.py} +114 -5
  15. klaude_code/cli/session_cmd.py +37 -21
  16. klaude_code/command/__init__.py +88 -27
  17. klaude_code/command/clear_cmd.py +8 -7
  18. klaude_code/command/command_abc.py +31 -31
  19. klaude_code/command/debug_cmd.py +79 -0
  20. klaude_code/command/export_cmd.py +19 -53
  21. klaude_code/command/export_online_cmd.py +154 -0
  22. klaude_code/command/fork_session_cmd.py +267 -0
  23. klaude_code/command/help_cmd.py +7 -8
  24. klaude_code/command/model_cmd.py +60 -10
  25. klaude_code/command/model_select.py +84 -0
  26. klaude_code/command/prompt-jj-describe.md +32 -0
  27. klaude_code/command/prompt_command.py +19 -11
  28. klaude_code/command/refresh_cmd.py +8 -10
  29. klaude_code/command/registry.py +139 -40
  30. klaude_code/command/release_notes_cmd.py +84 -0
  31. klaude_code/command/resume_cmd.py +111 -0
  32. klaude_code/command/status_cmd.py +104 -60
  33. klaude_code/command/terminal_setup_cmd.py +7 -9
  34. klaude_code/command/thinking_cmd.py +98 -0
  35. klaude_code/config/__init__.py +14 -6
  36. klaude_code/config/assets/__init__.py +1 -0
  37. klaude_code/config/assets/builtin_config.yaml +303 -0
  38. klaude_code/config/builtin_config.py +38 -0
  39. klaude_code/config/config.py +378 -109
  40. klaude_code/config/select_model.py +117 -53
  41. klaude_code/config/thinking.py +269 -0
  42. klaude_code/{const/__init__.py → const.py} +50 -19
  43. klaude_code/core/agent.py +20 -28
  44. klaude_code/core/executor.py +327 -112
  45. klaude_code/core/manager/__init__.py +2 -4
  46. klaude_code/core/manager/llm_clients.py +1 -15
  47. klaude_code/core/manager/llm_clients_builder.py +10 -11
  48. klaude_code/core/manager/sub_agent_manager.py +37 -6
  49. klaude_code/core/prompt.py +63 -44
  50. klaude_code/core/prompts/prompt-claude-code.md +2 -13
  51. klaude_code/core/prompts/prompt-codex-gpt-5-1-codex-max.md +117 -0
  52. klaude_code/core/prompts/prompt-codex-gpt-5-2-codex.md +117 -0
  53. klaude_code/core/prompts/prompt-codex.md +9 -42
  54. klaude_code/core/prompts/prompt-minimal.md +12 -0
  55. klaude_code/core/prompts/{prompt-subagent-explore.md → prompt-sub-agent-explore.md} +16 -3
  56. klaude_code/core/prompts/{prompt-subagent-oracle.md → prompt-sub-agent-oracle.md} +1 -2
  57. klaude_code/core/prompts/prompt-sub-agent-web.md +51 -0
  58. klaude_code/core/reminders.py +283 -95
  59. klaude_code/core/task.py +113 -75
  60. klaude_code/core/tool/__init__.py +24 -31
  61. klaude_code/core/tool/file/_utils.py +36 -0
  62. klaude_code/core/tool/file/apply_patch.py +17 -25
  63. klaude_code/core/tool/file/apply_patch_tool.py +57 -77
  64. klaude_code/core/tool/file/diff_builder.py +151 -0
  65. klaude_code/core/tool/file/edit_tool.py +50 -63
  66. klaude_code/core/tool/file/move_tool.md +41 -0
  67. klaude_code/core/tool/file/move_tool.py +435 -0
  68. klaude_code/core/tool/file/read_tool.md +1 -1
  69. klaude_code/core/tool/file/read_tool.py +86 -86
  70. klaude_code/core/tool/file/write_tool.py +59 -69
  71. klaude_code/core/tool/report_back_tool.py +84 -0
  72. klaude_code/core/tool/shell/bash_tool.py +265 -22
  73. klaude_code/core/tool/shell/command_safety.py +3 -6
  74. klaude_code/core/tool/{memory → skill}/skill_tool.py +16 -26
  75. klaude_code/core/tool/sub_agent_tool.py +13 -2
  76. klaude_code/core/tool/todo/todo_write_tool.md +0 -157
  77. klaude_code/core/tool/todo/todo_write_tool.py +1 -1
  78. klaude_code/core/tool/todo/todo_write_tool_raw.md +182 -0
  79. klaude_code/core/tool/todo/update_plan_tool.py +1 -1
  80. klaude_code/core/tool/tool_abc.py +18 -0
  81. klaude_code/core/tool/tool_context.py +27 -12
  82. klaude_code/core/tool/tool_registry.py +7 -7
  83. klaude_code/core/tool/tool_runner.py +44 -36
  84. klaude_code/core/tool/truncation.py +29 -14
  85. klaude_code/core/tool/web/mermaid_tool.md +43 -0
  86. klaude_code/core/tool/web/mermaid_tool.py +2 -5
  87. klaude_code/core/tool/web/web_fetch_tool.md +1 -1
  88. klaude_code/core/tool/web/web_fetch_tool.py +112 -22
  89. klaude_code/core/tool/web/web_search_tool.md +23 -0
  90. klaude_code/core/tool/web/web_search_tool.py +130 -0
  91. klaude_code/core/turn.py +168 -66
  92. klaude_code/llm/__init__.py +2 -10
  93. klaude_code/llm/anthropic/client.py +190 -178
  94. klaude_code/llm/anthropic/input.py +39 -15
  95. klaude_code/llm/bedrock/__init__.py +3 -0
  96. klaude_code/llm/bedrock/client.py +60 -0
  97. klaude_code/llm/client.py +7 -21
  98. klaude_code/llm/codex/__init__.py +5 -0
  99. klaude_code/llm/codex/client.py +149 -0
  100. klaude_code/llm/google/__init__.py +3 -0
  101. klaude_code/llm/google/client.py +309 -0
  102. klaude_code/llm/google/input.py +215 -0
  103. klaude_code/llm/input_common.py +3 -9
  104. klaude_code/llm/openai_compatible/client.py +72 -164
  105. klaude_code/llm/openai_compatible/input.py +6 -4
  106. klaude_code/llm/openai_compatible/stream.py +273 -0
  107. klaude_code/llm/openai_compatible/tool_call_accumulator.py +17 -1
  108. klaude_code/llm/openrouter/client.py +89 -160
  109. klaude_code/llm/openrouter/input.py +18 -30
  110. klaude_code/llm/openrouter/reasoning.py +118 -0
  111. klaude_code/llm/registry.py +39 -7
  112. klaude_code/llm/responses/client.py +184 -171
  113. klaude_code/llm/responses/input.py +20 -1
  114. klaude_code/llm/usage.py +17 -12
  115. klaude_code/protocol/commands.py +17 -1
  116. klaude_code/protocol/events.py +31 -4
  117. klaude_code/protocol/llm_param.py +13 -10
  118. klaude_code/protocol/model.py +232 -29
  119. klaude_code/protocol/op.py +90 -1
  120. klaude_code/protocol/op_handler.py +35 -1
  121. klaude_code/protocol/sub_agent/__init__.py +117 -0
  122. klaude_code/protocol/sub_agent/explore.py +63 -0
  123. klaude_code/protocol/sub_agent/oracle.py +91 -0
  124. klaude_code/protocol/sub_agent/task.py +61 -0
  125. klaude_code/protocol/sub_agent/web.py +79 -0
  126. klaude_code/protocol/tools.py +4 -2
  127. klaude_code/session/__init__.py +2 -2
  128. klaude_code/session/codec.py +71 -0
  129. klaude_code/session/export.py +293 -86
  130. klaude_code/session/selector.py +89 -67
  131. klaude_code/session/session.py +320 -309
  132. klaude_code/session/store.py +220 -0
  133. klaude_code/session/templates/export_session.html +595 -83
  134. klaude_code/session/templates/mermaid_viewer.html +926 -0
  135. klaude_code/skill/__init__.py +27 -0
  136. klaude_code/skill/assets/deslop/SKILL.md +17 -0
  137. klaude_code/skill/assets/dev-docs/SKILL.md +108 -0
  138. klaude_code/skill/assets/handoff/SKILL.md +39 -0
  139. klaude_code/skill/assets/jj-workspace/SKILL.md +20 -0
  140. klaude_code/skill/assets/skill-creator/SKILL.md +139 -0
  141. klaude_code/{core/tool/memory/skill_loader.py → skill/loader.py} +55 -15
  142. klaude_code/skill/manager.py +70 -0
  143. klaude_code/skill/system_skills.py +192 -0
  144. klaude_code/trace/__init__.py +20 -2
  145. klaude_code/trace/log.py +150 -5
  146. klaude_code/ui/__init__.py +4 -9
  147. klaude_code/ui/core/input.py +1 -1
  148. klaude_code/ui/core/stage_manager.py +7 -7
  149. klaude_code/ui/modes/debug/display.py +2 -1
  150. klaude_code/ui/modes/repl/__init__.py +3 -48
  151. klaude_code/ui/modes/repl/clipboard.py +5 -5
  152. klaude_code/ui/modes/repl/completers.py +487 -123
  153. klaude_code/ui/modes/repl/display.py +5 -4
  154. klaude_code/ui/modes/repl/event_handler.py +370 -117
  155. klaude_code/ui/modes/repl/input_prompt_toolkit.py +552 -105
  156. klaude_code/ui/modes/repl/key_bindings.py +146 -23
  157. klaude_code/ui/modes/repl/renderer.py +189 -99
  158. klaude_code/ui/renderers/assistant.py +9 -2
  159. klaude_code/ui/renderers/bash_syntax.py +178 -0
  160. klaude_code/ui/renderers/common.py +78 -0
  161. klaude_code/ui/renderers/developer.py +104 -48
  162. klaude_code/ui/renderers/diffs.py +87 -6
  163. klaude_code/ui/renderers/errors.py +11 -6
  164. klaude_code/ui/renderers/mermaid_viewer.py +57 -0
  165. klaude_code/ui/renderers/metadata.py +112 -76
  166. klaude_code/ui/renderers/sub_agent.py +92 -7
  167. klaude_code/ui/renderers/thinking.py +40 -18
  168. klaude_code/ui/renderers/tools.py +405 -227
  169. klaude_code/ui/renderers/user_input.py +73 -13
  170. klaude_code/ui/rich/__init__.py +10 -1
  171. klaude_code/ui/rich/cjk_wrap.py +228 -0
  172. klaude_code/ui/rich/code_panel.py +131 -0
  173. klaude_code/ui/rich/live.py +17 -0
  174. klaude_code/ui/rich/markdown.py +305 -170
  175. klaude_code/ui/rich/searchable_text.py +10 -13
  176. klaude_code/ui/rich/status.py +190 -49
  177. klaude_code/ui/rich/theme.py +135 -39
  178. klaude_code/ui/terminal/__init__.py +55 -0
  179. klaude_code/ui/terminal/color.py +1 -1
  180. klaude_code/ui/terminal/control.py +13 -22
  181. klaude_code/ui/terminal/notifier.py +44 -4
  182. klaude_code/ui/terminal/selector.py +658 -0
  183. klaude_code/ui/utils/common.py +0 -18
  184. klaude_code-1.8.0.dist-info/METADATA +377 -0
  185. klaude_code-1.8.0.dist-info/RECORD +219 -0
  186. {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/entry_points.txt +1 -0
  187. klaude_code/command/diff_cmd.py +0 -138
  188. klaude_code/command/prompt-dev-docs-update.md +0 -56
  189. klaude_code/command/prompt-dev-docs.md +0 -46
  190. klaude_code/config/list_model.py +0 -162
  191. klaude_code/core/manager/agent_manager.py +0 -127
  192. klaude_code/core/prompts/prompt-subagent-webfetch.md +0 -46
  193. klaude_code/core/tool/file/multi_edit_tool.md +0 -42
  194. klaude_code/core/tool/file/multi_edit_tool.py +0 -199
  195. klaude_code/core/tool/memory/memory_tool.md +0 -16
  196. klaude_code/core/tool/memory/memory_tool.py +0 -462
  197. klaude_code/llm/openrouter/reasoning_handler.py +0 -209
  198. klaude_code/protocol/sub_agent.py +0 -348
  199. klaude_code/ui/utils/debouncer.py +0 -42
  200. klaude_code-1.2.6.dist-info/METADATA +0 -178
  201. klaude_code-1.2.6.dist-info/RECORD +0 -167
  202. /klaude_code/core/prompts/{prompt-subagent.md → prompt-sub-agent.md} +0 -0
  203. /klaude_code/core/tool/{memory → skill}/__init__.py +0 -0
  204. /klaude_code/core/tool/{memory → skill}/skill_tool.md +0 -0
  205. {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/WHEEL +0 -0
@@ -1,20 +1,181 @@
1
1
  import json
2
- import time
3
2
  from collections.abc import AsyncGenerator
4
- from typing import override
3
+ from typing import TYPE_CHECKING, override
5
4
 
6
5
  import httpx
7
- from openai import AsyncAzureOpenAI, AsyncOpenAI, RateLimitError
6
+ import openai
7
+ from openai import AsyncAzureOpenAI, AsyncOpenAI
8
8
  from openai.types import responses
9
+ from openai.types.responses.response_create_params import ResponseCreateParamsStreaming
9
10
 
10
- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
11
+ from klaude_code.llm.client import LLMClientABC
11
12
  from klaude_code.llm.input_common import apply_config_defaults
12
13
  from klaude_code.llm.registry import register
13
14
  from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
14
- from klaude_code.llm.usage import calculate_cost
15
+ from klaude_code.llm.usage import MetadataTracker
15
16
  from klaude_code.protocol import llm_param, model
16
17
  from klaude_code.trace import DebugType, log_debug
17
18
 
19
+ if TYPE_CHECKING:
20
+ from openai import AsyncStream
21
+ from openai.types.responses import ResponseStreamEvent
22
+
23
+
24
+ def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
25
+ """Build OpenAI Responses API request parameters."""
26
+ inputs = convert_history_to_input(param.input, param.model)
27
+ tools = convert_tool_schema(param.tools)
28
+
29
+ payload: ResponseCreateParamsStreaming = {
30
+ "model": str(param.model),
31
+ "tool_choice": "auto",
32
+ "parallel_tool_calls": True,
33
+ "include": [
34
+ "reasoning.encrypted_content",
35
+ ],
36
+ "store": False,
37
+ "stream": True,
38
+ "temperature": param.temperature,
39
+ "max_output_tokens": param.max_tokens,
40
+ "input": inputs,
41
+ "instructions": param.system,
42
+ "tools": tools,
43
+ "prompt_cache_key": param.session_id or "",
44
+ }
45
+
46
+ if param.thinking and param.thinking.reasoning_effort:
47
+ payload["reasoning"] = {
48
+ "effort": param.thinking.reasoning_effort,
49
+ "summary": param.thinking.reasoning_summary,
50
+ }
51
+
52
+ if param.verbosity:
53
+ payload["text"] = {"verbosity": param.verbosity}
54
+
55
+ return payload
56
+
57
+
58
+ async def parse_responses_stream(
59
+ stream: "AsyncStream[ResponseStreamEvent]",
60
+ param: llm_param.LLMCallParameter,
61
+ metadata_tracker: MetadataTracker,
62
+ ) -> AsyncGenerator[model.ConversationItem]:
63
+ """Parse OpenAI Responses API stream events into ConversationItems."""
64
+ response_id: str | None = None
65
+
66
+ try:
67
+ async for event in stream:
68
+ log_debug(
69
+ f"[{event.type}]",
70
+ event.model_dump_json(exclude_none=True),
71
+ style="blue",
72
+ debug_type=DebugType.LLM_STREAM,
73
+ )
74
+ match event:
75
+ case responses.ResponseCreatedEvent() as event:
76
+ response_id = event.response.id
77
+ yield model.StartItem(response_id=response_id)
78
+ case responses.ResponseReasoningSummaryTextDeltaEvent() as event:
79
+ if event.delta:
80
+ metadata_tracker.record_token()
81
+ yield model.ReasoningTextDelta(
82
+ content=event.delta,
83
+ response_id=response_id,
84
+ )
85
+ case responses.ResponseReasoningSummaryTextDoneEvent() as event:
86
+ if event.text:
87
+ yield model.ReasoningTextItem(
88
+ content=event.text,
89
+ response_id=response_id,
90
+ model=str(param.model),
91
+ )
92
+ case responses.ResponseTextDeltaEvent() as event:
93
+ if event.delta:
94
+ metadata_tracker.record_token()
95
+ yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
96
+ case responses.ResponseOutputItemAddedEvent() as event:
97
+ if isinstance(event.item, responses.ResponseFunctionToolCall):
98
+ metadata_tracker.record_token()
99
+ yield model.ToolCallStartItem(
100
+ response_id=response_id,
101
+ call_id=event.item.call_id,
102
+ name=event.item.name,
103
+ )
104
+ case responses.ResponseOutputItemDoneEvent() as event:
105
+ match event.item:
106
+ case responses.ResponseReasoningItem() as item:
107
+ if item.encrypted_content:
108
+ metadata_tracker.record_token()
109
+ yield model.ReasoningEncryptedItem(
110
+ id=item.id,
111
+ encrypted_content=item.encrypted_content,
112
+ response_id=response_id,
113
+ model=str(param.model),
114
+ )
115
+ case responses.ResponseOutputMessage() as item:
116
+ metadata_tracker.record_token()
117
+ yield model.AssistantMessageItem(
118
+ content="\n".join(
119
+ [
120
+ part.text
121
+ for part in item.content
122
+ if isinstance(part, responses.ResponseOutputText)
123
+ ]
124
+ ),
125
+ id=item.id,
126
+ response_id=response_id,
127
+ )
128
+ case responses.ResponseFunctionToolCall() as item:
129
+ metadata_tracker.record_token()
130
+ yield model.ToolCallItem(
131
+ name=item.name,
132
+ arguments=item.arguments.strip(),
133
+ call_id=item.call_id,
134
+ id=item.id,
135
+ response_id=response_id,
136
+ )
137
+ case _:
138
+ pass
139
+ case responses.ResponseCompletedEvent() as event:
140
+ error_reason: str | None = None
141
+ if event.response.incomplete_details is not None:
142
+ error_reason = event.response.incomplete_details.reason
143
+ if event.response.usage is not None:
144
+ metadata_tracker.set_usage(
145
+ model.Usage(
146
+ input_tokens=event.response.usage.input_tokens,
147
+ output_tokens=event.response.usage.output_tokens,
148
+ cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
149
+ reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
150
+ context_size=event.response.usage.total_tokens,
151
+ context_limit=param.context_limit,
152
+ max_tokens=param.max_tokens,
153
+ )
154
+ )
155
+ metadata_tracker.set_model_name(str(param.model))
156
+ metadata_tracker.set_response_id(response_id)
157
+ yield metadata_tracker.finalize()
158
+ if event.response.status != "completed":
159
+ error_message = f"LLM response finished with status '{event.response.status}'"
160
+ if error_reason:
161
+ error_message = f"{error_message}: {error_reason}"
162
+ log_debug(
163
+ "[LLM status warning]",
164
+ error_message,
165
+ style="red",
166
+ debug_type=DebugType.LLM_STREAM,
167
+ )
168
+ yield model.StreamErrorItem(error=error_message)
169
+ case _:
170
+ log_debug(
171
+ "[Unhandled stream event]",
172
+ str(event),
173
+ style="red",
174
+ debug_type=DebugType.LLM_STREAM,
175
+ )
176
+ except (openai.OpenAIError, httpx.HTTPError) as e:
177
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
178
+
18
179
 
19
180
  @register(llm_param.LLMClientProtocol.RESPONSES)
20
181
  class ResponsesClient(LLMClientABC):
@@ -43,174 +204,26 @@ class ResponsesClient(LLMClientABC):
43
204
  return cls(config)
44
205
 
45
206
  @override
46
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
207
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
47
208
  param = apply_config_defaults(param, self.get_llm_config())
48
209
 
49
- request_start_time = time.time()
50
- first_token_time: float | None = None
51
- last_token_time: float | None = None
52
- response_id: str | None = None
53
-
54
- inputs = convert_history_to_input(param.input, param.model)
55
- tools = convert_tool_schema(param.tools)
56
-
57
- parallel_tool_calls = True
58
-
59
- stream = call_with_logged_payload(
60
- self.client.responses.create,
61
- model=str(param.model),
62
- tool_choice="auto",
63
- parallel_tool_calls=parallel_tool_calls, # OpenAI's Codex is always False, we try to enable it here. It seems gpt-5-codex has bugs when parallel_tool_calls is True.
64
- include=[
65
- "reasoning.encrypted_content",
66
- ],
67
- store=param.store,
68
- previous_response_id=param.previous_response_id,
69
- stream=True,
70
- temperature=param.temperature,
71
- max_output_tokens=param.max_tokens,
72
- input=inputs,
73
- instructions=param.system,
74
- tools=tools,
75
- text={
76
- "verbosity": param.verbosity,
77
- },
78
- reasoning={
79
- "effort": param.thinking.reasoning_effort,
80
- "summary": param.thinking.reasoning_summary,
81
- }
82
- if param.thinking and param.thinking.reasoning_effort
83
- else None,
84
- extra_headers={"extra": json.dumps({"session_id": param.session_id})},
85
- )
86
-
87
- try:
88
- async for event in await stream:
89
- log_debug(
90
- f"[{event.type}]",
91
- event.model_dump_json(exclude_none=True),
92
- style="blue",
93
- debug_type=DebugType.LLM_STREAM,
94
- )
95
- match event:
96
- case responses.ResponseCreatedEvent() as event:
97
- response_id = event.response.id
98
- yield model.StartItem(response_id=response_id)
99
- case responses.ResponseReasoningSummaryTextDoneEvent() as event:
100
- if event.text:
101
- yield model.ReasoningTextItem(
102
- content=event.text,
103
- response_id=response_id,
104
- model=str(param.model),
105
- )
106
- case responses.ResponseTextDeltaEvent() as event:
107
- if first_token_time is None:
108
- first_token_time = time.time()
109
- last_token_time = time.time()
110
- yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
111
- case responses.ResponseOutputItemAddedEvent() as event:
112
- if isinstance(event.item, responses.ResponseFunctionToolCall):
113
- yield model.ToolCallStartItem(
114
- response_id=response_id,
115
- call_id=event.item.call_id,
116
- name=event.item.name,
117
- )
118
- case responses.ResponseOutputItemDoneEvent() as event:
119
- match event.item:
120
- case responses.ResponseReasoningItem() as item:
121
- if item.encrypted_content:
122
- yield model.ReasoningEncryptedItem(
123
- id=item.id,
124
- encrypted_content=item.encrypted_content,
125
- response_id=response_id,
126
- model=str(param.model),
127
- )
128
- case responses.ResponseOutputMessage() as item:
129
- yield model.AssistantMessageItem(
130
- content="\n".join(
131
- [
132
- part.text
133
- for part in item.content
134
- if isinstance(part, responses.ResponseOutputText)
135
- ]
136
- ),
137
- id=item.id,
138
- response_id=response_id,
139
- )
140
- case responses.ResponseFunctionToolCall() as item:
141
- if first_token_time is None:
142
- first_token_time = time.time()
143
- last_token_time = time.time()
144
- yield model.ToolCallItem(
145
- name=item.name,
146
- arguments=item.arguments.strip(),
147
- call_id=item.call_id,
148
- id=item.id,
149
- response_id=response_id,
150
- )
151
- case _:
152
- pass
153
- case responses.ResponseCompletedEvent() as event:
154
- usage: model.Usage | None = None
155
- error_reason: str | None = None
156
- if event.response.incomplete_details is not None:
157
- error_reason = event.response.incomplete_details.reason
158
- if event.response.usage is not None:
159
- total_tokens = event.response.usage.total_tokens
160
- context_usage_percent = (
161
- (total_tokens / param.context_limit) * 100 if param.context_limit else None
162
- )
163
-
164
- throughput_tps: float | None = None
165
- first_token_latency_ms: float | None = None
210
+ metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
166
211
 
167
- if first_token_time is not None:
168
- first_token_latency_ms = (first_token_time - request_start_time) * 1000
212
+ payload = build_payload(param)
169
213
 
170
- if (
171
- first_token_time is not None
172
- and last_token_time is not None
173
- and event.response.usage.output_tokens > 0
174
- ):
175
- time_duration = last_token_time - first_token_time
176
- if time_duration >= 0.15:
177
- throughput_tps = event.response.usage.output_tokens / time_duration
214
+ log_debug(
215
+ json.dumps(payload, ensure_ascii=False, default=str),
216
+ style="yellow",
217
+ debug_type=DebugType.LLM_PAYLOAD,
218
+ )
219
+ try:
220
+ stream = await self.client.responses.create(
221
+ **payload,
222
+ extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
223
+ )
224
+ except (openai.OpenAIError, httpx.HTTPError) as e:
225
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
226
+ return
178
227
 
179
- usage = model.Usage(
180
- input_tokens=event.response.usage.input_tokens,
181
- cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
182
- reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
183
- output_tokens=event.response.usage.output_tokens,
184
- total_tokens=total_tokens,
185
- context_usage_percent=context_usage_percent,
186
- throughput_tps=throughput_tps,
187
- first_token_latency_ms=first_token_latency_ms,
188
- )
189
- calculate_cost(usage, self._config.cost)
190
- yield model.ResponseMetadataItem(
191
- usage=usage,
192
- response_id=response_id,
193
- model_name=str(param.model),
194
- status=event.response.status,
195
- error_reason=error_reason,
196
- )
197
- if event.response.status != "completed":
198
- error_message = f"LLM response finished with status '{event.response.status}'"
199
- if error_reason:
200
- error_message = f"{error_message}: {error_reason}"
201
- log_debug(
202
- "[LLM status warning]",
203
- error_message,
204
- style="red",
205
- debug_type=DebugType.LLM_STREAM,
206
- )
207
- yield model.StreamErrorItem(error=error_message)
208
- case _:
209
- log_debug(
210
- "[Unhandled stream event]",
211
- str(event),
212
- style="red",
213
- debug_type=DebugType.LLM_STREAM,
214
- )
215
- except RateLimitError as e:
216
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
228
+ async for item in parse_responses_stream(stream, param, metadata_tracker):
229
+ yield item
@@ -1,5 +1,6 @@
1
1
  # pyright: reportReturnType=false
2
2
  # pyright: reportArgumentType=false
3
+ # pyright: reportAssignmentType=false
3
4
 
4
5
  from typing import Any
5
6
 
@@ -34,7 +35,7 @@ def _build_tool_result_item(tool: model.ToolResultItem) -> responses.ResponseInp
34
35
  "call_id": tool.call_id,
35
36
  "output": content_parts,
36
37
  }
37
- return item # type: ignore[return-value]
38
+ return item
38
39
 
39
40
 
40
41
  def convert_history_to_input(
@@ -51,6 +52,7 @@ def convert_history_to_input(
51
52
  items: list[responses.ResponseInputItemParam] = []
52
53
 
53
54
  pending_reasoning_text: str | None = None
55
+ degraded_thinking_texts: list[str] = []
54
56
 
55
57
  for item in history:
56
58
  match item:
@@ -60,6 +62,9 @@ def convert_history_to_input(
60
62
  # or we can choose to output it if the next item is NOT reasoning?
61
63
  # For now, based on instructions, we pair them.
62
64
  if model_name != item.model:
65
+ # Cross-model: collect thinking text for degradation
66
+ if item.content:
67
+ degraded_thinking_texts.append(item.content)
63
68
  continue
64
69
  pending_reasoning_text = item.content
65
70
 
@@ -130,6 +135,20 @@ def convert_history_to_input(
130
135
  # Other items may be Metadata
131
136
  continue
132
137
 
138
+ # Cross-model: degrade thinking to plain text with <thinking> tags
139
+ if degraded_thinking_texts:
140
+ degraded_item: responses.ResponseInputItemParam = {
141
+ "type": "message",
142
+ "role": "assistant",
143
+ "content": [
144
+ {
145
+ "type": "output_text",
146
+ "text": "<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>",
147
+ }
148
+ ],
149
+ }
150
+ items.insert(0, degraded_item)
151
+
133
152
  return items
134
153
 
135
154
 
klaude_code/llm/usage.py CHANGED
@@ -14,6 +14,9 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> No
14
14
  if cost_config is None:
15
15
  return
16
16
 
17
+ # Set currency
18
+ usage.currency = cost_config.currency
19
+
17
20
  # Non-cached input tokens cost
18
21
  non_cached_input = usage.input_tokens - usage.cached_tokens
19
22
  usage.input_cost = (non_cached_input / 1_000_000) * cost_config.input
@@ -24,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> No
24
27
  # Cache read cost
25
28
  usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read
26
29
 
27
- # Total cost
28
- usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
29
-
30
30
 
31
31
  class MetadataTracker:
32
32
  """Tracks timing and metadata for LLM responses."""
@@ -81,7 +81,7 @@ class MetadataTracker:
81
81
  ) * 1000
82
82
 
83
83
  if self._last_token_time is not None and self._metadata_item.usage.output_tokens > 0:
84
- time_duration = self._last_token_time - self._first_token_time
84
+ time_duration = self._last_token_time - self._request_start_time
85
85
  if time_duration >= 0.15:
86
86
  self._metadata_item.usage.throughput_tps = self._metadata_item.usage.output_tokens / time_duration
87
87
 
@@ -92,18 +92,23 @@ class MetadataTracker:
92
92
  return self._metadata_item
93
93
 
94
94
 
95
- def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
96
- """Convert OpenAI CompletionUsage to internal Usage model."""
97
- total_tokens = usage.total_tokens
98
- context_usage_percent = (total_tokens / context_limit) * 100 if context_limit else None
95
+ def convert_usage(
96
+ usage: openai.types.CompletionUsage,
97
+ context_limit: int | None = None,
98
+ max_tokens: int | None = None,
99
+ ) -> model.Usage:
100
+ """Convert OpenAI CompletionUsage to internal Usage model.
101
+
102
+ context_token is set to total_tokens from the API response,
103
+ representing the actual context window usage for this turn.
104
+ """
99
105
  return model.Usage(
100
106
  input_tokens=usage.prompt_tokens,
101
107
  cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
102
108
  reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
103
109
  or 0,
104
110
  output_tokens=usage.completion_tokens,
105
- total_tokens=total_tokens,
106
- context_usage_percent=context_usage_percent,
107
- throughput_tps=None,
108
- first_token_latency_ms=None,
111
+ context_size=usage.total_tokens,
112
+ context_limit=context_limit,
113
+ max_tokens=max_tokens,
109
114
  )
@@ -1,9 +1,20 @@
1
+ from dataclasses import dataclass
1
2
  from enum import Enum
2
3
 
3
4
 
5
+ @dataclass(frozen=True, slots=True)
6
+ class CommandInfo:
7
+ """Lightweight command metadata for UI purposes (no logic)."""
8
+
9
+ name: str
10
+ summary: str
11
+ support_addition_params: bool = False
12
+ placeholder: str = ""
13
+
14
+
4
15
  class CommandName(str, Enum):
5
16
  INIT = "init"
6
- DIFF = "diff"
17
+ DEBUG = "debug"
7
18
  HELP = "help"
8
19
  MODEL = "model"
9
20
  COMPACT = "compact"
@@ -11,7 +22,12 @@ class CommandName(str, Enum):
11
22
  CLEAR = "clear"
12
23
  TERMINAL_SETUP = "terminal-setup"
13
24
  EXPORT = "export"
25
+ EXPORT_ONLINE = "export-online"
14
26
  STATUS = "status"
27
+ RELEASE_NOTES = "release-notes"
28
+ THINKING = "thinking"
29
+ FORK_SESSION = "fork-session"
30
+ RESUME = "resume"
15
31
  # PLAN and DOC are dynamically registered now, but kept here if needed for reference
16
32
  # or we can remove them if no code explicitly imports them.
17
33
  # PLAN = "plan"
@@ -16,6 +16,7 @@ class EndEvent(BaseModel):
16
16
  class ErrorEvent(BaseModel):
17
17
  error_message: str
18
18
  can_retry: bool = False
19
+ session_id: str | None = None
19
20
 
20
21
 
21
22
  class TaskStartEvent(BaseModel):
@@ -26,6 +27,7 @@ class TaskStartEvent(BaseModel):
26
27
  class TaskFinishEvent(BaseModel):
27
28
  session_id: str
28
29
  task_result: str
30
+ has_structured_output: bool = False
29
31
 
30
32
 
31
33
  class TurnStartEvent(BaseModel):
@@ -54,6 +56,12 @@ class ThinkingEvent(BaseModel):
54
56
  content: str
55
57
 
56
58
 
59
+ class ThinkingDeltaEvent(BaseModel):
60
+ session_id: str
61
+ response_id: str | None = None
62
+ content: str
63
+
64
+
57
65
  class AssistantMessageDeltaEvent(BaseModel):
58
66
  session_id: str
59
67
  response_id: str | None = None
@@ -79,7 +87,6 @@ class ToolCallEvent(BaseModel):
79
87
  tool_call_id: str
80
88
  tool_name: str
81
89
  arguments: str
82
- is_replay: bool = False
83
90
 
84
91
 
85
92
  class ToolResultEvent(BaseModel):
@@ -90,16 +97,23 @@ class ToolResultEvent(BaseModel):
90
97
  result: str
91
98
  ui_extra: model.ToolResultUIExtra | None = None
92
99
  status: Literal["success", "error"]
93
- is_replay: bool = False
100
+ task_metadata: model.TaskMetadata | None = None # Sub-agent task metadata
94
101
 
95
102
 
96
103
  class ResponseMetadataEvent(BaseModel):
97
- """Showing model name, usage tokens, task duration, and turn count."""
104
+ """Internal event for turn-level metadata. Not exposed to UI directly."""
98
105
 
99
106
  session_id: str
100
107
  metadata: model.ResponseMetadataItem
101
108
 
102
109
 
110
+ class TaskMetadataEvent(BaseModel):
111
+ """Task-level aggregated metadata for UI display."""
112
+
113
+ session_id: str
114
+ metadata: model.TaskMetadataItem
115
+
116
+
103
117
  class UserMessageEvent(BaseModel):
104
118
  session_id: str
105
119
  content: str
@@ -120,16 +134,26 @@ class TodoChangeEvent(BaseModel):
120
134
  todos: list[model.TodoItem]
121
135
 
122
136
 
137
+ class ContextUsageEvent(BaseModel):
138
+ """Real-time context usage update during task execution."""
139
+
140
+ session_id: str
141
+ context_percent: float # Context usage percentage (0-100)
142
+
143
+
123
144
  HistoryItemEvent = (
124
145
  ThinkingEvent
146
+ | TaskStartEvent
147
+ | TaskFinishEvent
125
148
  | TurnStartEvent # This event is used for UI to print new empty line
126
149
  | AssistantMessageEvent
127
150
  | ToolCallEvent
128
151
  | ToolResultEvent
129
152
  | UserMessageEvent
130
- | ResponseMetadataEvent
153
+ | TaskMetadataEvent
131
154
  | InterruptEvent
132
155
  | DeveloperMessageEvent
156
+ | ErrorEvent
133
157
  )
134
158
 
135
159
 
@@ -144,11 +168,13 @@ Event = (
144
168
  TaskStartEvent
145
169
  | TaskFinishEvent
146
170
  | ThinkingEvent
171
+ | ThinkingDeltaEvent
147
172
  | AssistantMessageDeltaEvent
148
173
  | AssistantMessageEvent
149
174
  | ToolCallEvent
150
175
  | ToolResultEvent
151
176
  | ResponseMetadataEvent
177
+ | TaskMetadataEvent
152
178
  | ReplayHistoryEvent
153
179
  | ErrorEvent
154
180
  | EndEvent
@@ -160,4 +186,5 @@ Event = (
160
186
  | TurnStartEvent
161
187
  | TurnEndEvent
162
188
  | TurnToolCallStartEvent
189
+ | ContextUsageEvent
163
190
  )