klaude-code 1.2.7__py3-none-any.whl → 1.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. klaude_code/auth/codex/__init__.py +1 -1
  2. klaude_code/command/__init__.py +2 -0
  3. klaude_code/command/prompt-deslop.md +14 -0
  4. klaude_code/command/release_notes_cmd.py +86 -0
  5. klaude_code/command/status_cmd.py +92 -54
  6. klaude_code/core/agent.py +13 -19
  7. klaude_code/core/manager/sub_agent_manager.py +5 -1
  8. klaude_code/core/prompt.py +38 -28
  9. klaude_code/core/reminders.py +4 -4
  10. klaude_code/core/task.py +60 -45
  11. klaude_code/core/tool/__init__.py +2 -0
  12. klaude_code/core/tool/file/apply_patch_tool.py +1 -1
  13. klaude_code/core/tool/file/edit_tool.py +1 -1
  14. klaude_code/core/tool/file/multi_edit_tool.py +1 -1
  15. klaude_code/core/tool/file/write_tool.py +1 -1
  16. klaude_code/core/tool/memory/memory_tool.py +2 -2
  17. klaude_code/core/tool/sub_agent_tool.py +2 -1
  18. klaude_code/core/tool/todo/todo_write_tool.py +1 -1
  19. klaude_code/core/tool/todo/update_plan_tool.py +1 -1
  20. klaude_code/core/tool/tool_context.py +21 -4
  21. klaude_code/core/tool/tool_runner.py +5 -8
  22. klaude_code/core/tool/web/mermaid_tool.py +1 -4
  23. klaude_code/core/turn.py +90 -62
  24. klaude_code/llm/anthropic/client.py +15 -46
  25. klaude_code/llm/client.py +1 -1
  26. klaude_code/llm/codex/client.py +44 -30
  27. klaude_code/llm/input_common.py +0 -6
  28. klaude_code/llm/openai_compatible/client.py +29 -73
  29. klaude_code/llm/openai_compatible/input.py +6 -4
  30. klaude_code/llm/openai_compatible/stream_processor.py +82 -0
  31. klaude_code/llm/openrouter/client.py +29 -59
  32. klaude_code/llm/openrouter/input.py +4 -27
  33. klaude_code/llm/responses/client.py +49 -79
  34. klaude_code/llm/usage.py +51 -10
  35. klaude_code/protocol/commands.py +1 -0
  36. klaude_code/protocol/events.py +12 -2
  37. klaude_code/protocol/model.py +142 -26
  38. klaude_code/protocol/sub_agent.py +5 -1
  39. klaude_code/session/export.py +51 -27
  40. klaude_code/session/session.py +33 -16
  41. klaude_code/session/templates/export_session.html +4 -1
  42. klaude_code/ui/modes/repl/__init__.py +1 -5
  43. klaude_code/ui/modes/repl/event_handler.py +153 -54
  44. klaude_code/ui/modes/repl/renderer.py +6 -4
  45. klaude_code/ui/renderers/developer.py +35 -25
  46. klaude_code/ui/renderers/metadata.py +68 -30
  47. klaude_code/ui/renderers/tools.py +53 -87
  48. klaude_code/ui/rich/markdown.py +5 -5
  49. {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/METADATA +1 -1
  50. {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/RECORD +52 -49
  51. {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/WHEEL +0 -0
  52. {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/client.py CHANGED
@@ -1,5 +1,5 @@
  from collections.abc import AsyncGenerator
- from typing import Literal, override
+ from typing import override
 
  import httpx
  import openai
@@ -7,7 +7,7 @@ import openai
  from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.openai_compatible.input import convert_tool_schema
- from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
+ from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager
  from klaude_code.llm.openrouter.input import convert_history_to_input, is_claude_model
  from klaude_code.llm.openrouter.reasoning_handler import ReasoningDetail, ReasoningStreamHandler
  from klaude_code.llm.registry import register
@@ -77,36 +77,14 @@ class OpenRouterClient(LLMClientABC):
  extra_headers=extra_headers, # pyright: ignore[reportUnknownArgumentType]
  )
 
- stage: Literal["waiting", "reasoning", "assistant", "tool", "done"] = "waiting"
- response_id: str | None = None
- accumulated_content: list[str] = []
- accumulated_tool_calls: ToolCallAccumulatorABC = BasicToolCallAccumulator()
- emitted_tool_start_indices: set[int] = set()
  reasoning_handler = ReasoningStreamHandler(
  param_model=str(param.model),
- response_id=response_id,
+ response_id=None,
+ )
+ state = StreamStateManager(
+ param_model=str(param.model),
+ reasoning_flusher=reasoning_handler.flush,
  )
-
- def flush_reasoning_items() -> list[model.ConversationItem]:
- return reasoning_handler.flush()
-
- def flush_assistant_items() -> list[model.ConversationItem]:
- nonlocal accumulated_content
- if len(accumulated_content) == 0:
- return []
- item = model.AssistantMessageItem(
- content="".join(accumulated_content),
- response_id=response_id,
- )
- accumulated_content = []
- return [item]
-
- def flush_tool_call_items() -> list[model.ToolCallItem]:
- nonlocal accumulated_tool_calls
- items: list[model.ToolCallItem] = accumulated_tool_calls.get()
- if items:
- accumulated_tool_calls.chunks_by_step = [] # pyright: ignore[reportAttributeAccessIssue]
- return items
 
  try:
  async for event in await stream:
@@ -115,11 +93,10 @@ class OpenRouterClient(LLMClientABC):
  style="blue",
  debug_type=DebugType.LLM_STREAM,
  )
- if not response_id and event.id:
- response_id = event.id
- reasoning_handler.set_response_id(response_id)
- accumulated_tool_calls.response_id = response_id
- yield model.StartItem(response_id=response_id)
+ if not state.response_id and event.id:
+ state.set_response_id(event.id)
+ reasoning_handler.set_response_id(event.id)
+ yield model.StartItem(response_id=event.id)
  if (
  event.usage is not None and event.usage.completion_tokens is not None # pyright: ignore[reportUnnecessaryComparison]
  ): # gcp gemini will return None usage field
@@ -140,7 +117,7 @@ class OpenRouterClient(LLMClientABC):
  try:
  reasoning_detail = ReasoningDetail.model_validate(item)
  metadata_tracker.record_token()
- stage = "reasoning"
+ state.stage = "reasoning"
  for conversation_item in reasoning_handler.on_detail(reasoning_detail):
  yield conversation_item
  except Exception as e:
@@ -148,53 +125,46 @@ class OpenRouterClient(LLMClientABC):
 
  # Assistant
  if delta.content and (
- stage == "assistant" or delta.content.strip()
+ state.stage == "assistant" or delta.content.strip()
  ): # Process all content in assistant stage, filter empty content in reasoning stage
  metadata_tracker.record_token()
- if stage == "reasoning":
- for item in flush_reasoning_items():
+ if state.stage == "reasoning":
+ for item in state.flush_reasoning():
  yield item
- stage = "assistant"
- accumulated_content.append(delta.content)
+ state.stage = "assistant"
+ state.accumulated_content.append(delta.content)
  yield model.AssistantMessageDelta(
  content=delta.content,
- response_id=response_id,
+ response_id=state.response_id,
  )
 
  # Tool
  if delta.tool_calls and len(delta.tool_calls) > 0:
  metadata_tracker.record_token()
- if stage == "reasoning":
- for item in flush_reasoning_items():
+ if state.stage == "reasoning":
+ for item in state.flush_reasoning():
  yield item
- elif stage == "assistant":
- for item in flush_assistant_items():
+ elif state.stage == "assistant":
+ for item in state.flush_assistant():
  yield item
- stage = "tool"
+ state.stage = "tool"
  # Emit ToolCallStartItem for new tool calls
  for tc in delta.tool_calls:
- if tc.index not in emitted_tool_start_indices and tc.function and tc.function.name:
- emitted_tool_start_indices.add(tc.index)
+ if tc.index not in state.emitted_tool_start_indices and tc.function and tc.function.name:
+ state.emitted_tool_start_indices.add(tc.index)
  yield model.ToolCallStartItem(
- response_id=response_id,
+ response_id=state.response_id,
  call_id=tc.id or "",
  name=tc.function.name,
  )
- accumulated_tool_calls.add(delta.tool_calls)
+ state.accumulated_tool_calls.add(delta.tool_calls)
 
  except (openai.OpenAIError, httpx.HTTPError) as e:
  yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
 
  # Finalize
- for item in flush_reasoning_items():
- yield item
-
- for item in flush_assistant_items():
+ for item in state.flush_all():
  yield item
 
- if stage == "tool":
- for tool_call_item in flush_tool_call_items():
- yield tool_call_item
-
- metadata_tracker.set_response_id(response_id)
+ metadata_tracker.set_response_id(state.response_id)
  yield metadata_tracker.finalize()
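The per-stream state that used to live as locals in OpenRouterClient.call now lives in the new klaude_code/llm/openai_compatible/stream_processor.py (+82 lines), which this view does not expand. From the call sites above, here is a rough sketch of the surface StreamStateManager would need to expose; the names and internals below are inferred from usage, not the published implementation.

# Hypothetical sketch only: reconstructed from how the OpenRouter client above
# drives the object; the real stream_processor.py may differ.
from collections.abc import Callable
from typing import Literal

from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator
from klaude_code.protocol import model


class StreamStateManager:
    def __init__(
        self,
        param_model: str,
        reasoning_flusher: Callable[[], list[model.ConversationItem]],
    ) -> None:
        self.param_model = param_model
        self.reasoning_flusher = reasoning_flusher
        self.stage: Literal["waiting", "reasoning", "assistant", "tool", "done"] = "waiting"
        self.response_id: str | None = None
        self.accumulated_content: list[str] = []
        self.accumulated_tool_calls = BasicToolCallAccumulator()  # assumed to move here along with the client import
        self.emitted_tool_start_indices: set[int] = set()

    def set_response_id(self, response_id: str) -> None:
        # Mirrors the removed inline bookkeeping: propagate the id to the accumulator.
        self.response_id = response_id
        self.accumulated_tool_calls.response_id = response_id

    def flush_reasoning(self) -> list[model.ConversationItem]:
        return self.reasoning_flusher()

    def flush_assistant(self) -> list[model.ConversationItem]:
        if not self.accumulated_content:
            return []
        item = model.AssistantMessageItem(content="".join(self.accumulated_content), response_id=self.response_id)
        self.accumulated_content = []
        return [item]

    def flush_tool_calls(self) -> list[model.ToolCallItem]:
        return self.accumulated_tool_calls.get()

    def flush_all(self) -> list[model.ConversationItem]:
        # Same order the old finalize block used: reasoning, assistant text, then tool calls.
        return [*self.flush_reasoning(), *self.flush_assistant(), *self.flush_tool_calls()]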
klaude_code/llm/openrouter/input.py CHANGED
@@ -7,9 +7,9 @@
  # pyright: reportGeneralTypeIssues=false
 
  from openai.types import chat
- from openai.types.chat import ChatCompletionContentPartParam
 
- from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, merge_reminder_text, parse_message_groups
+ from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, parse_message_groups
+ from klaude_code.llm.openai_compatible.input import tool_group_to_openai_message, user_group_to_openai_message
  from klaude_code.protocol import model
 
 
@@ -25,29 +25,6 @@ def is_gemini_model(model_name: str | None) -> bool:
  return model_name is not None and model_name.startswith("google/gemini")
 
 
- def _user_group_to_message(group: UserGroup) -> chat.ChatCompletionMessageParam:
- parts: list[ChatCompletionContentPartParam] = []
- for text in group.text_parts:
- parts.append({"type": "text", "text": text + "\n"})
- for image in group.images:
- parts.append({"type": "image_url", "image_url": {"url": image.image_url.url}})
- if not parts:
- parts.append({"type": "text", "text": ""})
- return {"role": "user", "content": parts}
-
-
- def _tool_group_to_message(group: ToolGroup) -> chat.ChatCompletionMessageParam:
- merged_text = merge_reminder_text(
- group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
- group.reminder_texts,
- )
- return {
- "role": "tool",
- "content": [{"type": "text", "text": merged_text}],
- "tool_call_id": group.tool_result.call_id,
- }
-
-
  def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -> chat.ChatCompletionMessageParam:
  assistant_message: dict[str, object] = {"role": "assistant"}
 
@@ -150,9 +127,9 @@ def convert_history_to_input(
  for group in parse_message_groups(history):
  match group:
  case UserGroup():
- messages.append(_user_group_to_message(group))
+ messages.append(user_group_to_openai_message(group))
  case ToolGroup():
- messages.append(_tool_group_to_message(group))
+ messages.append(tool_group_to_openai_message(group))
  case AssistantGroup():
  messages.append(_assistant_group_to_message(group, model_name))
 
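The two private helpers deleted above are replaced by shared functions imported from klaude_code.llm.openai_compatible.input (that file changes +6 -4 in this release but is not expanded here). Presumably the shared versions mirror the removed code; a sketch under that assumption:

# Assumed shape of the shared helpers, inferred from the private functions
# removed above; the actual openai_compatible/input.py is not shown in this diff.
from openai.types import chat
from openai.types.chat import ChatCompletionContentPartParam

from klaude_code.llm.input_common import ToolGroup, UserGroup, merge_reminder_text


def user_group_to_openai_message(group: UserGroup) -> chat.ChatCompletionMessageParam:
    # Flatten user text and images into OpenAI-style content parts.
    parts: list[ChatCompletionContentPartParam] = []
    for text in group.text_parts:
        parts.append({"type": "text", "text": text + "\n"})
    for image in group.images:
        parts.append({"type": "image_url", "image_url": {"url": image.image_url.url}})
    if not parts:
        parts.append({"type": "text", "text": ""})
    return {"role": "user", "content": parts}


def tool_group_to_openai_message(group: ToolGroup) -> chat.ChatCompletionMessageParam:
    # Merge tool output with any pending reminder text before returning the tool message.
    merged_text = merge_reminder_text(
        group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
        group.reminder_texts,
    )
    return {
        "role": "tool",
        "content": [{"type": "text", "text": merged_text}],
        "tool_call_id": group.tool_result.call_id,
    }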
klaude_code/llm/responses/client.py CHANGED
@@ -1,17 +1,17 @@
  import json
- import time
  from collections.abc import AsyncGenerator
  from typing import TYPE_CHECKING, override
 
  import httpx
- from openai import AsyncAzureOpenAI, AsyncOpenAI, RateLimitError
+ import openai
+ from openai import AsyncAzureOpenAI, AsyncOpenAI
  from openai.types import responses
 
  from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.registry import register
  from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
- from klaude_code.llm.usage import calculate_cost
+ from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
  from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug
 
@@ -23,12 +23,9 @@ if TYPE_CHECKING:
  async def parse_responses_stream(
  stream: "AsyncStream[ResponseStreamEvent]",
  param: llm_param.LLMCallParameter,
- cost_config: llm_param.Cost | None,
- request_start_time: float,
+ metadata_tracker: MetadataTracker,
  ) -> AsyncGenerator[model.ConversationItem, None]:
  """Parse OpenAI Responses API stream events into ConversationItems."""
- first_token_time: float | None = None
- last_token_time: float | None = None
  response_id: str | None = None
 
  try:
@@ -51,9 +48,7 @@
  model=str(param.model),
  )
  case responses.ResponseTextDeltaEvent() as event:
- if first_token_time is None:
- first_token_time = time.time()
- last_token_time = time.time()
+ metadata_tracker.record_token()
  yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
  case responses.ResponseOutputItemAddedEvent() as event:
  if isinstance(event.item, responses.ResponseFunctionToolCall):
@@ -85,9 +80,7 @@
  response_id=response_id,
  )
  case responses.ResponseFunctionToolCall() as item:
- if first_token_time is None:
- first_token_time = time.time()
- last_token_time = time.time()
+ metadata_tracker.record_token()
  yield model.ToolCallItem(
  name=item.name,
  arguments=item.arguments.strip(),
@@ -98,49 +91,22 @@
  case _:
  pass
  case responses.ResponseCompletedEvent() as event:
- usage: model.Usage | None = None
  error_reason: str | None = None
  if event.response.incomplete_details is not None:
  error_reason = event.response.incomplete_details.reason
  if event.response.usage is not None:
- total_tokens = event.response.usage.total_tokens
- context_usage_percent = (
- (total_tokens / param.context_limit) * 100 if param.context_limit else None
- )
-
- throughput_tps: float | None = None
- first_token_latency_ms: float | None = None
-
- if first_token_time is not None:
- first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
- if (
- first_token_time is not None
- and last_token_time is not None
- and event.response.usage.output_tokens > 0
- ):
- time_duration = last_token_time - first_token_time
- if time_duration >= 0.15:
- throughput_tps = event.response.usage.output_tokens / time_duration
-
- usage = model.Usage(
+ usage = convert_responses_usage(
  input_tokens=event.response.usage.input_tokens,
+ output_tokens=event.response.usage.output_tokens,
  cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
  reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
- output_tokens=event.response.usage.output_tokens,
- total_tokens=total_tokens,
- context_usage_percent=context_usage_percent,
- throughput_tps=throughput_tps,
- first_token_latency_ms=first_token_latency_ms,
+ total_tokens=event.response.usage.total_tokens,
+ context_limit=param.context_limit,
  )
- calculate_cost(usage, cost_config)
- yield model.ResponseMetadataItem(
- usage=usage,
- response_id=response_id,
- model_name=str(param.model),
- status=event.response.status,
- error_reason=error_reason,
- )
+ metadata_tracker.set_usage(usage)
+ metadata_tracker.set_model_name(str(param.model))
+ metadata_tracker.set_response_id(response_id)
+ yield metadata_tracker.finalize()
  if event.response.status != "completed":
  error_message = f"LLM response finished with status '{event.response.status}'"
  if error_reason:
@@ -159,7 +125,7 @@
  style="red",
  debug_type=DebugType.LLM_STREAM,
  )
- except RateLimitError as e:
+ except (openai.OpenAIError, httpx.HTTPError) as e:
  yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
 
 
@@ -193,39 +159,43 @@ class ResponsesClient(LLMClientABC):
  async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
  param = apply_config_defaults(param, self.get_llm_config())
 
- request_start_time = time.time()
+ metadata_tracker = MetadataTracker(cost_config=self._config.cost)
 
  inputs = convert_history_to_input(param.input, param.model)
  tools = convert_tool_schema(param.tools)
 
- stream = await call_with_logged_payload(
- self.client.responses.create,
- model=str(param.model),
- tool_choice="auto",
- parallel_tool_calls=True,
- include=[
- "reasoning.encrypted_content",
- ],
- store=param.store,
- previous_response_id=param.previous_response_id,
- stream=True,
- temperature=param.temperature,
- max_output_tokens=param.max_tokens,
- input=inputs,
- instructions=param.system,
- tools=tools,
- text={
- "verbosity": param.verbosity,
- },
- prompt_cache_key=param.session_id or "",
- reasoning={
- "effort": param.thinking.reasoning_effort,
- "summary": param.thinking.reasoning_summary,
- }
- if param.thinking and param.thinking.reasoning_effort
- else None,
- extra_headers={"extra": json.dumps({"session_id": param.session_id})},
- )
+ try:
+ stream = await call_with_logged_payload(
+ self.client.responses.create,
+ model=str(param.model),
+ tool_choice="auto",
+ parallel_tool_calls=True,
+ include=[
+ "reasoning.encrypted_content",
+ ],
+ store=param.store,
+ previous_response_id=param.previous_response_id,
+ stream=True,
+ temperature=param.temperature,
+ max_output_tokens=param.max_tokens,
+ input=inputs,
+ instructions=param.system,
+ tools=tools,
+ text={
+ "verbosity": param.verbosity,
+ },
+ prompt_cache_key=param.session_id or "",
+ reasoning={
+ "effort": param.thinking.reasoning_effort,
+ "summary": param.thinking.reasoning_summary,
+ }
+ if param.thinking and param.thinking.reasoning_effort
+ else None,
+ extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
+ )
+ except (openai.OpenAIError, httpx.HTTPError) as e:
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+ return
 
- async for item in parse_responses_stream(stream, param, self._config.cost, request_start_time):
+ async for item in parse_responses_stream(stream, param, metadata_tracker):
  yield item
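Net effect of the ResponsesClient changes: per-request timing and cost math move into the shared MetadataTracker, and transport errors while opening the stream now surface as a StreamErrorItem instead of propagating. A condensed, illustrative sketch of the new flow; make_stream below is a hypothetical stand-in for the call_with_logged_payload invocation shown above, not a real helper.

# Illustrative only: condensed from the diff above, with request construction elided.
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
    metadata_tracker = MetadataTracker(cost_config=self._config.cost)
    try:
        stream = await make_stream(param)  # placeholder for self.client.responses.create(...)
    except (openai.OpenAIError, httpx.HTTPError) as e:
        # Failures while opening the stream become a StreamErrorItem, not an exception.
        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
        return
    # The parser owns token timing and usage via the tracker; the client no longer
    # keeps request_start_time or computes throughput itself.
    async for item in parse_responses_stream(stream, param, metadata_tracker):
        yield item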
klaude_code/llm/usage.py CHANGED
@@ -27,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> None:
  # Cache read cost
  usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read
 
- # Total cost
- usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
-
 
  class MetadataTracker:
  """Tracks timing and metadata for LLM responses."""
@@ -96,17 +93,61 @@
 
 
  def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
- """Convert OpenAI CompletionUsage to internal Usage model."""
- total_tokens = usage.total_tokens
- context_usage_percent = (total_tokens / context_limit) * 100 if context_limit else None
+ """Convert OpenAI CompletionUsage to internal Usage model.
+
+ context_window_size is set to total_tokens from the API response,
+ representing the actual context window usage for this turn.
+ """
  return model.Usage(
  input_tokens=usage.prompt_tokens,
  cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
  reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
  or 0,
  output_tokens=usage.completion_tokens,
- total_tokens=total_tokens,
- context_usage_percent=context_usage_percent,
- throughput_tps=None,
- first_token_latency_ms=None,
+ context_window_size=usage.total_tokens,
+ context_limit=context_limit,
+ )
+
+
+ def convert_anthropic_usage(
+ input_tokens: int,
+ output_tokens: int,
+ cached_tokens: int,
+ context_limit: int | None = None,
+ ) -> model.Usage:
+ """Convert Anthropic usage data to internal Usage model.
+
+ context_window_size is computed from input + cached + output tokens,
+ representing the actual context window usage for this turn.
+ """
+ context_window_size = input_tokens + cached_tokens + output_tokens
+ return model.Usage(
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ cached_tokens=cached_tokens,
+ context_window_size=context_window_size,
+ context_limit=context_limit,
+ )
+
+
+ def convert_responses_usage(
+ input_tokens: int,
+ output_tokens: int,
+ cached_tokens: int,
+ reasoning_tokens: int,
+ total_tokens: int,
+ context_limit: int | None = None,
+ ) -> model.Usage:
+ """Convert OpenAI Responses API usage data to internal Usage model.
+
+ context_window_size is set to total_tokens from the API response,
+ representing the actual context window usage for this turn.
+ """
+ return model.Usage(
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ cached_tokens=cached_tokens,
+ reasoning_tokens=reasoning_tokens,
+ context_window_size=total_tokens,
+ context_limit=context_limit,
  )
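The converters differ mainly in how context_window_size is obtained. An illustrative comparison using made-up token counts; both functions are defined in the diff above.

# Example values only.
from klaude_code.llm.usage import convert_anthropic_usage, convert_responses_usage

# The Responses API reports total_tokens directly, so it becomes context_window_size.
responses_usage = convert_responses_usage(
    input_tokens=1200,
    output_tokens=300,
    cached_tokens=800,
    reasoning_tokens=50,
    total_tokens=1500,
    context_limit=200_000,
)

# Anthropic reports no total, so context_window_size is derived:
# input + cached + output = 1200 + 800 + 300 = 2300.
anthropic_usage = convert_anthropic_usage(
    input_tokens=1200,
    output_tokens=300,
    cached_tokens=800,
    context_limit=200_000,
)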
klaude_code/protocol/commands.py CHANGED
@@ -12,6 +12,7 @@ class CommandName(str, Enum):
  TERMINAL_SETUP = "terminal-setup"
  EXPORT = "export"
  STATUS = "status"
+ RELEASE_NOTES = "release-notes"
  # PLAN and DOC are dynamically registered now, but kept here if needed for reference
  # or we can remove them if no code explicitly imports them.
  # PLAN = "plan"
klaude_code/protocol/events.py CHANGED
@@ -91,15 +91,23 @@ class ToolResultEvent(BaseModel):
  ui_extra: model.ToolResultUIExtra | None = None
  status: Literal["success", "error"]
  is_replay: bool = False
+ task_metadata: model.TaskMetadata | None = None # Sub-agent task metadata
 
 
  class ResponseMetadataEvent(BaseModel):
- """Showing model name, usage tokens, task duration, and turn count."""
+ """Internal event for turn-level metadata. Not exposed to UI directly."""
 
  session_id: str
  metadata: model.ResponseMetadataItem
 
 
+ class TaskMetadataEvent(BaseModel):
+ """Task-level aggregated metadata for UI display."""
+
+ session_id: str
+ metadata: model.TaskMetadataItem
+
+
  class UserMessageEvent(BaseModel):
  session_id: str
  content: str
@@ -127,9 +135,10 @@ HistoryItemEvent = (
  | ToolCallEvent
  | ToolResultEvent
  | UserMessageEvent
- | ResponseMetadataEvent
+ | TaskMetadataEvent
  | InterruptEvent
  | DeveloperMessageEvent
+ | ErrorEvent
  )
 
 
@@ -149,6 +158,7 @@ Event = (
  | ToolCallEvent
  | ToolResultEvent
  | ResponseMetadataEvent
+ | TaskMetadataEvent
  | ReplayHistoryEvent
  | ErrorEvent
  | EndEvent
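With ResponseMetadataEvent now internal and TaskMetadataEvent added to both unions, UI code is expected to render the aggregated task-level metadata rather than per-turn metadata. A hypothetical handler branch; the renderer below is an assumed stand-in, not the actual klaude_code/ui code.

# Hypothetical sketch only; render_task_metadata is a placeholder, not part of klaude_code.
from klaude_code.protocol import model
from klaude_code.protocol.events import Event, ResponseMetadataEvent, TaskMetadataEvent


def render_task_metadata(meta: model.TaskMetadataItem) -> None:
    # Stand-in for the real renderer in klaude_code/ui/renderers/metadata.py.
    print(meta)


def handle_event(event: Event) -> None:
    match event:
        case TaskMetadataEvent(metadata=meta):
            render_task_metadata(meta)  # task-level aggregate shown to the user
        case ResponseMetadataEvent():
            pass  # turn-level metadata stays internal after this change
        case _:
            ...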