klaude-code 1.2.8__py3-none-any.whl → 1.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. klaude_code/auth/codex/__init__.py +1 -1
  2. klaude_code/command/__init__.py +2 -0
  3. klaude_code/command/prompt-deslop.md +14 -0
  4. klaude_code/command/release_notes_cmd.py +86 -0
  5. klaude_code/command/status_cmd.py +92 -54
  6. klaude_code/core/agent.py +13 -19
  7. klaude_code/core/manager/sub_agent_manager.py +5 -1
  8. klaude_code/core/prompt.py +38 -28
  9. klaude_code/core/reminders.py +4 -4
  10. klaude_code/core/task.py +59 -40
  11. klaude_code/core/tool/__init__.py +2 -0
  12. klaude_code/core/tool/file/apply_patch_tool.py +1 -1
  13. klaude_code/core/tool/file/edit_tool.py +1 -1
  14. klaude_code/core/tool/file/multi_edit_tool.py +1 -1
  15. klaude_code/core/tool/file/write_tool.py +1 -1
  16. klaude_code/core/tool/memory/memory_tool.py +2 -2
  17. klaude_code/core/tool/sub_agent_tool.py +2 -1
  18. klaude_code/core/tool/todo/todo_write_tool.py +1 -1
  19. klaude_code/core/tool/todo/update_plan_tool.py +1 -1
  20. klaude_code/core/tool/tool_context.py +21 -4
  21. klaude_code/core/tool/tool_runner.py +5 -8
  22. klaude_code/core/tool/web/mermaid_tool.py +1 -4
  23. klaude_code/core/turn.py +40 -37
  24. klaude_code/llm/anthropic/client.py +13 -44
  25. klaude_code/llm/client.py +1 -1
  26. klaude_code/llm/codex/client.py +4 -3
  27. klaude_code/llm/input_common.py +0 -6
  28. klaude_code/llm/openai_compatible/client.py +28 -72
  29. klaude_code/llm/openai_compatible/input.py +6 -4
  30. klaude_code/llm/openai_compatible/stream_processor.py +82 -0
  31. klaude_code/llm/openrouter/client.py +29 -59
  32. klaude_code/llm/openrouter/input.py +4 -27
  33. klaude_code/llm/responses/client.py +15 -48
  34. klaude_code/llm/usage.py +51 -10
  35. klaude_code/protocol/commands.py +1 -0
  36. klaude_code/protocol/events.py +11 -2
  37. klaude_code/protocol/model.py +142 -24
  38. klaude_code/protocol/sub_agent.py +5 -1
  39. klaude_code/session/export.py +51 -27
  40. klaude_code/session/session.py +28 -16
  41. klaude_code/session/templates/export_session.html +4 -1
  42. klaude_code/ui/modes/repl/__init__.py +1 -5
  43. klaude_code/ui/modes/repl/event_handler.py +153 -54
  44. klaude_code/ui/modes/repl/renderer.py +4 -4
  45. klaude_code/ui/renderers/developer.py +35 -25
  46. klaude_code/ui/renderers/metadata.py +68 -30
  47. klaude_code/ui/renderers/tools.py +53 -87
  48. klaude_code/ui/rich/markdown.py +5 -5
  49. {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/METADATA +1 -1
  50. {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/RECORD +52 -49
  51. {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/WHEEL +0 -0
  52. {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/input.py CHANGED
@@ -7,9 +7,9 @@
  # pyright: reportGeneralTypeIssues=false
 
  from openai.types import chat
- from openai.types.chat import ChatCompletionContentPartParam
 
- from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, merge_reminder_text, parse_message_groups
+ from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, parse_message_groups
+ from klaude_code.llm.openai_compatible.input import tool_group_to_openai_message, user_group_to_openai_message
  from klaude_code.protocol import model
 
 
@@ -25,29 +25,6 @@ def is_gemini_model(model_name: str | None) -> bool:
      return model_name is not None and model_name.startswith("google/gemini")
 
 
- def _user_group_to_message(group: UserGroup) -> chat.ChatCompletionMessageParam:
-     parts: list[ChatCompletionContentPartParam] = []
-     for text in group.text_parts:
-         parts.append({"type": "text", "text": text + "\n"})
-     for image in group.images:
-         parts.append({"type": "image_url", "image_url": {"url": image.image_url.url}})
-     if not parts:
-         parts.append({"type": "text", "text": ""})
-     return {"role": "user", "content": parts}
-
-
- def _tool_group_to_message(group: ToolGroup) -> chat.ChatCompletionMessageParam:
-     merged_text = merge_reminder_text(
-         group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
-         group.reminder_texts,
-     )
-     return {
-         "role": "tool",
-         "content": [{"type": "text", "text": merged_text}],
-         "tool_call_id": group.tool_result.call_id,
-     }
-
-
  def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -> chat.ChatCompletionMessageParam:
      assistant_message: dict[str, object] = {"role": "assistant"}
 
@@ -150,9 +127,9 @@ def convert_history_to_input(
      for group in parse_message_groups(history):
          match group:
              case UserGroup():
-                 messages.append(_user_group_to_message(group))
+                 messages.append(user_group_to_openai_message(group))
              case ToolGroup():
-                 messages.append(_tool_group_to_message(group))
+                 messages.append(tool_group_to_openai_message(group))
              case AssistantGroup():
                  messages.append(_assistant_group_to_message(group, model_name))
 
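Note: the two private converters deleted above now live as shared helpers in klaude_code/llm/openai_compatible/input.py, so the OpenRouter and OpenAI-compatible clients build user and tool messages identically. A sketch of the user-message helper, assuming it mirrors the removed _user_group_to_message (the shared implementation itself is not shown in this diff):

    from openai.types import chat
    from openai.types.chat import ChatCompletionContentPartParam

    from klaude_code.llm.input_common import UserGroup

    def user_group_to_openai_message(group: UserGroup) -> chat.ChatCompletionMessageParam:
        # Text parts first, then images, mirroring the deleted private helper.
        parts: list[ChatCompletionContentPartParam] = []
        for text in group.text_parts:
            parts.append({"type": "text", "text": text + "\n"})
        for image in group.images:
            parts.append({"type": "image_url", "image_url": {"url": image.image_url.url}})
        if not parts:
            # Keep the guard from the original: never emit an empty content list.
            parts.append({"type": "text", "text": ""})
        return {"role": "user", "content": parts}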
klaude_code/llm/responses/client.py CHANGED
@@ -1,5 +1,4 @@
  import json
- import time
  from collections.abc import AsyncGenerator
  from typing import TYPE_CHECKING, override
 
@@ -12,7 +11,7 @@ from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.registry import register
  from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
- from klaude_code.llm.usage import calculate_cost
+ from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
  from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug
 
@@ -24,12 +23,9 @@ if TYPE_CHECKING:
  async def parse_responses_stream(
      stream: "AsyncStream[ResponseStreamEvent]",
      param: llm_param.LLMCallParameter,
-     cost_config: llm_param.Cost | None,
-     request_start_time: float,
+     metadata_tracker: MetadataTracker,
  ) -> AsyncGenerator[model.ConversationItem, None]:
      """Parse OpenAI Responses API stream events into ConversationItems."""
-     first_token_time: float | None = None
-     last_token_time: float | None = None
      response_id: str | None = None
 
      try:
@@ -52,9 +48,7 @@
                      model=str(param.model),
                  )
              case responses.ResponseTextDeltaEvent() as event:
-                 if first_token_time is None:
-                     first_token_time = time.time()
-                 last_token_time = time.time()
+                 metadata_tracker.record_token()
                  yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
              case responses.ResponseOutputItemAddedEvent() as event:
                  if isinstance(event.item, responses.ResponseFunctionToolCall):
@@ -86,9 +80,7 @@
                          response_id=response_id,
                      )
                  case responses.ResponseFunctionToolCall() as item:
-                     if first_token_time is None:
-                         first_token_time = time.time()
-                     last_token_time = time.time()
+                     metadata_tracker.record_token()
                      yield model.ToolCallItem(
                          name=item.name,
                          arguments=item.arguments.strip(),
@@ -99,47 +91,22 @@
                  case _:
                      pass
              case responses.ResponseCompletedEvent() as event:
-                 usage: model.Usage | None = None
                  error_reason: str | None = None
                  if event.response.incomplete_details is not None:
                      error_reason = event.response.incomplete_details.reason
                  if event.response.usage is not None:
-                     total_tokens = event.response.usage.total_tokens
-                     context_usage_percent = (
-                         (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                     )
-
-                     throughput_tps: float | None = None
-                     first_token_latency_ms: float | None = None
-
-                     if first_token_time is not None:
-                         first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-                     if (
-                         first_token_time is not None
-                         and last_token_time is not None
-                         and event.response.usage.output_tokens > 0
-                     ):
-                         time_duration = last_token_time - first_token_time
-                         if time_duration >= 0.15:
-                             throughput_tps = event.response.usage.output_tokens / time_duration
-
-                     usage = model.Usage(
+                     usage = convert_responses_usage(
                          input_tokens=event.response.usage.input_tokens,
+                         output_tokens=event.response.usage.output_tokens,
                          cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
                          reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
-                         output_tokens=event.response.usage.output_tokens,
-                         total_tokens=total_tokens,
-                         context_usage_percent=context_usage_percent,
-                         throughput_tps=throughput_tps,
-                         first_token_latency_ms=first_token_latency_ms,
+                         total_tokens=event.response.usage.total_tokens,
+                         context_limit=param.context_limit,
                      )
-                     calculate_cost(usage, cost_config)
-                     yield model.ResponseMetadataItem(
-                         usage=usage,
-                         response_id=response_id,
-                         model_name=str(param.model),
-                     )
+                     metadata_tracker.set_usage(usage)
+                     metadata_tracker.set_model_name(str(param.model))
+                     metadata_tracker.set_response_id(response_id)
+                     yield metadata_tracker.finalize()
                  if event.response.status != "completed":
                      error_message = f"LLM response finished with status '{event.response.status}'"
                      if error_reason:
@@ -192,7 +159,7 @@ class ResponsesClient(LLMClientABC):
      async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
          param = apply_config_defaults(param, self.get_llm_config())
 
-         request_start_time = time.time()
+         metadata_tracker = MetadataTracker(cost_config=self._config.cost)
 
          inputs = convert_history_to_input(param.input, param.model)
          tools = convert_tool_schema(param.tools)
@@ -224,11 +191,11 @@
                  }
                  if param.thinking and param.thinking.reasoning_effort
                  else None,
-                 extra_headers={"extra": json.dumps({"session_id": param.session_id})},
+                 extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
              )
          except (openai.OpenAIError, httpx.HTTPError) as e:
              yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
              return
 
-         async for item in parse_responses_stream(stream, param, self._config.cost, request_start_time):
+         async for item in parse_responses_stream(stream, param, metadata_tracker):
              yield item
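Note: the inline request_start_time / first_token_time / last_token_time bookkeeping and the calculate_cost call are now owned by MetadataTracker. A hedged sketch of the interface these call sites imply; the real class lives in klaude_code/llm/usage.py and its finalize() yields a model.ResponseMetadataItem rather than a dict:

    import time
    from typing import Any

    class MetadataTrackerSketch:
        """Hypothetical stand-in illustrating the call pattern above."""

        def __init__(self, cost_config: Any | None = None) -> None:
            self.cost_config = cost_config
            self.request_start = time.time()
            self.first_token_time: float | None = None
            self.last_token_time: float | None = None
            self.usage: Any | None = None
            self.model_name = ""
            self.response_id: str | None = None

        def record_token(self) -> None:
            # Called on every streamed delta; replaces the inline timing code.
            now = time.time()
            if self.first_token_time is None:
                self.first_token_time = now
            self.last_token_time = now

        def set_usage(self, usage: Any) -> None:
            self.usage = usage

        def set_model_name(self, name: str) -> None:
            self.model_name = name

        def set_response_id(self, response_id: str | None) -> None:
            self.response_id = response_id

        def finalize(self) -> dict[str, Any]:
            # The real finalize() also derives throughput_tps from the
            # first..last token window and applies cost_config.
            latency_ms: float | None = None
            if self.first_token_time is not None:
                latency_ms = (self.first_token_time - self.request_start) * 1000
            return {
                "usage": self.usage,
                "model_name": self.model_name,
                "response_id": self.response_id,
                "first_token_latency_ms": latency_ms,
            }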
klaude_code/llm/usage.py CHANGED
@@ -27,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> None:
      # Cache read cost
      usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read
 
-     # Total cost
-     usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
-
 
  class MetadataTracker:
      """Tracks timing and metadata for LLM responses."""
@@ -96,17 +93,61 @@
 
 
  def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
-     """Convert OpenAI CompletionUsage to internal Usage model."""
-     total_tokens = usage.total_tokens
-     context_usage_percent = (total_tokens / context_limit) * 100 if context_limit else None
+     """Convert OpenAI CompletionUsage to internal Usage model.
+
+     context_window_size is set to total_tokens from the API response,
+     representing the actual context window usage for this turn.
+     """
      return model.Usage(
          input_tokens=usage.prompt_tokens,
          cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
          reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
          or 0,
          output_tokens=usage.completion_tokens,
-         total_tokens=total_tokens,
-         context_usage_percent=context_usage_percent,
-         throughput_tps=None,
-         first_token_latency_ms=None,
+         context_window_size=usage.total_tokens,
+         context_limit=context_limit,
+     )
+
+
+ def convert_anthropic_usage(
+     input_tokens: int,
+     output_tokens: int,
+     cached_tokens: int,
+     context_limit: int | None = None,
+ ) -> model.Usage:
+     """Convert Anthropic usage data to internal Usage model.
+
+     context_window_size is computed from input + cached + output tokens,
+     representing the actual context window usage for this turn.
+     """
+     context_window_size = input_tokens + cached_tokens + output_tokens
+     return model.Usage(
+         input_tokens=input_tokens,
+         output_tokens=output_tokens,
+         cached_tokens=cached_tokens,
+         context_window_size=context_window_size,
+         context_limit=context_limit,
+     )
+
+
+ def convert_responses_usage(
+     input_tokens: int,
+     output_tokens: int,
+     cached_tokens: int,
+     reasoning_tokens: int,
+     total_tokens: int,
+     context_limit: int | None = None,
+ ) -> model.Usage:
+     """Convert OpenAI Responses API usage data to internal Usage model.
+
+     context_window_size is set to total_tokens from the API response,
+     representing the actual context window usage for this turn.
+     """
+     return model.Usage(
+         input_tokens=input_tokens,
+         output_tokens=output_tokens,
+         cached_tokens=cached_tokens,
+         reasoning_tokens=reasoning_tokens,
+         context_window_size=total_tokens,
+         context_limit=context_limit,
      )
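Note: the three converters encode two different context-accounting conventions. OpenAI-style payloads already report a total_tokens figure, while Anthropic reports non-cached input, cached input, and output separately, so the window size has to be summed. Illustrative only, with placeholder numbers, assuming the functions behave exactly as defined above:

    from klaude_code.llm.usage import convert_anthropic_usage, convert_responses_usage

    a = convert_anthropic_usage(
        input_tokens=1_000, output_tokens=200, cached_tokens=8_000, context_limit=200_000
    )
    assert a.context_window_size == 9_200  # summed: input + cached + output

    r = convert_responses_usage(
        input_tokens=9_000, output_tokens=200, cached_tokens=8_000,
        reasoning_tokens=50, total_tokens=9_200, context_limit=200_000,
    )
    assert r.context_window_size == 9_200  # taken directly from total_tokens
    assert r.context_usage_percent is not None  # computed field: 9_200 / 200_000 -> 4.6%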
klaude_code/protocol/commands.py CHANGED
@@ -12,6 +12,7 @@ class CommandName(str, Enum):
      TERMINAL_SETUP = "terminal-setup"
      EXPORT = "export"
      STATUS = "status"
+     RELEASE_NOTES = "release-notes"
      # PLAN and DOC are dynamically registered now, but kept here if needed for reference
      # or we can remove them if no code explicitly imports them.
      # PLAN = "plan"
klaude_code/protocol/events.py CHANGED
@@ -91,15 +91,23 @@ class ToolResultEvent(BaseModel):
      ui_extra: model.ToolResultUIExtra | None = None
      status: Literal["success", "error"]
      is_replay: bool = False
+     task_metadata: model.TaskMetadata | None = None  # Sub-agent task metadata
 
 
  class ResponseMetadataEvent(BaseModel):
-     """Showing model name, usage tokens, task duration, and turn count."""
+     """Internal event for turn-level metadata. Not exposed to UI directly."""
 
      session_id: str
      metadata: model.ResponseMetadataItem
 
 
+ class TaskMetadataEvent(BaseModel):
+     """Task-level aggregated metadata for UI display."""
+
+     session_id: str
+     metadata: model.TaskMetadataItem
+
+
  class UserMessageEvent(BaseModel):
      session_id: str
      content: str
@@ -127,7 +135,7 @@ HistoryItemEvent = (
      | ToolCallEvent
      | ToolResultEvent
      | UserMessageEvent
-     | ResponseMetadataEvent
+     | TaskMetadataEvent
      | InterruptEvent
      | DeveloperMessageEvent
      | ErrorEvent
@@ -150,6 +158,7 @@ Event = (
      | ToolCallEvent
      | ToolResultEvent
      | ResponseMetadataEvent
+     | TaskMetadataEvent
      | ReplayHistoryEvent
      | ErrorEvent
      | EndEvent
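Note: after this change, ResponseMetadataEvent stays internal (it remains in Event for plumbing) while TaskMetadataEvent replaces it in HistoryItemEvent, so replay and the UI only ever see the aggregated form. A hypothetical consumer sketch; the real dispatch lives in klaude_code/ui/modes/repl/event_handler.py and is not shown in this diff:

    from klaude_code.protocol.events import ResponseMetadataEvent, TaskMetadataEvent

    def render_task_summary(metadata) -> None:
        # Hypothetical renderer stub standing in for the real UI code.
        print(f"task done: {metadata.main.model_name}")

    def handle_event(event) -> None:
        match event:
            case TaskMetadataEvent() as e:
                render_task_summary(e.metadata)  # aggregated, task-level, UI-facing
            case ResponseMetadataEvent():
                pass  # per-turn bookkeeping; intentionally not rendered
            case _:
                pass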
klaude_code/protocol/model.py CHANGED
@@ -1,8 +1,8 @@
  from datetime import datetime
  from enum import Enum
- from typing import Literal
+ from typing import Annotated, Literal
 
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, ConfigDict, Field, computed_field
 
  from klaude_code.protocol.commands import CommandName
  from klaude_code.protocol.tools import SubAgentType
@@ -12,12 +12,16 @@ TodoStatusType = Literal["pending", "in_progress", "completed"]
 
 
  class Usage(BaseModel):
+     # Token Usage (primary state)
      input_tokens: int = 0
      cached_tokens: int = 0
      reasoning_tokens: int = 0
      output_tokens: int = 0
-     total_tokens: int = 0
-     context_usage_percent: float | None = None
+
+     # Context window tracking
+     context_window_size: int | None = None  # Peak total_tokens seen (for context usage display)
+     context_limit: int | None = None  # Model's context limit
+
      throughput_tps: float | None = None
      first_token_latency_ms: float | None = None
 
@@ -25,14 +29,39 @@
      input_cost: float | None = None  # Cost for non-cached input tokens
      output_cost: float | None = None  # Cost for output tokens (including reasoning)
      cache_read_cost: float | None = None  # Cost for cached tokens
-     total_cost: float | None = None  # Total cost (input + output + cache_read)
      currency: str = "USD"  # Currency for cost display (USD or CNY)
 
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def total_tokens(self) -> int:
+         """Total tokens computed from input + output tokens."""
+         return self.input_tokens + self.output_tokens
+
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def total_cost(self) -> float | None:
+         """Total cost computed from input + output + cache_read costs."""
+         costs = [self.input_cost, self.output_cost, self.cache_read_cost]
+         non_none = [c for c in costs if c is not None]
+         return sum(non_none) if non_none else None
+
+     @computed_field  # type: ignore[prop-decorator]
+     @property
+     def context_usage_percent(self) -> float | None:
+         """Context usage percentage computed from context_window_size / context_limit."""
+         if self.context_limit is None or self.context_limit <= 0:
+             return None
+         if self.context_window_size is None:
+             return None
+         return (self.context_window_size / self.context_limit) * 100
+
 
  class TodoItem(BaseModel):
+     model_config = ConfigDict(populate_by_name=True)
+
      content: str
      status: TodoStatusType
-     activeForm: str = ""
+     active_form: str = Field(default="", alias="activeForm")
 
 
  class TodoUIExtra(BaseModel):
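Note: total_tokens, total_cost, and context_usage_percent are now derived on access, so they can never drift from the primary counters, and @computed_field keeps them in serialized output. The TodoItem change preserves the camelCase wire format while exposing a snake_case attribute. A short illustration with placeholder values, assuming the definitions above:

    from klaude_code.protocol.model import TodoItem, Usage

    u = Usage(input_tokens=900, output_tokens=100, input_cost=0.5, cache_read_cost=0.25)
    assert u.total_tokens == 1000  # derived: input + output
    assert u.total_cost == 0.75  # sums only the non-None cost parts
    assert "total_tokens" in u.model_dump()  # computed fields serialize too

    # populate_by_name accepts both the wire alias and the Python name:
    TodoItem(content="ship it", status="pending", activeForm="Shipping it")
    TodoItem(content="ship it", status="pending", active_form="Shipping it")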
@@ -40,43 +69,55 @@
      new_completed: list[str]
 
 
- class ToolResultUIExtraType(str, Enum):
-     DIFF_TEXT = "diff_text"
-     TODO_LIST = "todo_list"
-     SESSION_ID = "session_id"
-     MERMAID_LINK = "mermaid_link"
-     TRUNCATION = "truncation"
-     SESSION_STATUS = "session_status"
-
-
  class ToolSideEffect(str, Enum):
      TODO_CHANGE = "todo_change"
 
 
+ # Discriminated union types for ToolResultUIExtra
+ class DiffTextUIExtra(BaseModel):
+     type: Literal["diff_text"] = "diff_text"
+     diff_text: str
+
+
+ class TodoListUIExtra(BaseModel):
+     type: Literal["todo_list"] = "todo_list"
+     todo_list: TodoUIExtra
+
+
+ class SessionIdUIExtra(BaseModel):
+     type: Literal["session_id"] = "session_id"
+     session_id: str
+
+
  class MermaidLinkUIExtra(BaseModel):
+     type: Literal["mermaid_link"] = "mermaid_link"
      link: str
      line_count: int
 
 
  class TruncationUIExtra(BaseModel):
+     type: Literal["truncation"] = "truncation"
      saved_file_path: str
      original_length: int
      truncated_length: int
 
 
  class SessionStatusUIExtra(BaseModel):
+     type: Literal["session_status"] = "session_status"
      usage: "Usage"
      task_count: int
+     by_model: list["TaskMetadata"] = []
 
 
- class ToolResultUIExtra(BaseModel):
-     type: ToolResultUIExtraType
-     diff_text: str | None = None
-     todo_list: TodoUIExtra | None = None
-     session_id: str | None = None
-     mermaid_link: MermaidLinkUIExtra | None = None
-     truncation: TruncationUIExtra | None = None
-     session_status: SessionStatusUIExtra | None = None
+ ToolResultUIExtra = Annotated[
+     DiffTextUIExtra
+     | TodoListUIExtra
+     | SessionIdUIExtra
+     | MermaidLinkUIExtra
+     | TruncationUIExtra
+     | SessionStatusUIExtra,
+     Field(discriminator="type"),
+ ]
 
 
  class AtPatternParseResult(BaseModel):
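Note: the enum-plus-optional-fields model is replaced by a tagged union, so each variant carries only its own required fields and pydantic narrows to the concrete class from the "type" tag during validation; renderers can then match on the class instead of probing which optional field is non-None. Illustrative, assuming pydantic v2's TypeAdapter:

    from pydantic import TypeAdapter

    from klaude_code.protocol.model import ToolResultUIExtra, TruncationUIExtra

    extra = TypeAdapter(ToolResultUIExtra).validate_python({
        "type": "truncation",
        "saved_file_path": "/tmp/tool-output.txt",  # placeholder path
        "original_length": 120_000,
        "truncated_length": 4_000,
    })
    assert isinstance(extra, TruncationUIExtra)  # narrowed by the discriminator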
@@ -240,6 +281,7 @@ class ToolResultItem(BaseModel):
      ui_extra: ToolResultUIExtra | None = None  # Extra data for UI display, e.g. diff render
      images: list[ImageURLPart] | None = None
      side_effects: list[ToolSideEffect] | None = None
+     task_metadata: "TaskMetadata | None" = None  # Sub-agent task metadata for propagation to main agent
      created_at: datetime = Field(default_factory=datetime.now)
 
 
@@ -255,6 +297,8 @@ class StreamErrorItem(BaseModel):
 
 
  class ResponseMetadataItem(BaseModel):
+     """Metadata for a single LLM response (turn-level)."""
+
      response_id: str | None = None
      usage: Usage | None = None
      model_name: str = ""
@@ -263,6 +307,73 @@
      created_at: datetime = Field(default_factory=datetime.now)
 
 
+ class TaskMetadata(BaseModel):
+     """Base metadata for a task execution (used by both main and sub-agents)."""
+
+     usage: Usage | None = None
+     model_name: str = ""
+     provider: str | None = None
+     task_duration_s: float | None = None
+
+     @staticmethod
+     def aggregate_by_model(metadata_list: list["TaskMetadata"]) -> list["TaskMetadata"]:
+         """Aggregate multiple TaskMetadata by (model_name, provider).
+
+         Returns a list sorted by total_cost descending.
+
+         Note: total_tokens and total_cost are now computed fields,
+         so we only accumulate the primary state fields here.
+         """
+         aggregated: dict[tuple[str, str | None], TaskMetadata] = {}
+
+         for meta in metadata_list:
+             if not meta.usage:
+                 continue
+
+             key = (meta.model_name, meta.provider)
+             usage = meta.usage
+
+             if key not in aggregated:
+                 aggregated[key] = TaskMetadata(
+                     model_name=meta.model_name,
+                     provider=meta.provider,
+                     usage=Usage(currency=usage.currency),
+                 )
+
+             agg = aggregated[key]
+             if agg.usage is None:
+                 continue
+
+             # Accumulate primary token fields (total_tokens is computed)
+             agg.usage.input_tokens += usage.input_tokens
+             agg.usage.cached_tokens += usage.cached_tokens
+             agg.usage.reasoning_tokens += usage.reasoning_tokens
+             agg.usage.output_tokens += usage.output_tokens
+
+             # Accumulate cost components (total_cost is computed)
+             if usage.input_cost is not None:
+                 agg.usage.input_cost = (agg.usage.input_cost or 0.0) + usage.input_cost
+             if usage.output_cost is not None:
+                 agg.usage.output_cost = (agg.usage.output_cost or 0.0) + usage.output_cost
+             if usage.cache_read_cost is not None:
+                 agg.usage.cache_read_cost = (agg.usage.cache_read_cost or 0.0) + usage.cache_read_cost
+
+         # Sort by total_cost descending
+         return sorted(
+             aggregated.values(),
+             key=lambda m: m.usage.total_cost if m.usage and m.usage.total_cost else 0.0,
+             reverse=True,
+         )
+
+
+ class TaskMetadataItem(BaseModel):
+     """Aggregated metadata for a complete task, stored in conversation history."""
+
+     main: TaskMetadata = Field(default_factory=TaskMetadata)
+     sub_agent_task_metadata: list[TaskMetadata] = Field(default_factory=lambda: list[TaskMetadata]())
+     created_at: datetime = Field(default_factory=datetime.now)
+
+
  MessageItem = (
      UserMessageItem
      | AssistantMessageItem
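Note: aggregate_by_model accumulates only the primary counters and cost components; the ordering falls out of the computed total_cost. A worked example with placeholder model names, assuming the definitions above:

    from klaude_code.protocol.model import TaskMetadata, Usage

    rows = [
        TaskMetadata(model_name="model-a", provider="openai",
                     usage=Usage(input_tokens=100, output_tokens=10, input_cost=0.01)),
        TaskMetadata(model_name="model-a", provider="openai",
                     usage=Usage(input_tokens=50, output_tokens=5, input_cost=0.02)),
        TaskMetadata(model_name="model-b", provider="anthropic",
                     usage=Usage(input_tokens=10, output_tokens=1, input_cost=0.5)),
    ]
    by_model = TaskMetadata.aggregate_by_model(rows)
    assert by_model[0].model_name == "model-b"  # 0.5 > 0.01 + 0.02
    assert by_model[1].usage is not None
    assert by_model[1].usage.total_tokens == 165  # (100 + 50) + (10 + 5)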
@@ -278,7 +389,14 @@ MessageItem = (
  StreamItem = AssistantMessageDelta
 
  ConversationItem = (
-     StartItem | InterruptItem | StreamErrorItem | StreamItem | MessageItem | ResponseMetadataItem | ToolCallStartItem
+     StartItem
+     | InterruptItem
+     | StreamErrorItem
+     | StreamItem
+     | MessageItem
+     | ResponseMetadataItem
+     | TaskMetadataItem
+     | ToolCallStartItem
  )
 
 
klaude_code/protocol/sub_agent.py CHANGED
@@ -1,10 +1,13 @@
  from __future__ import annotations
 
  from dataclasses import dataclass, field
- from typing import Any, Callable
+ from typing import TYPE_CHECKING, Any, Callable
 
  from klaude_code.protocol import tools
 
+ if TYPE_CHECKING:
+     from klaude_code.protocol import model
+
  AvailabilityPredicate = Callable[[str], bool]
  PromptBuilder = Callable[[dict[str, Any]], str]
 
@@ -14,6 +17,7 @@ class SubAgentResult:
      task_result: str
      session_id: str
      error: bool = False
+     task_metadata: model.TaskMetadata | None = None
 
 
  def _default_prompt_builder(args: dict[str, Any]) -> str:
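Note: this field closes the propagation loop implied by the other hunks: a finished sub-agent attaches its TaskMetadata to SubAgentResult, the sub-agent tool copies it onto ToolResultItem.task_metadata, and it surfaces via ToolResultEvent and TaskMetadataItem.sub_agent_task_metadata. The wiring in sub_agent_tool.py is inferred, not shown here; placeholder values throughout:

    from klaude_code.protocol.model import TaskMetadata, Usage
    from klaude_code.protocol.sub_agent import SubAgentResult

    result = SubAgentResult(
        task_result="Summarized 12 files.",
        session_id="sub-123",  # placeholder id
        task_metadata=TaskMetadata(
            model_name="model-a",  # placeholder name
            usage=Usage(input_tokens=900, output_tokens=150),
            task_duration_s=3.2,
        ),
    )
    # The caller can then attach result.task_metadata to its ToolResultItem so the
    # main agent's TaskMetadataItem can aggregate sub-agent usage alongside its own.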