klaude_code-1.2.8-py3-none-any.whl → klaude_code-1.2.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. klaude_code/auth/codex/__init__.py +1 -1
  2. klaude_code/cli/main.py +12 -1
  3. klaude_code/cli/runtime.py +7 -11
  4. klaude_code/command/__init__.py +68 -21
  5. klaude_code/command/clear_cmd.py +6 -2
  6. klaude_code/command/command_abc.py +5 -2
  7. klaude_code/command/diff_cmd.py +5 -2
  8. klaude_code/command/export_cmd.py +7 -4
  9. klaude_code/command/help_cmd.py +6 -2
  10. klaude_code/command/model_cmd.py +5 -2
  11. klaude_code/command/prompt-deslop.md +14 -0
  12. klaude_code/command/prompt_command.py +8 -3
  13. klaude_code/command/refresh_cmd.py +6 -2
  14. klaude_code/command/registry.py +17 -5
  15. klaude_code/command/release_notes_cmd.py +89 -0
  16. klaude_code/command/status_cmd.py +98 -56
  17. klaude_code/command/terminal_setup_cmd.py +7 -4
  18. klaude_code/const/__init__.py +1 -1
  19. klaude_code/core/agent.py +66 -26
  20. klaude_code/core/executor.py +2 -2
  21. klaude_code/core/manager/agent_manager.py +6 -7
  22. klaude_code/core/manager/llm_clients.py +47 -22
  23. klaude_code/core/manager/llm_clients_builder.py +19 -7
  24. klaude_code/core/manager/sub_agent_manager.py +6 -2
  25. klaude_code/core/prompt.py +38 -28
  26. klaude_code/core/reminders.py +4 -7
  27. klaude_code/core/task.py +59 -40
  28. klaude_code/core/tool/__init__.py +2 -0
  29. klaude_code/core/tool/file/_utils.py +30 -0
  30. klaude_code/core/tool/file/apply_patch_tool.py +1 -1
  31. klaude_code/core/tool/file/edit_tool.py +6 -31
  32. klaude_code/core/tool/file/multi_edit_tool.py +7 -32
  33. klaude_code/core/tool/file/read_tool.py +6 -18
  34. klaude_code/core/tool/file/write_tool.py +6 -31
  35. klaude_code/core/tool/memory/__init__.py +5 -0
  36. klaude_code/core/tool/memory/memory_tool.py +2 -2
  37. klaude_code/core/tool/memory/skill_loader.py +2 -1
  38. klaude_code/core/tool/memory/skill_tool.py +13 -0
  39. klaude_code/core/tool/sub_agent_tool.py +2 -1
  40. klaude_code/core/tool/todo/todo_write_tool.py +1 -1
  41. klaude_code/core/tool/todo/update_plan_tool.py +1 -1
  42. klaude_code/core/tool/tool_context.py +21 -4
  43. klaude_code/core/tool/tool_runner.py +5 -8
  44. klaude_code/core/tool/web/mermaid_tool.py +1 -4
  45. klaude_code/core/turn.py +40 -37
  46. klaude_code/llm/__init__.py +2 -12
  47. klaude_code/llm/anthropic/client.py +14 -44
  48. klaude_code/llm/client.py +2 -2
  49. klaude_code/llm/codex/client.py +4 -3
  50. klaude_code/llm/input_common.py +0 -6
  51. klaude_code/llm/openai_compatible/client.py +31 -74
  52. klaude_code/llm/openai_compatible/input.py +6 -4
  53. klaude_code/llm/openai_compatible/stream_processor.py +82 -0
  54. klaude_code/llm/openrouter/client.py +32 -62
  55. klaude_code/llm/openrouter/input.py +4 -27
  56. klaude_code/llm/registry.py +33 -7
  57. klaude_code/llm/responses/client.py +16 -48
  58. klaude_code/llm/responses/input.py +1 -1
  59. klaude_code/llm/usage.py +61 -11
  60. klaude_code/protocol/commands.py +1 -0
  61. klaude_code/protocol/events.py +11 -2
  62. klaude_code/protocol/model.py +147 -24
  63. klaude_code/protocol/op.py +1 -0
  64. klaude_code/protocol/sub_agent.py +5 -1
  65. klaude_code/session/export.py +56 -32
  66. klaude_code/session/session.py +43 -21
  67. klaude_code/session/templates/export_session.html +4 -1
  68. klaude_code/ui/core/input.py +1 -1
  69. klaude_code/ui/modes/repl/__init__.py +1 -5
  70. klaude_code/ui/modes/repl/clipboard.py +5 -5
  71. klaude_code/ui/modes/repl/event_handler.py +153 -54
  72. klaude_code/ui/modes/repl/renderer.py +4 -4
  73. klaude_code/ui/renderers/developer.py +35 -25
  74. klaude_code/ui/renderers/metadata.py +68 -30
  75. klaude_code/ui/renderers/tools.py +53 -87
  76. klaude_code/ui/rich/markdown.py +5 -5
  77. klaude_code/ui/terminal/control.py +2 -2
  78. klaude_code/version.py +3 -3
  79. {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/METADATA +1 -1
  80. {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/RECORD +82 -78
  81. {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/WHEEL +0 -0
  82. {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/entry_points.txt +0 -0
klaude_code/llm/responses/client.py CHANGED
@@ -1,5 +1,4 @@
  import json
- import time
  from collections.abc import AsyncGenerator
  from typing import TYPE_CHECKING, override

@@ -12,7 +11,7 @@ from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.registry import register
  from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
- from klaude_code.llm.usage import calculate_cost
+ from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
  from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug

@@ -24,12 +23,9 @@ if TYPE_CHECKING:
  async def parse_responses_stream(
      stream: "AsyncStream[ResponseStreamEvent]",
      param: llm_param.LLMCallParameter,
-     cost_config: llm_param.Cost | None,
-     request_start_time: float,
+     metadata_tracker: MetadataTracker,
  ) -> AsyncGenerator[model.ConversationItem, None]:
      """Parse OpenAI Responses API stream events into ConversationItems."""
-     first_token_time: float | None = None
-     last_token_time: float | None = None
      response_id: str | None = None

      try:
@@ -52,9 +48,7 @@ async def parse_responses_stream(
                      model=str(param.model),
                  )
              case responses.ResponseTextDeltaEvent() as event:
-                 if first_token_time is None:
-                     first_token_time = time.time()
-                 last_token_time = time.time()
+                 metadata_tracker.record_token()
                  yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
              case responses.ResponseOutputItemAddedEvent() as event:
                  if isinstance(event.item, responses.ResponseFunctionToolCall):
@@ -86,9 +80,7 @@ async def parse_responses_stream(
                              response_id=response_id,
                          )
                      case responses.ResponseFunctionToolCall() as item:
-                         if first_token_time is None:
-                             first_token_time = time.time()
-                         last_token_time = time.time()
+                         metadata_tracker.record_token()
                          yield model.ToolCallItem(
                              name=item.name,
                              arguments=item.arguments.strip(),
@@ -99,47 +91,23 @@ async def parse_responses_stream(
                      case _:
                          pass
              case responses.ResponseCompletedEvent() as event:
-                 usage: model.Usage | None = None
                  error_reason: str | None = None
                  if event.response.incomplete_details is not None:
                      error_reason = event.response.incomplete_details.reason
                  if event.response.usage is not None:
-                     total_tokens = event.response.usage.total_tokens
-                     context_usage_percent = (
-                         (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                     )
-
-                     throughput_tps: float | None = None
-                     first_token_latency_ms: float | None = None
-
-                     if first_token_time is not None:
-                         first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-                     if (
-                         first_token_time is not None
-                         and last_token_time is not None
-                         and event.response.usage.output_tokens > 0
-                     ):
-                         time_duration = last_token_time - first_token_time
-                         if time_duration >= 0.15:
-                             throughput_tps = event.response.usage.output_tokens / time_duration
-
-                     usage = model.Usage(
+                     usage = convert_responses_usage(
                          input_tokens=event.response.usage.input_tokens,
+                         output_tokens=event.response.usage.output_tokens,
                          cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
                          reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
-                         output_tokens=event.response.usage.output_tokens,
-                         total_tokens=total_tokens,
-                         context_usage_percent=context_usage_percent,
-                         throughput_tps=throughput_tps,
-                         first_token_latency_ms=first_token_latency_ms,
+                         total_tokens=event.response.usage.total_tokens,
+                         context_limit=param.context_limit,
+                         max_tokens=param.max_tokens,
                      )
-                     calculate_cost(usage, cost_config)
-                 yield model.ResponseMetadataItem(
-                     usage=usage,
-                     response_id=response_id,
-                     model_name=str(param.model),
-                 )
+                     metadata_tracker.set_usage(usage)
+                 metadata_tracker.set_model_name(str(param.model))
+                 metadata_tracker.set_response_id(response_id)
+                 yield metadata_tracker.finalize()
                  if event.response.status != "completed":
                      error_message = f"LLM response finished with status '{event.response.status}'"
                      if error_reason:
@@ -192,7 +160,7 @@ class ResponsesClient(LLMClientABC):
      async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
          param = apply_config_defaults(param, self.get_llm_config())

-         request_start_time = time.time()
+         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

          inputs = convert_history_to_input(param.input, param.model)
          tools = convert_tool_schema(param.tools)
@@ -224,11 +192,11 @@ class ResponsesClient(LLMClientABC):
                  }
                  if param.thinking and param.thinking.reasoning_effort
                  else None,
-                 extra_headers={"extra": json.dumps({"session_id": param.session_id})},
+                 extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
              )
          except (openai.OpenAIError, httpx.HTTPError) as e:
              yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
              return

-         async for item in parse_responses_stream(stream, param, self._config.cost, request_start_time):
+         async for item in parse_responses_stream(stream, param, metadata_tracker):
              yield item
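The substantive change above: first/last-token timing and cost wiring leave parse_responses_stream and move behind the MetadataTracker imported from klaude_code.llm.usage. The diff shows only the tracker's call sites, so the following is a hedged, self-contained sketch of a tracker with that interface; the internal names, the UsageStub stand-in, and the 0.15 s minimum measurement window (carried over from the deleted inline code) are assumptions, not the package's actual implementation.

import time
from dataclasses import dataclass


@dataclass
class UsageStub:
    """Stand-in for model.Usage; only the fields the tracker touches."""
    output_tokens: int = 0
    first_token_latency_ms: float | None = None
    throughput_tps: float | None = None


class MetadataTrackerSketch:
    """Hypothetical reconstruction of the MetadataTracker interface used above."""

    def __init__(self, cost_config=None) -> None:
        self._cost_config = cost_config
        self._request_start = time.time()
        self._first_token: float | None = None
        self._last_token: float | None = None
        self._usage: UsageStub | None = None
        self._model_name = ""
        self._response_id: str | None = None

    def record_token(self) -> None:
        # Called on every streamed delta; anchors first-token latency.
        now = time.time()
        if self._first_token is None:
            self._first_token = now
        self._last_token = now

    def set_usage(self, usage: UsageStub) -> None:
        self._usage = usage

    def set_model_name(self, name: str) -> None:
        self._model_name = name

    def set_response_id(self, response_id: str | None) -> None:
        self._response_id = response_id

    def finalize(self) -> dict:
        # Derive latency/throughput the way the deleted inline code did.
        if self._usage is not None and self._first_token is not None:
            self._usage.first_token_latency_ms = (self._first_token - self._request_start) * 1000
            if self._last_token is not None and self._usage.output_tokens > 0:
                duration = self._last_token - self._first_token
                if duration >= 0.15:  # skip bursts too short to measure
                    self._usage.throughput_tps = self._usage.output_tokens / duration
        # The real tracker applies cost_config and returns model.ResponseMetadataItem.
        return {"usage": self._usage, "model_name": self._model_name, "response_id": self._response_id}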
klaude_code/llm/responses/input.py CHANGED
@@ -34,7 +34,7 @@ def _build_tool_result_item(tool: model.ToolResultItem) -> responses.ResponseInp
          "call_id": tool.call_id,
          "output": content_parts,
      }
-     return item  # type: ignore[return-value]
+     return item


  def convert_history_to_input(
klaude_code/llm/usage.py CHANGED
@@ -27,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> None:
      # Cache read cost
      usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read

-     # Total cost
-     usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
-

  class MetadataTracker:
      """Tracks timing and metadata for LLM responses."""
@@ -95,18 +92,71 @@ class MetadataTracker:
          return self._metadata_item


- def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
-     """Convert OpenAI CompletionUsage to internal Usage model."""
-     total_tokens = usage.total_tokens
-     context_usage_percent = (total_tokens / context_limit) * 100 if context_limit else None
+ def convert_usage(
+     usage: openai.types.CompletionUsage,
+     context_limit: int | None = None,
+     max_tokens: int | None = None,
+ ) -> model.Usage:
+     """Convert OpenAI CompletionUsage to internal Usage model.
+
+     context_token is set to total_tokens from the API response,
+     representing the actual context window usage for this turn.
+     """
      return model.Usage(
          input_tokens=usage.prompt_tokens,
          cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
          reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
          or 0,
          output_tokens=usage.completion_tokens,
-         total_tokens=total_tokens,
-         context_usage_percent=context_usage_percent,
-         throughput_tps=None,
-         first_token_latency_ms=None,
+         context_token=usage.total_tokens,
+         context_limit=context_limit,
+         max_tokens=max_tokens,
+     )
+
+
+ def convert_anthropic_usage(
+     input_tokens: int,
+     output_tokens: int,
+     cached_tokens: int,
+     context_limit: int | None = None,
+     max_tokens: int | None = None,
+ ) -> model.Usage:
+     """Convert Anthropic usage data to internal Usage model.
+
+     context_token is computed from input + cached + output tokens,
+     representing the actual context window usage for this turn.
+     """
+     context_token = input_tokens + cached_tokens + output_tokens
+     return model.Usage(
+         input_tokens=input_tokens,
+         output_tokens=output_tokens,
+         cached_tokens=cached_tokens,
+         context_token=context_token,
+         context_limit=context_limit,
+         max_tokens=max_tokens,
+     )
+
+
+ def convert_responses_usage(
+     input_tokens: int,
+     output_tokens: int,
+     cached_tokens: int,
+     reasoning_tokens: int,
+     total_tokens: int,
+     context_limit: int | None = None,
+     max_tokens: int | None = None,
+ ) -> model.Usage:
+     """Convert OpenAI Responses API usage data to internal Usage model.
+
+     context_token is set to total_tokens from the API response,
+     representing the actual context window usage for this turn.
+     """
+     return model.Usage(
+         input_tokens=input_tokens,
+         output_tokens=output_tokens,
+         cached_tokens=cached_tokens,
+         reasoning_tokens=reasoning_tokens,
+         context_token=total_tokens,
+         context_limit=context_limit,
+         max_tokens=max_tokens,
      )
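With the stored total gone, cost composition now reads: calculate_cost writes the components once, and the total is derived on read. A small worked example of the per-million arithmetic; the prices are invented for illustration (real ones come from the model's cost config), and treating input_cost as covering only non-cached tokens follows the field comment in the model diff below.

# Hypothetical per-million-token prices; real values come from llm_param.Cost.
PRICE_INPUT = 3.00        # USD / 1M non-cached input tokens
PRICE_OUTPUT = 15.00      # USD / 1M output tokens
PRICE_CACHE_READ = 0.30   # USD / 1M cached input tokens

input_tokens, cached_tokens, output_tokens = 12_000, 8_000, 1_500

# calculate_cost fills only the components ...
input_cost = ((input_tokens - cached_tokens) / 1_000_000) * PRICE_INPUT
output_cost = (output_tokens / 1_000_000) * PRICE_OUTPUT
cache_read_cost = (cached_tokens / 1_000_000) * PRICE_CACHE_READ

# ... and the Usage.total_cost computed field derives the sum on read.
total_cost = input_cost + output_cost + cache_read_cost
print(f"{total_cost:.4f}")  # 0.0369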
klaude_code/protocol/commands.py CHANGED
@@ -12,6 +12,7 @@ class CommandName(str, Enum):
      TERMINAL_SETUP = "terminal-setup"
      EXPORT = "export"
      STATUS = "status"
+     RELEASE_NOTES = "release-notes"
      # PLAN and DOC are dynamically registered now, but kept here if needed for reference
      # or we can remove them if no code explicitly imports them.
      # PLAN = "plan"
klaude_code/protocol/events.py CHANGED
@@ -91,15 +91,23 @@ class ToolResultEvent(BaseModel):
      ui_extra: model.ToolResultUIExtra | None = None
      status: Literal["success", "error"]
      is_replay: bool = False
+     task_metadata: model.TaskMetadata | None = None  # Sub-agent task metadata


  class ResponseMetadataEvent(BaseModel):
-     """Showing model name, usage tokens, task duration, and turn count."""
+     """Internal event for turn-level metadata. Not exposed to UI directly."""

      session_id: str
      metadata: model.ResponseMetadataItem


+ class TaskMetadataEvent(BaseModel):
+     """Task-level aggregated metadata for UI display."""
+
+     session_id: str
+     metadata: model.TaskMetadataItem
+
+
  class UserMessageEvent(BaseModel):
      session_id: str
      content: str
@@ -127,7 +135,7 @@ HistoryItemEvent = (
      | ToolCallEvent
      | ToolResultEvent
      | UserMessageEvent
-     | ResponseMetadataEvent
+     | TaskMetadataEvent
      | InterruptEvent
      | DeveloperMessageEvent
      | ErrorEvent
@@ -150,6 +158,7 @@ Event = (
      | ToolCallEvent
      | ToolResultEvent
      | ResponseMetadataEvent
+     | TaskMetadataEvent
      | ReplayHistoryEvent
      | ErrorEvent
      | EndEvent
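Since HistoryItemEvent now carries TaskMetadataEvent instead of ResponseMetadataEvent, UI-side consumers dispatch on the task-level event while the turn-level one stays internal. A self-contained sketch of that dispatch shape; the stand-in classes and return strings are illustrative, not the real handler in klaude_code/ui/modes/repl/event_handler.py.

from pydantic import BaseModel


class ResponseMetadataEvent(BaseModel):  # stand-in for the protocol class
    session_id: str


class TaskMetadataEvent(BaseModel):      # stand-in for the protocol class
    session_id: str


def handle_event(event: BaseModel) -> str:
    # ResponseMetadataEvent stays internal; only the aggregated
    # TaskMetadataEvent reaches the UI, per the new docstrings.
    match event:
        case TaskMetadataEvent():
            return "render task summary"
        case ResponseMetadataEvent():
            return "ignore (internal)"
        case _:
            return "other"


print(handle_event(TaskMetadataEvent(session_id="s1")))  # render task summary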
klaude_code/protocol/model.py CHANGED
@@ -1,9 +1,10 @@
  from datetime import datetime
  from enum import Enum
- from typing import Literal
+ from typing import Annotated, Literal

- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, ConfigDict, Field, computed_field

+ from klaude_code import const
  from klaude_code.protocol.commands import CommandName
  from klaude_code.protocol.tools import SubAgentType

@@ -12,12 +13,17 @@ TodoStatusType = Literal["pending", "in_progress", "completed"]


  class Usage(BaseModel):
+     # Token Usage (primary state)
      input_tokens: int = 0
      cached_tokens: int = 0
      reasoning_tokens: int = 0
      output_tokens: int = 0
-     total_tokens: int = 0
-     context_usage_percent: float | None = None
+
+     # Context window tracking
+     context_token: int | None = None  # Peak total_tokens seen (for context usage display)
+     context_limit: int | None = None  # Model's context limit
+     max_tokens: int | None = None  # Max output tokens for this request
+
      throughput_tps: float | None = None
      first_token_latency_ms: float | None = None
@@ -25,14 +31,42 @@ class Usage(BaseModel):
      input_cost: float | None = None  # Cost for non-cached input tokens
      output_cost: float | None = None  # Cost for output tokens (including reasoning)
      cache_read_cost: float | None = None  # Cost for cached tokens
-     total_cost: float | None = None  # Total cost (input + output + cache_read)
      currency: str = "USD"  # Currency for cost display (USD or CNY)

+     @computed_field
+     @property
+     def total_tokens(self) -> int:
+         """Total tokens computed from input + output tokens."""
+         return self.input_tokens + self.output_tokens
+
+     @computed_field
+     @property
+     def total_cost(self) -> float | None:
+         """Total cost computed from input + output + cache_read costs."""
+         costs = [self.input_cost, self.output_cost, self.cache_read_cost]
+         non_none = [c for c in costs if c is not None]
+         return sum(non_none) if non_none else None
+
+     @computed_field
+     @property
+     def context_usage_percent(self) -> float | None:
+         """Context usage percentage computed from context_token / (context_limit - max_tokens)."""
+         if self.context_limit is None or self.context_limit <= 0:
+             return None
+         if self.context_token is None:
+             return None
+         effective_limit = self.context_limit - (self.max_tokens or const.DEFAULT_MAX_TOKENS)
+         if effective_limit <= 0:
+             return None
+         return (self.context_token / effective_limit) * 100


  class TodoItem(BaseModel):
+     model_config = ConfigDict(populate_by_name=True)
+
      content: str
      status: TodoStatusType
-     activeForm: str = ""
+     active_form: str = Field(default="", alias="activeForm")


  class TodoUIExtra(BaseModel):
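total_tokens, total_cost, and context_usage_percent become pydantic computed fields, so derived values can no longer drift from the primary token and cost state, and they still appear in serialized output. A runnable sketch of the pattern; the 4096 default is an invented stand-in for const.DEFAULT_MAX_TOKENS, whose real value is not in this diff.

from pydantic import BaseModel, computed_field

DEFAULT_MAX_TOKENS = 4096  # assumption: stand-in for const.DEFAULT_MAX_TOKENS


class UsageSketch(BaseModel):
    input_tokens: int = 0
    output_tokens: int = 0
    context_token: int | None = None
    context_limit: int | None = None
    max_tokens: int | None = None

    @computed_field
    @property
    def total_tokens(self) -> int:
        # Derived, never stored; always consistent with the primary fields.
        return self.input_tokens + self.output_tokens

    @computed_field
    @property
    def context_usage_percent(self) -> float | None:
        # Same guards as the new Usage model: reserve room for output tokens.
        if self.context_limit is None or self.context_limit <= 0:
            return None
        if self.context_token is None:
            return None
        effective_limit = self.context_limit - (self.max_tokens or DEFAULT_MAX_TOKENS)
        if effective_limit <= 0:
            return None
        return (self.context_token / effective_limit) * 100


u = UsageSketch(input_tokens=90_000, output_tokens=2_000,
                context_token=92_000, context_limit=200_000, max_tokens=8_192)
print(round(u.context_usage_percent or 0, 1))  # 48.0; also present in u.model_dump()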
@@ -40,43 +74,55 @@ class TodoUIExtra(BaseModel):
      new_completed: list[str]


- class ToolResultUIExtraType(str, Enum):
-     DIFF_TEXT = "diff_text"
-     TODO_LIST = "todo_list"
-     SESSION_ID = "session_id"
-     MERMAID_LINK = "mermaid_link"
-     TRUNCATION = "truncation"
-     SESSION_STATUS = "session_status"
-
-
  class ToolSideEffect(str, Enum):
      TODO_CHANGE = "todo_change"


+ # Discriminated union types for ToolResultUIExtra
+ class DiffTextUIExtra(BaseModel):
+     type: Literal["diff_text"] = "diff_text"
+     diff_text: str
+
+
+ class TodoListUIExtra(BaseModel):
+     type: Literal["todo_list"] = "todo_list"
+     todo_list: TodoUIExtra
+
+
+ class SessionIdUIExtra(BaseModel):
+     type: Literal["session_id"] = "session_id"
+     session_id: str
+
+
  class MermaidLinkUIExtra(BaseModel):
+     type: Literal["mermaid_link"] = "mermaid_link"
      link: str
      line_count: int


  class TruncationUIExtra(BaseModel):
+     type: Literal["truncation"] = "truncation"
      saved_file_path: str
      original_length: int
      truncated_length: int


  class SessionStatusUIExtra(BaseModel):
+     type: Literal["session_status"] = "session_status"
      usage: "Usage"
      task_count: int
+     by_model: list["TaskMetadata"] = []


- class ToolResultUIExtra(BaseModel):
-     type: ToolResultUIExtraType
-     diff_text: str | None = None
-     todo_list: TodoUIExtra | None = None
-     session_id: str | None = None
-     mermaid_link: MermaidLinkUIExtra | None = None
-     truncation: TruncationUIExtra | None = None
-     session_status: SessionStatusUIExtra | None = None
+ ToolResultUIExtra = Annotated[
+     DiffTextUIExtra
+     | TodoListUIExtra
+     | SessionIdUIExtra
+     | MermaidLinkUIExtra
+     | TruncationUIExtra
+     | SessionStatusUIExtra,
+     Field(discriminator="type"),
+ ]


  class AtPatternParseResult(BaseModel):
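ToolResultUIExtra changes from one model with an enum tag plus six optional payload fields to a tagged union discriminated on the literal type field, so validation selects exactly one variant and rejects mismatched payloads. A minimal standalone demonstration of the same pydantic pattern, using two of the variants:

from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter


class DiffTextUIExtra(BaseModel):
    type: Literal["diff_text"] = "diff_text"
    diff_text: str


class SessionIdUIExtra(BaseModel):
    type: Literal["session_id"] = "session_id"
    session_id: str


UIExtra = Annotated[DiffTextUIExtra | SessionIdUIExtra, Field(discriminator="type")]

# The discriminator routes raw dicts (e.g. replayed session JSON)
# straight to the correct variant class.
extra = TypeAdapter(UIExtra).validate_python(
    {"type": "diff_text", "diff_text": "+added line"}
)
assert isinstance(extra, DiffTextUIExtra)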
@@ -240,6 +286,7 @@ class ToolResultItem(BaseModel):
      ui_extra: ToolResultUIExtra | None = None  # Extra data for UI display, e.g. diff render
      images: list[ImageURLPart] | None = None
      side_effects: list[ToolSideEffect] | None = None
+     task_metadata: "TaskMetadata | None" = None  # Sub-agent task metadata for propagation to main agent
      created_at: datetime = Field(default_factory=datetime.now)
@@ -255,6 +302,8 @@ class StreamErrorItem(BaseModel):


  class ResponseMetadataItem(BaseModel):
+     """Metadata for a single LLM response (turn-level)."""
+
      response_id: str | None = None
      usage: Usage | None = None
      model_name: str = ""
@@ -263,6 +312,73 @@
      created_at: datetime = Field(default_factory=datetime.now)


+ class TaskMetadata(BaseModel):
+     """Base metadata for a task execution (used by both main and sub-agents)."""
+
+     usage: Usage | None = None
+     model_name: str = ""
+     provider: str | None = None
+     task_duration_s: float | None = None
+
+     @staticmethod
+     def aggregate_by_model(metadata_list: list["TaskMetadata"]) -> list["TaskMetadata"]:
+         """Aggregate multiple TaskMetadata by (model_name, provider).
+
+         Returns a list sorted by total_cost descending.
+
+         Note: total_tokens and total_cost are now computed fields,
+         so we only accumulate the primary state fields here.
+         """
+         aggregated: dict[tuple[str, str | None], TaskMetadata] = {}
+
+         for meta in metadata_list:
+             if not meta.usage:
+                 continue
+
+             key = (meta.model_name, meta.provider)
+             usage = meta.usage
+
+             if key not in aggregated:
+                 aggregated[key] = TaskMetadata(
+                     model_name=meta.model_name,
+                     provider=meta.provider,
+                     usage=Usage(currency=usage.currency),
+                 )
+
+             agg = aggregated[key]
+             if agg.usage is None:
+                 continue
+
+             # Accumulate primary token fields (total_tokens is computed)
+             agg.usage.input_tokens += usage.input_tokens
+             agg.usage.cached_tokens += usage.cached_tokens
+             agg.usage.reasoning_tokens += usage.reasoning_tokens
+             agg.usage.output_tokens += usage.output_tokens
+
+             # Accumulate cost components (total_cost is computed)
+             if usage.input_cost is not None:
+                 agg.usage.input_cost = (agg.usage.input_cost or 0.0) + usage.input_cost
+             if usage.output_cost is not None:
+                 agg.usage.output_cost = (agg.usage.output_cost or 0.0) + usage.output_cost
+             if usage.cache_read_cost is not None:
+                 agg.usage.cache_read_cost = (agg.usage.cache_read_cost or 0.0) + usage.cache_read_cost
+
+         # Sort by total_cost descending
+         return sorted(
+             aggregated.values(),
+             key=lambda m: m.usage.total_cost if m.usage and m.usage.total_cost else 0.0,
+             reverse=True,
+         )
+
+
+ class TaskMetadataItem(BaseModel):
+     """Aggregated metadata for a complete task, stored in conversation history."""
+
+     main: TaskMetadata = Field(default_factory=TaskMetadata)
+     sub_agent_task_metadata: list[TaskMetadata] = Field(default_factory=lambda: list[TaskMetadata]())
+     created_at: datetime = Field(default_factory=datetime.now)
+
+
  MessageItem = (
      UserMessageItem
      | AssistantMessageItem
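aggregate_by_model folds per-task usage into one row per (model_name, provider) pair, summing only the primary fields and deriving totals on read. The accumulate-then-sort shape, reduced to plain dicts so it runs standalone (all names and numbers are invented):

# Illustrative accumulation mirroring TaskMetadata.aggregate_by_model:
# only primary cost components are summed; the total is derived afterwards.
tasks = [
    {"model": "gpt-5", "provider": "openai", "input_cost": 0.012, "output_cost": 0.045},
    {"model": "gpt-5", "provider": "openai", "input_cost": 0.020, "output_cost": 0.010},
    {"model": "sonnet", "provider": "anthropic", "input_cost": 0.004, "output_cost": 0.009},
]

agg: dict[tuple[str, str], dict[str, float]] = {}
for t in tasks:
    row = agg.setdefault((t["model"], t["provider"]), {"input_cost": 0.0, "output_cost": 0.0})
    row["input_cost"] += t["input_cost"]
    row["output_cost"] += t["output_cost"]

# total_cost is computed at read time, then rows sort by it descending.
rows = sorted(agg.items(), key=lambda kv: sum(kv[1].values()), reverse=True)
for (model_name, provider), costs in rows:
    print(model_name, provider, round(sum(costs.values()), 3))
# gpt-5 openai 0.087
# sonnet anthropic 0.013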
@@ -278,7 +394,14 @@ MessageItem = (
  StreamItem = AssistantMessageDelta

  ConversationItem = (
-     StartItem | InterruptItem | StreamErrorItem | StreamItem | MessageItem | ResponseMetadataItem | ToolCallStartItem
+     StartItem
+     | InterruptItem
+     | StreamErrorItem
+     | StreamItem
+     | MessageItem
+     | ResponseMetadataItem
+     | TaskMetadataItem
+     | ToolCallStartItem
  )
klaude_code/protocol/op.py CHANGED
@@ -67,6 +67,7 @@ class InitAgentOperation(Operation):

      type: OperationType = OperationType.INIT_AGENT
      session_id: str | None = None
+     is_new_session: bool = False

      async def execute(self, handler: OperationHandler) -> None:
          await handler.handle_init_agent(self)
klaude_code/protocol/sub_agent.py CHANGED
@@ -1,10 +1,13 @@
  from __future__ import annotations

  from dataclasses import dataclass, field
- from typing import Any, Callable
+ from typing import TYPE_CHECKING, Any, Callable

  from klaude_code.protocol import tools

+ if TYPE_CHECKING:
+     from klaude_code.protocol import model
+
  AvailabilityPredicate = Callable[[str], bool]
  PromptBuilder = Callable[[dict[str, Any]], str]

@@ -14,6 +17,7 @@ class SubAgentResult:
      task_result: str
      session_id: str
      error: bool = False
+     task_metadata: model.TaskMetadata | None = None


  def _default_prompt_builder(args: dict[str, Any]) -> str:
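The TYPE_CHECKING guard keeps klaude_code.protocol.model out of sub_agent.py's runtime import graph, while the module's existing `from __future__ import annotations` defers annotation evaluation so the guarded name is legal in the task_metadata annotation. A tiny runnable illustration of the general pattern, with decimal.Decimal standing in for the guarded import:

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only for the type checker; no runtime dependency or cycle.
    from decimal import Decimal  # stands in for klaude_code.protocol.model


@dataclass
class Result:
    # Resolved lazily (PEP 563), so the guarded import above suffices.
    amount: Decimal | None = None


print(Result())  # Result(amount=None)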