klaude-code 1.2.8__py3-none-any.whl → 1.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/codex/__init__.py +1 -1
- klaude_code/command/__init__.py +2 -0
- klaude_code/command/prompt-deslop.md +14 -0
- klaude_code/command/release_notes_cmd.py +86 -0
- klaude_code/command/status_cmd.py +92 -54
- klaude_code/core/agent.py +13 -19
- klaude_code/core/manager/sub_agent_manager.py +5 -1
- klaude_code/core/prompt.py +38 -28
- klaude_code/core/reminders.py +4 -4
- klaude_code/core/task.py +59 -40
- klaude_code/core/tool/__init__.py +2 -0
- klaude_code/core/tool/file/apply_patch_tool.py +1 -1
- klaude_code/core/tool/file/edit_tool.py +1 -1
- klaude_code/core/tool/file/multi_edit_tool.py +1 -1
- klaude_code/core/tool/file/write_tool.py +1 -1
- klaude_code/core/tool/memory/memory_tool.py +2 -2
- klaude_code/core/tool/sub_agent_tool.py +2 -1
- klaude_code/core/tool/todo/todo_write_tool.py +1 -1
- klaude_code/core/tool/todo/update_plan_tool.py +1 -1
- klaude_code/core/tool/tool_context.py +21 -4
- klaude_code/core/tool/tool_runner.py +5 -8
- klaude_code/core/tool/web/mermaid_tool.py +1 -4
- klaude_code/core/turn.py +40 -37
- klaude_code/llm/anthropic/client.py +13 -44
- klaude_code/llm/client.py +1 -1
- klaude_code/llm/codex/client.py +4 -3
- klaude_code/llm/input_common.py +0 -6
- klaude_code/llm/openai_compatible/client.py +28 -72
- klaude_code/llm/openai_compatible/input.py +6 -4
- klaude_code/llm/openai_compatible/stream_processor.py +82 -0
- klaude_code/llm/openrouter/client.py +29 -59
- klaude_code/llm/openrouter/input.py +4 -27
- klaude_code/llm/responses/client.py +15 -48
- klaude_code/llm/usage.py +51 -10
- klaude_code/protocol/commands.py +1 -0
- klaude_code/protocol/events.py +11 -2
- klaude_code/protocol/model.py +142 -24
- klaude_code/protocol/sub_agent.py +5 -1
- klaude_code/session/export.py +51 -27
- klaude_code/session/session.py +28 -16
- klaude_code/session/templates/export_session.html +4 -1
- klaude_code/ui/modes/repl/__init__.py +1 -5
- klaude_code/ui/modes/repl/event_handler.py +153 -54
- klaude_code/ui/modes/repl/renderer.py +4 -4
- klaude_code/ui/renderers/developer.py +35 -25
- klaude_code/ui/renderers/metadata.py +68 -30
- klaude_code/ui/renderers/tools.py +53 -87
- klaude_code/ui/rich/markdown.py +5 -5
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/METADATA +1 -1
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/RECORD +52 -49
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.9.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/input.py
CHANGED

@@ -7,9 +7,9 @@
 # pyright: reportGeneralTypeIssues=false
 
 from openai.types import chat
-from openai.types.chat import ChatCompletionContentPartParam
 
-from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, merge_reminder_text, parse_message_groups
+from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, parse_message_groups
+from klaude_code.llm.openai_compatible.input import tool_group_to_openai_message, user_group_to_openai_message
 from klaude_code.protocol import model
 
 
@@ -25,29 +25,6 @@ def is_gemini_model(model_name: str | None) -> bool:
     return model_name is not None and model_name.startswith("google/gemini")
 
 
-def _user_group_to_message(group: UserGroup) -> chat.ChatCompletionMessageParam:
-    parts: list[ChatCompletionContentPartParam] = []
-    for text in group.text_parts:
-        parts.append({"type": "text", "text": text + "\n"})
-    for image in group.images:
-        parts.append({"type": "image_url", "image_url": {"url": image.image_url.url}})
-    if not parts:
-        parts.append({"type": "text", "text": ""})
-    return {"role": "user", "content": parts}
-
-
-def _tool_group_to_message(group: ToolGroup) -> chat.ChatCompletionMessageParam:
-    merged_text = merge_reminder_text(
-        group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
-        group.reminder_texts,
-    )
-    return {
-        "role": "tool",
-        "content": [{"type": "text", "text": merged_text}],
-        "tool_call_id": group.tool_result.call_id,
-    }
-
-
 def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -> chat.ChatCompletionMessageParam:
     assistant_message: dict[str, object] = {"role": "assistant"}
 
@@ -150,9 +127,9 @@ def convert_history_to_input(
     for group in parse_message_groups(history):
         match group:
             case UserGroup():
-                messages.append(_user_group_to_message(group))
+                messages.append(user_group_to_openai_message(group))
             case ToolGroup():
-                messages.append(_tool_group_to_message(group))
+                messages.append(tool_group_to_openai_message(group))
            case AssistantGroup():
                messages.append(_assistant_group_to_message(group, model_name))
 
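The per-provider user/tool message builders above were removed in favor of the shared converters in klaude_code.llm.openai_compatible.input, whose implementations are not shown in this diff. For orientation, these are the Chat Completions message shapes the removed helpers produced; the URL and call id below are invented examples:

```python
# Shapes emitted by the removed helpers (values are illustrative only).
user_msg = {
    "role": "user",
    "content": [
        {"type": "text", "text": "hello\n"},  # each text part gets a trailing newline
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},  # invented URL
    ],
}
tool_msg = {
    "role": "tool",
    "content": [{"type": "text", "text": "<system-reminder>Tool ran without output or errors</system-reminder>"}],
    "tool_call_id": "call_123",  # invented id
}
```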
klaude_code/llm/responses/client.py
CHANGED

@@ -1,5 +1,4 @@
 import json
-import time
 from collections.abc import AsyncGenerator
 from typing import TYPE_CHECKING, override
 
@@ -12,7 +11,7 @@ from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
-from klaude_code.llm.usage import
+from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
@@ -24,12 +23,9 @@ if TYPE_CHECKING:
 async def parse_responses_stream(
     stream: "AsyncStream[ResponseStreamEvent]",
     param: llm_param.LLMCallParameter,
-
-    request_start_time: float,
+    metadata_tracker: MetadataTracker,
 ) -> AsyncGenerator[model.ConversationItem, None]:
     """Parse OpenAI Responses API stream events into ConversationItems."""
-    first_token_time: float | None = None
-    last_token_time: float | None = None
     response_id: str | None = None
 
     try:
@@ -52,9 +48,7 @@
                     model=str(param.model),
                 )
            case responses.ResponseTextDeltaEvent() as event:
-
-                first_token_time = time.time()
-                last_token_time = time.time()
+                metadata_tracker.record_token()
                yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
            case responses.ResponseOutputItemAddedEvent() as event:
                if isinstance(event.item, responses.ResponseFunctionToolCall):
@@ -86,9 +80,7 @@
                        response_id=response_id,
                    )
                case responses.ResponseFunctionToolCall() as item:
-
-                    first_token_time = time.time()
-                    last_token_time = time.time()
+                    metadata_tracker.record_token()
                    yield model.ToolCallItem(
                        name=item.name,
                        arguments=item.arguments.strip(),
@@ -99,47 +91,22 @@
                case _:
                    pass
            case responses.ResponseCompletedEvent() as event:
-                usage: model.Usage | None = None
                error_reason: str | None = None
                if event.response.incomplete_details is not None:
                    error_reason = event.response.incomplete_details.reason
                if event.response.usage is not None:
-
-                    context_usage_percent = (
-                        (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                    )
-
-                    throughput_tps: float | None = None
-                    first_token_latency_ms: float | None = None
-
-                    if first_token_time is not None:
-                        first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-                    if (
-                        first_token_time is not None
-                        and last_token_time is not None
-                        and event.response.usage.output_tokens > 0
-                    ):
-                        time_duration = last_token_time - first_token_time
-                        if time_duration >= 0.15:
-                            throughput_tps = event.response.usage.output_tokens / time_duration
-
-                    usage = model.Usage(
+                    usage = convert_responses_usage(
                        input_tokens=event.response.usage.input_tokens,
+                        output_tokens=event.response.usage.output_tokens,
                        cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
                        reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
-
-
-                        context_usage_percent=context_usage_percent,
-                        throughput_tps=throughput_tps,
-                        first_token_latency_ms=first_token_latency_ms,
+                        total_tokens=event.response.usage.total_tokens,
+                        context_limit=param.context_limit,
                    )
-
-
-
-
-                        model_name=str(param.model),
-                    )
+                    metadata_tracker.set_usage(usage)
+                    metadata_tracker.set_model_name(str(param.model))
+                    metadata_tracker.set_response_id(response_id)
+                    yield metadata_tracker.finalize()
                if event.response.status != "completed":
                    error_message = f"LLM response finished with status '{event.response.status}'"
                    if error_reason:
@@ -192,7 +159,7 @@ class ResponsesClient(LLMClientABC):
    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
        param = apply_config_defaults(param, self.get_llm_config())
 
-
+        metadata_tracker = MetadataTracker(cost_config=self._config.cost)
 
        inputs = convert_history_to_input(param.input, param.model)
        tools = convert_tool_schema(param.tools)
@@ -224,11 +191,11 @@
                }
                if param.thinking and param.thinking.reasoning_effort
                else None,
-                extra_headers={"extra": json.dumps({"session_id": param.session_id})},
+                extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
            )
        except (openai.OpenAIError, httpx.HTTPError) as e:
            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
            return
 
-        async for item in parse_responses_stream(stream, param, request_start_time):
+        async for item in parse_responses_stream(stream, param, metadata_tracker):
            yield item
klaude_code/llm/usage.py
CHANGED

@@ -27,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> None:
     # Cache read cost
     usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read
 
-    # Total cost
-    usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
-
 
 
 class MetadataTracker:
     """Tracks timing and metadata for LLM responses."""
@@ -96,17 +93,61 @@
 
 
 def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
-    """Convert OpenAI CompletionUsage to internal Usage model.
-
-
+    """Convert OpenAI CompletionUsage to internal Usage model.
+
+    context_window_size is set to total_tokens from the API response,
+    representing the actual context window usage for this turn.
+    """
     return model.Usage(
         input_tokens=usage.prompt_tokens,
         cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
         reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
         or 0,
         output_tokens=usage.completion_tokens,
-
-
-
-
+        context_window_size=usage.total_tokens,
+        context_limit=context_limit,
+    )
+
+
+def convert_anthropic_usage(
+    input_tokens: int,
+    output_tokens: int,
+    cached_tokens: int,
+    context_limit: int | None = None,
+) -> model.Usage:
+    """Convert Anthropic usage data to internal Usage model.
+
+    context_window_size is computed from input + cached + output tokens,
+    representing the actual context window usage for this turn.
+    """
+    context_window_size = input_tokens + cached_tokens + output_tokens
+    return model.Usage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_tokens=cached_tokens,
+        context_window_size=context_window_size,
+        context_limit=context_limit,
+    )
+
+
+def convert_responses_usage(
+    input_tokens: int,
+    output_tokens: int,
+    cached_tokens: int,
+    reasoning_tokens: int,
+    total_tokens: int,
+    context_limit: int | None = None,
+) -> model.Usage:
+    """Convert OpenAI Responses API usage data to internal Usage model.
+
+    context_window_size is set to total_tokens from the API response,
+    representing the actual context window usage for this turn.
+    """
+    return model.Usage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_tokens=cached_tokens,
+        reasoning_tokens=reasoning_tokens,
+        context_window_size=total_tokens,
+        context_limit=context_limit,
     )
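With klaude-code 1.2.9 installed, the new converters can be exercised directly; a minimal sketch (all token counts invented):

```python
from klaude_code.llm.usage import convert_anthropic_usage, convert_responses_usage

# The Responses API reports a total, which becomes context_window_size as-is.
u = convert_responses_usage(
    input_tokens=1_200,
    output_tokens=300,
    cached_tokens=800,
    reasoning_tokens=100,
    total_tokens=1_500,
    context_limit=200_000,
)
print(u.context_window_size)    # 1500
print(u.context_usage_percent)  # 0.75 (computed field; see protocol/model.py below)

# Anthropic reports no total, so the converter derives one.
a = convert_anthropic_usage(input_tokens=1_200, output_tokens=300, cached_tokens=800)
print(a.context_window_size)    # 2300 = input + cached + output
```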
klaude_code/protocol/commands.py
CHANGED

@@ -12,6 +12,7 @@ class CommandName(str, Enum):
     TERMINAL_SETUP = "terminal-setup"
     EXPORT = "export"
     STATUS = "status"
+    RELEASE_NOTES = "release-notes"
     # PLAN and DOC are dynamically registered now, but kept here if needed for reference
     # or we can remove them if no code explicitly imports them.
     # PLAN = "plan"
klaude_code/protocol/events.py
CHANGED

@@ -91,15 +91,23 @@ class ToolResultEvent(BaseModel):
     ui_extra: model.ToolResultUIExtra | None = None
     status: Literal["success", "error"]
     is_replay: bool = False
+    task_metadata: model.TaskMetadata | None = None  # Sub-agent task metadata
 
 
 class ResponseMetadataEvent(BaseModel):
-    """
+    """Internal event for turn-level metadata. Not exposed to UI directly."""
 
     session_id: str
     metadata: model.ResponseMetadataItem
 
 
+class TaskMetadataEvent(BaseModel):
+    """Task-level aggregated metadata for UI display."""
+
+    session_id: str
+    metadata: model.TaskMetadataItem
+
+
 class UserMessageEvent(BaseModel):
     session_id: str
     content: str
@@ -127,7 +135,7 @@ HistoryItemEvent = (
     | ToolCallEvent
     | ToolResultEvent
     | UserMessageEvent
-
+    | TaskMetadataEvent
     | InterruptEvent
     | DeveloperMessageEvent
     | ErrorEvent
@@ -150,6 +158,7 @@ Event = (
     | ToolCallEvent
     | ToolResultEvent
     | ResponseMetadataEvent
+    | TaskMetadataEvent
     | ReplayHistoryEvent
     | ErrorEvent
     | EndEvent
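A sketch of what emitting the new event might look like on the producer side (field values are invented; the real wiring lives in core/task.py and the REPL event handler, per the file list above):

```python
from klaude_code.protocol.events import TaskMetadataEvent
from klaude_code.protocol.model import TaskMetadata, TaskMetadataItem, Usage

event = TaskMetadataEvent(
    session_id="sess-1",  # invented id
    metadata=TaskMetadataItem(
        main=TaskMetadata(model_name="some-model", usage=Usage(input_tokens=10, output_tokens=2)),
    ),
)
```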
klaude_code/protocol/model.py
CHANGED

@@ -1,8 +1,8 @@
 from datetime import datetime
 from enum import Enum
-from typing import Literal
+from typing import Annotated, Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field, computed_field
 
 from klaude_code.protocol.commands import CommandName
 from klaude_code.protocol.tools import SubAgentType
@@ -12,12 +12,16 @@ TodoStatusType = Literal["pending", "in_progress", "completed"]
 
 
 class Usage(BaseModel):
+    # Token Usage (primary state)
     input_tokens: int = 0
     cached_tokens: int = 0
     reasoning_tokens: int = 0
     output_tokens: int = 0
-
-
+
+    # Context window tracking
+    context_window_size: int | None = None  # Peak total_tokens seen (for context usage display)
+    context_limit: int | None = None  # Model's context limit
+
     throughput_tps: float | None = None
     first_token_latency_ms: float | None = None
 
@@ -25,14 +29,39 @@ class Usage(BaseModel):
     input_cost: float | None = None  # Cost for non-cached input tokens
     output_cost: float | None = None  # Cost for output tokens (including reasoning)
     cache_read_cost: float | None = None  # Cost for cached tokens
-    total_cost: float | None = None  # Total cost (input + output + cache_read)
     currency: str = "USD"  # Currency for cost display (USD or CNY)
 
+    @computed_field  # type: ignore[prop-decorator]
+    @property
+    def total_tokens(self) -> int:
+        """Total tokens computed from input + output tokens."""
+        return self.input_tokens + self.output_tokens
+
+    @computed_field  # type: ignore[prop-decorator]
+    @property
+    def total_cost(self) -> float | None:
+        """Total cost computed from input + output + cache_read costs."""
+        costs = [self.input_cost, self.output_cost, self.cache_read_cost]
+        non_none = [c for c in costs if c is not None]
+        return sum(non_none) if non_none else None
+
+    @computed_field  # type: ignore[prop-decorator]
+    @property
+    def context_usage_percent(self) -> float | None:
+        """Context usage percentage computed from context_window_size / context_limit."""
+        if self.context_limit is None or self.context_limit <= 0:
+            return None
+        if self.context_window_size is None:
+            return None
+        return (self.context_window_size / self.context_limit) * 100
+
 
 class TodoItem(BaseModel):
+    model_config = ConfigDict(populate_by_name=True)
+
     content: str
     status: TodoStatusType
-
+    active_form: str = Field(default="", alias="activeForm")
 
 
 class TodoUIExtra(BaseModel):
@@ -40,43 +69,55 @@ class TodoUIExtra(BaseModel):
     new_completed: list[str]
 
 
-class ToolResultUIExtraType(str, Enum):
-    DIFF_TEXT = "diff_text"
-    TODO_LIST = "todo_list"
-    SESSION_ID = "session_id"
-    MERMAID_LINK = "mermaid_link"
-    TRUNCATION = "truncation"
-    SESSION_STATUS = "session_status"
-
-
 class ToolSideEffect(str, Enum):
     TODO_CHANGE = "todo_change"
 
 
+# Discriminated union types for ToolResultUIExtra
+class DiffTextUIExtra(BaseModel):
+    type: Literal["diff_text"] = "diff_text"
+    diff_text: str
+
+
+class TodoListUIExtra(BaseModel):
+    type: Literal["todo_list"] = "todo_list"
+    todo_list: TodoUIExtra
+
+
+class SessionIdUIExtra(BaseModel):
+    type: Literal["session_id"] = "session_id"
+    session_id: str
+
+
 class MermaidLinkUIExtra(BaseModel):
+    type: Literal["mermaid_link"] = "mermaid_link"
     link: str
     line_count: int
 
 
 class TruncationUIExtra(BaseModel):
+    type: Literal["truncation"] = "truncation"
     saved_file_path: str
     original_length: int
     truncated_length: int
 
 
 class SessionStatusUIExtra(BaseModel):
+    type: Literal["session_status"] = "session_status"
     usage: "Usage"
     task_count: int
+    by_model: list["TaskMetadata"] = []
 
 
-
-
-
-
-
-
-
+ToolResultUIExtra = Annotated[
+    DiffTextUIExtra
+    | TodoListUIExtra
+    | SessionIdUIExtra
+    | MermaidLinkUIExtra
+    | TruncationUIExtra
+    | SessionStatusUIExtra,
+    Field(discriminator="type"),
+]
 
 
 class AtPatternParseResult(BaseModel):
@@ -240,6 +281,7 @@ class ToolResultItem(BaseModel):
     ui_extra: ToolResultUIExtra | None = None  # Extra data for UI display, e.g. diff render
     images: list[ImageURLPart] | None = None
     side_effects: list[ToolSideEffect] | None = None
+    task_metadata: "TaskMetadata | None" = None  # Sub-agent task metadata for propagation to main agent
     created_at: datetime = Field(default_factory=datetime.now)
 
 
@@ -255,6 +297,8 @@ class StreamErrorItem(BaseModel):
 
 
 class ResponseMetadataItem(BaseModel):
+    """Metadata for a single LLM response (turn-level)."""
+
     response_id: str | None = None
     usage: Usage | None = None
     model_name: str = ""
@@ -263,6 +307,73 @@ class ResponseMetadataItem(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
 
 
+class TaskMetadata(BaseModel):
+    """Base metadata for a task execution (used by both main and sub-agents)."""
+
+    usage: Usage | None = None
+    model_name: str = ""
+    provider: str | None = None
+    task_duration_s: float | None = None
+
+    @staticmethod
+    def aggregate_by_model(metadata_list: list["TaskMetadata"]) -> list["TaskMetadata"]:
+        """Aggregate multiple TaskMetadata by (model_name, provider).
+
+        Returns a list sorted by total_cost descending.
+
+        Note: total_tokens and total_cost are now computed fields,
+        so we only accumulate the primary state fields here.
+        """
+        aggregated: dict[tuple[str, str | None], TaskMetadata] = {}
+
+        for meta in metadata_list:
+            if not meta.usage:
+                continue
+
+            key = (meta.model_name, meta.provider)
+            usage = meta.usage
+
+            if key not in aggregated:
+                aggregated[key] = TaskMetadata(
+                    model_name=meta.model_name,
+                    provider=meta.provider,
+                    usage=Usage(currency=usage.currency),
+                )
+
+            agg = aggregated[key]
+            if agg.usage is None:
+                continue
+
+            # Accumulate primary token fields (total_tokens is computed)
+            agg.usage.input_tokens += usage.input_tokens
+            agg.usage.cached_tokens += usage.cached_tokens
+            agg.usage.reasoning_tokens += usage.reasoning_tokens
+            agg.usage.output_tokens += usage.output_tokens
+
+            # Accumulate cost components (total_cost is computed)
+            if usage.input_cost is not None:
+                agg.usage.input_cost = (agg.usage.input_cost or 0.0) + usage.input_cost
+            if usage.output_cost is not None:
+                agg.usage.output_cost = (agg.usage.output_cost or 0.0) + usage.output_cost
+            if usage.cache_read_cost is not None:
+                agg.usage.cache_read_cost = (agg.usage.cache_read_cost or 0.0) + usage.cache_read_cost
+
+        # Sort by total_cost descending
+        return sorted(
+            aggregated.values(),
+            key=lambda m: m.usage.total_cost if m.usage and m.usage.total_cost else 0.0,
+            reverse=True,
+        )
+
+
+class TaskMetadataItem(BaseModel):
+    """Aggregated metadata for a complete task, stored in conversation history."""
+
+    main: TaskMetadata = Field(default_factory=TaskMetadata)
+    sub_agent_task_metadata: list[TaskMetadata] = Field(default_factory=lambda: list[TaskMetadata]())
+    created_at: datetime = Field(default_factory=datetime.now)
+
+
 MessageItem = (
     UserMessageItem
     | AssistantMessageItem
@@ -278,7 +389,14 @@ MessageItem = (
 StreamItem = AssistantMessageDelta
 
 ConversationItem = (
-    StartItem
+    StartItem
+    | InterruptItem
+    | StreamErrorItem
+    | StreamItem
+    | MessageItem
+    | ResponseMetadataItem
+    | TaskMetadataItem
+    | ToolCallStartItem
 )
 
 
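Two behavioral notes on these model changes, shown as a small sketch (model names and numbers are invented). total_tokens, total_cost, and context_usage_percent are now derived via pydantic v2's computed_field, so they still serialize but are read-only rather than stored, and ToolResultUIExtra is now a tagged union that pydantic can dispatch on:

```python
from pydantic import TypeAdapter

from klaude_code.protocol.model import TaskMetadata, ToolResultUIExtra, Usage

u = Usage(input_tokens=1_000, output_tokens=250, input_cost=0.003, output_cost=0.010)
print(u.total_tokens)                    # 1250, derived on read
print(u.total_cost)                      # ~0.013; None only if every cost part is None
print("total_tokens" in u.model_dump())  # True: computed fields serialize

# Aggregation sums only primary fields; derived fields recompute on read.
runs = [
    TaskMetadata(model_name="m", provider="p", usage=Usage(input_tokens=10, output_cost=0.02)),
    TaskMetadata(model_name="m", provider="p", usage=Usage(input_tokens=20, output_cost=0.01)),
]
merged = TaskMetadata.aggregate_by_model(runs)
print(merged[0].usage.input_tokens)      # 30

# The discriminated union parses raw dicts by their "type" tag.
extra = TypeAdapter(ToolResultUIExtra).validate_python(
    {"type": "diff_text", "diff_text": "--- a\n+++ b"}
)
print(type(extra).__name__)              # DiffTextUIExtra
```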
klaude_code/protocol/sub_agent.py
CHANGED

@@ -1,10 +1,13 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
-from typing import Any, Callable
+from typing import TYPE_CHECKING, Any, Callable
 
 from klaude_code.protocol import tools
 
+if TYPE_CHECKING:
+    from klaude_code.protocol import model
+
 AvailabilityPredicate = Callable[[str], bool]
 PromptBuilder = Callable[[dict[str, Any]], str]
 
@@ -14,6 +17,7 @@ class SubAgentResult:
     task_result: str
     session_id: str
     error: bool = False
+    task_metadata: model.TaskMetadata | None = None
 
 
 def _default_prompt_builder(args: dict[str, Any]) -> str: