klaude-code 1.2.8__py3-none-any.whl → 1.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/codex/__init__.py +1 -1
- klaude_code/cli/main.py +12 -1
- klaude_code/cli/runtime.py +7 -11
- klaude_code/command/__init__.py +68 -21
- klaude_code/command/clear_cmd.py +6 -2
- klaude_code/command/command_abc.py +5 -2
- klaude_code/command/diff_cmd.py +5 -2
- klaude_code/command/export_cmd.py +7 -4
- klaude_code/command/help_cmd.py +6 -2
- klaude_code/command/model_cmd.py +5 -2
- klaude_code/command/prompt-deslop.md +14 -0
- klaude_code/command/prompt_command.py +8 -3
- klaude_code/command/refresh_cmd.py +6 -2
- klaude_code/command/registry.py +17 -5
- klaude_code/command/release_notes_cmd.py +89 -0
- klaude_code/command/status_cmd.py +98 -56
- klaude_code/command/terminal_setup_cmd.py +7 -4
- klaude_code/const/__init__.py +1 -1
- klaude_code/core/agent.py +66 -26
- klaude_code/core/executor.py +2 -2
- klaude_code/core/manager/agent_manager.py +6 -7
- klaude_code/core/manager/llm_clients.py +47 -22
- klaude_code/core/manager/llm_clients_builder.py +19 -7
- klaude_code/core/manager/sub_agent_manager.py +6 -2
- klaude_code/core/prompt.py +38 -28
- klaude_code/core/reminders.py +4 -7
- klaude_code/core/task.py +59 -40
- klaude_code/core/tool/__init__.py +2 -0
- klaude_code/core/tool/file/_utils.py +30 -0
- klaude_code/core/tool/file/apply_patch_tool.py +1 -1
- klaude_code/core/tool/file/edit_tool.py +6 -31
- klaude_code/core/tool/file/multi_edit_tool.py +7 -32
- klaude_code/core/tool/file/read_tool.py +6 -18
- klaude_code/core/tool/file/write_tool.py +6 -31
- klaude_code/core/tool/memory/__init__.py +5 -0
- klaude_code/core/tool/memory/memory_tool.py +2 -2
- klaude_code/core/tool/memory/skill_loader.py +2 -1
- klaude_code/core/tool/memory/skill_tool.py +13 -0
- klaude_code/core/tool/sub_agent_tool.py +2 -1
- klaude_code/core/tool/todo/todo_write_tool.py +1 -1
- klaude_code/core/tool/todo/update_plan_tool.py +1 -1
- klaude_code/core/tool/tool_context.py +21 -4
- klaude_code/core/tool/tool_runner.py +5 -8
- klaude_code/core/tool/web/mermaid_tool.py +1 -4
- klaude_code/core/turn.py +40 -37
- klaude_code/llm/__init__.py +2 -12
- klaude_code/llm/anthropic/client.py +14 -44
- klaude_code/llm/client.py +2 -2
- klaude_code/llm/codex/client.py +4 -3
- klaude_code/llm/input_common.py +0 -6
- klaude_code/llm/openai_compatible/client.py +31 -74
- klaude_code/llm/openai_compatible/input.py +6 -4
- klaude_code/llm/openai_compatible/stream_processor.py +82 -0
- klaude_code/llm/openrouter/client.py +32 -62
- klaude_code/llm/openrouter/input.py +4 -27
- klaude_code/llm/registry.py +33 -7
- klaude_code/llm/responses/client.py +16 -48
- klaude_code/llm/responses/input.py +1 -1
- klaude_code/llm/usage.py +61 -11
- klaude_code/protocol/commands.py +1 -0
- klaude_code/protocol/events.py +11 -2
- klaude_code/protocol/model.py +147 -24
- klaude_code/protocol/op.py +1 -0
- klaude_code/protocol/sub_agent.py +5 -1
- klaude_code/session/export.py +56 -32
- klaude_code/session/session.py +43 -21
- klaude_code/session/templates/export_session.html +4 -1
- klaude_code/ui/core/input.py +1 -1
- klaude_code/ui/modes/repl/__init__.py +1 -5
- klaude_code/ui/modes/repl/clipboard.py +5 -5
- klaude_code/ui/modes/repl/event_handler.py +153 -54
- klaude_code/ui/modes/repl/renderer.py +4 -4
- klaude_code/ui/renderers/developer.py +35 -25
- klaude_code/ui/renderers/metadata.py +68 -30
- klaude_code/ui/renderers/tools.py +53 -87
- klaude_code/ui/rich/markdown.py +5 -5
- klaude_code/ui/terminal/control.py +2 -2
- klaude_code/version.py +3 -3
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/METADATA +1 -1
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/RECORD +82 -78
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.8.dist-info → klaude_code-1.2.10.dist-info}/entry_points.txt +0 -0
klaude_code/llm/responses/client.py
CHANGED

@@ -1,5 +1,4 @@
 import json
-import time
 from collections.abc import AsyncGenerator
 from typing import TYPE_CHECKING, override
 
@@ -12,7 +11,7 @@ from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
-from klaude_code.llm.usage import
+from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
@@ -24,12 +23,9 @@ if TYPE_CHECKING:
 async def parse_responses_stream(
     stream: "AsyncStream[ResponseStreamEvent]",
     param: llm_param.LLMCallParameter,
-
-    request_start_time: float,
+    metadata_tracker: MetadataTracker,
 ) -> AsyncGenerator[model.ConversationItem, None]:
     """Parse OpenAI Responses API stream events into ConversationItems."""
-    first_token_time: float | None = None
-    last_token_time: float | None = None
     response_id: str | None = None
 
     try:
@@ -52,9 +48,7 @@ async def parse_responses_stream(
                         model=str(param.model),
                     )
                 case responses.ResponseTextDeltaEvent() as event:
-
-                    first_token_time = time.time()
-                    last_token_time = time.time()
+                    metadata_tracker.record_token()
                     yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
                 case responses.ResponseOutputItemAddedEvent() as event:
                     if isinstance(event.item, responses.ResponseFunctionToolCall):
@@ -86,9 +80,7 @@ async def parse_responses_stream(
                             response_id=response_id,
                         )
                     case responses.ResponseFunctionToolCall() as item:
-
-                        first_token_time = time.time()
-                        last_token_time = time.time()
+                        metadata_tracker.record_token()
                         yield model.ToolCallItem(
                             name=item.name,
                             arguments=item.arguments.strip(),
@@ -99,47 +91,23 @@ async def parse_responses_stream(
                     case _:
                         pass
                 case responses.ResponseCompletedEvent() as event:
-                    usage: model.Usage | None = None
                     error_reason: str | None = None
                     if event.response.incomplete_details is not None:
                         error_reason = event.response.incomplete_details.reason
                     if event.response.usage is not None:
-
-                        context_usage_percent = (
-                            (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                        )
-
-                        throughput_tps: float | None = None
-                        first_token_latency_ms: float | None = None
-
-                        if first_token_time is not None:
-                            first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-                        if (
-                            first_token_time is not None
-                            and last_token_time is not None
-                            and event.response.usage.output_tokens > 0
-                        ):
-                            time_duration = last_token_time - first_token_time
-                            if time_duration >= 0.15:
-                                throughput_tps = event.response.usage.output_tokens / time_duration
-
-                        usage = model.Usage(
+                        usage = convert_responses_usage(
                             input_tokens=event.response.usage.input_tokens,
+                            output_tokens=event.response.usage.output_tokens,
                             cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
                             reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
-
-
-
-                            throughput_tps=throughput_tps,
-                            first_token_latency_ms=first_token_latency_ms,
+                            total_tokens=event.response.usage.total_tokens,
+                            context_limit=param.context_limit,
+                            max_tokens=param.max_tokens,
                         )
-
-
-
-
-                            model_name=str(param.model),
-                        )
+                        metadata_tracker.set_usage(usage)
+                        metadata_tracker.set_model_name(str(param.model))
+                        metadata_tracker.set_response_id(response_id)
+                        yield metadata_tracker.finalize()
                     if event.response.status != "completed":
                         error_message = f"LLM response finished with status '{event.response.status}'"
                         if error_reason:
@@ -192,7 +160,7 @@ class ResponsesClient(LLMClientABC):
     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())
 
-
+        metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
         inputs = convert_history_to_input(param.input, param.model)
         tools = convert_tool_schema(param.tools)
@@ -224,11 +192,11 @@ class ResponsesClient(LLMClientABC):
                 }
                 if param.thinking and param.thinking.reasoning_effort
                 else None,
-                extra_headers={"extra": json.dumps({"session_id": param.session_id})},
+                extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
             return
 
-        async for item in parse_responses_stream(stream, param,
+        async for item in parse_responses_stream(stream, param, metadata_tracker):
            yield item
klaude_code/llm/usage.py
CHANGED

@@ -27,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> No
     # Cache read cost
     usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read
 
-    # Total cost
-    usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
-
 
 class MetadataTracker:
     """Tracks timing and metadata for LLM responses."""
@@ -95,18 +92,71 @@ class MetadataTracker:
         return self._metadata_item
 
 
-def convert_usage(
-
-
-
+def convert_usage(
+    usage: openai.types.CompletionUsage,
+    context_limit: int | None = None,
+    max_tokens: int | None = None,
+) -> model.Usage:
+    """Convert OpenAI CompletionUsage to internal Usage model.
+
+    context_token is set to total_tokens from the API response,
+    representing the actual context window usage for this turn.
+    """
     return model.Usage(
         input_tokens=usage.prompt_tokens,
         cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
         reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
         or 0,
         output_tokens=usage.completion_tokens,
-
-
-
-
+        context_token=usage.total_tokens,
+        context_limit=context_limit,
+        max_tokens=max_tokens,
+    )
+
+
+def convert_anthropic_usage(
+    input_tokens: int,
+    output_tokens: int,
+    cached_tokens: int,
+    context_limit: int | None = None,
+    max_tokens: int | None = None,
+) -> model.Usage:
+    """Convert Anthropic usage data to internal Usage model.
+
+    context_token is computed from input + cached + output tokens,
+    representing the actual context window usage for this turn.
+    """
+    context_token = input_tokens + cached_tokens + output_tokens
+    return model.Usage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_tokens=cached_tokens,
+        context_token=context_token,
+        context_limit=context_limit,
+        max_tokens=max_tokens,
+    )
+
+
+def convert_responses_usage(
+    input_tokens: int,
+    output_tokens: int,
+    cached_tokens: int,
+    reasoning_tokens: int,
+    total_tokens: int,
+    context_limit: int | None = None,
+    max_tokens: int | None = None,
+) -> model.Usage:
+    """Convert OpenAI Responses API usage data to internal Usage model.
+
+    context_token is set to total_tokens from the API response,
+    representing the actual context window usage for this turn.
+    """
+    return model.Usage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_tokens=cached_tokens,
+        reasoning_tokens=reasoning_tokens,
+        context_token=total_tokens,
+        context_limit=context_limit,
+        max_tokens=max_tokens,
     )
klaude_code/protocol/commands.py
CHANGED

@@ -12,6 +12,7 @@ class CommandName(str, Enum):
     TERMINAL_SETUP = "terminal-setup"
     EXPORT = "export"
     STATUS = "status"
+    RELEASE_NOTES = "release-notes"
     # PLAN and DOC are dynamically registered now, but kept here if needed for reference
     # or we can remove them if no code explicitly imports them.
     # PLAN = "plan"
klaude_code/protocol/events.py
CHANGED

@@ -91,15 +91,23 @@ class ToolResultEvent(BaseModel):
     ui_extra: model.ToolResultUIExtra | None = None
     status: Literal["success", "error"]
     is_replay: bool = False
+    task_metadata: model.TaskMetadata | None = None  # Sub-agent task metadata
 
 
 class ResponseMetadataEvent(BaseModel):
-    """
+    """Internal event for turn-level metadata. Not exposed to UI directly."""
 
     session_id: str
     metadata: model.ResponseMetadataItem
 
 
+class TaskMetadataEvent(BaseModel):
+    """Task-level aggregated metadata for UI display."""
+
+    session_id: str
+    metadata: model.TaskMetadataItem
+
+
 class UserMessageEvent(BaseModel):
     session_id: str
     content: str
@@ -127,7 +135,7 @@ HistoryItemEvent = (
     | ToolCallEvent
     | ToolResultEvent
     | UserMessageEvent
-
+    | TaskMetadataEvent
     | InterruptEvent
     | DeveloperMessageEvent
     | ErrorEvent
@@ -150,6 +158,7 @@ Event = (
     | ToolCallEvent
     | ToolResultEvent
     | ResponseMetadataEvent
+    | TaskMetadataEvent
     | ReplayHistoryEvent
     | ErrorEvent
     | EndEvent
klaude_code/protocol/model.py
CHANGED

@@ -1,9 +1,10 @@
 from datetime import datetime
 from enum import Enum
-from typing import Literal
+from typing import Annotated, Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field, computed_field
 
+from klaude_code import const
 from klaude_code.protocol.commands import CommandName
 from klaude_code.protocol.tools import SubAgentType
 
@@ -12,12 +13,17 @@ TodoStatusType = Literal["pending", "in_progress", "completed"]
 
 
 class Usage(BaseModel):
+    # Token Usage (primary state)
     input_tokens: int = 0
     cached_tokens: int = 0
     reasoning_tokens: int = 0
     output_tokens: int = 0
-
-
+
+    # Context window tracking
+    context_token: int | None = None  # Peak total_tokens seen (for context usage display)
+    context_limit: int | None = None  # Model's context limit
+    max_tokens: int | None = None  # Max output tokens for this request
+
     throughput_tps: float | None = None
     first_token_latency_ms: float | None = None
 
@@ -25,14 +31,42 @@ class Usage(BaseModel):
     input_cost: float | None = None  # Cost for non-cached input tokens
     output_cost: float | None = None  # Cost for output tokens (including reasoning)
     cache_read_cost: float | None = None  # Cost for cached tokens
-    total_cost: float | None = None  # Total cost (input + output + cache_read)
     currency: str = "USD"  # Currency for cost display (USD or CNY)
 
+    @computed_field
+    @property
+    def total_tokens(self) -> int:
+        """Total tokens computed from input + output tokens."""
+        return self.input_tokens + self.output_tokens
+
+    @computed_field
+    @property
+    def total_cost(self) -> float | None:
+        """Total cost computed from input + output + cache_read costs."""
+        costs = [self.input_cost, self.output_cost, self.cache_read_cost]
+        non_none = [c for c in costs if c is not None]
+        return sum(non_none) if non_none else None
+
+    @computed_field
+    @property
+    def context_usage_percent(self) -> float | None:
+        """Context usage percentage computed from context_token / (context_limit - max_tokens)."""
+        if self.context_limit is None or self.context_limit <= 0:
+            return None
+        if self.context_token is None:
+            return None
+        effective_limit = self.context_limit - (self.max_tokens or const.DEFAULT_MAX_TOKENS)
+        if effective_limit <= 0:
+            return None
+        return (self.context_token / effective_limit) * 100
+
 
 class TodoItem(BaseModel):
+    model_config = ConfigDict(populate_by_name=True)
+
     content: str
     status: TodoStatusType
-
+    active_form: str = Field(default="", alias="activeForm")
 
 
 class TodoUIExtra(BaseModel):
@@ -40,43 +74,55 @@ class TodoUIExtra(BaseModel):
     new_completed: list[str]
 
 
-class ToolResultUIExtraType(str, Enum):
-    DIFF_TEXT = "diff_text"
-    TODO_LIST = "todo_list"
-    SESSION_ID = "session_id"
-    MERMAID_LINK = "mermaid_link"
-    TRUNCATION = "truncation"
-    SESSION_STATUS = "session_status"
-
-
 class ToolSideEffect(str, Enum):
     TODO_CHANGE = "todo_change"
 
 
+# Discriminated union types for ToolResultUIExtra
+class DiffTextUIExtra(BaseModel):
+    type: Literal["diff_text"] = "diff_text"
+    diff_text: str
+
+
+class TodoListUIExtra(BaseModel):
+    type: Literal["todo_list"] = "todo_list"
+    todo_list: TodoUIExtra
+
+
+class SessionIdUIExtra(BaseModel):
+    type: Literal["session_id"] = "session_id"
+    session_id: str
+
+
 class MermaidLinkUIExtra(BaseModel):
+    type: Literal["mermaid_link"] = "mermaid_link"
     link: str
     line_count: int
 
 
 class TruncationUIExtra(BaseModel):
+    type: Literal["truncation"] = "truncation"
     saved_file_path: str
     original_length: int
     truncated_length: int
 
 
 class SessionStatusUIExtra(BaseModel):
+    type: Literal["session_status"] = "session_status"
     usage: "Usage"
     task_count: int
+    by_model: list["TaskMetadata"] = []
 
 
-
-
-
-
-
-
-
+ToolResultUIExtra = Annotated[
+    DiffTextUIExtra
+    | TodoListUIExtra
+    | SessionIdUIExtra
+    | MermaidLinkUIExtra
+    | TruncationUIExtra
+    | SessionStatusUIExtra,
+    Field(discriminator="type"),
+]
 
 
 class AtPatternParseResult(BaseModel):
@@ -240,6 +286,7 @@ class ToolResultItem(BaseModel):
     ui_extra: ToolResultUIExtra | None = None  # Extra data for UI display, e.g. diff render
     images: list[ImageURLPart] | None = None
     side_effects: list[ToolSideEffect] | None = None
+    task_metadata: "TaskMetadata | None" = None  # Sub-agent task metadata for propagation to main agent
     created_at: datetime = Field(default_factory=datetime.now)
 
 
@@ -255,6 +302,8 @@ class StreamErrorItem(BaseModel):
 
 
 class ResponseMetadataItem(BaseModel):
+    """Metadata for a single LLM response (turn-level)."""
+
     response_id: str | None = None
     usage: Usage | None = None
     model_name: str = ""
@@ -263,6 +312,73 @@ class ResponseMetadataItem(BaseModel):
     created_at: datetime = Field(default_factory=datetime.now)
 
 
+class TaskMetadata(BaseModel):
+    """Base metadata for a task execution (used by both main and sub-agents)."""
+
+    usage: Usage | None = None
+    model_name: str = ""
+    provider: str | None = None
+    task_duration_s: float | None = None
+
+    @staticmethod
+    def aggregate_by_model(metadata_list: list["TaskMetadata"]) -> list["TaskMetadata"]:
+        """Aggregate multiple TaskMetadata by (model_name, provider).
+
+        Returns a list sorted by total_cost descending.
+
+        Note: total_tokens and total_cost are now computed fields,
+        so we only accumulate the primary state fields here.
+        """
+        aggregated: dict[tuple[str, str | None], TaskMetadata] = {}
+
+        for meta in metadata_list:
+            if not meta.usage:
+                continue
+
+            key = (meta.model_name, meta.provider)
+            usage = meta.usage
+
+            if key not in aggregated:
+                aggregated[key] = TaskMetadata(
+                    model_name=meta.model_name,
+                    provider=meta.provider,
+                    usage=Usage(currency=usage.currency),
+                )
+
+            agg = aggregated[key]
+            if agg.usage is None:
+                continue
+
+            # Accumulate primary token fields (total_tokens is computed)
+            agg.usage.input_tokens += usage.input_tokens
+            agg.usage.cached_tokens += usage.cached_tokens
+            agg.usage.reasoning_tokens += usage.reasoning_tokens
+            agg.usage.output_tokens += usage.output_tokens
+
+            # Accumulate cost components (total_cost is computed)
+            if usage.input_cost is not None:
+                agg.usage.input_cost = (agg.usage.input_cost or 0.0) + usage.input_cost
+            if usage.output_cost is not None:
+                agg.usage.output_cost = (agg.usage.output_cost or 0.0) + usage.output_cost
+            if usage.cache_read_cost is not None:
+                agg.usage.cache_read_cost = (agg.usage.cache_read_cost or 0.0) + usage.cache_read_cost
+
+        # Sort by total_cost descending
+        return sorted(
+            aggregated.values(),
+            key=lambda m: m.usage.total_cost if m.usage and m.usage.total_cost else 0.0,
+            reverse=True,
+        )
+
+
+class TaskMetadataItem(BaseModel):
+    """Aggregated metadata for a complete task, stored in conversation history."""
+
+    main: TaskMetadata = Field(default_factory=TaskMetadata)
+    sub_agent_task_metadata: list[TaskMetadata] = Field(default_factory=lambda: list[TaskMetadata]())
+    created_at: datetime = Field(default_factory=datetime.now)
+
+
 MessageItem = (
     UserMessageItem
     | AssistantMessageItem
@@ -278,7 +394,14 @@ MessageItem = (
 StreamItem = AssistantMessageDelta
 
 ConversationItem = (
-    StartItem
+    StartItem
+    | InterruptItem
+    | StreamErrorItem
+    | StreamItem
+    | MessageItem
+    | ResponseMetadataItem
+    | TaskMetadataItem
+    | ToolCallStartItem
 )
 
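
With total_tokens, total_cost, and context_usage_percent now computed fields, callers no longer set them directly; they fall out of the primary token and cost fields, and ui_extra payloads become a discriminated union keyed on "type". A hedged sketch with hypothetical numbers (const.DEFAULT_MAX_TOKENS is only used when max_tokens is unset and is not shown in this diff):

from pydantic import TypeAdapter

from klaude_code.protocol.model import DiffTextUIExtra, TaskMetadata, ToolResultUIExtra, Usage

u = Usage(
    input_tokens=9_200,
    output_tokens=300,
    cached_tokens=8_000,
    input_cost=0.012,
    output_cost=0.006,
    cache_read_cost=0.001,
    context_token=9_500,
    context_limit=200_000,
    max_tokens=8_192,
)
assert u.total_tokens == 9_500                    # input_tokens + output_tokens
assert round(u.total_cost, 6) == 0.019            # sum of the three cost components
# context_usage_percent = context_token / (context_limit - max_tokens) * 100
assert u.context_usage_percent is not None and 4.9 < u.context_usage_percent < 5.0

# Per-model aggregation sums only the primary fields; the totals stay computed.
merged = TaskMetadata.aggregate_by_model(
    [
        TaskMetadata(model_name="example-model", provider="example", usage=u),
        TaskMetadata(model_name="example-model", provider="example", usage=u),
    ]
)
assert merged[0].usage is not None and round(merged[0].usage.total_cost, 6) == 0.038

# The discriminated ToolResultUIExtra union dispatches on the "type" field.
extra = TypeAdapter(ToolResultUIExtra).validate_python({"type": "diff_text", "diff_text": "-a\n+b"})
assert isinstance(extra, DiffTextUIExtra)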
klaude_code/protocol/op.py
CHANGED

@@ -1,10 +1,13 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
-from typing import Any, Callable
+from typing import TYPE_CHECKING, Any, Callable
 
 from klaude_code.protocol import tools
 
+if TYPE_CHECKING:
+    from klaude_code.protocol import model
+
 AvailabilityPredicate = Callable[[str], bool]
 PromptBuilder = Callable[[dict[str, Any]], str]
 
@@ -14,6 +17,7 @@ class SubAgentResult:
     task_result: str
     session_id: str
     error: bool = False
+    task_metadata: model.TaskMetadata | None = None
 
 
 def _default_prompt_builder(args: dict[str, Any]) -> str:
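
The TYPE_CHECKING import keeps op.py free of a runtime dependency on the protocol models while still letting SubAgentResult carry the new task metadata back to the main agent. A hedged sketch of a sub-agent handing its usage back, assuming the fields shown in this hunk are the only required ones (all values are hypothetical):

from klaude_code.protocol import model
from klaude_code.protocol.op import SubAgentResult

result = SubAgentResult(
    task_result="Explored the repository and summarized the findings.",
    session_id="sub-session-42",
    task_metadata=model.TaskMetadata(
        model_name="example-small-model",
        provider="example",
        usage=model.Usage(input_tokens=5_000, output_tokens=800),
        task_duration_s=21.7,
    ),
)
assert result.error is False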
|