klaude_code-1.2.7-py3-none-any.whl → klaude_code-1.2.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/codex/__init__.py +1 -1
- klaude_code/command/__init__.py +2 -0
- klaude_code/command/prompt-deslop.md +14 -0
- klaude_code/command/release_notes_cmd.py +86 -0
- klaude_code/command/status_cmd.py +92 -54
- klaude_code/core/agent.py +13 -19
- klaude_code/core/manager/sub_agent_manager.py +5 -1
- klaude_code/core/prompt.py +38 -28
- klaude_code/core/reminders.py +4 -4
- klaude_code/core/task.py +60 -45
- klaude_code/core/tool/__init__.py +2 -0
- klaude_code/core/tool/file/apply_patch_tool.py +1 -1
- klaude_code/core/tool/file/edit_tool.py +1 -1
- klaude_code/core/tool/file/multi_edit_tool.py +1 -1
- klaude_code/core/tool/file/write_tool.py +1 -1
- klaude_code/core/tool/memory/memory_tool.py +2 -2
- klaude_code/core/tool/sub_agent_tool.py +2 -1
- klaude_code/core/tool/todo/todo_write_tool.py +1 -1
- klaude_code/core/tool/todo/update_plan_tool.py +1 -1
- klaude_code/core/tool/tool_context.py +21 -4
- klaude_code/core/tool/tool_runner.py +5 -8
- klaude_code/core/tool/web/mermaid_tool.py +1 -4
- klaude_code/core/turn.py +90 -62
- klaude_code/llm/anthropic/client.py +15 -46
- klaude_code/llm/client.py +1 -1
- klaude_code/llm/codex/client.py +44 -30
- klaude_code/llm/input_common.py +0 -6
- klaude_code/llm/openai_compatible/client.py +29 -73
- klaude_code/llm/openai_compatible/input.py +6 -4
- klaude_code/llm/openai_compatible/stream_processor.py +82 -0
- klaude_code/llm/openrouter/client.py +29 -59
- klaude_code/llm/openrouter/input.py +4 -27
- klaude_code/llm/responses/client.py +49 -79
- klaude_code/llm/usage.py +51 -10
- klaude_code/protocol/commands.py +1 -0
- klaude_code/protocol/events.py +12 -2
- klaude_code/protocol/model.py +142 -26
- klaude_code/protocol/sub_agent.py +5 -1
- klaude_code/session/export.py +51 -27
- klaude_code/session/session.py +33 -16
- klaude_code/session/templates/export_session.html +4 -1
- klaude_code/ui/modes/repl/__init__.py +1 -5
- klaude_code/ui/modes/repl/event_handler.py +153 -54
- klaude_code/ui/modes/repl/renderer.py +6 -4
- klaude_code/ui/renderers/developer.py +35 -25
- klaude_code/ui/renderers/metadata.py +68 -30
- klaude_code/ui/renderers/tools.py +53 -87
- klaude_code/ui/rich/markdown.py +5 -5
- {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/METADATA +1 -1
- {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/RECORD +52 -49
- {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.7.dist-info → klaude_code-1.2.9.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/client.py
CHANGED

@@ -1,5 +1,5 @@
 from collections.abc import AsyncGenerator
-from typing import …
+from typing import override

 import httpx
 import openai
@@ -7,7 +7,7 @@ import openai
 from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.openai_compatible.input import convert_tool_schema
-from klaude_code.llm.openai_compatible.…
+from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager
 from klaude_code.llm.openrouter.input import convert_history_to_input, is_claude_model
 from klaude_code.llm.openrouter.reasoning_handler import ReasoningDetail, ReasoningStreamHandler
 from klaude_code.llm.registry import register
@@ -77,36 +77,14 @@ class OpenRouterClient(LLMClientABC):
             extra_headers=extra_headers,  # pyright: ignore[reportUnknownArgumentType]
         )

-        stage: Literal["waiting", "reasoning", "assistant", "tool", "done"] = "waiting"
-        response_id: str | None = None
-        accumulated_content: list[str] = []
-        accumulated_tool_calls: ToolCallAccumulatorABC = BasicToolCallAccumulator()
-        emitted_tool_start_indices: set[int] = set()
         reasoning_handler = ReasoningStreamHandler(
             param_model=str(param.model),
-            response_id=…
+            response_id=None,
+        )
+        state = StreamStateManager(
+            param_model=str(param.model),
+            reasoning_flusher=reasoning_handler.flush,
         )
-
-        def flush_reasoning_items() -> list[model.ConversationItem]:
-            return reasoning_handler.flush()
-
-        def flush_assistant_items() -> list[model.ConversationItem]:
-            nonlocal accumulated_content
-            if len(accumulated_content) == 0:
-                return []
-            item = model.AssistantMessageItem(
-                content="".join(accumulated_content),
-                response_id=response_id,
-            )
-            accumulated_content = []
-            return [item]
-
-        def flush_tool_call_items() -> list[model.ToolCallItem]:
-            nonlocal accumulated_tool_calls
-            items: list[model.ToolCallItem] = accumulated_tool_calls.get()
-            if items:
-                accumulated_tool_calls.chunks_by_step = []  # pyright: ignore[reportAttributeAccessIssue]
-            return items

         try:
             async for event in await stream:
@@ -115,11 +93,10 @@ class OpenRouterClient(LLMClientABC):
                     style="blue",
                     debug_type=DebugType.LLM_STREAM,
                 )
-                if not response_id and event.id:
-                    …
-                    reasoning_handler.set_response_id(…
-                    …
-                    yield model.StartItem(response_id=response_id)
+                if not state.response_id and event.id:
+                    state.set_response_id(event.id)
+                    reasoning_handler.set_response_id(event.id)
+                    yield model.StartItem(response_id=event.id)
                 if (
                     event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison]
                 ):  # gcp gemini will return None usage field
@@ -140,7 +117,7 @@ class OpenRouterClient(LLMClientABC):
                         try:
                             reasoning_detail = ReasoningDetail.model_validate(item)
                             metadata_tracker.record_token()
-                            stage = "reasoning"
+                            state.stage = "reasoning"
                             for conversation_item in reasoning_handler.on_detail(reasoning_detail):
                                 yield conversation_item
                         except Exception as e:
@@ -148,53 +125,46 @@ class OpenRouterClient(LLMClientABC):

                 # Assistant
                 if delta.content and (
-                    stage == "assistant" or delta.content.strip()
+                    state.stage == "assistant" or delta.content.strip()
                 ):  # Process all content in assistant stage, filter empty content in reasoning stage
                     metadata_tracker.record_token()
-                    if stage == "reasoning":
-                        for item in …
+                    if state.stage == "reasoning":
+                        for item in state.flush_reasoning():
                             yield item
-                    stage = "assistant"
-                    accumulated_content.append(delta.content)
+                    state.stage = "assistant"
+                    state.accumulated_content.append(delta.content)
                     yield model.AssistantMessageDelta(
                         content=delta.content,
-                        response_id=response_id,
+                        response_id=state.response_id,
                     )

                 # Tool
                 if delta.tool_calls and len(delta.tool_calls) > 0:
                     metadata_tracker.record_token()
-                    if stage == "reasoning":
-                        for item in …
+                    if state.stage == "reasoning":
+                        for item in state.flush_reasoning():
                             yield item
-                    elif stage == "assistant":
-                        for item in …
+                    elif state.stage == "assistant":
+                        for item in state.flush_assistant():
                             yield item
-                    stage = "tool"
+                    state.stage = "tool"
                     # Emit ToolCallStartItem for new tool calls
                     for tc in delta.tool_calls:
-                        if tc.index not in emitted_tool_start_indices and tc.function and tc.function.name:
-                            emitted_tool_start_indices.add(tc.index)
+                        if tc.index not in state.emitted_tool_start_indices and tc.function and tc.function.name:
+                            state.emitted_tool_start_indices.add(tc.index)
                             yield model.ToolCallStartItem(
-                                response_id=response_id,
+                                response_id=state.response_id,
                                 call_id=tc.id or "",
                                 name=tc.function.name,
                             )
-                    accumulated_tool_calls.add(delta.tool_calls)
+                    state.accumulated_tool_calls.add(delta.tool_calls)

         except (openai.OpenAIError, httpx.HTTPError) as e:
             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")

         # Finalize
-        for item in …
-            yield item
-
-        for item in flush_assistant_items():
+        for item in state.flush_all():
             yield item

-
-        for tool_call_item in flush_tool_call_items():
-            yield tool_call_item
-
-        metadata_tracker.set_response_id(response_id)
+        metadata_tracker.set_response_id(state.response_id)
         yield metadata_tracker.finalize()
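The refactor above replaces the ad-hoc `stage`/`accumulated_*` locals and the nested `flush_*` closures with a single `StreamStateManager` from the new `klaude_code/llm/openai_compatible/stream_processor.py` (+82 lines, not included in this diff). A minimal sketch of the interface implied by these call sites; the attribute names come from the diff, but the method bodies are reconstructed from the deleted closures rather than taken from the real module.

```python
# Sketch only: reconstructed from how OpenRouterClient uses `state` above and from the
# deleted flush_* closures; the actual stream_processor.py may differ.
from collections.abc import Callable
from typing import Any, Literal

from klaude_code.protocol import model

Stage = Literal["waiting", "reasoning", "assistant", "tool", "done"]


class StreamStateManager:
    def __init__(self, param_model: str, reasoning_flusher: Callable[[], list[model.ConversationItem]]) -> None:
        self.param_model = param_model
        self._flush_reasoning_cb = reasoning_flusher
        self.stage: Stage = "waiting"
        self.response_id: str | None = None
        self.accumulated_content: list[str] = []
        # The real class presumably owns the same tool-call accumulator the removed code
        # created inline; its import line is truncated in this diff, so it is left loose here.
        self.accumulated_tool_calls: Any = None
        self.emitted_tool_start_indices: set[int] = set()

    def set_response_id(self, response_id: str) -> None:
        self.response_id = response_id

    def flush_reasoning(self) -> list[model.ConversationItem]:
        return self._flush_reasoning_cb()

    def flush_assistant(self) -> list[model.ConversationItem]:
        # Mirrors the deleted flush_assistant_items() closure.
        if not self.accumulated_content:
            return []
        item = model.AssistantMessageItem(content="".join(self.accumulated_content), response_id=self.response_id)
        self.accumulated_content = []
        return [item]

    def flush_all(self) -> list[model.ConversationItem]:
        # Finalize in the same order the old code used: reasoning, buffered assistant text, tool calls.
        items = [*self.flush_reasoning(), *self.flush_assistant()]
        if self.accumulated_tool_calls is not None:
            items.extend(self.accumulated_tool_calls.get())
        return items
```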
klaude_code/llm/openrouter/input.py
CHANGED

@@ -7,9 +7,9 @@
 # pyright: reportGeneralTypeIssues=false

 from openai.types import chat
-from openai.types.chat import ChatCompletionContentPartParam

-from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, …
+from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, parse_message_groups
+from klaude_code.llm.openai_compatible.input import tool_group_to_openai_message, user_group_to_openai_message
 from klaude_code.protocol import model


@@ -25,29 +25,6 @@ def is_gemini_model(model_name: str | None) -> bool:
     return model_name is not None and model_name.startswith("google/gemini")


-def _user_group_to_message(group: UserGroup) -> chat.ChatCompletionMessageParam:
-    parts: list[ChatCompletionContentPartParam] = []
-    for text in group.text_parts:
-        parts.append({"type": "text", "text": text + "\n"})
-    for image in group.images:
-        parts.append({"type": "image_url", "image_url": {"url": image.image_url.url}})
-    if not parts:
-        parts.append({"type": "text", "text": ""})
-    return {"role": "user", "content": parts}
-
-
-def _tool_group_to_message(group: ToolGroup) -> chat.ChatCompletionMessageParam:
-    merged_text = merge_reminder_text(
-        group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
-        group.reminder_texts,
-    )
-    return {
-        "role": "tool",
-        "content": [{"type": "text", "text": merged_text}],
-        "tool_call_id": group.tool_result.call_id,
-    }
-
-
 def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -> chat.ChatCompletionMessageParam:
     assistant_message: dict[str, object] = {"role": "assistant"}

@@ -150,9 +127,9 @@ def convert_history_to_input(
     for group in parse_message_groups(history):
         match group:
             case UserGroup():
-                messages.append(…
+                messages.append(user_group_to_openai_message(group))
             case ToolGroup():
-                messages.append(…
+                messages.append(tool_group_to_openai_message(group))
             case AssistantGroup():
                 messages.append(_assistant_group_to_message(group, model_name))

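With this change the OpenRouter converter reuses the shared helpers in `klaude_code.llm.openai_compatible.input` instead of its own `_user_group_to_message` / `_tool_group_to_message`. Judging from the deleted code, the shared helpers should produce Chat Completions messages shaped roughly like the following; the values are illustrative and this is not the shared implementation itself.

```python
# Illustrative message shapes, based on the deleted per-provider helpers above.
user_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Please review this diff\n"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
    ],
}

tool_message = {
    "role": "tool",
    "tool_call_id": "call_abc123",
    "content": [
        {"type": "text", "text": "<system-reminder>Tool ran without output or errors</system-reminder>"},
    ],
}
```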
klaude_code/llm/responses/client.py
CHANGED

@@ -1,17 +1,17 @@
 import json
-import time
 from collections.abc import AsyncGenerator
 from typing import TYPE_CHECKING, override

 import httpx
-…
+import openai
+from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai.types import responses

 from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
-from klaude_code.llm.usage import …
+from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug

@@ -23,12 +23,9 @@ if TYPE_CHECKING:
 async def parse_responses_stream(
     stream: "AsyncStream[ResponseStreamEvent]",
     param: llm_param.LLMCallParameter,
-    …
-    request_start_time: float,
+    metadata_tracker: MetadataTracker,
 ) -> AsyncGenerator[model.ConversationItem, None]:
     """Parse OpenAI Responses API stream events into ConversationItems."""
-    first_token_time: float | None = None
-    last_token_time: float | None = None
     response_id: str | None = None

     try:
@@ -51,9 +48,7 @@ async def parse_responses_stream(
                         model=str(param.model),
                     )
                 case responses.ResponseTextDeltaEvent() as event:
-                    …
-                    first_token_time = time.time()
-                    last_token_time = time.time()
+                    metadata_tracker.record_token()
                     yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
                 case responses.ResponseOutputItemAddedEvent() as event:
                     if isinstance(event.item, responses.ResponseFunctionToolCall):
@@ -85,9 +80,7 @@ async def parse_responses_stream(
                                 response_id=response_id,
                             )
                         case responses.ResponseFunctionToolCall() as item:
-                            …
-                            first_token_time = time.time()
-                            last_token_time = time.time()
+                            metadata_tracker.record_token()
                             yield model.ToolCallItem(
                                 name=item.name,
                                 arguments=item.arguments.strip(),
@@ -98,49 +91,22 @@ async def parse_responses_stream(
                         case _:
                             pass
                 case responses.ResponseCompletedEvent() as event:
-                    usage: model.Usage | None = None
                     error_reason: str | None = None
                     if event.response.incomplete_details is not None:
                         error_reason = event.response.incomplete_details.reason
                     if event.response.usage is not None:
-                        …
-                        context_usage_percent = (
-                            (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                        )
-
-                        throughput_tps: float | None = None
-                        first_token_latency_ms: float | None = None
-
-                        if first_token_time is not None:
-                            first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-                        if (
-                            first_token_time is not None
-                            and last_token_time is not None
-                            and event.response.usage.output_tokens > 0
-                        ):
-                            time_duration = last_token_time - first_token_time
-                            if time_duration >= 0.15:
-                                throughput_tps = event.response.usage.output_tokens / time_duration
-
-                        usage = model.Usage(
+                        usage = convert_responses_usage(
                             input_tokens=event.response.usage.input_tokens,
+                            output_tokens=event.response.usage.output_tokens,
                             cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
                             reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
-                            …
-                            …
-                            context_usage_percent=context_usage_percent,
-                            throughput_tps=throughput_tps,
-                            first_token_latency_ms=first_token_latency_ms,
+                            total_tokens=event.response.usage.total_tokens,
+                            context_limit=param.context_limit,
                         )
-
-                        …
-                        …
-                        …
-                            model_name=str(param.model),
-                            status=event.response.status,
-                            error_reason=error_reason,
-                        )
+                        metadata_tracker.set_usage(usage)
+                        metadata_tracker.set_model_name(str(param.model))
+                        metadata_tracker.set_response_id(response_id)
+                        yield metadata_tracker.finalize()
                     if event.response.status != "completed":
                         error_message = f"LLM response finished with status '{event.response.status}'"
                         if error_reason:
@@ -159,7 +125,7 @@ async def parse_responses_stream(
                 style="red",
                 debug_type=DebugType.LLM_STREAM,
             )
-    except …
+    except (openai.OpenAIError, httpx.HTTPError) as e:
         yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")


@@ -193,39 +159,43 @@ class ResponsesClient(LLMClientABC):
     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())

-        …
+        metadata_tracker = MetadataTracker(cost_config=self._config.cost)

         inputs = convert_history_to_input(param.input, param.model)
         tools = convert_tool_schema(param.tools)

-        … (28 removed lines with the previous inline request construction, not preserved in this rendering)
+        try:
+            stream = await call_with_logged_payload(
+                self.client.responses.create,
+                model=str(param.model),
+                tool_choice="auto",
+                parallel_tool_calls=True,
+                include=[
+                    "reasoning.encrypted_content",
+                ],
+                store=param.store,
+                previous_response_id=param.previous_response_id,
+                stream=True,
+                temperature=param.temperature,
+                max_output_tokens=param.max_tokens,
+                input=inputs,
+                instructions=param.system,
+                tools=tools,
+                text={
+                    "verbosity": param.verbosity,
+                },
+                prompt_cache_key=param.session_id or "",
+                reasoning={
+                    "effort": param.thinking.reasoning_effort,
+                    "summary": param.thinking.reasoning_summary,
+                }
+                if param.thinking and param.thinking.reasoning_effort
+                else None,
+                extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
+            )
+        except (openai.OpenAIError, httpx.HTTPError) as e:
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+            return

-        async for item in parse_responses_stream(stream, param, …
+        async for item in parse_responses_stream(stream, param, metadata_tracker):
             yield item
klaude_code/llm/usage.py
CHANGED

@@ -27,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> No
     # Cache read cost
     usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read

-    # Total cost
-    usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
-

 class MetadataTracker:
     """Tracks timing and metadata for LLM responses."""
@@ -96,17 +93,61 @@


 def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
-    """Convert OpenAI CompletionUsage to internal Usage model.
-
-    …
+    """Convert OpenAI CompletionUsage to internal Usage model.
+
+    context_window_size is set to total_tokens from the API response,
+    representing the actual context window usage for this turn.
+    """
     return model.Usage(
         input_tokens=usage.prompt_tokens,
         cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
         reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
         or 0,
         output_tokens=usage.completion_tokens,
-        …
-        …
-        …
-        …
+        context_window_size=usage.total_tokens,
+        context_limit=context_limit,
+    )
+
+
+def convert_anthropic_usage(
+    input_tokens: int,
+    output_tokens: int,
+    cached_tokens: int,
+    context_limit: int | None = None,
+) -> model.Usage:
+    """Convert Anthropic usage data to internal Usage model.
+
+    context_window_size is computed from input + cached + output tokens,
+    representing the actual context window usage for this turn.
+    """
+    context_window_size = input_tokens + cached_tokens + output_tokens
+    return model.Usage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_tokens=cached_tokens,
+        context_window_size=context_window_size,
+        context_limit=context_limit,
+    )
+
+
+def convert_responses_usage(
+    input_tokens: int,
+    output_tokens: int,
+    cached_tokens: int,
+    reasoning_tokens: int,
+    total_tokens: int,
+    context_limit: int | None = None,
+) -> model.Usage:
+    """Convert OpenAI Responses API usage data to internal Usage model.
+
+    context_window_size is set to total_tokens from the API response,
+    representing the actual context window usage for this turn.
+    """
+    return model.Usage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_tokens=cached_tokens,
+        reasoning_tokens=reasoning_tokens,
+        context_window_size=total_tokens,
+        context_limit=context_limit,
     )
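The converters differ mainly in how `context_window_size` is derived: the Chat Completions and Responses converters take `total_tokens` straight from the API, while the Anthropic converter reconstructs it as input + cached + output. Note also that `calculate_cost` no longer writes `usage.total_cost` here; where the total is summed now is not visible in this excerpt. A small illustration of the two conventions with made-up token counts:

```python
from klaude_code.llm.usage import convert_anthropic_usage, convert_responses_usage

# Anthropic reports non-cached input, cached input and output separately, so the
# converter sums them to get the context window usage for the turn.
anthropic_usage = convert_anthropic_usage(
    input_tokens=1_200,
    output_tokens=300,
    cached_tokens=8_500,
    context_limit=200_000,
)
assert anthropic_usage.context_window_size == 1_200 + 8_500 + 300

# The Responses API already reports total_tokens for the turn, so it is passed through.
responses_usage = convert_responses_usage(
    input_tokens=10_000,
    output_tokens=500,
    cached_tokens=9_000,
    reasoning_tokens=150,
    total_tokens=10_500,
    context_limit=200_000,
)
assert responses_usage.context_window_size == 10_500
```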
klaude_code/protocol/commands.py
CHANGED

@@ -12,6 +12,7 @@ class CommandName(str, Enum):
     TERMINAL_SETUP = "terminal-setup"
     EXPORT = "export"
     STATUS = "status"
+    RELEASE_NOTES = "release-notes"
     # PLAN and DOC are dynamically registered now, but kept here if needed for reference
     # or we can remove them if no code explicitly imports them.
     # PLAN = "plan"
klaude_code/protocol/events.py
CHANGED

@@ -91,15 +91,23 @@ class ToolResultEvent(BaseModel):
     ui_extra: model.ToolResultUIExtra | None = None
     status: Literal["success", "error"]
     is_replay: bool = False
+    task_metadata: model.TaskMetadata | None = None  # Sub-agent task metadata


 class ResponseMetadataEvent(BaseModel):
-    """…
+    """Internal event for turn-level metadata. Not exposed to UI directly."""

     session_id: str
     metadata: model.ResponseMetadataItem


+class TaskMetadataEvent(BaseModel):
+    """Task-level aggregated metadata for UI display."""
+
+    session_id: str
+    metadata: model.TaskMetadataItem
+
+
 class UserMessageEvent(BaseModel):
     session_id: str
     content: str
@@ -127,9 +135,10 @@ HistoryItemEvent = (
     | ToolCallEvent
     | ToolResultEvent
     | UserMessageEvent
-    …
+    | TaskMetadataEvent
     | InterruptEvent
     | DeveloperMessageEvent
+    | ErrorEvent
 )


@@ -149,6 +158,7 @@ Event = (
     | ToolCallEvent
     | ToolResultEvent
     | ResponseMetadataEvent
+    | TaskMetadataEvent
     | ReplayHistoryEvent
     | ErrorEvent
     | EndEvent