klaude-code 1.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/__init__.py +0 -0
- klaude_code/cli/__init__.py +1 -0
- klaude_code/cli/main.py +298 -0
- klaude_code/cli/runtime.py +331 -0
- klaude_code/cli/session_cmd.py +80 -0
- klaude_code/command/__init__.py +43 -0
- klaude_code/command/clear_cmd.py +20 -0
- klaude_code/command/command_abc.py +92 -0
- klaude_code/command/diff_cmd.py +138 -0
- klaude_code/command/export_cmd.py +86 -0
- klaude_code/command/help_cmd.py +51 -0
- klaude_code/command/model_cmd.py +43 -0
- klaude_code/command/prompt-dev-docs-update.md +56 -0
- klaude_code/command/prompt-dev-docs.md +46 -0
- klaude_code/command/prompt-init.md +45 -0
- klaude_code/command/prompt_command.py +69 -0
- klaude_code/command/refresh_cmd.py +43 -0
- klaude_code/command/registry.py +110 -0
- klaude_code/command/status_cmd.py +111 -0
- klaude_code/command/terminal_setup_cmd.py +252 -0
- klaude_code/config/__init__.py +11 -0
- klaude_code/config/config.py +177 -0
- klaude_code/config/list_model.py +162 -0
- klaude_code/config/select_model.py +67 -0
- klaude_code/const/__init__.py +133 -0
- klaude_code/core/__init__.py +0 -0
- klaude_code/core/agent.py +165 -0
- klaude_code/core/executor.py +485 -0
- klaude_code/core/manager/__init__.py +19 -0
- klaude_code/core/manager/agent_manager.py +127 -0
- klaude_code/core/manager/llm_clients.py +42 -0
- klaude_code/core/manager/llm_clients_builder.py +49 -0
- klaude_code/core/manager/sub_agent_manager.py +86 -0
- klaude_code/core/prompt.py +89 -0
- klaude_code/core/prompts/prompt-claude-code.md +98 -0
- klaude_code/core/prompts/prompt-codex.md +331 -0
- klaude_code/core/prompts/prompt-gemini.md +43 -0
- klaude_code/core/prompts/prompt-subagent-explore.md +27 -0
- klaude_code/core/prompts/prompt-subagent-oracle.md +23 -0
- klaude_code/core/prompts/prompt-subagent-webfetch.md +46 -0
- klaude_code/core/prompts/prompt-subagent.md +8 -0
- klaude_code/core/reminders.py +445 -0
- klaude_code/core/task.py +237 -0
- klaude_code/core/tool/__init__.py +75 -0
- klaude_code/core/tool/file/__init__.py +0 -0
- klaude_code/core/tool/file/apply_patch.py +492 -0
- klaude_code/core/tool/file/apply_patch_tool.md +1 -0
- klaude_code/core/tool/file/apply_patch_tool.py +204 -0
- klaude_code/core/tool/file/edit_tool.md +9 -0
- klaude_code/core/tool/file/edit_tool.py +274 -0
- klaude_code/core/tool/file/multi_edit_tool.md +42 -0
- klaude_code/core/tool/file/multi_edit_tool.py +199 -0
- klaude_code/core/tool/file/read_tool.md +14 -0
- klaude_code/core/tool/file/read_tool.py +326 -0
- klaude_code/core/tool/file/write_tool.md +8 -0
- klaude_code/core/tool/file/write_tool.py +146 -0
- klaude_code/core/tool/memory/__init__.py +0 -0
- klaude_code/core/tool/memory/memory_tool.md +16 -0
- klaude_code/core/tool/memory/memory_tool.py +462 -0
- klaude_code/core/tool/memory/skill_loader.py +245 -0
- klaude_code/core/tool/memory/skill_tool.md +24 -0
- klaude_code/core/tool/memory/skill_tool.py +97 -0
- klaude_code/core/tool/shell/__init__.py +0 -0
- klaude_code/core/tool/shell/bash_tool.md +43 -0
- klaude_code/core/tool/shell/bash_tool.py +123 -0
- klaude_code/core/tool/shell/command_safety.py +363 -0
- klaude_code/core/tool/sub_agent_tool.py +83 -0
- klaude_code/core/tool/todo/__init__.py +0 -0
- klaude_code/core/tool/todo/todo_write_tool.md +182 -0
- klaude_code/core/tool/todo/todo_write_tool.py +121 -0
- klaude_code/core/tool/todo/update_plan_tool.md +3 -0
- klaude_code/core/tool/todo/update_plan_tool.py +104 -0
- klaude_code/core/tool/tool_abc.py +25 -0
- klaude_code/core/tool/tool_context.py +106 -0
- klaude_code/core/tool/tool_registry.py +78 -0
- klaude_code/core/tool/tool_runner.py +252 -0
- klaude_code/core/tool/truncation.py +170 -0
- klaude_code/core/tool/web/__init__.py +0 -0
- klaude_code/core/tool/web/mermaid_tool.md +21 -0
- klaude_code/core/tool/web/mermaid_tool.py +76 -0
- klaude_code/core/tool/web/web_fetch_tool.md +8 -0
- klaude_code/core/tool/web/web_fetch_tool.py +159 -0
- klaude_code/core/turn.py +220 -0
- klaude_code/llm/__init__.py +21 -0
- klaude_code/llm/anthropic/__init__.py +3 -0
- klaude_code/llm/anthropic/client.py +221 -0
- klaude_code/llm/anthropic/input.py +200 -0
- klaude_code/llm/client.py +49 -0
- klaude_code/llm/input_common.py +239 -0
- klaude_code/llm/openai_compatible/__init__.py +3 -0
- klaude_code/llm/openai_compatible/client.py +211 -0
- klaude_code/llm/openai_compatible/input.py +109 -0
- klaude_code/llm/openai_compatible/tool_call_accumulator.py +80 -0
- klaude_code/llm/openrouter/__init__.py +3 -0
- klaude_code/llm/openrouter/client.py +200 -0
- klaude_code/llm/openrouter/input.py +160 -0
- klaude_code/llm/openrouter/reasoning_handler.py +209 -0
- klaude_code/llm/registry.py +22 -0
- klaude_code/llm/responses/__init__.py +3 -0
- klaude_code/llm/responses/client.py +216 -0
- klaude_code/llm/responses/input.py +167 -0
- klaude_code/llm/usage.py +109 -0
- klaude_code/protocol/__init__.py +4 -0
- klaude_code/protocol/commands.py +21 -0
- klaude_code/protocol/events.py +163 -0
- klaude_code/protocol/llm_param.py +147 -0
- klaude_code/protocol/model.py +287 -0
- klaude_code/protocol/op.py +89 -0
- klaude_code/protocol/op_handler.py +28 -0
- klaude_code/protocol/sub_agent.py +348 -0
- klaude_code/protocol/tools.py +15 -0
- klaude_code/session/__init__.py +4 -0
- klaude_code/session/export.py +624 -0
- klaude_code/session/selector.py +76 -0
- klaude_code/session/session.py +474 -0
- klaude_code/session/templates/export_session.html +1434 -0
- klaude_code/trace/__init__.py +3 -0
- klaude_code/trace/log.py +168 -0
- klaude_code/ui/__init__.py +91 -0
- klaude_code/ui/core/__init__.py +1 -0
- klaude_code/ui/core/display.py +103 -0
- klaude_code/ui/core/input.py +71 -0
- klaude_code/ui/core/stage_manager.py +55 -0
- klaude_code/ui/modes/__init__.py +1 -0
- klaude_code/ui/modes/debug/__init__.py +1 -0
- klaude_code/ui/modes/debug/display.py +36 -0
- klaude_code/ui/modes/exec/__init__.py +1 -0
- klaude_code/ui/modes/exec/display.py +63 -0
- klaude_code/ui/modes/repl/__init__.py +51 -0
- klaude_code/ui/modes/repl/clipboard.py +152 -0
- klaude_code/ui/modes/repl/completers.py +429 -0
- klaude_code/ui/modes/repl/display.py +60 -0
- klaude_code/ui/modes/repl/event_handler.py +375 -0
- klaude_code/ui/modes/repl/input_prompt_toolkit.py +198 -0
- klaude_code/ui/modes/repl/key_bindings.py +170 -0
- klaude_code/ui/modes/repl/renderer.py +281 -0
- klaude_code/ui/renderers/__init__.py +0 -0
- klaude_code/ui/renderers/assistant.py +21 -0
- klaude_code/ui/renderers/common.py +8 -0
- klaude_code/ui/renderers/developer.py +158 -0
- klaude_code/ui/renderers/diffs.py +215 -0
- klaude_code/ui/renderers/errors.py +16 -0
- klaude_code/ui/renderers/metadata.py +190 -0
- klaude_code/ui/renderers/sub_agent.py +71 -0
- klaude_code/ui/renderers/thinking.py +39 -0
- klaude_code/ui/renderers/tools.py +551 -0
- klaude_code/ui/renderers/user_input.py +65 -0
- klaude_code/ui/rich/__init__.py +1 -0
- klaude_code/ui/rich/live.py +65 -0
- klaude_code/ui/rich/markdown.py +308 -0
- klaude_code/ui/rich/quote.py +34 -0
- klaude_code/ui/rich/searchable_text.py +71 -0
- klaude_code/ui/rich/status.py +240 -0
- klaude_code/ui/rich/theme.py +274 -0
- klaude_code/ui/terminal/__init__.py +1 -0
- klaude_code/ui/terminal/color.py +244 -0
- klaude_code/ui/terminal/control.py +147 -0
- klaude_code/ui/terminal/notifier.py +107 -0
- klaude_code/ui/terminal/progress_bar.py +87 -0
- klaude_code/ui/utils/__init__.py +1 -0
- klaude_code/ui/utils/common.py +108 -0
- klaude_code/ui/utils/debouncer.py +42 -0
- klaude_code/version.py +163 -0
- klaude_code-1.2.6.dist-info/METADATA +178 -0
- klaude_code-1.2.6.dist-info/RECORD +167 -0
- klaude_code-1.2.6.dist-info/WHEEL +4 -0
- klaude_code-1.2.6.dist-info/entry_points.txt +3 -0
klaude_code/llm/responses/client.py
ADDED

@@ -0,0 +1,216 @@

import json
import time
from collections.abc import AsyncGenerator
from typing import override

import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI, RateLimitError
from openai.types import responses

from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
from klaude_code.llm.input_common import apply_config_defaults
from klaude_code.llm.registry import register
from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
from klaude_code.llm.usage import calculate_cost
from klaude_code.protocol import llm_param, model
from klaude_code.trace import DebugType, log_debug


@register(llm_param.LLMClientProtocol.RESPONSES)
class ResponsesClient(LLMClientABC):
    def __init__(self, config: llm_param.LLMConfigParameter):
        super().__init__(config)
        if config.is_azure:
            if not config.base_url:
                raise ValueError("Azure endpoint is required")
            client = AsyncAzureOpenAI(
                api_key=config.api_key,
                azure_endpoint=str(config.base_url),
                api_version=config.azure_api_version,
                timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
            )
        else:
            client = AsyncOpenAI(
                api_key=config.api_key,
                base_url=config.base_url,
                timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
            )
        self.client: AsyncAzureOpenAI | AsyncOpenAI = client

    @classmethod
    @override
    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
        return cls(config)

    @override
    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
        param = apply_config_defaults(param, self.get_llm_config())

        request_start_time = time.time()
        first_token_time: float | None = None
        last_token_time: float | None = None
        response_id: str | None = None

        inputs = convert_history_to_input(param.input, param.model)
        tools = convert_tool_schema(param.tools)

        parallel_tool_calls = True

        stream = call_with_logged_payload(
            self.client.responses.create,
            model=str(param.model),
            tool_choice="auto",
            parallel_tool_calls=parallel_tool_calls,  # OpenAI's Codex always sets this to False; we try to enable it here, though gpt-5-codex seems to have bugs when parallel_tool_calls is True.
            include=[
                "reasoning.encrypted_content",
            ],
            store=param.store,
            previous_response_id=param.previous_response_id,
            stream=True,
            temperature=param.temperature,
            max_output_tokens=param.max_tokens,
            input=inputs,
            instructions=param.system,
            tools=tools,
            text={
                "verbosity": param.verbosity,
            },
            reasoning={
                "effort": param.thinking.reasoning_effort,
                "summary": param.thinking.reasoning_summary,
            }
            if param.thinking and param.thinking.reasoning_effort
            else None,
            extra_headers={"extra": json.dumps({"session_id": param.session_id})},
        )

        try:
            async for event in await stream:
                log_debug(
                    f"[{event.type}]",
                    event.model_dump_json(exclude_none=True),
                    style="blue",
                    debug_type=DebugType.LLM_STREAM,
                )
                match event:
                    case responses.ResponseCreatedEvent() as event:
                        response_id = event.response.id
                        yield model.StartItem(response_id=response_id)
                    case responses.ResponseReasoningSummaryTextDoneEvent() as event:
                        if event.text:
                            yield model.ReasoningTextItem(
                                content=event.text,
                                response_id=response_id,
                                model=str(param.model),
                            )
                    case responses.ResponseTextDeltaEvent() as event:
                        if first_token_time is None:
                            first_token_time = time.time()
                        last_token_time = time.time()
                        yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
                    case responses.ResponseOutputItemAddedEvent() as event:
                        if isinstance(event.item, responses.ResponseFunctionToolCall):
                            yield model.ToolCallStartItem(
                                response_id=response_id,
                                call_id=event.item.call_id,
                                name=event.item.name,
                            )
                    case responses.ResponseOutputItemDoneEvent() as event:
                        match event.item:
                            case responses.ResponseReasoningItem() as item:
                                if item.encrypted_content:
                                    yield model.ReasoningEncryptedItem(
                                        id=item.id,
                                        encrypted_content=item.encrypted_content,
                                        response_id=response_id,
                                        model=str(param.model),
                                    )
                            case responses.ResponseOutputMessage() as item:
                                yield model.AssistantMessageItem(
                                    content="\n".join(
                                        [
                                            part.text
                                            for part in item.content
                                            if isinstance(part, responses.ResponseOutputText)
                                        ]
                                    ),
                                    id=item.id,
                                    response_id=response_id,
                                )
                            case responses.ResponseFunctionToolCall() as item:
                                if first_token_time is None:
                                    first_token_time = time.time()
                                last_token_time = time.time()
                                yield model.ToolCallItem(
                                    name=item.name,
                                    arguments=item.arguments.strip(),
                                    call_id=item.call_id,
                                    id=item.id,
                                    response_id=response_id,
                                )
                            case _:
                                pass
                    case responses.ResponseCompletedEvent() as event:
                        usage: model.Usage | None = None
                        error_reason: str | None = None
                        if event.response.incomplete_details is not None:
                            error_reason = event.response.incomplete_details.reason
                        if event.response.usage is not None:
                            total_tokens = event.response.usage.total_tokens
                            context_usage_percent = (
                                (total_tokens / param.context_limit) * 100 if param.context_limit else None
                            )

                            throughput_tps: float | None = None
                            first_token_latency_ms: float | None = None

                            if first_token_time is not None:
                                first_token_latency_ms = (first_token_time - request_start_time) * 1000

                            if (
                                first_token_time is not None
                                and last_token_time is not None
                                and event.response.usage.output_tokens > 0
                            ):
                                time_duration = last_token_time - first_token_time
                                if time_duration >= 0.15:
                                    throughput_tps = event.response.usage.output_tokens / time_duration

                            usage = model.Usage(
                                input_tokens=event.response.usage.input_tokens,
                                cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
                                reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
                                output_tokens=event.response.usage.output_tokens,
                                total_tokens=total_tokens,
                                context_usage_percent=context_usage_percent,
                                throughput_tps=throughput_tps,
                                first_token_latency_ms=first_token_latency_ms,
                            )
                            calculate_cost(usage, self._config.cost)
                        yield model.ResponseMetadataItem(
                            usage=usage,
                            response_id=response_id,
                            model_name=str(param.model),
                            status=event.response.status,
                            error_reason=error_reason,
                        )
                        if event.response.status != "completed":
                            error_message = f"LLM response finished with status '{event.response.status}'"
                            if error_reason:
                                error_message = f"{error_message}: {error_reason}"
                            log_debug(
                                "[LLM status warning]",
                                error_message,
                                style="red",
                                debug_type=DebugType.LLM_STREAM,
                            )
                            yield model.StreamErrorItem(error=error_message)
                    case _:
                        log_debug(
                            "[Unhandled stream event]",
                            str(event),
                            style="red",
                            debug_type=DebugType.LLM_STREAM,
                        )
        except RateLimitError as e:
            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
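For orientation, a minimal consumption sketch (not part of the package): it assumes an already-built `LLMCallParameter` and simply dispatches on the item types that `ResponsesClient.call()` yields above; the `run_once` helper is hypothetical.

# Hypothetical consumption sketch -- not shipped with the package.
from klaude_code.protocol import llm_param, model


async def run_once(client: "ResponsesClient", param: llm_param.LLMCallParameter) -> None:
    async for item in client.call(param):
        match item:
            case model.AssistantMessageDelta():
                print(item.content, end="", flush=True)  # stream text as it arrives
            case model.ToolCallItem():
                print(f"\n[tool call] {item.name}({item.arguments})")
            case model.ResponseMetadataItem():
                if item.usage:
                    print(f"\n[usage] {item.usage.total_tokens} tokens")
            case model.StreamErrorItem():
                print(f"\n[error] {item.error}")
            case _:
                pass  # StartItem, reasoning items, etc.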
klaude_code/llm/responses/input.py
ADDED

@@ -0,0 +1,167 @@

# pyright: reportReturnType=false
# pyright: reportArgumentType=false

from typing import Any

from openai.types import responses

from klaude_code.protocol import llm_param, model


def _build_user_content_parts(
    user: model.UserMessageItem,
) -> list[responses.ResponseInputContentParam]:
    parts: list[responses.ResponseInputContentParam] = []
    if user.content is not None:
        parts.append({"type": "input_text", "text": user.content})
    for image in user.images or []:
        parts.append({"type": "input_image", "detail": "auto", "image_url": image.image_url.url})
    if not parts:
        parts.append({"type": "input_text", "text": ""})
    return parts


def _build_tool_result_item(tool: model.ToolResultItem) -> responses.ResponseInputItemParam:
    content_parts: list[responses.ResponseInputContentParam] = []
    text_output = tool.output or "<system-reminder>Tool ran without output or errors</system-reminder>"
    if text_output:
        content_parts.append({"type": "input_text", "text": text_output})
    for image in tool.images or []:
        content_parts.append({"type": "input_image", "detail": "auto", "image_url": image.image_url.url})

    item: dict[str, Any] = {
        "type": "function_call_output",
        "call_id": tool.call_id,
        "output": content_parts,
    }
    return item  # type: ignore[return-value]


def convert_history_to_input(
    history: list[model.ConversationItem],
    model_name: str | None = None,
) -> responses.ResponseInputParam:
    """
    Convert a list of conversation items to a list of response input params.

    Args:
        history: List of conversation items.
        model_name: Model name. Used to verify that signatures are valid for the same model.
    """
    items: list[responses.ResponseInputItemParam] = []

    pending_reasoning_text: str | None = None

    for item in history:
        match item:
            case model.ReasoningTextItem() as item:
                # Store only the text for now and wait for the matching encrypted item,
                # so both can be emitted together as a single reasoning input.
                # If no encrypted item follows (e.g. an incomplete stream), the pending
                # text is dropped.
                if model_name != item.model:
                    continue
                pending_reasoning_text = item.content

            case model.ReasoningEncryptedItem() as item:
                if item.encrypted_content and len(item.encrypted_content) > 0 and model_name == item.model:
                    items.append(convert_reasoning_inputs(pending_reasoning_text, item))
                # Reset pending text after consumption
                pending_reasoning_text = None

            case model.ToolCallItem() as t:
                items.append(
                    {
                        "type": "function_call",
                        "name": t.name,
                        "arguments": t.arguments,
                        "call_id": t.call_id,
                        "id": t.id,
                    }
                )
            case model.ToolResultItem() as t:
                items.append(_build_tool_result_item(t))
            case model.AssistantMessageItem() as a:
                items.append(
                    {
                        "type": "message",
                        "role": "assistant",
                        "id": a.id,
                        "content": [
                            {
                                "type": "output_text",
                                "text": a.content,
                            }
                        ],
                    }
                )
            case model.UserMessageItem() as u:
                items.append(
                    {
                        "type": "message",
                        "role": "user",
                        "id": u.id,
                        "content": _build_user_content_parts(u),
                    }
                )
            case model.DeveloperMessageItem() as d:
                dev_parts: list[responses.ResponseInputContentParam] = []
                if d.content is not None:
                    dev_parts.append({"type": "input_text", "text": d.content})
                for image in d.images or []:
                    dev_parts.append(
                        {
                            "type": "input_image",
                            "detail": "auto",
                            "image_url": image.image_url.url,
                        }
                    )
                if not dev_parts:
                    dev_parts.append({"type": "input_text", "text": ""})
                items.append(
                    {
                        "type": "message",
                        "role": "user",  # GPT-5 series do not support images in the "developer" role, so we use "user"
                        "id": d.id,
                        "content": dev_parts,
                    }
                )
            case _:
                # Other items may be metadata
                continue

    return items


def convert_reasoning_inputs(
    text_content: str | None, encrypted_item: model.ReasoningEncryptedItem
) -> responses.ResponseInputItemParam:
    result = {"type": "reasoning", "content": None}

    result["summary"] = [
        {
            "type": "summary_text",
            "text": text_content or "",
        }
    ]
    if encrypted_item.encrypted_content:
        result["encrypted_content"] = encrypted_item.encrypted_content
    if encrypted_item.id is not None:
        result["id"] = encrypted_item.id
    return result


def convert_tool_schema(
    tools: list[llm_param.ToolSchema] | None,
) -> list[responses.ToolParam]:
    if tools is None:
        return []
    return [
        {
            "type": "function",
            "name": tool.name,
            "description": tool.description,
            "parameters": tool.parameters,
        }
        for tool in tools
    ]
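A hedged illustration of the reasoning-pairing behavior in `convert_history_to_input`. The constructor keyword arguments below are assumptions inferred from the attributes the converter reads; they are not a confirmed API.

# Illustrative sketch (not from the package); field values are invented.
from klaude_code.protocol import model

history: list[model.ConversationItem] = [
    model.ReasoningTextItem(content="summary...", model="gpt-5", response_id="r1"),
    model.ReasoningEncryptedItem(id="rs_1", encrypted_content="opaque-blob", model="gpt-5", response_id="r1"),
    model.UserMessageItem(content="hello"),
]

items = convert_history_to_input(history, model_name="gpt-5")
# items[0] is a single "reasoning" input carrying both the summary text and the
# encrypted payload; with model_name="other-model" both reasoning items would be
# skipped and only the user message would remain.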
klaude_code/llm/usage.py
ADDED
@@ -0,0 +1,109 @@

import time

import openai.types

from klaude_code.protocol import llm_param, model


def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> None:
    """Calculate and set cost fields on usage based on cost configuration.

    Note: input_tokens includes cached_tokens, so we need to subtract cached_tokens
    to get the actual non-cached input tokens for cost calculation.
    """
    if cost_config is None:
        return

    # Non-cached input tokens cost
    non_cached_input = usage.input_tokens - usage.cached_tokens
    usage.input_cost = (non_cached_input / 1_000_000) * cost_config.input

    # Output tokens cost (includes reasoning tokens)
    usage.output_cost = (usage.output_tokens / 1_000_000) * cost_config.output

    # Cache read cost
    usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read

    # Total cost
    usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost


class MetadataTracker:
    """Tracks timing and metadata for LLM responses."""

    def __init__(self, cost_config: llm_param.Cost | None = None) -> None:
        self._request_start_time: float = time.time()
        self._first_token_time: float | None = None
        self._last_token_time: float | None = None
        self._metadata_item = model.ResponseMetadataItem()
        self._cost_config = cost_config

    @property
    def metadata_item(self) -> model.ResponseMetadataItem:
        return self._metadata_item

    @property
    def first_token_time(self) -> float | None:
        return self._first_token_time

    @property
    def last_token_time(self) -> float | None:
        return self._last_token_time

    def record_token(self) -> None:
        """Record a token arrival, updating first/last token times."""
        now = time.time()
        if self._first_token_time is None:
            self._first_token_time = now
        self._last_token_time = now

    def set_usage(self, usage: model.Usage) -> None:
        """Set the usage information."""
        self._metadata_item.usage = usage

    def set_model_name(self, model_name: str) -> None:
        """Set the model name."""
        self._metadata_item.model_name = model_name

    def set_provider(self, provider: str) -> None:
        """Set the provider name."""
        self._metadata_item.provider = provider

    def set_response_id(self, response_id: str | None) -> None:
        """Set the response ID."""
        self._metadata_item.response_id = response_id

    def finalize(self) -> model.ResponseMetadataItem:
        """Finalize and return the metadata item with calculated performance metrics."""
        if self._metadata_item.usage and self._first_token_time is not None:
            self._metadata_item.usage.first_token_latency_ms = (
                self._first_token_time - self._request_start_time
            ) * 1000

            if self._last_token_time is not None and self._metadata_item.usage.output_tokens > 0:
                time_duration = self._last_token_time - self._first_token_time
                if time_duration >= 0.15:
                    self._metadata_item.usage.throughput_tps = self._metadata_item.usage.output_tokens / time_duration

        # Calculate cost if config is available
        if self._metadata_item.usage:
            calculate_cost(self._metadata_item.usage, self._cost_config)

        return self._metadata_item


def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
    """Convert OpenAI CompletionUsage to internal Usage model."""
    total_tokens = usage.total_tokens
    context_usage_percent = (total_tokens / context_limit) * 100 if context_limit else None
    return model.Usage(
        input_tokens=usage.prompt_tokens,
        cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
        reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
        or 0,
        output_tokens=usage.completion_tokens,
        total_tokens=total_tokens,
        context_usage_percent=context_usage_percent,
        throughput_tps=None,
        first_token_latency_ms=None,
    )
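To make the pricing arithmetic in `calculate_cost` concrete, a worked example with invented per-million-token prices:

# Worked example (hypothetical numbers) of the calculate_cost() arithmetic above.
# Suppose a model priced at $2.50/M input, $10.00/M output, $0.25/M cache reads,
# and a response with 120_000 input tokens (20_000 cache hits) and 8_000 output tokens:
input_cost = ((120_000 - 20_000) / 1_000_000) * 2.50  # 0.25 USD, non-cached input only
output_cost = (8_000 / 1_000_000) * 10.00             # 0.08 USD, includes reasoning tokens
cache_read_cost = (20_000 / 1_000_000) * 0.25         # 0.005 USD
total_cost = input_cost + output_cost + cache_read_cost  # 0.335 USD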
klaude_code/protocol/commands.py
ADDED

@@ -0,0 +1,21 @@

from enum import Enum


class CommandName(str, Enum):
    INIT = "init"
    DIFF = "diff"
    HELP = "help"
    MODEL = "model"
    COMPACT = "compact"
    REFRESH_TERMINAL = "refresh-terminal"
    CLEAR = "clear"
    TERMINAL_SETUP = "terminal-setup"
    EXPORT = "export"
    STATUS = "status"
    # PLAN and DOC are dynamically registered now. The names are kept here for
    # reference and can be removed once no code imports them explicitly.
    # PLAN = "plan"
    # DOC = "doc"

    def __str__(self) -> str:
        return self.value
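A small usage note, relying only on standard `str`-mixin enum semantics: members compare and format as their raw command names.

# Usage sketch: CommandName subclasses str and overrides __str__, so members
# behave like their command-name strings.
assert CommandName.HELP == "help"
assert str(CommandName.REFRESH_TERMINAL) == "refresh-terminal"
assert f"/{CommandName.EXPORT}" == "/export"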
klaude_code/protocol/events.py
ADDED

@@ -0,0 +1,163 @@

from typing import Literal

from pydantic import BaseModel

from klaude_code.protocol import llm_param, model

"""
Events are how the Agent Executor and the UI display communicate.
"""


class EndEvent(BaseModel):
    pass


class ErrorEvent(BaseModel):
    error_message: str
    can_retry: bool = False


class TaskStartEvent(BaseModel):
    session_id: str
    sub_agent_state: model.SubAgentState | None = None


class TaskFinishEvent(BaseModel):
    session_id: str
    task_result: str


class TurnStartEvent(BaseModel):
    """For now, this event is used by the UI to flush the developer message buffer and print an empty line."""

    session_id: str


class TurnEndEvent(BaseModel):
    session_id: str


class TurnToolCallStartEvent(BaseModel):
    """For the UI to change its status text."""

    session_id: str
    response_id: str | None = None
    tool_call_id: str
    tool_name: str
    arguments: str


class ThinkingEvent(BaseModel):
    session_id: str
    response_id: str | None = None
    content: str


class AssistantMessageDeltaEvent(BaseModel):
    session_id: str
    response_id: str | None = None
    content: str


class AssistantMessageEvent(BaseModel):
    response_id: str | None = None
    session_id: str
    content: str


class DeveloperMessageEvent(BaseModel):
    """Developer messages are reminders in user messages or tool results; see core/reminders.py."""

    session_id: str
    item: model.DeveloperMessageItem


class ToolCallEvent(BaseModel):
    session_id: str
    response_id: str | None = None
    tool_call_id: str
    tool_name: str
    arguments: str
    is_replay: bool = False


class ToolResultEvent(BaseModel):
    session_id: str
    response_id: str | None = None
    tool_call_id: str
    tool_name: str
    result: str
    ui_extra: model.ToolResultUIExtra | None = None
    status: Literal["success", "error"]
    is_replay: bool = False


class ResponseMetadataEvent(BaseModel):
    """Shows model name, usage tokens, task duration, and turn count."""

    session_id: str
    metadata: model.ResponseMetadataItem


class UserMessageEvent(BaseModel):
    session_id: str
    content: str
    images: list[model.ImageURLPart] | None = None


class WelcomeEvent(BaseModel):
    work_dir: str
    llm_config: llm_param.LLMConfigParameter


class InterruptEvent(BaseModel):
    session_id: str


class TodoChangeEvent(BaseModel):
    session_id: str
    todos: list[model.TodoItem]


HistoryItemEvent = (
    ThinkingEvent
    | TurnStartEvent  # Used by the UI to print a new empty line
    | AssistantMessageEvent
    | ToolCallEvent
    | ToolResultEvent
    | UserMessageEvent
    | ResponseMetadataEvent
    | InterruptEvent
    | DeveloperMessageEvent
)


class ReplayHistoryEvent(BaseModel):
    session_id: str
    events: list[HistoryItemEvent]
    updated_at: float
    is_load: bool = True


Event = (
    TaskStartEvent
    | TaskFinishEvent
    | ThinkingEvent
    | AssistantMessageDeltaEvent
    | AssistantMessageEvent
    | ToolCallEvent
    | ToolResultEvent
    | ResponseMetadataEvent
    | ReplayHistoryEvent
    | ErrorEvent
    | EndEvent
    | WelcomeEvent
    | UserMessageEvent
    | InterruptEvent
    | DeveloperMessageEvent
    | TodoChangeEvent
    | TurnStartEvent
    | TurnEndEvent
    | TurnToolCallStartEvent
)
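A hypothetical display-side sketch (not from the package) showing how a UI loop might narrow the `Event` union above with structural pattern matching; `handle_event` is an invented name.

def handle_event(event: Event) -> None:
    match event:
        case AssistantMessageDeltaEvent():
            print(event.content, end="", flush=True)  # incremental assistant text
        case ToolResultEvent(status="error"):
            print(f"[{event.tool_name} failed] {event.result}")
        case TaskFinishEvent():
            print(f"\n[task done] {event.task_result}")
        case ErrorEvent(can_retry=True):
            print(f"[retryable error] {event.error_message}")
        case _:
            pass  # remaining events drive status bars, todo lists, replays, ...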