klaude-code 1.2.1-py3-none-any.whl → 1.2.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/cli/main.py +9 -4
- klaude_code/cli/runtime.py +42 -43
- klaude_code/command/__init__.py +7 -5
- klaude_code/command/clear_cmd.py +6 -29
- klaude_code/command/command_abc.py +44 -8
- klaude_code/command/diff_cmd.py +33 -27
- klaude_code/command/export_cmd.py +18 -26
- klaude_code/command/help_cmd.py +10 -8
- klaude_code/command/model_cmd.py +11 -40
- klaude_code/command/{prompt-update-dev-doc.md → prompt-dev-docs-update.md} +3 -2
- klaude_code/command/{prompt-dev-doc.md → prompt-dev-docs.md} +3 -2
- klaude_code/command/prompt-init.md +2 -5
- klaude_code/command/prompt_command.py +6 -6
- klaude_code/command/refresh_cmd.py +4 -5
- klaude_code/command/registry.py +16 -19
- klaude_code/command/terminal_setup_cmd.py +12 -11
- klaude_code/config/__init__.py +4 -0
- klaude_code/config/config.py +25 -26
- klaude_code/config/list_model.py +8 -3
- klaude_code/config/select_model.py +1 -1
- klaude_code/const/__init__.py +1 -1
- klaude_code/core/__init__.py +0 -3
- klaude_code/core/agent.py +25 -50
- klaude_code/core/executor.py +268 -101
- klaude_code/core/prompt.py +12 -12
- klaude_code/core/{prompt → prompts}/prompt-gemini.md +1 -1
- klaude_code/core/reminders.py +76 -95
- klaude_code/core/task.py +21 -14
- klaude_code/core/tool/__init__.py +45 -11
- klaude_code/core/tool/file/apply_patch.py +5 -1
- klaude_code/core/tool/file/apply_patch_tool.py +11 -13
- klaude_code/core/tool/file/edit_tool.py +27 -23
- klaude_code/core/tool/file/multi_edit_tool.py +15 -17
- klaude_code/core/tool/file/read_tool.py +41 -36
- klaude_code/core/tool/file/write_tool.py +13 -15
- klaude_code/core/tool/memory/memory_tool.py +85 -68
- klaude_code/core/tool/memory/skill_tool.py +10 -12
- klaude_code/core/tool/shell/bash_tool.py +24 -22
- klaude_code/core/tool/shell/command_safety.py +12 -1
- klaude_code/core/tool/sub_agent_tool.py +11 -12
- klaude_code/core/tool/todo/todo_write_tool.py +21 -28
- klaude_code/core/tool/todo/update_plan_tool.py +14 -24
- klaude_code/core/tool/tool_abc.py +3 -4
- klaude_code/core/tool/tool_context.py +7 -7
- klaude_code/core/tool/tool_registry.py +30 -47
- klaude_code/core/tool/tool_runner.py +35 -43
- klaude_code/core/tool/truncation.py +14 -20
- klaude_code/core/tool/web/mermaid_tool.py +12 -14
- klaude_code/core/tool/web/web_fetch_tool.py +15 -17
- klaude_code/core/turn.py +19 -7
- klaude_code/llm/__init__.py +3 -4
- klaude_code/llm/anthropic/client.py +30 -46
- klaude_code/llm/anthropic/input.py +4 -11
- klaude_code/llm/client.py +29 -8
- klaude_code/llm/input_common.py +66 -36
- klaude_code/llm/openai_compatible/client.py +42 -84
- klaude_code/llm/openai_compatible/input.py +11 -16
- klaude_code/llm/openai_compatible/tool_call_accumulator.py +2 -2
- klaude_code/llm/openrouter/client.py +40 -289
- klaude_code/llm/openrouter/input.py +13 -35
- klaude_code/llm/openrouter/reasoning_handler.py +209 -0
- klaude_code/llm/registry.py +5 -75
- klaude_code/llm/responses/client.py +34 -55
- klaude_code/llm/responses/input.py +24 -26
- klaude_code/llm/usage.py +109 -0
- klaude_code/protocol/__init__.py +4 -0
- klaude_code/protocol/events.py +3 -2
- klaude_code/protocol/{llm_parameter.py → llm_param.py} +12 -32
- klaude_code/protocol/model.py +49 -4
- klaude_code/protocol/op.py +18 -16
- klaude_code/protocol/op_handler.py +28 -0
- klaude_code/{core → protocol}/sub_agent.py +7 -0
- klaude_code/session/export.py +150 -70
- klaude_code/session/session.py +28 -14
- klaude_code/session/templates/export_session.html +180 -42
- klaude_code/trace/__init__.py +2 -2
- klaude_code/trace/log.py +11 -5
- klaude_code/ui/__init__.py +91 -8
- klaude_code/ui/core/__init__.py +1 -0
- klaude_code/ui/core/display.py +103 -0
- klaude_code/ui/core/input.py +71 -0
- klaude_code/ui/modes/__init__.py +1 -0
- klaude_code/ui/modes/debug/__init__.py +1 -0
- klaude_code/ui/{base/debug_event_display.py → modes/debug/display.py} +9 -5
- klaude_code/ui/modes/exec/__init__.py +1 -0
- klaude_code/ui/{base/exec_display.py → modes/exec/display.py} +28 -2
- klaude_code/ui/{repl → modes/repl}/__init__.py +5 -6
- klaude_code/ui/modes/repl/clipboard.py +152 -0
- klaude_code/ui/modes/repl/completers.py +429 -0
- klaude_code/ui/modes/repl/display.py +60 -0
- klaude_code/ui/modes/repl/event_handler.py +375 -0
- klaude_code/ui/modes/repl/input_prompt_toolkit.py +198 -0
- klaude_code/ui/modes/repl/key_bindings.py +170 -0
- klaude_code/ui/{repl → modes/repl}/renderer.py +109 -132
- klaude_code/ui/renderers/assistant.py +21 -0
- klaude_code/ui/renderers/common.py +0 -16
- klaude_code/ui/renderers/developer.py +18 -18
- klaude_code/ui/renderers/diffs.py +36 -14
- klaude_code/ui/renderers/errors.py +1 -1
- klaude_code/ui/renderers/metadata.py +50 -27
- klaude_code/ui/renderers/sub_agent.py +43 -9
- klaude_code/ui/renderers/thinking.py +33 -1
- klaude_code/ui/renderers/tools.py +212 -20
- klaude_code/ui/renderers/user_input.py +19 -23
- klaude_code/ui/rich/__init__.py +1 -0
- klaude_code/ui/{rich_ext → rich}/searchable_text.py +3 -1
- klaude_code/ui/{renderers → rich}/status.py +29 -18
- klaude_code/ui/{base → rich}/theme.py +8 -2
- klaude_code/ui/terminal/__init__.py +1 -0
- klaude_code/ui/{base/terminal_color.py → terminal/color.py} +4 -1
- klaude_code/ui/{base/terminal_control.py → terminal/control.py} +1 -0
- klaude_code/ui/{base/terminal_notifier.py → terminal/notifier.py} +5 -2
- klaude_code/ui/utils/__init__.py +1 -0
- klaude_code/ui/{base/utils.py → utils/common.py} +35 -3
- {klaude_code-1.2.1.dist-info → klaude_code-1.2.3.dist-info}/METADATA +1 -1
- klaude_code-1.2.3.dist-info/RECORD +161 -0
- klaude_code/core/clipboard_manifest.py +0 -124
- klaude_code/llm/openrouter/tool_call_accumulator.py +0 -80
- klaude_code/ui/base/__init__.py +0 -1
- klaude_code/ui/base/display_abc.py +0 -36
- klaude_code/ui/base/input_abc.py +0 -20
- klaude_code/ui/repl/display.py +0 -36
- klaude_code/ui/repl/event_handler.py +0 -247
- klaude_code/ui/repl/input.py +0 -773
- klaude_code/ui/rich_ext/__init__.py +0 -1
- klaude_code-1.2.1.dist-info/RECORD +0 -151
- klaude_code/core/{prompt → prompts}/prompt-claude-code.md +0 -0
- klaude_code/core/{prompt → prompts}/prompt-codex.md +0 -0
- klaude_code/core/{prompt → prompts}/prompt-subagent-explore.md +0 -0
- klaude_code/core/{prompt → prompts}/prompt-subagent-oracle.md +0 -0
- klaude_code/core/{prompt → prompts}/prompt-subagent-webfetch.md +0 -0
- klaude_code/core/{prompt → prompts}/prompt-subagent.md +0 -0
- klaude_code/ui/{base → core}/stage_manager.py +0 -0
- klaude_code/ui/{rich_ext → rich}/live.py +0 -0
- klaude_code/ui/{rich_ext → rich}/markdown.py +0 -0
- klaude_code/ui/{rich_ext → rich}/quote.py +0 -0
- klaude_code/ui/{base → terminal}/progress_bar.py +0 -0
- klaude_code/ui/{base → utils}/debouncer.py +0 -0
- {klaude_code-1.2.1.dist-info → klaude_code-1.2.3.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.1.dist-info → klaude_code-1.2.3.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/client.py +40 -289

@@ -1,254 +1,24 @@
-import json
-import time
 from collections.abc import AsyncGenerator
-from enum import Enum
-from typing import Callable, Literal, ParamSpec, TypeVar, override
+from typing import Literal, override
 
 import httpx
 import openai
-from pydantic import BaseModel
 
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.input_common import apply_config_defaults
+from klaude_code.llm.openai_compatible.input import convert_tool_schema
 from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
-from klaude_code.llm.openrouter.input import convert_history_to_input, convert_tool_schema
+from klaude_code.llm.openrouter.input import convert_history_to_input, is_claude_model
+from klaude_code.llm.openrouter.reasoning_handler import ReasoningDetail, ReasoningStreamHandler
 from klaude_code.llm.registry import register
-from klaude_code.protocol import model
-from klaude_code.protocol.llm_parameter import (
-    LLMCallParameter,
-    LLMClientProtocol,
-    LLMConfigParameter,
-    apply_config_defaults,
-)
-from klaude_code.protocol.model import StreamErrorItem
+from klaude_code.llm.usage import MetadataTracker, convert_usage
+from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log, log_debug
 
-P = ParamSpec("P")
-R = TypeVar("R")
 
-
-def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
-    """Call an SDK function while logging the JSON payload.
-
-    The function reuses the original callable's type signature via ParamSpec
-    so static type checkers can validate arguments at the call site.
-    """
-
-    payload = {k: v for k, v in kwargs.items() if v is not None}
-    log_debug(
-        json.dumps(payload, ensure_ascii=False, default=str),
-        style="yellow",
-        debug_type=DebugType.LLM_PAYLOAD,
-    )
-    return func(*args, **kwargs)
-
-
-class ReasoningDetail(BaseModel):
-    """OpenRouter's https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning_details-array-structure"""
-
-    type: str
-    format: str
-    index: int
-    id: str | None = None
-    data: str | None = None  # OpenAI's encrypted content
-    summary: str | None = None
-    text: str | None = None
-    signature: str | None = None  # Claude's signature
-
-
-class ReasoningMode(str, Enum):
-    COMPLETE_CHUNK = "complete_chunk"
-    GPT5_SECTIONS = "gpt5_sections"
-    ACCUMULATE = "accumulate"
-
-
-class ReasoningStreamHandler:
-    """Encapsulates reasoning stream handling across different model behaviors."""
-
-    def __init__(
-        self,
-        param_model: str,
-        response_id: str | None,
-    ) -> None:
-        self._param_model = param_model
-        self._response_id = response_id
-
-        self._reasoning_id: str | None = None
-        self._accumulated_reasoning: list[str] = []
-        self._gpt5_line_buffer: str = ""
-        self._gpt5_section_lines: list[str] = []
-
-    def set_response_id(self, response_id: str | None) -> None:
-        """Update the response identifier used for emitted items."""
-
-        self._response_id = response_id
-
-    def on_detail(self, detail: ReasoningDetail) -> list[model.ConversationItem]:
-        """Process a single reasoning detail and return streamable items."""
-
-        items: list[model.ConversationItem] = []
-
-        if detail.type == "reasoning.encrypted":
-            self._reasoning_id = detail.id
-            if encrypted_item := self._build_encrypted_item(detail.data, detail):
-                items.append(encrypted_item)
-            return items
-
-        if detail.type in ("reasoning.text", "reasoning.summary"):
-            self._reasoning_id = detail.id
-            if encrypted_item := self._build_encrypted_item(detail.signature, detail):
-                items.append(encrypted_item)
-            text = detail.text if detail.type == "reasoning.text" else detail.summary
-            if text:
-                items.extend(self._handle_text(text))
-
-        return items
-
-    def flush(self) -> list[model.ConversationItem]:
-        """Flush buffered reasoning text and encrypted payloads."""
-
-        items: list[model.ConversationItem] = []
-        mode = self._resolve_mode()
-
-        if mode is ReasoningMode.GPT5_SECTIONS:
-            for section in self._drain_gpt5_sections():
-                items.append(self._build_text_item(section))
-        elif self._accumulated_reasoning and mode is ReasoningMode.ACCUMULATE:
-            items.append(self._build_text_item("".join(self._accumulated_reasoning)))
-            self._accumulated_reasoning = []
-
-        return items
-
-    def _handle_text(self, text: str) -> list[model.ReasoningTextItem]:
-        mode = self._resolve_mode()
-        if mode is ReasoningMode.COMPLETE_CHUNK:
-            return [self._build_text_item(text)]
-        if mode is ReasoningMode.GPT5_SECTIONS:
-            sections = self._process_gpt5_text(text)
-            return [self._build_text_item(section) for section in sections]
-        self._accumulated_reasoning.append(text)
-        return []
-
-    def _build_text_item(self, content: str) -> model.ReasoningTextItem:
-        return model.ReasoningTextItem(
-            id=self._reasoning_id,
-            content=content,
-            response_id=self._response_id,
-            model=self._param_model,
-        )
-
-    def _build_encrypted_item(
-        self,
-        content: str | None,
-        detail: ReasoningDetail,
-    ) -> model.ReasoningEncryptedItem | None:
-        if not content:
-            return None
-        return model.ReasoningEncryptedItem(
-            id=detail.id,
-            encrypted_content=content,
-            format=detail.format,
-            response_id=self._response_id,
-            model=self._param_model,
-        )
-
-    def _process_gpt5_text(self, text: str) -> list[str]:
-        emitted_sections: list[str] = []
-        self._gpt5_line_buffer += text
-        while True:
-            newline_index = self._gpt5_line_buffer.find("\n")
-            if newline_index == -1:
-                break
-            line = self._gpt5_line_buffer[:newline_index]
-            self._gpt5_line_buffer = self._gpt5_line_buffer[newline_index + 1 :]
-            remainder = line
-            while True:
-                split_result = self._split_gpt5_title_line(remainder)
-                if split_result is None:
-                    break
-                prefix_segment, title_segment, remainder = split_result
-                if prefix_segment:
-                    if not self._gpt5_section_lines:
-                        self._gpt5_section_lines = []
-                    self._gpt5_section_lines.append(f"{prefix_segment}\n")
-                if self._gpt5_section_lines:
-                    emitted_sections.append("".join(self._gpt5_section_lines))
-                self._gpt5_section_lines = [f"{title_segment}  \n"]  # Add two spaces for markdown line break
-            if remainder:
-                if not self._gpt5_section_lines:
-                    self._gpt5_section_lines = []
-                self._gpt5_section_lines.append(f"{remainder}\n")
-        return emitted_sections
-
-    def _drain_gpt5_sections(self) -> list[str]:
-        sections: list[str] = []
-        if self._gpt5_line_buffer:
-            if not self._gpt5_section_lines:
-                self._gpt5_section_lines = [self._gpt5_line_buffer]
-            else:
-                self._gpt5_section_lines.append(self._gpt5_line_buffer)
-            self._gpt5_line_buffer = ""
-        if self._gpt5_section_lines:
-            sections.append("".join(self._gpt5_section_lines))
-            self._gpt5_section_lines = []
-        return sections
-
-    def _is_gpt5(self) -> bool:
-        return "gpt-5" in self._param_model.lower()
-
-    def _is_complete_chunk_reasoning_model(self) -> bool:
-        """Whether the current model emits reasoning in complete chunks (e.g. Gemini)."""
-
-        return self._param_model.startswith("google/gemini")
-
-    def _resolve_mode(self) -> ReasoningMode:
-        if self._is_complete_chunk_reasoning_model():
-            return ReasoningMode.COMPLETE_CHUNK
-        if self._is_gpt5():
-            return ReasoningMode.GPT5_SECTIONS
-        return ReasoningMode.ACCUMULATE
-
-    def _is_gpt5_title_line(self, line: str) -> bool:
-        stripped = line.strip()
-        if not stripped:
-            return False
-        return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2
-
-    def _split_gpt5_title_line(self, line: str) -> tuple[str | None, str, str] | None:
-        if not line:
-            return None
-        search_start = 0
-        while True:
-            opening_index = line.find("**", search_start)
-            if opening_index == -1:
-                return None
-            closing_index = line.find("**", opening_index + 2)
-            if closing_index == -1:
-                return None
-            title_candidate = line[opening_index : closing_index + 2]
-            stripped_title = title_candidate.strip()
-            if self._is_gpt5_title_line(stripped_title):
-                # Treat as a GPT-5 title only when everything after the
-                # bold segment is either whitespace or starts a new bold
-                # title. This prevents inline bold like `**xxx**yyyy`
-                # from being misclassified as a section title while
-                # preserving support for consecutive titles in one line.
-                after = line[closing_index + 2 :]
-                if after.strip() and not after.lstrip().startswith("**"):
-                    search_start = closing_index + 2
-                    continue
-                prefix_segment = line[:opening_index]
-                remainder_segment = after
-                return (
-                    prefix_segment if prefix_segment else None,
-                    stripped_title,
-                    remainder_segment,
-                )
-            search_start = closing_index + 2
-
-
-@register(LLMClientProtocol.OPENROUTER)
+@register(llm_param.LLMClientProtocol.OPENROUTER)
 class OpenRouterClient(LLMClientABC):
-    def __init__(self, config: LLMConfigParameter):
+    def __init__(self, config: llm_param.LLMConfigParameter):
         super().__init__(config)
         client = openai.AsyncOpenAI(
             api_key=config.api_key,
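
The block removed above is a move rather than a deletion: ReasoningDetail, ReasoningMode, and ReasoningStreamHandler reappear in the new klaude_code/llm/openrouter/reasoning_handler.py (+209 lines in the file list), and call_with_logged_payload is now shared via klaude_code.llm.client. The subtlest piece of the moved handler is the GPT-5 section splitter, which only treats a bold line as a section title when nothing but whitespace or another bold title follows it. A minimal standalone sketch of the title test, lifted from the removed _is_gpt5_title_line:

```python
# Standalone sketch of the GPT-5 title heuristic from the handler above.
# A reasoning line starts a new section only when, once stripped, it is a
# bold "**...**" line with no trailing inline text.
def is_gpt5_title_line(line: str) -> bool:
    stripped = line.strip()
    if not stripped:
        return False
    return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2


assert is_gpt5_title_line("**Planning the fix**")
assert not is_gpt5_title_line("**inline bold** plus trailing text")
assert not is_gpt5_title_line("plain reasoning text")
```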
@@ -259,18 +29,16 @@ class OpenRouterClient(LLMClientABC):
 
     @classmethod
     @override
-    def create(cls, config: LLMConfigParameter) -> "LLMClientABC":
+    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
         return cls(config)
 
     @override
-    async def call(self, param: LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())
         messages = convert_history_to_input(param.input, param.system, param.model)
         tools = convert_tool_schema(param.tools)
 
-
-        first_token_time: float | None = None
-        last_token_time: float | None = None
+        metadata_tracker = MetadataTracker(cost_config=self._config.cost)
 
         extra_body: dict[str, object] = {
             "usage": {"include": True}  # To get the cache tokens at the end of the response
@@ -313,7 +81,7 @@ class OpenRouterClient(LLMClientABC):
         response_id: str | None = None
         accumulated_content: list[str] = []
         accumulated_tool_calls: ToolCallAccumulatorABC = BasicToolCallAccumulator()
-
+        emitted_tool_start_indices: set[int] = set()
         reasoning_handler = ReasoningStreamHandler(
             param_model=str(param.model),
             response_id=response_id,
@@ -342,18 +110,24 @@ class OpenRouterClient(LLMClientABC):
 
         try:
             async for event in await stream:
-                log_debug(
+                log_debug(
+                    event.model_dump_json(exclude_none=True),
+                    style="blue",
+                    debug_type=DebugType.LLM_STREAM,
+                )
                 if not response_id and event.id:
                     response_id = event.id
                     reasoning_handler.set_response_id(response_id)
                     accumulated_tool_calls.response_id = response_id
                     yield model.StartItem(response_id=response_id)
-                if
-
+                if (
+                    event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison]
+                ):  # gcp gemini will return None usage field
+                    metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit))
                 if event.model:
-
+                    metadata_tracker.set_model_name(event.model)
                 if provider := getattr(event, "provider", None):
-
+                    metadata_tracker.set_provider(str(provider))
 
                 if len(event.choices) == 0:
                     continue
@@ -365,9 +139,7 @@ class OpenRouterClient(LLMClientABC):
                     for item in reasoning_details:
                         try:
                             reasoning_detail = ReasoningDetail.model_validate(item)
-
-                            first_token_time = time.time()
-                            last_token_time = time.time()
+                            metadata_tracker.record_token()
                             stage = "reasoning"
                             for conversation_item in reasoning_handler.on_detail(reasoning_detail):
                                 yield conversation_item
@@ -378,9 +150,7 @@ class OpenRouterClient(LLMClientABC):
                 if delta.content and (
                     stage == "assistant" or delta.content.strip()
                 ):  # Process all content in assistant stage, filter empty content in reasoning stage
-
-                    first_token_time = time.time()
-                    last_token_time = time.time()
+                    metadata_tracker.record_token()
                     if stage == "reasoning":
                         for item in flush_reasoning_items():
                             yield item
@@ -393,9 +163,7 @@ class OpenRouterClient(LLMClientABC):
 
                 # Tool
                 if delta.tool_calls and len(delta.tool_calls) > 0:
-
-                    first_token_time = time.time()
-                    last_token_time = time.time()
+                    metadata_tracker.record_token()
                    if stage == "reasoning":
                         for item in flush_reasoning_items():
                             yield item
@@ -403,10 +171,19 @@ class OpenRouterClient(LLMClientABC):
                         for item in flush_assistant_items():
                             yield item
                         stage = "tool"
+                    # Emit ToolCallStartItem for new tool calls
+                    for tc in delta.tool_calls:
+                        if tc.index not in emitted_tool_start_indices and tc.function and tc.function.name:
+                            emitted_tool_start_indices.add(tc.index)
+                            yield model.ToolCallStartItem(
+                                response_id=response_id,
+                                call_id=tc.id or "",
+                                name=tc.function.name,
+                            )
                     accumulated_tool_calls.add(delta.tool_calls)
 
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
 
         # Finalize
         for item in flush_reasoning_items():
@@ -419,31 +196,5 @@ class OpenRouterClient(LLMClientABC):
         for tool_call_item in flush_tool_call_items():
             yield tool_call_item
 
-
-
-        # Calculate performance metrics if we have timing data
-        if metadata_item.usage and first_token_time is not None:
-            metadata_item.usage.first_token_latency_ms = (first_token_time - request_start_time) * 1000
-
-            if last_token_time is not None and metadata_item.usage.output_tokens > 0:
-                time_duration = last_token_time - first_token_time
-                if time_duration >= 0.15:
-                    metadata_item.usage.throughput_tps = metadata_item.usage.output_tokens / time_duration
-
-        yield metadata_item
-
-
-def convert_usage(usage: openai.types.CompletionUsage, context_limit: int | None = None) -> model.Usage:
-    total_tokens = usage.total_tokens
-    context_usage_percent = (total_tokens / context_limit) * 100 if context_limit else None
-    return model.Usage(
-        input_tokens=usage.prompt_tokens,
-        cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
-        reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
-        or 0,
-        output_tokens=usage.completion_tokens,
-        total_tokens=total_tokens,
-        context_usage_percent=context_usage_percent,
-        throughput_tps=None,
-        first_token_latency_ms=None,
-    )
+        metadata_tracker.set_response_id(response_id)
+        yield metadata_tracker.finalize()
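
The timing bookkeeping deleted above (first_token_time / last_token_time plus the end-of-stream latency and throughput math) and the old module-level convert_usage all collapse into the new klaude_code/llm/usage.py (+109 lines): the client now calls metadata_tracker.record_token() per streamed delta and yields metadata_tracker.finalize() once. The other behavioral addition is the ToolCallStartItem emission guarded by emitted_tool_start_indices, so each tool call's name is announced exactly once even though streaming deltas repeat the same index. A minimal sketch of what the tracker presumably does with its timestamps, reusing the removed block's math; MetricsSketch is a hypothetical stand-in, not the real MetadataTracker API:

```python
import time


class MetricsSketch:
    """Hypothetical stand-in reproducing the removed inline timing logic."""

    def __init__(self) -> None:
        self.request_start_time = time.time()
        self.first_token_time: float | None = None
        self.last_token_time: float | None = None

    def record_token(self) -> None:
        # Called for every streamed reasoning/content/tool-call delta:
        # remember the first arrival, keep updating the last one.
        now = time.time()
        if self.first_token_time is None:
            self.first_token_time = now
        self.last_token_time = now

    def finalize(self, output_tokens: int) -> tuple[float | None, float | None]:
        # Same math as the removed block: first-token latency in ms, and
        # throughput only when the stream lasted at least 0.15 s.
        if self.first_token_time is None:
            return None, None
        latency_ms = (self.first_token_time - self.request_start_time) * 1000
        throughput_tps = None
        if self.last_token_time is not None and output_tokens > 0:
            duration = self.last_token_time - self.first_token_time
            if duration >= 0.15:
                throughput_tps = output_tokens / duration
        return latency_ms, throughput_tps
```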
klaude_code/llm/openrouter/input.py +13 -35

@@ -9,23 +9,19 @@
 from openai.types import chat
 from openai.types.chat import ChatCompletionContentPartParam
 
-from klaude_code.llm.input_common import (
-    AssistantGroup,
-    ToolGroup,
-    UserGroup,
-    merge_reminder_text,
-    parse_message_groups,
-)
-from klaude_code.protocol import model as protocol_model
-from klaude_code.protocol.llm_parameter import ToolSchema
-from klaude_code.protocol.model import ConversationItem
-
-
-def is_claude_model(model_name: str | None):
+from klaude_code.llm.input_common import AssistantGroup, ToolGroup, UserGroup, merge_reminder_text, parse_message_groups
+from klaude_code.protocol import model
+
+
+def is_claude_model(model_name: str | None) -> bool:
+    """Return True if the model name represents an Anthropic Claude model."""
+
     return model_name is not None and model_name.startswith("anthropic/claude")
 
 
-def is_gemini_model(model_name: str | None):
+def is_gemini_model(model_name: str | None) -> bool:
+    """Return True if the model name represents a Google Gemini model."""
+
     return model_name is not None and model_name.startswith("google/gemini")
 
 
@@ -78,7 +74,7 @@ def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -
     for item in group.reasoning_items:
         if model_name != item.model:
             continue
-        if isinstance(item, protocol_model.ReasoningEncryptedItem):
+        if isinstance(item, model.ReasoningEncryptedItem):
             if item.encrypted_content and len(item.encrypted_content) > 0:
                 reasoning_details.append(
                     {
@@ -89,7 +85,7 @@ def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -
                         "index": len(reasoning_details),
                     }
                 )
-        elif isinstance(item, protocol_model.ReasoningTextItem):
+        elif isinstance(item, model.ReasoningTextItem):
             reasoning_details.append(
                 {
                     "id": item.id,
@@ -119,7 +115,7 @@ def _add_cache_control(messages: list[chat.ChatCompletionMessageParam], use_cach
 
 
 def convert_history_to_input(
-    history: list[ConversationItem],
+    history: list[model.ConversationItem],
     system: str | None = None,
     model_name: str | None = None,
 ) -> list[chat.ChatCompletionMessageParam]:
@@ -161,21 +157,3 @@ def convert_history_to_input(
 
     _add_cache_control(messages, use_cache_control)
     return messages
-
-
-def convert_tool_schema(
-    tools: list[ToolSchema] | None,
-) -> list[chat.ChatCompletionToolParam]:
-    if tools is None:
-        return []
-    return [
-        {
-            "type": "function",
-            "function": {
-                "name": tool.name,
-                "description": tool.description,
-                "parameters": tool.parameters,
-            },
-        }
-        for tool in tools
-    ]
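
convert_tool_schema is deleted here rather than changed: client.py now imports it from klaude_code.llm.openai_compatible.input (see the import hunk above), removing the duplicated OpenRouter copy. For reference, a self-contained sketch of the removed conversion, typed against a structural stand-in since ToolSchema itself lives in klaude_code.protocol:

```python
from typing import Any, Protocol


class ToolSchemaLike(Protocol):
    # Structural stand-in for klaude_code's ToolSchema.
    name: str
    description: str
    parameters: dict[str, Any]


def convert_tool_schema(tools: list[ToolSchemaLike] | None) -> list[dict[str, Any]]:
    # Same shape as the removed copy: wrap each tool in the
    # {"type": "function", "function": {...}} envelope the chat API expects.
    if tools is None:
        return []
    return [
        {
            "type": "function",
            "function": {
                "name": tool.name,
                "description": tool.description,
                "parameters": tool.parameters,
            },
        }
        for tool in tools
    ]
```

Whether the shared helper in openai_compatible/input.py is byte-identical to this removed copy is an assumption; the call site in client.py is unchanged either way.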