klaude-code 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/cli/main.py +9 -4
- klaude_code/cli/runtime.py +42 -43
- klaude_code/command/__init__.py +7 -5
- klaude_code/command/clear_cmd.py +6 -29
- klaude_code/command/command_abc.py +44 -8
- klaude_code/command/diff_cmd.py +33 -27
- klaude_code/command/export_cmd.py +18 -26
- klaude_code/command/help_cmd.py +10 -8
- klaude_code/command/model_cmd.py +11 -40
- klaude_code/command/{prompt-update-dev-doc.md → prompt-dev-docs-update.md} +3 -2
- klaude_code/command/{prompt-dev-doc.md → prompt-dev-docs.md} +3 -2
- klaude_code/command/prompt-init.md +2 -5
- klaude_code/command/prompt_command.py +6 -6
- klaude_code/command/refresh_cmd.py +4 -5
- klaude_code/command/registry.py +16 -19
- klaude_code/command/terminal_setup_cmd.py +12 -11
- klaude_code/config/__init__.py +4 -0
- klaude_code/config/config.py +25 -26
- klaude_code/config/list_model.py +8 -3
- klaude_code/config/select_model.py +1 -1
- klaude_code/const/__init__.py +1 -1
- klaude_code/core/__init__.py +0 -3
- klaude_code/core/agent.py +25 -50
- klaude_code/core/executor.py +268 -101
- klaude_code/core/prompt.py +12 -12
- klaude_code/core/{prompt → prompts}/prompt-gemini.md +1 -1
- klaude_code/core/reminders.py +76 -95
- klaude_code/core/task.py +21 -14
- klaude_code/core/tool/__init__.py +45 -11
- klaude_code/core/tool/file/apply_patch.py +5 -1
- klaude_code/core/tool/file/apply_patch_tool.py +11 -13
- klaude_code/core/tool/file/edit_tool.py +27 -23
- klaude_code/core/tool/file/multi_edit_tool.py +15 -17
- klaude_code/core/tool/file/read_tool.py +41 -36
- klaude_code/core/tool/file/write_tool.py +13 -15
- klaude_code/core/tool/memory/memory_tool.py +85 -68
- klaude_code/core/tool/memory/skill_tool.py +10 -12
- klaude_code/core/tool/shell/bash_tool.py +24 -22
- klaude_code/core/tool/shell/command_safety.py +12 -1
- klaude_code/core/tool/sub_agent_tool.py +11 -12
- klaude_code/core/tool/todo/todo_write_tool.py +21 -28
- klaude_code/core/tool/todo/update_plan_tool.py +14 -24
- klaude_code/core/tool/tool_abc.py +3 -4
- klaude_code/core/tool/tool_context.py +7 -7
- klaude_code/core/tool/tool_registry.py +30 -47
- klaude_code/core/tool/tool_runner.py +35 -43
- klaude_code/core/tool/truncation.py +14 -20
- klaude_code/core/tool/web/mermaid_tool.py +12 -14
- klaude_code/core/tool/web/web_fetch_tool.py +15 -17
- klaude_code/core/turn.py +19 -7
- klaude_code/llm/__init__.py +3 -4
- klaude_code/llm/anthropic/client.py +30 -46
- klaude_code/llm/anthropic/input.py +4 -11
- klaude_code/llm/client.py +29 -8
- klaude_code/llm/input_common.py +66 -36
- klaude_code/llm/openai_compatible/client.py +42 -84
- klaude_code/llm/openai_compatible/input.py +11 -16
- klaude_code/llm/openai_compatible/tool_call_accumulator.py +2 -2
- klaude_code/llm/openrouter/client.py +40 -289
- klaude_code/llm/openrouter/input.py +13 -35
- klaude_code/llm/openrouter/reasoning_handler.py +209 -0
- klaude_code/llm/registry.py +5 -75
- klaude_code/llm/responses/client.py +34 -55
- klaude_code/llm/responses/input.py +24 -26
- klaude_code/llm/usage.py +109 -0
- klaude_code/protocol/__init__.py +4 -0
- klaude_code/protocol/events.py +3 -2
- klaude_code/protocol/{llm_parameter.py → llm_param.py} +12 -32
- klaude_code/protocol/model.py +49 -4
- klaude_code/protocol/op.py +18 -16
- klaude_code/protocol/op_handler.py +28 -0
- klaude_code/{core → protocol}/sub_agent.py +7 -0
- klaude_code/session/export.py +150 -70
- klaude_code/session/session.py +28 -14
- klaude_code/session/templates/export_session.html +180 -42
- klaude_code/trace/__init__.py +2 -2
- klaude_code/trace/log.py +11 -5
- klaude_code/ui/__init__.py +91 -8
- klaude_code/ui/core/__init__.py +1 -0
- klaude_code/ui/core/display.py +103 -0
- klaude_code/ui/core/input.py +71 -0
- klaude_code/ui/modes/__init__.py +1 -0
- klaude_code/ui/modes/debug/__init__.py +1 -0
- klaude_code/ui/{base/debug_event_display.py → modes/debug/display.py} +9 -5
- klaude_code/ui/modes/exec/__init__.py +1 -0
- klaude_code/ui/{base/exec_display.py → modes/exec/display.py} +28 -2
- klaude_code/ui/{repl → modes/repl}/__init__.py +5 -6
- klaude_code/ui/modes/repl/clipboard.py +152 -0
- klaude_code/ui/modes/repl/completers.py +429 -0
- klaude_code/ui/modes/repl/display.py +60 -0
- klaude_code/ui/modes/repl/event_handler.py +375 -0
- klaude_code/ui/modes/repl/input_prompt_toolkit.py +198 -0
- klaude_code/ui/modes/repl/key_bindings.py +170 -0
- klaude_code/ui/{repl → modes/repl}/renderer.py +109 -132
- klaude_code/ui/renderers/assistant.py +21 -0
- klaude_code/ui/renderers/common.py +0 -16
- klaude_code/ui/renderers/developer.py +18 -18
- klaude_code/ui/renderers/diffs.py +36 -14
- klaude_code/ui/renderers/errors.py +1 -1
- klaude_code/ui/renderers/metadata.py +50 -27
- klaude_code/ui/renderers/sub_agent.py +43 -9
- klaude_code/ui/renderers/thinking.py +33 -1
- klaude_code/ui/renderers/tools.py +212 -20
- klaude_code/ui/renderers/user_input.py +19 -23
- klaude_code/ui/rich/__init__.py +1 -0
- klaude_code/ui/{rich_ext → rich}/searchable_text.py +3 -1
- klaude_code/ui/{renderers → rich}/status.py +29 -18
- klaude_code/ui/{base → rich}/theme.py +8 -2
- klaude_code/ui/terminal/__init__.py +1 -0
- klaude_code/ui/{base/terminal_color.py → terminal/color.py} +4 -1
- klaude_code/ui/{base/terminal_control.py → terminal/control.py} +1 -0
- klaude_code/ui/{base/terminal_notifier.py → terminal/notifier.py} +5 -2
- klaude_code/ui/utils/__init__.py +1 -0
- klaude_code/ui/{base/utils.py → utils/common.py} +35 -3
- {klaude_code-1.2.1.dist-info → klaude_code-1.2.3.dist-info}/METADATA +1 -1
- klaude_code-1.2.3.dist-info/RECORD +161 -0
- klaude_code/core/clipboard_manifest.py +0 -124
- klaude_code/llm/openrouter/tool_call_accumulator.py +0 -80
- klaude_code/ui/base/__init__.py +0 -1
- klaude_code/ui/base/display_abc.py +0 -36
- klaude_code/ui/base/input_abc.py +0 -20
- klaude_code/ui/repl/display.py +0 -36
- klaude_code/ui/repl/event_handler.py +0 -247
- klaude_code/ui/repl/input.py +0 -773
- klaude_code/ui/rich_ext/__init__.py +0 -1
- klaude_code-1.2.1.dist-info/RECORD +0 -151
- /klaude_code/core/{prompt → prompts}/prompt-claude-code.md +0 -0
- /klaude_code/core/{prompt → prompts}/prompt-codex.md +0 -0
- /klaude_code/core/{prompt → prompts}/prompt-subagent-explore.md +0 -0
- /klaude_code/core/{prompt → prompts}/prompt-subagent-oracle.md +0 -0
- /klaude_code/core/{prompt → prompts}/prompt-subagent-webfetch.md +0 -0
- /klaude_code/core/{prompt → prompts}/prompt-subagent.md +0 -0
- /klaude_code/ui/{base → core}/stage_manager.py +0 -0
- /klaude_code/ui/{rich_ext → rich}/live.py +0 -0
- /klaude_code/ui/{rich_ext → rich}/markdown.py +0 -0
- /klaude_code/ui/{rich_ext → rich}/quote.py +0 -0
- /klaude_code/ui/{base → terminal}/progress_bar.py +0 -0
- /klaude_code/ui/{base → utils}/debouncer.py +0 -0
- {klaude_code-1.2.1.dist-info → klaude_code-1.2.3.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.1.dist-info → klaude_code-1.2.3.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/reasoning_handler.py
ADDED

```diff
@@ -0,0 +1,209 @@
+from enum import Enum
+
+from pydantic import BaseModel
+
+from klaude_code.protocol import model
+
+
+class ReasoningDetail(BaseModel):
+    """OpenRouter's https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning_details-array-structure"""
+
+    type: str
+    format: str
+    index: int
+    id: str | None = None
+    data: str | None = None  # OpenAI's encrypted content
+    summary: str | None = None
+    text: str | None = None
+    signature: str | None = None  # Claude's signature
+
+
+class ReasoningMode(str, Enum):
+    COMPLETE_CHUNK = "complete_chunk"
+    GPT5_SECTIONS = "gpt5_sections"
+    ACCUMULATE = "accumulate"
+
+
+class ReasoningStreamHandler:
+    """Encapsulates reasoning stream handling across different model behaviors."""
+
+    def __init__(
+        self,
+        param_model: str,
+        response_id: str | None,
+    ) -> None:
+        self._param_model = param_model
+        self._response_id = response_id
+
+        self._reasoning_id: str | None = None
+        self._accumulated_reasoning: list[str] = []
+        self._gpt5_line_buffer: str = ""
+        self._gpt5_section_lines: list[str] = []
+
+    def set_response_id(self, response_id: str | None) -> None:
+        """Update the response identifier used for emitted items."""
+
+        self._response_id = response_id
+
+    def on_detail(self, detail: ReasoningDetail) -> list[model.ConversationItem]:
+        """Process a single reasoning detail and return streamable items."""
+
+        items: list[model.ConversationItem] = []
+
+        if detail.type == "reasoning.encrypted":
+            self._reasoning_id = detail.id
+            if encrypted_item := self._build_encrypted_item(detail.data, detail):
+                items.append(encrypted_item)
+            return items
+
+        if detail.type in ("reasoning.text", "reasoning.summary"):
+            self._reasoning_id = detail.id
+            if encrypted_item := self._build_encrypted_item(detail.signature, detail):
+                items.append(encrypted_item)
+            text = detail.text if detail.type == "reasoning.text" else detail.summary
+            if text:
+                items.extend(self._handle_text(text))
+
+        return items
+
+    def flush(self) -> list[model.ConversationItem]:
+        """Flush buffered reasoning text and encrypted payloads."""
+
+        items: list[model.ConversationItem] = []
+        mode = self._resolve_mode()
+
+        if mode is ReasoningMode.GPT5_SECTIONS:
+            for section in self._drain_gpt5_sections():
+                items.append(self._build_text_item(section))
+        elif self._accumulated_reasoning and mode is ReasoningMode.ACCUMULATE:
+            items.append(self._build_text_item("".join(self._accumulated_reasoning)))
+            self._accumulated_reasoning = []
+
+        return items
+
+    def _handle_text(self, text: str) -> list[model.ReasoningTextItem]:
+        mode = self._resolve_mode()
+        if mode is ReasoningMode.COMPLETE_CHUNK:
+            return [self._build_text_item(text)]
+        if mode is ReasoningMode.GPT5_SECTIONS:
+            sections = self._process_gpt5_text(text)
+            return [self._build_text_item(section) for section in sections]
+        self._accumulated_reasoning.append(text)
+        return []
+
+    def _build_text_item(self, content: str) -> model.ReasoningTextItem:
+        return model.ReasoningTextItem(
+            id=self._reasoning_id,
+            content=content,
+            response_id=self._response_id,
+            model=self._param_model,
+        )
+
+    def _build_encrypted_item(
+        self,
+        content: str | None,
+        detail: ReasoningDetail,
+    ) -> model.ReasoningEncryptedItem | None:
+        if not content:
+            return None
+        return model.ReasoningEncryptedItem(
+            id=detail.id,
+            encrypted_content=content,
+            format=detail.format,
+            response_id=self._response_id,
+            model=self._param_model,
+        )
+
+    def _process_gpt5_text(self, text: str) -> list[str]:
+        emitted_sections: list[str] = []
+        self._gpt5_line_buffer += text
+        while True:
+            newline_index = self._gpt5_line_buffer.find("\n")
+            if newline_index == -1:
+                break
+            line = self._gpt5_line_buffer[:newline_index]
+            self._gpt5_line_buffer = self._gpt5_line_buffer[newline_index + 1 :]
+            remainder = line
+            while True:
+                split_result = self._split_gpt5_title_line(remainder)
+                if split_result is None:
+                    break
+                prefix_segment, title_segment, remainder = split_result
+                if prefix_segment:
+                    if not self._gpt5_section_lines:
+                        self._gpt5_section_lines = []
+                    self._gpt5_section_lines.append(f"{prefix_segment}\n")
+                if self._gpt5_section_lines:
+                    emitted_sections.append("".join(self._gpt5_section_lines))
+                self._gpt5_section_lines = [f"{title_segment}  \n"]  # Add two spaces for markdown line break
+            if remainder:
+                if not self._gpt5_section_lines:
+                    self._gpt5_section_lines = []
+                self._gpt5_section_lines.append(f"{remainder}\n")
+        return emitted_sections
+
+    def _drain_gpt5_sections(self) -> list[str]:
+        sections: list[str] = []
+        if self._gpt5_line_buffer:
+            if not self._gpt5_section_lines:
+                self._gpt5_section_lines = [self._gpt5_line_buffer]
+            else:
+                self._gpt5_section_lines.append(self._gpt5_line_buffer)
+            self._gpt5_line_buffer = ""
+        if self._gpt5_section_lines:
+            sections.append("".join(self._gpt5_section_lines))
+            self._gpt5_section_lines = []
+        return sections
+
+    def _is_gpt5(self) -> bool:
+        return "gpt-5" in self._param_model.lower()
+
+    def _is_complete_chunk_reasoning_model(self) -> bool:
+        """Whether the current model emits reasoning in complete chunks (e.g. Gemini)."""
+
+        return self._param_model.startswith("google/gemini")
+
+    def _resolve_mode(self) -> ReasoningMode:
+        if self._is_complete_chunk_reasoning_model():
+            return ReasoningMode.COMPLETE_CHUNK
+        if self._is_gpt5():
+            return ReasoningMode.GPT5_SECTIONS
+        return ReasoningMode.ACCUMULATE
+
+    def _is_gpt5_title_line(self, line: str) -> bool:
+        stripped = line.strip()
+        if not stripped:
+            return False
+        return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2
+
+    def _split_gpt5_title_line(self, line: str) -> tuple[str | None, str, str] | None:
+        if not line:
+            return None
+        search_start = 0
+        while True:
+            opening_index = line.find("**", search_start)
+            if opening_index == -1:
+                return None
+            closing_index = line.find("**", opening_index + 2)
+            if closing_index == -1:
+                return None
+            title_candidate = line[opening_index : closing_index + 2]
+            stripped_title = title_candidate.strip()
+            if self._is_gpt5_title_line(stripped_title):
+                # Treat as a GPT-5 title only when everything after the
+                # bold segment is either whitespace or starts a new bold
+                # title. This prevents inline bold like `**xxx**yyyy`
+                # from being misclassified as a section title while
+                # preserving support for consecutive titles in one line.
+                after = line[closing_index + 2 :]
+                if after.strip() and not after.lstrip().startswith("**"):
+                    search_start = closing_index + 2
+                    continue
+                prefix_segment = line[:opening_index]
+                remainder_segment = after
+                return (
+                    prefix_segment if prefix_segment else None,
+                    stripped_title,
+                    remainder_segment,
+                )
+            search_start = closing_index + 2
```
klaude_code/llm/registry.py
CHANGED

```diff
@@ -1,22 +1,14 @@
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Callable, TypeVar
+from typing import Callable, TypeVar
 
 from klaude_code.llm.client import LLMClientABC
-from klaude_code.protocol.llm_parameter import LLMClientProtocol, LLMConfigParameter
-from klaude_code.protocol.tools import SubAgentType
-from klaude_code.trace import DebugType, log_debug
-
-if TYPE_CHECKING:
-    from klaude_code.config.config import Config
+from klaude_code.protocol import llm_param
 
-_REGISTRY: dict[LLMClientProtocol, type[LLMClientABC]] = {}
+_REGISTRY: dict[llm_param.LLMClientProtocol, type[LLMClientABC]] = {}
 
 T = TypeVar("T", bound=LLMClientABC)
 
 
-def register(name: LLMClientProtocol) -> Callable[[type[T]], type[T]]:
+def register(name: llm_param.LLMClientProtocol) -> Callable[[type[T]], type[T]]:
     def _decorator(cls: type[T]) -> type[T]:
         _REGISTRY[name] = cls
         return cls
@@ -24,69 +16,7 @@ def register(name: LLMClientProtocol) -> Callable[[type[T]], type[T]]:
     return _decorator
 
 
-def create_llm_client(config: LLMConfigParameter) -> LLMClientABC:
+def create_llm_client(config: llm_param.LLMConfigParameter) -> LLMClientABC:
     if config.protocol not in _REGISTRY:
         raise ValueError(f"Unknown LLMClient protocol: {config.protocol}")
     return _REGISTRY[config.protocol].create(config)
-
-
-@dataclass
-class LLMClients:
-    """Container for LLM clients used by main agent and sub-agents."""
-
-    main: LLMClientABC
-    sub_clients: dict[SubAgentType, LLMClientABC] = field(default_factory=lambda: {})
-
-    def get_client(self, sub_agent_type: SubAgentType | None = None) -> LLMClientABC:
-        """Get client for given sub-agent type, or main client if None."""
-        if sub_agent_type is None:
-            return self.main
-        return self.sub_clients.get(sub_agent_type) or self.main
-
-    @classmethod
-    def from_config(
-        cls,
-        config: Config,
-        model_override: str | None = None,
-        enabled_sub_agents: list[SubAgentType] | None = None,
-    ) -> LLMClients:
-        """Create LLMClients from application config.
-
-        Args:
-            config: Application configuration
-            model_override: Optional model name to override the main model
-            enabled_sub_agents: List of sub-agent types to initialize clients for
-
-        Returns:
-            LLMClients instance
-        """
-        from klaude_code.core.sub_agent import get_sub_agent_profile
-
-        # Resolve main agent LLM config
-        if model_override:
-            llm_config = config.get_model_config(model_override)
-        else:
-            llm_config = config.get_main_model_config()
-
-        log_debug(
-            "Main LLM config",
-            llm_config.model_dump_json(exclude_none=True),
-            style="yellow",
-            debug_type=DebugType.LLM_CONFIG,
-        )
-
-        main_client = create_llm_client(llm_config)
-        sub_clients: dict[SubAgentType, LLMClientABC] = {}
-
-        # Initialize sub-agent clients
-        for sub_agent_type in enabled_sub_agents or []:
-            model_name = config.subagent_models.get(sub_agent_type)
-            if not model_name:
-                continue
-            profile = get_sub_agent_profile(sub_agent_type)
-            if not profile.enabled_for_model(main_client.model_name):
-                continue
-            sub_llm_config = config.get_model_config(model_name)
-            sub_clients[sub_agent_type] = create_llm_client(sub_llm_config)
-
-        return cls(main=main_client, sub_clients=sub_clients)
```
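With the `LLMClients` container gone, registry.py is reduced to the decorator-driven protocol-to-class map. A standalone sketch of that pattern follows, using stand-in `Protocol` and `ClientABC` types so it runs without the package; klaude_code's real `_REGISTRY` keys on `llm_param.LLMClientProtocol`.

```python
# Standalone sketch of the register/create_llm_client pattern (stand-in types).
from enum import Enum
from typing import Callable, TypeVar


class Protocol(str, Enum):  # stand-in for llm_param.LLMClientProtocol
    RESPONSES = "responses"


class ClientABC:  # stand-in for LLMClientABC
    @classmethod
    def create(cls, config: dict) -> "ClientABC":
        return cls()


_REGISTRY: dict[Protocol, type[ClientABC]] = {}

T = TypeVar("T", bound=ClientABC)


def register(name: Protocol) -> Callable[[type[T]], type[T]]:
    def _decorator(cls: type[T]) -> type[T]:
        _REGISTRY[name] = cls  # registration happens at import time
        return cls

    return _decorator


@register(Protocol.RESPONSES)
class ResponsesClient(ClientABC):
    pass


client = _REGISTRY[Protocol.RESPONSES].create({})
assert isinstance(client, ResponsesClient)
```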
klaude_code/llm/responses/client.py
CHANGED

```diff
@@ -1,58 +1,24 @@
 import json
 import time
 from collections.abc import AsyncGenerator
-from typing import Callable, ParamSpec, TypeVar, override
+from typing import override
 
 import httpx
 from openai import AsyncAzureOpenAI, AsyncOpenAI, RateLimitError
 from openai.types import responses
 
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
-from klaude_code.protocol.llm_parameter import (
-    LLMCallParameter,
-    LLMClientProtocol,
-    LLMConfigParameter,
-    apply_config_defaults,
-)
-from klaude_code.protocol.model import (
-    AssistantMessageDelta,
-    AssistantMessageItem,
-    ConversationItem,
-    ReasoningEncryptedItem,
-    ReasoningTextItem,
-    ResponseMetadataItem,
-    StartItem,
-    StreamErrorItem,
-    ToolCallItem,
-    Usage,
-)
+from klaude_code.llm.usage import calculate_cost
+from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
-P = ParamSpec("P")
-R = TypeVar("R")
 
-
-def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
-    """Call an SDK function while logging the JSON payload.
-
-    The function reuses the original callable's type signature via ParamSpec
-    so static type checkers can validate arguments at the call site.
-    """
-
-    payload = {k: v for k, v in kwargs.items() if v is not None}
-    log_debug(
-        json.dumps(payload, ensure_ascii=False, default=str),
-        style="yellow",
-        debug_type=DebugType.LLM_PAYLOAD,
-    )
-    return func(*args, **kwargs)
-
-
-@register(LLMClientProtocol.RESPONSES)
+@register(llm_param.LLMClientProtocol.RESPONSES)
 class ResponsesClient(LLMClientABC):
-    def __init__(self, config: LLMConfigParameter):
+    def __init__(self, config: llm_param.LLMConfigParameter):
         super().__init__(config)
         if config.is_azure:
             if not config.base_url:
@@ -73,11 +39,11 @@ class ResponsesClient(LLMClientABC):
 
     @classmethod
     @override
-    def create(cls, config: LLMConfigParameter) -> "LLMClientABC":
+    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
         return cls(config)
 
     @override
-    async def call(self, param: LLMCallParameter) -> AsyncGenerator[ConversationItem, None]:
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         request_start_time = time.time()
@@ -129,10 +95,10 @@ class ResponsesClient(LLMClientABC):
             match event:
                 case responses.ResponseCreatedEvent() as event:
                     response_id = event.response.id
-                    yield StartItem(response_id=response_id)
+                    yield model.StartItem(response_id=response_id)
                 case responses.ResponseReasoningSummaryTextDoneEvent() as event:
                     if event.text:
-                        yield ReasoningTextItem(
+                        yield model.ReasoningTextItem(
                             content=event.text,
                             response_id=response_id,
                             model=str(param.model),
@@ -141,19 +107,26 @@
                     if first_token_time is None:
                         first_token_time = time.time()
                     last_token_time = time.time()
-                    yield AssistantMessageDelta(content=event.delta, response_id=response_id)
+                    yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
+                case responses.ResponseOutputItemAddedEvent() as event:
+                    if isinstance(event.item, responses.ResponseFunctionToolCall):
+                        yield model.ToolCallStartItem(
+                            response_id=response_id,
+                            call_id=event.item.call_id,
+                            name=event.item.name,
+                        )
                 case responses.ResponseOutputItemDoneEvent() as event:
                     match event.item:
                         case responses.ResponseReasoningItem() as item:
                             if item.encrypted_content:
-                                yield ReasoningEncryptedItem(
+                                yield model.ReasoningEncryptedItem(
                                     id=item.id,
                                     encrypted_content=item.encrypted_content,
                                     response_id=response_id,
                                     model=str(param.model),
                                 )
                         case responses.ResponseOutputMessage() as item:
-                            yield AssistantMessageItem(
+                            yield model.AssistantMessageItem(
                                 content="\n".join(
                                     [
                                         part.text
@@ -168,7 +141,7 @@ class ResponsesClient(LLMClientABC):
                     if first_token_time is None:
                         first_token_time = time.time()
                     last_token_time = time.time()
-                    yield ToolCallItem(
+                    yield model.ToolCallItem(
                         name=item.name,
                         arguments=item.arguments.strip(),
                         call_id=item.call_id,
@@ -178,7 +151,7 @@ class ResponsesClient(LLMClientABC):
                         case _:
                             pass
                 case responses.ResponseCompletedEvent() as event:
-                    usage: Usage | None = None
+                    usage: model.Usage | None = None
                     error_reason: str | None = None
                     if event.response.incomplete_details is not None:
                         error_reason = event.response.incomplete_details.reason
@@ -203,7 +176,7 @@ class ResponsesClient(LLMClientABC):
                     if time_duration >= 0.15:
                         throughput_tps = event.response.usage.output_tokens / time_duration
 
-                    usage = Usage(
+                    usage = model.Usage(
                         input_tokens=event.response.usage.input_tokens,
                         cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
                         reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
@@ -213,7 +186,8 @@
                         throughput_tps=throughput_tps,
                         first_token_latency_ms=first_token_latency_ms,
                     )
-                    yield ResponseMetadataItem(
+                    calculate_cost(usage, self._config.cost)
+                    yield model.ResponseMetadataItem(
                        usage=usage,
                        response_id=response_id,
                        model_name=str(param.model),
@@ -230,8 +204,13 @@
                         style="red",
                         debug_type=DebugType.LLM_STREAM,
                     )
-                    yield StreamErrorItem(error=error_message)
+                    yield model.StreamErrorItem(error=error_message)
                 case _:
-                    log_debug(
+                    log_debug(
+                        "[Unhandled stream event]",
+                        str(event),
+                        style="red",
+                        debug_type=DebugType.LLM_STREAM,
+                    )
         except RateLimitError as e:
-            yield StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
```
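The `call_with_logged_payload` helper removed here is not deleted outright: the new import line shows it now lives in `klaude_code/llm/client.py`. A standalone sketch of the same ParamSpec trick, with `print` standing in for `log_debug` and a made-up `fake_create` in place of a real SDK call:

```python
# Standalone sketch of call_with_logged_payload (moved, not removed, by this diff).
import json
from typing import Callable, ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")


def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    """Call an SDK function while logging the JSON payload.

    ParamSpec reuses the wrapped callable's signature, so type checkers
    still validate keyword arguments at the call site.
    """
    payload = {k: v for k, v in kwargs.items() if v is not None}
    print(json.dumps(payload, ensure_ascii=False, default=str))  # stands in for log_debug(...)
    return func(*args, **kwargs)


def fake_create(*, model: str, stream: bool = False) -> str:  # hypothetical SDK call
    return f"{model} stream={stream}"


print(call_with_logged_payload(fake_create, model="gpt-5", stream=True))
```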
klaude_code/llm/responses/input.py
CHANGED

```diff
@@ -5,20 +5,12 @@ from typing import Any
 
 from openai.types import responses
 
-from klaude_code.protocol.model import (
-    AssistantMessageItem,
-    ConversationItem,
-    DeveloperMessageItem,
-    ReasoningEncryptedItem,
-    ReasoningTextItem,
-    ToolCallItem,
-    ToolResultItem,
-    UserMessageItem,
-)
-
-
-def _build_user_content_parts(user: UserMessageItem) -> list[responses.ResponseInputContentParam]:
+from klaude_code.protocol import llm_param, model
+
+
+def _build_user_content_parts(
+    user: model.UserMessageItem,
+) -> list[responses.ResponseInputContentParam]:
     parts: list[responses.ResponseInputContentParam] = []
     if user.content is not None:
         parts.append({"type": "input_text", "text": user.content})
@@ -29,7 +21,7 @@ def _build_user_content_parts(user: UserMessageItem) -> list[responses.ResponseI
     return parts
 
 
-def _build_tool_result_item(tool: ToolResultItem) -> responses.ResponseInputItemParam:
+def _build_tool_result_item(tool: model.ToolResultItem) -> responses.ResponseInputItemParam:
     content_parts: list[responses.ResponseInputContentParam] = []
     text_output = tool.output or ""
     if text_output:
@@ -46,7 +38,7 @@ def _build_tool_result_item(tool: ToolResultItem) -> responses.ResponseInputItem
 
 
 def convert_history_to_input(
-    history: list[ConversationItem],
+    history: list[model.ConversationItem],
     model_name: str | None = None,
 ) -> responses.ResponseInputParam:
     """
@@ -62,7 +54,7 @@
 
     for item in history:
         match item:
-            case ReasoningTextItem() as item:
+            case model.ReasoningTextItem() as item:
                 # For now, we only store the text. We wait for the encrypted item to output both.
                 # If no encrypted item follows (e.g. incomplete stream?), this text might be lost
                 # or we can choose to output it if the next item is NOT reasoning?
@@ -71,13 +63,13 @@
                     continue
                 pending_reasoning_text = item.content
 
-            case ReasoningEncryptedItem() as item:
+            case model.ReasoningEncryptedItem() as item:
                 if item.encrypted_content and len(item.encrypted_content) > 0 and model_name == item.model:
                     items.append(convert_reasoning_inputs(pending_reasoning_text, item))
                     # Reset pending text after consumption
                     pending_reasoning_text = None
 
-            case ToolCallItem() as t:
+            case model.ToolCallItem() as t:
                 items.append(
                     {
                         "type": "function_call",
@@ -87,9 +79,9 @@
                         "id": t.id,
                     }
                 )
-            case ToolResultItem() as t:
+            case model.ToolResultItem() as t:
                 items.append(_build_tool_result_item(t))
-            case AssistantMessageItem() as a:
+            case model.AssistantMessageItem() as a:
                 items.append(
                     {
                         "type": "message",
@@ -103,7 +95,7 @@
                         ],
                     }
                 )
-            case UserMessageItem() as u:
+            case model.UserMessageItem() as u:
                 items.append(
                     {
                         "type": "message",
@@ -112,12 +104,18 @@
                         "content": _build_user_content_parts(u),
                     }
                 )
-            case DeveloperMessageItem() as d:
+            case model.DeveloperMessageItem() as d:
                 dev_parts: list[responses.ResponseInputContentParam] = []
                 if d.content is not None:
                     dev_parts.append({"type": "input_text", "text": d.content})
                 for image in d.images or []:
-                    dev_parts.append(
+                    dev_parts.append(
+                        {
+                            "type": "input_image",
+                            "detail": "auto",
+                            "image_url": image.image_url.url,
+                        }
+                    )
                 if not dev_parts:
                     dev_parts.append({"type": "input_text", "text": ""})
                 items.append(
@@ -136,7 +134,7 @@
 
 
 def convert_reasoning_inputs(
-    text_content: str | None, encrypted_item: ReasoningEncryptedItem
+    text_content: str | None, encrypted_item: model.ReasoningEncryptedItem
 ) -> responses.ResponseInputItemParam:
     result = {"type": "reasoning", "content": None}
 
@@ -154,7 +152,7 @@
 
 
 def convert_tool_schema(
-    tools: list[ToolSchema] | None,
+    tools: list[llm_param.ToolSchema] | None,
 ) -> list[responses.ToolParam]:
     if tools is None:
         return []
```