klaude-code 1.2.11__py3-none-any.whl → 1.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/codex/oauth.py +3 -3
- klaude_code/cli/main.py +5 -5
- klaude_code/cli/runtime.py +19 -27
- klaude_code/cli/session_cmd.py +6 -8
- klaude_code/command/__init__.py +31 -28
- klaude_code/command/clear_cmd.py +0 -2
- klaude_code/command/diff_cmd.py +0 -2
- klaude_code/command/export_cmd.py +3 -5
- klaude_code/command/help_cmd.py +0 -2
- klaude_code/command/model_cmd.py +0 -2
- klaude_code/command/refresh_cmd.py +0 -2
- klaude_code/command/registry.py +5 -9
- klaude_code/command/release_notes_cmd.py +0 -2
- klaude_code/command/status_cmd.py +2 -4
- klaude_code/command/terminal_setup_cmd.py +2 -4
- klaude_code/command/thinking_cmd.py +229 -0
- klaude_code/config/__init__.py +1 -1
- klaude_code/config/list_model.py +1 -1
- klaude_code/config/select_model.py +5 -15
- klaude_code/const/__init__.py +1 -1
- klaude_code/core/agent.py +14 -69
- klaude_code/core/executor.py +11 -10
- klaude_code/core/manager/agent_manager.py +4 -4
- klaude_code/core/manager/llm_clients.py +10 -49
- klaude_code/core/manager/llm_clients_builder.py +8 -21
- klaude_code/core/manager/sub_agent_manager.py +3 -3
- klaude_code/core/prompt.py +3 -3
- klaude_code/core/reminders.py +1 -1
- klaude_code/core/task.py +4 -5
- klaude_code/core/tool/__init__.py +16 -25
- klaude_code/core/tool/file/_utils.py +1 -1
- klaude_code/core/tool/file/apply_patch.py +17 -25
- klaude_code/core/tool/file/apply_patch_tool.py +4 -7
- klaude_code/core/tool/file/edit_tool.py +4 -11
- klaude_code/core/tool/file/multi_edit_tool.py +2 -3
- klaude_code/core/tool/file/read_tool.py +3 -4
- klaude_code/core/tool/file/write_tool.py +2 -3
- klaude_code/core/tool/memory/memory_tool.py +2 -8
- klaude_code/core/tool/memory/skill_loader.py +3 -2
- klaude_code/core/tool/shell/command_safety.py +0 -1
- klaude_code/core/tool/tool_context.py +1 -3
- klaude_code/core/tool/tool_registry.py +2 -1
- klaude_code/core/tool/tool_runner.py +1 -1
- klaude_code/core/tool/truncation.py +2 -5
- klaude_code/core/turn.py +9 -4
- klaude_code/llm/anthropic/client.py +62 -49
- klaude_code/llm/client.py +2 -20
- klaude_code/llm/codex/client.py +51 -32
- klaude_code/llm/input_common.py +2 -2
- klaude_code/llm/openai_compatible/client.py +60 -39
- klaude_code/llm/openai_compatible/stream_processor.py +2 -1
- klaude_code/llm/openrouter/client.py +79 -45
- klaude_code/llm/openrouter/reasoning_handler.py +19 -132
- klaude_code/llm/registry.py +6 -5
- klaude_code/llm/responses/client.py +65 -43
- klaude_code/llm/usage.py +1 -49
- klaude_code/protocol/commands.py +1 -0
- klaude_code/protocol/events.py +7 -0
- klaude_code/protocol/llm_param.py +1 -9
- klaude_code/protocol/model.py +10 -6
- klaude_code/protocol/sub_agent.py +2 -1
- klaude_code/session/export.py +1 -8
- klaude_code/session/selector.py +12 -7
- klaude_code/session/session.py +2 -4
- klaude_code/trace/__init__.py +1 -1
- klaude_code/trace/log.py +1 -1
- klaude_code/ui/__init__.py +4 -9
- klaude_code/ui/core/stage_manager.py +7 -4
- klaude_code/ui/modes/repl/__init__.py +1 -1
- klaude_code/ui/modes/repl/completers.py +6 -7
- klaude_code/ui/modes/repl/display.py +3 -4
- klaude_code/ui/modes/repl/event_handler.py +63 -5
- klaude_code/ui/modes/repl/key_bindings.py +2 -3
- klaude_code/ui/modes/repl/renderer.py +2 -1
- klaude_code/ui/renderers/diffs.py +1 -4
- klaude_code/ui/renderers/metadata.py +1 -12
- klaude_code/ui/rich/markdown.py +3 -3
- klaude_code/ui/rich/searchable_text.py +6 -6
- klaude_code/ui/rich/status.py +3 -4
- klaude_code/ui/rich/theme.py +1 -4
- klaude_code/ui/terminal/control.py +7 -16
- klaude_code/ui/terminal/notifier.py +2 -4
- klaude_code/ui/utils/common.py +1 -1
- klaude_code/ui/utils/debouncer.py +2 -2
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/METADATA +1 -1
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/RECORD +88 -87
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/client.py
CHANGED

@@ -1,10 +1,12 @@
+import json
 from collections.abc import AsyncGenerator
 from typing import override

 import httpx
 import openai
+from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming

-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.openai_compatible.input import convert_tool_schema
 from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager

@@ -16,6 +18,53 @@ from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log, log_debug


+def build_payload(
+    param: llm_param.LLMCallParameter,
+) -> tuple[CompletionCreateParamsStreaming, dict[str, object], dict[str, str]]:
+    """Build OpenRouter API request parameters."""
+    messages = convert_history_to_input(param.input, param.system, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    extra_body: dict[str, object] = {
+        "usage": {"include": True},  # To get the cache tokens at the end of the response
+        "debug": {
+            "echo_upstream_body": True
+        },  # https://openrouter.ai/docs/api/reference/errors-and-debugging#debug-option-shape
+    }
+    extra_headers: dict[str, str] = {}
+
+    if param.thinking:
+        if param.thinking.budget_tokens is not None:
+            extra_body["reasoning"] = {
+                "max_tokens": param.thinking.budget_tokens,
+                "enable": True,
+            }  # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
+        elif param.thinking.reasoning_effort is not None:
+            extra_body["reasoning"] = {
+                "effort": param.thinking.reasoning_effort,
+            }
+
+    if param.provider_routing:
+        extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
+
+    if is_claude_model(param.model):
+        extra_headers["x-anthropic-beta"] = "fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14"
+
+    payload: CompletionCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "stream": True,
+        "messages": messages,
+        "temperature": param.temperature,
+        "max_tokens": param.max_tokens,
+        "tools": tools,
+        "verbosity": param.verbosity,
+    }
+
+    return payload, extra_body, extra_headers
+
+
 @register(llm_param.LLMClientProtocol.OPENROUTER)
 class OpenRouterClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):

@@ -33,54 +82,30 @@ class OpenRouterClient(LLMClientABC):
         return cls(config)

     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
-        messages = convert_history_to_input(param.input, param.system, param.model)
-        tools = convert_tool_schema(param.tools)

         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

-        extra_body
-
-
-
-
-
-
-
-
-
-        }  # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
-        elif param.thinking.reasoning_effort is not None:
-            extra_body["reasoning"] = {
-                "effort": param.thinking.reasoning_effort,
-            }
-        if param.provider_routing:
-            extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
-        if is_claude_model(param.model):
-            extra_headers["anthropic-beta"] = (
-                "interleaved-thinking-2025-05-14"  # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
-            )
-
-        stream = call_with_logged_payload(
-            self.client.chat.completions.create,
-            model=str(param.model),
-            tool_choice="auto",
-            parallel_tool_calls=True,
-            stream=True,
-            messages=messages,
-            temperature=param.temperature,
-            max_tokens=param.max_tokens,
-            tools=tools,
-            verbosity=param.verbosity,
+        payload, extra_body, extra_headers = build_payload(param)
+
+        log_debug(
+            json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.chat.completions.create(
+            **payload,
             extra_body=extra_body,
-            extra_headers=extra_headers,
+            extra_headers=extra_headers,
         )

         reasoning_handler = ReasoningStreamHandler(
             param_model=str(param.model),
             response_id=None,
         )
+
         state = StreamStateManager(
             param_model=str(param.model),
             reasoning_flusher=reasoning_handler.flush,

@@ -93,31 +118,40 @@ class OpenRouterClient(LLMClientABC):
                     style="blue",
                     debug_type=DebugType.LLM_STREAM,
                 )
+
                 if not state.response_id and event.id:
                     state.set_response_id(event.id)
                     reasoning_handler.set_response_id(event.id)
                     yield model.StartItem(response_id=event.id)
-                if
-                    event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison]
-                ):  # gcp gemini will return None usage field
+                if event.usage is not None:
                     metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
                 if event.model:
                     metadata_tracker.set_model_name(event.model)
                 if provider := getattr(event, "provider", None):
                     metadata_tracker.set_provider(str(provider))
-
                 if len(event.choices) == 0:
                     continue
                 delta = event.choices[0].delta

                 # Reasoning
-                if
-                    reasoning_details = getattr(delta, "reasoning_details")
+                if reasoning_details := getattr(delta, "reasoning_details", None):
                     for item in reasoning_details:
                         try:
                             reasoning_detail = ReasoningDetail.model_validate(item)
                             metadata_tracker.record_token()
                             state.stage = "reasoning"
+                            # Yield delta immediately for streaming
+                            if reasoning_detail.text:
+                                yield model.ReasoningTextDelta(
+                                    content=reasoning_detail.text,
+                                    response_id=state.response_id,
+                                )
+                            if reasoning_detail.summary:
+                                yield model.ReasoningTextDelta(
+                                    content=reasoning_detail.summary,
+                                    response_id=state.response_id,
+                                )
+                            # Keep existing handler logic for final items
                             for conversation_item in reasoning_handler.on_detail(reasoning_detail):
                                 yield conversation_item
                         except Exception as e:

@@ -160,7 +194,7 @@ class OpenRouterClient(LLMClientABC):
                     state.accumulated_tool_calls.add(delta.tool_calls)

         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")

         # Finalize
         for item in state.flush_all():
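For orientation, this is roughly the request shape the new build_payload helper returns for a Claude model with a thinking budget enabled; the model id and budget below are made-up values and the payload dict is abridged, only the keys mirror the code above.

# Illustrative only: approximate build_payload output for a hypothetical Claude
# model with a 2048-token thinking budget (payload abridged).
payload = {
    "model": "anthropic/claude-sonnet-4",  # hypothetical model id
    "tool_choice": "auto",
    "parallel_tool_calls": True,
    "stream": True,
}
extra_body = {
    "usage": {"include": True},
    "debug": {"echo_upstream_body": True},
    "reasoning": {"max_tokens": 2048, "enable": True},
}
extra_headers = {
    "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14",
}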
klaude_code/llm/openrouter/reasoning_handler.py
CHANGED

@@ -1,5 +1,3 @@
-from enum import Enum
-
 from pydantic import BaseModel

 from klaude_code.protocol import model

@@ -18,14 +16,8 @@ class ReasoningDetail(BaseModel):
     signature: str | None = None  # Claude's signature


-class ReasoningMode(str, Enum):
-    COMPLETE_CHUNK = "complete_chunk"
-    GPT5_SECTIONS = "gpt5_sections"
-    ACCUMULATE = "accumulate"
-
-
 class ReasoningStreamHandler:
-    """
+    """Accumulates reasoning text and flushes on encrypted content or finalize."""

     def __init__(
         self,

@@ -37,59 +29,48 @@ class ReasoningStreamHandler:

         self._reasoning_id: str | None = None
         self._accumulated_reasoning: list[str] = []
-        self._gpt5_line_buffer: str = ""
-        self._gpt5_section_lines: list[str] = []

     def set_response_id(self, response_id: str | None) -> None:
         """Update the response identifier used for emitted items."""
-
         self._response_id = response_id

     def on_detail(self, detail: ReasoningDetail) -> list[model.ConversationItem]:
         """Process a single reasoning detail and return streamable items."""
-
         items: list[model.ConversationItem] = []

         if detail.type == "reasoning.encrypted":
             self._reasoning_id = detail.id
+            # Flush accumulated text before encrypted content
+            items.extend(self._flush_text())
             if encrypted_item := self._build_encrypted_item(detail.data, detail):
                 items.append(encrypted_item)
             return items

         if detail.type in ("reasoning.text", "reasoning.summary"):
             self._reasoning_id = detail.id
-
-            items.append(encrypted_item)
+            # Accumulate text
             text = detail.text if detail.type == "reasoning.text" else detail.summary
             if text:
-
+                self._accumulated_reasoning.append(text)
+            # Flush on signature (encrypted content)
+            if detail.signature:
+                items.extend(self._flush_text())
+                if encrypted_item := self._build_encrypted_item(detail.signature, detail):
+                    items.append(encrypted_item)

         return items

     def flush(self) -> list[model.ConversationItem]:
-        """Flush buffered reasoning text
+        """Flush buffered reasoning text on finalize."""
+        return self._flush_text()

-
-
-
-
-
-
-
-        items.append(self._build_text_item("".join(self._accumulated_reasoning)))
-        self._accumulated_reasoning = []
-
-        return items
-
-    def _handle_text(self, text: str) -> list[model.ReasoningTextItem]:
-        mode = self._resolve_mode()
-        if mode is ReasoningMode.COMPLETE_CHUNK:
-            return [self._build_text_item(text)]
-        if mode is ReasoningMode.GPT5_SECTIONS:
-            sections = self._process_gpt5_text(text)
-            return [self._build_text_item(section) for section in sections]
-        self._accumulated_reasoning.append(text)
-        return []
+    def _flush_text(self) -> list[model.ConversationItem]:
+        """Flush accumulated reasoning text as a single item."""
+        if not self._accumulated_reasoning:
+            return []
+        item = self._build_text_item("".join(self._accumulated_reasoning))
+        self._accumulated_reasoning = []
+        return [item]

     def _build_text_item(self, content: str) -> model.ReasoningTextItem:
         return model.ReasoningTextItem(

@@ -113,97 +94,3 @@ class ReasoningStreamHandler:
             response_id=self._response_id,
             model=self._param_model,
         )
-
-    def _process_gpt5_text(self, text: str) -> list[str]:
-        emitted_sections: list[str] = []
-        self._gpt5_line_buffer += text
-        while True:
-            newline_index = self._gpt5_line_buffer.find("\n")
-            if newline_index == -1:
-                break
-            line = self._gpt5_line_buffer[:newline_index]
-            self._gpt5_line_buffer = self._gpt5_line_buffer[newline_index + 1 :]
-            remainder = line
-            while True:
-                split_result = self._split_gpt5_title_line(remainder)
-                if split_result is None:
-                    break
-                prefix_segment, title_segment, remainder = split_result
-                if prefix_segment:
-                    if not self._gpt5_section_lines:
-                        self._gpt5_section_lines = []
-                    self._gpt5_section_lines.append(f"{prefix_segment}\n")
-                if self._gpt5_section_lines:
-                    emitted_sections.append("".join(self._gpt5_section_lines))
-                self._gpt5_section_lines = [f"{title_segment} \n"]  # Add two spaces for markdown line break
-            if remainder:
-                if not self._gpt5_section_lines:
-                    self._gpt5_section_lines = []
-                self._gpt5_section_lines.append(f"{remainder}\n")
-        return emitted_sections
-
-    def _drain_gpt5_sections(self) -> list[str]:
-        sections: list[str] = []
-        if self._gpt5_line_buffer:
-            if not self._gpt5_section_lines:
-                self._gpt5_section_lines = [self._gpt5_line_buffer]
-            else:
-                self._gpt5_section_lines.append(self._gpt5_line_buffer)
-            self._gpt5_line_buffer = ""
-        if self._gpt5_section_lines:
-            sections.append("".join(self._gpt5_section_lines))
-            self._gpt5_section_lines = []
-        return sections
-
-    def _is_gpt5(self) -> bool:
-        return "gpt-5" in self._param_model.lower()
-
-    def _is_complete_chunk_reasoning_model(self) -> bool:
-        """Whether the current model emits reasoning in complete chunks (e.g. Gemini)."""
-
-        return self._param_model.startswith("google/gemini")
-
-    def _resolve_mode(self) -> ReasoningMode:
-        if self._is_complete_chunk_reasoning_model():
-            return ReasoningMode.COMPLETE_CHUNK
-        if self._is_gpt5():
-            return ReasoningMode.GPT5_SECTIONS
-        return ReasoningMode.ACCUMULATE
-
-    def _is_gpt5_title_line(self, line: str) -> bool:
-        stripped = line.strip()
-        if not stripped:
-            return False
-        return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2
-
-    def _split_gpt5_title_line(self, line: str) -> tuple[str | None, str, str] | None:
-        if not line:
-            return None
-        search_start = 0
-        while True:
-            opening_index = line.find("**", search_start)
-            if opening_index == -1:
-                return None
-            closing_index = line.find("**", opening_index + 2)
-            if closing_index == -1:
-                return None
-            title_candidate = line[opening_index : closing_index + 2]
-            stripped_title = title_candidate.strip()
-            if self._is_gpt5_title_line(stripped_title):
-                # Treat as a GPT-5 title only when everything after the
-                # bold segment is either whitespace or starts a new bold
-                # title. This prevents inline bold like `**xxx**yyyy`
-                # from being misclassified as a section title while
-                # preserving support for consecutive titles in one line.
-                after = line[closing_index + 2 :]
-                if after.strip() and not after.lstrip().startswith("**"):
-                    search_start = closing_index + 2
-                    continue
-                prefix_segment = line[:opening_index]
-                remainder_segment = after
-                return (
-                    prefix_segment if prefix_segment else None,
-                    stripped_title,
-                    remainder_segment,
-                )
-            search_start = closing_index + 2
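The handler rewrite above collapses the old per-model modes (complete chunk, GPT-5 sections, accumulate) into a single accumulate-then-flush path. A self-contained sketch of that pattern, with simplified names rather than the actual klaude_code classes:

# Minimal sketch of accumulate-then-flush; the real handler wraps the joined
# text in model.ReasoningTextItem and also emits encrypted reasoning items.
class TextAccumulator:
    def __init__(self) -> None:
        self._parts: list[str] = []

    def add(self, text: str) -> None:
        self._parts.append(text)

    def flush(self) -> list[str]:
        if not self._parts:
            return []
        joined = "".join(self._parts)
        self._parts = []
        return [joined]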
klaude_code/llm/registry.py
CHANGED

@@ -1,4 +1,5 @@
-from
+from collections.abc import Callable
+from typing import TYPE_CHECKING, TypeVar

 from klaude_code.protocol import llm_param


@@ -20,13 +21,13 @@ def _load_protocol(protocol: llm_param.LLMClientProtocol) -> None:

     # Import only the needed module to trigger @register decorator
     if protocol == llm_param.LLMClientProtocol.ANTHROPIC:
-        from . import anthropic as _
+        from . import anthropic as _
     elif protocol == llm_param.LLMClientProtocol.CODEX:
-        from . import codex as _
+        from . import codex as _
     elif protocol == llm_param.LLMClientProtocol.OPENAI:
-        from . import openai_compatible as _
+        from . import openai_compatible as _
     elif protocol == llm_param.LLMClientProtocol.OPENROUTER:
-        from . import openrouter as _
+        from . import openrouter as _
     elif protocol == llm_param.LLMClientProtocol.RESPONSES:
         from . import responses as _  # noqa: F401
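The registry hunk above mostly re-indents the lazy imports, but it shows the pattern at work: importing a protocol module triggers its @register decorator, so only the client that is actually selected gets imported. A generic sketch of such a decorator registry, not the klaude_code implementation:

# Generic decorator-registry sketch (hypothetical names): importing a module
# that defines a decorated class is what populates the registry.
_REGISTRY: dict[str, type] = {}

def register(name: str):
    def decorator(cls: type) -> type:
        _REGISTRY[name] = cls
        return cls
    return decorator

@register("openrouter")
class OpenRouterClientStub:
    pass

def get_client_class(name: str) -> type:
    return _REGISTRY[name]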
klaude_code/llm/responses/client.py
CHANGED

@@ -6,12 +6,13 @@ import httpx
 import openai
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai.types import responses
+from openai.types.responses.response_create_params import ResponseCreateParamsStreaming

-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
-from klaude_code.llm.usage import MetadataTracker
+from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug

@@ -20,11 +21,45 @@ if TYPE_CHECKING:
     from openai.types.responses import ResponseStreamEvent


+def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
+    """Build OpenAI Responses API request parameters."""
+    inputs = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    payload: ResponseCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "include": [
+            "reasoning.encrypted_content",
+        ],
+        "store": False,
+        "stream": True,
+        "temperature": param.temperature,
+        "max_output_tokens": param.max_tokens,
+        "input": inputs,
+        "instructions": param.system,
+        "tools": tools,
+        "prompt_cache_key": param.session_id or "",
+    }
+
+    if param.thinking and param.thinking.reasoning_effort:
+        payload["reasoning"] = {
+            "effort": param.thinking.reasoning_effort,
+            "summary": param.thinking.reasoning_summary,
+        }
+
+    if param.verbosity:
+        payload["text"] = {"verbosity": param.verbosity}
+
+    return payload
+
+
 async def parse_responses_stream(
     stream: "AsyncStream[ResponseStreamEvent]",
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
-) -> AsyncGenerator[model.ConversationItem
+) -> AsyncGenerator[model.ConversationItem]:
     """Parse OpenAI Responses API stream events into ConversationItems."""
     response_id: str | None = None

@@ -40,6 +75,12 @@ async def parse_responses_stream(
             case responses.ResponseCreatedEvent() as event:
                 response_id = event.response.id
                 yield model.StartItem(response_id=response_id)
+            case responses.ResponseReasoningSummaryTextDeltaEvent() as event:
+                if event.delta:
+                    yield model.ReasoningTextDelta(
+                        content=event.delta,
+                        response_id=response_id,
+                    )
             case responses.ResponseReasoningSummaryTextDoneEvent() as event:
                 if event.text:
                     yield model.ReasoningTextItem(

@@ -95,16 +136,17 @@ async def parse_responses_stream(
                 if event.response.incomplete_details is not None:
                     error_reason = event.response.incomplete_details.reason
                 if event.response.usage is not None:
-
-
-
-
-
-
-
-
+                    metadata_tracker.set_usage(
+                        model.Usage(
+                            input_tokens=event.response.usage.input_tokens,
+                            output_tokens=event.response.usage.output_tokens,
+                            cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
+                            reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
+                            context_size=event.response.usage.total_tokens,
+                            context_limit=param.context_limit,
+                            max_tokens=param.max_tokens,
+                        )
                     )
-                    metadata_tracker.set_usage(usage)
                 metadata_tracker.set_model_name(str(param.model))
                 metadata_tracker.set_response_id(response_id)
                 yield metadata_tracker.finalize()

@@ -127,7 +169,7 @@ async def parse_responses_stream(
                 debug_type=DebugType.LLM_STREAM,
             )
     except (openai.OpenAIError, httpx.HTTPError) as e:
-        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+        yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")


 @register(llm_param.LLMClientProtocol.RESPONSES)

@@ -157,45 +199,25 @@ class ResponsesClient(LLMClientABC):
         return cls(config)

     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())

         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

-
-        tools = convert_tool_schema(param.tools)
+        payload = build_payload(param)

+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
         try:
-            stream = await
-
-                model=str(param.model),
-                tool_choice="auto",
-                parallel_tool_calls=True,
-                include=[
-                    "reasoning.encrypted_content",
-                ],
-                store=param.store,
-                previous_response_id=param.previous_response_id,
-                stream=True,
-                temperature=param.temperature,
-                max_output_tokens=param.max_tokens,
-                input=inputs,
-                instructions=param.system,
-                tools=tools,
-                text={
-                    "verbosity": param.verbosity,
-                },
-                prompt_cache_key=param.session_id or "",
-                reasoning={
-                    "effort": param.thinking.reasoning_effort,
-                    "summary": param.thinking.reasoning_summary,
-                }
-                if param.thinking and param.thinking.reasoning_effort
-                else None,
+            stream = await self.client.responses.create(
+                **payload,
                 extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
             return

         async for item in parse_responses_stream(stream, param, metadata_tracker):

klaude_code/llm/usage.py
CHANGED

@@ -108,55 +108,7 @@ def convert_usage(
         reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
         or 0,
         output_tokens=usage.completion_tokens,
-
-        context_limit=context_limit,
-        max_tokens=max_tokens,
-    )
-
-
-def convert_anthropic_usage(
-    input_tokens: int,
-    output_tokens: int,
-    cached_tokens: int,
-    context_limit: int | None = None,
-    max_tokens: int | None = None,
-) -> model.Usage:
-    """Convert Anthropic usage data to internal Usage model.
-
-    context_token is computed from input + cached + output tokens,
-    representing the actual context window usage for this turn.
-    """
-    context_token = input_tokens + cached_tokens + output_tokens
-    return model.Usage(
-        input_tokens=input_tokens,
-        output_tokens=output_tokens,
-        cached_tokens=cached_tokens,
-        context_token=context_token,
-        context_limit=context_limit,
-        max_tokens=max_tokens,
-    )
-
-
-def convert_responses_usage(
-    input_tokens: int,
-    output_tokens: int,
-    cached_tokens: int,
-    reasoning_tokens: int,
-    total_tokens: int,
-    context_limit: int | None = None,
-    max_tokens: int | None = None,
-) -> model.Usage:
-    """Convert OpenAI Responses API usage data to internal Usage model.
-
-    context_token is set to total_tokens from the API response,
-    representing the actual context window usage for this turn.
-    """
-    return model.Usage(
-        input_tokens=input_tokens,
-        output_tokens=output_tokens,
-        cached_tokens=cached_tokens,
-        reasoning_tokens=reasoning_tokens,
-        context_token=total_tokens,
+        context_size=usage.total_tokens,
         context_limit=context_limit,
         max_tokens=max_tokens,
     )
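On the usage side, the deleted convert_anthropic_usage summed input, cached, and output tokens to get a context figure, while the surviving path takes the provider-reported total directly (context_size=usage.total_tokens). A quick check with made-up numbers showing where the two derivations coincide when the provider reports a plain sum:

# Hypothetical token counts, illustration only.
input_tokens, cached_tokens, output_tokens = 1200, 800, 300
context_from_sum = input_tokens + cached_tokens + output_tokens  # old convert_anthropic_usage approach
total_tokens_reported = 2300  # what a usage object might report
assert context_from_sum == total_tokens_reported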
klaude_code/protocol/commands.py
CHANGED

@@ -13,6 +13,7 @@ class CommandName(str, Enum):
     EXPORT = "export"
     STATUS = "status"
     RELEASE_NOTES = "release-notes"
+    THINKING = "thinking"
     # PLAN and DOC are dynamically registered now, but kept here if needed for reference
     # or we can remove them if no code explicitly imports them.
     # PLAN = "plan"
klaude_code/protocol/events.py
CHANGED

@@ -54,6 +54,12 @@ class ThinkingEvent(BaseModel):
     content: str


+class ThinkingDeltaEvent(BaseModel):
+    session_id: str
+    response_id: str | None = None
+    content: str
+
+
 class AssistantMessageDeltaEvent(BaseModel):
     session_id: str
     response_id: str | None = None

@@ -153,6 +159,7 @@ Event = (
     TaskStartEvent
     | TaskFinishEvent
     | ThinkingEvent
+    | ThinkingDeltaEvent
     | AssistantMessageDeltaEvent
     | AssistantMessageEvent
     | ToolCallEvent