klaude-code 1.2.11__py3-none-any.whl → 1.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/command/__init__.py +29 -26
- klaude_code/command/clear_cmd.py +0 -2
- klaude_code/command/diff_cmd.py +0 -2
- klaude_code/command/export_cmd.py +0 -2
- klaude_code/command/help_cmd.py +0 -2
- klaude_code/command/model_cmd.py +0 -2
- klaude_code/command/refresh_cmd.py +0 -2
- klaude_code/command/registry.py +4 -8
- klaude_code/command/release_notes_cmd.py +0 -2
- klaude_code/command/status_cmd.py +2 -4
- klaude_code/command/terminal_setup_cmd.py +0 -2
- klaude_code/command/thinking_cmd.py +227 -0
- klaude_code/config/select_model.py +5 -15
- klaude_code/const/__init__.py +1 -1
- klaude_code/core/agent.py +1 -8
- klaude_code/core/prompt.py +1 -1
- klaude_code/core/task.py +2 -3
- klaude_code/core/turn.py +0 -1
- klaude_code/llm/anthropic/client.py +56 -47
- klaude_code/llm/client.py +1 -19
- klaude_code/llm/codex/client.py +49 -30
- klaude_code/llm/openai_compatible/client.py +52 -34
- klaude_code/llm/openrouter/client.py +63 -41
- klaude_code/llm/responses/client.py +56 -39
- klaude_code/llm/usage.py +1 -49
- klaude_code/protocol/commands.py +1 -0
- klaude_code/protocol/llm_param.py +1 -9
- klaude_code/protocol/model.py +3 -5
- klaude_code/session/export.py +1 -8
- klaude_code/session/selector.py +12 -7
- klaude_code/ui/modes/repl/completers.py +3 -3
- klaude_code/ui/renderers/metadata.py +1 -12
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.12.dist-info}/METADATA +1 -1
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.12.dist-info}/RECORD +36 -35
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.12.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.12.dist-info}/entry_points.txt +0 -0
klaude_code/llm/anthropic/client.py
CHANGED

@@ -15,17 +15,48 @@ from anthropic.types.beta.beta_signature_delta import BetaSignatureDelta
 from anthropic.types.beta.beta_text_delta import BetaTextDelta
 from anthropic.types.beta.beta_thinking_delta import BetaThinkingDelta
 from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
+from anthropic.types.beta.message_create_params import MessageCreateParamsStreaming
 
 from klaude_code import const
 from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
 
+def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStreaming:
+    """Build Anthropic API request parameters."""
+    messages = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+    system = convert_system_to_input(param.system)
+
+    payload: MessageCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": {
+            "type": "auto",
+            "disable_parallel_tool_use": False,
+        },
+        "stream": True,
+        "max_tokens": param.max_tokens or const.DEFAULT_MAX_TOKENS,
+        "temperature": param.temperature or const.DEFAULT_TEMPERATURE,
+        "messages": messages,
+        "system": system,
+        "tools": tools,
+        "betas": ["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
+    }
+
+    if param.thinking and param.thinking.type == "enabled":
+        payload["thinking"] = anthropic.types.ThinkingConfigEnabledParam(
+            type="enabled",
+            budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
+        )
+
+    return payload
+
+
 @register(llm_param.LLMClientProtocol.ANTHROPIC)
 class AnthropicClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):

@@ -48,32 +79,16 @@ class AnthropicClient(LLMClientABC):
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        messages = convert_history_to_input(param.input, param.model)
-        tools = convert_tool_schema(param.tools)
-        system = convert_system_to_input(param.system)
-
-        stream = call_with_logged_payload(
-            self.client.beta.messages.create,
-            model=str(param.model),
-            tool_choice={
-                "type": "auto",
-                "disable_parallel_tool_use": False,
-            },
-            stream=True,
-            max_tokens=param.max_tokens or const.DEFAULT_MAX_TOKENS,
-            temperature=param.temperature or const.DEFAULT_TEMPERATURE,
-            messages=messages,
-            system=system,
-            tools=tools,
-            betas=["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
-            thinking=anthropic.types.ThinkingConfigEnabledParam(
-                type=param.thinking.type,
-                budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
-            )
-            if param.thinking and param.thinking.type == "enabled"
-            else anthropic.types.ThinkingConfigDisabledParam(
-                type="disabled",
-            ),
+        payload = build_payload(param)
+
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.beta.messages.create(
+            **payload,
             extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
         )
 

@@ -85,9 +100,8 @@ class AnthropicClient(LLMClientABC):
         current_tool_call_id: str | None = None
         current_tool_inputs: list[str] | None = None
 
-        output_tokens = 0
+        input_token = 0
+        cached_token = 0
 
         try:
             async for event in await stream:

@@ -100,11 +114,8 @@ class AnthropicClient(LLMClientABC):
                 match event:
                     case BetaRawMessageStartEvent() as event:
                         response_id = event.message.id
-                            event.message.usage.cache_creation_input_tokens or 0
-                        )
-                        output_tokens = event.message.usage.output_tokens or 0
+                        cached_token = event.message.usage.cache_read_input_tokens or 0
+                        input_token = event.message.usage.input_tokens
                         yield model.StartItem(response_id=response_id)
                     case BetaRawContentBlockDeltaEvent() as event:
                         match event.delta:

@@ -170,18 +181,16 @@ class AnthropicClient(LLMClientABC):
                         current_tool_call_id = None
                         current_tool_inputs = None
                     case BetaRawMessageDeltaEvent() as event:
-                            max_tokens=param.max_tokens,
+                        metadata_tracker.set_usage(
+                            model.Usage(
+                                input_tokens=input_token + cached_token,
+                                output_tokens=event.usage.output_tokens,
+                                cached_tokens=cached_token,
+                                context_size=input_token + cached_token + event.usage.output_tokens,
+                                context_limit=param.context_limit,
+                                max_tokens=param.max_tokens,
+                            )
                         )
-                        metadata_tracker.set_usage(usage)
                         metadata_tracker.set_model_name(str(param.model))
                         metadata_tracker.set_response_id(response_id)
                         yield metadata_tracker.finalize()
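The pattern above recurs in every client in this release: request construction moves out of `call` into a module-level `build_payload` that returns the SDK's own streaming TypedDict, so the exact dict that gets logged via `log_debug` is the dict that is unpacked into the SDK call. A minimal standalone sketch of the shape (the TypedDict and the model name below are illustrative stand-ins, not the package's types):

```python
import json
from typing import TypedDict


class CreateParams(TypedDict, total=False):
    """Stand-in for the SDK's MessageCreateParamsStreaming TypedDict."""

    model: str
    stream: bool
    max_tokens: int
    thinking: dict[str, object]


def build_payload(model: str, max_tokens: int, thinking_budget: int | None) -> CreateParams:
    payload: CreateParams = {"model": model, "stream": True, "max_tokens": max_tokens}
    if thinking_budget is not None:
        # Optional keys are only added when enabled, mirroring the diff's thinking block.
        payload["thinking"] = {"type": "enabled", "budget_tokens": thinking_budget}
    return payload


payload = build_payload("claude-sonnet-4-20250514", 8192, 1024)
print(json.dumps(payload))  # one dict to log...
# client.beta.messages.create(**payload)  # ...and the same dict to unpack into the call
```

Note the small behavior change the refactor carries: the old inline call always sent a `thinking` argument (enabled or explicitly disabled), while `build_payload` now omits the key entirely unless thinking is enabled.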
klaude_code/llm/client.py
CHANGED
@@ -1,10 +1,8 @@
-import json
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
-from typing import Callable, ParamSpec, TypeVar, cast
+from typing import ParamSpec, TypeVar, cast
 
 from klaude_code.protocol import llm_param, model
-from klaude_code.trace import DebugType, log_debug
 
 
 class LLMClientABC(ABC):

@@ -31,19 +29,3 @@ class LLMClientABC(ABC):
 
 P = ParamSpec("P")
 R = TypeVar("R")
-
-
-def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
-    """Call an SDK function while logging the JSON payload.
-
-    The function reuses the original callable's type signature via ParamSpec
-    so static type checkers can validate arguments at the call site.
-    """
-
-    payload = {k: v for k, v in kwargs.items() if v is not None}
-    log_debug(
-        json.dumps(payload, ensure_ascii=False, default=str),
-        style="yellow",
-        debug_type=DebugType.LLM_PAYLOAD,
-    )
-    return func(*args, **kwargs)
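The deleted helper is worth understanding, since every client in this release replaces a call through it with a direct SDK call plus an explicit `log_debug`. `ParamSpec` lets a wrapper forward `*args`/`**kwargs` to an arbitrary callable without erasing its signature for the type checker. A standalone reconstruction of the removed function (with `print` standing in for klaude_code's `log_debug`):

```python
import json
from collections.abc import Callable
from typing import ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")


def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    """Call an SDK function while logging the JSON payload.

    ParamSpec reuses the wrapped callable's signature, so type
    checkers validate arguments at the call site.
    """
    payload = {k: v for k, v in kwargs.items() if v is not None}
    print(json.dumps(payload, ensure_ascii=False, default=str))  # log_debug in the real code
    return func(*args, **kwargs)


greeting = call_with_logged_payload("Hello, {name}!".format, name="klaude")
print(greeting)  # Hello, klaude!
```

The likely motivation for removing it: the wrapper only sees keyword arguments, so a payload assembled this way cannot be typed against the SDK's request TypedDicts, and positional arguments escape logging entirely. Building a typed payload dict first, logging it, then unpacking it with `**payload` keeps one source of truth.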
klaude_code/llm/codex/client.py
CHANGED
@@ -1,22 +1,61 @@
 """Codex LLM client using ChatGPT subscription via OAuth."""
 
+import json
 from collections.abc import AsyncGenerator
 from typing import override
 
 import httpx
 import openai
 from openai import AsyncOpenAI
+from openai.types.responses.response_create_params import ResponseCreateParamsStreaming
 
 from klaude_code.auth.codex.exceptions import CodexNotLoggedInError
 from klaude_code.auth.codex.oauth import CodexOAuth
 from klaude_code.auth.codex.token_manager import CodexTokenManager
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.client import parse_responses_stream
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
 from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
+from klaude_code.trace import DebugType, log_debug
+
+
+def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
+    """Build Codex API request parameters."""
+    inputs = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    session_id = param.session_id or ""
+
+    payload: ResponseCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "include": [
+            "reasoning.encrypted_content",
+        ],
+        "store": False,
+        "stream": True,
+        "input": inputs,
+        "instructions": param.system,
+        "tools": tools,
+        "prompt_cache_key": session_id,
+        # max_output_token and temperature is not supported in Codex API
+    }
+
+    if param.thinking and param.thinking.reasoning_effort:
+        payload["reasoning"] = {
+            "effort": param.thinking.reasoning_effort,
+            "summary": param.thinking.reasoning_summary,
+        }
+
+    if param.verbosity:
+        payload["text"] = {"verbosity": param.verbosity}
+
+    return payload
+
 
 # Codex API configuration
 CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"

@@ -81,45 +120,25 @@ class CodexClient(LLMClientABC):
 
         param = apply_config_defaults(param, self.get_llm_config())
 
-        # Codex API requires store=False
-        param.store = False
-
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        inputs = convert_history_to_input(param.input, param.model)
-        tools = convert_tool_schema(param.tools)
+        payload = build_payload(param)
 
         session_id = param.session_id or ""
-        # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
         extra_headers: dict[str, str] = {}
         if session_id:
+            # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
             extra_headers["conversation_id"] = session_id
             extra_headers["session_id"] = session_id
 
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
         try:
-            stream = await call_with_logged_payload(
-                self.client.responses.create,
-                model=str(param.model),
-                tool_choice="auto",
-                parallel_tool_calls=True,
-                include=[
-                    "reasoning.encrypted_content",
-                ],
-                store=False,  # Always False for Codex
-                stream=True,
-                input=inputs,
-                instructions=param.system,
-                tools=tools,
-                text={
-                    "verbosity": param.verbosity,
-                },
-                prompt_cache_key=session_id,
-                reasoning={
-                    "effort": param.thinking.reasoning_effort,
-                    "summary": param.thinking.reasoning_summary,
-                }
-                if param.thinking and param.thinking.reasoning_effort
-                else None,
+            stream = await self.client.responses.create(
+                **payload,
                 extra_headers=extra_headers,
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
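For reference, the resulting call shape against the Codex backend looks roughly like the sketch below. This is illustrative only: the model name and token are placeholders, the message shape is the generic Responses-API form, and the real client obtains credentials through the CodexOAuth/CodexTokenManager flow rather than a static API key.

```python
import asyncio
import json

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI(
        base_url="https://chatgpt.com/backend-api/codex",
        api_key="<oauth-access-token>",  # placeholder; real auth goes through CodexTokenManager
    )
    payload = {
        "model": "gpt-5",  # hypothetical model name
        "store": False,  # the Codex API requires store=False
        "stream": True,
        "input": [{"role": "user", "content": "hello"}],
        "instructions": "You are a coding agent.",
        "prompt_cache_key": "session-123",
    }
    print(json.dumps(payload))  # log before sending, as the new client does
    stream = await client.responses.create(
        **payload,
        # conversation_id/session_id improve backend prompt-cache hit rate
        extra_headers={"conversation_id": "session-123", "session_id": "session-123"},
    )
    async for event in stream:
        print(event.type)


# asyncio.run(main())  # requires a valid ChatGPT OAuth token
```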
klaude_code/llm/openai_compatible/client.py
CHANGED

@@ -4,8 +4,9 @@ from typing import override
 
 import httpx
 import openai
+from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
 
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.openai_compatible.input import convert_history_to_input, convert_tool_schema
 from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager

@@ -15,6 +16,35 @@ from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
 
+def build_payload(param: llm_param.LLMCallParameter) -> tuple[CompletionCreateParamsStreaming, dict[str, object]]:
+    """Build OpenAI API request parameters."""
+    messages = convert_history_to_input(param.input, param.system, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    extra_body: dict[str, object] = {}
+
+    if param.thinking:
+        extra_body["thinking"] = {
+            "type": param.thinking.type,
+            "budget": param.thinking.budget_tokens,
+        }
+
+    payload: CompletionCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "stream": True,
+        "messages": messages,
+        "temperature": param.temperature,
+        "max_tokens": param.max_tokens,
+        "tools": tools,
+        "reasoning_effort": param.thinking.reasoning_effort if param.thinking else None,
+        "verbosity": param.verbosity,
+    }
+
+    return payload, extra_body
+
+
 @register(llm_param.LLMClientProtocol.OPENAI)
 class OpenAICompatibleClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):

@@ -44,32 +74,21 @@ class OpenAICompatibleClient(LLMClientABC):
     @override
     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())
-        messages = convert_history_to_input(param.input, param.system, param.model)
-        tools = convert_tool_schema(param.tools)
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        extra_body = {}
-        extra_headers = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
-
-        if param.thinking:
-            extra_body["thinking"] = {
-                "type": param.thinking.type,
-                "budget": param.thinking.budget_tokens,
-            }
-        stream = call_with_logged_payload(
-            self.client.chat.completions.create,
-            model=str(param.model),
-            tool_choice="auto",
-            parallel_tool_calls=True,
-            stream=True,
-            messages=messages,
-            temperature=param.temperature,
-            max_tokens=param.max_tokens,
-            tools=tools,
-            reasoning_effort=param.thinking.reasoning_effort if param.thinking else None,
-            verbosity=param.verbosity,
-            extra_body=extra_body,  # pyright: ignore[reportUnknownArgumentType]
+        payload, extra_body = build_payload(param)
+        extra_headers: dict[str, str] = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
+
+        log_debug(
+            json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.chat.completions.create(
+            **payload,
+            extra_body=extra_body,
             extra_headers=extra_headers,
         )
 

@@ -85,9 +104,7 @@ class OpenAICompatibleClient(LLMClientABC):
                 if not state.response_id and event.id:
                     state.set_response_id(event.id)
                     yield model.StartItem(response_id=event.id)
-                if (
-                    event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison] gcp gemini will return None usage field
-                ):
+                if event.usage is not None:
                     metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
                 if event.model:
                     metadata_tracker.set_model_name(event.model)

@@ -96,9 +113,8 @@ class OpenAICompatibleClient(LLMClientABC):
 
                 if len(event.choices) == 0:
                     continue
-                delta = event.choices[0].delta
 
-                # Support Kimi K2's usage field in choice
+                # Support Moonshot Kimi K2's usage field in choice
                 if hasattr(event.choices[0], "usage") and getattr(event.choices[0], "usage"):
                     metadata_tracker.set_usage(
                         convert_usage(

@@ -108,12 +124,14 @@ class OpenAICompatibleClient(LLMClientABC):
                     )
                 )
 
+                delta = event.choices[0].delta
+
                 # Reasoning
-                reasoning_content = (
+                reasoning_content = (
+                    getattr(delta, "reasoning_content", None)
+                    or getattr(delta, "reasoning", None)
+                    or ""
+                )
                 if reasoning_content:
                     metadata_tracker.record_token()
                     state.stage = "reasoning"
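The provider-compatibility shim in the last hunk is easy to miss: different OpenAI-compatible backends stream reasoning text under different field names, so the client probes both before giving up. A runnable sketch of that fallback (field names from the diff; the vendor attributions are examples, e.g. `reasoning_content` in DeepSeek-style APIs and `reasoning` on OpenRouter):

```python
from types import SimpleNamespace


def extract_reasoning(delta: object) -> str:
    # Probe the two field names seen in the wild, in priority order.
    return getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None) or ""


print(extract_reasoning(SimpleNamespace(reasoning_content="thinking...")))  # thinking...
print(extract_reasoning(SimpleNamespace(reasoning="also thinking")))        # also thinking
print(repr(extract_reasoning(SimpleNamespace())))                           # ''
```

The usage guard was simplified in the same pass: the extra `completion_tokens is not None` check (a workaround for GCP Gemini returning a None usage field) was dropped, presumably because `convert_usage` now tolerates that case itself.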
klaude_code/llm/openrouter/client.py
CHANGED

@@ -1,10 +1,12 @@
+import json
 from collections.abc import AsyncGenerator
 from typing import override
 
 import httpx
 import openai
+from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
 
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.openai_compatible.input import convert_tool_schema
 from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager

@@ -16,6 +18,52 @@ from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log, log_debug
 
 
+def build_payload(
+    param: llm_param.LLMCallParameter,
+) -> tuple[CompletionCreateParamsStreaming, dict[str, object], dict[str, str]]:
+    """Build OpenRouter API request parameters."""
+    messages = convert_history_to_input(param.input, param.system, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    extra_body: dict[str, object] = {
+        "usage": {"include": True}  # To get the cache tokens at the end of the response
+    }
+    extra_headers: dict[str, str] = {}
+
+    if param.thinking:
+        if param.thinking.budget_tokens is not None:
+            extra_body["reasoning"] = {
+                "max_tokens": param.thinking.budget_tokens,
+                "enable": True,
+            }  # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
+        elif param.thinking.reasoning_effort is not None:
+            extra_body["reasoning"] = {
+                "effort": param.thinking.reasoning_effort,
+            }
+
+    if param.provider_routing:
+        extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
+
+    if is_claude_model(param.model):
+        extra_headers["anthropic-beta"] = (
+            "interleaved-thinking-2025-05-14"  # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
+        )
+
+    payload: CompletionCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "stream": True,
+        "messages": messages,
+        "temperature": param.temperature,
+        "max_tokens": param.max_tokens,
+        "tools": tools,
+        "verbosity": param.verbosity,
+    }
+
+    return payload, extra_body, extra_headers
+
+
 @register(llm_param.LLMClientProtocol.OPENROUTER)
 class OpenRouterClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):

@@ -35,52 +83,28 @@ class OpenRouterClient(LLMClientABC):
     @override
     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())
-        messages = convert_history_to_input(param.input, param.system, param.model)
-        tools = convert_tool_schema(param.tools)
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        extra_body: dict[str, object] = {
-            "usage": {"include": True}  # To get the cache tokens at the end of the response
-        }
-        extra_headers: dict[str, str] = {}
-
-        if param.thinking:
-            if param.thinking.budget_tokens is not None:
-                extra_body["reasoning"] = {
-                    "max_tokens": param.thinking.budget_tokens,
-                    "enable": True,
-                } # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
-            elif param.thinking.reasoning_effort is not None:
-                extra_body["reasoning"] = {
-                    "effort": param.thinking.reasoning_effort,
-                }
-        if param.provider_routing:
-            extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
-        if is_claude_model(param.model):
-            extra_headers["anthropic-beta"] = (
-                "interleaved-thinking-2025-05-14"  # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
-            )
-
-        stream = call_with_logged_payload(
-            self.client.chat.completions.create,
-            model=str(param.model),
-            tool_choice="auto",
-            parallel_tool_calls=True,
-            stream=True,
-            messages=messages,
-            temperature=param.temperature,
-            max_tokens=param.max_tokens,
-            tools=tools,
-            verbosity=param.verbosity,
+        payload, extra_body, extra_headers = build_payload(param)
+
+        log_debug(
+            json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.chat.completions.create(
+            **payload,
             extra_body=extra_body,
-            extra_headers=extra_headers,
+            extra_headers=extra_headers,
         )
 
         reasoning_handler = ReasoningStreamHandler(
            param_model=str(param.model),
            response_id=None,
        )
+
        state = StreamStateManager(
            param_model=str(param.model),
            reasoning_flusher=reasoning_handler.flush,

@@ -93,19 +117,17 @@ class OpenRouterClient(LLMClientABC):
                 style="blue",
                 debug_type=DebugType.LLM_STREAM,
             )
+
             if not state.response_id and event.id:
                 state.set_response_id(event.id)
                 reasoning_handler.set_response_id(event.id)
                 yield model.StartItem(response_id=event.id)
-            if (
-                event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison]
-            ):  # gcp gemini will return None usage field
+            if event.usage is not None:
                 metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
             if event.model:
                 metadata_tracker.set_model_name(event.model)
             if provider := getattr(event, "provider", None):
                 metadata_tracker.set_provider(str(provider))
-
             if len(event.choices) == 0:
                 continue
             delta = event.choices[0].delta