klaude-code 1.2.11__py3-none-any.whl → 1.2.13__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- klaude_code/auth/codex/oauth.py +3 -3
- klaude_code/cli/main.py +5 -5
- klaude_code/cli/runtime.py +19 -27
- klaude_code/cli/session_cmd.py +6 -8
- klaude_code/command/__init__.py +31 -28
- klaude_code/command/clear_cmd.py +0 -2
- klaude_code/command/diff_cmd.py +0 -2
- klaude_code/command/export_cmd.py +3 -5
- klaude_code/command/help_cmd.py +0 -2
- klaude_code/command/model_cmd.py +0 -2
- klaude_code/command/refresh_cmd.py +0 -2
- klaude_code/command/registry.py +5 -9
- klaude_code/command/release_notes_cmd.py +0 -2
- klaude_code/command/status_cmd.py +2 -4
- klaude_code/command/terminal_setup_cmd.py +2 -4
- klaude_code/command/thinking_cmd.py +229 -0
- klaude_code/config/__init__.py +1 -1
- klaude_code/config/list_model.py +1 -1
- klaude_code/config/select_model.py +5 -15
- klaude_code/const/__init__.py +1 -1
- klaude_code/core/agent.py +14 -69
- klaude_code/core/executor.py +11 -10
- klaude_code/core/manager/agent_manager.py +4 -4
- klaude_code/core/manager/llm_clients.py +10 -49
- klaude_code/core/manager/llm_clients_builder.py +8 -21
- klaude_code/core/manager/sub_agent_manager.py +3 -3
- klaude_code/core/prompt.py +3 -3
- klaude_code/core/reminders.py +1 -1
- klaude_code/core/task.py +4 -5
- klaude_code/core/tool/__init__.py +16 -25
- klaude_code/core/tool/file/_utils.py +1 -1
- klaude_code/core/tool/file/apply_patch.py +17 -25
- klaude_code/core/tool/file/apply_patch_tool.py +4 -7
- klaude_code/core/tool/file/edit_tool.py +4 -11
- klaude_code/core/tool/file/multi_edit_tool.py +2 -3
- klaude_code/core/tool/file/read_tool.py +3 -4
- klaude_code/core/tool/file/write_tool.py +2 -3
- klaude_code/core/tool/memory/memory_tool.py +2 -8
- klaude_code/core/tool/memory/skill_loader.py +3 -2
- klaude_code/core/tool/shell/command_safety.py +0 -1
- klaude_code/core/tool/tool_context.py +1 -3
- klaude_code/core/tool/tool_registry.py +2 -1
- klaude_code/core/tool/tool_runner.py +1 -1
- klaude_code/core/tool/truncation.py +2 -5
- klaude_code/core/turn.py +9 -4
- klaude_code/llm/anthropic/client.py +62 -49
- klaude_code/llm/client.py +2 -20
- klaude_code/llm/codex/client.py +51 -32
- klaude_code/llm/input_common.py +2 -2
- klaude_code/llm/openai_compatible/client.py +60 -39
- klaude_code/llm/openai_compatible/stream_processor.py +2 -1
- klaude_code/llm/openrouter/client.py +79 -45
- klaude_code/llm/openrouter/reasoning_handler.py +19 -132
- klaude_code/llm/registry.py +6 -5
- klaude_code/llm/responses/client.py +65 -43
- klaude_code/llm/usage.py +1 -49
- klaude_code/protocol/commands.py +1 -0
- klaude_code/protocol/events.py +7 -0
- klaude_code/protocol/llm_param.py +1 -9
- klaude_code/protocol/model.py +10 -6
- klaude_code/protocol/sub_agent.py +2 -1
- klaude_code/session/export.py +1 -8
- klaude_code/session/selector.py +12 -7
- klaude_code/session/session.py +2 -4
- klaude_code/trace/__init__.py +1 -1
- klaude_code/trace/log.py +1 -1
- klaude_code/ui/__init__.py +4 -9
- klaude_code/ui/core/stage_manager.py +7 -4
- klaude_code/ui/modes/repl/__init__.py +1 -1
- klaude_code/ui/modes/repl/completers.py +6 -7
- klaude_code/ui/modes/repl/display.py +3 -4
- klaude_code/ui/modes/repl/event_handler.py +63 -5
- klaude_code/ui/modes/repl/key_bindings.py +2 -3
- klaude_code/ui/modes/repl/renderer.py +2 -1
- klaude_code/ui/renderers/diffs.py +1 -4
- klaude_code/ui/renderers/metadata.py +1 -12
- klaude_code/ui/rich/markdown.py +3 -3
- klaude_code/ui/rich/searchable_text.py +6 -6
- klaude_code/ui/rich/status.py +3 -4
- klaude_code/ui/rich/theme.py +1 -4
- klaude_code/ui/terminal/control.py +7 -16
- klaude_code/ui/terminal/notifier.py +2 -4
- klaude_code/ui/utils/common.py +1 -1
- klaude_code/ui/utils/debouncer.py +2 -2
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/METADATA +1 -1
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/RECORD +88 -87
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/WHEEL +0 -0
- {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/entry_points.txt +0 -0
klaude_code/llm/anthropic/client.py
CHANGED

@@ -15,17 +15,48 @@ from anthropic.types.beta.beta_signature_delta import BetaSignatureDelta
 from anthropic.types.beta.beta_text_delta import BetaTextDelta
 from anthropic.types.beta.beta_thinking_delta import BetaThinkingDelta
 from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
+from anthropic.types.beta.message_create_params import MessageCreateParamsStreaming
 
 from klaude_code import const
 from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker
+from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
 
+def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStreaming:
+    """Build Anthropic API request parameters."""
+    messages = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+    system = convert_system_to_input(param.system)
+
+    payload: MessageCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": {
+            "type": "auto",
+            "disable_parallel_tool_use": False,
+        },
+        "stream": True,
+        "max_tokens": param.max_tokens or const.DEFAULT_MAX_TOKENS,
+        "temperature": param.temperature or const.DEFAULT_TEMPERATURE,
+        "messages": messages,
+        "system": system,
+        "tools": tools,
+        "betas": ["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
+    }
+
+    if param.thinking and param.thinking.type == "enabled":
+        payload["thinking"] = anthropic.types.ThinkingConfigEnabledParam(
+            type="enabled",
+            budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
+        )
+
+    return payload
+
+
 @register(llm_param.LLMClientProtocol.ANTHROPIC)
 class AnthropicClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):

@@ -43,37 +74,21 @@ class AnthropicClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-
-
-
-
-
-
-
-
-
-
-            },
-            stream=True,
-            max_tokens=param.max_tokens or const.DEFAULT_MAX_TOKENS,
-            temperature=param.temperature or const.DEFAULT_TEMPERATURE,
-            messages=messages,
-            system=system,
-            tools=tools,
-            betas=["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
-            thinking=anthropic.types.ThinkingConfigEnabledParam(
-                type=param.thinking.type,
-                budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
-            )
-            if param.thinking and param.thinking.type == "enabled"
-            else anthropic.types.ThinkingConfigDisabledParam(
-                type="disabled",
-            ),
+        payload = build_payload(param)
+
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.beta.messages.create(
+            **payload,
             extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
         )

@@ -85,9 +100,8 @@ class AnthropicClient(LLMClientABC):
         current_tool_call_id: str | None = None
         current_tool_inputs: list[str] | None = None
 
-
-
-        output_tokens = 0
+        input_token = 0
+        cached_token = 0
 
         try:
             async for event in await stream:

@@ -100,17 +114,18 @@ class AnthropicClient(LLMClientABC):
                 match event:
                     case BetaRawMessageStartEvent() as event:
                         response_id = event.message.id
-
-
-                            event.message.usage.cache_creation_input_tokens or 0
-                        )
-                        output_tokens = event.message.usage.output_tokens or 0
+                        cached_token = event.message.usage.cache_read_input_tokens or 0
+                        input_token = event.message.usage.input_tokens
                         yield model.StartItem(response_id=response_id)
                     case BetaRawContentBlockDeltaEvent() as event:
                         match event.delta:
                             case BetaThinkingDelta() as delta:
                                 metadata_tracker.record_token()
                                 accumulated_thinking.append(delta.thinking)
+                                yield model.ReasoningTextDelta(
+                                    content=delta.thinking,
+                                    response_id=response_id,
+                                )
                             case BetaSignatureDelta() as delta:
                                 metadata_tracker.record_token()
                                 yield model.ReasoningEncryptedItem(

@@ -170,22 +185,20 @@ class AnthropicClient(LLMClientABC):
                         current_tool_call_id = None
                         current_tool_inputs = None
                     case BetaRawMessageDeltaEvent() as event:
-
-
-
-
-
-
-
-
-
-                                max_tokens=param.max_tokens,
+                        metadata_tracker.set_usage(
+                            model.Usage(
+                                input_tokens=input_token + cached_token,
+                                output_tokens=event.usage.output_tokens,
+                                cached_tokens=cached_token,
+                                context_size=input_token + cached_token + event.usage.output_tokens,
+                                context_limit=param.context_limit,
+                                max_tokens=param.max_tokens,
+                            )
                         )
-                        metadata_tracker.set_usage(usage)
                         metadata_tracker.set_model_name(str(param.model))
                         metadata_tracker.set_response_id(response_id)
                         yield metadata_tracker.finalize()
                     case _:
                         pass
         except (APIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
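The pattern here repeats across every client in this release: request construction moves out of `call()` into a standalone `build_payload()` that returns the SDK's own streaming-params `TypedDict`, which is logged once and then unpacked into the SDK call with `**payload`. A minimal sketch of that pattern, with a hypothetical `_Payload` standing in for `MessageCreateParamsStreaming` and an illustrative model name:

```python
from typing import TypedDict


class _Payload(TypedDict, total=False):
    # Hypothetical stand-in for the SDK's MessageCreateParamsStreaming
    model: str
    stream: bool
    max_tokens: int
    thinking: dict[str, object]


def build_payload(model_name: str, budget_tokens: int | None) -> _Payload:
    payload: _Payload = {"model": model_name, "stream": True, "max_tokens": 8192}
    if budget_tokens:
        # Optional keys are set only when the feature is enabled, so the wire
        # request omits them entirely instead of sending a disabled variant.
        payload["thinking"] = {"type": "enabled", "budget_tokens": budget_tokens}
    return payload


payload = build_payload("example-model", 2048)
print(payload)
# The client then unpacks it: client.beta.messages.create(**payload, extra_headers=...)
```

Because the payload is a typed dict rather than ad-hoc keyword arguments, a type checker still validates the keys, and the old `ThinkingConfigDisabledParam` branch disappears: thinking is simply absent when not enabled.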
klaude_code/llm/client.py
CHANGED

@@ -1,10 +1,8 @@
-import json
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
-from typing import
+from typing import ParamSpec, TypeVar, cast
 
 from klaude_code.protocol import llm_param, model
-from klaude_code.trace import DebugType, log_debug
 
 
 class LLMClientABC(ABC):

@@ -17,7 +15,7 @@ class LLMClientABC(ABC):
         pass
 
     @abstractmethod
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         raise NotImplementedError
         yield cast(model.ConversationItem, None)
 

@@ -31,19 +29,3 @@ class LLMClientABC(ABC):
 
 P = ParamSpec("P")
 R = TypeVar("R")
-
-
-def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
-    """Call an SDK function while logging the JSON payload.
-
-    The function reuses the original callable's type signature via ParamSpec
-    so static type checkers can validate arguments at the call site.
-    """
-
-    payload = {k: v for k, v in kwargs.items() if v is not None}
-    log_debug(
-        json.dumps(payload, ensure_ascii=False, default=str),
-        style="yellow",
-        debug_type=DebugType.LLM_PAYLOAD,
-    )
-    return func(*args, **kwargs)
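The removed `call_with_logged_payload` helper is superseded by the per-client `build_payload` plus an explicit `log_debug` call; the `P` and `R` aliases survive in the module. For reference, the ParamSpec technique it relied on, reduced to a runnable sketch with a toy `greet` function:

```python
from typing import Callable, ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")


def logged(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    # ParamSpec forwards func's exact signature, so a static type checker
    # validates *args/**kwargs here just as it would at a direct call site.
    print({k: v for k, v in kwargs.items() if v is not None})
    return func(*args, **kwargs)


def greet(name: str, excited: bool = False) -> str:
    return name + ("!" if excited else ".")


print(logged(greet, "Ada", excited=True))  # "Ada!"; logged(greet, nme="x") fails the type check
```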
klaude_code/llm/codex/client.py
CHANGED

@@ -1,22 +1,61 @@
 """Codex LLM client using ChatGPT subscription via OAuth."""
 
+import json
 from collections.abc import AsyncGenerator
 from typing import override
 
 import httpx
 import openai
 from openai import AsyncOpenAI
+from openai.types.responses.response_create_params import ResponseCreateParamsStreaming
 
 from klaude_code.auth.codex.exceptions import CodexNotLoggedInError
 from klaude_code.auth.codex.oauth import CodexOAuth
 from klaude_code.auth.codex.token_manager import CodexTokenManager
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.client import parse_responses_stream
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
 from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
+from klaude_code.trace import DebugType, log_debug
+
+
+def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
+    """Build Codex API request parameters."""
+    inputs = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    session_id = param.session_id or ""
+
+    payload: ResponseCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "include": [
+            "reasoning.encrypted_content",
+        ],
+        "store": False,
+        "stream": True,
+        "input": inputs,
+        "instructions": param.system,
+        "tools": tools,
+        "prompt_cache_key": session_id,
+        # max_output_token and temperature is not supported in Codex API
+    }
+
+    if param.thinking and param.thinking.reasoning_effort:
+        payload["reasoning"] = {
+            "effort": param.thinking.reasoning_effort,
+            "summary": param.thinking.reasoning_summary,
+        }
+
+    if param.verbosity:
+        payload["text"] = {"verbosity": param.verbosity}
+
+    return payload
+
 
 # Codex API configuration
 CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"

@@ -75,55 +114,35 @@ class CodexClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         # Ensure token is valid before API call
         self._ensure_valid_token()
 
         param = apply_config_defaults(param, self.get_llm_config())
 
-        # Codex API requires store=False
-        param.store = False
-
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-
-        tools = convert_tool_schema(param.tools)
+        payload = build_payload(param)
 
         session_id = param.session_id or ""
-        # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
         extra_headers: dict[str, str] = {}
         if session_id:
+            # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
            extra_headers["conversation_id"] = session_id
            extra_headers["session_id"] = session_id
 
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
         try:
-            stream = await
-
-                model=str(param.model),
-                tool_choice="auto",
-                parallel_tool_calls=True,
-                include=[
-                    "reasoning.encrypted_content",
-                ],
-                store=False,  # Always False for Codex
-                stream=True,
-                input=inputs,
-                instructions=param.system,
-                tools=tools,
-                text={
-                    "verbosity": param.verbosity,
-                },
-                prompt_cache_key=session_id,
-                reasoning={
-                    "effort": param.thinking.reasoning_effort,
-                    "summary": param.thinking.reasoning_summary,
-                }
-                if param.thinking and param.thinking.reasoning_effort
-                else None,
+            stream = await self.client.responses.create(
+                **payload,
                 extra_headers=extra_headers,
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
             return
 
         async for item in parse_responses_stream(stream, param, metadata_tracker):
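As in the other clients, the old inline conditionals (`reasoning={...} if ... else None`, an always-sent `text={...}`) become keys that are inserted only when set. On a `total=False` TypedDict an omitted key is not serialized at all, whereas the old style always passed the keyword, possibly as `None`. A sketch of the difference, with a hypothetical miniature request type and illustrative model name:

```python
from typing import TypedDict


class _Req(TypedDict, total=False):
    # Hypothetical miniature of ResponseCreateParamsStreaming
    model: str
    reasoning: dict[str, str]
    text: dict[str, str]


def build(effort: str | None, verbosity: str | None) -> _Req:
    req: _Req = {"model": "example-model"}
    if effort:
        req["reasoning"] = {"effort": effort}
    if verbosity:
        req["text"] = {"verbosity": verbosity}
    return req


print(build("high", None))  # {'model': 'example-model', 'reasoning': {'effort': 'high'}}
print(build(None, None))    # {'model': 'example-model'} -- keys absent, not None
```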
klaude_code/llm/input_common.py
CHANGED

@@ -5,10 +5,10 @@ This module provides shared abstractions for providers that require message grou
 since it uses a flat item list matching our internal protocol.
 """
 
-from collections.abc import Iterator
+from collections.abc import Iterable, Iterator
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 from klaude_code import const
 
klaude_code/llm/openai_compatible/client.py
CHANGED

@@ -4,8 +4,9 @@ from typing import override
 
 import httpx
 import openai
+from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
 
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.openai_compatible.input import convert_history_to_input, convert_tool_schema
 from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager

@@ -15,6 +16,35 @@ from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
 
+def build_payload(param: llm_param.LLMCallParameter) -> tuple[CompletionCreateParamsStreaming, dict[str, object]]:
+    """Build OpenAI API request parameters."""
+    messages = convert_history_to_input(param.input, param.system, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    extra_body: dict[str, object] = {}
+
+    if param.thinking:
+        extra_body["thinking"] = {
+            "type": param.thinking.type,
+            "budget": param.thinking.budget_tokens,
+        }
+
+    payload: CompletionCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "stream": True,
+        "messages": messages,
+        "temperature": param.temperature,
+        "max_tokens": param.max_tokens,
+        "tools": tools,
+        "reasoning_effort": param.thinking.reasoning_effort if param.thinking else None,
+        "verbosity": param.verbosity,
+    }
+
+    return payload, extra_body
+
+
 @register(llm_param.LLMClientProtocol.OPENAI)
 class OpenAICompatibleClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):

@@ -42,34 +72,23 @@ class OpenAICompatibleClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
-        messages = convert_history_to_input(param.input, param.system, param.model)
-        tools = convert_tool_schema(param.tools)
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        extra_body =
-        extra_headers = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
-
-
-        extra_body
-
-
-
-
-
-
-            parallel_tool_calls=True,
-            stream=True,
-            messages=messages,
-            temperature=param.temperature,
-            max_tokens=param.max_tokens,
-            tools=tools,
-            reasoning_effort=param.thinking.reasoning_effort if param.thinking else None,
-            verbosity=param.verbosity,
-            extra_body=extra_body,  # pyright: ignore[reportUnknownArgumentType]
+        payload, extra_body = build_payload(param)
+        extra_headers: dict[str, str] = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
+
+        log_debug(
+            json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.chat.completions.create(
+            **payload,
+            extra_body=extra_body,
             extra_headers=extra_headers,
         )

@@ -85,9 +104,7 @@ class OpenAICompatibleClient(LLMClientABC):
                 if not state.response_id and event.id:
                     state.set_response_id(event.id)
                     yield model.StartItem(response_id=event.id)
-                if
-                    event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison] gcp gemini will return None usage field
-                ):
+                if event.usage is not None:
                     metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
                 if event.model:
                     metadata_tracker.set_model_name(event.model)

@@ -96,28 +113,32 @@ class OpenAICompatibleClient(LLMClientABC):
 
                 if len(event.choices) == 0:
                     continue
-                delta = event.choices[0].delta
 
-                # Support Kimi K2's usage field in choice
-                if
+                # Support Moonshot Kimi K2's usage field in choice
+                if usage := getattr(event.choices[0], "usage", None):
                     metadata_tracker.set_usage(
                         convert_usage(
-                            openai.types.CompletionUsage.model_validate(
+                            openai.types.CompletionUsage.model_validate(usage),
                             param.context_limit,
                             param.max_tokens,
                         )
                     )
 
+                delta = event.choices[0].delta
+
                 # Reasoning
-
-
-
-
-
-                if reasoning_content:
+                if (
+                    reasoning_content := getattr(delta, "reasoning_content", None)
+                    or getattr(delta, "reasoning", None)
+                    or ""
+                ):
                     metadata_tracker.record_token()
                     state.stage = "reasoning"
                     state.accumulated_reasoning.append(reasoning_content)
+                    yield model.ReasoningTextDelta(
+                        content=reasoning_content,
+                        response_id=state.response_id,
+                    )
 
                 # Assistant
                 if delta.content and (

@@ -158,7 +179,7 @@ class OpenAICompatibleClient(LLMClientABC):
                         )
                         state.accumulated_tool_calls.add(delta.tool_calls)
         except (openai.OpenAIError, httpx.HTTPError) as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
 
         # Finalize
         for item in state.flush_all():
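The reasoning lookup deserves a note: OpenAI-compatible backends disagree on where reasoning text lives in the delta (`reasoning_content` vs `reasoning`), so the new code probes both with `getattr` and binds the first hit with a walrus operator. A standalone sketch of that fallback, using `SimpleNamespace` to mimic provider deltas:

```python
from types import SimpleNamespace


def extract_reasoning(delta: object) -> str | None:
    # Mirrors the probe in the diff: the first field that is present
    # and truthy wins; a delta with neither field yields None.
    if reasoning := (
        getattr(delta, "reasoning_content", None)
        or getattr(delta, "reasoning", None)
        or ""
    ):
        return reasoning
    return None


print(extract_reasoning(SimpleNamespace(reasoning_content="step 1")))  # step 1
print(extract_reasoning(SimpleNamespace(reasoning="step 2")))          # step 2
print(extract_reasoning(SimpleNamespace(content="hello")))             # None
```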
klaude_code/llm/openai_compatible/stream_processor.py
CHANGED

@@ -5,7 +5,8 @@ logic for accumulating and flushing reasoning, assistant content, and tool calls
 across different LLM providers (OpenAI-compatible, OpenRouter).
 """
 
-from
+from collections.abc import Callable
+from typing import Literal
 
 from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
 from klaude_code.protocol import model