klaude_code-1.2.11-py3-none-any.whl → klaude_code-1.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. klaude_code/auth/codex/oauth.py +3 -3
  2. klaude_code/cli/main.py +5 -5
  3. klaude_code/cli/runtime.py +19 -27
  4. klaude_code/cli/session_cmd.py +6 -8
  5. klaude_code/command/__init__.py +31 -28
  6. klaude_code/command/clear_cmd.py +0 -2
  7. klaude_code/command/diff_cmd.py +0 -2
  8. klaude_code/command/export_cmd.py +3 -5
  9. klaude_code/command/help_cmd.py +0 -2
  10. klaude_code/command/model_cmd.py +0 -2
  11. klaude_code/command/refresh_cmd.py +0 -2
  12. klaude_code/command/registry.py +5 -9
  13. klaude_code/command/release_notes_cmd.py +0 -2
  14. klaude_code/command/status_cmd.py +2 -4
  15. klaude_code/command/terminal_setup_cmd.py +2 -4
  16. klaude_code/command/thinking_cmd.py +229 -0
  17. klaude_code/config/__init__.py +1 -1
  18. klaude_code/config/list_model.py +1 -1
  19. klaude_code/config/select_model.py +5 -15
  20. klaude_code/const/__init__.py +1 -1
  21. klaude_code/core/agent.py +14 -69
  22. klaude_code/core/executor.py +11 -10
  23. klaude_code/core/manager/agent_manager.py +4 -4
  24. klaude_code/core/manager/llm_clients.py +10 -49
  25. klaude_code/core/manager/llm_clients_builder.py +8 -21
  26. klaude_code/core/manager/sub_agent_manager.py +3 -3
  27. klaude_code/core/prompt.py +3 -3
  28. klaude_code/core/reminders.py +1 -1
  29. klaude_code/core/task.py +4 -5
  30. klaude_code/core/tool/__init__.py +16 -25
  31. klaude_code/core/tool/file/_utils.py +1 -1
  32. klaude_code/core/tool/file/apply_patch.py +17 -25
  33. klaude_code/core/tool/file/apply_patch_tool.py +4 -7
  34. klaude_code/core/tool/file/edit_tool.py +4 -11
  35. klaude_code/core/tool/file/multi_edit_tool.py +2 -3
  36. klaude_code/core/tool/file/read_tool.py +3 -4
  37. klaude_code/core/tool/file/write_tool.py +2 -3
  38. klaude_code/core/tool/memory/memory_tool.py +2 -8
  39. klaude_code/core/tool/memory/skill_loader.py +3 -2
  40. klaude_code/core/tool/shell/command_safety.py +0 -1
  41. klaude_code/core/tool/tool_context.py +1 -3
  42. klaude_code/core/tool/tool_registry.py +2 -1
  43. klaude_code/core/tool/tool_runner.py +1 -1
  44. klaude_code/core/tool/truncation.py +2 -5
  45. klaude_code/core/turn.py +9 -4
  46. klaude_code/llm/anthropic/client.py +62 -49
  47. klaude_code/llm/client.py +2 -20
  48. klaude_code/llm/codex/client.py +51 -32
  49. klaude_code/llm/input_common.py +2 -2
  50. klaude_code/llm/openai_compatible/client.py +60 -39
  51. klaude_code/llm/openai_compatible/stream_processor.py +2 -1
  52. klaude_code/llm/openrouter/client.py +79 -45
  53. klaude_code/llm/openrouter/reasoning_handler.py +19 -132
  54. klaude_code/llm/registry.py +6 -5
  55. klaude_code/llm/responses/client.py +65 -43
  56. klaude_code/llm/usage.py +1 -49
  57. klaude_code/protocol/commands.py +1 -0
  58. klaude_code/protocol/events.py +7 -0
  59. klaude_code/protocol/llm_param.py +1 -9
  60. klaude_code/protocol/model.py +10 -6
  61. klaude_code/protocol/sub_agent.py +2 -1
  62. klaude_code/session/export.py +1 -8
  63. klaude_code/session/selector.py +12 -7
  64. klaude_code/session/session.py +2 -4
  65. klaude_code/trace/__init__.py +1 -1
  66. klaude_code/trace/log.py +1 -1
  67. klaude_code/ui/__init__.py +4 -9
  68. klaude_code/ui/core/stage_manager.py +7 -4
  69. klaude_code/ui/modes/repl/__init__.py +1 -1
  70. klaude_code/ui/modes/repl/completers.py +6 -7
  71. klaude_code/ui/modes/repl/display.py +3 -4
  72. klaude_code/ui/modes/repl/event_handler.py +63 -5
  73. klaude_code/ui/modes/repl/key_bindings.py +2 -3
  74. klaude_code/ui/modes/repl/renderer.py +2 -1
  75. klaude_code/ui/renderers/diffs.py +1 -4
  76. klaude_code/ui/renderers/metadata.py +1 -12
  77. klaude_code/ui/rich/markdown.py +3 -3
  78. klaude_code/ui/rich/searchable_text.py +6 -6
  79. klaude_code/ui/rich/status.py +3 -4
  80. klaude_code/ui/rich/theme.py +1 -4
  81. klaude_code/ui/terminal/control.py +7 -16
  82. klaude_code/ui/terminal/notifier.py +2 -4
  83. klaude_code/ui/utils/common.py +1 -1
  84. klaude_code/ui/utils/debouncer.py +2 -2
  85. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/METADATA +1 -1
  86. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/RECORD +88 -87
  87. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/WHEEL +0 -0
  88. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/entry_points.txt +0 -0
klaude_code/llm/anthropic/client.py CHANGED
@@ -15,17 +15,48 @@ from anthropic.types.beta.beta_signature_delta import BetaSignatureDelta
  from anthropic.types.beta.beta_text_delta import BetaTextDelta
  from anthropic.types.beta.beta_thinking_delta import BetaThinkingDelta
  from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
+ from anthropic.types.beta.message_create_params import MessageCreateParamsStreaming

  from klaude_code import const
  from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+ from klaude_code.llm.client import LLMClientABC
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.registry import register
- from klaude_code.llm.usage import MetadataTracker, convert_anthropic_usage
+ from klaude_code.llm.usage import MetadataTracker
  from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug


+ def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStreaming:
+     """Build Anthropic API request parameters."""
+     messages = convert_history_to_input(param.input, param.model)
+     tools = convert_tool_schema(param.tools)
+     system = convert_system_to_input(param.system)
+
+     payload: MessageCreateParamsStreaming = {
+         "model": str(param.model),
+         "tool_choice": {
+             "type": "auto",
+             "disable_parallel_tool_use": False,
+         },
+         "stream": True,
+         "max_tokens": param.max_tokens or const.DEFAULT_MAX_TOKENS,
+         "temperature": param.temperature or const.DEFAULT_TEMPERATURE,
+         "messages": messages,
+         "system": system,
+         "tools": tools,
+         "betas": ["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
+     }
+
+     if param.thinking and param.thinking.type == "enabled":
+         payload["thinking"] = anthropic.types.ThinkingConfigEnabledParam(
+             type="enabled",
+             budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
+         )
+
+     return payload
+
+
  @register(llm_param.LLMClientProtocol.ANTHROPIC)
  class AnthropicClient(LLMClientABC):
      def __init__(self, config: llm_param.LLMConfigParameter):
@@ -43,37 +74,21 @@ class AnthropicClient(LLMClientABC):
          return cls(config)

      @override
-     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
          param = apply_config_defaults(param, self.get_llm_config())

          metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

-         messages = convert_history_to_input(param.input, param.model)
-         tools = convert_tool_schema(param.tools)
-         system = convert_system_to_input(param.system)
-
-         stream = call_with_logged_payload(
-             self.client.beta.messages.create,
-             model=str(param.model),
-             tool_choice={
-                 "type": "auto",
-                 "disable_parallel_tool_use": False,
-             },
-             stream=True,
-             max_tokens=param.max_tokens or const.DEFAULT_MAX_TOKENS,
-             temperature=param.temperature or const.DEFAULT_TEMPERATURE,
-             messages=messages,
-             system=system,
-             tools=tools,
-             betas=["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
-             thinking=anthropic.types.ThinkingConfigEnabledParam(
-                 type=param.thinking.type,
-                 budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
-             )
-             if param.thinking and param.thinking.type == "enabled"
-             else anthropic.types.ThinkingConfigDisabledParam(
-                 type="disabled",
-             ),
+         payload = build_payload(param)
+
+         log_debug(
+             json.dumps(payload, ensure_ascii=False, default=str),
+             style="yellow",
+             debug_type=DebugType.LLM_PAYLOAD,
+         )
+
+         stream = self.client.beta.messages.create(
+             **payload,
              extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
          )

@@ -85,9 +100,8 @@ class AnthropicClient(LLMClientABC):
          current_tool_call_id: str | None = None
          current_tool_inputs: list[str] | None = None

-         input_tokens = 0
-         cached_tokens = 0
-         output_tokens = 0
+         input_token = 0
+         cached_token = 0

          try:
              async for event in await stream:
@@ -100,17 +114,18 @@
                  match event:
                      case BetaRawMessageStartEvent() as event:
                          response_id = event.message.id
-                         cached_tokens = event.message.usage.cache_read_input_tokens or 0
-                         input_tokens = (event.message.usage.input_tokens or 0) + (
-                             event.message.usage.cache_creation_input_tokens or 0
-                         )
-                         output_tokens = event.message.usage.output_tokens or 0
+                         cached_token = event.message.usage.cache_read_input_tokens or 0
+                         input_token = event.message.usage.input_tokens
                          yield model.StartItem(response_id=response_id)
                      case BetaRawContentBlockDeltaEvent() as event:
                          match event.delta:
                              case BetaThinkingDelta() as delta:
                                  metadata_tracker.record_token()
                                  accumulated_thinking.append(delta.thinking)
+                                 yield model.ReasoningTextDelta(
+                                     content=delta.thinking,
+                                     response_id=response_id,
+                                 )
                              case BetaSignatureDelta() as delta:
                                  metadata_tracker.record_token()
                                  yield model.ReasoningEncryptedItem(
@@ -170,22 +185,20 @@ class AnthropicClient(LLMClientABC):
                          current_tool_call_id = None
                          current_tool_inputs = None
                      case BetaRawMessageDeltaEvent() as event:
-                         input_tokens += (event.usage.input_tokens or 0) + (event.usage.cache_creation_input_tokens or 0)
-                         output_tokens += event.usage.output_tokens or 0
-                         cached_tokens += event.usage.cache_read_input_tokens or 0
-
-                         usage = convert_anthropic_usage(
-                             input_tokens=input_tokens,
-                             output_tokens=output_tokens,
-                             cached_tokens=cached_tokens,
-                             context_limit=param.context_limit,
-                             max_tokens=param.max_tokens,
+                         metadata_tracker.set_usage(
+                             model.Usage(
+                                 input_tokens=input_token + cached_token,
+                                 output_tokens=event.usage.output_tokens,
+                                 cached_tokens=cached_token,
+                                 context_size=input_token + cached_token + event.usage.output_tokens,
+                                 context_limit=param.context_limit,
+                                 max_tokens=param.max_tokens,
+                             )
                          )
-                         metadata_tracker.set_usage(usage)
                          metadata_tracker.set_model_name(str(param.model))
                          metadata_tracker.set_response_id(response_id)
                          yield metadata_tracker.finalize()
                      case _:
                          pass
          except (APIError, httpx.HTTPError) as e:
-             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
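The net effect of this file's refactor: payload construction moves out of the call site into a standalone, typed `build_payload` function, the payload is logged once with `log_debug`, and the dict is then unpacked into the SDK call. Below is a minimal runnable sketch of that pattern; `PayloadDict`, `FakeMessages`, and the model name are illustrative stand-ins, not klaude_code or Anthropic SDK types.

```python
import json
from typing import TypedDict


class PayloadDict(TypedDict):
    """Illustrative stand-in for the SDK's MessageCreateParamsStreaming."""

    model: str
    stream: bool
    max_tokens: int


def build_payload(model: str, max_tokens: int | None) -> PayloadDict:
    # A pure function: unit-testable without a network client or API key.
    return {"model": model, "stream": True, "max_tokens": max_tokens or 8192}


def log_debug(message: str) -> None:
    # Stand-in for klaude_code.trace.log_debug.
    print(f"[debug] {message}")


class FakeMessages:
    def create(self, **kwargs: object) -> dict[str, object]:
        return {"echo": kwargs}


payload = build_payload("example-model", None)
log_debug(json.dumps(payload, ensure_ascii=False, default=str))  # one central log
stream = FakeMessages().create(**payload)  # then unpack into the SDK call
```

Because the real `build_payload` returns the SDK's own TypedDict, type checkers validate keys at construction time, which is what the removed ParamSpec-based `call_with_logged_payload` wrapper previously provided at the call site.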
klaude_code/llm/client.py CHANGED
@@ -1,10 +1,8 @@
- import json
  from abc import ABC, abstractmethod
  from collections.abc import AsyncGenerator
- from typing import Callable, ParamSpec, TypeVar, cast
+ from typing import ParamSpec, TypeVar, cast

  from klaude_code.protocol import llm_param, model
- from klaude_code.trace import DebugType, log_debug


  class LLMClientABC(ABC):
@@ -17,7 +15,7 @@ class LLMClientABC(ABC):
          pass

      @abstractmethod
-     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
          raise NotImplementedError
          yield cast(model.ConversationItem, None)

@@ -31,19 +29,3 @@ class LLMClientABC(ABC):

  P = ParamSpec("P")
  R = TypeVar("R")
-
-
- def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
-     """Call an SDK function while logging the JSON payload.
-
-     The function reuses the original callable's type signature via ParamSpec
-     so static type checkers can validate arguments at the call site.
-     """
-
-     payload = {k: v for k, v in kwargs.items() if v is not None}
-     log_debug(
-         json.dumps(payload, ensure_ascii=False, default=str),
-         style="yellow",
-         debug_type=DebugType.LLM_PAYLOAD,
-     )
-     return func(*args, **kwargs)
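A side note on the signature change repeated across every client: `AsyncGenerator[model.ConversationItem, None]` becomes `AsyncGenerator[model.ConversationItem]`. Type checkers accept the one-argument form because PEP 696 gives `AsyncGenerator`'s second type parameter (the send type) a default of `None`, applied to the stdlib in Python 3.13. A self-contained illustration:

```python
import asyncio
from collections.abc import AsyncGenerator


# With PEP 696 defaults (Python 3.13+), AsyncGenerator[int] is equivalent to
# AsyncGenerator[int, None]. The runtime subscript is not arity-checked, so
# this script also executes on older interpreters; only type checkers care.
async def count(limit: int) -> AsyncGenerator[int]:
    for i in range(limit):
        yield i


async def main() -> None:
    async for n in count(3):
        print(n)  # 0, 1, 2


asyncio.run(main())
```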
klaude_code/llm/codex/client.py CHANGED
@@ -1,22 +1,61 @@
  """Codex LLM client using ChatGPT subscription via OAuth."""

+ import json
  from collections.abc import AsyncGenerator
  from typing import override

  import httpx
  import openai
  from openai import AsyncOpenAI
+ from openai.types.responses.response_create_params import ResponseCreateParamsStreaming

  from klaude_code.auth.codex.exceptions import CodexNotLoggedInError
  from klaude_code.auth.codex.oauth import CodexOAuth
  from klaude_code.auth.codex.token_manager import CodexTokenManager
- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+ from klaude_code.llm.client import LLMClientABC
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.registry import register
  from klaude_code.llm.responses.client import parse_responses_stream
  from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
  from klaude_code.llm.usage import MetadataTracker
  from klaude_code.protocol import llm_param, model
+ from klaude_code.trace import DebugType, log_debug
+
+
+ def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
+     """Build Codex API request parameters."""
+     inputs = convert_history_to_input(param.input, param.model)
+     tools = convert_tool_schema(param.tools)
+
+     session_id = param.session_id or ""
+
+     payload: ResponseCreateParamsStreaming = {
+         "model": str(param.model),
+         "tool_choice": "auto",
+         "parallel_tool_calls": True,
+         "include": [
+             "reasoning.encrypted_content",
+         ],
+         "store": False,
+         "stream": True,
+         "input": inputs,
+         "instructions": param.system,
+         "tools": tools,
+         "prompt_cache_key": session_id,
+         # max_output_tokens and temperature are not supported by the Codex API
+     }
+
+     if param.thinking and param.thinking.reasoning_effort:
+         payload["reasoning"] = {
+             "effort": param.thinking.reasoning_effort,
+             "summary": param.thinking.reasoning_summary,
+         }
+
+     if param.verbosity:
+         payload["text"] = {"verbosity": param.verbosity}
+
+     return payload
+

  # Codex API configuration
  CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
@@ -75,55 +114,35 @@ class CodexClient(LLMClientABC):
          return cls(config)

      @override
-     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
          # Ensure token is valid before API call
          self._ensure_valid_token()

          param = apply_config_defaults(param, self.get_llm_config())

-         # Codex API requires store=False
-         param.store = False
-
          metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

-         inputs = convert_history_to_input(param.input, param.model)
-         tools = convert_tool_schema(param.tools)
+         payload = build_payload(param)

          session_id = param.session_id or ""
-         # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
          extra_headers: dict[str, str] = {}
          if session_id:
+             # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
              extra_headers["conversation_id"] = session_id
              extra_headers["session_id"] = session_id

+         log_debug(
+             json.dumps(payload, ensure_ascii=False, default=str),
+             style="yellow",
+             debug_type=DebugType.LLM_PAYLOAD,
+         )
          try:
-             stream = await call_with_logged_payload(
-                 self.client.responses.create,
-                 model=str(param.model),
-                 tool_choice="auto",
-                 parallel_tool_calls=True,
-                 include=[
-                     "reasoning.encrypted_content",
-                 ],
-                 store=False,  # Always False for Codex
-                 stream=True,
-                 input=inputs,
-                 instructions=param.system,
-                 tools=tools,
-                 text={
-                     "verbosity": param.verbosity,
-                 },
-                 prompt_cache_key=session_id,
-                 reasoning={
-                     "effort": param.thinking.reasoning_effort,
-                     "summary": param.thinking.reasoning_summary,
-                 }
-                 if param.thinking and param.thinking.reasoning_effort
-                 else None,
+             stream = await self.client.responses.create(
+                 **payload,
                  extra_headers=extra_headers,
              )
          except (openai.OpenAIError, httpx.HTTPError) as e:
-             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
              return

          async for item in parse_responses_stream(stream, param, metadata_tracker):
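Note how the new Codex `build_payload` attaches `reasoning` and `text` only when the corresponding parameters are set, where the old inline call passed `reasoning=... if ... else None`. With a TypedDict, optional keys can simply be absent. A small sketch of that shape, using illustrative field and model names rather than the real `ResponseCreateParamsStreaming`:

```python
from typing import NotRequired, TypedDict  # NotRequired: Python 3.11+


class ResponsePayload(TypedDict):
    model: str
    stream: bool
    reasoning: NotRequired[dict[str, str]]  # key may be absent entirely


def build_payload(model: str, reasoning_effort: str | None) -> ResponsePayload:
    payload: ResponsePayload = {"model": model, "stream": True}
    if reasoning_effort:
        # Omitting the key is safer than sending an explicit None,
        # which some backends reject or treat as a real value.
        payload["reasoning"] = {"effort": reasoning_effort}
    return payload


print(build_payload("example-model", None))      # no 'reasoning' key
print(build_payload("example-model", "medium"))  # 'reasoning' present
```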
klaude_code/llm/input_common.py CHANGED
@@ -5,10 +5,10 @@ This module provides shared abstractions for providers that require message grou
  since it uses a flat item list matching our internal protocol.
  """

- from collections.abc import Iterator
+ from collections.abc import Iterable, Iterator
  from dataclasses import dataclass, field
  from enum import Enum
- from typing import TYPE_CHECKING, Iterable
+ from typing import TYPE_CHECKING

  from klaude_code import const

klaude_code/llm/openai_compatible/client.py CHANGED
@@ -4,8 +4,9 @@ from typing import override

  import httpx
  import openai
+ from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming

- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+ from klaude_code.llm.client import LLMClientABC
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.openai_compatible.input import convert_history_to_input, convert_tool_schema
  from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager
@@ -15,6 +16,35 @@ from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug


+ def build_payload(param: llm_param.LLMCallParameter) -> tuple[CompletionCreateParamsStreaming, dict[str, object]]:
+     """Build OpenAI API request parameters."""
+     messages = convert_history_to_input(param.input, param.system, param.model)
+     tools = convert_tool_schema(param.tools)
+
+     extra_body: dict[str, object] = {}
+
+     if param.thinking:
+         extra_body["thinking"] = {
+             "type": param.thinking.type,
+             "budget": param.thinking.budget_tokens,
+         }
+
+     payload: CompletionCreateParamsStreaming = {
+         "model": str(param.model),
+         "tool_choice": "auto",
+         "parallel_tool_calls": True,
+         "stream": True,
+         "messages": messages,
+         "temperature": param.temperature,
+         "max_tokens": param.max_tokens,
+         "tools": tools,
+         "reasoning_effort": param.thinking.reasoning_effort if param.thinking else None,
+         "verbosity": param.verbosity,
+     }
+
+     return payload, extra_body
+
+
  @register(llm_param.LLMClientProtocol.OPENAI)
  class OpenAICompatibleClient(LLMClientABC):
      def __init__(self, config: llm_param.LLMConfigParameter):
@@ -42,34 +72,23 @@ class OpenAICompatibleClient(LLMClientABC):
          return cls(config)

      @override
-     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
          param = apply_config_defaults(param, self.get_llm_config())
-         messages = convert_history_to_input(param.input, param.system, param.model)
-         tools = convert_tool_schema(param.tools)

          metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

-         extra_body = {}
-         extra_headers = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
-
-         if param.thinking:
-             extra_body["thinking"] = {
-                 "type": param.thinking.type,
-                 "budget": param.thinking.budget_tokens,
-             }
-         stream = call_with_logged_payload(
-             self.client.chat.completions.create,
-             model=str(param.model),
-             tool_choice="auto",
-             parallel_tool_calls=True,
-             stream=True,
-             messages=messages,
-             temperature=param.temperature,
-             max_tokens=param.max_tokens,
-             tools=tools,
-             reasoning_effort=param.thinking.reasoning_effort if param.thinking else None,
-             verbosity=param.verbosity,
-             extra_body=extra_body,  # pyright: ignore[reportUnknownArgumentType]
+         payload, extra_body = build_payload(param)
+         extra_headers: dict[str, str] = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
+
+         log_debug(
+             json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+             style="yellow",
+             debug_type=DebugType.LLM_PAYLOAD,
+         )
+
+         stream = self.client.chat.completions.create(
+             **payload,
+             extra_body=extra_body,
              extra_headers=extra_headers,
          )

@@ -85,9 +104,7 @@ class OpenAICompatibleClient(LLMClientABC):
                  if not state.response_id and event.id:
                      state.set_response_id(event.id)
                      yield model.StartItem(response_id=event.id)
-                 if (
-                     event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison] gcp gemini will return None usage field
-                 ):
+                 if event.usage is not None:
                      metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
                  if event.model:
                      metadata_tracker.set_model_name(event.model)
@@ -96,28 +113,32 @@

                  if len(event.choices) == 0:
                      continue
-                 delta = event.choices[0].delta

-                 # Support Kimi K2's usage field in choice
-                 if hasattr(event.choices[0], "usage") and getattr(event.choices[0], "usage"):
+                 # Support Moonshot Kimi K2's usage field in choice
+                 if usage := getattr(event.choices[0], "usage", None):
                      metadata_tracker.set_usage(
                          convert_usage(
-                             openai.types.CompletionUsage.model_validate(getattr(event.choices[0], "usage")),
+                             openai.types.CompletionUsage.model_validate(usage),
                              param.context_limit,
                              param.max_tokens,
                          )
                      )

+                 delta = event.choices[0].delta
+
                  # Reasoning
-                 reasoning_content = ""
-                 if hasattr(delta, "reasoning") and getattr(delta, "reasoning"):
-                     reasoning_content = getattr(delta, "reasoning")
-                 if hasattr(delta, "reasoning_content") and getattr(delta, "reasoning_content"):
-                     reasoning_content = getattr(delta, "reasoning_content")
-                 if reasoning_content:
+                 if (
+                     reasoning_content := getattr(delta, "reasoning_content", None)
+                     or getattr(delta, "reasoning", None)
+                     or ""
+                 ):
                      metadata_tracker.record_token()
                      state.stage = "reasoning"
                      state.accumulated_reasoning.append(reasoning_content)
+                     yield model.ReasoningTextDelta(
+                         content=reasoning_content,
+                         response_id=state.response_id,
+                     )

                  # Assistant
                  if delta.content and (
@@ -158,7 +179,7 @@ class OpenAICompatibleClient(LLMClientABC):
                      )
                      state.accumulated_tool_calls.add(delta.tool_calls)
          except (openai.OpenAIError, httpx.HTTPError) as e:
-             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")

          # Finalize
          for item in state.flush_all():
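The reasoning extraction above tidies two `hasattr`/`getattr` pairs into one walrus expression over `getattr(..., None)` defaults, with `reasoning_content` taking precedence over `reasoning` (the same precedence as before, since the old code let `reasoning_content` overwrite `reasoning`). A standalone sketch of the lookup, using a dummy delta object:

```python
from types import SimpleNamespace


def extract_reasoning(delta: object) -> str:
    # reasoning_content and reasoning are provider-specific extension
    # fields, so look them up with getattr defaults rather than hasattr.
    if content := (
        getattr(delta, "reasoning_content", None)
        or getattr(delta, "reasoning", None)
        or ""
    ):
        return content
    return ""


print(extract_reasoning(SimpleNamespace(reasoning="fallback")))  # fallback
print(extract_reasoning(SimpleNamespace(
    reasoning_content="primary", reasoning="fallback")))         # primary
print(repr(extract_reasoning(SimpleNamespace())))                # ''
```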
klaude_code/llm/openai_compatible/stream_processor.py CHANGED
@@ -5,7 +5,8 @@ logic for accumulating and flushing reasoning, assistant content, and tool calls
  across different LLM providers (OpenAI-compatible, OpenRouter).
  """

- from typing import Callable, Literal
+ from collections.abc import Callable
+ from typing import Literal

  from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
  from klaude_code.protocol import model
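Like the `Iterable` move in input_common.py above, this swaps `typing.Callable` for `collections.abc.Callable`, which the typing documentation has marked as the preferred source since Python 3.9. Subscription syntax is unchanged, as this brief example shows:

```python
from collections.abc import Callable


def apply_twice(fn: Callable[[int], int], value: int) -> int:
    # Same [args, return] subscription as the deprecated typing.Callable.
    return fn(fn(value))


print(apply_twice(lambda x: x + 1, 0))  # 2
```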