klaude-code 1.2.11-py3-none-any.whl → 1.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. klaude_code/auth/codex/oauth.py +3 -3
  2. klaude_code/cli/main.py +5 -5
  3. klaude_code/cli/runtime.py +19 -27
  4. klaude_code/cli/session_cmd.py +6 -8
  5. klaude_code/command/__init__.py +31 -28
  6. klaude_code/command/clear_cmd.py +0 -2
  7. klaude_code/command/diff_cmd.py +0 -2
  8. klaude_code/command/export_cmd.py +3 -5
  9. klaude_code/command/help_cmd.py +0 -2
  10. klaude_code/command/model_cmd.py +0 -2
  11. klaude_code/command/refresh_cmd.py +0 -2
  12. klaude_code/command/registry.py +5 -9
  13. klaude_code/command/release_notes_cmd.py +0 -2
  14. klaude_code/command/status_cmd.py +2 -4
  15. klaude_code/command/terminal_setup_cmd.py +2 -4
  16. klaude_code/command/thinking_cmd.py +229 -0
  17. klaude_code/config/__init__.py +1 -1
  18. klaude_code/config/list_model.py +1 -1
  19. klaude_code/config/select_model.py +5 -15
  20. klaude_code/const/__init__.py +1 -1
  21. klaude_code/core/agent.py +14 -69
  22. klaude_code/core/executor.py +11 -10
  23. klaude_code/core/manager/agent_manager.py +4 -4
  24. klaude_code/core/manager/llm_clients.py +10 -49
  25. klaude_code/core/manager/llm_clients_builder.py +8 -21
  26. klaude_code/core/manager/sub_agent_manager.py +3 -3
  27. klaude_code/core/prompt.py +3 -3
  28. klaude_code/core/reminders.py +1 -1
  29. klaude_code/core/task.py +4 -5
  30. klaude_code/core/tool/__init__.py +16 -25
  31. klaude_code/core/tool/file/_utils.py +1 -1
  32. klaude_code/core/tool/file/apply_patch.py +17 -25
  33. klaude_code/core/tool/file/apply_patch_tool.py +4 -7
  34. klaude_code/core/tool/file/edit_tool.py +4 -11
  35. klaude_code/core/tool/file/multi_edit_tool.py +2 -3
  36. klaude_code/core/tool/file/read_tool.py +3 -4
  37. klaude_code/core/tool/file/write_tool.py +2 -3
  38. klaude_code/core/tool/memory/memory_tool.py +2 -8
  39. klaude_code/core/tool/memory/skill_loader.py +3 -2
  40. klaude_code/core/tool/shell/command_safety.py +0 -1
  41. klaude_code/core/tool/tool_context.py +1 -3
  42. klaude_code/core/tool/tool_registry.py +2 -1
  43. klaude_code/core/tool/tool_runner.py +1 -1
  44. klaude_code/core/tool/truncation.py +2 -5
  45. klaude_code/core/turn.py +9 -4
  46. klaude_code/llm/anthropic/client.py +62 -49
  47. klaude_code/llm/client.py +2 -20
  48. klaude_code/llm/codex/client.py +51 -32
  49. klaude_code/llm/input_common.py +2 -2
  50. klaude_code/llm/openai_compatible/client.py +60 -39
  51. klaude_code/llm/openai_compatible/stream_processor.py +2 -1
  52. klaude_code/llm/openrouter/client.py +79 -45
  53. klaude_code/llm/openrouter/reasoning_handler.py +19 -132
  54. klaude_code/llm/registry.py +6 -5
  55. klaude_code/llm/responses/client.py +65 -43
  56. klaude_code/llm/usage.py +1 -49
  57. klaude_code/protocol/commands.py +1 -0
  58. klaude_code/protocol/events.py +7 -0
  59. klaude_code/protocol/llm_param.py +1 -9
  60. klaude_code/protocol/model.py +10 -6
  61. klaude_code/protocol/sub_agent.py +2 -1
  62. klaude_code/session/export.py +1 -8
  63. klaude_code/session/selector.py +12 -7
  64. klaude_code/session/session.py +2 -4
  65. klaude_code/trace/__init__.py +1 -1
  66. klaude_code/trace/log.py +1 -1
  67. klaude_code/ui/__init__.py +4 -9
  68. klaude_code/ui/core/stage_manager.py +7 -4
  69. klaude_code/ui/modes/repl/__init__.py +1 -1
  70. klaude_code/ui/modes/repl/completers.py +6 -7
  71. klaude_code/ui/modes/repl/display.py +3 -4
  72. klaude_code/ui/modes/repl/event_handler.py +63 -5
  73. klaude_code/ui/modes/repl/key_bindings.py +2 -3
  74. klaude_code/ui/modes/repl/renderer.py +2 -1
  75. klaude_code/ui/renderers/diffs.py +1 -4
  76. klaude_code/ui/renderers/metadata.py +1 -12
  77. klaude_code/ui/rich/markdown.py +3 -3
  78. klaude_code/ui/rich/searchable_text.py +6 -6
  79. klaude_code/ui/rich/status.py +3 -4
  80. klaude_code/ui/rich/theme.py +1 -4
  81. klaude_code/ui/terminal/control.py +7 -16
  82. klaude_code/ui/terminal/notifier.py +2 -4
  83. klaude_code/ui/utils/common.py +1 -1
  84. klaude_code/ui/utils/debouncer.py +2 -2
  85. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/METADATA +1 -1
  86. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/RECORD +88 -87
  87. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/WHEEL +0 -0
  88. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/client.py CHANGED
@@ -1,10 +1,12 @@
+ import json
  from collections.abc import AsyncGenerator
  from typing import override

  import httpx
  import openai
+ from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming

- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+ from klaude_code.llm.client import LLMClientABC
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.openai_compatible.input import convert_tool_schema
  from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager
@@ -16,6 +18,53 @@ from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log, log_debug


+ def build_payload(
+ param: llm_param.LLMCallParameter,
+ ) -> tuple[CompletionCreateParamsStreaming, dict[str, object], dict[str, str]]:
+ """Build OpenRouter API request parameters."""
+ messages = convert_history_to_input(param.input, param.system, param.model)
+ tools = convert_tool_schema(param.tools)
+
+ extra_body: dict[str, object] = {
+ "usage": {"include": True}, # To get the cache tokens at the end of the response
+ "debug": {
+ "echo_upstream_body": True
+ }, # https://openrouter.ai/docs/api/reference/errors-and-debugging#debug-option-shape
+ }
+ extra_headers: dict[str, str] = {}
+
+ if param.thinking:
+ if param.thinking.budget_tokens is not None:
+ extra_body["reasoning"] = {
+ "max_tokens": param.thinking.budget_tokens,
+ "enable": True,
+ } # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
+ elif param.thinking.reasoning_effort is not None:
+ extra_body["reasoning"] = {
+ "effort": param.thinking.reasoning_effort,
+ }
+
+ if param.provider_routing:
+ extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
+
+ if is_claude_model(param.model):
+ extra_headers["x-anthropic-beta"] = "fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14"
+
+ payload: CompletionCreateParamsStreaming = {
+ "model": str(param.model),
+ "tool_choice": "auto",
+ "parallel_tool_calls": True,
+ "stream": True,
+ "messages": messages,
+ "temperature": param.temperature,
+ "max_tokens": param.max_tokens,
+ "tools": tools,
+ "verbosity": param.verbosity,
+ }
+
+ return payload, extra_body, extra_headers
+
+
  @register(llm_param.LLMClientProtocol.OPENROUTER)
  class OpenRouterClient(LLMClientABC):
  def __init__(self, config: llm_param.LLMConfigParameter):
@@ -33,54 +82,30 @@ class OpenRouterClient(LLMClientABC):
  return cls(config)

  @override
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
  param = apply_config_defaults(param, self.get_llm_config())
- messages = convert_history_to_input(param.input, param.system, param.model)
- tools = convert_tool_schema(param.tools)

  metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

- extra_body: dict[str, object] = {
- "usage": {"include": True} # To get the cache tokens at the end of the response
- }
- extra_headers = {}
-
- if param.thinking:
- if param.thinking.budget_tokens is not None:
- extra_body["reasoning"] = {
- "max_tokens": param.thinking.budget_tokens,
- "enable": True,
- } # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
- elif param.thinking.reasoning_effort is not None:
- extra_body["reasoning"] = {
- "effort": param.thinking.reasoning_effort,
- }
- if param.provider_routing:
- extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
- if is_claude_model(param.model):
- extra_headers["anthropic-beta"] = (
- "interleaved-thinking-2025-05-14" # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
- )
-
- stream = call_with_logged_payload(
- self.client.chat.completions.create,
- model=str(param.model),
- tool_choice="auto",
- parallel_tool_calls=True,
- stream=True,
- messages=messages,
- temperature=param.temperature,
- max_tokens=param.max_tokens,
- tools=tools,
- verbosity=param.verbosity,
+ payload, extra_body, extra_headers = build_payload(param)
+
+ log_debug(
+ json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+ style="yellow",
+ debug_type=DebugType.LLM_PAYLOAD,
+ )
+
+ stream = self.client.chat.completions.create(
+ **payload,
  extra_body=extra_body,
- extra_headers=extra_headers, # pyright: ignore[reportUnknownArgumentType]
+ extra_headers=extra_headers,
  )

  reasoning_handler = ReasoningStreamHandler(
  param_model=str(param.model),
  response_id=None,
  )
+
  state = StreamStateManager(
  param_model=str(param.model),
  reasoning_flusher=reasoning_handler.flush,
@@ -93,31 +118,40 @@ class OpenRouterClient(LLMClientABC):
  style="blue",
  debug_type=DebugType.LLM_STREAM,
  )
+
  if not state.response_id and event.id:
  state.set_response_id(event.id)
  reasoning_handler.set_response_id(event.id)
  yield model.StartItem(response_id=event.id)
- if (
- event.usage is not None and event.usage.completion_tokens is not None # pyright: ignore[reportUnnecessaryComparison]
- ): # gcp gemini will return None usage field
+ if event.usage is not None:
  metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
  if event.model:
  metadata_tracker.set_model_name(event.model)
  if provider := getattr(event, "provider", None):
  metadata_tracker.set_provider(str(provider))
-
  if len(event.choices) == 0:
  continue
  delta = event.choices[0].delta

  # Reasoning
- if hasattr(delta, "reasoning_details") and getattr(delta, "reasoning_details"):
- reasoning_details = getattr(delta, "reasoning_details")
+ if reasoning_details := getattr(delta, "reasoning_details", None):
  for item in reasoning_details:
  try:
  reasoning_detail = ReasoningDetail.model_validate(item)
  metadata_tracker.record_token()
  state.stage = "reasoning"
+ # Yield delta immediately for streaming
+ if reasoning_detail.text:
+ yield model.ReasoningTextDelta(
+ content=reasoning_detail.text,
+ response_id=state.response_id,
+ )
+ if reasoning_detail.summary:
+ yield model.ReasoningTextDelta(
+ content=reasoning_detail.summary,
+ response_id=state.response_id,
+ )
+ # Keep existing handler logic for final items
  for conversation_item in reasoning_handler.on_detail(reasoning_detail):
  yield conversation_item
  except Exception as e:
@@ -160,7 +194,7 @@ class OpenRouterClient(LLMClientABC):
  state.accumulated_tool_calls.add(delta.tool_calls)

  except (openai.OpenAIError, httpx.HTTPError) as e:
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")

  # Finalize
  for item in state.flush_all():
klaude_code/llm/openrouter/reasoning_handler.py CHANGED
@@ -1,5 +1,3 @@
- from enum import Enum
-
  from pydantic import BaseModel

  from klaude_code.protocol import model
@@ -18,14 +16,8 @@ class ReasoningDetail(BaseModel):
  signature: str | None = None # Claude's signature


- class ReasoningMode(str, Enum):
- COMPLETE_CHUNK = "complete_chunk"
- GPT5_SECTIONS = "gpt5_sections"
- ACCUMULATE = "accumulate"
-
-
  class ReasoningStreamHandler:
- """Encapsulates reasoning stream handling across different model behaviors."""
+ """Accumulates reasoning text and flushes on encrypted content or finalize."""

  def __init__(
  self,
@@ -37,59 +29,48 @@ class ReasoningStreamHandler:

  self._reasoning_id: str | None = None
  self._accumulated_reasoning: list[str] = []
- self._gpt5_line_buffer: str = ""
- self._gpt5_section_lines: list[str] = []

  def set_response_id(self, response_id: str | None) -> None:
  """Update the response identifier used for emitted items."""
-
  self._response_id = response_id

  def on_detail(self, detail: ReasoningDetail) -> list[model.ConversationItem]:
  """Process a single reasoning detail and return streamable items."""
-
  items: list[model.ConversationItem] = []

  if detail.type == "reasoning.encrypted":
  self._reasoning_id = detail.id
+ # Flush accumulated text before encrypted content
+ items.extend(self._flush_text())
  if encrypted_item := self._build_encrypted_item(detail.data, detail):
  items.append(encrypted_item)
  return items

  if detail.type in ("reasoning.text", "reasoning.summary"):
  self._reasoning_id = detail.id
- if encrypted_item := self._build_encrypted_item(detail.signature, detail):
- items.append(encrypted_item)
+ # Accumulate text
  text = detail.text if detail.type == "reasoning.text" else detail.summary
  if text:
- items.extend(self._handle_text(text))
+ self._accumulated_reasoning.append(text)
+ # Flush on signature (encrypted content)
+ if detail.signature:
+ items.extend(self._flush_text())
+ if encrypted_item := self._build_encrypted_item(detail.signature, detail):
+ items.append(encrypted_item)

  return items

  def flush(self) -> list[model.ConversationItem]:
- """Flush buffered reasoning text and encrypted payloads."""
+ """Flush buffered reasoning text on finalize."""
+ return self._flush_text()

- items: list[model.ConversationItem] = []
- mode = self._resolve_mode()
-
- if mode is ReasoningMode.GPT5_SECTIONS:
- for section in self._drain_gpt5_sections():
- items.append(self._build_text_item(section))
- elif self._accumulated_reasoning and mode is ReasoningMode.ACCUMULATE:
- items.append(self._build_text_item("".join(self._accumulated_reasoning)))
- self._accumulated_reasoning = []
-
- return items
-
- def _handle_text(self, text: str) -> list[model.ReasoningTextItem]:
- mode = self._resolve_mode()
- if mode is ReasoningMode.COMPLETE_CHUNK:
- return [self._build_text_item(text)]
- if mode is ReasoningMode.GPT5_SECTIONS:
- sections = self._process_gpt5_text(text)
- return [self._build_text_item(section) for section in sections]
- self._accumulated_reasoning.append(text)
- return []
+ def _flush_text(self) -> list[model.ConversationItem]:
+ """Flush accumulated reasoning text as a single item."""
+ if not self._accumulated_reasoning:
+ return []
+ item = self._build_text_item("".join(self._accumulated_reasoning))
+ self._accumulated_reasoning = []
+ return [item]

  def _build_text_item(self, content: str) -> model.ReasoningTextItem:
  return model.ReasoningTextItem(
@@ -113,97 +94,3 @@ class ReasoningStreamHandler:
  response_id=self._response_id,
  model=self._param_model,
  )
-
- def _process_gpt5_text(self, text: str) -> list[str]:
- emitted_sections: list[str] = []
- self._gpt5_line_buffer += text
- while True:
- newline_index = self._gpt5_line_buffer.find("\n")
- if newline_index == -1:
- break
- line = self._gpt5_line_buffer[:newline_index]
- self._gpt5_line_buffer = self._gpt5_line_buffer[newline_index + 1 :]
- remainder = line
- while True:
- split_result = self._split_gpt5_title_line(remainder)
- if split_result is None:
- break
- prefix_segment, title_segment, remainder = split_result
- if prefix_segment:
- if not self._gpt5_section_lines:
- self._gpt5_section_lines = []
- self._gpt5_section_lines.append(f"{prefix_segment}\n")
- if self._gpt5_section_lines:
- emitted_sections.append("".join(self._gpt5_section_lines))
- self._gpt5_section_lines = [f"{title_segment} \n"] # Add two spaces for markdown line break
- if remainder:
- if not self._gpt5_section_lines:
- self._gpt5_section_lines = []
- self._gpt5_section_lines.append(f"{remainder}\n")
- return emitted_sections
-
- def _drain_gpt5_sections(self) -> list[str]:
- sections: list[str] = []
- if self._gpt5_line_buffer:
- if not self._gpt5_section_lines:
- self._gpt5_section_lines = [self._gpt5_line_buffer]
- else:
- self._gpt5_section_lines.append(self._gpt5_line_buffer)
- self._gpt5_line_buffer = ""
- if self._gpt5_section_lines:
- sections.append("".join(self._gpt5_section_lines))
- self._gpt5_section_lines = []
- return sections
-
- def _is_gpt5(self) -> bool:
- return "gpt-5" in self._param_model.lower()
-
- def _is_complete_chunk_reasoning_model(self) -> bool:
- """Whether the current model emits reasoning in complete chunks (e.g. Gemini)."""
-
- return self._param_model.startswith("google/gemini")
-
- def _resolve_mode(self) -> ReasoningMode:
- if self._is_complete_chunk_reasoning_model():
- return ReasoningMode.COMPLETE_CHUNK
- if self._is_gpt5():
- return ReasoningMode.GPT5_SECTIONS
- return ReasoningMode.ACCUMULATE
-
- def _is_gpt5_title_line(self, line: str) -> bool:
- stripped = line.strip()
- if not stripped:
- return False
- return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2
-
- def _split_gpt5_title_line(self, line: str) -> tuple[str | None, str, str] | None:
- if not line:
- return None
- search_start = 0
- while True:
- opening_index = line.find("**", search_start)
- if opening_index == -1:
- return None
- closing_index = line.find("**", opening_index + 2)
- if closing_index == -1:
- return None
- title_candidate = line[opening_index : closing_index + 2]
- stripped_title = title_candidate.strip()
- if self._is_gpt5_title_line(stripped_title):
- # Treat as a GPT-5 title only when everything after the
- # bold segment is either whitespace or starts a new bold
- # title. This prevents inline bold like `**xxx**yyyy`
- # from being misclassified as a section title while
- # preserving support for consecutive titles in one line.
- after = line[closing_index + 2 :]
- if after.strip() and not after.lstrip().startswith("**"):
- search_start = closing_index + 2
- continue
- prefix_segment = line[:opening_index]
- remainder_segment = after
- return (
- prefix_segment if prefix_segment else None,
- stripped_title,
- remainder_segment,
- )
- search_start = closing_index + 2
klaude_code/llm/registry.py CHANGED
@@ -1,4 +1,5 @@
- from typing import TYPE_CHECKING, Callable, TypeVar
+ from collections.abc import Callable
+ from typing import TYPE_CHECKING, TypeVar

  from klaude_code.protocol import llm_param

@@ -20,13 +21,13 @@ def _load_protocol(protocol: llm_param.LLMClientProtocol) -> None:

  # Import only the needed module to trigger @register decorator
  if protocol == llm_param.LLMClientProtocol.ANTHROPIC:
- from . import anthropic as _ # noqa: F401
+ from . import anthropic as _
  elif protocol == llm_param.LLMClientProtocol.CODEX:
- from . import codex as _ # noqa: F401
+ from . import codex as _
  elif protocol == llm_param.LLMClientProtocol.OPENAI:
- from . import openai_compatible as _ # noqa: F401
+ from . import openai_compatible as _
  elif protocol == llm_param.LLMClientProtocol.OPENROUTER:
- from . import openrouter as _ # noqa: F401
+ from . import openrouter as _
  elif protocol == llm_param.LLMClientProtocol.RESPONSES:
  from . import responses as _ # noqa: F401

klaude_code/llm/responses/client.py CHANGED
@@ -6,12 +6,13 @@ import httpx
  import openai
  from openai import AsyncAzureOpenAI, AsyncOpenAI
  from openai.types import responses
+ from openai.types.responses.response_create_params import ResponseCreateParamsStreaming

- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+ from klaude_code.llm.client import LLMClientABC
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.registry import register
  from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
- from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
+ from klaude_code.llm.usage import MetadataTracker
  from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug

@@ -20,11 +21,45 @@ if TYPE_CHECKING:
  from openai.types.responses import ResponseStreamEvent


+ def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
+ """Build OpenAI Responses API request parameters."""
+ inputs = convert_history_to_input(param.input, param.model)
+ tools = convert_tool_schema(param.tools)
+
+ payload: ResponseCreateParamsStreaming = {
+ "model": str(param.model),
+ "tool_choice": "auto",
+ "parallel_tool_calls": True,
+ "include": [
+ "reasoning.encrypted_content",
+ ],
+ "store": False,
+ "stream": True,
+ "temperature": param.temperature,
+ "max_output_tokens": param.max_tokens,
+ "input": inputs,
+ "instructions": param.system,
+ "tools": tools,
+ "prompt_cache_key": param.session_id or "",
+ }
+
+ if param.thinking and param.thinking.reasoning_effort:
+ payload["reasoning"] = {
+ "effort": param.thinking.reasoning_effort,
+ "summary": param.thinking.reasoning_summary,
+ }
+
+ if param.verbosity:
+ payload["text"] = {"verbosity": param.verbosity}
+
+ return payload
+
+
  async def parse_responses_stream(
  stream: "AsyncStream[ResponseStreamEvent]",
  param: llm_param.LLMCallParameter,
  metadata_tracker: MetadataTracker,
- ) -> AsyncGenerator[model.ConversationItem, None]:
+ ) -> AsyncGenerator[model.ConversationItem]:
  """Parse OpenAI Responses API stream events into ConversationItems."""
  response_id: str | None = None

@@ -40,6 +75,12 @@ async def parse_responses_stream(
  case responses.ResponseCreatedEvent() as event:
  response_id = event.response.id
  yield model.StartItem(response_id=response_id)
+ case responses.ResponseReasoningSummaryTextDeltaEvent() as event:
+ if event.delta:
+ yield model.ReasoningTextDelta(
+ content=event.delta,
+ response_id=response_id,
+ )
  case responses.ResponseReasoningSummaryTextDoneEvent() as event:
  if event.text:
  yield model.ReasoningTextItem(
@@ -95,16 +136,17 @@ async def parse_responses_stream(
  if event.response.incomplete_details is not None:
  error_reason = event.response.incomplete_details.reason
  if event.response.usage is not None:
- usage = convert_responses_usage(
- input_tokens=event.response.usage.input_tokens,
- output_tokens=event.response.usage.output_tokens,
- cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
- reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
- total_tokens=event.response.usage.total_tokens,
- context_limit=param.context_limit,
- max_tokens=param.max_tokens,
+ metadata_tracker.set_usage(
+ model.Usage(
+ input_tokens=event.response.usage.input_tokens,
+ output_tokens=event.response.usage.output_tokens,
+ cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
+ reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
+ context_size=event.response.usage.total_tokens,
+ context_limit=param.context_limit,
+ max_tokens=param.max_tokens,
+ )
  )
- metadata_tracker.set_usage(usage)
  metadata_tracker.set_model_name(str(param.model))
  metadata_tracker.set_response_id(response_id)
  yield metadata_tracker.finalize()
@@ -127,7 +169,7 @@ async def parse_responses_stream(
  debug_type=DebugType.LLM_STREAM,
  )
  except (openai.OpenAIError, httpx.HTTPError) as e:
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")


  @register(llm_param.LLMClientProtocol.RESPONSES)
@@ -157,45 +199,25 @@ class ResponsesClient(LLMClientABC):
  return cls(config)

  @override
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
  param = apply_config_defaults(param, self.get_llm_config())

  metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

- inputs = convert_history_to_input(param.input, param.model)
- tools = convert_tool_schema(param.tools)
+ payload = build_payload(param)

+ log_debug(
+ json.dumps(payload, ensure_ascii=False, default=str),
+ style="yellow",
+ debug_type=DebugType.LLM_PAYLOAD,
+ )
  try:
- stream = await call_with_logged_payload(
- self.client.responses.create,
- model=str(param.model),
- tool_choice="auto",
- parallel_tool_calls=True,
- include=[
- "reasoning.encrypted_content",
- ],
- store=param.store,
- previous_response_id=param.previous_response_id,
- stream=True,
- temperature=param.temperature,
- max_output_tokens=param.max_tokens,
- input=inputs,
- instructions=param.system,
- tools=tools,
- text={
- "verbosity": param.verbosity,
- },
- prompt_cache_key=param.session_id or "",
- reasoning={
- "effort": param.thinking.reasoning_effort,
- "summary": param.thinking.reasoning_summary,
- }
- if param.thinking and param.thinking.reasoning_effort
- else None,
+ stream = await self.client.responses.create(
+ **payload,
  extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
  )
  except (openai.OpenAIError, httpx.HTTPError) as e:
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
  return

  async for item in parse_responses_stream(stream, param, metadata_tracker):
klaude_code/llm/usage.py CHANGED
@@ -108,55 +108,7 @@ def convert_usage(
  reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
  or 0,
  output_tokens=usage.completion_tokens,
- context_token=usage.total_tokens,
- context_limit=context_limit,
- max_tokens=max_tokens,
- )
-
-
- def convert_anthropic_usage(
- input_tokens: int,
- output_tokens: int,
- cached_tokens: int,
- context_limit: int | None = None,
- max_tokens: int | None = None,
- ) -> model.Usage:
- """Convert Anthropic usage data to internal Usage model.
-
- context_token is computed from input + cached + output tokens,
- representing the actual context window usage for this turn.
- """
- context_token = input_tokens + cached_tokens + output_tokens
- return model.Usage(
- input_tokens=input_tokens,
- output_tokens=output_tokens,
- cached_tokens=cached_tokens,
- context_token=context_token,
- context_limit=context_limit,
- max_tokens=max_tokens,
- )
-
-
- def convert_responses_usage(
- input_tokens: int,
- output_tokens: int,
- cached_tokens: int,
- reasoning_tokens: int,
- total_tokens: int,
- context_limit: int | None = None,
- max_tokens: int | None = None,
- ) -> model.Usage:
- """Convert OpenAI Responses API usage data to internal Usage model.
-
- context_token is set to total_tokens from the API response,
- representing the actual context window usage for this turn.
- """
- return model.Usage(
- input_tokens=input_tokens,
- output_tokens=output_tokens,
- cached_tokens=cached_tokens,
- reasoning_tokens=reasoning_tokens,
- context_token=total_tokens,
+ context_size=usage.total_tokens,
  context_limit=context_limit,
  max_tokens=max_tokens,
  )
klaude_code/protocol/commands.py CHANGED
@@ -13,6 +13,7 @@ class CommandName(str, Enum):
  EXPORT = "export"
  STATUS = "status"
  RELEASE_NOTES = "release-notes"
+ THINKING = "thinking"
  # PLAN and DOC are dynamically registered now, but kept here if needed for reference
  # or we can remove them if no code explicitly imports them.
  # PLAN = "plan"
klaude_code/protocol/events.py CHANGED
@@ -54,6 +54,12 @@ class ThinkingEvent(BaseModel):
  content: str


+ class ThinkingDeltaEvent(BaseModel):
+ session_id: str
+ response_id: str | None = None
+ content: str
+
+
  class AssistantMessageDeltaEvent(BaseModel):
  session_id: str
  response_id: str | None = None
@@ -153,6 +159,7 @@ Event = (
  TaskStartEvent
  | TaskFinishEvent
  | ThinkingEvent
+ | ThinkingDeltaEvent
  | AssistantMessageDeltaEvent
  | AssistantMessageEvent
  | ToolCallEvent