klaude-code 1.2.11-py3-none-any.whl → 1.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. klaude_code/auth/codex/oauth.py +3 -3
  2. klaude_code/cli/main.py +5 -5
  3. klaude_code/cli/runtime.py +19 -27
  4. klaude_code/cli/session_cmd.py +6 -8
  5. klaude_code/command/__init__.py +31 -28
  6. klaude_code/command/clear_cmd.py +0 -2
  7. klaude_code/command/diff_cmd.py +0 -2
  8. klaude_code/command/export_cmd.py +3 -5
  9. klaude_code/command/help_cmd.py +0 -2
  10. klaude_code/command/model_cmd.py +0 -2
  11. klaude_code/command/refresh_cmd.py +0 -2
  12. klaude_code/command/registry.py +5 -9
  13. klaude_code/command/release_notes_cmd.py +0 -2
  14. klaude_code/command/status_cmd.py +2 -4
  15. klaude_code/command/terminal_setup_cmd.py +2 -4
  16. klaude_code/command/thinking_cmd.py +229 -0
  17. klaude_code/config/__init__.py +1 -1
  18. klaude_code/config/list_model.py +1 -1
  19. klaude_code/config/select_model.py +5 -15
  20. klaude_code/const/__init__.py +1 -1
  21. klaude_code/core/agent.py +14 -69
  22. klaude_code/core/executor.py +11 -10
  23. klaude_code/core/manager/agent_manager.py +4 -4
  24. klaude_code/core/manager/llm_clients.py +10 -49
  25. klaude_code/core/manager/llm_clients_builder.py +8 -21
  26. klaude_code/core/manager/sub_agent_manager.py +3 -3
  27. klaude_code/core/prompt.py +3 -3
  28. klaude_code/core/reminders.py +1 -1
  29. klaude_code/core/task.py +4 -5
  30. klaude_code/core/tool/__init__.py +16 -25
  31. klaude_code/core/tool/file/_utils.py +1 -1
  32. klaude_code/core/tool/file/apply_patch.py +17 -25
  33. klaude_code/core/tool/file/apply_patch_tool.py +4 -7
  34. klaude_code/core/tool/file/edit_tool.py +4 -11
  35. klaude_code/core/tool/file/multi_edit_tool.py +2 -3
  36. klaude_code/core/tool/file/read_tool.py +3 -4
  37. klaude_code/core/tool/file/write_tool.py +2 -3
  38. klaude_code/core/tool/memory/memory_tool.py +2 -8
  39. klaude_code/core/tool/memory/skill_loader.py +3 -2
  40. klaude_code/core/tool/shell/command_safety.py +0 -1
  41. klaude_code/core/tool/tool_context.py +1 -3
  42. klaude_code/core/tool/tool_registry.py +2 -1
  43. klaude_code/core/tool/tool_runner.py +1 -1
  44. klaude_code/core/tool/truncation.py +2 -5
  45. klaude_code/core/turn.py +9 -4
  46. klaude_code/llm/anthropic/client.py +62 -49
  47. klaude_code/llm/client.py +2 -20
  48. klaude_code/llm/codex/client.py +51 -32
  49. klaude_code/llm/input_common.py +2 -2
  50. klaude_code/llm/openai_compatible/client.py +60 -39
  51. klaude_code/llm/openai_compatible/stream_processor.py +2 -1
  52. klaude_code/llm/openrouter/client.py +79 -45
  53. klaude_code/llm/openrouter/reasoning_handler.py +19 -132
  54. klaude_code/llm/registry.py +6 -5
  55. klaude_code/llm/responses/client.py +65 -43
  56. klaude_code/llm/usage.py +1 -49
  57. klaude_code/protocol/commands.py +1 -0
  58. klaude_code/protocol/events.py +7 -0
  59. klaude_code/protocol/llm_param.py +1 -9
  60. klaude_code/protocol/model.py +10 -6
  61. klaude_code/protocol/sub_agent.py +2 -1
  62. klaude_code/session/export.py +1 -8
  63. klaude_code/session/selector.py +12 -7
  64. klaude_code/session/session.py +2 -4
  65. klaude_code/trace/__init__.py +1 -1
  66. klaude_code/trace/log.py +1 -1
  67. klaude_code/ui/__init__.py +4 -9
  68. klaude_code/ui/core/stage_manager.py +7 -4
  69. klaude_code/ui/modes/repl/__init__.py +1 -1
  70. klaude_code/ui/modes/repl/completers.py +6 -7
  71. klaude_code/ui/modes/repl/display.py +3 -4
  72. klaude_code/ui/modes/repl/event_handler.py +63 -5
  73. klaude_code/ui/modes/repl/key_bindings.py +2 -3
  74. klaude_code/ui/modes/repl/renderer.py +2 -1
  75. klaude_code/ui/renderers/diffs.py +1 -4
  76. klaude_code/ui/renderers/metadata.py +1 -12
  77. klaude_code/ui/rich/markdown.py +3 -3
  78. klaude_code/ui/rich/searchable_text.py +6 -6
  79. klaude_code/ui/rich/status.py +3 -4
  80. klaude_code/ui/rich/theme.py +1 -4
  81. klaude_code/ui/terminal/control.py +7 -16
  82. klaude_code/ui/terminal/notifier.py +2 -4
  83. klaude_code/ui/utils/common.py +1 -1
  84. klaude_code/ui/utils/debouncer.py +2 -2
  85. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/METADATA +1 -1
  86. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/RECORD +88 -87
  87. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/WHEEL +0 -0
  88. {klaude_code-1.2.11.dist-info → klaude_code-1.2.13.dist-info}/entry_points.txt +0 -0
klaude_code/llm/openrouter/client.py CHANGED
@@ -1,10 +1,12 @@
+ import json
  from collections.abc import AsyncGenerator
  from typing import override

  import httpx
  import openai
+ from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming

- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+ from klaude_code.llm.client import LLMClientABC
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.openai_compatible.input import convert_tool_schema
  from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager
@@ -16,6 +18,53 @@ from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log, log_debug


+ def build_payload(
+ param: llm_param.LLMCallParameter,
+ ) -> tuple[CompletionCreateParamsStreaming, dict[str, object], dict[str, str]]:
+ """Build OpenRouter API request parameters."""
+ messages = convert_history_to_input(param.input, param.system, param.model)
+ tools = convert_tool_schema(param.tools)
+
+ extra_body: dict[str, object] = {
+ "usage": {"include": True}, # To get the cache tokens at the end of the response
+ "debug": {
+ "echo_upstream_body": True
+ }, # https://openrouter.ai/docs/api/reference/errors-and-debugging#debug-option-shape
+ }
+ extra_headers: dict[str, str] = {}
+
+ if param.thinking:
+ if param.thinking.budget_tokens is not None:
+ extra_body["reasoning"] = {
+ "max_tokens": param.thinking.budget_tokens,
+ "enable": True,
+ } # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
+ elif param.thinking.reasoning_effort is not None:
+ extra_body["reasoning"] = {
+ "effort": param.thinking.reasoning_effort,
+ }
+
+ if param.provider_routing:
+ extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
+
+ if is_claude_model(param.model):
+ extra_headers["x-anthropic-beta"] = "fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14"
+
+ payload: CompletionCreateParamsStreaming = {
+ "model": str(param.model),
+ "tool_choice": "auto",
+ "parallel_tool_calls": True,
+ "stream": True,
+ "messages": messages,
+ "temperature": param.temperature,
+ "max_tokens": param.max_tokens,
+ "tools": tools,
+ "verbosity": param.verbosity,
+ }
+
+ return payload, extra_body, extra_headers
+
+
  @register(llm_param.LLMClientProtocol.OPENROUTER)
  class OpenRouterClient(LLMClientABC):
  def __init__(self, config: llm_param.LLMConfigParameter):
@@ -33,54 +82,30 @@ class OpenRouterClient(LLMClientABC):
  return cls(config)

  @override
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
  param = apply_config_defaults(param, self.get_llm_config())
- messages = convert_history_to_input(param.input, param.system, param.model)
- tools = convert_tool_schema(param.tools)

  metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

- extra_body: dict[str, object] = {
- "usage": {"include": True} # To get the cache tokens at the end of the response
- }
- extra_headers = {}
-
- if param.thinking:
- if param.thinking.budget_tokens is not None:
- extra_body["reasoning"] = {
- "max_tokens": param.thinking.budget_tokens,
- "enable": True,
- } # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
- elif param.thinking.reasoning_effort is not None:
- extra_body["reasoning"] = {
- "effort": param.thinking.reasoning_effort,
- }
- if param.provider_routing:
- extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
- if is_claude_model(param.model):
- extra_headers["anthropic-beta"] = (
- "interleaved-thinking-2025-05-14" # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
- )
-
- stream = call_with_logged_payload(
- self.client.chat.completions.create,
- model=str(param.model),
- tool_choice="auto",
- parallel_tool_calls=True,
- stream=True,
- messages=messages,
- temperature=param.temperature,
- max_tokens=param.max_tokens,
- tools=tools,
- verbosity=param.verbosity,
+ payload, extra_body, extra_headers = build_payload(param)
+
+ log_debug(
+ json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+ style="yellow",
+ debug_type=DebugType.LLM_PAYLOAD,
+ )
+
+ stream = self.client.chat.completions.create(
+ **payload,
  extra_body=extra_body,
- extra_headers=extra_headers, # pyright: ignore[reportUnknownArgumentType]
+ extra_headers=extra_headers,
  )

  reasoning_handler = ReasoningStreamHandler(
  param_model=str(param.model),
  response_id=None,
  )
+
  state = StreamStateManager(
  param_model=str(param.model),
  reasoning_flusher=reasoning_handler.flush,
@@ -93,31 +118,40 @@ class OpenRouterClient(LLMClientABC):
  style="blue",
  debug_type=DebugType.LLM_STREAM,
  )
+
  if not state.response_id and event.id:
  state.set_response_id(event.id)
  reasoning_handler.set_response_id(event.id)
  yield model.StartItem(response_id=event.id)
- if (
- event.usage is not None and event.usage.completion_tokens is not None # pyright: ignore[reportUnnecessaryComparison]
- ): # gcp gemini will return None usage field
+ if event.usage is not None:
  metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
  if event.model:
  metadata_tracker.set_model_name(event.model)
  if provider := getattr(event, "provider", None):
  metadata_tracker.set_provider(str(provider))
-
  if len(event.choices) == 0:
  continue
  delta = event.choices[0].delta

  # Reasoning
- if hasattr(delta, "reasoning_details") and getattr(delta, "reasoning_details"):
- reasoning_details = getattr(delta, "reasoning_details")
+ if reasoning_details := getattr(delta, "reasoning_details", None):
  for item in reasoning_details:
  try:
  reasoning_detail = ReasoningDetail.model_validate(item)
  metadata_tracker.record_token()
  state.stage = "reasoning"
+ # Yield delta immediately for streaming
+ if reasoning_detail.text:
+ yield model.ReasoningTextDelta(
+ content=reasoning_detail.text,
+ response_id=state.response_id,
+ )
+ if reasoning_detail.summary:
+ yield model.ReasoningTextDelta(
+ content=reasoning_detail.summary,
+ response_id=state.response_id,
+ )
+ # Keep existing handler logic for final items
  for conversation_item in reasoning_handler.on_detail(reasoning_detail):
  yield conversation_item
  except Exception as e:
@@ -160,7 +194,7 @@ class OpenRouterClient(LLMClientABC):
  state.accumulated_tool_calls.add(delta.tool_calls)

  except (openai.OpenAIError, httpx.HTTPError) as e:
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")

  # Finalize
  for item in state.flush_all():
klaude_code/llm/openrouter/reasoning_handler.py CHANGED
@@ -1,5 +1,3 @@
- from enum import Enum
-
  from pydantic import BaseModel

  from klaude_code.protocol import model
@@ -18,14 +16,8 @@ class ReasoningDetail(BaseModel):
  signature: str | None = None # Claude's signature


- class ReasoningMode(str, Enum):
- COMPLETE_CHUNK = "complete_chunk"
- GPT5_SECTIONS = "gpt5_sections"
- ACCUMULATE = "accumulate"
-
-
  class ReasoningStreamHandler:
- """Encapsulates reasoning stream handling across different model behaviors."""
+ """Accumulates reasoning text and flushes on encrypted content or finalize."""

  def __init__(
  self,
@@ -37,59 +29,48 @@ class ReasoningStreamHandler:

  self._reasoning_id: str | None = None
  self._accumulated_reasoning: list[str] = []
- self._gpt5_line_buffer: str = ""
- self._gpt5_section_lines: list[str] = []

  def set_response_id(self, response_id: str | None) -> None:
  """Update the response identifier used for emitted items."""
-
  self._response_id = response_id

  def on_detail(self, detail: ReasoningDetail) -> list[model.ConversationItem]:
  """Process a single reasoning detail and return streamable items."""
-
  items: list[model.ConversationItem] = []

  if detail.type == "reasoning.encrypted":
  self._reasoning_id = detail.id
+ # Flush accumulated text before encrypted content
+ items.extend(self._flush_text())
  if encrypted_item := self._build_encrypted_item(detail.data, detail):
  items.append(encrypted_item)
  return items

  if detail.type in ("reasoning.text", "reasoning.summary"):
  self._reasoning_id = detail.id
- if encrypted_item := self._build_encrypted_item(detail.signature, detail):
- items.append(encrypted_item)
+ # Accumulate text
  text = detail.text if detail.type == "reasoning.text" else detail.summary
  if text:
- items.extend(self._handle_text(text))
+ self._accumulated_reasoning.append(text)
+ # Flush on signature (encrypted content)
+ if detail.signature:
+ items.extend(self._flush_text())
+ if encrypted_item := self._build_encrypted_item(detail.signature, detail):
+ items.append(encrypted_item)

  return items

  def flush(self) -> list[model.ConversationItem]:
- """Flush buffered reasoning text and encrypted payloads."""
+ """Flush buffered reasoning text on finalize."""
+ return self._flush_text()

- items: list[model.ConversationItem] = []
- mode = self._resolve_mode()
-
- if mode is ReasoningMode.GPT5_SECTIONS:
- for section in self._drain_gpt5_sections():
- items.append(self._build_text_item(section))
- elif self._accumulated_reasoning and mode is ReasoningMode.ACCUMULATE:
- items.append(self._build_text_item("".join(self._accumulated_reasoning)))
- self._accumulated_reasoning = []
-
- return items
-
- def _handle_text(self, text: str) -> list[model.ReasoningTextItem]:
- mode = self._resolve_mode()
- if mode is ReasoningMode.COMPLETE_CHUNK:
- return [self._build_text_item(text)]
- if mode is ReasoningMode.GPT5_SECTIONS:
- sections = self._process_gpt5_text(text)
- return [self._build_text_item(section) for section in sections]
- self._accumulated_reasoning.append(text)
- return []
+ def _flush_text(self) -> list[model.ConversationItem]:
+ """Flush accumulated reasoning text as a single item."""
+ if not self._accumulated_reasoning:
+ return []
+ item = self._build_text_item("".join(self._accumulated_reasoning))
+ self._accumulated_reasoning = []
+ return [item]

  def _build_text_item(self, content: str) -> model.ReasoningTextItem:
  return model.ReasoningTextItem(
@@ -113,97 +94,3 @@ class ReasoningStreamHandler:
  response_id=self._response_id,
  model=self._param_model,
  )
-
- def _process_gpt5_text(self, text: str) -> list[str]:
- emitted_sections: list[str] = []
- self._gpt5_line_buffer += text
- while True:
- newline_index = self._gpt5_line_buffer.find("\n")
- if newline_index == -1:
- break
- line = self._gpt5_line_buffer[:newline_index]
- self._gpt5_line_buffer = self._gpt5_line_buffer[newline_index + 1 :]
- remainder = line
- while True:
- split_result = self._split_gpt5_title_line(remainder)
- if split_result is None:
- break
- prefix_segment, title_segment, remainder = split_result
- if prefix_segment:
- if not self._gpt5_section_lines:
- self._gpt5_section_lines = []
- self._gpt5_section_lines.append(f"{prefix_segment}\n")
- if self._gpt5_section_lines:
- emitted_sections.append("".join(self._gpt5_section_lines))
- self._gpt5_section_lines = [f"{title_segment} \n"] # Add two spaces for markdown line break
- if remainder:
- if not self._gpt5_section_lines:
- self._gpt5_section_lines = []
- self._gpt5_section_lines.append(f"{remainder}\n")
- return emitted_sections
-
- def _drain_gpt5_sections(self) -> list[str]:
- sections: list[str] = []
- if self._gpt5_line_buffer:
- if not self._gpt5_section_lines:
- self._gpt5_section_lines = [self._gpt5_line_buffer]
- else:
- self._gpt5_section_lines.append(self._gpt5_line_buffer)
- self._gpt5_line_buffer = ""
- if self._gpt5_section_lines:
- sections.append("".join(self._gpt5_section_lines))
- self._gpt5_section_lines = []
- return sections
-
- def _is_gpt5(self) -> bool:
- return "gpt-5" in self._param_model.lower()
-
- def _is_complete_chunk_reasoning_model(self) -> bool:
- """Whether the current model emits reasoning in complete chunks (e.g. Gemini)."""
-
- return self._param_model.startswith("google/gemini")
-
- def _resolve_mode(self) -> ReasoningMode:
- if self._is_complete_chunk_reasoning_model():
- return ReasoningMode.COMPLETE_CHUNK
- if self._is_gpt5():
- return ReasoningMode.GPT5_SECTIONS
- return ReasoningMode.ACCUMULATE
-
- def _is_gpt5_title_line(self, line: str) -> bool:
- stripped = line.strip()
- if not stripped:
- return False
- return stripped.startswith("**") and stripped.endswith("**") and stripped.count("**") >= 2
-
- def _split_gpt5_title_line(self, line: str) -> tuple[str | None, str, str] | None:
- if not line:
- return None
- search_start = 0
- while True:
- opening_index = line.find("**", search_start)
- if opening_index == -1:
- return None
- closing_index = line.find("**", opening_index + 2)
- if closing_index == -1:
- return None
- title_candidate = line[opening_index : closing_index + 2]
- stripped_title = title_candidate.strip()
- if self._is_gpt5_title_line(stripped_title):
- # Treat as a GPT-5 title only when everything after the
- # bold segment is either whitespace or starts a new bold
- # title. This prevents inline bold like `**xxx**yyyy`
- # from being misclassified as a section title while
- # preserving support for consecutive titles in one line.
- after = line[closing_index + 2 :]
- if after.strip() and not after.lstrip().startswith("**"):
- search_start = closing_index + 2
- continue
- prefix_segment = line[:opening_index]
- remainder_segment = after
- return (
- prefix_segment if prefix_segment else None,
- stripped_title,
- remainder_segment,
- )
- search_start = closing_index + 2
klaude_code/llm/registry.py CHANGED
@@ -1,4 +1,5 @@
- from typing import TYPE_CHECKING, Callable, TypeVar
+ from collections.abc import Callable
+ from typing import TYPE_CHECKING, TypeVar

  from klaude_code.protocol import llm_param

@@ -20,13 +21,13 @@ def _load_protocol(protocol: llm_param.LLMClientProtocol) -> None:

  # Import only the needed module to trigger @register decorator
  if protocol == llm_param.LLMClientProtocol.ANTHROPIC:
- from . import anthropic as _ # noqa: F401
+ from . import anthropic as _
  elif protocol == llm_param.LLMClientProtocol.CODEX:
- from . import codex as _ # noqa: F401
+ from . import codex as _
  elif protocol == llm_param.LLMClientProtocol.OPENAI:
- from . import openai_compatible as _ # noqa: F401
+ from . import openai_compatible as _
  elif protocol == llm_param.LLMClientProtocol.OPENROUTER:
- from . import openrouter as _ # noqa: F401
+ from . import openrouter as _
  elif protocol == llm_param.LLMClientProtocol.RESPONSES:
  from . import responses as _ # noqa: F401

klaude_code/llm/responses/client.py CHANGED
@@ -6,12 +6,13 @@ import httpx
  import openai
  from openai import AsyncAzureOpenAI, AsyncOpenAI
  from openai.types import responses
+ from openai.types.responses.response_create_params import ResponseCreateParamsStreaming

- from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+ from klaude_code.llm.client import LLMClientABC
  from klaude_code.llm.input_common import apply_config_defaults
  from klaude_code.llm.registry import register
  from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
- from klaude_code.llm.usage import MetadataTracker, convert_responses_usage
+ from klaude_code.llm.usage import MetadataTracker
  from klaude_code.protocol import llm_param, model
  from klaude_code.trace import DebugType, log_debug

@@ -20,11 +21,45 @@ if TYPE_CHECKING:
  from openai.types.responses import ResponseStreamEvent


+ def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
+ """Build OpenAI Responses API request parameters."""
+ inputs = convert_history_to_input(param.input, param.model)
+ tools = convert_tool_schema(param.tools)
+
+ payload: ResponseCreateParamsStreaming = {
+ "model": str(param.model),
+ "tool_choice": "auto",
+ "parallel_tool_calls": True,
+ "include": [
+ "reasoning.encrypted_content",
+ ],
+ "store": False,
+ "stream": True,
+ "temperature": param.temperature,
+ "max_output_tokens": param.max_tokens,
+ "input": inputs,
+ "instructions": param.system,
+ "tools": tools,
+ "prompt_cache_key": param.session_id or "",
+ }
+
+ if param.thinking and param.thinking.reasoning_effort:
+ payload["reasoning"] = {
+ "effort": param.thinking.reasoning_effort,
+ "summary": param.thinking.reasoning_summary,
+ }
+
+ if param.verbosity:
+ payload["text"] = {"verbosity": param.verbosity}
+
+ return payload
+
+
  async def parse_responses_stream(
  stream: "AsyncStream[ResponseStreamEvent]",
  param: llm_param.LLMCallParameter,
  metadata_tracker: MetadataTracker,
- ) -> AsyncGenerator[model.ConversationItem, None]:
+ ) -> AsyncGenerator[model.ConversationItem]:
  """Parse OpenAI Responses API stream events into ConversationItems."""
  response_id: str | None = None

@@ -40,6 +75,12 @@ async def parse_responses_stream(
  case responses.ResponseCreatedEvent() as event:
  response_id = event.response.id
  yield model.StartItem(response_id=response_id)
+ case responses.ResponseReasoningSummaryTextDeltaEvent() as event:
+ if event.delta:
+ yield model.ReasoningTextDelta(
+ content=event.delta,
+ response_id=response_id,
+ )
  case responses.ResponseReasoningSummaryTextDoneEvent() as event:
  if event.text:
  yield model.ReasoningTextItem(
@@ -95,16 +136,17 @@ async def parse_responses_stream(
  if event.response.incomplete_details is not None:
  error_reason = event.response.incomplete_details.reason
  if event.response.usage is not None:
- usage = convert_responses_usage(
- input_tokens=event.response.usage.input_tokens,
- output_tokens=event.response.usage.output_tokens,
- cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
- reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
- total_tokens=event.response.usage.total_tokens,
- context_limit=param.context_limit,
- max_tokens=param.max_tokens,
+ metadata_tracker.set_usage(
+ model.Usage(
+ input_tokens=event.response.usage.input_tokens,
+ output_tokens=event.response.usage.output_tokens,
+ cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
+ reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
+ context_size=event.response.usage.total_tokens,
+ context_limit=param.context_limit,
+ max_tokens=param.max_tokens,
+ )
  )
- metadata_tracker.set_usage(usage)
  metadata_tracker.set_model_name(str(param.model))
  metadata_tracker.set_response_id(response_id)
  yield metadata_tracker.finalize()
@@ -127,7 +169,7 @@ async def parse_responses_stream(
  debug_type=DebugType.LLM_STREAM,
  )
  except (openai.OpenAIError, httpx.HTTPError) as e:
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")


  @register(llm_param.LLMClientProtocol.RESPONSES)
@@ -157,45 +199,25 @@ class ResponsesClient(LLMClientABC):
  return cls(config)

  @override
- async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+ async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
  param = apply_config_defaults(param, self.get_llm_config())

  metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

- inputs = convert_history_to_input(param.input, param.model)
- tools = convert_tool_schema(param.tools)
+ payload = build_payload(param)

+ log_debug(
+ json.dumps(payload, ensure_ascii=False, default=str),
+ style="yellow",
+ debug_type=DebugType.LLM_PAYLOAD,
+ )
  try:
- stream = await call_with_logged_payload(
- self.client.responses.create,
- model=str(param.model),
- tool_choice="auto",
- parallel_tool_calls=True,
- include=[
- "reasoning.encrypted_content",
- ],
- store=param.store,
- previous_response_id=param.previous_response_id,
- stream=True,
- temperature=param.temperature,
- max_output_tokens=param.max_tokens,
- input=inputs,
- instructions=param.system,
- tools=tools,
- text={
- "verbosity": param.verbosity,
- },
- prompt_cache_key=param.session_id or "",
- reasoning={
- "effort": param.thinking.reasoning_effort,
- "summary": param.thinking.reasoning_summary,
- }
- if param.thinking and param.thinking.reasoning_effort
- else None,
+ stream = await self.client.responses.create(
+ **payload,
  extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
  )
  except (openai.OpenAIError, httpx.HTTPError) as e:
- yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+ yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
  return

  async for item in parse_responses_stream(stream, param, metadata_tracker):
klaude_code/llm/usage.py CHANGED
@@ -108,55 +108,7 @@ def convert_usage(
  reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
  or 0,
  output_tokens=usage.completion_tokens,
- context_token=usage.total_tokens,
- context_limit=context_limit,
- max_tokens=max_tokens,
- )
-
-
- def convert_anthropic_usage(
- input_tokens: int,
- output_tokens: int,
- cached_tokens: int,
- context_limit: int | None = None,
- max_tokens: int | None = None,
- ) -> model.Usage:
- """Convert Anthropic usage data to internal Usage model.
-
- context_token is computed from input + cached + output tokens,
- representing the actual context window usage for this turn.
- """
- context_token = input_tokens + cached_tokens + output_tokens
- return model.Usage(
- input_tokens=input_tokens,
- output_tokens=output_tokens,
- cached_tokens=cached_tokens,
- context_token=context_token,
- context_limit=context_limit,
- max_tokens=max_tokens,
- )
-
-
- def convert_responses_usage(
- input_tokens: int,
- output_tokens: int,
- cached_tokens: int,
- reasoning_tokens: int,
- total_tokens: int,
- context_limit: int | None = None,
- max_tokens: int | None = None,
- ) -> model.Usage:
- """Convert OpenAI Responses API usage data to internal Usage model.
-
- context_token is set to total_tokens from the API response,
- representing the actual context window usage for this turn.
- """
- return model.Usage(
- input_tokens=input_tokens,
- output_tokens=output_tokens,
- cached_tokens=cached_tokens,
- reasoning_tokens=reasoning_tokens,
- context_token=total_tokens,
+ context_size=usage.total_tokens,
  context_limit=context_limit,
  max_tokens=max_tokens,
  )
klaude_code/protocol/commands.py CHANGED
@@ -13,6 +13,7 @@ class CommandName(str, Enum):
  EXPORT = "export"
  STATUS = "status"
  RELEASE_NOTES = "release-notes"
+ THINKING = "thinking"
  # PLAN and DOC are dynamically registered now, but kept here if needed for reference
  # or we can remove them if no code explicitly imports them.
  # PLAN = "plan"
klaude_code/protocol/events.py CHANGED
@@ -54,6 +54,12 @@ class ThinkingEvent(BaseModel):
  content: str


+ class ThinkingDeltaEvent(BaseModel):
+ session_id: str
+ response_id: str | None = None
+ content: str
+
+
  class AssistantMessageDeltaEvent(BaseModel):
  session_id: str
  response_id: str | None = None
@@ -153,6 +159,7 @@ Event = (
  TaskStartEvent
  | TaskFinishEvent
  | ThinkingEvent
+ | ThinkingDeltaEvent
  | AssistantMessageDeltaEvent
  | AssistantMessageEvent
  | ToolCallEvent