klaude-code 1.2.10__py3-none-any.whl → 1.2.12__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (57)
  1. klaude_code/cli/main.py +2 -7
  2. klaude_code/cli/runtime.py +23 -19
  3. klaude_code/command/__init__.py +29 -26
  4. klaude_code/command/clear_cmd.py +0 -2
  5. klaude_code/command/diff_cmd.py +0 -2
  6. klaude_code/command/export_cmd.py +0 -2
  7. klaude_code/command/help_cmd.py +0 -2
  8. klaude_code/command/model_cmd.py +0 -2
  9. klaude_code/command/refresh_cmd.py +0 -2
  10. klaude_code/command/registry.py +4 -8
  11. klaude_code/command/release_notes_cmd.py +0 -2
  12. klaude_code/command/status_cmd.py +2 -4
  13. klaude_code/command/terminal_setup_cmd.py +0 -2
  14. klaude_code/command/thinking_cmd.py +227 -0
  15. klaude_code/config/select_model.py +5 -15
  16. klaude_code/const/__init__.py +1 -1
  17. klaude_code/core/agent.py +1 -1
  18. klaude_code/core/executor.py +1 -4
  19. klaude_code/core/manager/agent_manager.py +15 -9
  20. klaude_code/core/manager/llm_clients_builder.py +4 -7
  21. klaude_code/core/prompt.py +5 -5
  22. klaude_code/core/prompts/prompt-claude-code.md +1 -12
  23. klaude_code/core/prompts/prompt-minimal.md +12 -0
  24. klaude_code/core/task.py +5 -2
  25. klaude_code/core/tool/memory/memory_tool.md +4 -0
  26. klaude_code/core/tool/memory/skill_loader.py +1 -1
  27. klaude_code/core/tool/todo/todo_write_tool.md +0 -157
  28. klaude_code/core/tool/todo/todo_write_tool_raw.md +182 -0
  29. klaude_code/core/tool/tool_registry.py +3 -4
  30. klaude_code/core/turn.py +0 -1
  31. klaude_code/llm/anthropic/client.py +56 -47
  32. klaude_code/llm/client.py +1 -19
  33. klaude_code/llm/codex/client.py +49 -30
  34. klaude_code/llm/openai_compatible/client.py +52 -34
  35. klaude_code/llm/openrouter/client.py +63 -41
  36. klaude_code/llm/responses/client.py +56 -39
  37. klaude_code/llm/usage.py +1 -49
  38. klaude_code/protocol/commands.py +1 -0
  39. klaude_code/protocol/llm_param.py +1 -9
  40. klaude_code/protocol/model.py +4 -3
  41. klaude_code/protocol/op.py +5 -2
  42. klaude_code/protocol/sub_agent.py +1 -0
  43. klaude_code/session/export.py +3 -0
  44. klaude_code/session/selector.py +12 -7
  45. klaude_code/session/session.py +1 -5
  46. klaude_code/session/templates/export_session.html +155 -0
  47. klaude_code/ui/modes/repl/completers.py +3 -3
  48. klaude_code/ui/modes/repl/event_handler.py +1 -5
  49. klaude_code/ui/modes/repl/input_prompt_toolkit.py +3 -34
  50. klaude_code/ui/renderers/metadata.py +11 -1
  51. klaude_code/ui/renderers/tools.py +13 -2
  52. klaude_code/ui/rich/markdown.py +4 -1
  53. klaude_code/ui/terminal/__init__.py +55 -0
  54. {klaude_code-1.2.10.dist-info → klaude_code-1.2.12.dist-info}/METADATA +1 -4
  55. {klaude_code-1.2.10.dist-info → klaude_code-1.2.12.dist-info}/RECORD +57 -54
  56. {klaude_code-1.2.10.dist-info → klaude_code-1.2.12.dist-info}/WHEEL +0 -0
  57. {klaude_code-1.2.10.dist-info → klaude_code-1.2.12.dist-info}/entry_points.txt +0 -0
klaude_code/core/turn.py CHANGED
@@ -158,7 +158,6 @@ class TurnExecutor:
                 input=session_ctx.get_conversation_history(),
                 system=ctx.system_prompt,
                 tools=ctx.tools,
-                store=False,
                 session_id=session_ctx.session_id,
             )
         ):
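The `store=False` removed here does not disappear from the codebase: as the Codex client diff below shows, the flag is now pinned inside that client's own payload builder, since only the Codex API requires it. A minimal sketch of the relocation, using a hypothetical stand-in for `llm_param.LLMCallParameter` rather than the real class:

from dataclasses import dataclass


@dataclass
class CallParam:
    """Hypothetical stand-in for llm_param.LLMCallParameter."""

    model: str
    session_id: str | None = None


def build_codex_payload(param: CallParam) -> dict[str, object]:
    # The provider-specific constraint lives with the provider:
    # the Codex API requires store=False, so the payload builder
    # pins it instead of the generic turn executor mutating param.
    return {
        "model": param.model,
        "store": False,
        "prompt_cache_key": param.session_id or "",
    }


print(build_codex_payload(CallParam(model="example-model", session_id="abc123")))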
klaude_code/llm/anthropic/client.py CHANGED
@@ -15,17 +15,48 @@ from anthropic.types.beta.beta_signature_delta import BetaSignatureDelta
 from anthropic.types.beta.beta_text_delta import BetaTextDelta
 from anthropic.types.beta.beta_thinking_delta import BetaThinkingDelta
 from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
+from anthropic.types.beta.message_create_params import MessageCreateParamsStreaming
 
 from klaude_code import const
 from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker, convert_anthropic_usage
+from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
 
+def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStreaming:
+    """Build Anthropic API request parameters."""
+    messages = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+    system = convert_system_to_input(param.system)
+
+    payload: MessageCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": {
+            "type": "auto",
+            "disable_parallel_tool_use": False,
+        },
+        "stream": True,
+        "max_tokens": param.max_tokens or const.DEFAULT_MAX_TOKENS,
+        "temperature": param.temperature or const.DEFAULT_TEMPERATURE,
+        "messages": messages,
+        "system": system,
+        "tools": tools,
+        "betas": ["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
+    }
+
+    if param.thinking and param.thinking.type == "enabled":
+        payload["thinking"] = anthropic.types.ThinkingConfigEnabledParam(
+            type="enabled",
+            budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
+        )
+
+    return payload
+
+
 @register(llm_param.LLMClientProtocol.ANTHROPIC)
 class AnthropicClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):
@@ -48,32 +79,16 @@ class AnthropicClient(LLMClientABC):
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        messages = convert_history_to_input(param.input, param.model)
-        tools = convert_tool_schema(param.tools)
-        system = convert_system_to_input(param.system)
-
-        stream = call_with_logged_payload(
-            self.client.beta.messages.create,
-            model=str(param.model),
-            tool_choice={
-                "type": "auto",
-                "disable_parallel_tool_use": False,
-            },
-            stream=True,
-            max_tokens=param.max_tokens or const.DEFAULT_MAX_TOKENS,
-            temperature=param.temperature or const.DEFAULT_TEMPERATURE,
-            messages=messages,
-            system=system,
-            tools=tools,
-            betas=["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
-            thinking=anthropic.types.ThinkingConfigEnabledParam(
-                type=param.thinking.type,
-                budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
-            )
-            if param.thinking and param.thinking.type == "enabled"
-            else anthropic.types.ThinkingConfigDisabledParam(
-                type="disabled",
-            ),
+        payload = build_payload(param)
+
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.beta.messages.create(
+            **payload,
             extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
         )
 
@@ -85,9 +100,8 @@ class AnthropicClient(LLMClientABC):
         current_tool_call_id: str | None = None
         current_tool_inputs: list[str] | None = None
 
-        input_tokens = 0
-        cached_tokens = 0
-        output_tokens = 0
+        input_token = 0
+        cached_token = 0
 
         try:
             async for event in await stream:
@@ -100,11 +114,8 @@ class AnthropicClient(LLMClientABC):
                 match event:
                     case BetaRawMessageStartEvent() as event:
                         response_id = event.message.id
-                        cached_tokens = event.message.usage.cache_read_input_tokens or 0
-                        input_tokens = (event.message.usage.input_tokens or 0) + (
-                            event.message.usage.cache_creation_input_tokens or 0
-                        )
-                        output_tokens = event.message.usage.output_tokens or 0
+                        cached_token = event.message.usage.cache_read_input_tokens or 0
+                        input_token = event.message.usage.input_tokens
                         yield model.StartItem(response_id=response_id)
                     case BetaRawContentBlockDeltaEvent() as event:
                         match event.delta:
@@ -170,18 +181,16 @@ class AnthropicClient(LLMClientABC):
                         current_tool_call_id = None
                         current_tool_inputs = None
                     case BetaRawMessageDeltaEvent() as event:
-                        input_tokens += (event.usage.input_tokens or 0) + (event.usage.cache_creation_input_tokens or 0)
-                        output_tokens += event.usage.output_tokens or 0
-                        cached_tokens += event.usage.cache_read_input_tokens or 0
-
-                        usage = convert_anthropic_usage(
-                            input_tokens=input_tokens,
-                            output_tokens=output_tokens,
-                            cached_tokens=cached_tokens,
-                            context_limit=param.context_limit,
-                            max_tokens=param.max_tokens,
+                        metadata_tracker.set_usage(
+                            model.Usage(
+                                input_tokens=input_token + cached_token,
+                                output_tokens=event.usage.output_tokens,
+                                cached_tokens=cached_token,
+                                context_size=input_token + cached_token + event.usage.output_tokens,
+                                context_limit=param.context_limit,
+                                max_tokens=param.max_tokens,
+                            )
                         )
-                        metadata_tracker.set_usage(usage)
         metadata_tracker.set_model_name(str(param.model))
         metadata_tracker.set_response_id(response_id)
         yield metadata_tracker.finalize()
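Pulling request construction out into a module-level `build_payload` makes the exact wire payload loggable and unit-testable without any network I/O. A rough sketch of that shape, with simplified stand-ins for the real parameter classes and constants (`Param`, `Thinking`, and the defaults below are illustrative, not the klaude_code definitions):

import json
from dataclasses import dataclass


@dataclass
class Thinking:
    """Stand-in for the thinking config on LLMCallParameter."""

    type: str = "disabled"
    budget_tokens: int | None = None


@dataclass
class Param:
    """Stand-in for llm_param.LLMCallParameter."""

    model: str
    max_tokens: int | None = None
    temperature: float | None = None
    thinking: Thinking | None = None


DEFAULT_MAX_TOKENS = 8192
DEFAULT_TEMPERATURE = 1.0
DEFAULT_THINKING_BUDGET = 2048


def build_payload(param: Param) -> dict[str, object]:
    # Pure function: no I/O, so it is trivially testable.
    payload: dict[str, object] = {
        "model": param.model,
        "stream": True,
        "max_tokens": param.max_tokens or DEFAULT_MAX_TOKENS,
        "temperature": param.temperature or DEFAULT_TEMPERATURE,
    }
    # Thinking is attached only when enabled; simply omitting the key
    # replaces the old explicit "disabled" branch.
    if param.thinking and param.thinking.type == "enabled":
        payload["thinking"] = {
            "type": "enabled",
            "budget_tokens": param.thinking.budget_tokens or DEFAULT_THINKING_BUDGET,
        }
    return payload


p = build_payload(Param(model="example-model", thinking=Thinking("enabled")))
print(json.dumps(p, indent=2))  # log exactly what would be sent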
klaude_code/llm/client.py CHANGED
@@ -1,10 +1,8 @@
-import json
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
-from typing import Callable, ParamSpec, TypeVar, cast
+from typing import ParamSpec, TypeVar, cast
 
 from klaude_code.protocol import llm_param, model
-from klaude_code.trace import DebugType, log_debug
 
 
 class LLMClientABC(ABC):
@@ -31,19 +29,3 @@ class LLMClientABC(ABC):
 
 P = ParamSpec("P")
 R = TypeVar("R")
-
-
-def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
-    """Call an SDK function while logging the JSON payload.
-
-    The function reuses the original callable's type signature via ParamSpec
-    so static type checkers can validate arguments at the call site.
-    """
-
-    payload = {k: v for k, v in kwargs.items() if v is not None}
-    log_debug(
-        json.dumps(payload, ensure_ascii=False, default=str, sort_keys=True),
-        style="yellow",
-        debug_type=DebugType.LLM_PAYLOAD,
-    )
-    return func(*args, **kwargs)
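With every client now building its request as a plain TypedDict, the generic ParamSpec wrapper above became unnecessary: clients log the payload dict directly and splat it into the SDK call. A self-contained sketch contrasting the two patterns, with a dummy function standing in for an SDK method:

import json
from typing import Callable, ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")


def fake_create(*, model: str, stream: bool) -> str:
    """Dummy stand-in for an SDK method such as client.beta.messages.create."""
    return f"stream<{model}, stream={stream}>"


def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    # Old pattern: a generic wrapper logs the kwargs, typed via ParamSpec.
    print(json.dumps({k: v for k, v in kwargs.items() if v is not None}, default=str))
    return func(*args, **kwargs)


# Old call site: argument passing and logging are entangled in one call.
call_with_logged_payload(fake_create, model="example-model", stream=True)

# New pattern: build the payload first, log it, then splat it into the call.
payload = {"model": "example-model", "stream": True}
print(json.dumps(payload, default=str))
fake_create(**payload)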
klaude_code/llm/codex/client.py CHANGED
@@ -1,22 +1,61 @@
 """Codex LLM client using ChatGPT subscription via OAuth."""
 
+import json
 from collections.abc import AsyncGenerator
 from typing import override
 
 import httpx
 import openai
 from openai import AsyncOpenAI
+from openai.types.responses.response_create_params import ResponseCreateParamsStreaming
 
 from klaude_code.auth.codex.exceptions import CodexNotLoggedInError
 from klaude_code.auth.codex.oauth import CodexOAuth
 from klaude_code.auth.codex.token_manager import CodexTokenManager
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
 from klaude_code.llm.responses.client import parse_responses_stream
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
 from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
+from klaude_code.trace import DebugType, log_debug
+
+
+def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
+    """Build Codex API request parameters."""
+    inputs = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    session_id = param.session_id or ""
+
+    payload: ResponseCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "include": [
+            "reasoning.encrypted_content",
+        ],
+        "store": False,
+        "stream": True,
+        "input": inputs,
+        "instructions": param.system,
+        "tools": tools,
+        "prompt_cache_key": session_id,
+        # max_output_tokens and temperature are not supported by the Codex API
+    }
+
+    if param.thinking and param.thinking.reasoning_effort:
+        payload["reasoning"] = {
+            "effort": param.thinking.reasoning_effort,
+            "summary": param.thinking.reasoning_summary,
+        }
+
+    if param.verbosity:
+        payload["text"] = {"verbosity": param.verbosity}
+
+    return payload
+
 
 # Codex API configuration
 CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
@@ -81,45 +120,25 @@ class CodexClient(LLMClientABC):
 
         param = apply_config_defaults(param, self.get_llm_config())
 
-        # Codex API requires store=False
-        param.store = False
-
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        inputs = convert_history_to_input(param.input, param.model)
-        tools = convert_tool_schema(param.tools)
+        payload = build_payload(param)
 
         session_id = param.session_id or ""
-        # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
         extra_headers: dict[str, str] = {}
         if session_id:
+            # Must send conversation_id/session_id headers to improve ChatGPT backend prompt cache hit rate.
             extra_headers["conversation_id"] = session_id
             extra_headers["session_id"] = session_id
 
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
         try:
-            stream = await call_with_logged_payload(
-                self.client.responses.create,
-                model=str(param.model),
-                tool_choice="auto",
-                parallel_tool_calls=True,
-                include=[
-                    "reasoning.encrypted_content",
-                ],
-                store=False,  # Always False for Codex
-                stream=True,
-                input=inputs,
-                instructions=param.system,
-                tools=tools,
-                text={
-                    "verbosity": param.verbosity,
-                },
-                prompt_cache_key=session_id,
-                reasoning={
-                    "effort": param.thinking.reasoning_effort,
-                    "summary": param.thinking.reasoning_summary,
-                }
-                if param.thinking and param.thinking.reasoning_effort
-                else None,
+            stream = await self.client.responses.create(
+                **payload,
                 extra_headers=extra_headers,
            )
        except (openai.OpenAIError, httpx.HTTPError) as e:
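Note the shape change as well: instead of always passing `reasoning=...` (possibly None) and a `text` block, the builder now adds those keys only when they carry a value, so the request never contains explicit nulls for optional features. A small self-contained sketch of that conditional-key pattern, with a stand-in `Thinking` type:

from dataclasses import dataclass


@dataclass
class Thinking:
    """Stand-in for the thinking config on LLMCallParameter."""

    reasoning_effort: str | None = None
    reasoning_summary: str | None = None


def build_payload(thinking: Thinking | None, verbosity: str | None) -> dict[str, object]:
    payload: dict[str, object] = {
        "store": False,  # the Codex API requires store=False
        "stream": True,
    }
    # Keys are added only when they carry a value, so the request
    # never contains explicit nulls for optional features.
    if thinking and thinking.reasoning_effort:
        payload["reasoning"] = {
            "effort": thinking.reasoning_effort,
            "summary": thinking.reasoning_summary,
        }
    if verbosity:
        payload["text"] = {"verbosity": verbosity}
    return payload


print(build_payload(Thinking(reasoning_effort="high", reasoning_summary="auto"), None))
print(build_payload(None, "low"))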
klaude_code/llm/openai_compatible/client.py CHANGED
@@ -4,8 +4,9 @@ from typing import override
 
 import httpx
 import openai
+from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
 
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.openai_compatible.input import convert_history_to_input, convert_tool_schema
 from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager
@@ -15,6 +16,35 @@ from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
 
 
+def build_payload(param: llm_param.LLMCallParameter) -> tuple[CompletionCreateParamsStreaming, dict[str, object]]:
+    """Build OpenAI API request parameters."""
+    messages = convert_history_to_input(param.input, param.system, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    extra_body: dict[str, object] = {}
+
+    if param.thinking:
+        extra_body["thinking"] = {
+            "type": param.thinking.type,
+            "budget": param.thinking.budget_tokens,
+        }
+
+    payload: CompletionCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "stream": True,
+        "messages": messages,
+        "temperature": param.temperature,
+        "max_tokens": param.max_tokens,
+        "tools": tools,
+        "reasoning_effort": param.thinking.reasoning_effort if param.thinking else None,
+        "verbosity": param.verbosity,
+    }
+
+    return payload, extra_body
+
+
 @register(llm_param.LLMClientProtocol.OPENAI)
 class OpenAICompatibleClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):
@@ -44,32 +74,21 @@ class OpenAICompatibleClient(LLMClientABC):
     @override
     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())
-        messages = convert_history_to_input(param.input, param.system, param.model)
-        tools = convert_tool_schema(param.tools)
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        extra_body = {}
-        extra_headers = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
-
-        if param.thinking:
-            extra_body["thinking"] = {
-                "type": param.thinking.type,
-                "budget": param.thinking.budget_tokens,
-            }
-        stream = call_with_logged_payload(
-            self.client.chat.completions.create,
-            model=str(param.model),
-            tool_choice="auto",
-            parallel_tool_calls=True,
-            stream=True,
-            messages=messages,
-            temperature=param.temperature,
-            max_tokens=param.max_tokens,
-            tools=tools,
-            reasoning_effort=param.thinking.reasoning_effort if param.thinking else None,
-            verbosity=param.verbosity,
-            extra_body=extra_body,  # pyright: ignore[reportUnknownArgumentType]
+        payload, extra_body = build_payload(param)
+        extra_headers: dict[str, str] = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
+
+        log_debug(
+            json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.chat.completions.create(
+            **payload,
+            extra_body=extra_body,
             extra_headers=extra_headers,
         )
 
@@ -85,9 +104,7 @@ class OpenAICompatibleClient(LLMClientABC):
                 if not state.response_id and event.id:
                     state.set_response_id(event.id)
                     yield model.StartItem(response_id=event.id)
-                if (
-                    event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison] gcp gemini will return None usage field
-                ):
+                if event.usage is not None:
                     metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
                 if event.model:
                     metadata_tracker.set_model_name(event.model)
@@ -96,9 +113,8 @@ class OpenAICompatibleClient(LLMClientABC):
 
                 if len(event.choices) == 0:
                     continue
-                delta = event.choices[0].delta
 
-                # Support Kimi K2's usage field in choice
+                # Support Moonshot Kimi K2's usage field in choice
                 if hasattr(event.choices[0], "usage") and getattr(event.choices[0], "usage"):
                     metadata_tracker.set_usage(
                         convert_usage(
@@ -108,12 +124,14 @@ class OpenAICompatibleClient(LLMClientABC):
                         )
                     )
 
+                delta = event.choices[0].delta
+
                 # Reasoning
-                reasoning_content = ""
-                if hasattr(delta, "reasoning") and getattr(delta, "reasoning"):
-                    reasoning_content = getattr(delta, "reasoning")
-                if hasattr(delta, "reasoning_content") and getattr(delta, "reasoning_content"):
-                    reasoning_content = getattr(delta, "reasoning_content")
+                reasoning_content = (
+                    getattr(delta, "reasoning_content", None)
+                    or getattr(delta, "reasoning", None)
+                    or ""
+                )
                 if reasoning_content:
                     metadata_tracker.record_token()
                     state.stage = "reasoning"
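The rewritten reasoning extraction collapses the two hasattr/getattr checks into a single `or` chain; `reasoning_content` still wins over `reasoning` when both are present, matching the old overwrite order. A quick illustration against simple stand-in delta objects:

from types import SimpleNamespace


def extract_reasoning(delta: object) -> str:
    # getattr with a default handles deltas that lack the attribute,
    # and `or` skips None and empty strings in one pass.
    return (
        getattr(delta, "reasoning_content", None)
        or getattr(delta, "reasoning", None)
        or ""
    )


print(extract_reasoning(SimpleNamespace(reasoning_content="step 1")))  # "step 1"
print(extract_reasoning(SimpleNamespace(reasoning="fallback")))        # "fallback"
print(extract_reasoning(SimpleNamespace(content="no reasoning")))      # ""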
klaude_code/llm/openrouter/client.py CHANGED
@@ -1,10 +1,12 @@
+import json
 from collections.abc import AsyncGenerator
 from typing import override
 
 import httpx
 import openai
+from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
 
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.openai_compatible.input import convert_tool_schema
 from klaude_code.llm.openai_compatible.stream_processor import StreamStateManager
@@ -16,6 +18,52 @@ from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log, log_debug
 
 
+def build_payload(
+    param: llm_param.LLMCallParameter,
+) -> tuple[CompletionCreateParamsStreaming, dict[str, object], dict[str, str]]:
+    """Build OpenRouter API request parameters."""
+    messages = convert_history_to_input(param.input, param.system, param.model)
+    tools = convert_tool_schema(param.tools)
+
+    extra_body: dict[str, object] = {
+        "usage": {"include": True}  # To get the cache tokens at the end of the response
+    }
+    extra_headers: dict[str, str] = {}
+
+    if param.thinking:
+        if param.thinking.budget_tokens is not None:
+            extra_body["reasoning"] = {
+                "max_tokens": param.thinking.budget_tokens,
+                "enable": True,
+            }  # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
+        elif param.thinking.reasoning_effort is not None:
+            extra_body["reasoning"] = {
+                "effort": param.thinking.reasoning_effort,
+            }
+
+    if param.provider_routing:
+        extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
+
+    if is_claude_model(param.model):
+        extra_headers["anthropic-beta"] = (
+            "interleaved-thinking-2025-05-14"  # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
+        )
+
+    payload: CompletionCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": "auto",
+        "parallel_tool_calls": True,
+        "stream": True,
+        "messages": messages,
+        "temperature": param.temperature,
+        "max_tokens": param.max_tokens,
+        "tools": tools,
+        "verbosity": param.verbosity,
+    }
+
+    return payload, extra_body, extra_headers
+
+
 @register(llm_param.LLMClientProtocol.OPENROUTER)
 class OpenRouterClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):
@@ -35,52 +83,28 @@ class OpenRouterClient(LLMClientABC):
     @override
     async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
         param = apply_config_defaults(param, self.get_llm_config())
-        messages = convert_history_to_input(param.input, param.system, param.model)
-        tools = convert_tool_schema(param.tools)
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
 
-        extra_body: dict[str, object] = {
-            "usage": {"include": True}  # To get the cache tokens at the end of the response
-        }
-        extra_headers = {}
-
-        if param.thinking:
-            if param.thinking.budget_tokens is not None:
-                extra_body["reasoning"] = {
-                    "max_tokens": param.thinking.budget_tokens,
-                    "enable": True,
-                }  # OpenRouter: https://openrouter.ai/docs/use-cases/reasoning-tokens#anthropic-models-with-reasoning-tokens
-            elif param.thinking.reasoning_effort is not None:
-                extra_body["reasoning"] = {
-                    "effort": param.thinking.reasoning_effort,
-                }
-        if param.provider_routing:
-            extra_body["provider"] = param.provider_routing.model_dump(exclude_none=True)
-        if is_claude_model(param.model):
-            extra_headers["anthropic-beta"] = (
-                "interleaved-thinking-2025-05-14"  # Not working yet, maybe OpenRouter's issue, or Anthropic: Interleaved thinking is only supported for tools used via the Messages API.
-            )
-
-        stream = call_with_logged_payload(
-            self.client.chat.completions.create,
-            model=str(param.model),
-            tool_choice="auto",
-            parallel_tool_calls=True,
-            stream=True,
-            messages=messages,
-            temperature=param.temperature,
-            max_tokens=param.max_tokens,
-            tools=tools,
-            verbosity=param.verbosity,
+        payload, extra_body, extra_headers = build_payload(param)
+
+        log_debug(
+            json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.chat.completions.create(
+            **payload,
             extra_body=extra_body,
-            extra_headers=extra_headers,  # pyright: ignore[reportUnknownArgumentType]
+            extra_headers=extra_headers,
        )
 
        reasoning_handler = ReasoningStreamHandler(
            param_model=str(param.model),
            response_id=None,
        )
+
        state = StreamStateManager(
            param_model=str(param.model),
            reasoning_flusher=reasoning_handler.flush,
@@ -93,19 +117,17 @@ class OpenRouterClient(LLMClientABC):
                    style="blue",
                    debug_type=DebugType.LLM_STREAM,
                )
+
                if not state.response_id and event.id:
                    state.set_response_id(event.id)
                    reasoning_handler.set_response_id(event.id)
                    yield model.StartItem(response_id=event.id)
-                if (
-                    event.usage is not None and event.usage.completion_tokens is not None  # pyright: ignore[reportUnnecessaryComparison]
-                ):  # gcp gemini will return None usage field
+                if event.usage is not None:
                    metadata_tracker.set_usage(convert_usage(event.usage, param.context_limit, param.max_tokens))
                if event.model:
                    metadata_tracker.set_model_name(event.model)
                if provider := getattr(event, "provider", None):
                    metadata_tracker.set_provider(str(provider))
-
                if len(event.choices) == 0:
                    continue
                delta = event.choices[0].delta
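The OpenRouter builder sends reasoning configuration either as a token budget or as an effort level, preferring the budget when both are set (see the OpenRouter reasoning-tokens link in the code). A condensed sketch of that branching, with plain values in place of the real parameter types:

def reasoning_config(budget_tokens: int | None, effort: str | None) -> dict[str, object] | None:
    # Budget takes precedence: Anthropic-style models on OpenRouter are
    # driven by a max_tokens budget, effort-style models by "effort".
    if budget_tokens is not None:
        return {"max_tokens": budget_tokens, "enable": True}
    if effort is not None:
        return {"effort": effort}
    return None


extra_body: dict[str, object] = {"usage": {"include": True}}
if (cfg := reasoning_config(budget_tokens=2048, effort=None)) is not None:
    extra_body["reasoning"] = cfg
print(extra_body)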