PyPI - klaude-code - Versions diffs - 1.2.6__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

klaude-code 1.2.6 (py3-none-any.whl) → 1.8.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205)

klaude_code/auth/__init__.py +24 -0
klaude_code/auth/codex/__init__.py +20 -0
klaude_code/auth/codex/exceptions.py +17 -0
klaude_code/auth/codex/jwt_utils.py +45 -0
klaude_code/auth/codex/oauth.py +229 -0
klaude_code/auth/codex/token_manager.py +84 -0
klaude_code/cli/auth_cmd.py +73 -0
klaude_code/cli/config_cmd.py +91 -0
klaude_code/cli/cost_cmd.py +338 -0
klaude_code/cli/debug.py +78 -0
klaude_code/cli/list_model.py +307 -0
klaude_code/cli/main.py +233 -134
klaude_code/cli/runtime.py +309 -117
klaude_code/{version.py → cli/self_update.py} +114 -5
klaude_code/cli/session_cmd.py +37 -21
klaude_code/command/__init__.py +88 -27
klaude_code/command/clear_cmd.py +8 -7
klaude_code/command/command_abc.py +31 -31
klaude_code/command/debug_cmd.py +79 -0
klaude_code/command/export_cmd.py +19 -53
klaude_code/command/export_online_cmd.py +154 -0
klaude_code/command/fork_session_cmd.py +267 -0
klaude_code/command/help_cmd.py +7 -8
klaude_code/command/model_cmd.py +60 -10
klaude_code/command/model_select.py +84 -0
klaude_code/command/prompt-jj-describe.md +32 -0
klaude_code/command/prompt_command.py +19 -11
klaude_code/command/refresh_cmd.py +8 -10
klaude_code/command/registry.py +139 -40
klaude_code/command/release_notes_cmd.py +84 -0
klaude_code/command/resume_cmd.py +111 -0
klaude_code/command/status_cmd.py +104 -60
klaude_code/command/terminal_setup_cmd.py +7 -9
klaude_code/command/thinking_cmd.py +98 -0
klaude_code/config/__init__.py +14 -6
klaude_code/config/assets/__init__.py +1 -0
klaude_code/config/assets/builtin_config.yaml +303 -0
klaude_code/config/builtin_config.py +38 -0
klaude_code/config/config.py +378 -109
klaude_code/config/select_model.py +117 -53
klaude_code/config/thinking.py +269 -0
klaude_code/{const/__init__.py → const.py} +50 -19
klaude_code/core/agent.py +20 -28
klaude_code/core/executor.py +327 -112
klaude_code/core/manager/__init__.py +2 -4
klaude_code/core/manager/llm_clients.py +1 -15
klaude_code/core/manager/llm_clients_builder.py +10 -11
klaude_code/core/manager/sub_agent_manager.py +37 -6
klaude_code/core/prompt.py +63 -44
klaude_code/core/prompts/prompt-claude-code.md +2 -13
klaude_code/core/prompts/prompt-codex-gpt-5-1-codex-max.md +117 -0
klaude_code/core/prompts/prompt-codex-gpt-5-2-codex.md +117 -0
klaude_code/core/prompts/prompt-codex.md +9 -42
klaude_code/core/prompts/prompt-minimal.md +12 -0
klaude_code/core/prompts/{prompt-subagent-explore.md → prompt-sub-agent-explore.md} +16 -3
klaude_code/core/prompts/{prompt-subagent-oracle.md → prompt-sub-agent-oracle.md} +1 -2
klaude_code/core/prompts/prompt-sub-agent-web.md +51 -0
klaude_code/core/reminders.py +283 -95
klaude_code/core/task.py +113 -75
klaude_code/core/tool/__init__.py +24 -31
klaude_code/core/tool/file/_utils.py +36 -0
klaude_code/core/tool/file/apply_patch.py +17 -25
klaude_code/core/tool/file/apply_patch_tool.py +57 -77
klaude_code/core/tool/file/diff_builder.py +151 -0
klaude_code/core/tool/file/edit_tool.py +50 -63
klaude_code/core/tool/file/move_tool.md +41 -0
klaude_code/core/tool/file/move_tool.py +435 -0
klaude_code/core/tool/file/read_tool.md +1 -1
klaude_code/core/tool/file/read_tool.py +86 -86
klaude_code/core/tool/file/write_tool.py +59 -69
klaude_code/core/tool/report_back_tool.py +84 -0
klaude_code/core/tool/shell/bash_tool.py +265 -22
klaude_code/core/tool/shell/command_safety.py +3 -6
klaude_code/core/tool/{memory → skill}/skill_tool.py +16 -26
klaude_code/core/tool/sub_agent_tool.py +13 -2
klaude_code/core/tool/todo/todo_write_tool.md +0 -157
klaude_code/core/tool/todo/todo_write_tool.py +1 -1
klaude_code/core/tool/todo/todo_write_tool_raw.md +182 -0
klaude_code/core/tool/todo/update_plan_tool.py +1 -1
klaude_code/core/tool/tool_abc.py +18 -0
klaude_code/core/tool/tool_context.py +27 -12
klaude_code/core/tool/tool_registry.py +7 -7
klaude_code/core/tool/tool_runner.py +44 -36
klaude_code/core/tool/truncation.py +29 -14
klaude_code/core/tool/web/mermaid_tool.md +43 -0
klaude_code/core/tool/web/mermaid_tool.py +2 -5
klaude_code/core/tool/web/web_fetch_tool.md +1 -1
klaude_code/core/tool/web/web_fetch_tool.py +112 -22
klaude_code/core/tool/web/web_search_tool.md +23 -0
klaude_code/core/tool/web/web_search_tool.py +130 -0
klaude_code/core/turn.py +168 -66
klaude_code/llm/__init__.py +2 -10
klaude_code/llm/anthropic/client.py +190 -178
klaude_code/llm/anthropic/input.py +39 -15
klaude_code/llm/bedrock/__init__.py +3 -0
klaude_code/llm/bedrock/client.py +60 -0
klaude_code/llm/client.py +7 -21
klaude_code/llm/codex/__init__.py +5 -0
klaude_code/llm/codex/client.py +149 -0
klaude_code/llm/google/__init__.py +3 -0
klaude_code/llm/google/client.py +309 -0
klaude_code/llm/google/input.py +215 -0
klaude_code/llm/input_common.py +3 -9
klaude_code/llm/openai_compatible/client.py +72 -164
klaude_code/llm/openai_compatible/input.py +6 -4
klaude_code/llm/openai_compatible/stream.py +273 -0
klaude_code/llm/openai_compatible/tool_call_accumulator.py +17 -1
klaude_code/llm/openrouter/client.py +89 -160
klaude_code/llm/openrouter/input.py +18 -30
klaude_code/llm/openrouter/reasoning.py +118 -0
klaude_code/llm/registry.py +39 -7
klaude_code/llm/responses/client.py +184 -171
klaude_code/llm/responses/input.py +20 -1
klaude_code/llm/usage.py +17 -12
klaude_code/protocol/commands.py +17 -1
klaude_code/protocol/events.py +31 -4
klaude_code/protocol/llm_param.py +13 -10
klaude_code/protocol/model.py +232 -29
klaude_code/protocol/op.py +90 -1
klaude_code/protocol/op_handler.py +35 -1
klaude_code/protocol/sub_agent/__init__.py +117 -0
klaude_code/protocol/sub_agent/explore.py +63 -0
klaude_code/protocol/sub_agent/oracle.py +91 -0
klaude_code/protocol/sub_agent/task.py +61 -0
klaude_code/protocol/sub_agent/web.py +79 -0
klaude_code/protocol/tools.py +4 -2
klaude_code/session/__init__.py +2 -2
klaude_code/session/codec.py +71 -0
klaude_code/session/export.py +293 -86
klaude_code/session/selector.py +89 -67
klaude_code/session/session.py +320 -309
klaude_code/session/store.py +220 -0
klaude_code/session/templates/export_session.html +595 -83
klaude_code/session/templates/mermaid_viewer.html +926 -0
klaude_code/skill/__init__.py +27 -0
klaude_code/skill/assets/deslop/SKILL.md +17 -0
klaude_code/skill/assets/dev-docs/SKILL.md +108 -0
klaude_code/skill/assets/handoff/SKILL.md +39 -0
klaude_code/skill/assets/jj-workspace/SKILL.md +20 -0
klaude_code/skill/assets/skill-creator/SKILL.md +139 -0
klaude_code/{core/tool/memory/skill_loader.py → skill/loader.py} +55 -15
klaude_code/skill/manager.py +70 -0
klaude_code/skill/system_skills.py +192 -0
klaude_code/trace/__init__.py +20 -2
klaude_code/trace/log.py +150 -5
klaude_code/ui/__init__.py +4 -9
klaude_code/ui/core/input.py +1 -1
klaude_code/ui/core/stage_manager.py +7 -7
klaude_code/ui/modes/debug/display.py +2 -1
klaude_code/ui/modes/repl/__init__.py +3 -48
klaude_code/ui/modes/repl/clipboard.py +5 -5
klaude_code/ui/modes/repl/completers.py +487 -123
klaude_code/ui/modes/repl/display.py +5 -4
klaude_code/ui/modes/repl/event_handler.py +370 -117
klaude_code/ui/modes/repl/input_prompt_toolkit.py +552 -105
klaude_code/ui/modes/repl/key_bindings.py +146 -23
klaude_code/ui/modes/repl/renderer.py +189 -99
klaude_code/ui/renderers/assistant.py +9 -2
klaude_code/ui/renderers/bash_syntax.py +178 -0
klaude_code/ui/renderers/common.py +78 -0
klaude_code/ui/renderers/developer.py +104 -48
klaude_code/ui/renderers/diffs.py +87 -6
klaude_code/ui/renderers/errors.py +11 -6
klaude_code/ui/renderers/mermaid_viewer.py +57 -0
klaude_code/ui/renderers/metadata.py +112 -76
klaude_code/ui/renderers/sub_agent.py +92 -7
klaude_code/ui/renderers/thinking.py +40 -18
klaude_code/ui/renderers/tools.py +405 -227
klaude_code/ui/renderers/user_input.py +73 -13
klaude_code/ui/rich/__init__.py +10 -1
klaude_code/ui/rich/cjk_wrap.py +228 -0
klaude_code/ui/rich/code_panel.py +131 -0
klaude_code/ui/rich/live.py +17 -0
klaude_code/ui/rich/markdown.py +305 -170
klaude_code/ui/rich/searchable_text.py +10 -13
klaude_code/ui/rich/status.py +190 -49
klaude_code/ui/rich/theme.py +135 -39
klaude_code/ui/terminal/__init__.py +55 -0
klaude_code/ui/terminal/color.py +1 -1
klaude_code/ui/terminal/control.py +13 -22
klaude_code/ui/terminal/notifier.py +44 -4
klaude_code/ui/terminal/selector.py +658 -0
klaude_code/ui/utils/common.py +0 -18
klaude_code-1.8.0.dist-info/METADATA +377 -0
klaude_code-1.8.0.dist-info/RECORD +219 -0
{klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/entry_points.txt +1 -0
klaude_code/command/diff_cmd.py +0 -138
klaude_code/command/prompt-dev-docs-update.md +0 -56
klaude_code/command/prompt-dev-docs.md +0 -46
klaude_code/config/list_model.py +0 -162
klaude_code/core/manager/agent_manager.py +0 -127
klaude_code/core/prompts/prompt-subagent-webfetch.md +0 -46
klaude_code/core/tool/file/multi_edit_tool.md +0 -42
klaude_code/core/tool/file/multi_edit_tool.py +0 -199
klaude_code/core/tool/memory/memory_tool.md +0 -16
klaude_code/core/tool/memory/memory_tool.py +0 -462
klaude_code/llm/openrouter/reasoning_handler.py +0 -209
klaude_code/protocol/sub_agent.py +0 -348
klaude_code/ui/utils/debouncer.py +0 -42
klaude_code-1.2.6.dist-info/METADATA +0 -178
klaude_code-1.2.6.dist-info/RECORD +0 -167
/klaude_code/core/prompts/{prompt-subagent.md → prompt-sub-agent.md} +0 -0
/klaude_code/core/tool/{memory → skill}/__init__.py +0 -0
/klaude_code/core/tool/{memory → skill}/skill_tool.md +0 -0
{klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/WHEEL +0 -0

klaude_code/llm/anthropic/client.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import json
-import time
+import os
 from collections.abc import AsyncGenerator
-from typing import override
+from typing import Any, override
 import anthropic
 import httpx
-from anthropic import RateLimitError
+from anthropic import APIError
 from anthropic.types.beta.beta_input_json_delta import BetaInputJSONDelta
 from anthropic.types.beta.beta_raw_content_block_delta_event import BetaRawContentBlockDeltaEvent
 from anthropic.types.beta.beta_raw_content_block_start_event import BetaRawContentBlockStartEvent
@@ -16,26 +16,190 @@ from anthropic.types.beta.beta_signature_delta import BetaSignatureDelta
 from anthropic.types.beta.beta_text_delta import BetaTextDelta
 from anthropic.types.beta.beta_thinking_delta import BetaThinkingDelta
 from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
+from anthropic.types.beta.message_create_params import MessageCreateParamsStreaming
 from klaude_code import const
 from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
-from klaude_code.llm.client import LLMClientABC, call_with_logged_payload
+from klaude_code.llm.client import LLMClientABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import calculate_cost
+from klaude_code.llm.usage import MetadataTracker
 from klaude_code.protocol import llm_param, model
 from klaude_code.trace import DebugType, log_debug
+def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStreaming:
+    """Build Anthropic API request parameters."""
+    messages = convert_history_to_input(param.input, param.model)
+    tools = convert_tool_schema(param.tools)
+    system = convert_system_to_input(param.system)
+    payload: MessageCreateParamsStreaming = {
+        "model": str(param.model),
+        "tool_choice": {
+            "type": "auto",
+            "disable_parallel_tool_use": False,
+        },
+        "stream": True,
+        "max_tokens": param.max_tokens or const.DEFAULT_MAX_TOKENS,
+        "temperature": param.temperature or const.DEFAULT_TEMPERATURE,
+        "messages": messages,
+        "system": system,
+        "tools": tools,
+        "betas": ["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
+    }
+    if param.thinking and param.thinking.type == "enabled":
+        payload["thinking"] = anthropic.types.ThinkingConfigEnabledParam(
+            type="enabled",
+            budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
+        )
+    return payload
+async def parse_anthropic_stream(
+    stream: Any,
+    param: llm_param.LLMCallParameter,
+    metadata_tracker: MetadataTracker,
+) -> AsyncGenerator[model.ConversationItem]:
+    """Parse Anthropic beta messages stream and yield conversation items.
+    This function is shared between AnthropicClient and BedrockClient.
+    """
+    accumulated_thinking: list[str] = []
+    accumulated_content: list[str] = []
+    response_id: str | None = None
+    current_tool_name: str | None = None
+    current_tool_call_id: str | None = None
+    current_tool_inputs: list[str] | None = None
+    input_token = 0
+    cached_token = 0
+    async for event in await stream:
+        log_debug(
+            f"[{event.type}]",
+            event.model_dump_json(exclude_none=True),
+            style="blue",
+            debug_type=DebugType.LLM_STREAM,
+        )
+        match event:
+            case BetaRawMessageStartEvent() as event:
+                response_id = event.message.id
+                cached_token = event.message.usage.cache_read_input_tokens or 0
+                input_token = event.message.usage.input_tokens
+                yield model.StartItem(response_id=response_id)
+            case BetaRawContentBlockDeltaEvent() as event:
+                match event.delta:
+                    case BetaThinkingDelta() as delta:
+                        if delta.thinking:
+                            metadata_tracker.record_token()
+                        accumulated_thinking.append(delta.thinking)
+                        yield model.ReasoningTextDelta(
+                            content=delta.thinking,
+                            response_id=response_id,
+                        )
+                    case BetaSignatureDelta() as delta:
+                        yield model.ReasoningEncryptedItem(
+                            encrypted_content=delta.signature,
+                            response_id=response_id,
+                            model=str(param.model),
+                        )
+                    case BetaTextDelta() as delta:
+                        if delta.text:
+                            metadata_tracker.record_token()
+                        accumulated_content.append(delta.text)
+                        yield model.AssistantMessageDelta(
+                            content=delta.text,
+                            response_id=response_id,
+                        )
+                    case BetaInputJSONDelta() as delta:
+                        if current_tool_inputs is not None:
+                            if delta.partial_json:
+                                metadata_tracker.record_token()
+                            current_tool_inputs.append(delta.partial_json)
+                    case _:
+                        pass
+            case BetaRawContentBlockStartEvent() as event:
+                match event.content_block:
+                    case BetaToolUseBlock() as block:
+                        metadata_tracker.record_token()
+                        yield model.ToolCallStartItem(
+                            response_id=response_id,
+                            call_id=block.id,
+                            name=block.name,
+                        )
+                        current_tool_name = block.name
+                        current_tool_call_id = block.id
+                        current_tool_inputs = []
+                    case _:
+                        pass
+            case BetaRawContentBlockStopEvent():
+                if len(accumulated_thinking) > 0:
+                    metadata_tracker.record_token()
+                    full_thinking = "".join(accumulated_thinking)
+                    yield model.ReasoningTextItem(
+                        content=full_thinking,
+                        response_id=response_id,
+                        model=str(param.model),
+                    )
+                    accumulated_thinking.clear()
+                if len(accumulated_content) > 0:
+                    metadata_tracker.record_token()
+                    yield model.AssistantMessageItem(
+                        content="".join(accumulated_content),
+                        response_id=response_id,
+                    )
+                    accumulated_content.clear()
+                if current_tool_name and current_tool_call_id:
+                    metadata_tracker.record_token()
+                    yield model.ToolCallItem(
+                        name=current_tool_name,
+                        call_id=current_tool_call_id,
+                        arguments="".join(current_tool_inputs) if current_tool_inputs else "",
+                        response_id=response_id,
+                    )
+                    current_tool_name = None
+                    current_tool_call_id = None
+                    current_tool_inputs = None
+            case BetaRawMessageDeltaEvent() as event:
+                metadata_tracker.set_usage(
+                    model.Usage(
+                        input_tokens=input_token + cached_token,
+                        output_tokens=event.usage.output_tokens,
+                        cached_tokens=cached_token,
+                        context_size=input_token + cached_token + event.usage.output_tokens,
+                        context_limit=param.context_limit,
+                        max_tokens=param.max_tokens,
+                    )
+                )
+                metadata_tracker.set_model_name(str(param.model))
+                metadata_tracker.set_response_id(response_id)
+                yield metadata_tracker.finalize()
+            case _:
+                pass
 @register(llm_param.LLMClientProtocol.ANTHROPIC)
 class AnthropicClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):
         super().__init__(config)
-        client = anthropic.AsyncAnthropic(
-            api_key=config.api_key,
-            base_url=config.base_url,
-            timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
-        )
+        # Remove ANTHROPIC_AUTH_TOKEN env var to prevent anthropic SDK from adding
+        # Authorization: Bearer header that may conflict with third-party APIs
+        # (e.g., deepseek, moonshot) that use Authorization header for authentication.
+        # The API key will be sent via X-Api-Key header instead.
+        saved_auth_token = os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
+        try:
+            client = anthropic.AsyncAnthropic(
+                api_key=config.api_key,
+                base_url=config.base_url,
+                timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
+            )
+        finally:
+            if saved_auth_token is not None:
+                os.environ["ANTHROPIC_AUTH_TOKEN"] = saved_auth_token
         self.client: anthropic.AsyncAnthropic = client
     @classmethod
@@ -44,178 +208,26 @@ class AnthropicClient(LLMClientABC):
         return cls(config)
     @override
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         param = apply_config_defaults(param, self.get_llm_config())
-        request_start_time = time.time()
-        first_token_time: float | None = None
-        last_token_time: float | None = None
-        messages = convert_history_to_input(param.input, param.model)
-        tools = convert_tool_schema(param.tools)
-        system = convert_system_to_input(param.system)
-        stream = call_with_logged_payload(
-            self.client.beta.messages.create,
-            model=str(param.model),
-            tool_choice={
-                "type": "auto",
-                "disable_parallel_tool_use": False,
-            },
-            stream=True,
-            max_tokens=param.max_tokens or const.DEFAULT_MAX_TOKENS,
-            temperature=param.temperature or const.DEFAULT_TEMPERATURE,
-            messages=messages,
-            system=system,
-            tools=tools,
-            betas=["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
-            thinking=anthropic.types.ThinkingConfigEnabledParam(
-                type=param.thinking.type,
-                budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
-            )
-            if param.thinking and param.thinking.type == "enabled"
-            else anthropic.types.ThinkingConfigDisabledParam(
-                type="disabled",
-            ),
-            extra_headers={"extra": json.dumps({"session_id": param.session_id})},
-        )
+        metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
-        accumulated_thinking: list[str] = []
-        accumulated_content: list[str] = []
-        response_id: str | None = None
+        payload = build_payload(param)
-        current_tool_name: str | None = None
-        current_tool_call_id: str | None = None
-        current_tool_inputs: list[str] | None = None
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
-        input_tokens = 0
-        cached_tokens = 0
-        output_tokens = 0
+        stream = self.client.beta.messages.create(
+            **payload,
+            extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
+        )
         try:
-            async for event in await stream:
-                log_debug(
-                    f"[{event.type}]",
-                    event.model_dump_json(exclude_none=True),
-                    style="blue",
-                    debug_type=DebugType.LLM_STREAM,
-                )
-                match event:
-                    case BetaRawMessageStartEvent() as event:
-                        response_id = event.message.id
-                        cached_tokens = event.message.usage.cache_read_input_tokens or 0
-                        input_tokens = (event.message.usage.input_tokens or 0) + (
-                            event.message.usage.cache_creation_input_tokens or 0
-                        )
-                        output_tokens = event.message.usage.output_tokens or 0
-                        yield model.StartItem(response_id=response_id)
-                    case BetaRawContentBlockDeltaEvent() as event:
-                        match event.delta:
-                            case BetaThinkingDelta() as delta:
-                                if first_token_time is None:
-                                    first_token_time = time.time()
-                                last_token_time = time.time()
-                                accumulated_thinking.append(delta.thinking)
-                            case BetaSignatureDelta() as delta:
-                                if first_token_time is None:
-                                    first_token_time = time.time()
-                                last_token_time = time.time()
-                                yield model.ReasoningEncryptedItem(
-                                    encrypted_content=delta.signature,
-                                    response_id=response_id,
-                                    model=str(param.model),
-                                )
-                            case BetaTextDelta() as delta:
-                                if first_token_time is None:
-                                    first_token_time = time.time()
-                                last_token_time = time.time()
-                                accumulated_content.append(delta.text)
-                                yield model.AssistantMessageDelta(
-                                    content=delta.text,
-                                    response_id=response_id,
-                                )
-                            case BetaInputJSONDelta() as delta:
-                                if first_token_time is None:
-                                    first_token_time = time.time()
-                                last_token_time = time.time()
-                                if current_tool_inputs is not None:
-                                    current_tool_inputs.append(delta.partial_json)
-                            case _:
-                                pass
-                    case BetaRawContentBlockStartEvent() as event:
-                        match event.content_block:
-                            case BetaToolUseBlock() as block:
-                                yield model.ToolCallStartItem(
-                                    response_id=response_id,
-                                    call_id=block.id,
-                                    name=block.name,
-                                )
-                                current_tool_name = block.name
-                                current_tool_call_id = block.id
-                                current_tool_inputs = []
-                            case _:
-                                pass
-                    case BetaRawContentBlockStopEvent() as event:
-                        if len(accumulated_thinking) > 0:
-                            full_thinking = "".join(accumulated_thinking)
-                            yield model.ReasoningTextItem(
-                                content=full_thinking,
-                                response_id=response_id,
-                                model=str(param.model),
-                            )
-                            accumulated_thinking.clear()
-                        if len(accumulated_content) > 0:
-                            yield model.AssistantMessageItem(
-                                content="".join(accumulated_content),
-                                response_id=response_id,
-                            )
-                            accumulated_content.clear()
-                        if current_tool_name and current_tool_call_id:
-                            yield model.ToolCallItem(
-                                name=current_tool_name,
-                                call_id=current_tool_call_id,
-                                arguments="".join(current_tool_inputs) if current_tool_inputs else "",
-                                response_id=response_id,
-                            )
-                            current_tool_name = None
-                            current_tool_call_id = None
-                            current_tool_inputs = None
-                    case BetaRawMessageDeltaEvent() as event:
-                        input_tokens += (event.usage.input_tokens or 0) + (event.usage.cache_creation_input_tokens or 0)
-                        output_tokens += event.usage.output_tokens or 0
-                        cached_tokens += event.usage.cache_read_input_tokens or 0
-                        total_tokens = input_tokens + cached_tokens + output_tokens
-                        context_usage_percent = (
-                            (total_tokens / param.context_limit) * 100 if param.context_limit else None
-                        )
-                        throughput_tps: float | None = None
-                        first_token_latency_ms: float | None = None
-                        if first_token_time is not None:
-                            first_token_latency_ms = (first_token_time - request_start_time) * 1000
-                        if first_token_time is not None and last_token_time is not None and output_tokens > 0:
-                            time_duration = last_token_time - first_token_time
-                            if time_duration >= 0.15:
-                                throughput_tps = output_tokens / time_duration
-                        usage = model.Usage(
-                            input_tokens=input_tokens,
-                            output_tokens=output_tokens,
-                            cached_tokens=cached_tokens,
-                            total_tokens=total_tokens,
-                            context_usage_percent=context_usage_percent,
-                            throughput_tps=throughput_tps,
-                            first_token_latency_ms=first_token_latency_ms,
-                        )
-                        calculate_cost(usage, self._config.cost)
-                        yield model.ResponseMetadataItem(
-                            usage=usage,
-                            response_id=response_id,
-                            model_name=str(param.model),
-                        )
-                    case _:
-                        pass
-        except RateLimitError as e:
-            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
+            async for item in parse_anthropic_stream(stream, param, metadata_tracker):
+                yield item
+        except (APIError, httpx.HTTPError) as e:
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")

klaude_code/llm/anthropic/input.py CHANGED Viewed

@@ -73,7 +73,8 @@ def _user_group_to_message(group: UserGroup) -> BetaMessageParam:
     return {"role": "user", "content": blocks}
-def _tool_group_to_message(group: ToolGroup) -> BetaMessageParam:
+def _tool_group_to_block(group: ToolGroup) -> dict[str, object]:
+    """Convert a single ToolGroup to a tool_result block."""
     tool_content: list[BetaTextBlockParam | BetaImageBlockParam] = []
     merged_text = merge_reminder_text(
         group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
@@ -84,34 +85,41 @@ def _tool_group_to_message(group: ToolGroup) -> BetaMessageParam:
         tool_content.append(_image_part_to_block(image))
     for image in group.reminder_images:
         tool_content.append(_image_part_to_block(image))
+    return {
+        "type": "tool_result",
+        "tool_use_id": group.tool_result.call_id,
+        "is_error": group.tool_result.status == "error",
+        "content": tool_content,
+    }
+def _tool_groups_to_message(groups: list[ToolGroup]) -> BetaMessageParam:
+    """Convert one or more ToolGroups to a single user message with multiple tool_result blocks."""
     return {
         "role": "user",
-        "content": [
-            {
-                "type": "tool_result",
-                "tool_use_id": group.tool_result.call_id,
-                "is_error": group.tool_result.status == "error",
-                "content": tool_content,
-            }
-        ],
+        "content": [_tool_group_to_block(group) for group in groups],
     }
 def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -> BetaMessageParam:
     content: list[dict[str, object]] = []
     current_reasoning_content: str | None = None
+    degraded_thinking_texts: list[str] = []
     # Process reasoning items in original order so that text and
     # encrypted parts are paired correctly for the given model.
+    # For cross-model scenarios, degrade thinking to plain text.
     for item in group.reasoning_items:
         if isinstance(item, model.ReasoningTextItem):
             if model_name != item.model:
-                continue
-            current_reasoning_content = item.content
+                # Cross-model: collect thinking text for degradation
+                if item.content:
+                    degraded_thinking_texts.append(item.content)
+            else:
+                current_reasoning_content = item.content
         else:
-            if model_name != item.model:
-                continue
-            if item.encrypted_content and len(item.encrypted_content) > 0:
+            # Same model: preserve signature
+            if model_name == item.model and item.encrypted_content and len(item.encrypted_content) > 0:
                 content.append(
                     {
                         "type": "thinking",
@@ -127,6 +135,11 @@ def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -
     if len(current_reasoning_content or "") > 0:
         content.insert(0, {"type": "thinking", "thinking": current_reasoning_content})
+    # Cross-model: degrade thinking to plain text with <thinking> tags
+    if degraded_thinking_texts:
+        degraded_text = "<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>"
+        content.insert(0, {"type": "text", "text": degraded_text})
     if group.text_content:
         content.append({"type": "text", "text": group.text_content})
@@ -165,15 +178,26 @@ def convert_history_to_input(
         model_name: Model name. Used to verify that signatures are valid for the same model
     """
     messages: list[BetaMessageParam] = []
+    pending_tool_groups: list[ToolGroup] = []
+    def flush_tool_groups() -> None:
+        nonlocal pending_tool_groups
+        if pending_tool_groups:
+            messages.append(_tool_groups_to_message(pending_tool_groups))
+            pending_tool_groups = []
     for group in parse_message_groups(history):
         match group:
             case UserGroup():
+                flush_tool_groups()
                 messages.append(_user_group_to_message(group))
             case ToolGroup():
-                messages.append(_tool_group_to_message(group))
+                pending_tool_groups.append(group)
             case AssistantGroup():
+                flush_tool_groups()
                 messages.append(_assistant_group_to_message(group, model_name))
+    flush_tool_groups()
     _add_cache_control(messages)
     return messages

klaude_code/llm/bedrock/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from klaude_code.llm.bedrock.client import BedrockClient
+__all__ = ["BedrockClient"]

klaude_code/llm/bedrock/client.py ADDED Viewed

@@ -0,0 +1,60 @@
+"""AWS Bedrock LLM client using Anthropic SDK."""
+import json
+from collections.abc import AsyncGenerator
+from typing import override
+import anthropic
+import httpx
+from anthropic import APIError
+from klaude_code.llm.anthropic.client import build_payload, parse_anthropic_stream
+from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.input_common import apply_config_defaults
+from klaude_code.llm.registry import register
+from klaude_code.llm.usage import MetadataTracker
+from klaude_code.protocol import llm_param, model
+from klaude_code.trace import DebugType, log_debug
+@register(llm_param.LLMClientProtocol.BEDROCK)
+class BedrockClient(LLMClientABC):
+    """LLM client for AWS Bedrock using Anthropic SDK."""
+    def __init__(self, config: llm_param.LLMConfigParameter):
+        super().__init__(config)
+        self.client = anthropic.AsyncAnthropicBedrock(
+            aws_access_key=config.aws_access_key,
+            aws_secret_key=config.aws_secret_key,
+            aws_region=config.aws_region,
+            aws_session_token=config.aws_session_token,
+            aws_profile=config.aws_profile,
+            timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
+        )
+    @classmethod
+    @override
+    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
+        return cls(config)
+    @override
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
+        param = apply_config_defaults(param, self.get_llm_config())
+        metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
+        payload = build_payload(param)
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+        stream = self.client.beta.messages.create(**payload)
+        try:
+            async for item in parse_anthropic_stream(stream, param, metadata_tracker):
+                yield item
+        except (APIError, httpx.HTTPError) as e:
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")

klaude_code/llm/client.py CHANGED Viewed

@@ -1,10 +1,8 @@
-import json
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
-from typing import Callable, ParamSpec, TypeVar, cast
+from typing import ParamSpec, TypeVar, cast
 from klaude_code.protocol import llm_param, model
-from klaude_code.trace import DebugType, log_debug
 class LLMClientABC(ABC):
@@ -17,9 +15,9 @@ class LLMClientABC(ABC):
         pass
     @abstractmethod
-    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem, None]:
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
         raise NotImplementedError
-        yield cast(model.ConversationItem, None)  # pyright: ignore[reportUnreachable]
+        yield cast(model.ConversationItem, None)
     def get_llm_config(self) -> llm_param.LLMConfigParameter:
         return self._config
@@ -28,22 +26,10 @@ class LLMClientABC(ABC):
     def model_name(self) -> str:
         return self._config.model or ""
+    @property
+    def protocol(self) -> llm_param.LLMClientProtocol:
+        return self._config.protocol
 P = ParamSpec("P")
 R = TypeVar("R")
-def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
-    """Call an SDK function while logging the JSON payload.
-    The function reuses the original callable's type signature via ParamSpec
-    so static type checkers can validate arguments at the call site.
-    """
-    payload = {k: v for k, v in kwargs.items() if v is not None}
-    log_debug(
-        json.dumps(payload, ensure_ascii=False, default=str),
-        style="yellow",
-        debug_type=DebugType.LLM_PAYLOAD,
-    )
-    return func(*args, **kwargs)

klaude_code/llm/codex/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Codex LLM client using ChatGPT subscription."""
+from klaude_code.llm.codex.client import CodexClient
+__all__ = ["CodexClient"]

klaude-code 1.2.6__py3-none-any.whl → 1.8.0__py3-none-any.whl

klaude-code 1.2.6py3-none-any.whl → 1.8.0py3-none-any.whl