klaude-code 1.2.6__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/__init__.py +24 -0
- klaude_code/auth/codex/__init__.py +20 -0
- klaude_code/auth/codex/exceptions.py +17 -0
- klaude_code/auth/codex/jwt_utils.py +45 -0
- klaude_code/auth/codex/oauth.py +229 -0
- klaude_code/auth/codex/token_manager.py +84 -0
- klaude_code/cli/auth_cmd.py +73 -0
- klaude_code/cli/config_cmd.py +91 -0
- klaude_code/cli/cost_cmd.py +338 -0
- klaude_code/cli/debug.py +78 -0
- klaude_code/cli/list_model.py +307 -0
- klaude_code/cli/main.py +233 -134
- klaude_code/cli/runtime.py +309 -117
- klaude_code/{version.py → cli/self_update.py} +114 -5
- klaude_code/cli/session_cmd.py +37 -21
- klaude_code/command/__init__.py +88 -27
- klaude_code/command/clear_cmd.py +8 -7
- klaude_code/command/command_abc.py +31 -31
- klaude_code/command/debug_cmd.py +79 -0
- klaude_code/command/export_cmd.py +19 -53
- klaude_code/command/export_online_cmd.py +154 -0
- klaude_code/command/fork_session_cmd.py +267 -0
- klaude_code/command/help_cmd.py +7 -8
- klaude_code/command/model_cmd.py +60 -10
- klaude_code/command/model_select.py +84 -0
- klaude_code/command/prompt-jj-describe.md +32 -0
- klaude_code/command/prompt_command.py +19 -11
- klaude_code/command/refresh_cmd.py +8 -10
- klaude_code/command/registry.py +139 -40
- klaude_code/command/release_notes_cmd.py +84 -0
- klaude_code/command/resume_cmd.py +111 -0
- klaude_code/command/status_cmd.py +104 -60
- klaude_code/command/terminal_setup_cmd.py +7 -9
- klaude_code/command/thinking_cmd.py +98 -0
- klaude_code/config/__init__.py +14 -6
- klaude_code/config/assets/__init__.py +1 -0
- klaude_code/config/assets/builtin_config.yaml +303 -0
- klaude_code/config/builtin_config.py +38 -0
- klaude_code/config/config.py +378 -109
- klaude_code/config/select_model.py +117 -53
- klaude_code/config/thinking.py +269 -0
- klaude_code/{const/__init__.py → const.py} +50 -19
- klaude_code/core/agent.py +20 -28
- klaude_code/core/executor.py +327 -112
- klaude_code/core/manager/__init__.py +2 -4
- klaude_code/core/manager/llm_clients.py +1 -15
- klaude_code/core/manager/llm_clients_builder.py +10 -11
- klaude_code/core/manager/sub_agent_manager.py +37 -6
- klaude_code/core/prompt.py +63 -44
- klaude_code/core/prompts/prompt-claude-code.md +2 -13
- klaude_code/core/prompts/prompt-codex-gpt-5-1-codex-max.md +117 -0
- klaude_code/core/prompts/prompt-codex-gpt-5-2-codex.md +117 -0
- klaude_code/core/prompts/prompt-codex.md +9 -42
- klaude_code/core/prompts/prompt-minimal.md +12 -0
- klaude_code/core/prompts/{prompt-subagent-explore.md → prompt-sub-agent-explore.md} +16 -3
- klaude_code/core/prompts/{prompt-subagent-oracle.md → prompt-sub-agent-oracle.md} +1 -2
- klaude_code/core/prompts/prompt-sub-agent-web.md +51 -0
- klaude_code/core/reminders.py +283 -95
- klaude_code/core/task.py +113 -75
- klaude_code/core/tool/__init__.py +24 -31
- klaude_code/core/tool/file/_utils.py +36 -0
- klaude_code/core/tool/file/apply_patch.py +17 -25
- klaude_code/core/tool/file/apply_patch_tool.py +57 -77
- klaude_code/core/tool/file/diff_builder.py +151 -0
- klaude_code/core/tool/file/edit_tool.py +50 -63
- klaude_code/core/tool/file/move_tool.md +41 -0
- klaude_code/core/tool/file/move_tool.py +435 -0
- klaude_code/core/tool/file/read_tool.md +1 -1
- klaude_code/core/tool/file/read_tool.py +86 -86
- klaude_code/core/tool/file/write_tool.py +59 -69
- klaude_code/core/tool/report_back_tool.py +84 -0
- klaude_code/core/tool/shell/bash_tool.py +265 -22
- klaude_code/core/tool/shell/command_safety.py +3 -6
- klaude_code/core/tool/{memory → skill}/skill_tool.py +16 -26
- klaude_code/core/tool/sub_agent_tool.py +13 -2
- klaude_code/core/tool/todo/todo_write_tool.md +0 -157
- klaude_code/core/tool/todo/todo_write_tool.py +1 -1
- klaude_code/core/tool/todo/todo_write_tool_raw.md +182 -0
- klaude_code/core/tool/todo/update_plan_tool.py +1 -1
- klaude_code/core/tool/tool_abc.py +18 -0
- klaude_code/core/tool/tool_context.py +27 -12
- klaude_code/core/tool/tool_registry.py +7 -7
- klaude_code/core/tool/tool_runner.py +44 -36
- klaude_code/core/tool/truncation.py +29 -14
- klaude_code/core/tool/web/mermaid_tool.md +43 -0
- klaude_code/core/tool/web/mermaid_tool.py +2 -5
- klaude_code/core/tool/web/web_fetch_tool.md +1 -1
- klaude_code/core/tool/web/web_fetch_tool.py +112 -22
- klaude_code/core/tool/web/web_search_tool.md +23 -0
- klaude_code/core/tool/web/web_search_tool.py +130 -0
- klaude_code/core/turn.py +168 -66
- klaude_code/llm/__init__.py +2 -10
- klaude_code/llm/anthropic/client.py +190 -178
- klaude_code/llm/anthropic/input.py +39 -15
- klaude_code/llm/bedrock/__init__.py +3 -0
- klaude_code/llm/bedrock/client.py +60 -0
- klaude_code/llm/client.py +7 -21
- klaude_code/llm/codex/__init__.py +5 -0
- klaude_code/llm/codex/client.py +149 -0
- klaude_code/llm/google/__init__.py +3 -0
- klaude_code/llm/google/client.py +309 -0
- klaude_code/llm/google/input.py +215 -0
- klaude_code/llm/input_common.py +3 -9
- klaude_code/llm/openai_compatible/client.py +72 -164
- klaude_code/llm/openai_compatible/input.py +6 -4
- klaude_code/llm/openai_compatible/stream.py +273 -0
- klaude_code/llm/openai_compatible/tool_call_accumulator.py +17 -1
- klaude_code/llm/openrouter/client.py +89 -160
- klaude_code/llm/openrouter/input.py +18 -30
- klaude_code/llm/openrouter/reasoning.py +118 -0
- klaude_code/llm/registry.py +39 -7
- klaude_code/llm/responses/client.py +184 -171
- klaude_code/llm/responses/input.py +20 -1
- klaude_code/llm/usage.py +17 -12
- klaude_code/protocol/commands.py +17 -1
- klaude_code/protocol/events.py +31 -4
- klaude_code/protocol/llm_param.py +13 -10
- klaude_code/protocol/model.py +232 -29
- klaude_code/protocol/op.py +90 -1
- klaude_code/protocol/op_handler.py +35 -1
- klaude_code/protocol/sub_agent/__init__.py +117 -0
- klaude_code/protocol/sub_agent/explore.py +63 -0
- klaude_code/protocol/sub_agent/oracle.py +91 -0
- klaude_code/protocol/sub_agent/task.py +61 -0
- klaude_code/protocol/sub_agent/web.py +79 -0
- klaude_code/protocol/tools.py +4 -2
- klaude_code/session/__init__.py +2 -2
- klaude_code/session/codec.py +71 -0
- klaude_code/session/export.py +293 -86
- klaude_code/session/selector.py +89 -67
- klaude_code/session/session.py +320 -309
- klaude_code/session/store.py +220 -0
- klaude_code/session/templates/export_session.html +595 -83
- klaude_code/session/templates/mermaid_viewer.html +926 -0
- klaude_code/skill/__init__.py +27 -0
- klaude_code/skill/assets/deslop/SKILL.md +17 -0
- klaude_code/skill/assets/dev-docs/SKILL.md +108 -0
- klaude_code/skill/assets/handoff/SKILL.md +39 -0
- klaude_code/skill/assets/jj-workspace/SKILL.md +20 -0
- klaude_code/skill/assets/skill-creator/SKILL.md +139 -0
- klaude_code/{core/tool/memory/skill_loader.py → skill/loader.py} +55 -15
- klaude_code/skill/manager.py +70 -0
- klaude_code/skill/system_skills.py +192 -0
- klaude_code/trace/__init__.py +20 -2
- klaude_code/trace/log.py +150 -5
- klaude_code/ui/__init__.py +4 -9
- klaude_code/ui/core/input.py +1 -1
- klaude_code/ui/core/stage_manager.py +7 -7
- klaude_code/ui/modes/debug/display.py +2 -1
- klaude_code/ui/modes/repl/__init__.py +3 -48
- klaude_code/ui/modes/repl/clipboard.py +5 -5
- klaude_code/ui/modes/repl/completers.py +487 -123
- klaude_code/ui/modes/repl/display.py +5 -4
- klaude_code/ui/modes/repl/event_handler.py +370 -117
- klaude_code/ui/modes/repl/input_prompt_toolkit.py +552 -105
- klaude_code/ui/modes/repl/key_bindings.py +146 -23
- klaude_code/ui/modes/repl/renderer.py +189 -99
- klaude_code/ui/renderers/assistant.py +9 -2
- klaude_code/ui/renderers/bash_syntax.py +178 -0
- klaude_code/ui/renderers/common.py +78 -0
- klaude_code/ui/renderers/developer.py +104 -48
- klaude_code/ui/renderers/diffs.py +87 -6
- klaude_code/ui/renderers/errors.py +11 -6
- klaude_code/ui/renderers/mermaid_viewer.py +57 -0
- klaude_code/ui/renderers/metadata.py +112 -76
- klaude_code/ui/renderers/sub_agent.py +92 -7
- klaude_code/ui/renderers/thinking.py +40 -18
- klaude_code/ui/renderers/tools.py +405 -227
- klaude_code/ui/renderers/user_input.py +73 -13
- klaude_code/ui/rich/__init__.py +10 -1
- klaude_code/ui/rich/cjk_wrap.py +228 -0
- klaude_code/ui/rich/code_panel.py +131 -0
- klaude_code/ui/rich/live.py +17 -0
- klaude_code/ui/rich/markdown.py +305 -170
- klaude_code/ui/rich/searchable_text.py +10 -13
- klaude_code/ui/rich/status.py +190 -49
- klaude_code/ui/rich/theme.py +135 -39
- klaude_code/ui/terminal/__init__.py +55 -0
- klaude_code/ui/terminal/color.py +1 -1
- klaude_code/ui/terminal/control.py +13 -22
- klaude_code/ui/terminal/notifier.py +44 -4
- klaude_code/ui/terminal/selector.py +658 -0
- klaude_code/ui/utils/common.py +0 -18
- klaude_code-1.8.0.dist-info/METADATA +377 -0
- klaude_code-1.8.0.dist-info/RECORD +219 -0
- {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/entry_points.txt +1 -0
- klaude_code/command/diff_cmd.py +0 -138
- klaude_code/command/prompt-dev-docs-update.md +0 -56
- klaude_code/command/prompt-dev-docs.md +0 -46
- klaude_code/config/list_model.py +0 -162
- klaude_code/core/manager/agent_manager.py +0 -127
- klaude_code/core/prompts/prompt-subagent-webfetch.md +0 -46
- klaude_code/core/tool/file/multi_edit_tool.md +0 -42
- klaude_code/core/tool/file/multi_edit_tool.py +0 -199
- klaude_code/core/tool/memory/memory_tool.md +0 -16
- klaude_code/core/tool/memory/memory_tool.py +0 -462
- klaude_code/llm/openrouter/reasoning_handler.py +0 -209
- klaude_code/protocol/sub_agent.py +0 -348
- klaude_code/ui/utils/debouncer.py +0 -42
- klaude_code-1.2.6.dist-info/METADATA +0 -178
- klaude_code-1.2.6.dist-info/RECORD +0 -167
- /klaude_code/core/prompts/{prompt-subagent.md → prompt-sub-agent.md} +0 -0
- /klaude_code/core/tool/{memory → skill}/__init__.py +0 -0
- /klaude_code/core/tool/{memory → skill}/skill_tool.md +0 -0
- {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/WHEEL +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import
|
|
2
|
+
import os
|
|
3
3
|
from collections.abc import AsyncGenerator
|
|
4
|
-
from typing import override
|
|
4
|
+
from typing import Any, override
|
|
5
5
|
|
|
6
6
|
import anthropic
|
|
7
7
|
import httpx
|
|
8
|
-
from anthropic import
|
|
8
|
+
from anthropic import APIError
|
|
9
9
|
from anthropic.types.beta.beta_input_json_delta import BetaInputJSONDelta
|
|
10
10
|
from anthropic.types.beta.beta_raw_content_block_delta_event import BetaRawContentBlockDeltaEvent
|
|
11
11
|
from anthropic.types.beta.beta_raw_content_block_start_event import BetaRawContentBlockStartEvent
|
|
@@ -16,26 +16,190 @@ from anthropic.types.beta.beta_signature_delta import BetaSignatureDelta
|
|
|
16
16
|
from anthropic.types.beta.beta_text_delta import BetaTextDelta
|
|
17
17
|
from anthropic.types.beta.beta_thinking_delta import BetaThinkingDelta
|
|
18
18
|
from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
|
|
19
|
+
from anthropic.types.beta.message_create_params import MessageCreateParamsStreaming
|
|
19
20
|
|
|
20
21
|
from klaude_code import const
|
|
21
22
|
from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
|
|
22
|
-
from klaude_code.llm.client import LLMClientABC
|
|
23
|
+
from klaude_code.llm.client import LLMClientABC
|
|
23
24
|
from klaude_code.llm.input_common import apply_config_defaults
|
|
24
25
|
from klaude_code.llm.registry import register
|
|
25
|
-
from klaude_code.llm.usage import
|
|
26
|
+
from klaude_code.llm.usage import MetadataTracker
|
|
26
27
|
from klaude_code.protocol import llm_param, model
|
|
27
28
|
from klaude_code.trace import DebugType, log_debug
|
|
28
29
|
|
|
29
30
|
|
|
31
|
+
def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStreaming:
|
|
32
|
+
"""Build Anthropic API request parameters."""
|
|
33
|
+
messages = convert_history_to_input(param.input, param.model)
|
|
34
|
+
tools = convert_tool_schema(param.tools)
|
|
35
|
+
system = convert_system_to_input(param.system)
|
|
36
|
+
|
|
37
|
+
payload: MessageCreateParamsStreaming = {
|
|
38
|
+
"model": str(param.model),
|
|
39
|
+
"tool_choice": {
|
|
40
|
+
"type": "auto",
|
|
41
|
+
"disable_parallel_tool_use": False,
|
|
42
|
+
},
|
|
43
|
+
"stream": True,
|
|
44
|
+
"max_tokens": param.max_tokens or const.DEFAULT_MAX_TOKENS,
|
|
45
|
+
"temperature": param.temperature or const.DEFAULT_TEMPERATURE,
|
|
46
|
+
"messages": messages,
|
|
47
|
+
"system": system,
|
|
48
|
+
"tools": tools,
|
|
49
|
+
"betas": ["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if param.thinking and param.thinking.type == "enabled":
|
|
53
|
+
payload["thinking"] = anthropic.types.ThinkingConfigEnabledParam(
|
|
54
|
+
type="enabled",
|
|
55
|
+
budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
return payload
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
async def parse_anthropic_stream(
|
|
62
|
+
stream: Any,
|
|
63
|
+
param: llm_param.LLMCallParameter,
|
|
64
|
+
metadata_tracker: MetadataTracker,
|
|
65
|
+
) -> AsyncGenerator[model.ConversationItem]:
|
|
66
|
+
"""Parse Anthropic beta messages stream and yield conversation items.
|
|
67
|
+
|
|
68
|
+
This function is shared between AnthropicClient and BedrockClient.
|
|
69
|
+
"""
|
|
70
|
+
accumulated_thinking: list[str] = []
|
|
71
|
+
accumulated_content: list[str] = []
|
|
72
|
+
response_id: str | None = None
|
|
73
|
+
|
|
74
|
+
current_tool_name: str | None = None
|
|
75
|
+
current_tool_call_id: str | None = None
|
|
76
|
+
current_tool_inputs: list[str] | None = None
|
|
77
|
+
|
|
78
|
+
input_token = 0
|
|
79
|
+
cached_token = 0
|
|
80
|
+
|
|
81
|
+
async for event in await stream:
|
|
82
|
+
log_debug(
|
|
83
|
+
f"[{event.type}]",
|
|
84
|
+
event.model_dump_json(exclude_none=True),
|
|
85
|
+
style="blue",
|
|
86
|
+
debug_type=DebugType.LLM_STREAM,
|
|
87
|
+
)
|
|
88
|
+
match event:
|
|
89
|
+
case BetaRawMessageStartEvent() as event:
|
|
90
|
+
response_id = event.message.id
|
|
91
|
+
cached_token = event.message.usage.cache_read_input_tokens or 0
|
|
92
|
+
input_token = event.message.usage.input_tokens
|
|
93
|
+
yield model.StartItem(response_id=response_id)
|
|
94
|
+
case BetaRawContentBlockDeltaEvent() as event:
|
|
95
|
+
match event.delta:
|
|
96
|
+
case BetaThinkingDelta() as delta:
|
|
97
|
+
if delta.thinking:
|
|
98
|
+
metadata_tracker.record_token()
|
|
99
|
+
accumulated_thinking.append(delta.thinking)
|
|
100
|
+
yield model.ReasoningTextDelta(
|
|
101
|
+
content=delta.thinking,
|
|
102
|
+
response_id=response_id,
|
|
103
|
+
)
|
|
104
|
+
case BetaSignatureDelta() as delta:
|
|
105
|
+
yield model.ReasoningEncryptedItem(
|
|
106
|
+
encrypted_content=delta.signature,
|
|
107
|
+
response_id=response_id,
|
|
108
|
+
model=str(param.model),
|
|
109
|
+
)
|
|
110
|
+
case BetaTextDelta() as delta:
|
|
111
|
+
if delta.text:
|
|
112
|
+
metadata_tracker.record_token()
|
|
113
|
+
accumulated_content.append(delta.text)
|
|
114
|
+
yield model.AssistantMessageDelta(
|
|
115
|
+
content=delta.text,
|
|
116
|
+
response_id=response_id,
|
|
117
|
+
)
|
|
118
|
+
case BetaInputJSONDelta() as delta:
|
|
119
|
+
if current_tool_inputs is not None:
|
|
120
|
+
if delta.partial_json:
|
|
121
|
+
metadata_tracker.record_token()
|
|
122
|
+
current_tool_inputs.append(delta.partial_json)
|
|
123
|
+
case _:
|
|
124
|
+
pass
|
|
125
|
+
case BetaRawContentBlockStartEvent() as event:
|
|
126
|
+
match event.content_block:
|
|
127
|
+
case BetaToolUseBlock() as block:
|
|
128
|
+
metadata_tracker.record_token()
|
|
129
|
+
yield model.ToolCallStartItem(
|
|
130
|
+
response_id=response_id,
|
|
131
|
+
call_id=block.id,
|
|
132
|
+
name=block.name,
|
|
133
|
+
)
|
|
134
|
+
current_tool_name = block.name
|
|
135
|
+
current_tool_call_id = block.id
|
|
136
|
+
current_tool_inputs = []
|
|
137
|
+
case _:
|
|
138
|
+
pass
|
|
139
|
+
case BetaRawContentBlockStopEvent():
|
|
140
|
+
if len(accumulated_thinking) > 0:
|
|
141
|
+
metadata_tracker.record_token()
|
|
142
|
+
full_thinking = "".join(accumulated_thinking)
|
|
143
|
+
yield model.ReasoningTextItem(
|
|
144
|
+
content=full_thinking,
|
|
145
|
+
response_id=response_id,
|
|
146
|
+
model=str(param.model),
|
|
147
|
+
)
|
|
148
|
+
accumulated_thinking.clear()
|
|
149
|
+
if len(accumulated_content) > 0:
|
|
150
|
+
metadata_tracker.record_token()
|
|
151
|
+
yield model.AssistantMessageItem(
|
|
152
|
+
content="".join(accumulated_content),
|
|
153
|
+
response_id=response_id,
|
|
154
|
+
)
|
|
155
|
+
accumulated_content.clear()
|
|
156
|
+
if current_tool_name and current_tool_call_id:
|
|
157
|
+
metadata_tracker.record_token()
|
|
158
|
+
yield model.ToolCallItem(
|
|
159
|
+
name=current_tool_name,
|
|
160
|
+
call_id=current_tool_call_id,
|
|
161
|
+
arguments="".join(current_tool_inputs) if current_tool_inputs else "",
|
|
162
|
+
response_id=response_id,
|
|
163
|
+
)
|
|
164
|
+
current_tool_name = None
|
|
165
|
+
current_tool_call_id = None
|
|
166
|
+
current_tool_inputs = None
|
|
167
|
+
case BetaRawMessageDeltaEvent() as event:
|
|
168
|
+
metadata_tracker.set_usage(
|
|
169
|
+
model.Usage(
|
|
170
|
+
input_tokens=input_token + cached_token,
|
|
171
|
+
output_tokens=event.usage.output_tokens,
|
|
172
|
+
cached_tokens=cached_token,
|
|
173
|
+
context_size=input_token + cached_token + event.usage.output_tokens,
|
|
174
|
+
context_limit=param.context_limit,
|
|
175
|
+
max_tokens=param.max_tokens,
|
|
176
|
+
)
|
|
177
|
+
)
|
|
178
|
+
metadata_tracker.set_model_name(str(param.model))
|
|
179
|
+
metadata_tracker.set_response_id(response_id)
|
|
180
|
+
yield metadata_tracker.finalize()
|
|
181
|
+
case _:
|
|
182
|
+
pass
|
|
183
|
+
|
|
184
|
+
|
|
30
185
|
@register(llm_param.LLMClientProtocol.ANTHROPIC)
|
|
31
186
|
class AnthropicClient(LLMClientABC):
|
|
32
187
|
def __init__(self, config: llm_param.LLMConfigParameter):
|
|
33
188
|
super().__init__(config)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
)
|
|
189
|
+
# Remove ANTHROPIC_AUTH_TOKEN env var to prevent anthropic SDK from adding
|
|
190
|
+
# Authorization: Bearer header that may conflict with third-party APIs
|
|
191
|
+
# (e.g., deepseek, moonshot) that use Authorization header for authentication.
|
|
192
|
+
# The API key will be sent via X-Api-Key header instead.
|
|
193
|
+
saved_auth_token = os.environ.pop("ANTHROPIC_AUTH_TOKEN", None)
|
|
194
|
+
try:
|
|
195
|
+
client = anthropic.AsyncAnthropic(
|
|
196
|
+
api_key=config.api_key,
|
|
197
|
+
base_url=config.base_url,
|
|
198
|
+
timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
|
|
199
|
+
)
|
|
200
|
+
finally:
|
|
201
|
+
if saved_auth_token is not None:
|
|
202
|
+
os.environ["ANTHROPIC_AUTH_TOKEN"] = saved_auth_token
|
|
39
203
|
self.client: anthropic.AsyncAnthropic = client
|
|
40
204
|
|
|
41
205
|
@classmethod
|
|
@@ -44,178 +208,26 @@ class AnthropicClient(LLMClientABC):
|
|
|
44
208
|
return cls(config)
|
|
45
209
|
|
|
46
210
|
@override
|
|
47
|
-
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
|
|
211
|
+
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
|
|
48
212
|
param = apply_config_defaults(param, self.get_llm_config())
|
|
49
213
|
|
|
50
|
-
|
|
51
|
-
first_token_time: float | None = None
|
|
52
|
-
last_token_time: float | None = None
|
|
53
|
-
|
|
54
|
-
messages = convert_history_to_input(param.input, param.model)
|
|
55
|
-
tools = convert_tool_schema(param.tools)
|
|
56
|
-
system = convert_system_to_input(param.system)
|
|
57
|
-
|
|
58
|
-
stream = call_with_logged_payload(
|
|
59
|
-
self.client.beta.messages.create,
|
|
60
|
-
model=str(param.model),
|
|
61
|
-
tool_choice={
|
|
62
|
-
"type": "auto",
|
|
63
|
-
"disable_parallel_tool_use": False,
|
|
64
|
-
},
|
|
65
|
-
stream=True,
|
|
66
|
-
max_tokens=param.max_tokens or const.DEFAULT_MAX_TOKENS,
|
|
67
|
-
temperature=param.temperature or const.DEFAULT_TEMPERATURE,
|
|
68
|
-
messages=messages,
|
|
69
|
-
system=system,
|
|
70
|
-
tools=tools,
|
|
71
|
-
betas=["interleaved-thinking-2025-05-14", "context-1m-2025-08-07"],
|
|
72
|
-
thinking=anthropic.types.ThinkingConfigEnabledParam(
|
|
73
|
-
type=param.thinking.type,
|
|
74
|
-
budget_tokens=param.thinking.budget_tokens or const.DEFAULT_ANTHROPIC_THINKING_BUDGET_TOKENS,
|
|
75
|
-
)
|
|
76
|
-
if param.thinking and param.thinking.type == "enabled"
|
|
77
|
-
else anthropic.types.ThinkingConfigDisabledParam(
|
|
78
|
-
type="disabled",
|
|
79
|
-
),
|
|
80
|
-
extra_headers={"extra": json.dumps({"session_id": param.session_id})},
|
|
81
|
-
)
|
|
214
|
+
metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
|
|
82
215
|
|
|
83
|
-
|
|
84
|
-
accumulated_content: list[str] = []
|
|
85
|
-
response_id: str | None = None
|
|
216
|
+
payload = build_payload(param)
|
|
86
217
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
218
|
+
log_debug(
|
|
219
|
+
json.dumps(payload, ensure_ascii=False, default=str),
|
|
220
|
+
style="yellow",
|
|
221
|
+
debug_type=DebugType.LLM_PAYLOAD,
|
|
222
|
+
)
|
|
90
223
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
224
|
+
stream = self.client.beta.messages.create(
|
|
225
|
+
**payload,
|
|
226
|
+
extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
|
|
227
|
+
)
|
|
94
228
|
|
|
95
229
|
try:
|
|
96
|
-
async for
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
style="blue",
|
|
101
|
-
debug_type=DebugType.LLM_STREAM,
|
|
102
|
-
)
|
|
103
|
-
match event:
|
|
104
|
-
case BetaRawMessageStartEvent() as event:
|
|
105
|
-
response_id = event.message.id
|
|
106
|
-
cached_tokens = event.message.usage.cache_read_input_tokens or 0
|
|
107
|
-
input_tokens = (event.message.usage.input_tokens or 0) + (
|
|
108
|
-
event.message.usage.cache_creation_input_tokens or 0
|
|
109
|
-
)
|
|
110
|
-
output_tokens = event.message.usage.output_tokens or 0
|
|
111
|
-
yield model.StartItem(response_id=response_id)
|
|
112
|
-
case BetaRawContentBlockDeltaEvent() as event:
|
|
113
|
-
match event.delta:
|
|
114
|
-
case BetaThinkingDelta() as delta:
|
|
115
|
-
if first_token_time is None:
|
|
116
|
-
first_token_time = time.time()
|
|
117
|
-
last_token_time = time.time()
|
|
118
|
-
accumulated_thinking.append(delta.thinking)
|
|
119
|
-
case BetaSignatureDelta() as delta:
|
|
120
|
-
if first_token_time is None:
|
|
121
|
-
first_token_time = time.time()
|
|
122
|
-
last_token_time = time.time()
|
|
123
|
-
yield model.ReasoningEncryptedItem(
|
|
124
|
-
encrypted_content=delta.signature,
|
|
125
|
-
response_id=response_id,
|
|
126
|
-
model=str(param.model),
|
|
127
|
-
)
|
|
128
|
-
case BetaTextDelta() as delta:
|
|
129
|
-
if first_token_time is None:
|
|
130
|
-
first_token_time = time.time()
|
|
131
|
-
last_token_time = time.time()
|
|
132
|
-
accumulated_content.append(delta.text)
|
|
133
|
-
yield model.AssistantMessageDelta(
|
|
134
|
-
content=delta.text,
|
|
135
|
-
response_id=response_id,
|
|
136
|
-
)
|
|
137
|
-
case BetaInputJSONDelta() as delta:
|
|
138
|
-
if first_token_time is None:
|
|
139
|
-
first_token_time = time.time()
|
|
140
|
-
last_token_time = time.time()
|
|
141
|
-
if current_tool_inputs is not None:
|
|
142
|
-
current_tool_inputs.append(delta.partial_json)
|
|
143
|
-
case _:
|
|
144
|
-
pass
|
|
145
|
-
case BetaRawContentBlockStartEvent() as event:
|
|
146
|
-
match event.content_block:
|
|
147
|
-
case BetaToolUseBlock() as block:
|
|
148
|
-
yield model.ToolCallStartItem(
|
|
149
|
-
response_id=response_id,
|
|
150
|
-
call_id=block.id,
|
|
151
|
-
name=block.name,
|
|
152
|
-
)
|
|
153
|
-
current_tool_name = block.name
|
|
154
|
-
current_tool_call_id = block.id
|
|
155
|
-
current_tool_inputs = []
|
|
156
|
-
case _:
|
|
157
|
-
pass
|
|
158
|
-
case BetaRawContentBlockStopEvent() as event:
|
|
159
|
-
if len(accumulated_thinking) > 0:
|
|
160
|
-
full_thinking = "".join(accumulated_thinking)
|
|
161
|
-
yield model.ReasoningTextItem(
|
|
162
|
-
content=full_thinking,
|
|
163
|
-
response_id=response_id,
|
|
164
|
-
model=str(param.model),
|
|
165
|
-
)
|
|
166
|
-
accumulated_thinking.clear()
|
|
167
|
-
if len(accumulated_content) > 0:
|
|
168
|
-
yield model.AssistantMessageItem(
|
|
169
|
-
content="".join(accumulated_content),
|
|
170
|
-
response_id=response_id,
|
|
171
|
-
)
|
|
172
|
-
accumulated_content.clear()
|
|
173
|
-
if current_tool_name and current_tool_call_id:
|
|
174
|
-
yield model.ToolCallItem(
|
|
175
|
-
name=current_tool_name,
|
|
176
|
-
call_id=current_tool_call_id,
|
|
177
|
-
arguments="".join(current_tool_inputs) if current_tool_inputs else "",
|
|
178
|
-
response_id=response_id,
|
|
179
|
-
)
|
|
180
|
-
current_tool_name = None
|
|
181
|
-
current_tool_call_id = None
|
|
182
|
-
current_tool_inputs = None
|
|
183
|
-
case BetaRawMessageDeltaEvent() as event:
|
|
184
|
-
input_tokens += (event.usage.input_tokens or 0) + (event.usage.cache_creation_input_tokens or 0)
|
|
185
|
-
output_tokens += event.usage.output_tokens or 0
|
|
186
|
-
cached_tokens += event.usage.cache_read_input_tokens or 0
|
|
187
|
-
total_tokens = input_tokens + cached_tokens + output_tokens
|
|
188
|
-
context_usage_percent = (
|
|
189
|
-
(total_tokens / param.context_limit) * 100 if param.context_limit else None
|
|
190
|
-
)
|
|
191
|
-
|
|
192
|
-
throughput_tps: float | None = None
|
|
193
|
-
first_token_latency_ms: float | None = None
|
|
194
|
-
|
|
195
|
-
if first_token_time is not None:
|
|
196
|
-
first_token_latency_ms = (first_token_time - request_start_time) * 1000
|
|
197
|
-
|
|
198
|
-
if first_token_time is not None and last_token_time is not None and output_tokens > 0:
|
|
199
|
-
time_duration = last_token_time - first_token_time
|
|
200
|
-
if time_duration >= 0.15:
|
|
201
|
-
throughput_tps = output_tokens / time_duration
|
|
202
|
-
|
|
203
|
-
usage = model.Usage(
|
|
204
|
-
input_tokens=input_tokens,
|
|
205
|
-
output_tokens=output_tokens,
|
|
206
|
-
cached_tokens=cached_tokens,
|
|
207
|
-
total_tokens=total_tokens,
|
|
208
|
-
context_usage_percent=context_usage_percent,
|
|
209
|
-
throughput_tps=throughput_tps,
|
|
210
|
-
first_token_latency_ms=first_token_latency_ms,
|
|
211
|
-
)
|
|
212
|
-
calculate_cost(usage, self._config.cost)
|
|
213
|
-
yield model.ResponseMetadataItem(
|
|
214
|
-
usage=usage,
|
|
215
|
-
response_id=response_id,
|
|
216
|
-
model_name=str(param.model),
|
|
217
|
-
)
|
|
218
|
-
case _:
|
|
219
|
-
pass
|
|
220
|
-
except RateLimitError as e:
|
|
221
|
-
yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
|
|
230
|
+
async for item in parse_anthropic_stream(stream, param, metadata_tracker):
|
|
231
|
+
yield item
|
|
232
|
+
except (APIError, httpx.HTTPError) as e:
|
|
233
|
+
yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
@@ -73,7 +73,8 @@ def _user_group_to_message(group: UserGroup) -> BetaMessageParam:
|
|
|
73
73
|
return {"role": "user", "content": blocks}
|
|
74
74
|
|
|
75
75
|
|
|
76
|
-
def
|
|
76
|
+
def _tool_group_to_block(group: ToolGroup) -> dict[str, object]:
|
|
77
|
+
"""Convert a single ToolGroup to a tool_result block."""
|
|
77
78
|
tool_content: list[BetaTextBlockParam | BetaImageBlockParam] = []
|
|
78
79
|
merged_text = merge_reminder_text(
|
|
79
80
|
group.tool_result.output or "<system-reminder>Tool ran without output or errors</system-reminder>",
|
|
@@ -84,34 +85,41 @@ def _tool_group_to_message(group: ToolGroup) -> BetaMessageParam:
|
|
|
84
85
|
tool_content.append(_image_part_to_block(image))
|
|
85
86
|
for image in group.reminder_images:
|
|
86
87
|
tool_content.append(_image_part_to_block(image))
|
|
88
|
+
return {
|
|
89
|
+
"type": "tool_result",
|
|
90
|
+
"tool_use_id": group.tool_result.call_id,
|
|
91
|
+
"is_error": group.tool_result.status == "error",
|
|
92
|
+
"content": tool_content,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _tool_groups_to_message(groups: list[ToolGroup]) -> BetaMessageParam:
|
|
97
|
+
"""Convert one or more ToolGroups to a single user message with multiple tool_result blocks."""
|
|
87
98
|
return {
|
|
88
99
|
"role": "user",
|
|
89
|
-
"content": [
|
|
90
|
-
{
|
|
91
|
-
"type": "tool_result",
|
|
92
|
-
"tool_use_id": group.tool_result.call_id,
|
|
93
|
-
"is_error": group.tool_result.status == "error",
|
|
94
|
-
"content": tool_content,
|
|
95
|
-
}
|
|
96
|
-
],
|
|
100
|
+
"content": [_tool_group_to_block(group) for group in groups],
|
|
97
101
|
}
|
|
98
102
|
|
|
99
103
|
|
|
100
104
|
def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -> BetaMessageParam:
|
|
101
105
|
content: list[dict[str, object]] = []
|
|
102
106
|
current_reasoning_content: str | None = None
|
|
107
|
+
degraded_thinking_texts: list[str] = []
|
|
103
108
|
|
|
104
109
|
# Process reasoning items in original order so that text and
|
|
105
110
|
# encrypted parts are paired correctly for the given model.
|
|
111
|
+
# For cross-model scenarios, degrade thinking to plain text.
|
|
106
112
|
for item in group.reasoning_items:
|
|
107
113
|
if isinstance(item, model.ReasoningTextItem):
|
|
108
114
|
if model_name != item.model:
|
|
109
|
-
|
|
110
|
-
|
|
115
|
+
# Cross-model: collect thinking text for degradation
|
|
116
|
+
if item.content:
|
|
117
|
+
degraded_thinking_texts.append(item.content)
|
|
118
|
+
else:
|
|
119
|
+
current_reasoning_content = item.content
|
|
111
120
|
else:
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
if item.encrypted_content and len(item.encrypted_content) > 0:
|
|
121
|
+
# Same model: preserve signature
|
|
122
|
+
if model_name == item.model and item.encrypted_content and len(item.encrypted_content) > 0:
|
|
115
123
|
content.append(
|
|
116
124
|
{
|
|
117
125
|
"type": "thinking",
|
|
@@ -127,6 +135,11 @@ def _assistant_group_to_message(group: AssistantGroup, model_name: str | None) -
|
|
|
127
135
|
if len(current_reasoning_content or "") > 0:
|
|
128
136
|
content.insert(0, {"type": "thinking", "thinking": current_reasoning_content})
|
|
129
137
|
|
|
138
|
+
# Cross-model: degrade thinking to plain text with <thinking> tags
|
|
139
|
+
if degraded_thinking_texts:
|
|
140
|
+
degraded_text = "<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>"
|
|
141
|
+
content.insert(0, {"type": "text", "text": degraded_text})
|
|
142
|
+
|
|
130
143
|
if group.text_content:
|
|
131
144
|
content.append({"type": "text", "text": group.text_content})
|
|
132
145
|
|
|
@@ -165,15 +178,26 @@ def convert_history_to_input(
|
|
|
165
178
|
model_name: Model name. Used to verify that signatures are valid for the same model
|
|
166
179
|
"""
|
|
167
180
|
messages: list[BetaMessageParam] = []
|
|
181
|
+
pending_tool_groups: list[ToolGroup] = []
|
|
182
|
+
|
|
183
|
+
def flush_tool_groups() -> None:
|
|
184
|
+
nonlocal pending_tool_groups
|
|
185
|
+
if pending_tool_groups:
|
|
186
|
+
messages.append(_tool_groups_to_message(pending_tool_groups))
|
|
187
|
+
pending_tool_groups = []
|
|
188
|
+
|
|
168
189
|
for group in parse_message_groups(history):
|
|
169
190
|
match group:
|
|
170
191
|
case UserGroup():
|
|
192
|
+
flush_tool_groups()
|
|
171
193
|
messages.append(_user_group_to_message(group))
|
|
172
194
|
case ToolGroup():
|
|
173
|
-
|
|
195
|
+
pending_tool_groups.append(group)
|
|
174
196
|
case AssistantGroup():
|
|
197
|
+
flush_tool_groups()
|
|
175
198
|
messages.append(_assistant_group_to_message(group, model_name))
|
|
176
199
|
|
|
200
|
+
flush_tool_groups()
|
|
177
201
|
_add_cache_control(messages)
|
|
178
202
|
return messages
|
|
179
203
|
|
|
@@ -0,0 +1,60 @@
|
|
|
"""AWS Bedrock LLM client built on the Anthropic SDK."""

import json
from collections.abc import AsyncGenerator
from typing import override

import anthropic
import httpx
from anthropic import APIError

from klaude_code.llm.anthropic.client import build_payload, parse_anthropic_stream
from klaude_code.llm.client import LLMClientABC
from klaude_code.llm.input_common import apply_config_defaults
from klaude_code.llm.registry import register
from klaude_code.llm.usage import MetadataTracker
from klaude_code.protocol import llm_param, model
from klaude_code.trace import DebugType, log_debug


@register(llm_param.LLMClientProtocol.BEDROCK)
class BedrockClient(LLMClientABC):
    """Streams Anthropic-model completions through AWS Bedrock.

    Reuses the generic Anthropic payload builder and stream parser; only the
    underlying transport (``AsyncAnthropicBedrock``) differs from the plain
    Anthropic client.
    """

    def __init__(self, config: llm_param.LLMConfigParameter):
        super().__init__(config)
        # AWS credentials/region come straight from the LLM config; a long
        # overall timeout (300s) accommodates slow streaming completions while
        # keeping connect (15s) and read (285s) bounds explicit.
        self.client = anthropic.AsyncAnthropicBedrock(
            aws_access_key=config.aws_access_key,
            aws_secret_key=config.aws_secret_key,
            aws_region=config.aws_region,
            aws_session_token=config.aws_session_token,
            aws_profile=config.aws_profile,
            timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
        )

    @classmethod
    @override
    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
        """Factory entry point used by the client registry."""
        return cls(config)

    @override
    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
        """Yield conversation items for one streamed Bedrock completion.

        Stream-level failures are surfaced as ``StreamErrorItem`` rather than
        raised, so callers can render the error inline.
        """
        param = apply_config_defaults(param, self.get_llm_config())
        usage_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)

        request_payload = build_payload(param)
        log_debug(
            json.dumps(request_payload, ensure_ascii=False, default=str),
            style="yellow",
            debug_type=DebugType.LLM_PAYLOAD,
        )

        # Not awaited here: the coroutine/stream handle is consumed inside
        # parse_anthropic_stream, so any API error surfaces during iteration.
        response_stream = self.client.beta.messages.create(**request_payload)
        try:
            async for conversation_item in parse_anthropic_stream(response_stream, param, usage_tracker):
                yield conversation_item
        except (APIError, httpx.HTTPError) as e:
            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
klaude_code/llm/client.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
import json
|
|
2
1
|
from abc import ABC, abstractmethod
|
|
3
2
|
from collections.abc import AsyncGenerator
|
|
4
|
-
from typing import
|
|
3
|
+
from typing import ParamSpec, TypeVar, cast
|
|
5
4
|
|
|
6
5
|
from klaude_code.protocol import llm_param, model
|
|
7
|
-
from klaude_code.trace import DebugType, log_debug
|
|
8
6
|
|
|
9
7
|
|
|
10
8
|
class LLMClientABC(ABC):
|
|
@@ -17,9 +15,9 @@ class LLMClientABC(ABC):
|
|
|
17
15
|
pass
|
|
18
16
|
|
|
19
17
|
@abstractmethod
|
|
20
|
-
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
|
|
18
|
+
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
|
|
21
19
|
raise NotImplementedError
|
|
22
|
-
yield cast(model.ConversationItem, None)
|
|
20
|
+
yield cast(model.ConversationItem, None)
|
|
23
21
|
|
|
24
22
|
def get_llm_config(self) -> llm_param.LLMConfigParameter:
|
|
25
23
|
return self._config
|
|
@@ -28,22 +26,10 @@ class LLMClientABC(ABC):
|
|
|
28
26
|
def model_name(self) -> str:
|
|
29
27
|
return self._config.model or ""
|
|
30
28
|
|
|
29
|
+
@property
|
|
30
|
+
def protocol(self) -> llm_param.LLMClientProtocol:
|
|
31
|
+
return self._config.protocol
|
|
32
|
+
|
|
31
33
|
|
|
32
34
|
P = ParamSpec("P")
|
|
33
35
|
R = TypeVar("R")
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def call_with_logged_payload(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
|
|
37
|
-
"""Call an SDK function while logging the JSON payload.
|
|
38
|
-
|
|
39
|
-
The function reuses the original callable's type signature via ParamSpec
|
|
40
|
-
so static type checkers can validate arguments at the call site.
|
|
41
|
-
"""
|
|
42
|
-
|
|
43
|
-
payload = {k: v for k, v in kwargs.items() if v is not None}
|
|
44
|
-
log_debug(
|
|
45
|
-
json.dumps(payload, ensure_ascii=False, default=str),
|
|
46
|
-
style="yellow",
|
|
47
|
-
debug_type=DebugType.LLM_PAYLOAD,
|
|
48
|
-
)
|
|
49
|
-
return func(*args, **kwargs)
|