klaude-code 1.2.6__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/auth/__init__.py +24 -0
- klaude_code/auth/codex/__init__.py +20 -0
- klaude_code/auth/codex/exceptions.py +17 -0
- klaude_code/auth/codex/jwt_utils.py +45 -0
- klaude_code/auth/codex/oauth.py +229 -0
- klaude_code/auth/codex/token_manager.py +84 -0
- klaude_code/cli/auth_cmd.py +73 -0
- klaude_code/cli/config_cmd.py +91 -0
- klaude_code/cli/cost_cmd.py +338 -0
- klaude_code/cli/debug.py +78 -0
- klaude_code/cli/list_model.py +307 -0
- klaude_code/cli/main.py +233 -134
- klaude_code/cli/runtime.py +309 -117
- klaude_code/{version.py → cli/self_update.py} +114 -5
- klaude_code/cli/session_cmd.py +37 -21
- klaude_code/command/__init__.py +88 -27
- klaude_code/command/clear_cmd.py +8 -7
- klaude_code/command/command_abc.py +31 -31
- klaude_code/command/debug_cmd.py +79 -0
- klaude_code/command/export_cmd.py +19 -53
- klaude_code/command/export_online_cmd.py +154 -0
- klaude_code/command/fork_session_cmd.py +267 -0
- klaude_code/command/help_cmd.py +7 -8
- klaude_code/command/model_cmd.py +60 -10
- klaude_code/command/model_select.py +84 -0
- klaude_code/command/prompt-jj-describe.md +32 -0
- klaude_code/command/prompt_command.py +19 -11
- klaude_code/command/refresh_cmd.py +8 -10
- klaude_code/command/registry.py +139 -40
- klaude_code/command/release_notes_cmd.py +84 -0
- klaude_code/command/resume_cmd.py +111 -0
- klaude_code/command/status_cmd.py +104 -60
- klaude_code/command/terminal_setup_cmd.py +7 -9
- klaude_code/command/thinking_cmd.py +98 -0
- klaude_code/config/__init__.py +14 -6
- klaude_code/config/assets/__init__.py +1 -0
- klaude_code/config/assets/builtin_config.yaml +303 -0
- klaude_code/config/builtin_config.py +38 -0
- klaude_code/config/config.py +378 -109
- klaude_code/config/select_model.py +117 -53
- klaude_code/config/thinking.py +269 -0
- klaude_code/{const/__init__.py → const.py} +50 -19
- klaude_code/core/agent.py +20 -28
- klaude_code/core/executor.py +327 -112
- klaude_code/core/manager/__init__.py +2 -4
- klaude_code/core/manager/llm_clients.py +1 -15
- klaude_code/core/manager/llm_clients_builder.py +10 -11
- klaude_code/core/manager/sub_agent_manager.py +37 -6
- klaude_code/core/prompt.py +63 -44
- klaude_code/core/prompts/prompt-claude-code.md +2 -13
- klaude_code/core/prompts/prompt-codex-gpt-5-1-codex-max.md +117 -0
- klaude_code/core/prompts/prompt-codex-gpt-5-2-codex.md +117 -0
- klaude_code/core/prompts/prompt-codex.md +9 -42
- klaude_code/core/prompts/prompt-minimal.md +12 -0
- klaude_code/core/prompts/{prompt-subagent-explore.md → prompt-sub-agent-explore.md} +16 -3
- klaude_code/core/prompts/{prompt-subagent-oracle.md → prompt-sub-agent-oracle.md} +1 -2
- klaude_code/core/prompts/prompt-sub-agent-web.md +51 -0
- klaude_code/core/reminders.py +283 -95
- klaude_code/core/task.py +113 -75
- klaude_code/core/tool/__init__.py +24 -31
- klaude_code/core/tool/file/_utils.py +36 -0
- klaude_code/core/tool/file/apply_patch.py +17 -25
- klaude_code/core/tool/file/apply_patch_tool.py +57 -77
- klaude_code/core/tool/file/diff_builder.py +151 -0
- klaude_code/core/tool/file/edit_tool.py +50 -63
- klaude_code/core/tool/file/move_tool.md +41 -0
- klaude_code/core/tool/file/move_tool.py +435 -0
- klaude_code/core/tool/file/read_tool.md +1 -1
- klaude_code/core/tool/file/read_tool.py +86 -86
- klaude_code/core/tool/file/write_tool.py +59 -69
- klaude_code/core/tool/report_back_tool.py +84 -0
- klaude_code/core/tool/shell/bash_tool.py +265 -22
- klaude_code/core/tool/shell/command_safety.py +3 -6
- klaude_code/core/tool/{memory → skill}/skill_tool.py +16 -26
- klaude_code/core/tool/sub_agent_tool.py +13 -2
- klaude_code/core/tool/todo/todo_write_tool.md +0 -157
- klaude_code/core/tool/todo/todo_write_tool.py +1 -1
- klaude_code/core/tool/todo/todo_write_tool_raw.md +182 -0
- klaude_code/core/tool/todo/update_plan_tool.py +1 -1
- klaude_code/core/tool/tool_abc.py +18 -0
- klaude_code/core/tool/tool_context.py +27 -12
- klaude_code/core/tool/tool_registry.py +7 -7
- klaude_code/core/tool/tool_runner.py +44 -36
- klaude_code/core/tool/truncation.py +29 -14
- klaude_code/core/tool/web/mermaid_tool.md +43 -0
- klaude_code/core/tool/web/mermaid_tool.py +2 -5
- klaude_code/core/tool/web/web_fetch_tool.md +1 -1
- klaude_code/core/tool/web/web_fetch_tool.py +112 -22
- klaude_code/core/tool/web/web_search_tool.md +23 -0
- klaude_code/core/tool/web/web_search_tool.py +130 -0
- klaude_code/core/turn.py +168 -66
- klaude_code/llm/__init__.py +2 -10
- klaude_code/llm/anthropic/client.py +190 -178
- klaude_code/llm/anthropic/input.py +39 -15
- klaude_code/llm/bedrock/__init__.py +3 -0
- klaude_code/llm/bedrock/client.py +60 -0
- klaude_code/llm/client.py +7 -21
- klaude_code/llm/codex/__init__.py +5 -0
- klaude_code/llm/codex/client.py +149 -0
- klaude_code/llm/google/__init__.py +3 -0
- klaude_code/llm/google/client.py +309 -0
- klaude_code/llm/google/input.py +215 -0
- klaude_code/llm/input_common.py +3 -9
- klaude_code/llm/openai_compatible/client.py +72 -164
- klaude_code/llm/openai_compatible/input.py +6 -4
- klaude_code/llm/openai_compatible/stream.py +273 -0
- klaude_code/llm/openai_compatible/tool_call_accumulator.py +17 -1
- klaude_code/llm/openrouter/client.py +89 -160
- klaude_code/llm/openrouter/input.py +18 -30
- klaude_code/llm/openrouter/reasoning.py +118 -0
- klaude_code/llm/registry.py +39 -7
- klaude_code/llm/responses/client.py +184 -171
- klaude_code/llm/responses/input.py +20 -1
- klaude_code/llm/usage.py +17 -12
- klaude_code/protocol/commands.py +17 -1
- klaude_code/protocol/events.py +31 -4
- klaude_code/protocol/llm_param.py +13 -10
- klaude_code/protocol/model.py +232 -29
- klaude_code/protocol/op.py +90 -1
- klaude_code/protocol/op_handler.py +35 -1
- klaude_code/protocol/sub_agent/__init__.py +117 -0
- klaude_code/protocol/sub_agent/explore.py +63 -0
- klaude_code/protocol/sub_agent/oracle.py +91 -0
- klaude_code/protocol/sub_agent/task.py +61 -0
- klaude_code/protocol/sub_agent/web.py +79 -0
- klaude_code/protocol/tools.py +4 -2
- klaude_code/session/__init__.py +2 -2
- klaude_code/session/codec.py +71 -0
- klaude_code/session/export.py +293 -86
- klaude_code/session/selector.py +89 -67
- klaude_code/session/session.py +320 -309
- klaude_code/session/store.py +220 -0
- klaude_code/session/templates/export_session.html +595 -83
- klaude_code/session/templates/mermaid_viewer.html +926 -0
- klaude_code/skill/__init__.py +27 -0
- klaude_code/skill/assets/deslop/SKILL.md +17 -0
- klaude_code/skill/assets/dev-docs/SKILL.md +108 -0
- klaude_code/skill/assets/handoff/SKILL.md +39 -0
- klaude_code/skill/assets/jj-workspace/SKILL.md +20 -0
- klaude_code/skill/assets/skill-creator/SKILL.md +139 -0
- klaude_code/{core/tool/memory/skill_loader.py → skill/loader.py} +55 -15
- klaude_code/skill/manager.py +70 -0
- klaude_code/skill/system_skills.py +192 -0
- klaude_code/trace/__init__.py +20 -2
- klaude_code/trace/log.py +150 -5
- klaude_code/ui/__init__.py +4 -9
- klaude_code/ui/core/input.py +1 -1
- klaude_code/ui/core/stage_manager.py +7 -7
- klaude_code/ui/modes/debug/display.py +2 -1
- klaude_code/ui/modes/repl/__init__.py +3 -48
- klaude_code/ui/modes/repl/clipboard.py +5 -5
- klaude_code/ui/modes/repl/completers.py +487 -123
- klaude_code/ui/modes/repl/display.py +5 -4
- klaude_code/ui/modes/repl/event_handler.py +370 -117
- klaude_code/ui/modes/repl/input_prompt_toolkit.py +552 -105
- klaude_code/ui/modes/repl/key_bindings.py +146 -23
- klaude_code/ui/modes/repl/renderer.py +189 -99
- klaude_code/ui/renderers/assistant.py +9 -2
- klaude_code/ui/renderers/bash_syntax.py +178 -0
- klaude_code/ui/renderers/common.py +78 -0
- klaude_code/ui/renderers/developer.py +104 -48
- klaude_code/ui/renderers/diffs.py +87 -6
- klaude_code/ui/renderers/errors.py +11 -6
- klaude_code/ui/renderers/mermaid_viewer.py +57 -0
- klaude_code/ui/renderers/metadata.py +112 -76
- klaude_code/ui/renderers/sub_agent.py +92 -7
- klaude_code/ui/renderers/thinking.py +40 -18
- klaude_code/ui/renderers/tools.py +405 -227
- klaude_code/ui/renderers/user_input.py +73 -13
- klaude_code/ui/rich/__init__.py +10 -1
- klaude_code/ui/rich/cjk_wrap.py +228 -0
- klaude_code/ui/rich/code_panel.py +131 -0
- klaude_code/ui/rich/live.py +17 -0
- klaude_code/ui/rich/markdown.py +305 -170
- klaude_code/ui/rich/searchable_text.py +10 -13
- klaude_code/ui/rich/status.py +190 -49
- klaude_code/ui/rich/theme.py +135 -39
- klaude_code/ui/terminal/__init__.py +55 -0
- klaude_code/ui/terminal/color.py +1 -1
- klaude_code/ui/terminal/control.py +13 -22
- klaude_code/ui/terminal/notifier.py +44 -4
- klaude_code/ui/terminal/selector.py +658 -0
- klaude_code/ui/utils/common.py +0 -18
- klaude_code-1.8.0.dist-info/METADATA +377 -0
- klaude_code-1.8.0.dist-info/RECORD +219 -0
- {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/entry_points.txt +1 -0
- klaude_code/command/diff_cmd.py +0 -138
- klaude_code/command/prompt-dev-docs-update.md +0 -56
- klaude_code/command/prompt-dev-docs.md +0 -46
- klaude_code/config/list_model.py +0 -162
- klaude_code/core/manager/agent_manager.py +0 -127
- klaude_code/core/prompts/prompt-subagent-webfetch.md +0 -46
- klaude_code/core/tool/file/multi_edit_tool.md +0 -42
- klaude_code/core/tool/file/multi_edit_tool.py +0 -199
- klaude_code/core/tool/memory/memory_tool.md +0 -16
- klaude_code/core/tool/memory/memory_tool.py +0 -462
- klaude_code/llm/openrouter/reasoning_handler.py +0 -209
- klaude_code/protocol/sub_agent.py +0 -348
- klaude_code/ui/utils/debouncer.py +0 -42
- klaude_code-1.2.6.dist-info/METADATA +0 -178
- klaude_code-1.2.6.dist-info/RECORD +0 -167
- /klaude_code/core/prompts/{prompt-subagent.md → prompt-sub-agent.md} +0 -0
- /klaude_code/core/tool/{memory → skill}/__init__.py +0 -0
- /klaude_code/core/tool/{memory → skill}/skill_tool.md +0 -0
- {klaude_code-1.2.6.dist-info → klaude_code-1.8.0.dist-info}/WHEEL +0 -0
|
@@ -1,20 +1,181 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import time
|
|
3
2
|
from collections.abc import AsyncGenerator
|
|
4
|
-
from typing import override
|
|
3
|
+
from typing import TYPE_CHECKING, override
|
|
5
4
|
|
|
6
5
|
import httpx
|
|
7
|
-
|
|
6
|
+
import openai
|
|
7
|
+
from openai import AsyncAzureOpenAI, AsyncOpenAI
|
|
8
8
|
from openai.types import responses
|
|
9
|
+
from openai.types.responses.response_create_params import ResponseCreateParamsStreaming
|
|
9
10
|
|
|
10
|
-
from klaude_code.llm.client import LLMClientABC
|
|
11
|
+
from klaude_code.llm.client import LLMClientABC
|
|
11
12
|
from klaude_code.llm.input_common import apply_config_defaults
|
|
12
13
|
from klaude_code.llm.registry import register
|
|
13
14
|
from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
|
|
14
|
-
from klaude_code.llm.usage import
|
|
15
|
+
from klaude_code.llm.usage import MetadataTracker
|
|
15
16
|
from klaude_code.protocol import llm_param, model
|
|
16
17
|
from klaude_code.trace import DebugType, log_debug
|
|
17
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from openai import AsyncStream
|
|
21
|
+
from openai.types.responses import ResponseStreamEvent
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
|
|
25
|
+
"""Build OpenAI Responses API request parameters."""
|
|
26
|
+
inputs = convert_history_to_input(param.input, param.model)
|
|
27
|
+
tools = convert_tool_schema(param.tools)
|
|
28
|
+
|
|
29
|
+
payload: ResponseCreateParamsStreaming = {
|
|
30
|
+
"model": str(param.model),
|
|
31
|
+
"tool_choice": "auto",
|
|
32
|
+
"parallel_tool_calls": True,
|
|
33
|
+
"include": [
|
|
34
|
+
"reasoning.encrypted_content",
|
|
35
|
+
],
|
|
36
|
+
"store": False,
|
|
37
|
+
"stream": True,
|
|
38
|
+
"temperature": param.temperature,
|
|
39
|
+
"max_output_tokens": param.max_tokens,
|
|
40
|
+
"input": inputs,
|
|
41
|
+
"instructions": param.system,
|
|
42
|
+
"tools": tools,
|
|
43
|
+
"prompt_cache_key": param.session_id or "",
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
if param.thinking and param.thinking.reasoning_effort:
|
|
47
|
+
payload["reasoning"] = {
|
|
48
|
+
"effort": param.thinking.reasoning_effort,
|
|
49
|
+
"summary": param.thinking.reasoning_summary,
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if param.verbosity:
|
|
53
|
+
payload["text"] = {"verbosity": param.verbosity}
|
|
54
|
+
|
|
55
|
+
return payload
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def parse_responses_stream(
|
|
59
|
+
stream: "AsyncStream[ResponseStreamEvent]",
|
|
60
|
+
param: llm_param.LLMCallParameter,
|
|
61
|
+
metadata_tracker: MetadataTracker,
|
|
62
|
+
) -> AsyncGenerator[model.ConversationItem]:
|
|
63
|
+
"""Parse OpenAI Responses API stream events into ConversationItems."""
|
|
64
|
+
response_id: str | None = None
|
|
65
|
+
|
|
66
|
+
try:
|
|
67
|
+
async for event in stream:
|
|
68
|
+
log_debug(
|
|
69
|
+
f"[{event.type}]",
|
|
70
|
+
event.model_dump_json(exclude_none=True),
|
|
71
|
+
style="blue",
|
|
72
|
+
debug_type=DebugType.LLM_STREAM,
|
|
73
|
+
)
|
|
74
|
+
match event:
|
|
75
|
+
case responses.ResponseCreatedEvent() as event:
|
|
76
|
+
response_id = event.response.id
|
|
77
|
+
yield model.StartItem(response_id=response_id)
|
|
78
|
+
case responses.ResponseReasoningSummaryTextDeltaEvent() as event:
|
|
79
|
+
if event.delta:
|
|
80
|
+
metadata_tracker.record_token()
|
|
81
|
+
yield model.ReasoningTextDelta(
|
|
82
|
+
content=event.delta,
|
|
83
|
+
response_id=response_id,
|
|
84
|
+
)
|
|
85
|
+
case responses.ResponseReasoningSummaryTextDoneEvent() as event:
|
|
86
|
+
if event.text:
|
|
87
|
+
yield model.ReasoningTextItem(
|
|
88
|
+
content=event.text,
|
|
89
|
+
response_id=response_id,
|
|
90
|
+
model=str(param.model),
|
|
91
|
+
)
|
|
92
|
+
case responses.ResponseTextDeltaEvent() as event:
|
|
93
|
+
if event.delta:
|
|
94
|
+
metadata_tracker.record_token()
|
|
95
|
+
yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
|
|
96
|
+
case responses.ResponseOutputItemAddedEvent() as event:
|
|
97
|
+
if isinstance(event.item, responses.ResponseFunctionToolCall):
|
|
98
|
+
metadata_tracker.record_token()
|
|
99
|
+
yield model.ToolCallStartItem(
|
|
100
|
+
response_id=response_id,
|
|
101
|
+
call_id=event.item.call_id,
|
|
102
|
+
name=event.item.name,
|
|
103
|
+
)
|
|
104
|
+
case responses.ResponseOutputItemDoneEvent() as event:
|
|
105
|
+
match event.item:
|
|
106
|
+
case responses.ResponseReasoningItem() as item:
|
|
107
|
+
if item.encrypted_content:
|
|
108
|
+
metadata_tracker.record_token()
|
|
109
|
+
yield model.ReasoningEncryptedItem(
|
|
110
|
+
id=item.id,
|
|
111
|
+
encrypted_content=item.encrypted_content,
|
|
112
|
+
response_id=response_id,
|
|
113
|
+
model=str(param.model),
|
|
114
|
+
)
|
|
115
|
+
case responses.ResponseOutputMessage() as item:
|
|
116
|
+
metadata_tracker.record_token()
|
|
117
|
+
yield model.AssistantMessageItem(
|
|
118
|
+
content="\n".join(
|
|
119
|
+
[
|
|
120
|
+
part.text
|
|
121
|
+
for part in item.content
|
|
122
|
+
if isinstance(part, responses.ResponseOutputText)
|
|
123
|
+
]
|
|
124
|
+
),
|
|
125
|
+
id=item.id,
|
|
126
|
+
response_id=response_id,
|
|
127
|
+
)
|
|
128
|
+
case responses.ResponseFunctionToolCall() as item:
|
|
129
|
+
metadata_tracker.record_token()
|
|
130
|
+
yield model.ToolCallItem(
|
|
131
|
+
name=item.name,
|
|
132
|
+
arguments=item.arguments.strip(),
|
|
133
|
+
call_id=item.call_id,
|
|
134
|
+
id=item.id,
|
|
135
|
+
response_id=response_id,
|
|
136
|
+
)
|
|
137
|
+
case _:
|
|
138
|
+
pass
|
|
139
|
+
case responses.ResponseCompletedEvent() as event:
|
|
140
|
+
error_reason: str | None = None
|
|
141
|
+
if event.response.incomplete_details is not None:
|
|
142
|
+
error_reason = event.response.incomplete_details.reason
|
|
143
|
+
if event.response.usage is not None:
|
|
144
|
+
metadata_tracker.set_usage(
|
|
145
|
+
model.Usage(
|
|
146
|
+
input_tokens=event.response.usage.input_tokens,
|
|
147
|
+
output_tokens=event.response.usage.output_tokens,
|
|
148
|
+
cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
|
|
149
|
+
reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
|
|
150
|
+
context_size=event.response.usage.total_tokens,
|
|
151
|
+
context_limit=param.context_limit,
|
|
152
|
+
max_tokens=param.max_tokens,
|
|
153
|
+
)
|
|
154
|
+
)
|
|
155
|
+
metadata_tracker.set_model_name(str(param.model))
|
|
156
|
+
metadata_tracker.set_response_id(response_id)
|
|
157
|
+
yield metadata_tracker.finalize()
|
|
158
|
+
if event.response.status != "completed":
|
|
159
|
+
error_message = f"LLM response finished with status '{event.response.status}'"
|
|
160
|
+
if error_reason:
|
|
161
|
+
error_message = f"{error_message}: {error_reason}"
|
|
162
|
+
log_debug(
|
|
163
|
+
"[LLM status warning]",
|
|
164
|
+
error_message,
|
|
165
|
+
style="red",
|
|
166
|
+
debug_type=DebugType.LLM_STREAM,
|
|
167
|
+
)
|
|
168
|
+
yield model.StreamErrorItem(error=error_message)
|
|
169
|
+
case _:
|
|
170
|
+
log_debug(
|
|
171
|
+
"[Unhandled stream event]",
|
|
172
|
+
str(event),
|
|
173
|
+
style="red",
|
|
174
|
+
debug_type=DebugType.LLM_STREAM,
|
|
175
|
+
)
|
|
176
|
+
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
177
|
+
yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
178
|
+
|
|
18
179
|
|
|
19
180
|
@register(llm_param.LLMClientProtocol.RESPONSES)
|
|
20
181
|
class ResponsesClient(LLMClientABC):
|
|
@@ -43,174 +204,26 @@ class ResponsesClient(LLMClientABC):
|
|
|
43
204
|
return cls(config)
|
|
44
205
|
|
|
45
206
|
@override
|
|
46
|
-
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem
|
|
207
|
+
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
|
|
47
208
|
param = apply_config_defaults(param, self.get_llm_config())
|
|
48
209
|
|
|
49
|
-
|
|
50
|
-
first_token_time: float | None = None
|
|
51
|
-
last_token_time: float | None = None
|
|
52
|
-
response_id: str | None = None
|
|
53
|
-
|
|
54
|
-
inputs = convert_history_to_input(param.input, param.model)
|
|
55
|
-
tools = convert_tool_schema(param.tools)
|
|
56
|
-
|
|
57
|
-
parallel_tool_calls = True
|
|
58
|
-
|
|
59
|
-
stream = call_with_logged_payload(
|
|
60
|
-
self.client.responses.create,
|
|
61
|
-
model=str(param.model),
|
|
62
|
-
tool_choice="auto",
|
|
63
|
-
parallel_tool_calls=parallel_tool_calls, # OpenAI's Codex is always False, we try to enable it here. It seems gpt-5-codex has bugs when parallel_tool_calls is True.
|
|
64
|
-
include=[
|
|
65
|
-
"reasoning.encrypted_content",
|
|
66
|
-
],
|
|
67
|
-
store=param.store,
|
|
68
|
-
previous_response_id=param.previous_response_id,
|
|
69
|
-
stream=True,
|
|
70
|
-
temperature=param.temperature,
|
|
71
|
-
max_output_tokens=param.max_tokens,
|
|
72
|
-
input=inputs,
|
|
73
|
-
instructions=param.system,
|
|
74
|
-
tools=tools,
|
|
75
|
-
text={
|
|
76
|
-
"verbosity": param.verbosity,
|
|
77
|
-
},
|
|
78
|
-
reasoning={
|
|
79
|
-
"effort": param.thinking.reasoning_effort,
|
|
80
|
-
"summary": param.thinking.reasoning_summary,
|
|
81
|
-
}
|
|
82
|
-
if param.thinking and param.thinking.reasoning_effort
|
|
83
|
-
else None,
|
|
84
|
-
extra_headers={"extra": json.dumps({"session_id": param.session_id})},
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
try:
|
|
88
|
-
async for event in await stream:
|
|
89
|
-
log_debug(
|
|
90
|
-
f"[{event.type}]",
|
|
91
|
-
event.model_dump_json(exclude_none=True),
|
|
92
|
-
style="blue",
|
|
93
|
-
debug_type=DebugType.LLM_STREAM,
|
|
94
|
-
)
|
|
95
|
-
match event:
|
|
96
|
-
case responses.ResponseCreatedEvent() as event:
|
|
97
|
-
response_id = event.response.id
|
|
98
|
-
yield model.StartItem(response_id=response_id)
|
|
99
|
-
case responses.ResponseReasoningSummaryTextDoneEvent() as event:
|
|
100
|
-
if event.text:
|
|
101
|
-
yield model.ReasoningTextItem(
|
|
102
|
-
content=event.text,
|
|
103
|
-
response_id=response_id,
|
|
104
|
-
model=str(param.model),
|
|
105
|
-
)
|
|
106
|
-
case responses.ResponseTextDeltaEvent() as event:
|
|
107
|
-
if first_token_time is None:
|
|
108
|
-
first_token_time = time.time()
|
|
109
|
-
last_token_time = time.time()
|
|
110
|
-
yield model.AssistantMessageDelta(content=event.delta, response_id=response_id)
|
|
111
|
-
case responses.ResponseOutputItemAddedEvent() as event:
|
|
112
|
-
if isinstance(event.item, responses.ResponseFunctionToolCall):
|
|
113
|
-
yield model.ToolCallStartItem(
|
|
114
|
-
response_id=response_id,
|
|
115
|
-
call_id=event.item.call_id,
|
|
116
|
-
name=event.item.name,
|
|
117
|
-
)
|
|
118
|
-
case responses.ResponseOutputItemDoneEvent() as event:
|
|
119
|
-
match event.item:
|
|
120
|
-
case responses.ResponseReasoningItem() as item:
|
|
121
|
-
if item.encrypted_content:
|
|
122
|
-
yield model.ReasoningEncryptedItem(
|
|
123
|
-
id=item.id,
|
|
124
|
-
encrypted_content=item.encrypted_content,
|
|
125
|
-
response_id=response_id,
|
|
126
|
-
model=str(param.model),
|
|
127
|
-
)
|
|
128
|
-
case responses.ResponseOutputMessage() as item:
|
|
129
|
-
yield model.AssistantMessageItem(
|
|
130
|
-
content="\n".join(
|
|
131
|
-
[
|
|
132
|
-
part.text
|
|
133
|
-
for part in item.content
|
|
134
|
-
if isinstance(part, responses.ResponseOutputText)
|
|
135
|
-
]
|
|
136
|
-
),
|
|
137
|
-
id=item.id,
|
|
138
|
-
response_id=response_id,
|
|
139
|
-
)
|
|
140
|
-
case responses.ResponseFunctionToolCall() as item:
|
|
141
|
-
if first_token_time is None:
|
|
142
|
-
first_token_time = time.time()
|
|
143
|
-
last_token_time = time.time()
|
|
144
|
-
yield model.ToolCallItem(
|
|
145
|
-
name=item.name,
|
|
146
|
-
arguments=item.arguments.strip(),
|
|
147
|
-
call_id=item.call_id,
|
|
148
|
-
id=item.id,
|
|
149
|
-
response_id=response_id,
|
|
150
|
-
)
|
|
151
|
-
case _:
|
|
152
|
-
pass
|
|
153
|
-
case responses.ResponseCompletedEvent() as event:
|
|
154
|
-
usage: model.Usage | None = None
|
|
155
|
-
error_reason: str | None = None
|
|
156
|
-
if event.response.incomplete_details is not None:
|
|
157
|
-
error_reason = event.response.incomplete_details.reason
|
|
158
|
-
if event.response.usage is not None:
|
|
159
|
-
total_tokens = event.response.usage.total_tokens
|
|
160
|
-
context_usage_percent = (
|
|
161
|
-
(total_tokens / param.context_limit) * 100 if param.context_limit else None
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
throughput_tps: float | None = None
|
|
165
|
-
first_token_latency_ms: float | None = None
|
|
210
|
+
metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
|
|
166
211
|
|
|
167
|
-
|
|
168
|
-
first_token_latency_ms = (first_token_time - request_start_time) * 1000
|
|
212
|
+
payload = build_payload(param)
|
|
169
213
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
214
|
+
log_debug(
|
|
215
|
+
json.dumps(payload, ensure_ascii=False, default=str),
|
|
216
|
+
style="yellow",
|
|
217
|
+
debug_type=DebugType.LLM_PAYLOAD,
|
|
218
|
+
)
|
|
219
|
+
try:
|
|
220
|
+
stream = await self.client.responses.create(
|
|
221
|
+
**payload,
|
|
222
|
+
extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
|
|
223
|
+
)
|
|
224
|
+
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
225
|
+
yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
226
|
+
return
|
|
178
227
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
cached_tokens=event.response.usage.input_tokens_details.cached_tokens,
|
|
182
|
-
reasoning_tokens=event.response.usage.output_tokens_details.reasoning_tokens,
|
|
183
|
-
output_tokens=event.response.usage.output_tokens,
|
|
184
|
-
total_tokens=total_tokens,
|
|
185
|
-
context_usage_percent=context_usage_percent,
|
|
186
|
-
throughput_tps=throughput_tps,
|
|
187
|
-
first_token_latency_ms=first_token_latency_ms,
|
|
188
|
-
)
|
|
189
|
-
calculate_cost(usage, self._config.cost)
|
|
190
|
-
yield model.ResponseMetadataItem(
|
|
191
|
-
usage=usage,
|
|
192
|
-
response_id=response_id,
|
|
193
|
-
model_name=str(param.model),
|
|
194
|
-
status=event.response.status,
|
|
195
|
-
error_reason=error_reason,
|
|
196
|
-
)
|
|
197
|
-
if event.response.status != "completed":
|
|
198
|
-
error_message = f"LLM response finished with status '{event.response.status}'"
|
|
199
|
-
if error_reason:
|
|
200
|
-
error_message = f"{error_message}: {error_reason}"
|
|
201
|
-
log_debug(
|
|
202
|
-
"[LLM status warning]",
|
|
203
|
-
error_message,
|
|
204
|
-
style="red",
|
|
205
|
-
debug_type=DebugType.LLM_STREAM,
|
|
206
|
-
)
|
|
207
|
-
yield model.StreamErrorItem(error=error_message)
|
|
208
|
-
case _:
|
|
209
|
-
log_debug(
|
|
210
|
-
"[Unhandled stream event]",
|
|
211
|
-
str(event),
|
|
212
|
-
style="red",
|
|
213
|
-
debug_type=DebugType.LLM_STREAM,
|
|
214
|
-
)
|
|
215
|
-
except RateLimitError as e:
|
|
216
|
-
yield model.StreamErrorItem(error=f"{e.__class__.__name__} {str(e)}")
|
|
228
|
+
async for item in parse_responses_stream(stream, param, metadata_tracker):
|
|
229
|
+
yield item
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# pyright: reportReturnType=false
|
|
2
2
|
# pyright: reportArgumentType=false
|
|
3
|
+
# pyright: reportAssignmentType=false
|
|
3
4
|
|
|
4
5
|
from typing import Any
|
|
5
6
|
|
|
@@ -34,7 +35,7 @@ def _build_tool_result_item(tool: model.ToolResultItem) -> responses.ResponseInp
|
|
|
34
35
|
"call_id": tool.call_id,
|
|
35
36
|
"output": content_parts,
|
|
36
37
|
}
|
|
37
|
-
return item
|
|
38
|
+
return item
|
|
38
39
|
|
|
39
40
|
|
|
40
41
|
def convert_history_to_input(
|
|
@@ -51,6 +52,7 @@ def convert_history_to_input(
|
|
|
51
52
|
items: list[responses.ResponseInputItemParam] = []
|
|
52
53
|
|
|
53
54
|
pending_reasoning_text: str | None = None
|
|
55
|
+
degraded_thinking_texts: list[str] = []
|
|
54
56
|
|
|
55
57
|
for item in history:
|
|
56
58
|
match item:
|
|
@@ -60,6 +62,9 @@ def convert_history_to_input(
|
|
|
60
62
|
# or we can choose to output it if the next item is NOT reasoning?
|
|
61
63
|
# For now, based on instructions, we pair them.
|
|
62
64
|
if model_name != item.model:
|
|
65
|
+
# Cross-model: collect thinking text for degradation
|
|
66
|
+
if item.content:
|
|
67
|
+
degraded_thinking_texts.append(item.content)
|
|
63
68
|
continue
|
|
64
69
|
pending_reasoning_text = item.content
|
|
65
70
|
|
|
@@ -130,6 +135,20 @@ def convert_history_to_input(
|
|
|
130
135
|
# Other items may be Metadata
|
|
131
136
|
continue
|
|
132
137
|
|
|
138
|
+
# Cross-model: degrade thinking to plain text with <thinking> tags
|
|
139
|
+
if degraded_thinking_texts:
|
|
140
|
+
degraded_item: responses.ResponseInputItemParam = {
|
|
141
|
+
"type": "message",
|
|
142
|
+
"role": "assistant",
|
|
143
|
+
"content": [
|
|
144
|
+
{
|
|
145
|
+
"type": "output_text",
|
|
146
|
+
"text": "<thinking>\n" + "\n".join(degraded_thinking_texts) + "\n</thinking>",
|
|
147
|
+
}
|
|
148
|
+
],
|
|
149
|
+
}
|
|
150
|
+
items.insert(0, degraded_item)
|
|
151
|
+
|
|
133
152
|
return items
|
|
134
153
|
|
|
135
154
|
|
klaude_code/llm/usage.py
CHANGED
|
@@ -14,6 +14,9 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> No
|
|
|
14
14
|
if cost_config is None:
|
|
15
15
|
return
|
|
16
16
|
|
|
17
|
+
# Set currency
|
|
18
|
+
usage.currency = cost_config.currency
|
|
19
|
+
|
|
17
20
|
# Non-cached input tokens cost
|
|
18
21
|
non_cached_input = usage.input_tokens - usage.cached_tokens
|
|
19
22
|
usage.input_cost = (non_cached_input / 1_000_000) * cost_config.input
|
|
@@ -24,9 +27,6 @@ def calculate_cost(usage: model.Usage, cost_config: llm_param.Cost | None) -> No
|
|
|
24
27
|
# Cache read cost
|
|
25
28
|
usage.cache_read_cost = (usage.cached_tokens / 1_000_000) * cost_config.cache_read
|
|
26
29
|
|
|
27
|
-
# Total cost
|
|
28
|
-
usage.total_cost = usage.input_cost + usage.output_cost + usage.cache_read_cost
|
|
29
|
-
|
|
30
30
|
|
|
31
31
|
class MetadataTracker:
|
|
32
32
|
"""Tracks timing and metadata for LLM responses."""
|
|
@@ -81,7 +81,7 @@ class MetadataTracker:
|
|
|
81
81
|
) * 1000
|
|
82
82
|
|
|
83
83
|
if self._last_token_time is not None and self._metadata_item.usage.output_tokens > 0:
|
|
84
|
-
time_duration = self._last_token_time - self.
|
|
84
|
+
time_duration = self._last_token_time - self._request_start_time
|
|
85
85
|
if time_duration >= 0.15:
|
|
86
86
|
self._metadata_item.usage.throughput_tps = self._metadata_item.usage.output_tokens / time_duration
|
|
87
87
|
|
|
@@ -92,18 +92,23 @@ class MetadataTracker:
|
|
|
92
92
|
return self._metadata_item
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def convert_usage(
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
def convert_usage(
|
|
96
|
+
usage: openai.types.CompletionUsage,
|
|
97
|
+
context_limit: int | None = None,
|
|
98
|
+
max_tokens: int | None = None,
|
|
99
|
+
) -> model.Usage:
|
|
100
|
+
"""Convert OpenAI CompletionUsage to internal Usage model.
|
|
101
|
+
|
|
102
|
+
context_token is set to total_tokens from the API response,
|
|
103
|
+
representing the actual context window usage for this turn.
|
|
104
|
+
"""
|
|
99
105
|
return model.Usage(
|
|
100
106
|
input_tokens=usage.prompt_tokens,
|
|
101
107
|
cached_tokens=(usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0) or 0,
|
|
102
108
|
reasoning_tokens=(usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else 0)
|
|
103
109
|
or 0,
|
|
104
110
|
output_tokens=usage.completion_tokens,
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
first_token_latency_ms=None,
|
|
111
|
+
context_size=usage.total_tokens,
|
|
112
|
+
context_limit=context_limit,
|
|
113
|
+
max_tokens=max_tokens,
|
|
109
114
|
)
|
klaude_code/protocol/commands.py
CHANGED
|
@@ -1,9 +1,20 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
1
2
|
from enum import Enum
|
|
2
3
|
|
|
3
4
|
|
|
5
|
+
@dataclass(frozen=True, slots=True)
|
|
6
|
+
class CommandInfo:
|
|
7
|
+
"""Lightweight command metadata for UI purposes (no logic)."""
|
|
8
|
+
|
|
9
|
+
name: str
|
|
10
|
+
summary: str
|
|
11
|
+
support_addition_params: bool = False
|
|
12
|
+
placeholder: str = ""
|
|
13
|
+
|
|
14
|
+
|
|
4
15
|
class CommandName(str, Enum):
|
|
5
16
|
INIT = "init"
|
|
6
|
-
|
|
17
|
+
DEBUG = "debug"
|
|
7
18
|
HELP = "help"
|
|
8
19
|
MODEL = "model"
|
|
9
20
|
COMPACT = "compact"
|
|
@@ -11,7 +22,12 @@ class CommandName(str, Enum):
|
|
|
11
22
|
CLEAR = "clear"
|
|
12
23
|
TERMINAL_SETUP = "terminal-setup"
|
|
13
24
|
EXPORT = "export"
|
|
25
|
+
EXPORT_ONLINE = "export-online"
|
|
14
26
|
STATUS = "status"
|
|
27
|
+
RELEASE_NOTES = "release-notes"
|
|
28
|
+
THINKING = "thinking"
|
|
29
|
+
FORK_SESSION = "fork-session"
|
|
30
|
+
RESUME = "resume"
|
|
15
31
|
# PLAN and DOC are dynamically registered now, but kept here if needed for reference
|
|
16
32
|
# or we can remove them if no code explicitly imports them.
|
|
17
33
|
# PLAN = "plan"
|
klaude_code/protocol/events.py
CHANGED
|
@@ -16,6 +16,7 @@ class EndEvent(BaseModel):
|
|
|
16
16
|
class ErrorEvent(BaseModel):
|
|
17
17
|
error_message: str
|
|
18
18
|
can_retry: bool = False
|
|
19
|
+
session_id: str | None = None
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class TaskStartEvent(BaseModel):
|
|
@@ -26,6 +27,7 @@ class TaskStartEvent(BaseModel):
|
|
|
26
27
|
class TaskFinishEvent(BaseModel):
|
|
27
28
|
session_id: str
|
|
28
29
|
task_result: str
|
|
30
|
+
has_structured_output: bool = False
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class TurnStartEvent(BaseModel):
|
|
@@ -54,6 +56,12 @@ class ThinkingEvent(BaseModel):
|
|
|
54
56
|
content: str
|
|
55
57
|
|
|
56
58
|
|
|
59
|
+
class ThinkingDeltaEvent(BaseModel):
|
|
60
|
+
session_id: str
|
|
61
|
+
response_id: str | None = None
|
|
62
|
+
content: str
|
|
63
|
+
|
|
64
|
+
|
|
57
65
|
class AssistantMessageDeltaEvent(BaseModel):
|
|
58
66
|
session_id: str
|
|
59
67
|
response_id: str | None = None
|
|
@@ -79,7 +87,6 @@ class ToolCallEvent(BaseModel):
|
|
|
79
87
|
tool_call_id: str
|
|
80
88
|
tool_name: str
|
|
81
89
|
arguments: str
|
|
82
|
-
is_replay: bool = False
|
|
83
90
|
|
|
84
91
|
|
|
85
92
|
class ToolResultEvent(BaseModel):
|
|
@@ -90,16 +97,23 @@ class ToolResultEvent(BaseModel):
|
|
|
90
97
|
result: str
|
|
91
98
|
ui_extra: model.ToolResultUIExtra | None = None
|
|
92
99
|
status: Literal["success", "error"]
|
|
93
|
-
|
|
100
|
+
task_metadata: model.TaskMetadata | None = None # Sub-agent task metadata
|
|
94
101
|
|
|
95
102
|
|
|
96
103
|
class ResponseMetadataEvent(BaseModel):
|
|
97
|
-
"""
|
|
104
|
+
"""Internal event for turn-level metadata. Not exposed to UI directly."""
|
|
98
105
|
|
|
99
106
|
session_id: str
|
|
100
107
|
metadata: model.ResponseMetadataItem
|
|
101
108
|
|
|
102
109
|
|
|
110
|
+
class TaskMetadataEvent(BaseModel):
|
|
111
|
+
"""Task-level aggregated metadata for UI display."""
|
|
112
|
+
|
|
113
|
+
session_id: str
|
|
114
|
+
metadata: model.TaskMetadataItem
|
|
115
|
+
|
|
116
|
+
|
|
103
117
|
class UserMessageEvent(BaseModel):
|
|
104
118
|
session_id: str
|
|
105
119
|
content: str
|
|
@@ -120,16 +134,26 @@ class TodoChangeEvent(BaseModel):
|
|
|
120
134
|
todos: list[model.TodoItem]
|
|
121
135
|
|
|
122
136
|
|
|
137
|
+
class ContextUsageEvent(BaseModel):
|
|
138
|
+
"""Real-time context usage update during task execution."""
|
|
139
|
+
|
|
140
|
+
session_id: str
|
|
141
|
+
context_percent: float # Context usage percentage (0-100)
|
|
142
|
+
|
|
143
|
+
|
|
123
144
|
HistoryItemEvent = (
|
|
124
145
|
ThinkingEvent
|
|
146
|
+
| TaskStartEvent
|
|
147
|
+
| TaskFinishEvent
|
|
125
148
|
| TurnStartEvent # This event is used for UI to print new empty line
|
|
126
149
|
| AssistantMessageEvent
|
|
127
150
|
| ToolCallEvent
|
|
128
151
|
| ToolResultEvent
|
|
129
152
|
| UserMessageEvent
|
|
130
|
-
|
|
|
153
|
+
| TaskMetadataEvent
|
|
131
154
|
| InterruptEvent
|
|
132
155
|
| DeveloperMessageEvent
|
|
156
|
+
| ErrorEvent
|
|
133
157
|
)
|
|
134
158
|
|
|
135
159
|
|
|
@@ -144,11 +168,13 @@ Event = (
|
|
|
144
168
|
TaskStartEvent
|
|
145
169
|
| TaskFinishEvent
|
|
146
170
|
| ThinkingEvent
|
|
171
|
+
| ThinkingDeltaEvent
|
|
147
172
|
| AssistantMessageDeltaEvent
|
|
148
173
|
| AssistantMessageEvent
|
|
149
174
|
| ToolCallEvent
|
|
150
175
|
| ToolResultEvent
|
|
151
176
|
| ResponseMetadataEvent
|
|
177
|
+
| TaskMetadataEvent
|
|
152
178
|
| ReplayHistoryEvent
|
|
153
179
|
| ErrorEvent
|
|
154
180
|
| EndEvent
|
|
@@ -160,4 +186,5 @@ Event = (
|
|
|
160
186
|
| TurnStartEvent
|
|
161
187
|
| TurnEndEvent
|
|
162
188
|
| TurnToolCallStartEvent
|
|
189
|
+
| ContextUsageEvent
|
|
163
190
|
)
|