klaude-code 2.4.2__py3-none-any.whl → 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/app/runtime.py +2 -6
- klaude_code/cli/main.py +0 -1
- klaude_code/config/assets/builtin_config.yaml +7 -0
- klaude_code/const.py +7 -4
- klaude_code/core/agent.py +10 -1
- klaude_code/core/agent_profile.py +47 -35
- klaude_code/core/executor.py +6 -21
- klaude_code/core/manager/sub_agent_manager.py +17 -1
- klaude_code/core/prompts/prompt-sub-agent-web.md +4 -4
- klaude_code/core/task.py +65 -4
- klaude_code/core/tool/__init__.py +0 -5
- klaude_code/core/tool/context.py +12 -1
- klaude_code/core/tool/offload.py +311 -0
- klaude_code/core/tool/shell/bash_tool.md +1 -43
- klaude_code/core/tool/sub_agent_tool.py +1 -0
- klaude_code/core/tool/todo/todo_write_tool.md +0 -23
- klaude_code/core/tool/tool_runner.py +14 -9
- klaude_code/core/tool/web/web_fetch_tool.md +1 -1
- klaude_code/core/tool/web/web_fetch_tool.py +14 -39
- klaude_code/core/turn.py +128 -139
- klaude_code/llm/anthropic/client.py +176 -82
- klaude_code/llm/bedrock/client.py +8 -12
- klaude_code/llm/claude/client.py +11 -15
- klaude_code/llm/client.py +31 -4
- klaude_code/llm/codex/client.py +7 -11
- klaude_code/llm/google/client.py +150 -69
- klaude_code/llm/openai_compatible/client.py +10 -15
- klaude_code/llm/openai_compatible/stream.py +68 -6
- klaude_code/llm/openrouter/client.py +9 -15
- klaude_code/llm/partial_message.py +35 -0
- klaude_code/llm/responses/client.py +134 -68
- klaude_code/llm/usage.py +30 -0
- klaude_code/protocol/commands.py +0 -4
- klaude_code/protocol/events/metadata.py +1 -0
- klaude_code/protocol/events/system.py +0 -4
- klaude_code/protocol/model.py +2 -15
- klaude_code/protocol/sub_agent/explore.py +0 -10
- klaude_code/protocol/sub_agent/image_gen.py +0 -7
- klaude_code/protocol/sub_agent/task.py +0 -10
- klaude_code/protocol/sub_agent/web.py +4 -12
- klaude_code/session/templates/export_session.html +4 -4
- klaude_code/skill/manager.py +2 -1
- klaude_code/tui/components/metadata.py +41 -49
- klaude_code/tui/components/rich/markdown.py +1 -3
- klaude_code/tui/components/rich/theme.py +2 -2
- klaude_code/tui/components/tools.py +0 -31
- klaude_code/tui/components/welcome.py +1 -32
- klaude_code/tui/input/prompt_toolkit.py +25 -9
- klaude_code/tui/machine.py +2 -1
- {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/METADATA +1 -1
- {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/RECORD +53 -53
- klaude_code/core/prompts/prompt-nano-banana.md +0 -1
- klaude_code/core/tool/truncation.py +0 -203
- {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/WHEEL +0 -0
- {klaude_code-2.4.2.dist-info → klaude_code-2.5.0.dist-info}/entry_points.txt +0 -0
klaude_code/llm/anthropic/client.py
CHANGED

@@ -31,10 +31,11 @@ from klaude_code.const import (
     LLM_HTTP_TIMEOUT_TOTAL,
 )
 from klaude_code.llm.anthropic.input import convert_history_to_input, convert_system_to_input, convert_tool_schema
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
+from klaude_code.llm.partial_message import degrade_thinking_to_text
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker,
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
 from klaude_code.protocol import llm_param, message, model
 
@@ -54,6 +55,94 @@ def _map_anthropic_stop_reason(reason: str) -> model.StopReason | None:
     return mapping.get(reason)
 
 
+class AnthropicStreamStateManager:
+    """Manages streaming state for Anthropic API responses.
+
+    Accumulates thinking, content, and tool call parts during streaming
+    to support partial message retrieval on cancellation.
+    """
+
+    def __init__(self, model_id: str) -> None:
+        self.model_id = model_id
+        self.accumulated_thinking: list[str] = []
+        self.accumulated_content: list[str] = []
+        self.parts: list[message.Part] = []
+        self.response_id: str | None = None
+        self.pending_signature: str | None = None
+        self.stop_reason: model.StopReason | None = None
+
+        # Tool call state
+        self.current_tool_name: str | None = None
+        self.current_tool_call_id: str | None = None
+        self.current_tool_inputs: list[str] | None = None
+
+        # Token tracking
+        self.input_token: int = 0
+        self.cached_token: int = 0
+
+    def flush_thinking(self) -> None:
+        """Flush accumulated thinking content into parts."""
+        if not self.accumulated_thinking:
+            return
+        full_thinking = "".join(self.accumulated_thinking)
+        self.parts.append(message.ThinkingTextPart(text=full_thinking, model_id=self.model_id))
+        if self.pending_signature:
+            self.parts.append(
+                message.ThinkingSignaturePart(
+                    signature=self.pending_signature,
+                    model_id=self.model_id,
+                    format="anthropic",
+                )
+            )
+        self.accumulated_thinking.clear()
+        self.pending_signature = None
+
+    def flush_content(self) -> None:
+        """Flush accumulated content into parts."""
+        if not self.accumulated_content:
+            return
+        self.parts.append(message.TextPart(text="".join(self.accumulated_content)))
+        self.accumulated_content.clear()
+
+    def flush_tool_call(self) -> None:
+        """Flush current tool call into parts."""
+        if self.current_tool_name and self.current_tool_call_id:
+            self.parts.append(
+                message.ToolCallPart(
+                    call_id=self.current_tool_call_id,
+                    tool_name=self.current_tool_name,
+                    arguments_json="".join(self.current_tool_inputs) if self.current_tool_inputs else "",
+                )
+            )
+        self.current_tool_name = None
+        self.current_tool_call_id = None
+        self.current_tool_inputs = None
+
+    def flush_all(self) -> list[message.Part]:
+        """Flush all accumulated content in order and return parts."""
+        self.flush_thinking()
+        self.flush_content()
+        self.flush_tool_call()
+        return list(self.parts)
+
+    def get_partial_message(self) -> message.AssistantMessage | None:
+        """Build a partial AssistantMessage from accumulated state.
+
+        Flushes all accumulated content and returns the message with
+        stop_reason="aborted". Returns None if no content has been accumulated.
+        """
+        self.flush_thinking()
+        self.flush_content()
+        parts = degrade_thinking_to_text(list(self.parts))
+        if not parts:
+            return None
+        return message.AssistantMessage(
+            parts=parts,
+            response_id=self.response_id,
+            stop_reason="aborted",
+        )
+
+
 def build_payload(
     param: llm_param.LLMCallParameter,
     *,
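Worth pausing on this class: everything the old parse loop kept in local variables now lives on one object, which is what makes partial-message recovery possible. Below is a minimal sketch of the cancellation path; it is hypothetical usage, not code from the diff, the model id and delta strings are invented, and it assumes the 2.5.0 internals exactly as shown in the hunk above.

```python
# Hypothetical usage sketch (not part of the diff); assumes klaude_code 2.5.0.
from klaude_code.llm.anthropic.client import AnthropicStreamStateManager

state = AnthropicStreamStateManager(model_id="example-model")  # invented id

# Simulate deltas arriving before any content-block-stop event has fired,
# i.e. nothing has been flushed into parts yet.
state.accumulated_thinking.append("Reasoning about the request... ")
state.accumulated_content.append("Partial answer text")

partial = state.get_partial_message()
assert partial is not None
assert partial.stop_reason == "aborted"
# get_partial_message() passed the parts through degrade_thinking_to_text(),
# presumably so unfinished thinking survives as plain replayable text.
```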
@@ -113,22 +202,13 @@ async def parse_anthropic_stream(
     stream: Any,
     param: llm_param.LLMCallParameter,
     metadata_tracker: MetadataTracker,
+    state: AnthropicStreamStateManager,
 ) -> AsyncGenerator[message.LLMStreamItem]:
-    """Parse Anthropic beta messages stream and yield stream items."""
-    accumulated_thinking: list[str] = []
-    accumulated_content: list[str] = []
-    parts: list[message.Part] = []
-    response_id: str | None = None
-    stop_reason: model.StopReason | None = None
-    pending_signature: str | None = None
-
-    current_tool_name: str | None = None
-    current_tool_call_id: str | None = None
-    current_tool_inputs: list[str] | None = None
-
-    input_token = 0
-    cached_token = 0
+    """Parse Anthropic beta messages stream and yield stream items.
 
+    The state parameter allows external access to accumulated content
+    for cancellation scenarios.
+    """
     async for event in await stream:
         log_debug(
             f"[{event.type}]",
@@ -138,34 +218,33 @@ async def parse_anthropic_stream(
         )
         match event:
             case BetaRawMessageStartEvent() as event:
-                response_id = event.message.id
-                cached_token = event.message.usage.cache_read_input_tokens or 0
-                input_token = event.message.usage.input_tokens
+                state.response_id = event.message.id
+                state.cached_token = event.message.usage.cache_read_input_tokens or 0
+                state.input_token = event.message.usage.input_tokens
             case BetaRawContentBlockDeltaEvent() as event:
                 match event.delta:
                     case BetaThinkingDelta() as delta:
                         if delta.thinking:
                             metadata_tracker.record_token()
-                            accumulated_thinking.append(delta.thinking)
-                            yield message.ThinkingTextDelta(
-                                content=delta.thinking,
-                                response_id=response_id,
-                            )
+                            state.accumulated_thinking.append(delta.thinking)
+                            yield message.ThinkingTextDelta(
+                                content=delta.thinking,
+                                response_id=state.response_id,
+                            )
                     case BetaSignatureDelta() as delta:
-                        pending_signature = delta.signature
+                        state.pending_signature = delta.signature
                     case BetaTextDelta() as delta:
                         if delta.text:
                             metadata_tracker.record_token()
-                            accumulated_content.append(delta.text)
-                            yield message.AssistantTextDelta(
-                                content=delta.text,
-                                response_id=response_id,
-                            )
+                            state.accumulated_content.append(delta.text)
+                            yield message.AssistantTextDelta(
+                                content=delta.text,
+                                response_id=state.response_id,
+                            )
                     case BetaInputJSONDelta() as delta:
-                        if current_tool_inputs is not None:
-                            if delta.partial_json:
-                                metadata_tracker.record_token()
-                            current_tool_inputs.append(delta.partial_json)
+                        if state.current_tool_inputs is not None and delta.partial_json:
+                            metadata_tracker.record_token()
+                            state.current_tool_inputs.append(delta.partial_json)
                     case _:
                         pass
             case BetaRawContentBlockStartEvent() as event:
@@ -173,74 +252,92 @@ async def parse_anthropic_stream(
                     case BetaToolUseBlock() as block:
                         metadata_tracker.record_token()
                         yield message.ToolCallStartDelta(
-                            response_id=response_id,
+                            response_id=state.response_id,
                             call_id=block.id,
                             name=block.name,
                         )
-                        current_tool_name = block.name
-                        current_tool_call_id = block.id
-                        current_tool_inputs = []
+                        state.current_tool_name = block.name
+                        state.current_tool_call_id = block.id
+                        state.current_tool_inputs = []
                     case _:
                         pass
             case BetaRawContentBlockStopEvent():
-                if accumulated_thinking:
+                if state.accumulated_thinking:
                     metadata_tracker.record_token()
-                    full_thinking = "".join(accumulated_thinking)
-                    parts.append(message.ThinkingTextPart(text=full_thinking, model_id=str(param.model_id)))
-                    if pending_signature:
-                        parts.append(
-                            message.ThinkingSignaturePart(
-                                signature=pending_signature,
-                                model_id=str(param.model_id),
-                                format="anthropic",
-                            )
-                        )
-                    accumulated_thinking.clear()
-                    pending_signature = None
-                if accumulated_content:
+                    state.flush_thinking()
+                if state.accumulated_content:
                     metadata_tracker.record_token()
-                    parts.append(message.TextPart(text="".join(accumulated_content)))
-                    accumulated_content.clear()
-                if current_tool_name and current_tool_call_id:
+                    state.flush_content()
+                if state.current_tool_name and state.current_tool_call_id:
                     metadata_tracker.record_token()
-                    parts.append(
-                        message.ToolCallPart(
-                            call_id=current_tool_call_id,
-                            tool_name=current_tool_name,
-                            arguments_json="".join(current_tool_inputs) if current_tool_inputs else "",
-                        )
-                    )
-                    current_tool_name = None
-                    current_tool_call_id = None
-                    current_tool_inputs = None
+                    state.flush_tool_call()
             case BetaRawMessageDeltaEvent() as event:
                 metadata_tracker.set_usage(
                     model.Usage(
-                        input_tokens=input_token + cached_token,
+                        input_tokens=state.input_token + state.cached_token,
                         output_tokens=event.usage.output_tokens,
-                        cached_tokens=cached_token,
-                        context_size=input_token + cached_token + event.usage.output_tokens,
+                        cached_tokens=state.cached_token,
+                        context_size=state.input_token + state.cached_token + event.usage.output_tokens,
                         context_limit=param.context_limit,
                         max_tokens=param.max_tokens,
                     )
                 )
                 metadata_tracker.set_model_name(str(param.model_id))
-                metadata_tracker.set_response_id(response_id)
+                metadata_tracker.set_response_id(state.response_id)
                 raw_stop_reason = getattr(event, "stop_reason", None)
                 if isinstance(raw_stop_reason, str):
-                    stop_reason = _map_anthropic_stop_reason(raw_stop_reason)
+                    state.stop_reason = _map_anthropic_stop_reason(raw_stop_reason)
             case _:
                 pass
 
+    parts = state.flush_all()
+    if parts:
+        metadata_tracker.record_token()
     metadata = metadata_tracker.finalize()
     yield message.AssistantMessage(
         parts=parts,
-        response_id=response_id,
+        response_id=state.response_id,
         usage=metadata,
-        stop_reason=stop_reason,
+        stop_reason=state.stop_reason,
     )
 
 
+class AnthropicLLMStream(LLMStreamABC):
+    """LLMStream implementation for Anthropic-compatible clients."""
+
+    def __init__(
+        self,
+        stream: Any,
+        *,
+        param: llm_param.LLMCallParameter,
+        metadata_tracker: MetadataTracker,
+    ) -> None:
+        self._stream = stream
+        self._param = param
+        self._metadata_tracker = metadata_tracker
+        self._state = AnthropicStreamStateManager(model_id=str(param.model_id))
+        self._completed = False
+
+    def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
+        return self._iterate()
+
+    async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
+        async for item in parse_anthropic_stream(
+            self._stream,
+            self._param,
+            self._metadata_tracker,
+            self._state,
+        ):
+            if isinstance(item, message.AssistantMessage):
+                self._completed = True
+            yield item
+
+    def get_partial_message(self) -> message.AssistantMessage | None:
+        if self._completed:
+            return None
+        return self._state.get_partial_message()
+
+
 @register(llm_param.LLMClientProtocol.ANTHROPIC)
 class AnthropicClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):
@@ -269,7 +366,7 @@ class AnthropicClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) ->
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -282,15 +379,12 @@ class AnthropicClient(LLMClientABC):
             debug_type=DebugType.LLM_PAYLOAD,
         )
 
-        stream = self.client.beta.messages.create(
-            **payload,
-            extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
-        )
-
         try:
-
-
+            stream = self.client.beta.messages.create(
+                **payload,
+                extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
+            )
+            return AnthropicLLMStream(stream, param=param, metadata_tracker=metadata_tracker)
         except (APIError, httpx.HTTPError) as e:
             error_message = f"{e.__class__.__name__} {e!s}"
-
-            yield item
+            return error_llm_stream(metadata_tracker, error=error_message)
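Taken together, call() stops being an async generator: it returns a stream object immediately, and API/HTTP failures come back as an error stream (error_llm_stream) instead of being yielded mid-iteration. Below is a sketch of a hypothetical caller, not code from this diff; the function name and its cancellation policy are invented, and it assumes the 2.5.0 contract shown above.

```python
# Hypothetical caller (not from the diff); assumes the klaude_code 2.5.0 API.
import asyncio

from klaude_code.llm.client import LLMClientABC
from klaude_code.protocol import llm_param, message


async def run_one_turn(
    client: LLMClientABC,
    param: llm_param.LLMCallParameter,
) -> message.AssistantMessage | None:
    stream = await client.call(param)  # returns at once, even on API error
    try:
        async for item in stream:
            if isinstance(item, message.AssistantMessage):
                return item  # finished normally (or via error_llm_stream)
    except asyncio.CancelledError:
        # Esc/interrupt path: keep whatever the model produced so far,
        # marked stop_reason="aborted" by the stream implementation.
        return stream.get_partial_message()
    return None
```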
klaude_code/llm/bedrock/client.py
CHANGED

@@ -1,7 +1,6 @@
 """AWS Bedrock LLM client using Anthropic SDK."""
 
 import json
-from collections.abc import AsyncGenerator
 from typing import override
 
 import anthropic
@@ -9,13 +8,13 @@ import httpx
 from anthropic import APIError
 
 from klaude_code.const import LLM_HTTP_TIMEOUT_CONNECT, LLM_HTTP_TIMEOUT_READ, LLM_HTTP_TIMEOUT_TOTAL
-from klaude_code.llm.anthropic.client import
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.anthropic.client import AnthropicLLMStream, build_payload
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker,
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
-from klaude_code.protocol import llm_param
+from klaude_code.protocol import llm_param
 
 
 @register(llm_param.LLMClientProtocol.BEDROCK)
@@ -39,7 +38,7 @@ class BedrockClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) ->
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         param = apply_config_defaults(param, self.get_llm_config())
 
         metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
@@ -52,12 +51,9 @@ class BedrockClient(LLMClientABC):
             debug_type=DebugType.LLM_PAYLOAD,
         )
 
-        stream = self.client.beta.messages.create(**payload)
-
         try:
-
-
+            stream = self.client.beta.messages.create(**payload)
+            return AnthropicLLMStream(stream, param=param, metadata_tracker=metadata_tracker)
         except (APIError, httpx.HTTPError) as e:
            error_message = f"{e.__class__.__name__} {e!s}"
-
-            yield item
+            return error_llm_stream(metadata_tracker, error=error_message)
klaude_code/llm/claude/client.py
CHANGED

@@ -1,5 +1,4 @@
 import json
-from collections.abc import AsyncGenerator
 from typing import override
 
 import anthropic
@@ -17,13 +16,13 @@ from klaude_code.const import (
     LLM_HTTP_TIMEOUT_READ,
     LLM_HTTP_TIMEOUT_TOTAL,
 )
-from klaude_code.llm.anthropic.client import
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.anthropic.client import AnthropicLLMStream, build_payload
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.usage import MetadataTracker,
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
-from klaude_code.protocol import llm_param
+from klaude_code.protocol import llm_param
 
 _CLAUDE_OAUTH_REQUIRED_BETAS: tuple[str, ...] = (
     ANTHROPIC_BETA_OAUTH,
@@ -71,7 +70,7 @@ class ClaudeClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) ->
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         self._ensure_valid_token()
         param = apply_config_defaults(param, self.get_llm_config())
 
@@ -91,15 +90,12 @@ class ClaudeClient(LLMClientABC):
             debug_type=DebugType.LLM_PAYLOAD,
         )
 
-        stream = self.client.beta.messages.create(
-            **payload,
-            extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
-        )
-
        try:
-
-
+            stream = self.client.beta.messages.create(
+                **payload,
+                extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
+            )
+            return AnthropicLLMStream(stream, param=param, metadata_tracker=metadata_tracker)
         except (APIError, httpx.HTTPError) as e:
             error_message = f"{e.__class__.__name__} {e!s}"
-
-            yield item
+            return error_llm_stream(metadata_tracker, error=error_message)
klaude_code/llm/client.py
CHANGED

@@ -1,10 +1,34 @@
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
-from typing import ParamSpec, TypeVar
+from typing import ParamSpec, TypeVar
 
 from klaude_code.protocol import llm_param, message
 
 
+class LLMStreamABC(ABC):
+    """Abstract base class for LLM streaming response with state access.
+
+    Provides both async iteration over stream items and access to accumulated
+    message state for cancellation scenarios.
+    """
+
+    @abstractmethod
+    def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
+        """Iterate over stream items."""
+        ...
+
+    @abstractmethod
+    def get_partial_message(self) -> message.AssistantMessage | None:
+        """Get accumulated message for cancel scenarios.
+
+        Returns the message constructed from accumulated parts so far,
+        including thinking and assistant text. Returns None if no content
+        has been accumulated yet.
+
+        """
+        ...
+
+
 class LLMClientABC(ABC):
     def __init__(self, config: llm_param.LLMConfigParameter) -> None:
         self._config = config
@@ -15,9 +39,12 @@ class LLMClientABC(ABC):
         pass
 
     @abstractmethod
-    async def call(self, param: llm_param.LLMCallParameter) ->
-
-
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
+        """Start an LLM call and return a stream object.
+
+        The returned stream can be iterated to receive stream items,
+        and provides get_partial_message() for cancellation scenarios.
+        """
         raise NotImplementedError
 
     def get_llm_config(self) -> llm_param.LLMConfigParameter:
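Because the contract is just __aiter__ plus get_partial_message(), it is easy to stub out for tests or a new provider. A minimal fake follows; it is hypothetical and not part of klaude_code, and the constructor arguments for the message types are assumptions inferred from how they are used elsewhere in this diff.

```python
# Hypothetical test double (not in the package); message-type constructor
# arguments are assumed from their call sites in this diff.
from collections.abc import AsyncGenerator

from klaude_code.llm.client import LLMStreamABC
from klaude_code.protocol import message


class CannedStream(LLMStreamABC):
    """Yields one text delta and one final message; useful as a test stub."""

    def __init__(self, text: str) -> None:
        self._text = text
        self._completed = False

    def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
        return self._iterate()

    async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
        yield message.AssistantTextDelta(content=self._text, response_id=None)
        self._completed = True
        yield message.AssistantMessage(
            parts=[message.TextPart(text=self._text)],
            response_id=None,
            stop_reason=None,  # assumed optional; real streams map a reason
        )

    def get_partial_message(self) -> message.AssistantMessage | None:
        if self._completed:
            return None  # mirrors AnthropicLLMStream: nothing left to recover
        return message.AssistantMessage(
            parts=[message.TextPart(text=self._text)],
            response_id=None,
            stop_reason="aborted",
        )
```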
klaude_code/llm/codex/client.py
CHANGED

@@ -1,7 +1,6 @@
 """Codex LLM client using ChatGPT subscription via OAuth."""
 
 import json
-from collections.abc import AsyncGenerator
 from typing import override
 
 import httpx
@@ -19,14 +18,14 @@ from klaude_code.const import (
     LLM_HTTP_TIMEOUT_READ,
     LLM_HTTP_TIMEOUT_TOTAL,
 )
-from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.client import LLMClientABC, LLMStreamABC
 from klaude_code.llm.input_common import apply_config_defaults
 from klaude_code.llm.registry import register
-from klaude_code.llm.responses.client import
+from klaude_code.llm.responses.client import ResponsesLLMStream
 from klaude_code.llm.responses.input import convert_history_to_input, convert_tool_schema
-from klaude_code.llm.usage import MetadataTracker,
+from klaude_code.llm.usage import MetadataTracker, error_llm_stream
 from klaude_code.log import DebugType, log_debug
-from klaude_code.protocol import llm_param
+from klaude_code.protocol import llm_param
 
 
 def build_payload(param: llm_param.LLMCallParameter) -> ResponseCreateParamsStreaming:
@@ -118,7 +117,7 @@ class CodexClient(LLMClientABC):
         return cls(config)
 
     @override
-    async def call(self, param: llm_param.LLMCallParameter) ->
+    async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
         # Ensure token is valid before API call
         self._ensure_valid_token()
 
@@ -147,9 +146,6 @@ class CodexClient(LLMClientABC):
             )
         except (openai.OpenAIError, httpx.HTTPError) as e:
             error_message = f"{e.__class__.__name__} {e!s}"
-
-            yield item
-            return
+            return error_llm_stream(metadata_tracker, error=error_message)
 
-
-        yield item
+        return ResponsesLLMStream(stream, param=param, metadata_tracker=metadata_tracker)